9471355e77
Replaced 'cat file | awk' with 'awk file' patterns for efficiency. This eliminates unnecessary child processes and improves performance. Changes: - Lines 1629-1635: hourly bot traffic analysis - Lines 1915-1955: false positive detection (awk single script) - Lines 1969-1998: statistics generation (added file argument) - Lines 2006-2007: top bots calculation - Lines 2010-2011: traffic breakdown calculation - Line 2016: domain bot types indexing - Lines 2636, 2645: bandwidth impact calculation These are all simple pipe-to-awk patterns that can be inverted to pass the file directly to awk instead of piping from cat.
4677 lines
183 KiB
Bash
Executable File
4677 lines
183 KiB
Bash
Executable File
#!/bin/bash
# Strict mode: stop on the first unhandled command failure, and treat a
# pipeline as failed when any of its stages fails.
set -eo pipefail

#############################################################################
# Apache/cPanel Domain Log Bot & Botnet Analyzer
# Version: 3.1 Enhanced (with Library Integration)
# Advanced log analysis for bot activity, security threats, and botnets
#
# Features:
# - Comprehensive bot classification (legitimate, AI, monitoring, suspicious)
# - Enhanced attack vector detection (SQL injection, XSS, path traversal,
#   RCE/shell upload, info disclosure, login bruteforce)
# - Threat scoring system (0-100 risk scores for each IP)
# - Time-series analysis with hourly traffic visualization
# - Response code intelligence (what are bots finding?)
# - False positive detection for legitimate monitoring services
# - Bandwidth cost estimation for bot traffic
# - Botnet pattern analysis (coordinated attacks, DDoS detection)
# - Prioritized blocklists sorted by threat severity
# - Actionable reports with copy-paste ready configurations
# - Performance optimized for large log files (>500k entries)
# - User filtering (analyze all users or specific user)
# - Auto-detects log directory based on control panel
#############################################################################

# Load libraries
# SCRIPT_DIR resolves to the directory two levels above this script; every
# library below is sourced from its lib/ subdirectory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$SCRIPT_DIR/lib/common-functions.sh"
source "$SCRIPT_DIR/lib/system-detect.sh"
source "$SCRIPT_DIR/lib/user-manager.sh"
source "$SCRIPT_DIR/lib/ip-reputation.sh"
source "$SCRIPT_DIR/lib/bot-signatures.sh"
source "$SCRIPT_DIR/lib/attack-patterns.sh"
source "$SCRIPT_DIR/lib/threat-intelligence.sh"

# Default configuration (auto-detected from system)
# SYS_LOG_DIR is not assigned in this script; presumably lib/system-detect.sh
# sets it - confirm. The cPanel domlogs path is the fallback.
LOG_DIR="${SYS_LOG_DIR:-/var/log/apache2/domlogs}"

# Use toolkit's tmp directory instead of system /tmp to avoid filling it up
# On large servers with 200+ domains, compressed temp files can still be 50-100MB
# Using toolkit's tmp dir means:
# - Won't fill up system /tmp
# - Gets auto-cleaned when toolkit is removed
# - Included in cleanup script (clean-and-push-toolkit.sh)
TOOLKIT_TMP_DIR="$SCRIPT_DIR/tmp"
# Say why we stop: under `set -e` a bare failing mkdir with its stderr hidden
# would end the script without any message at all.
if ! mkdir -p "$TOOLKIT_TMP_DIR" 2>/dev/null; then
    echo "Error: Cannot create toolkit temp directory: $TOOLKIT_TMP_DIR" >&2
    exit 1
fi

# NEW: Baseline history directory (stores 30 days of historical data per domain)
BASELINE_DIR="$TOOLKIT_TMP_DIR/baseline_history"
if ! mkdir -p "$BASELINE_DIR" 2>/dev/null; then
    echo "Error: Cannot create baseline directory: $BASELINE_DIR" >&2
    exit 1
fi

# Per-run scratch directory (keyed by PID) and default report path
TEMP_DIR="$TOOLKIT_TMP_DIR/bot_analysis_$$"
OUTPUT_FILE="$TOOLKIT_TMP_DIR/bot_analysis_report_$(date +%Y%m%d_%H%M%S).txt"
DAYS_BACK=""    # Empty means all logs, otherwise filter by days
HOURS_BACK=""   # Empty means all logs, otherwise filter by hours
FILTER_USER=""  # Empty means all users, otherwise specific user

# Cache CSF availability (avoid running `command -v csf` 5 times)
CSF_AVAILABLE=false
if command -v csf >/dev/null 2>&1 && [ -f /etc/csf/csf.conf ]; then
    CSF_AVAILABLE=true
fi

# Parse command line arguments

#######################################
# Abort with a hint when an option that needs a value is the last argument.
# Without this check `shift 2` fails and strict mode ends the script silently.
# Arguments: $1 - option as typed, $2 - number of arguments left (option included)
#######################################
_require_option_value() {
    if [ "$2" -lt 2 ]; then
        echo "Error: option $1 requires a value" >&2
        echo "Use -h for help" >&2
        exit 1
    fi
}

#######################################
# Validate a day/hour count and print it as plain decimal.
# The value later reaches shell arithmetic and find(1) predicates, so it must
# be digits only; leading zeros are stripped because $(( 08 * 60 )) would be
# rejected as invalid octal.
# Arguments: $1 - option as typed, $2 - value supplied by the user
# Outputs:   normalised number on stdout, error text on stderr
# Returns:   exits 1 on invalid input
#######################################
_positive_int_or_die() {
    if ! [[ "$2" =~ ^[0-9]+$ ]] || [ "$((10#$2))" -le 0 ]; then
        echo "Error: $1 expects a positive whole number, got: $2" >&2
        exit 1
    fi
    echo "$((10#$2))"
}

#######################################
# Apply command line options to the global configuration.
# Globals:   DAYS_BACK, HOURS_BACK, LOG_DIR, OUTPUT_FILE, FILTER_USER (written)
# Arguments: the script's command line
# Returns:   exits 0 after printing help, exits 1 on an unknown or invalid option
#######################################
parse_cli_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -d|--days)
                _require_option_value "$1" "$#"
                # `|| exit 1`: the helper runs in a subshell, so its exit must
                # be propagated explicitly.
                DAYS_BACK=$(_positive_int_or_die "$1" "$2") || exit 1
                shift 2
                ;;
            -H|--hours)
                _require_option_value "$1" "$#"
                HOURS_BACK=$(_positive_int_or_die "$1" "$2") || exit 1
                shift 2
                ;;
            -l|--log-dir)
                _require_option_value "$1" "$#"
                LOG_DIR="$2"
                shift 2
                ;;
            -o|--output)
                _require_option_value "$1" "$#"
                OUTPUT_FILE="$2"
                shift 2
                ;;
            -u|--user)
                _require_option_value "$1" "$#"
                FILTER_USER="$2"
                shift 2
                ;;
            -h|--help)
                echo "Apache/cPanel Domain Log Bot & Botnet Analyzer v3.1"
                echo ""
                echo "Usage: $0 [-d DAYS | -H HOURS] [-u USER] [-l LOG_DIR] [-o OUTPUT_FILE]"
                echo ""
                echo "Options:"
                echo " -d, --days DAYS Analyze only logs from last N days (24-hour periods)"
                echo " -H, --hours HOURS Analyze only logs from last N hours"
                echo " -u, --user USER Analyze only logs for specific cPanel user"
                echo " -l, --log-dir DIR Custom log directory (auto-detected by default)"
                echo " -o, --output FILE Custom output file path"
                echo " -h, --help Show this help message"
                echo ""
                echo "Examples:"
                echo " $0 # Analyze all logs in default directory"
                echo " $0 -d 7 # Analyze logs from last 7 days"
                echo " $0 -H 6 # Analyze logs from last 6 hours"
                echo " $0 -l /custom/path # Use custom log directory"
                echo ""
                echo "Note: If both -d and -H are specified, only -H (hours) will be used."
                echo ""
                exit 0
                ;;
            *)
                echo "Unknown option: $1"
                echo "Use -h for help"
                exit 1
                ;;
        esac
    done
}

parse_cli_args "$@"

# Interactive prompts for missing options

#######################################
# Show the time-range menu and store the operator's selection.
# Globals:   HOURS_BACK or DAYS_BACK (written, at most one of them);
#            time_choice (written, last menu answer); GREEN, NC (read)
# Inputs:    menu answer, plus a count for options 7 and 8, read from stdin
# Returns:   0 once a valid selection was made, 1 when stdin hits end-of-file
#######################################
prompt_time_range() {
    local custom_hours custom_days

    clear
    print_banner "Bot Analyzer - Time Range Selection"
    echo ""
    echo -e " ${GREEN}1)${NC} All available logs"
    echo -e " ${GREEN}2)${NC} Last 1 hour"
    echo -e " ${GREEN}3)${NC} Last 6 hours"
    echo -e " ${GREEN}4)${NC} Last 24 hours"
    echo -e " ${GREEN}5)${NC} Last 7 days"
    echo -e " ${GREEN}6)${NC} Last 30 days"
    echo -e " ${GREEN}7)${NC} Custom hours"
    echo -e " ${GREEN}8)${NC} Custom days"
    echo ""

    # Validate time_choice input with retry loop
    while true; do
        # Stop on EOF instead of re-prompting forever on a closed stdin
        read -r -p "Select time range (1-8): " time_choice || return 1

        if ! [[ "$time_choice" =~ ^[1-8]$ ]]; then
            print_error "Invalid choice. Please enter 1-8"
            continue
        fi

        case $time_choice in
            1) break ;; # All logs - no filter
            2) HOURS_BACK=1; break ;;
            3) HOURS_BACK=6; break ;;
            4) HOURS_BACK=24; break ;;
            5) DAYS_BACK=7; break ;;
            6) DAYS_BACK=30; break ;;
            7)
                while true; do
                    read -r -p "Enter number of hours: " custom_hours || return 1
                    # 10# forces base 10: "08"/"09" would otherwise be
                    # rejected as octal by later $(( HOURS_BACK * 60 )).
                    if [[ "$custom_hours" =~ ^[0-9]+$ ]] && [ "$((10#$custom_hours))" -gt 0 ]; then
                        HOURS_BACK=$((10#$custom_hours))
                        break 2 # Break out of both loops
                    else
                        print_error "Invalid input. Please enter a positive number"
                    fi
                done
                ;;
            8)
                while true; do
                    read -r -p "Enter number of days: " custom_days || return 1
                    if [[ "$custom_days" =~ ^[0-9]+$ ]] && [ "$((10#$custom_days))" -gt 0 ]; then
                        DAYS_BACK=$((10#$custom_days))
                        break 2 # Break out of both loops
                    else
                        print_error "Invalid input. Please enter a positive number"
                    fi
                done
                ;;
        esac
    done
}

#######################################
# Ask whether to analyze all users or a single one.
# Globals:   FILTER_USER (written only when a specific user was picked);
#            user_choice (written, last menu answer); GREEN, NC (read)
# Inputs:    menu answer from stdin; the user pick is delegated to
#            select_user_interactive (defined outside this block)
# Returns:   0 after a valid menu answer, 1 when stdin hits end-of-file
#######################################
prompt_user_scope() {
    local selected

    clear
    print_banner "Bot Analyzer - User Scope Selection"
    echo ""
    echo -e " ${GREEN}1)${NC} All users (system-wide analysis)"
    echo -e " ${GREEN}2)${NC} Specific user"
    echo ""

    # Validate user_choice input with retry loop
    while true; do
        read -r -p "Select option (1-2): " user_choice || return 1

        if ! [[ "$user_choice" =~ ^[1-2]$ ]]; then
            print_error "Invalid choice. Please enter 1 or 2"
            continue
        fi

        if [ "$user_choice" = "2" ]; then
            echo ""
            # Test the helper's status directly. `local var=$(cmd)` followed by
            # `$?` reports the status of `local` (always 0), so a failed or
            # cancelled selection used to be accepted as the user name.
            if selected=$(select_user_interactive "Select user to analyze") \
                && [ "$selected" != "ALL" ]; then
                FILTER_USER="$selected"
            fi
        fi
        break
    done
}

# Interactive prompts for missing options
# Prompt for time range if not specified
if [ -z "$DAYS_BACK" ] && [ -z "$HOURS_BACK" ]; then
    prompt_time_range
fi

# Prompt for user if not specified
if [ -z "$FILTER_USER" ]; then
    prompt_user_scope
fi

# Validate time filter options
# Hours win when both filters are set, matching the note in the help text.
# NOTE(review): YELLOW/NC are assigned further down in this script; presumably
# a sourced library defines them before this point - confirm.
if [ -n "$DAYS_BACK" ] && [ -n "$HOURS_BACK" ]; then
    echo -e "${YELLOW}Warning: Both days and hours specified. Using hours filter only.${NC}" >&2
    DAYS_BACK=""
fi

# Color codes for terminal output
# Stored as literal backslash sequences; they only turn into real escape
# characters when printed through `echo -e` or `printf '%b'`.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m' # No Color

# Check for required commands
#######################################
# Verify that the external tools this script relies on are usable.
# Globals:   RED, NC (read)
# Outputs:   error text on stderr
# Returns:   0 when everything is available; exits 1 otherwise
#######################################
check_dependencies() {
    local cmd
    local missing_deps=()

    for cmd in awk grep sort uniq find sed head tail cut; do
        if ! command -v "$cmd" >/dev/null 2>&1; then
            missing_deps+=("$cmd")
        fi
    done

    if [ ${#missing_deps[@]} -gt 0 ]; then
        echo -e "${RED}Error: Missing required commands: ${missing_deps[*]}${NC}" >&2
        exit 1
    fi

    # The log parser uses match() with an array argument and mktime(), which
    # are GNU awk extensions. Its awk stderr is discarded, so with another awk
    # the run would only end in a misleading "No log entries were parsed".
    if ! awk 'BEGIN { if (match("ab", /(a)(b)/, m) && m[2] == "b" && mktime("2000 1 1 0 0 0") != "") exit 0; exit 1 }' >/dev/null 2>&1; then
        echo -e "${RED}Error: awk lacks required GNU extensions (match() with array, mktime()). Please install gawk.${NC}" >&2
        exit 1
    fi
}

# Check disk space
#######################################
# Warn, and ask for confirmation, when the filesystem holding the toolkit has
# less than 100MB free.
# Globals:   SCRIPT_DIR, YELLOW, NC (read); REPLY (written by read)
# Inputs:    single-key confirmation from stdin when space is low
# Returns:   0 to continue; exits 1 when the operator does not answer y/Y
#######################################
check_disk_space() {
    local available_kb
    local check_path="$SCRIPT_DIR"

    # -P keeps each filesystem on one line (long device names otherwise wrap
    # and shift the columns); -k fixes the unit to 1024-byte blocks.
    # `|| available_kb=""` keeps a failing df from aborting the script under
    # `set -eo pipefail` before the warning below can be shown.
    available_kb=$(df -Pk "$check_path" 2>/dev/null | tail -n 1 | awk '{print $4}') || available_kb=""

    if ! [[ "$available_kb" =~ ^[0-9]+$ ]]; then
        echo -e "${YELLOW}Warning: Cannot determine available disk space for toolkit directory${NC}" >&2
        return 0
    fi

    if [ "$available_kb" -lt 102400 ]; then # Less than 100MB
        echo -e "${YELLOW}Warning: Low disk space in toolkit directory: $((available_kb/1024))MB available${NC}" >&2
        # No answer (EOF) counts as "no"
        read -p "Continue anyway? (y/N): " -n 1 -r || REPLY=""
        echo
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            exit 1
        fi
    fi
}

# Run dependency checks
check_dependencies
check_disk_space

# Create temp directory
mkdir -p "$TEMP_DIR" || {
    echo -e "${RED}Error: Cannot create temp directory: $TEMP_DIR${NC}" >&2
    exit 1
}

# Cleanup on exit
# The path is still frozen at registration time, but %q shell-quotes it so a
# directory name containing quotes, spaces or `$` cannot break or alter the
# trap command.
trap "rm -rf -- $(printf '%q' "$TEMP_DIR")" EXIT

#############################################################################
# Bot Signature Database
#############################################################################
# NOTE: Bot signatures now loaded from lib/bot-signatures.sh
# Arrays available: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS

#############################################################################
# Helper Functions
#############################################################################

#######################################
# Print a section title framed by two rules, with a blank line before and after.
# Globals:   CYAN, NC (read)
# Arguments: $1 - title text (backslash escapes are interpreted)
# Outputs:   five lines on stdout
#######################################
print_header() {
    local rule="==============================================================="

    printf '%b\n' "\n${CYAN}${rule}${NC}"
    printf '%b\n' "${CYAN}$1${NC}"
    printf '%b\n' "${CYAN}${rule}${NC}\n"
}

# Print $1 in the alert color (RED) on stdout; backslash escapes in the
# message are interpreted, as with `echo -e`.
print_alert() {
    printf '%b\n' "${RED}$1${NC}"
}

# Print $1 in the warning color (YELLOW) on stdout; backslash escapes in the
# message are interpreted, as with `echo -e`.
print_warning() {
    printf '%b\n' "${YELLOW}$1${NC}"
}

# Print $1 in the info color (BLUE) on stdout, preceded by one space;
# backslash escapes in the message are interpreted, as with `echo -e`.
print_info() {
    printf '%b\n' "${BLUE} $1${NC}"
}

# Print $1 in the success color (GREEN) on stdout; backslash escapes in the
# message are interpreted, as with `echo -e`.
print_success() {
    printf '%b\n' "${GREEN}$1${NC}"
}

#############################################################################
# Log Parsing Functions
#############################################################################

#######################################
# Parse every matching access log into "$TEMP_DIR/parsed_logs.txt", one
# request per line:
#   ip|domain|url|status|size|user_agent|method|timestamp|referer|accept_lang
# Globals (read): INTERWORX_MODE, SYS_CONTROL_PANEL, LOG_DIR, HOURS_BACK,
#                 DAYS_BACK, FILTER_USER, TEMP_DIR, and user_domains
#                 (newline-separated domain list; not assigned in this block -
#                 presumably filled in by the caller when FILTER_USER is set)
# Outputs:  progress on stderr, status via print_info/print_alert/print_success,
#           parsed_logs.txt plus a gzip copy written by a background job
# Returns:  0 when at least one entry was parsed, 1 otherwise
# Requires: GNU awk (match() with array argument, mktime())
# NOTE(review): a literal "|" inside a URL, referer or user agent still shifts
#               the output columns; left unchanged because downstream pattern
#               matching is outside this view.
#######################################
parse_logs() {
    if [ "$INTERWORX_MODE" = "yes" ]; then
        print_info "Parsing InterWorx domain logs from: /home/*/var/*/logs/"
    else
        print_info "Parsing logs from: $LOG_DIR"
    fi

    local find_opts=()
    local cutoff_epoch=""
    local hours days

    # Add time filter if specified (hours takes precedence over days)
    # The values reach shell arithmetic, so only plain digits are accepted.
    # The cutoff is computed once here rather than by spawning date(1) from
    # awk for every log file through an interpolated command string.
    if [ -n "$HOURS_BACK" ]; then
        if ! [[ "$HOURS_BACK" =~ ^[0-9]+$ ]]; then
            print_alert "Invalid hours filter: $HOURS_BACK"
            return 1
        fi
        hours=$((10#$HOURS_BACK))
        find_opts+=(-mmin -"$((hours * 60))")
        # GNU date first, BSD date as fallback; empty disables the per-entry filter
        cutoff_epoch=$(date -d "$hours hours ago" +%s 2>/dev/null \
            || date -v-"${hours}"H +%s 2>/dev/null) || cutoff_epoch=""
        print_info "Filtering logs from last $HOURS_BACK hours"
    elif [ -n "$DAYS_BACK" ]; then
        if ! [[ "$DAYS_BACK" =~ ^[0-9]+$ ]]; then
            print_alert "Invalid days filter: $DAYS_BACK"
            return 1
        fi
        days=$((10#$DAYS_BACK))
        find_opts+=(-mtime -"$days")
        cutoff_epoch=$(date -d "$days days ago" +%s 2>/dev/null \
            || date -v-"${days}"d +%s 2>/dev/null) || cutoff_epoch=""
        print_info "Filtering logs from last $DAYS_BACK days"
    fi

    # Determine log file search pattern based on control panel
    local -a log_search_paths=()
    local log_search_name
    if [ "$INTERWORX_MODE" = "yes" ]; then
        # InterWorx: Official docs from https://appendix.interworx.com/current/nodeworx/general/other/log-file-locations.html
        # HTTP: /home/{user}/var/{domain}/logs/transfer.log
        # HTTPS: /home/{user}/var/{domain}/logs/transfer-ssl.log
        # The glob is deliberately unquoted: find(1) does not expand wildcards
        # in its start directories, the shell has to.
        log_search_paths=(/home/*/var/*/logs)
        log_search_name="transfer*.log"
    else
        # cPanel: /var/log/apache2/domlogs/domain.com or domain.com-ssl_log
        # Plesk: Research verified paths from https://docs.plesk.com/en-US/obsidian/
        # Apache HTTP: /var/www/vhosts/system/{domain}/logs/access_log
        # Apache HTTPS: /var/www/vhosts/system/{domain}/logs/access_ssl_log
        # nginx HTTP: /var/www/vhosts/system/{domain}/logs/proxy_access_log
        # nginx HTTPS: /var/www/vhosts/system/{domain}/logs/proxy_access_ssl_log
        # Note: /var/www/vhosts/{domain}/logs/ are hardlinks (backward compat)
        log_search_paths=("$LOG_DIR")
        log_search_name="*"
    fi

    # Parse all domain logs
    local file_count=0
    local progress_interval=5 # Show progress every 5 files instead of 50
    local logfile domain
    echo ""

    # Everything the loop writes to stdout lands in parsed_logs.txt, so
    # progress messages must go to stderr. Reading find via process
    # substitution keeps a failing find (missing directory) from tripping
    # `set -eo pipefail`; the empty-result check below reports it instead.
    while IFS= read -r logfile; do
        # Skip empty files
        [ -s "$logfile" ] || continue

        # Extract domain name based on control panel
        if [ "$INTERWORX_MODE" = "yes" ]; then
            # InterWorx: extract from path /home/user/var/domain.com/logs/transfer*.log
            domain=$(sed -n 's|^/home/.*/var/\([^/]*\)/logs/.*|\1|p' <<< "$logfile")
        elif [ "$SYS_CONTROL_PANEL" = "plesk" ]; then
            # Plesk: extract from path /var/www/vhosts/system/domain.com/logs/{access_log,access_ssl_log,proxy_*}
            domain=$(sed -n 's|^/var/www/vhosts/system/\([^/]*\)/logs/.*|\1|p' <<< "$logfile")
        else
            # cPanel: extract from filename /var/log/apache2/domlogs/domain.com or domain.com-ssl_log
            domain=${logfile##*/}
            domain=${domain%-ssl_log}
        fi

        # Skip if domain extraction failed
        [ -n "$domain" ] || continue

        # User filtering: skip domains not belonging to the specified user
        # (here-string instead of `echo | grep -q`, which can fail spuriously
        # under pipefail when grep exits before echo has finished writing)
        if [ -n "$FILTER_USER" ]; then
            if ! grep -qFx -- "$domain" <<< "$user_domains"; then
                continue
            fi
        fi

        # Show progress every N files
        file_count=$((file_count + 1))
        if [ $((file_count % progress_interval)) -eq 0 ]; then
            printf '\r Parsed %d log files... (current: %s)' "$file_count" "$domain" >&2
        fi

        # Parse Apache Combined Log Format with error handling
        # Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT"
        # `|| true`: one unreadable log must not abort the whole run
        awk -v domain="$domain" -v cutoff_epoch="$cutoff_epoch" '
        BEGIN {
            # Month name to number lookup
            month["Jan"]=1; month["Feb"]=2; month["Mar"]=3; month["Apr"]=4
            month["May"]=5; month["Jun"]=6; month["Jul"]=7; month["Aug"]=8
            month["Sep"]=9; month["Oct"]=10; month["Nov"]=11; month["Dec"]=12
        }
        {
            # Skip empty lines and malformed entries
            if (NF < 10 || length($0) < 50) next

            # Extract IP (first field - space separated)
            ip = $1

            # Extract timestamp (between square brackets)
            if (match($0, /\[([^\]]+)\]/, ts)) {
                timestamp = ts[1]
            } else {
                timestamp = "unknown"
            }

            # Filter by timestamp if a cutoff was computed
            if (cutoff_epoch != "" && timestamp != "unknown") {
                # Extract just the date/time part (before timezone)
                # Format: 31/Dec/2025:10:30:15 -0500
                split(timestamp, ts_parts, " ")
                log_ts = ts_parts[1]

                # Parse: dd/mmm/yyyy:HH:MM:SS
                split(log_ts, dt, /[\/:]/)
                day = dt[1]
                mon = month[dt[2]]
                year = dt[3]
                hour = dt[4]
                min = dt[5]
                sec = dt[6]

                # Convert to epoch using awk mktime (YYYY MM DD HH MM SS)
                # mktime is much faster than spawning date command
                if (mon != "") {
                    log_epoch = mktime(year " " mon " " day " " hour " " min " " sec)

                    # Numerical comparison of epoch seconds
                    if (log_epoch < cutoff_epoch) {
                        next # Skip this entry, too old
                    }
                }
            }

            # Extract HTTP method, URL, and status
            if (match($0, /"([A-Z]+) ([^ ]+) [^"]*" ([0-9]+) ([0-9-]+)/, req)) {
                http_method = req[1]
                request_url = req[2]
                status = req[3]
                size = req[4]
            } else {
                # Fallback for malformed requests
                http_method = "-"
                request_url = "-"
                status = "-"
                size = "0"
            }

            # Extract User-Agent (last quoted string)
            if (match($0, /"([^"]*)"[[:space:]]*$/, ua)) {
                user_agent = ua[1]
                if (user_agent == "") user_agent = "-"
            } else {
                user_agent = "-"
            }

            # Extract additional headers for enhanced analysis
            referer = "-"
            accept_lang = "-"

            # Extract Referer header (the quoted string before the user agent)
            if (match($0, /"([^"]*)"[[:space:]]*"[^"]*"[[:space:]]*$/, ref)) {
                referer = ref[1]
                if (referer == "") referer = "-"
            }

            # Try to extract Accept-Language from log (if available).
            # awk has no /regex/i flag (the trailing i is parsed as a variable),
            # so the line is lowercased for the search and the value is cut
            # from the original text. 17 = length of "accept-language: ".
            if (match(tolower($0), /accept-language: [^ ,;]*/)) {
                accept_lang = substr($0, RSTART + 17, RLENGTH - 17)
            }

            # Only output valid entries
            if (ip != "" && ip !~ /^[[:space:]]*$/) {
                print ip "|" domain "|" request_url "|" status "|" size "|" user_agent "|" http_method "|" timestamp "|" referer "|" accept_lang
            }
        }' "$logfile" 2>/dev/null || true
    done < <(find "${log_search_paths[@]}" -type f -name "$log_search_name" \
        ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" \
        "${find_opts[@]}" 2>/dev/null) > "$TEMP_DIR/parsed_logs.txt"

    # Clear the progress line
    printf '\r\033[K' >&2

    if [ ! -s "$TEMP_DIR/parsed_logs.txt" ]; then
        print_alert "No log entries were parsed. Check log format or permissions."
        return 1
    fi

    local line_count
    line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
    local file_size_kb
    file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")

    # Compress for storage (gzip saves ~90% space on text)
    # But we keep uncompressed version for fast analysis
    gzip -c "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/parsed_logs.txt.gz" &

    print_success "Logs parsed successfully ($line_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}

#############################################################################
# Bot Detection & Classification
#############################################################################

#######################################
# Classify parsed requests by user agent and keep only those recognised as bots.
# Reads  "$TEMP_DIR/parsed_logs.txt" and writes "$TEMP_DIR/classified_bots.txt":
#   ip|domain|url|status|size|user_agent|method|timestamp|bot_type|bot_name
# bot_type is one of legit, ai, monitor, suspicious, unidentified_bot.
# Globals (read): TEMP_DIR and the signature arrays LEGIT_BOTS, AI_BOTS,
#                 MONITOR_BOTS, SUSPICIOUS_BOTS (their keys are the patterns)
# Returns: 0 on success, 1 when no request was classified
#######################################
classify_bots() {
    print_info "Classifying bot traffic..."

    # Build combined alternation patterns ("name1|name2|...") for efficiency
    local legit_pattern ai_pattern monitor_pattern suspicious_pattern
    legit_pattern=$(printf "%s|" "${!LEGIT_BOTS[@]}" | sed 's/|$//')
    ai_pattern=$(printf "%s|" "${!AI_BOTS[@]}" | sed 's/|$//')
    monitor_pattern=$(printf "%s|" "${!MONITOR_BOTS[@]}" | sed 's/|$//')
    suspicious_pattern=$(printf "%s|" "${!SUSPICIOUS_BOTS[@]}" | sed 's/|$//')

    # Process logs with AWK for better performance
    awk -F'|' -v legit="$legit_pattern" -v ai="$ai_pattern" -v monitor="$monitor_pattern" -v suspicious="$suspicious_pattern" '
    # First entry of names[1..n] found in the lowercased user agent, else
    # "Unknown". Walking by index keeps the choice stable when several
    # signatures match; "for (i in array)" has no defined order.
    function first_matching_name(ua_lc, names, n,    i) {
        for (i = 1; i <= n; i++) {
            if (match(ua_lc, tolower(names[i]))) {
                return names[i]
            }
        }
        return "Unknown"
    }

    # 1 when the lowercased user agent carries a complete browser signature
    # (Mozilla/5.0 + platform + rendering engine + browser version), else 0.
    function is_real_browser(ua_lc) {
        # Chrome/Chromium-based: Must have Chrome/ AND (AppleWebKit OR Mobile)
        if (ua_lc ~ /chrome\/[0-9]/ && (ua_lc ~ /applewebkit/ || ua_lc ~ /mobile/)) return 1
        # Firefox: Must have Firefox/ AND Gecko/
        if (ua_lc ~ /firefox\/[0-9]/ && ua_lc ~ /gecko\//) return 1
        # Safari: Must have Safari/ AND Version/ AND AppleWebKit (not Chrome)
        if (ua_lc ~ /safari\/[0-9]/ && ua_lc ~ /version\// && ua_lc ~ /applewebkit/ && ua_lc !~ /chrome/) return 1
        # Edge: Must have Edg/ or Edge/
        if (ua_lc ~ /edg\/[0-9]|edge\/[0-9]/) return 1
        # Mobile browsers: Samsung, UC, Opera Mobile
        if (ua_lc ~ /samsungbrowser\/[0-9]|ucbrowser\/[0-9]|opr\/[0-9]/) return 1
        return 0
    }

    BEGIN {
        # Lowercase the combined patterns once for case-insensitive matching
        # and split the name lists once instead of once per record
        legit_lower = tolower(legit)
        ai_lower = tolower(ai)
        monitor_lower = tolower(monitor)
        suspicious_lower = tolower(suspicious)
        n_legit = split(legit, legit_names, "|")
        n_ai = split(ai, ai_names, "|")
        n_monitor = split(monitor, monitor_names, "|")
        n_suspicious = split(suspicious, suspicious_names, "|")
    }
    {
        ip = $1
        domain = $2
        url = $3
        status = $4
        size = $5
        ua = $6
        method = $7
        timestamp = $8
        ua_lower = tolower(ua)

        bot_type = "unknown"
        bot_name = "Unknown"

        # Check each category in priority order
        if (legit != "" && match(ua_lower, legit_lower)) {
            bot_type = "legit"
            bot_name = first_matching_name(ua_lower, legit_names, n_legit)
        } else if (ai != "" && match(ua_lower, ai_lower)) {
            bot_type = "ai"
            bot_name = first_matching_name(ua_lower, ai_names, n_ai)
        } else if (monitor != "" && match(ua_lower, monitor_lower)) {
            bot_type = "monitor"
            bot_name = first_matching_name(ua_lower, monitor_names, n_monitor)
        } else if (suspicious != "" && match(ua_lower, suspicious_lower)) {
            bot_type = "suspicious"
            bot_name = first_matching_name(ua_lower, suspicious_names, n_suspicious)
        } else if (ua_lower ~ /bot|crawler|spider|scraper|curl|wget|python-requests|python-urllib|java\/|scan|check|monitor/) {
            # Generic bot keyword found. A complete browser signature wins
            # over the keyword: such requests are dropped, not classified.
            if (is_real_browser(ua_lower)) {
                next
            }

            bot_type = "unidentified_bot"
            # First word of the UA (at most 30 characters) as bot name
            if (match(ua, /^[^ ]+/)) {
                bot_name = substr(ua, RSTART, (RLENGTH > 30 ? 30 : RLENGTH))
            } else {
                bot_name = ""
            }
        }

        # Only print if bot_type is not "unknown" (i.e., we identified it as something)
        if (bot_type != "unknown") {
            print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
        }
    }' < "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"

    if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
        print_alert "Bot classification failed"
        return 1
    fi

    local classified_count
    classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo "0")
    local file_size_kb
    file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")

    # Compress for storage in background
    gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" &

    print_success "Bot classification complete ($classified_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}

#############################################################################
# NEW: Baseline Management (historical tracking for anomaly detection)
#############################################################################

# Print the number of lines in file $1, or 0 when it is missing or unreadable.
_baseline_line_count() {
    if [ -r "$1" ]; then
        wc -l < "$1"
    else
        echo "0"
    fi
}

# Print how many lines of file $2 contain the literal string $1 (0 when the
# file is missing). grep -c prints the count itself; its non-zero status on
# "no match" is ignored so exactly one number is produced. The previous
# `grep | wc -l || echo 0` emitted "0" twice under pipefail.
_baseline_match_count() {
    local n
    n=$(grep -cF -- "$1" "$2" 2>/dev/null) || true
    echo "${n:-0}"
}

#######################################
# Append the metrics of the current run to the per-domain and global baseline
# history files and trim each of them to the last 30 records.
#   per domain: date|requests|attacks|bots|high_risk_ips
#   global:     date|requests|unique_ips|bot_pct|total_attacks|sqli|xss|path|rce|login|high_risk_ips
# Globals (read): TEMP_DIR, BASELINE_DIR
# Inputs: parsed_logs.txt, classified_bots.txt, *_attempts.txt,
#         attack_vectors_raw.txt, threat_scores.txt and all_domains.txt in
#         TEMP_DIR; any of them may be absent and then counts as 0
#######################################
save_baseline() {
    print_info "Storing baseline metrics for anomaly comparison..."

    local today
    today=$(date +%Y%m%d)

    # Calculate current metrics
    local total_requests unique_ips bot_requests
    total_requests=$(_baseline_line_count "$TEMP_DIR/parsed_logs.txt")
    unique_ips=0
    if [ -r "$TEMP_DIR/parsed_logs.txt" ]; then
        unique_ips=$(awk -F'|' '!seen[$1]++ { n++ } END { print n + 0 }' "$TEMP_DIR/parsed_logs.txt")
    fi
    bot_requests=$(_baseline_line_count "$TEMP_DIR/classified_bots.txt")

    local bot_pct=0
    if [ "$total_requests" -gt 0 ]; then
        bot_pct=$((bot_requests * 100 / total_requests))
    fi

    local sqli_attempts xss_attempts path_attempts rce_attempts login_attempts
    sqli_attempts=$(_baseline_line_count "$TEMP_DIR/sqli_attempts.txt")
    xss_attempts=$(_baseline_line_count "$TEMP_DIR/xss_attempts.txt")
    path_attempts=$(_baseline_line_count "$TEMP_DIR/path_traversal_attempts.txt")
    rce_attempts=$(_baseline_line_count "$TEMP_DIR/rce_upload_attempts.txt")
    login_attempts=$(_baseline_line_count "$TEMP_DIR/login_bruteforce_attempts.txt")
    local total_attacks=$((sqli_attempts + xss_attempts + path_attempts + rce_attempts + login_attempts))

    # IPs whose threat score (first column) is 70 or higher
    local high_risk_ips=0
    if [ -r "$TEMP_DIR/threat_scores.txt" ]; then
        high_risk_ips=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' "$TEMP_DIR/threat_scores.txt")
    fi

    # Store baseline for each domain
    if [ -f "$TEMP_DIR/all_domains.txt" ]; then
        local domain baseline_file domain_requests domain_attacks domain_bots
        while IFS= read -r domain; do
            [ -n "$domain" ] || continue
            baseline_file="$BASELINE_DIR/${domain}_baseline.txt"

            # Get domain-specific metrics
            domain_requests=$(_baseline_match_count "|$domain|" "$TEMP_DIR/parsed_logs.txt")
            domain_attacks=$(_baseline_match_count "|$domain|" "$TEMP_DIR/attack_vectors_raw.txt")
            domain_bots=$(_baseline_match_count "|$domain|" "$TEMP_DIR/classified_bots.txt")

            # Append to baseline history (timestamp|requests|attacks|bots|high_risk_ips)
            echo "$today|$domain_requests|$domain_attacks|$domain_bots|$high_risk_ips" >> "$baseline_file"

            # Keep only last 30 days
            tail -n 30 "$baseline_file" > "$baseline_file.tmp" && mv "$baseline_file.tmp" "$baseline_file"
        done < "$TEMP_DIR/all_domains.txt"
    fi

    # Store global baseline
    local global_baseline="$BASELINE_DIR/global_baseline.txt"
    echo "$today|$total_requests|$unique_ips|$bot_pct|$total_attacks|$sqli_attempts|$xss_attempts|$path_attempts|$rce_attempts|$login_attempts|$high_risk_ips" >> "$global_baseline"
    tail -n 30 "$global_baseline" > "$global_baseline.tmp" && mv "$global_baseline.tmp" "$global_baseline"

    print_success "Baseline stored"
}

#######################################
# Print the stored baseline history of one domain.
# Globals:   BASELINE_DIR (read)
# Arguments: $1 - domain name
# Outputs:   content of "$BASELINE_DIR/<domain>_baseline.txt" on stdout;
#            nothing when that file does not exist
#######################################
get_domain_baseline() {
    local domain="$1"
    local baseline_file="$BASELINE_DIR/${domain}_baseline.txt"

    if [ -f "$baseline_file" ]; then
        cat -- "$baseline_file"
    fi
}

#######################################
# Print the integer average of one baseline metric over the most recent records.
# Globals:   BASELINE_DIR (read)
# Arguments: $1 - domain name
#            $2 - metric: attacks, bots or high_risk; anything else selects
#                 requests
#            $3 - number of most recent records to average (default 7; a
#                 value that is not a whole number falls back to 7)
# Outputs:   truncated average on stdout; 0 when there is no history
#######################################
calculate_baseline_average() {
    local domain="$1"
    local metric="$2"    # requests, attacks, bots, etc.
    local days="${3:-7}" # default 7 days

    local baseline_file="$BASELINE_DIR/${domain}_baseline.txt"
    if [ ! -f "$baseline_file" ]; then
        echo "0"
        return
    fi

    # A non-numeric count would make tail fail, and with it the pipeline
    if ! [[ "$days" =~ ^[0-9]+$ ]]; then
        days=7
    fi

    # Column of the requested metric inside a baseline record
    local col=2 # requests by default
    case "$metric" in
        attacks) col=3 ;;
        bots) col=4 ;;
        high_risk) col=5 ;;
    esac

    # Get last N days (`tail -n N`; the bare `tail -N` form is obsolete)
    tail -n "$days" "$baseline_file" 2>/dev/null | awk -F'|' -v col="$col" '{sum+=$col; count++} END {if (count>0) print int(sum/count); else print 0}'
}

#############################################################################
# NEW: Attack Progression/Timeline Analysis
#############################################################################

#######################################
# Write a chronological request timeline for each of the first 20 high-risk
# IPs (threat score >= 70) listed in "$TEMP_DIR/threat_scores.txt".
# Globals (read): TEMP_DIR
# Outputs: "$TEMP_DIR/progression_<ip>.txt" with timestamp|url|status|user_agent
#          per request, and one "ip|phase|first_timestamp" line per IP appended
#          to "$TEMP_DIR/attack_phases.txt" (the phase is always
#          "reconnaissance" in this implementation)
#######################################
analyze_attack_progression() {
    print_info "Analyzing attack progression and sequences..."

    local ip progression_file phase phase_start
    local tab=$'\t'

    # For each high-risk IP, show the sequence of attacks.
    # awk stops by itself after 20 IPs, so no `head` is needed whose early
    # exit could kill the producer with SIGPIPE under pipefail.
    while IFS= read -r ip; do
        [ -n "$ip" ] || continue
        progression_file="$TEMP_DIR/progression_${ip}.txt"

        # Extract all requests from this IP, in order.
        # - The IP is compared to the first field exactly; a substring search
        #   for "1.2.3.4|" would also pick up 11.2.3.4.
        # - dd/Mon/yyyy timestamps do not sort chronologically as text, so a
        #   yyyymmddHHMMSS key is prepended, used for sorting, and cut off
        #   again. Unparseable timestamps sort last. The timezone offset is
        #   not taken into account.
        awk -F'|' -v ip="$ip" '
            BEGIN {
                n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", names, " ")
                for (i = 1; i <= n; i++) month[names[i]] = i
            }
            ($1 "") == (ip "") {
                key = "99999999999999"
                if (split($8, p, /[\/: ]/) >= 6 && (p[2] in month)) {
                    key = sprintf("%04d%02d%02d%02d%02d%02d", p[3], month[p[2]], p[1], p[4], p[5], p[6])
                }
                print key "\t" $8 "|" $3 "|" $4 "|" $6
            }' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null \
            | LC_ALL=C sort -s -t "$tab" -k1,1 \
            | cut -f2- > "$progression_file" || true

        # Detect attack phases
        phase="reconnaissance"
        phase_start=""
        # Timestamp of the earliest request; stays empty when none was found
        IFS='|' read -r phase_start _ < "$progression_file" || true

        echo "$ip|$phase|$phase_start" >> "$TEMP_DIR/attack_phases.txt"
    done < <(awk -F'|' '$1 >= 70 { print $2; if (++n >= 20) exit }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null)

    touch "$TEMP_DIR/attack_phases.txt"
    print_success "Attack progression analysis complete"
}

#############################################################################
# Header Analysis for Bot Detection
#############################################################################

#######################################
# Score every client IP on suspicious request-header patterns and record the
# ones scoring 8 or more.
# Globals (read): TEMP_DIR
# Input:  "$TEMP_DIR/parsed_logs.txt"
# Output: "$TEMP_DIR/header_anomalies.txt" with "ip|header_anomaly|score"
#         lines (created empty when nothing is flagged)
#######################################
analyze_headers() {
    print_info "Analyzing request headers for bot patterns..."

    # Analyze header patterns to improve bot detection accuracy
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1
        domain = $2
        method = $7
        referer = $9
        accept_lang = $10

        referer_lower = tolower(referer)

        # Remember every IP so that all of them get scored in END
        seen[ip] = 1

        # Pattern 1: Empty or missing Accept-Language (bots often have none)
        if (accept_lang == "-" || accept_lang == "") {
            empty_lang[ip]++
        }

        # Pattern 2: All-accepting Accept-Language (bots accept everything)
        # Real browsers: en-US,en;q=0.9 (specific negotiation)
        # Bots: */* or empty
        if (accept_lang == "*/*" || accept_lang == "*") {
            accepts_all[ip]++
        }

        # Pattern 3: Suspicious Referer patterns
        # Bots often have no referer or fake ones
        if (referer == "-" || referer == "") {
            no_referer[ip]++
        }

        # Pattern 4: Referer from suspicious sources
        if (match(referer_lower, /badbot|crawler|scanner|nikto|nmap|metasploit|sqlmap/)) {
            suspicious_referer[ip]++
        }

        # Pattern 5: Referer mismatch (referer domain != target domain)
        # Real users: referer usually from same domain or search engine
        # Bots: random referer or none
        # The domain is searched as literal, lowercased text: used as a regex
        # its dots would match any character, and an uppercase letter in it
        # could never match the lowercased referer.
        if (referer != "-" && domain != "" && index(referer_lower, tolower(domain)) == 0) {
            if (!match(referer_lower, /google|bing|yahoo|facebook|twitter|reddit|instagram/)) {
                cross_domain_referer[ip]++
            }
        }

        # Pattern 6: HEAD requests (bot reconnaissance)
        # Some bots use HEAD to test server without loading content
        if (method == "HEAD") {
            head_requests[ip]++
        }

        # Pattern 7: Options/Trace requests (security testing)
        # Real users never use these
        if (method == "OPTIONS" || method == "TRACE") {
            dangerous_methods[ip]++
        }
    }
    END {
        outfile = tmpdir "/header_anomalies.txt"

        # Flag IPs with multiple suspicious header patterns.
        # Iterating over every IP seen (not only those with an empty
        # Accept-Language) lets the other patterns flag an IP on their own.
        for (ip in seen) {
            score = 0

            # Assign points for suspicious header combinations
            if (ip in empty_lang) score += 2
            if (ip in accepts_all) score += 3
            if (ip in no_referer) score += 1
            if (ip in suspicious_referer) score += 5
            if ((ip in cross_domain_referer) && (ip in no_referer)) score += 2
            if ((ip in head_requests) && head_requests[ip] > 50) score += 4
            if (ip in dangerous_methods) score += 10

            # Only flag if high header suspicion score
            if (score >= 8) {
                print ip "|header_anomaly|" score > outfile
            }
        }
        close(outfile)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it does not exist (awk only creates it on the first hit)
    touch "$TEMP_DIR/header_anomalies.txt"
    print_success "Header analysis complete"
}

#############################################################################
# NEW: Entry Point Analysis (where bots start)
#############################################################################

#######################################
# Classify the first request of every client IP as suspicious, normal or
# static. "First" means first occurrence in parsed_logs.txt.
# Globals: TEMP_DIR (read); suspicious_count (written when the suspicious
#          file exists after the scan)
# Input:   "$TEMP_DIR/parsed_logs.txt"
# Outputs: in TEMP_DIR
#          suspicious_entry_points.txt  ip|admin_entry|url|status
#          normal_entry_points.txt      ip|normal_entry|url
#          static_entry_points.txt      ip|static_entry|url
#          Only the suspicious file is guaranteed to exist afterwards.
#######################################
analyze_entry_points() {
    print_info "Analyzing first request patterns (bot vs. user entry points)..."

    # Get first request from each IP.
    # (The former BEGIN block assigned from uninitialised variables and only
    # produced a bogus entry under the empty key; it has been dropped.)
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1

        # Track first request from each IP (first occurrence in the input)
        if (!(ip in ip_first_request)) {
            ip_first_request[ip] = $3
            ip_first_status[ip] = $4
        }
    }
    END {
        suspicious_file = tmpdir "/suspicious_entry_points.txt"
        normal_file = tmpdir "/normal_entry_points.txt"
        static_file = tmpdir "/static_entry_points.txt"

        for (ip in ip_first_request) {
            url = ip_first_request[ip]
            status = ip_first_status[ip]
            url_lower = tolower(url)

            # Suspicious entry points indicate bot/scanner
            if (match(url_lower, /wp-admin|phpmyadmin|admin|xmlrpc|shell\.php|\.env|\.git|backdoor|config\.php/)) {
                print ip "|admin_entry|" url "|" status > suspicious_file
            }
            # Legitimate entry: homepage or search
            else if (match(url_lower, /^\/index|^\/$|^\/search|^\/page|^\/category/)) {
                print ip "|normal_entry|" url > normal_file
            }
            # Unusual but possible: static files
            else if (match(url_lower, /\.(css|js|jpg|png|gif|woff|svg)$/)) {
                print ip "|static_entry|" url > static_file
            }
        }
        close(suspicious_file)
        close(normal_file)
        close(static_file)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Count suspicious entry points
    if [ -f "$TEMP_DIR/suspicious_entry_points.txt" ]; then
        suspicious_count=$(wc -l < "$TEMP_DIR/suspicious_entry_points.txt")
        print_success "Found $suspicious_count IPs with suspicious entry points"
    else
        touch "$TEMP_DIR/suspicious_entry_points.txt"
    fi
}

#############################################################################
|
||
# Threat Detection
|
||
#############################################################################
|
||
|
||
# Scan the parsed log once and classify requests into attack/threat buckets,
# then aggregate each bucket into a "count line" summary.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds parsed_logs.txt
# Reads:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited;
#     $1=ip $2=domain $3=url $4=status $5=size $6=user-agent $7=method
# Writes (all under $TEMP_DIR):
#   attack_vectors_raw.txt          ip|domain|url|status|attack_type
#   attack_types.txt                uniq -c summary of attack_type
#   <type>_attempts.txt             uniq -c of ip|domain|url|status per type
#   sqli_attempts_legacy.txt        copy of sqli_attempts.txt
#   admin_probes.txt, 404_scans.txt, large_transfers.txt,
#   suspicious_ua.txt, response_codes.txt (uniq -c of the *_raw.txt files)
detect_threats() {
    local raw_attacks="$TEMP_DIR/attack_vectors_raw.txt"
    local attack_type bucket

    print_info "Detecting security threats..."

    # Use a single AWK pass for multiple threat detections (more efficient)
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        attack_file   = tmpdir "/attack_vectors_raw.txt"
        probe_file    = tmpdir "/admin_probes_raw.txt"
        scan_file     = tmpdir "/404_scans_raw.txt"
        transfer_file = tmpdir "/large_transfers_raw.txt"
        ua_file       = tmpdir "/suspicious_ua_raw.txt"
        codes_file    = tmpdir "/response_codes_raw.txt"
    }
    {
        ip = $1
        domain = $2
        url = $3
        status = $4
        size = $5
        ua = $6
        method = $7
        url_lower = tolower(url)
        ua_lower = tolower(ua)

        # SQL injection. The hex pattern requires SQL context so that
        # blockchain/product IDs are not flagged.
        if (match(url_lower, /union.*select|concat\(|benchmark\(|sleep\(|waitfor|cast\(|exec\(/) ||
            match(url_lower, /information_schema|drop table|insert into|update.*set|delete from/) ||
            match(url_lower, /%27.*(union|select|or |and )|hex\(|unhex\(|load_file\(/) ||
            match(url_lower, /0x[0-9a-f]+.*(union|select|into|from|where|order)/)) {
            print ip "|" domain "|" url "|" status "|sqli" > attack_file
        }

        # XSS. DOM-based patterns are only flagged inside a query string so
        # documentation URLs such as /docs/innerhtml-api-guide are ignored.
        if (match(url_lower, /<script|javascript:|onerror=|onload=|<iframe|eval\(|alert\(/) ||
            match(url_lower, /\?.*(document\.cookie|document\.write|\.innerhtml)/)) {
            print ip "|" domain "|" url "|" status "|xss" > attack_file
        }

        # Path traversal / LFI, including URL-encoded variants
        # (url_lower makes the %5C / %5c check case-insensitive).
        if (match(url_lower, /\.\.\/|\.\.\\|%2e%2e|%5c|etc\/passwd|etc\/shadow|boot\.ini|win\.ini/) ||
            match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows(%5c|[\/\\])system32/)) {
            print ip "|" domain "|" url "|" status "|path_traversal" > attack_file
        }

        # Shell upload / RCE: shell commands, known malicious file names and
        # suspicious upload patterns (not every POST to a .php file).
        if (match(url_lower, /cmd\.exe|\/bin\/bash|\/bin\/sh|phpinfo\(|system\(|exec\(|passthru\(|eval\(/) ||
            match(url_lower, /shell\.php|c99\.php|r57\.php|r00t\.php|backdoor|webshell|cmd\.php|exploit\.php/) ||
            match(url_lower, /base64_decode.*eval|gzinflate.*eval|assert.*\$_/) ||
            (match(url_lower, /\.(php|phtml|php3|php4|php5|phar)\.suspected$/) && method == "POST")) {
            print ip "|" domain "|" url "|" status "|rce_upload" > attack_file
        }

        # Info disclosure: only successful access (200) or redirects
        # (301/302) count; failed attempts are tracked as scanning below.
        if (match(url_lower, /\.git\/|\.env|\.sql$|\.bak$|\.old$|\.backup$|\.orig$|\.swp$|\.sav$|~$|config\.php|phpinfo\.php/) ||
            match(url_lower, /readme\.(txt|html|md)$/) ||
            match(url_lower, /web\.config|\.htaccess|\.htpasswd/) ||
            match(url_lower, /database\.sql|backup\.zip|backup\.tar|dump\.sql/)) {
            if (status ~ /^(200|301|302)/) {
                print ip "|" domain "|" url "|" status "|info_disclosure" > attack_file
            }
        }

        # composer.json / package.json - lower severity, only if successful
        if (match(url_lower, /composer\.json|package\.json|package-lock\.json/) && status == "200") {
            print ip "|" domain "|" url "|" status "|config_exposure" > attack_file
        }

        # Login bruteforce
        if (match(url_lower, /wp-login\.php|xmlrpc\.php/) && method == "POST") {
            print ip "|" domain "|" url "|" status "|login_bruteforce" > attack_file
        }

        # Admin/sensitive endpoint probing: only FAILED attempts
        # (403/401/404) are recorded.
        if (match(url_lower, /wp-admin|phpmyadmin|admin|administrator|login|wp-login|xmlrpc/) ||
            match(url_lower, /\.env|\.git|\.sql|backup|config\./)) {
            if (status ~ /^(403|401|404)/) {
                print ip "|" domain "|" url > probe_file
            }
        }

        # 404 scanning (reconnaissance)
        if (status == "404" || status == "403") {
            print ip "|" domain "|" url "|" status > scan_file
        }

        # Large data transfers (potential scraping). "+ 0" forces a numeric
        # comparison even when the size field is "-" or otherwise non-numeric.
        if (size + 0 > 1000000) {
            print ip "|" domain "|" url "|" size > transfer_file
        }

        # Suspicious user agents
        if (match(ua_lower, /nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp/) ||
            match(ua_lower, /metasploit|<script|null|python-requests|go-http-client/)) {
            print ip "|" ua > ua_file
        }

        # Track response codes for intelligence
        print status > codes_file
    }
    ' < "$TEMP_DIR/parsed_logs.txt"

    # Process attack vectors by type
    if [ -f "$raw_attacks" ]; then
        # Overall attack vectors summary
        awk -F'|' '{print $5}' "$raw_attacks" | sort | uniq -c | sort -rn > "$TEMP_DIR/attack_types.txt"

        # Breakdown by attack type. The type is the LAST field of each raw
        # line; it is compared exactly (a fixed-string grep cannot anchor
        # with "$", which previously left every breakdown file empty).
        for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do
            awk -F'|' -v want="$attack_type" \
                '$NF == want {print $1 "|" $2 "|" $3 "|" $4}' "$raw_attacks" |
                sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt"
        done

        # Old sqli file for backwards compatibility
        if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then
            cp "$TEMP_DIR/sqli_attempts.txt" "$TEMP_DIR/sqli_attempts_legacy.txt"
        fi
    else
        touch "$TEMP_DIR/attack_types.txt"
    fi

    # Turn every raw bucket into a sorted "count line" summary; make sure the
    # summary file exists even when the bucket stayed empty.
    for bucket in admin_probes 404_scans large_transfers suspicious_ua response_codes; do
        if [ -f "$TEMP_DIR/${bucket}_raw.txt" ]; then
            sort "$TEMP_DIR/${bucket}_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/${bucket}.txt"
        else
            touch "$TEMP_DIR/${bucket}.txt"
        fi
    done

    print_success "Threat detection complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: URL Entropy Analysis (detects fuzzing/scanning)
|
||
#############################################################################
|
||
|
||
# Flag IPs that request many URLs spread over several distinct paths, which
# is treated as parameter fuzzing / scanning behaviour.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds parsed_logs.txt
# Reads:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited; $1=ip, $3=url
# Writes:
#   $TEMP_DIR/fuzzing_ips.txt - ip|parameter_fuzzing|total_requests|unique_paths
#   (always exists afterwards, possibly empty)
analyze_url_entropy() {
    print_info "Analyzing URL parameter entropy (fuzzing detection)..."

    # Written in portable awk: composite (ip, path) keys instead of
    # arrays-of-arrays, sub() instead of the three-argument match().
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1

        # Base path = URL with the query string removed
        base_path = $3
        sub(/\?.*/, "", base_path)

        urls_per_ip[ip]++

        # Count each (ip, path) combination once
        if (!((ip, base_path) in seen_path)) {
            seen_path[ip, base_path] = 1
            unique_path_count[ip]++
        }
    }
    END {
        out = tmpdir "/fuzzing_ips.txt"

        for (ip in urls_per_ip) {
            # More than 20 requests over more than 5 distinct paths
            if (urls_per_ip[ip] > 20 && unique_path_count[ip] > 5) {
                print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count[ip] > out
            }
        }
        close(out)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
    touch "$TEMP_DIR/fuzzing_ips.txt"
    print_success "URL entropy analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Request Timing Analysis (DDoS & bot behavior detection)
|
||
#############################################################################
|
||
|
||
# Flag IPs whose requests arrive at a very short average interval.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds parsed_logs.txt
# Reads:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited; $1=ip, $8=timestamp
# Writes:
#   $TEMP_DIR/timing_anomalies.txt - ip|consistent_bot_timing|avg_interval|count
#   (always exists afterwards, possibly empty)
# Notes:
#   Only the time of day is used; a negative gap between two consecutive
#   requests is treated as a day rollover.
analyze_request_timing() {
    print_info "Analyzing request timing patterns (DDoS detection)..."

    awk -F'|' -v tmpdir="$TEMP_DIR" '
    # Return seconds since midnight for the HH:MM:SS part of a timestamp, or
    # -1 when none is found. The 4-digit year is used as an anchor when
    # present, because an unanchored NN:NN:NN search on
    # "DD/Mon/YYYY:HH:MM:SS" would match "YY:HH:MM" instead.
    function seconds_of_day(stamp,    clock) {
        if (match(stamp, /[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            clock = substr(stamp, RSTART + 5, 8)
        } else if (match(stamp, /[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            clock = substr(stamp, RSTART, 8)
        } else {
            return -1
        }
        return substr(clock, 1, 2) * 3600 + substr(clock, 4, 2) * 60 + substr(clock, 7, 2)
    }
    {
        ip = $1
        seconds = seconds_of_day($8)
        if (seconds < 0) next

        request_count[ip]++

        # Accumulate the gap to the previous request of the same IP. An
        # explicit "seen before" test keeps samples taken at 00:00:00.
        if (ip in last_seen) {
            gap = seconds - last_seen[ip]
            if (gap < 0) gap += 86400   # Handle day boundary
            gap_total[ip] += gap
            gap_count[ip]++
        }
        last_seen[ip] = seconds
    }
    END {
        out = tmpdir "/timing_anomalies.txt"

        for (ip in request_count) {
            count = request_count[ip]

            # Needs a meaningful sample: more than 100 requests
            if (count > 100 && gap_count[ip] > 0) {
                avg_interval = gap_total[ip] / gap_count[ip]

                # Bots: a few seconds apart at most.
                # Real users: highly variable (5-60+ seconds).
                if (avg_interval < 3) {
                    print ip "|consistent_bot_timing|" avg_interval "|" count > out
                }
            }
        }
        close(out)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
    touch "$TEMP_DIR/timing_anomalies.txt"
    print_success "Request timing analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Fingerprinting - Combine multiple signals for accuracy
|
||
#############################################################################
|
||
|
||
# Combine several per-IP signals into a 0-100 bot confidence score and record
# the IPs that reach a score of 60 or more.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds parsed_logs.txt
# Reads:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited; $1=ip $3=url $4=status
#     $6=user-agent $9=referer $10=accept-language
# Writes:
#   $TEMP_DIR/bot_fingerprints.txt - ip|score|signal_count
#   (always exists afterwards, possibly empty)
# Scoring:
#   +20 bot-like user agent          +15 missing/odd Accept-Language
#   +15 >70% requests lack referer   +20 admin/config paths requested
#   -10 >70% requests succeeded      +25 when 3 or more signals fired
calculate_bot_fingerprint() {
    local fingerprint_count

    print_info "Calculating bot fingerprint confidence scores (combining multiple signals)..."

    # Best effort: a failure of this pass must not abort the whole run, but
    # awk diagnostics are left visible on stderr.
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1
        url = $3
        status = $4
        ua_lower = tolower($6)
        referer = $9
        accept_lang = $10

        ip_seen[ip]++

        # Signal 1: Bot-like User-Agent
        if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python|java[^script]|perl|ruby|node\.js|headless|mechanize/)) {
            ua_bot_signal[ip]++
        }

        # Signal 2: Missing/unusual Accept-Language
        if (accept_lang == "-" || accept_lang == "" || accept_lang == "*/*") {
            header_anomaly_signal[ip]++
        }

        # Signal 3: Missing Referer
        if (referer == "-" || referer == "") {
            missing_referer[ip]++
        }

        # Signal 4: Successful requests (used to lower the score)
        if (status ~ /^(200|301|302)/) {
            success_requests[ip]++
        }

        # Signal 5: Direct admin/config access
        if (match(url, /\/(wp-admin|phpmyadmin|admin|config\.php|\.env|\.git|\.htaccess|web\.config)/)) {
            admin_access[ip]++
        }
    }
    END {
        out = tmpdir "/bot_fingerprints.txt"

        for (ip in ip_seen) {
            score = 0
            signal_count = 0

            if ((ip in ua_bot_signal) && ua_bot_signal[ip] > 0) {
                score += 20
                signal_count++
            }
            if ((ip in header_anomaly_signal) && header_anomaly_signal[ip] > 0) {
                score += 15
                signal_count++
            }
            # 70%+ of the requests carried no referer
            if ((ip in missing_referer) && missing_referer[ip] > ip_seen[ip] * 0.7) {
                score += 15
                signal_count++
            }
            # Targeting admin areas
            if ((ip in admin_access) && admin_access[ip] > 0) {
                score += 20
                signal_count++
            }
            # Mostly successful responses (70%+): might be a legitimate bot
            if ((ip in success_requests) && success_requests[ip] > ip_seen[ip] * 0.7) {
                score -= 10
            }
            # Confidence increases when several signals align
            if (signal_count >= 3) {
                score += 25
            }

            # Normalize to 0-100
            if (score > 100) score = 100
            if (score < 0) score = 0

            # Only high-confidence bots are recorded
            if (score >= 60) {
                printf "%s|%d|%d\n", ip, score, signal_count > out
            }
        }
        close(out)
    }
    ' < "$TEMP_DIR/parsed_logs.txt" || true

    # Create file if it doesn't exist
    touch "$TEMP_DIR/bot_fingerprints.txt"
    fingerprint_count=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
    # Some wc implementations pad the number with blanks
    fingerprint_count=${fingerprint_count//[[:space:]]/}
    print_success "Fingerprint analysis complete ($fingerprint_count high-confidence bot IPs)"
}
|
||
|
||
#############################################################################
|
||
# NEW: Domain Targeting Analysis - Which domains are being attacked?
|
||
#############################################################################
|
||
|
||
# Summarise traffic and recorded attacks per domain.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds the input files
# Reads:
#   $TEMP_DIR/parsed_logs.txt        - pipe-delimited; $1=ip, $2=domain
#   $TEMP_DIR/attack_vectors_raw.txt - optional; ip|domain|url|status|attack_type
# Writes:
#   $TEMP_DIR/domain_targeting.txt
#       domain|unique_ips|request_count, busiest domain first
#   $TEMP_DIR/domain_attacks_<domain>.txt (only for domains with attacks)
#       attack_type|ip|count|total_for_that_attack_type
analyze_domain_targeting_percentage() {
    print_info "Analyzing per-domain attack patterns (what's attacking each domain)..."

    # Per-domain traffic volume. Only parsed_logs.txt is needed here, so it
    # is read directly; composite keys keep the program portable awk.
    awk -F'|' '
    {
        requests[$2]++
        if (!(($2, $1) in seen_ip)) {
            seen_ip[$2, $1] = 1
            unique_ips[$2]++
        }
    }
    END {
        for (domain in requests) {
            printf "%s|%d|%d\n", domain, unique_ips[domain], requests[domain]
        }
    }
    ' "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" || true

    # Per-domain attack type breakdown
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # Rows are appended below, so drop breakdowns left by an earlier run.
        rm -f -- "$TEMP_DIR"/domain_attacks_*.txt

        # -v must precede the program text, otherwise awk treats it as an
        # input file operand.
        awk -F'|' -v tmpdir="$TEMP_DIR" '
        {
            # $1=ip, $2=domain, $5=attack_type
            hits[$2 SUBSEP $5 SUBSEP $1]++
            totals[$2, $5]++
        }
        END {
            for (key in hits) {
                split(key, part, SUBSEP)
                domain_file = tmpdir "/domain_attacks_" part[1] ".txt"
                printf "%s|%s|%d|%d\n", part[2], part[3], hits[key], totals[part[1], part[2]] >> domain_file
                # Close after every row so the number of domains is not
                # limited by the open-file limit.
                close(domain_file)
            }
        }
        ' "$TEMP_DIR/attack_vectors_raw.txt"
    fi

    print_success "Domain attack pattern analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Top URLs Analysis - What files/endpoints are bots hitting?
|
||
#############################################################################
|
||
|
||
# For every domain listed in domain_targeting.txt, write the URLs requested
# on that domain together with their hit counts, most requested first.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds the input files
# Reads:
#   $TEMP_DIR/domain_targeting.txt - first pipe-delimited field is the domain
#   $TEMP_DIR/parsed_logs.txt      - pipe-delimited; $2=domain, $3=url
# Writes:
#   $TEMP_DIR/domain_urls_<domain>.txt - url|count
analyze_top_urls_per_domain() {
    # Declared local so that read does not overwrite same-named globals
    local domain domain_file

    print_info "Analyzing top targeted URLs per domain..."

    # Get list of domains from targeting analysis
    if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then
        # Only the domain column is needed; the remaining columns go to "_".
        while IFS='|' read -r domain _; do
            domain_file="$TEMP_DIR/domain_urls_${domain}.txt"

            # All URLs for this domain, sorted by frequency (no arbitrary limit)
            awk -F'|' -v dom="$domain" '
            $2 == dom {
                hits[$3]++
            }
            END {
                for (url in hits) {
                    printf "%s|%d\n", url, hits[url]
                }
            }
            ' "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k2 -rn > "$domain_file"
        done < "$TEMP_DIR/domain_targeting.txt"
    fi

    print_success "Top URLs analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Success Rate & Behavior Analysis (Added for accuracy improvement)
|
||
#############################################################################
|
||
|
||
# Compute per-IP success and failure percentages and single out likely
# scanners (mostly failures) and scrapers (mostly successes, high volume).
#
# Globals:
#   TEMP_DIR (read) - working directory that holds parsed_logs.txt
# Reads:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited; $1=ip, $4=status
# Writes (all three are recreated on every call):
#   $TEMP_DIR/high_failure_ips.txt - ip|total|fail_rate|scanner
#       (fail_rate >= 80 and at least 20 requests)
#   $TEMP_DIR/high_success_ips.txt - ip|total|success_rate|scraper
#       (success_rate >= 90 and at least 100 requests, not a scanner)
#   $TEMP_DIR/ip_success_rates.txt - ip|total|success_rate|fail_rate
analyze_success_rates() {
    local failure_file="$TEMP_DIR/high_failure_ips.txt"
    local success_file="$TEMP_DIR/high_success_ips.txt"
    local rates_file="$TEMP_DIR/ip_success_rates.txt"

    print_info "Analyzing request success rates and behavior patterns..."

    # Start from empty files so a repeated call cannot accumulate duplicate
    # rows; this also guarantees that the files exist afterwards.
    : > "$failure_file"
    : > "$success_file"
    : > "$rates_file"

    awk -F'|' -v failure_file="$failure_file" -v success_file="$success_file" \
        -v rates_file="$rates_file" '
    {
        ip = $1
        status = $4

        total[ip]++

        if (status ~ /^(200|301|302)/) {
            success[ip]++          # successful responses
        } else if (status ~ /^(404|403|401)/) {
            failed[ip]++           # failed/blocked responses
        }
    }
    END {
        for (ip in total) {
            # Percentages are truncated to whole numbers
            success_rate = int((success[ip] / total[ip]) * 100)
            fail_rate = int((failed[ip] / total[ip]) * 100)

            if (fail_rate >= 80 && total[ip] >= 20) {
                # High failure rate indicates scanning/probing
                print ip "|" total[ip] "|" fail_rate "|scanner" > failure_file
            } else if (success_rate >= 90 && total[ip] >= 100) {
                # Very high success rate + high volume could be scraping
                print ip "|" total[ip] "|" success_rate "|scraper" > success_file
            }

            # Output all rates for later analysis
            print ip "|" total[ip] "|" success_rate "|" fail_rate > rates_file
        }
        close(failure_file)
        close(success_file)
        close(rates_file)
    }' < "$TEMP_DIR/parsed_logs.txt"

    print_success "Success rate analysis complete"
}
|
||
|
||
#############################################################################
|
||
# Botnet Detection
|
||
#############################################################################
|
||
|
||
# Look for coordinated (botnet-like) behaviour across IPs.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds parsed_logs.txt
# Reads:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited; $1=ip $3=url $6=user-agent
#     $8=timestamp (DD/Mon/YYYY:HH:MM:SS)
# Writes (all under $TEMP_DIR):
#   coordinated_urls.txt   - URLs hit more than 10 times each by more than 5 IPs
#   ip_ua_pairs.txt        - distinct ip|user-agent pairs
#   suspicious_networks.txt - "count a.b.c.0/24" for IPv4 /24s with > 20 requests
#   rapid_fire_ips.txt     - "total ip peak" for IPs with > 50 requests in any
#                            single minute (total = sum over such minutes)
detect_botnets() {
    print_info "Analyzing for botnet patterns..."

    # Pattern 1: Multiple IPs hitting same URLs in coordinated manner
    awk -F'|' '{print $1"|"$3}' < "$TEMP_DIR/parsed_logs.txt" |
        sort | uniq -c | awk '$1 > 10 {print $2}' |
        cut -d'|' -f2 | sort | uniq -c | sort -rn |
        awk '$1 > 5 {print $2}' > "$TEMP_DIR/coordinated_urls.txt"

    # Pattern 2: IPs with similar User-Agents hitting multiple domains
    awk -F'|' '{print $1"|"$6}' < "$TEMP_DIR/parsed_logs.txt" |
        sort | uniq > "$TEMP_DIR/ip_ua_pairs.txt"

    # Pattern 3: IP ranges (Class C networks) with suspicious activity.
    # Only dotted-quad IPv4 addresses are grouped; an IPv6 address has no
    # meaningful a.b.c.0/24 form.
    awk -F'|' '
    $1 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {
        split($1, octet, ".")
        print octet[1] "." octet[2] "." octet[3] ".0/24"
    }' < "$TEMP_DIR/parsed_logs.txt" |
        sort | uniq -c | sort -rn | awk '$1 > 20' > "$TEMP_DIR/suspicious_networks.txt"

    # Pattern 4: Rapid fire requests (DDoS indicators).
    # Requests are counted per IP per minute inside one awk process
    # (portable awk: no three-argument match()).
    awk -F'|' '
    match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]/) {
        stamp = substr($8, RSTART, RLENGTH)
        # YYYY Mon DD _ HH MM
        minute_key = substr(stamp, 8, 4) substr(stamp, 4, 3) substr(stamp, 1, 2) \
                     "_" substr(stamp, 13, 2) substr(stamp, 16, 2)
        per_minute[$1 SUBSEP minute_key]++
    }
    END {
        for (key in per_minute) {
            hits = per_minute[key]
            if (hits > 50) {
                split(key, part, SUBSEP)
                ip = part[1]
                sum[ip] += hits
                if (hits > max[ip]) max[ip] = hits
            }
        }
        for (ip in sum) print sum[ip], ip, max[ip]
    }' < "$TEMP_DIR/parsed_logs.txt" |
        sort -rn > "$TEMP_DIR/rapid_fire_ips.txt"

    print_success "Botnet analysis complete"
}
|
||
|
||
#############################################################################
|
||
# Server IP Detection
|
||
#############################################################################
|
||
|
||
# Collect the IPv4 addresses that belong to this server so they can be
# excluded from threat analysis.
#
# Globals:
#   TEMP_DIR (read) - working directory
# Writes:
#   $TEMP_DIR/server_ips.txt - one address per line, sorted, de-duplicated
# Sources (every one is best effort and may contribute nothing):
#   hostname -I, ip addr, ifconfig, one external "what is my IP" service,
#   /var/cpanel/mainip
detect_server_ips() {
    local ip_list="$TEMP_DIR/server_ips.txt"
    local service public_ip server_ip_count

    print_info "Detecting server's own IP addresses..."

    : > "$ip_list"

    # Method 1: Get all IPs from network interfaces
    if command -v hostname >/dev/null 2>&1; then
        hostname -I 2>/dev/null | tr ' ' '\n' |
            grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' >> "$ip_list" || true
    fi

    # Method 2: Parse ip addr output ("inet a.b.c.d/prefix ...").
    # awk is used instead of grep -P, which not every grep supports.
    if command -v ip >/dev/null 2>&1; then
        ip addr show 2>/dev/null | awk '
            $1 == "inet" {
                addr = $2
                sub(/\/.*$/, "", addr)
                if (addr ~ /^[0-9.]+$/) print addr
            }' >> "$ip_list" || true
    fi

    # Method 3: Try ifconfig as fallback ("inet a.b.c.d" or "inet addr:a.b.c.d")
    if command -v ifconfig >/dev/null 2>&1; then
        ifconfig 2>/dev/null | awk '
            $1 == "inet" {
                addr = $2
                sub(/^addr:/, "", addr)
                if (addr ~ /^[0-9.]+$/) print addr
            }' >> "$ip_list" || true
    fi

    # Method 4: Get public IP from external services (with timeout).
    # HTTPS is requested explicitly: the answer decides which address gets
    # exempted from analysis, so it should not travel over plaintext HTTP.
    # The first service that returns a bare IPv4 address wins.
    for service in "ifconfig.me/ip" "icanhazip.com" "ipecho.net/plain" "api.ipify.org"; do
        public_ip=$(curl -s --max-time 3 "https://$service" 2>/dev/null |
            grep -oE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' || true)
        if [ -n "$public_ip" ]; then
            echo "$public_ip" >> "$ip_list"
            break
        fi
    done

    # Method 5: Check cPanel server IP if available
    if [ -r "/var/cpanel/mainip" ]; then
        cat /var/cpanel/mainip >> "$ip_list" || true
    fi

    # Remove duplicates and empty lines
    sort -u "$ip_list" | grep -v '^$' > "$TEMP_DIR/server_ips_final.txt" || true
    mv "$TEMP_DIR/server_ips_final.txt" "$ip_list"

    server_ip_count=$(wc -l < "$ip_list" 2>/dev/null || echo 0)

    if [ "$server_ip_count" -gt 0 ]; then
        print_success "Detected $server_ip_count server IP(s) - these will be excluded from threat analysis"
    else
        print_warning "Could not detect server IPs automatically - proceeding without server IP filtering"
    fi
}
|
||
|
||
# Helper function to validate IP address format
|
||
# Check whether a string looks like an IP address.
#
# Arguments:
#   $1 - candidate address
# Returns:
#   0 for a dotted-quad IPv4 address whose octets are all <= 255, or for a
#     string matching the simplified IPv6 pattern (2-7 colon-terminated
#     groups of up to four hex digits plus a final group);
#   1 otherwise.
is_valid_ip() {
    local ip="$1"
    local octet

    # IPv4 validation: the regex captures the four octets, so no word
    # splitting of "$ip" is needed.
    if [[ "$ip" =~ ^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$ ]]; then
        for octet in "${BASH_REMATCH[@]:1}"; do
            # 10# forces base 10 so octets such as "08" are not read as octal
            (( 10#$octet <= 255 )) || return 1
        done
        return 0  # Valid IPv4
    fi

    # IPv6 basic validation (simplified)
    if [[ "$ip" =~ ^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$ ]]; then
        return 0  # Valid IPv6
    fi

    return 1  # Invalid
}
|
||
|
||
# Helper function to check if an IP should be excluded
|
||
# Decide whether an IP must be left out of threat analysis.
#
# Globals:
#   TEMP_DIR (read) - $TEMP_DIR/server_ips.txt is consulted when it exists
# Arguments:
#   $1 - address to check
# Returns:
#   0 (exclude) when the address is malformed according to is_valid_ip, is
#     loopback/private/link-local (IPv4 or IPv6), or is listed in
#     server_ips.txt;
#   1 (keep) otherwise.
is_excluded_ip() {
    local ip="$1"
    local ip_lc

    # Malformed addresses are excluded outright (this also covers
    # non-address strings such as "localhost").
    if ! is_valid_ip "$ip"; then
        return 0
    fi

    # IPv4 loopback, RFC 1918 private ranges and link-local
    if [[ "$ip" =~ ^127\. ]] ||
       [[ "$ip" =~ ^10\. ]] ||
       [[ "$ip" =~ ^192\.168\. ]] ||
       [[ "$ip" =~ ^172\.(1[6-9]|2[0-9]|3[01])\. ]] ||
       [[ "$ip" =~ ^169\.254\. ]]; then
        return 0
    fi

    # IPv6 loopback, link-local (fe80::/10) and unique-local (fc00::/7).
    # Hex digits may be written in either case, so compare in lower case.
    ip_lc="${ip,,}"
    if [[ "$ip_lc" == "::1" ]] ||
       [[ "$ip_lc" =~ ^fe[89ab][0-9a-f]: ]] ||
       [[ "$ip_lc" =~ ^f[cd][0-9a-f]{2}: ]]; then
        return 0
    fi

    # The server's own addresses (exact, whole-line match)
    if [ -f "$TEMP_DIR/server_ips.txt" ]; then
        if grep -qFx -- "$ip" "$TEMP_DIR/server_ips.txt" 2>/dev/null; then
            return 0
        fi
    fi

    return 1
}
|
||
|
||
#############################################################################
|
||
# Time-Series Analysis
|
||
#############################################################################
|
||
|
||
# Build per-hour request histograms for bot traffic and for attacking IPs.
#
# Globals:
#   TEMP_DIR (read) - working directory that holds the input files
# Reads:
#   $TEMP_DIR/classified_bots.txt    - pipe-delimited; $8=timestamp, $9=bot type
#   $TEMP_DIR/attack_vectors_raw.txt - optional; $1=ip
#   $TEMP_DIR/parsed_logs.txt        - pipe-delimited; $1=ip, $8=timestamp
# Writes ("count HH" lines in uniq -c format):
#   $TEMP_DIR/hourly_bot_traffic.txt    - lines whose bot type is not "unknown"
#   $TEMP_DIR/hourly_attack_traffic.txt - all requests from IPs that appear in
#                                         attack_vectors_raw.txt
analyze_time_series() {
    # DD/Mon/YYYY:HH:MM:SS spelled without interval expressions so that it
    # works in any awk; the hour starts 12 characters after the match start.
    local stamp_re='[0-9][0-9]/[A-Za-z][A-Za-z][A-Za-z]/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]'

    print_info "Analyzing time-series patterns..."

    # Extract hourly bot traffic (best effort: the classification file may
    # be missing, in which case the histogram is simply empty)
    awk -F'|' -v re="$stamp_re" '
    $9 != "unknown" && match($8, re) {
        print substr($8, RSTART + 12, 2)
    }' "$TEMP_DIR/classified_bots.txt" 2>/dev/null |
        sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true

    # Extract hourly attack traffic
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # First file: remember attacking IPs. Second file: emit the hour of
        # every request made by one of them. FILENAME is compared instead of
        # NR==FNR, which misfires when the first file is empty.
        awk -F'|' -v re="$stamp_re" '
        FILENAME == ARGV[1] { attacker[$1] = 1; next }
        ($1 in attacker) && match($8, re) {
            print substr($8, RSTART + 12, 2)
        }' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" |
            sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
    else
        # Create file if it doesn't exist so readers need no special case
        touch "$TEMP_DIR/hourly_attack_traffic.txt"
    fi

    print_success "Time-series analysis complete"
}
|
||
|
||
#############################################################################
|
||
# Threat Scoring
|
||
#############################################################################
|
||
|
||
calculate_threat_scores() {
|
||
print_info "Calculating threat scores..."
|
||
|
||
# Pre-count requests per IP (MUCH faster than grepping for each IP)
|
||
declare -A ip_request_counts
|
||
while IFS='|' read -r ip rest; do
|
||
((ip_request_counts["$ip"]++))
|
||
done < "$TEMP_DIR/parsed_logs.txt"
|
||
|
||
# Build hash tables from threat files for O(1) lookups
|
||
# OPTIMIZATION: Use awk instead of echo|awk|cut in loops (10x faster)
|
||
declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
|
||
declare -A threat_ips_suspicious threat_ips_ddos threat_admin_count threat_404_count
|
||
|
||
# Parse each threat file and build hash tables (optimized with awk)
|
||
[ -f "$TEMP_DIR/sqli_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_sqli["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/sqli_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/xss_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_xss["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/xss_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/path_traversal_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_path["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/path_traversal_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/rce_upload_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_rce["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/rce_upload_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/login_bruteforce_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_login["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/login_bruteforce_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/suspicious_ua.txt" ] && while read -r ip; do
|
||
threat_ips_suspicious["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/rapid_fire_ips.txt" ] && while read -r ip; do
|
||
threat_ips_ddos["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/rapid_fire_ips.txt")
|
||
|
||
# Parse count-based threat files
|
||
[ -f "$TEMP_DIR/admin_probes.txt" ] && while read -r count ip; do
|
||
[ -n "$ip" ] && threat_admin_count["$ip"]=$count
|
||
done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" | sed 's/|.*//')
|
||
|
||
[ -f "$TEMP_DIR/404_scans.txt" ] && while read -r count ip; do
|
||
[ -n "$ip" ] && threat_404_count["$ip"]=$count
|
||
done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" | sed 's/|.*//')
|
||
|
||
# NEW: Load bot classifications to skip volume scoring for legitimate bots
|
||
declare -A legit_bot_ips
|
||
if [ -f "$TEMP_DIR/classified_bots.txt" ]; then
|
||
while IFS='|' read -r ip domain url status size ua method timestamp bot_type bot_name; do
|
||
if [ "$bot_type" = "legit" ]; then
|
||
legit_bot_ips["$ip"]=1
|
||
fi
|
||
done < "$TEMP_DIR/classified_bots.txt"
|
||
fi
|
||
|
||
# NEW: Load success rate data for scanning/scraping detection
|
||
declare -A scanner_ips scraper_ips ip_fail_rates
|
||
[ -f "$TEMP_DIR/high_failure_ips.txt" ] && while IFS='|' read -r ip total fail_rate category; do
|
||
scanner_ips["$ip"]=$fail_rate
|
||
done < "$TEMP_DIR/high_failure_ips.txt"
|
||
|
||
[ -f "$TEMP_DIR/high_success_ips.txt" ] && while IFS='|' read -r ip total success_rate category; do
|
||
scraper_ips["$ip"]=$success_rate
|
||
done < "$TEMP_DIR/high_success_ips.txt"
|
||
|
||
# Load all fail rates for threshold checks
|
||
[ -f "$TEMP_DIR/ip_success_rates.txt" ] && while IFS='|' read -r ip total success_rate fail_rate; do
|
||
ip_fail_rates["$ip"]=$fail_rate
|
||
done < "$TEMP_DIR/ip_success_rates.txt"
|
||
|
||
# NEW: Load header anomalies
|
||
declare -A header_anomalies
|
||
[ -f "$TEMP_DIR/header_anomalies.txt" ] && while IFS='|' read -r ip anomaly_type score; do
|
||
header_anomalies["$ip"]=$score
|
||
done < "$TEMP_DIR/header_anomalies.txt"
|
||
|
||
# NEW: Load suspicious entry points
|
||
declare -A suspicious_entry_ips
|
||
[ -f "$TEMP_DIR/suspicious_entry_points.txt" ] && while IFS='|' read -r ip entry_type url status; do
|
||
suspicious_entry_ips["$ip"]=1
|
||
done < "$TEMP_DIR/suspicious_entry_points.txt"
|
||
|
||
# NEW: Load fuzzing/parameter scanning IPs
|
||
declare -A fuzzing_ips
|
||
[ -f "$TEMP_DIR/fuzzing_ips.txt" ] && while IFS='|' read -r ip fuzz_type total_urls unique_paths; do
|
||
fuzzing_ips["$ip"]=$total_urls
|
||
done < "$TEMP_DIR/fuzzing_ips.txt"
|
||
|
||
# NEW: Load timing anomalies (consistent bot timing)
|
||
declare -A timing_anomalies
|
||
[ -f "$TEMP_DIR/timing_anomalies.txt" ] && while IFS='|' read -r ip timing_type avg_interval total_reqs; do
|
||
timing_anomalies["$ip"]=$avg_interval
|
||
done < "$TEMP_DIR/timing_anomalies.txt"
|
||
|
||
# Now calculate scores for each IP (using pre-counted requests)
|
||
for ip in "${!ip_request_counts[@]}"; do
|
||
# Skip excluded IPs
|
||
if is_excluded_ip "$ip"; then
|
||
continue
|
||
fi
|
||
|
||
score=0
|
||
req_count=0
|
||
if [ -n "${ip_request_counts[$ip]}" ]; then
|
||
req_count=${ip_request_counts[$ip]}
|
||
fi
|
||
|
||
# IMPROVED: Base request volume scoring
|
||
# Skip volume scoring for legitimate bots (Google, Bing, etc.)
|
||
if [ -z "${legit_bot_ips[$ip]}" ]; then
|
||
# Not a legitimate bot - apply volume scoring
|
||
if [ "${req_count:-0}" -gt 10000 ]; then score=$((score + 10))
|
||
elif [ "${req_count:-0}" -gt 5000 ]; then score=$((score + 8))
|
||
elif [ "${req_count:-0}" -gt 1000 ]; then score=$((score + 5))
|
||
elif [ "${req_count:-0}" -gt 500 ]; then score=$((score + 3))
|
||
fi
|
||
fi
|
||
|
||
# NEW: Success rate analysis bonuses
|
||
# High failure rate (80%+ 404/403) = scanning behavior
|
||
if [ -n "${scanner_ips[$ip]}" ]; then
|
||
fail_rate=0
|
||
if [ -n "${scanner_ips[$ip]}" ]; then
|
||
fail_rate=${scanner_ips[$ip]}
|
||
fi
|
||
if [ "${fail_rate:-0}" -ge 90 ]; then
|
||
score=$((score + 8)) # Very high failure rate
|
||
elif [ "${fail_rate:-0}" -ge 80 ]; then
|
||
score=$((score + 5)) # High failure rate
|
||
fi
|
||
fi
|
||
|
||
# High success rate (90%+ 200/301/302) + high volume = potential scraping
|
||
if [ -n "${scraper_ips[$ip]}" ]; then
|
||
local safe_req_count=$((req_count + 0))
|
||
[ "$safe_req_count" -gt 500 ] && score=$((score + 7)) # Scraping behavior
|
||
fi
|
||
|
||
# Attack patterns
|
||
[ -n "${threat_ips_sqli[$ip]}" ] && score=$((score + 15))
|
||
[ -n "${threat_ips_xss[$ip]}" ] && score=$((score + 12))
|
||
[ -n "${threat_ips_path[$ip]}" ] && score=$((score + 15))
|
||
[ -n "${threat_ips_rce[$ip]}" ] && score=$((score + 20))
|
||
[ -n "${threat_ips_login[$ip]}" ] && score=$((score + 10))
|
||
[ -n "${threat_ips_suspicious[$ip]}" ] && score=$((score + 10))
|
||
[ -n "${threat_ips_ddos[$ip]}" ] && score=$((score + 10))
|
||
|
||
# NEW: Header anomalies (strong indicator of bots)
|
||
if [ -n "${header_anomalies[$ip]}" ]; then
|
||
header_score=${header_anomalies[$ip]:-0}
|
||
if [ "${header_score:-0}" -ge 12 ]; then
|
||
score=$((score + 8)) # Multiple header suspicions
|
||
elif [ "${header_score:-0}" -ge 8 ]; then
|
||
score=$((score + 5)) # Moderate header anomalies
|
||
fi
|
||
fi
|
||
|
||
# NEW: Suspicious entry point (direct jump to admin/config)
|
||
if [ -n "${suspicious_entry_ips[$ip]}" ]; then
|
||
score=$((score + 6)) # Direct attack attempt without probing
|
||
fi
|
||
|
||
# NEW: Fuzzing/parameter scanning behavior
|
||
if [ -n "${fuzzing_ips[$ip]}" ]; then
|
||
fuzz_requests=${fuzzing_ips[$ip]:-0}
|
||
if [ "${fuzz_requests:-0}" -gt 100 ]; then
|
||
score=$((score + 7)) # Aggressive fuzzing
|
||
elif [ "${fuzz_requests:-0}" -gt 50 ]; then
|
||
score=$((score + 4)) # Moderate fuzzing
|
||
fi
|
||
fi
|
||
|
||
# NEW: Timing anomalies (very consistent request timing = bot)
|
||
if [ -n "${timing_anomalies[$ip]}" ]; then
|
||
score=$((score + 6)) # Very consistent timing indicates automation
|
||
fi
|
||
|
||
# Admin probing - IMPROVED: Raised threshold to 50 (only failed attempts counted)
|
||
admin_count=${threat_admin_count[$ip]:-0}
|
||
if [ "${admin_count:-0}" -gt 100 ]; then
|
||
score=$((score + 10)) # Excessive probing
|
||
elif [ "${admin_count:-0}" -gt 50 ]; then
|
||
score=$((score + 5)) # Moderate probing
|
||
fi
|
||
|
||
# 404 scanning
|
||
scan_404=${threat_404_count[$ip]:-0}
|
||
[ "${scan_404:-0}" -gt 50 ] && score=$((score + 3))
|
||
|
||
# OPTIMIZATION: Skip external API calls for performance
|
||
# Threat Intelligence Enrichment can be done post-analysis for high-risk IPs only
|
||
# Uncommenting these will SIGNIFICANTLY slow down analysis (API calls for every IP)
|
||
#
|
||
# To enable threat intelligence enrichment:
|
||
# 1. Uncomment the code below
|
||
# 2. Ensure check_abuseipdb, get_country_code, and is_high_risk_country functions exist
|
||
# 3. Be aware this will make thousands of API calls and take much longer
|
||
#
|
||
# local abuse_data=$(check_abuseipdb "$ip" 2>/dev/null || echo "0|0|Unknown|Unknown")
|
||
# IFS='|' read -r abuse_confidence abuse_reports abuse_country abuse_isp <<< "$abuse_data"
|
||
#
|
||
# if [ "$abuse_confidence" -ge 75 ]; then
|
||
# score=$((score + 15)) # High confidence malicious
|
||
# elif [ "$abuse_confidence" -ge 50 ]; then
|
||
# score=$((score + 8)) # Moderate confidence
|
||
# elif [ "$abuse_confidence" -ge 25 ]; then
|
||
# score=$((score + 3)) # Low confidence
|
||
# fi
|
||
#
|
||
# local geo_country=$(get_country_code "$ip" 2>/dev/null || echo "XX")
|
||
# if is_high_risk_country "$geo_country" 2>/dev/null; then
|
||
# score=$((score + 5)) # High-risk country bonus
|
||
# fi
|
||
|
||
# Cap at 100
|
||
[ "${score:-0}" -gt 100 ] && score=100
|
||
|
||
# Only output IPs with score > 0
|
||
[ "${score:-0}" -gt 0 ] && echo "$score|$ip|$req_count"
|
||
|
||
# Track in centralized IP reputation database (background process)
|
||
if [ "${score:-0}" -gt 0 ]; then
|
||
(
|
||
# Update IP with hit count
|
||
increment_ip_hits "$ip" "$req_count" >/dev/null 2>&1
|
||
|
||
# Tag with specific attack types found
|
||
[ -n "${threat_ips_sqli[$ip]}" ] && flag_ip_attack "$ip" "SQL_INJECTION" 0 "Bot analyzer: SQL injection attempts" >/dev/null 2>&1
|
||
[ -n "${threat_ips_xss[$ip]}" ] && flag_ip_attack "$ip" "XSS" 0 "Bot analyzer: XSS attempts" >/dev/null 2>&1
|
||
[ -n "${threat_ips_path[$ip]}" ] && flag_ip_attack "$ip" "PATH_TRAVERSAL" 0 "Bot analyzer: Path traversal" >/dev/null 2>&1
|
||
[ -n "${threat_ips_rce[$ip]}" ] && flag_ip_attack "$ip" "RCE" 0 "Bot analyzer: RCE/shell upload attempts" >/dev/null 2>&1
|
||
[ -n "${threat_ips_login[$ip]}" ] && flag_ip_attack "$ip" "BRUTEFORCE" 0 "Bot analyzer: Login bruteforce" >/dev/null 2>&1
|
||
[ -n "${threat_ips_ddos[$ip]}" ] && flag_ip_attack "$ip" "DDOS" 0 "Bot analyzer: Rapid-fire requests" >/dev/null 2>&1
|
||
[ -n "${threat_ips_suspicious[$ip]}" ] && flag_ip_attack "$ip" "SCANNER" 0 "Bot analyzer: Suspicious user-agent" >/dev/null 2>&1
|
||
) &
|
||
fi
|
||
done | sort -t'|' -k1 -rn > "$TEMP_DIR/threat_scores.txt"
|
||
|
||
# Wait for background IP reputation updates to complete (don't fail if background jobs error)
|
||
wait || true
|
||
|
||
print_success "Threat scores calculated and IP reputation updated"
|
||
}
|
||
|
||
#############################################################################
# False Positive Detection
#############################################################################
|
||
|
||
# Flag traffic that comes from well-known legitimate services (uptime
# monitors, search engines, CDNs, social link previewers, backup plugins,
# payment processors) so it can be reviewed for whitelisting.
#
# Globals:
#   TEMP_DIR (read) - working directory of the current analysis run
# Input:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited records; this function reads
#     field 1 (client IP), field 2 (domain), field 3 (URL) and
#     field 6 (user agent)
# Output:
#   $TEMP_DIR/false_positives.txt - unique lines of the form
#     "ip|service label|lower-cased user agent|domain"
# Returns:
#   0 always; a missing or unreadable input yields an empty output file.
detect_false_positives() {
    print_info "Detecting legitimate services (false positives)..."

    local parsed_log="$TEMP_DIR/parsed_logs.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"
    local fp_count=0

    # Only the first matching rule fires for a record (else-if chain), so the
    # order below is significant and mirrors the original precedence.
    awk -F'|' '
    function emit(service) {
        print ip "|" service "|" ua "|" domain
    }
    {
        ip = $1
        domain = $2
        url = $3
        ua = tolower($6)

        # Monitoring services
        if (ua ~ /pingdom/) {
            emit("Pingdom Monitoring")
        }
        else if (ua ~ /uptimerobot/) {
            emit("UptimeRobot Monitoring")
        }
        else if (ua ~ /statuscake/) {
            emit("StatusCake Monitoring")
        }
        # WordPress cache preload (WP Rocket, Hummingbird)
        else if (url ~ /admin-ajax\.php.*cache_preload/ || url ~ /admin-ajax\.php.*wphb/) {
            emit("WordPress Cache Preload")
        }
        # Legitimate backup services
        else if (ua ~ /jetpack|vaultpress|updraftplus|backwpup/) {
            emit("Backup Service")
        }
        # Search engine crawlers (Google, Bing, Yahoo Slurp, DuckDuckGo)
        else if (ua ~ /googlebot|google web preview|google-read-aloud|bingbot|slurp|duckduckbot/) {
            emit("Search Engine Bot")
        }
        # Content delivery networks (usually legit)
        else if (ua ~ /cloudflare|akamai|fastly|cloudfront|edgecast|maxcdn|amazon/) {
            emit("CDN Service")
        }
        # Analytics and social link previewers. The Facebook crawler sends
        # "facebookexternalhit"; the historical "fbexternalhit" token is kept
        # for backward compatibility.
        else if (ua ~ /googleanalytics|fbexternalhit|facebookexternalhit|twitterbot|linkedinbot|pinterestbot|whatsapp|telegram/) {
            emit("Analytics/Social Service")
        }
        # Payment processors (legitimate POST to checkout): both the URL and
        # the user agent must look payment-related.
        else if (url ~ /checkout|payment|paypal|stripe|square/ && ua ~ /paypal|stripe|square/) {
            emit("Payment Processor")
        }
    }' "$parsed_log" 2>/dev/null | sort -u > "$fp_file" || true

    # Count results without letting a missing file leak an error message.
    if [ -f "$fp_file" ]; then
        fp_count=$(wc -l < "$fp_file" 2>/dev/null) || fp_count=0
    fi
    fp_count=$((fp_count + 0))

    print_success "False positive detection complete (${fp_count} legitimate services identified)"
}
|
||
|
||
#############################################################################
# Statistical Analysis
#############################################################################
|
||
|
||
# Build the "top N" and traffic-breakdown summary files used by the report.
#
# Globals:
#   TEMP_DIR (read) - working directory of the current analysis run
# Inputs (all pipe-delimited, all optional):
#   $TEMP_DIR/parsed_logs.txt     - field 1 = IP, field 2 = domain, field 3 = URL
#   $TEMP_DIR/classified_bots.txt - field 2 = domain, field 9 = bot type,
#                                   field 10 = bot name
#   $TEMP_DIR/all_domains.txt     - one domain per line
# Outputs (in $TEMP_DIR):
#   top_sites.txt, top_ips.txt, top_urls.txt  - "count key", top 5 by count
#   top_bots.txt                              - uniq -c of bot names, top 5
#   traffic_breakdown.txt                     - uniq -c of bot types
#   domain_bot_types.txt                      - "domain|bot_type" index
#   domain_<domain>_stats.txt                 - domain name, then uniq -c of
#                                               bot types for exactly that domain
# Returns:
#   0 always; missing inputs produce empty outputs instead of aborting the
#   script under "set -eo pipefail".
generate_statistics() {
    print_info "Generating statistics..."

    local parsed_log="$TEMP_DIR/parsed_logs.txt"
    local bots_file="$TEMP_DIR/classified_bots.txt"
    local bot_index="$TEMP_DIR/domain_bot_types.txt"
    local name domain stats_file

    # Single pass over the parsed logs producing all three raw counters.
    # Output paths are passed as complete variables because an unparenthesised
    # concatenation after ">" is parsed differently across awk implementations.
    awk -F'|' \
        -v sites_out="$TEMP_DIR/top_sites_raw.txt" \
        -v ips_out="$TEMP_DIR/top_ips_raw.txt" \
        -v urls_out="$TEMP_DIR/top_urls_raw.txt" '
    {
        domains[$2]++            # requests per domain
        ips[$1]++                # requests per client IP
        urls[$2 "|" $3]++        # requests per domain+URL
    }
    END {
        for (d in domains) print domains[d], d > sites_out
        for (i in ips)     print ips[i], i     > ips_out
        for (u in urls)    print urls[u], u    > urls_out
        close(sites_out)
        close(ips_out)
        close(urls_out)
    }' "$parsed_log" 2>/dev/null || true

    # Sort and limit results. The raw files only exist when there was data;
    # "|| true" also absorbs the SIGPIPE status sort gets when head exits early.
    for name in top_sites top_ips top_urls; do
        if [ -f "$TEMP_DIR/${name}_raw.txt" ]; then
            sort -rn "$TEMP_DIR/${name}_raw.txt" | head -5 > "$TEMP_DIR/${name}.txt" || true
        else
            touch "$TEMP_DIR/${name}.txt"
        fi
    done

    # Top 5 bots by request count (records classified as "unknown" are skipped)
    awk -F'|' '$9 != "unknown" {print $10}' "$bots_file" 2>/dev/null \
        | sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" || true

    # Traffic breakdown by bot type
    awk -F'|' '{print $9}' "$bots_file" 2>/dev/null \
        | sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" || true

    # Per-domain traffic sources
    if [ -f "$TEMP_DIR/all_domains.txt" ]; then
        # Compact "domain|bot_type" index so each domain lookup reads less data
        awk -F'|' '{print $2 "|" $9}' "$bots_file" 2>/dev/null > "$bot_index" || true

        while read -r domain; do
            [ -n "$domain" ] || continue
            stats_file="$TEMP_DIR/domain_${domain}_stats.txt"
            printf '%s\n' "$domain" > "$stats_file" || continue

            # Exact, string-typed match on the domain column. A substring match
            # would also count "www.example.com" when asked for "example.com".
            # The domain travels via the environment so awk does not interpret
            # backslash escapes in it.
            dom="$domain" awk -F'|' '($1 "") == ENVIRON["dom"] {print $2}' "$bot_index" 2>/dev/null \
                | sort | uniq -c | sort -rn >> "$stats_file" || true
        done < "$TEMP_DIR/all_domains.txt"
    fi

    print_success "Statistics generated"
}
|
||
|
||
#############################################################################
# NEW: Comparison Reports (detect trends)
#############################################################################
|
||
|
||
# Normalise $1 to a non-negative integer: whitespace and any fractional part
# are dropped, anything that is not purely digits becomes 0.
_gcr_to_int() {
    local raw="${1//[[:space:]]/}"
    raw="${raw%%.*}"
    case "$raw" in
        ''|*[!0-9]*) echo 0 ;;
        *) echo "$((10#$raw))" ;;   # 10# keeps leading zeros from meaning octal
    esac
}

# Print the number of lines in file $1, or 0 when it is missing/unreadable.
_gcr_count_lines() {
    local n=0
    if [ -r "$1" ]; then
        n=$(wc -l < "$1" 2>/dev/null) || n=0
    fi
    _gcr_to_int "$n"
}

# Run awk program $2 (which must print one integer) over pipe-delimited file
# $1 and print the result, or 0 when the file is missing or awk fails.
_gcr_awk_count() {
    local n=0
    if [ -r "$1" ]; then
        n=$(awk -F'|' "$2" "$1" 2>/dev/null) || n=0
    fi
    _gcr_to_int "$n"
}

# Print the integer stored as "<key>: <value>" in report file $1 (0 if absent).
_gcr_metric() {
    local value=""
    if [ -r "$1" ]; then
        value=$(awk -F':' -v key="$2" \
            '$1 == key { v = $2; gsub(/ /, "", v); print v; exit }' "$1" 2>/dev/null) || value=""
    fi
    _gcr_to_int "$value"
}

# Print the sorted, de-duplicated IPs (field 2) whose score (field 1) is >= 70
# in threat-score file $1; prints nothing when the file is missing.
_gcr_high_risk_ips() {
    [ -r "$1" ] || return 0
    awk -F'|' '$1 >= 70 {print $2}' "$1" 2>/dev/null | sort -u || true
}

# Record today's key metrics in the analysis history and print how they compare
# with the rolling baseline and with yesterday's stored report.
#
# Globals:
#   TEMP_DIR (read)         - working directory of the current analysis run
#   TOOLKIT_TMP_DIR (read)  - parent of the persistent "analysis_history" dir
#   RED, YELLOW, NC (read)  - colour escape sequences used with echo -e
# Calls (defined elsewhere in the file):
#   print_info, print_header, calculate_baseline_average
# Files written (under $TOOLKIT_TMP_DIR/analysis_history):
#   latest_analysis_<YYYYMMDD>.txt - one "Key: value" line per metric
#   known_attackers_<YYYYMMDD>.txt - IPs with a threat score >= 70
# Output:
#   baseline comparison and, when yesterday's report exists, day-over-day
#   trends on stdout
# Returns:
#   0 unless the history directory or report cannot be created.
generate_comparison_report() {
    print_info "Generating trend analysis and baseline comparison..."

    # Store current results for comparison with previous analysis
    local history_dir="$TOOLKIT_TMP_DIR/analysis_history"
    mkdir -p "$history_dir"

    local timestamp today yesterday
    timestamp=$(date +%Y%m%d_%H%M%S)
    today=$(date +%Y%m%d)
    # GNU date syntax first, BSD/macOS syntax as the fallback
    yesterday=$(date -d "1 day ago" +%Y%m%d 2>/dev/null || date -v-1d +%Y%m%d 2>/dev/null) || yesterday=""

    local latest_report="$history_dir/latest_analysis_$today.txt"
    local previous_report="$history_dir/latest_analysis_${yesterday}.txt"
    local scores_file="$TEMP_DIR/threat_scores.txt"

    # Extract key metrics from current analysis. Every value comes from a
    # helper that prints exactly one integer, so a missing input can never add
    # a second line to the report (which "pipeline | wc -l || echo 0" does
    # under pipefail).
    {
        echo "Timestamp: $timestamp"
        echo "Total_Requests: $(_gcr_count_lines "$TEMP_DIR/parsed_logs.txt")"
        echo "Unique_IPs: $(_gcr_awk_count "$TEMP_DIR/parsed_logs.txt" '!seen[$1]++ { n++ } END { print n + 0 }')"
        echo "High_Risk_IPs: $(_gcr_awk_count "$scores_file" '$1 >= 70 { n++ } END { print n + 0 }')"
        echo "Attack_Vectors: $(_gcr_awk_count "$TEMP_DIR/attack_vectors_raw.txt" '!seen[$5]++ { n++ } END { print n + 0 }')"
        echo "SQL_Injection: $(_gcr_count_lines "$TEMP_DIR/sqli_attempts.txt")"
        echo "XSS_Attempts: $(_gcr_count_lines "$TEMP_DIR/xss_attempts.txt")"
        echo "Bot_Traffic: $(_gcr_count_lines "$TEMP_DIR/classified_bots.txt")"
        echo "Suspected_Scanners: $(_gcr_count_lines "$TEMP_DIR/high_failure_ips.txt")"
        echo "Header_Anomalies: $(_gcr_count_lines "$TEMP_DIR/header_anomalies.txt")"
        echo "Entry_Point_Suspicious: $(_gcr_count_lines "$TEMP_DIR/suspicious_entry_points.txt")"
        echo "Fuzzing_IPs: $(_gcr_count_lines "$TEMP_DIR/fuzzing_ips.txt")"
    } > "$latest_report"

    # Baseline comparison
    echo ""
    print_header "BASELINE COMPARISON (Is this activity normal?)"

    local total_requests baseline_requests request_pct
    total_requests=$(_gcr_metric "$latest_report" "Total_Requests")
    # NOTE(review): the output format of calculate_baseline_average is not
    # visible here, so the value is normalised to an integer before it is used
    # in integer tests and arithmetic.
    baseline_requests=$(_gcr_to_int "$(calculate_baseline_average "server" "requests" 7 || true)")

    if [ "$baseline_requests" -gt 0 ]; then
        request_pct=$((total_requests * 100 / baseline_requests))
        if [ "$request_pct" -gt 200 ]; then
            echo -e "${RED}🔴 ABNORMAL: Requests are $((request_pct - 100))% above 7-day average${NC}"
            echo " Baseline (7-day avg): $baseline_requests requests"
            echo " Today: $total_requests requests"
        elif [ "$request_pct" -lt 50 ]; then
            echo "🟢 LOW: Requests are $((100 - request_pct))% below baseline"
        else
            echo "🟡 NORMAL: Requests within expected range"
        fi
    else
        echo "📊 (No historical baseline yet - first analysis)"
    fi

    local high_risk baseline_attacks attack_ratio
    high_risk=$(_gcr_metric "$latest_report" "High_Risk_IPs")
    baseline_attacks=$(_gcr_to_int "$(calculate_baseline_average "server" "high_risk" 7 || true)")

    if [ "$baseline_attacks" -gt 0 ]; then
        # Integer division: the ABNORMAL branch needs at least 4x the baseline
        attack_ratio=$((high_risk / baseline_attacks))
        if [ "$attack_ratio" -gt 3 ]; then
            echo -e "${RED}🔴 ABNORMAL: High-risk IPs are ${attack_ratio}x above baseline${NC}"
            echo " Baseline (7-day avg): $baseline_attacks high-risk IPs"
            echo " Today: $high_risk high-risk IPs"
        elif [ "$high_risk" -gt "$baseline_attacks" ]; then
            echo -e "${YELLOW}🟡 ELEVATED: $high_risk high-risk IPs (baseline: $baseline_attacks)${NC}"
        else
            echo "🟢 NORMAL: High-risk IPs within expected range"
        fi
    fi

    # Compare with previous day's analysis
    if [ -f "$previous_report" ]; then
        echo ""
        print_header "DAY-OVER-DAY TRENDS"

        local curr_high_risk prev_high_risk risk_diff risk_pct=0
        curr_high_risk=$high_risk
        prev_high_risk=$(_gcr_metric "$previous_report" "High_Risk_IPs")
        risk_diff=$((curr_high_risk - prev_high_risk))

        # Percentage change is only defined when yesterday was non-zero
        if [ "$prev_high_risk" -gt 0 ]; then
            risk_pct=$((risk_diff * 100 / prev_high_risk))
        fi

        if [ "$risk_diff" -gt 0 ]; then
            echo "⚠️ High-Risk IPs: ${curr_high_risk} (↑ $risk_diff IPs, +${risk_pct}%)"
        elif [ "$risk_diff" -lt 0 ]; then
            echo "✓ High-Risk IPs: ${curr_high_risk} (↓ $((risk_diff * -1)) IPs, ${risk_pct}%)"
        else
            echo "→ High-Risk IPs: ${curr_high_risk} (no change)"
        fi

        local curr_sql prev_sql sql_diff
        curr_sql=$(_gcr_metric "$latest_report" "SQL_Injection")
        prev_sql=$(_gcr_metric "$previous_report" "SQL_Injection")
        sql_diff=$((curr_sql - prev_sql))

        if [ "$sql_diff" -gt 0 ]; then
            echo "⚠️ SQL Injection: $curr_sql (↑ $sql_diff new attempts)"
        elif [ "$sql_diff" -lt 0 ]; then
            echo "✓ SQL Injection: $curr_sql (↓ $((sql_diff * -1)) fewer)"
        else
            echo "→ SQL Injection: $curr_sql (stable)"
        fi

        # Track repeat attackers: IPs that are high-risk today and were also
        # saved as high-risk yesterday. comm needs both inputs sorted.
        local repeat_attackers=0
        local yesterday_attackers="$history_dir/known_attackers_${yesterday}.txt"
        if [ -f "$yesterday_attackers" ]; then
            repeat_attackers=$(comm -12 \
                <(_gcr_high_risk_ips "$scores_file") \
                <(sort -u "$yesterday_attackers") 2>/dev/null | wc -l) || repeat_attackers=0
            repeat_attackers=$(_gcr_to_int "$repeat_attackers")
            if [ "$repeat_attackers" -gt 0 ]; then
                echo -e "${RED}🔄 REPEAT ATTACKERS: $repeat_attackers IPs from yesterday${NC}"
            fi
        fi
    fi

    # Save current high-risk IPs for tomorrow's comparison. A missing score
    # file gives an empty list rather than a failing function status.
    _gcr_high_risk_ips "$scores_file" > "$history_dir/known_attackers_${today}.txt" || true
    return 0
}
|
||
|
||
#############################################################################
# Report Generation
#############################################################################
|
||
|
||
generate_report() {
|
||
exec > >(tee "$OUTPUT_FILE")
|
||
|
||
echo "==============================================================="
|
||
echo " APACHE/CPANEL BOT & BOTNET ANALYSIS REPORT"
|
||
echo " Generated: $(date '+%Y-%m-%d %H:%M:%S')"
|
||
echo "==============================================================="
|
||
|
||
# CRITICAL ALERTS SECTION
|
||
print_header "CRITICAL ALERTS"
|
||
|
||
alert_count=0
|
||
|
||
# Check for attack vectors
|
||
if [ -s "$TEMP_DIR/attack_types.txt" ]; then
|
||
print_alert "Security Attack Vectors Detected:"
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
attack_type=$(echo "$line" | awk '{print $2}')
|
||
|
||
case $attack_type in
|
||
sqli) echo " SQL Injection: $count attempts" ;;
|
||
xss) echo " XSS Attacks: $count attempts" ;;
|
||
path_traversal) echo " Path Traversal: $count attempts" ;;
|
||
rce_upload) echo " RCE/Shell Upload: $count attempts" ;;
|
||
info_disclosure) echo " Info Disclosure: $count attempts" ;;
|
||
login_bruteforce) echo " Login Bruteforce: $count attempts" ;;
|
||
esac
|
||
done < "$TEMP_DIR/attack_types.txt"
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# Check for suspicious scanners
|
||
if [ -s "$TEMP_DIR/suspicious_ua.txt" ]; then
|
||
scanner_count=$(wc -l < "$TEMP_DIR/suspicious_ua.txt")
|
||
print_alert "Malicious scanners detected: $scanner_count IPs"
|
||
echo " Top scanners:"
|
||
head -3 "$TEMP_DIR/suspicious_ua.txt" | while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
|
||
ua=$(echo "$line" | cut -d'|' -f2)
|
||
printf " %s requests - IP: %s - UA: %s\n" "$count" "$ip" "$ua"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for header anomalies (bot signatures)
|
||
if [ -s "$TEMP_DIR/header_anomalies.txt" ]; then
|
||
header_count=$(wc -l < "$TEMP_DIR/header_anomalies.txt")
|
||
print_alert "Header-based bot signatures detected: $header_count IPs"
|
||
echo " These IPs show suspicious header patterns (missing/unusual Accept-Language, Referer, etc.)"
|
||
head -5 "$TEMP_DIR/header_anomalies.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
anomaly_type=$(echo "$line" | awk -F'|' '{print $2}')
|
||
score=$(echo "$line" | awk -F'|' '{print $3}')
|
||
printf " • %s - Anomaly score: %s (detected: %s)\n" "$ip" "$score" "$anomaly_type"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for suspicious entry points
|
||
if [ -s "$TEMP_DIR/suspicious_entry_points.txt" ]; then
|
||
entry_count=$(wc -l < "$TEMP_DIR/suspicious_entry_points.txt")
|
||
print_alert "Suspicious entry points detected: $entry_count IPs"
|
||
echo " These IPs skip homepage/search and go straight to admin/config:"
|
||
head -5 "$TEMP_DIR/suspicious_entry_points.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
url=$(echo "$line" | awk -F'|' '{print $3}')
|
||
status=$(echo "$line" | awk -F'|' '{print $4}')
|
||
printf " • %s → %s (HTTP %s)\n" "$ip" "$url" "$status"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for fuzzing/scanning behavior
|
||
if [ -s "$TEMP_DIR/fuzzing_ips.txt" ]; then
|
||
fuzz_count=$(wc -l < "$TEMP_DIR/fuzzing_ips.txt")
|
||
print_alert "Parameter fuzzing/scanning detected: $fuzz_count IPs"
|
||
echo " These IPs are testing random parameters (vulnerability scanning):"
|
||
head -5 "$TEMP_DIR/fuzzing_ips.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
total_urls=$(echo "$line" | awk -F'|' '{print $3}')
|
||
unique_paths=$(echo "$line" | awk -F'|' '{print $4}')
|
||
printf " • %s - %s URLs across %s paths\n" "$ip" "$total_urls" "$unique_paths"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for timing anomalies (bot signatures)
|
||
if [ -s "$TEMP_DIR/timing_anomalies.txt" ]; then
|
||
timing_count=$(wc -l < "$TEMP_DIR/timing_anomalies.txt")
|
||
print_alert "Consistent timing pattern detected: $timing_count IPs"
|
||
echo " These IPs show mechanical request patterns (bot behavior):"
|
||
head -5 "$TEMP_DIR/timing_anomalies.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
avg_interval=$(echo "$line" | awk -F'|' '{print $3}')
|
||
total_reqs=$(echo "$line" | awk -F'|' '{print $4}')
|
||
printf " • %s - %.1f seconds average between requests (%s total requests)\n" "$ip" "$avg_interval" "$total_reqs"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# Check for rapid-fire IPs (potential DDoS)
|
||
if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
|
||
ddos_count=$(wc -l < "$TEMP_DIR/rapid_fire_ips.txt")
|
||
print_alert "Potential DDoS sources: $ddos_count IPs with >50 req/min"
|
||
echo " Top offenders:"
|
||
head -3 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print " "$2" - "$1" rapid requests"}'
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# Check for suspicious networks
|
||
if [ -s "$TEMP_DIR/suspicious_networks.txt" ]; then
|
||
net_count=$(wc -l < "$TEMP_DIR/suspicious_networks.txt")
|
||
print_alert "Suspicious networks detected: $net_count Class C ranges"
|
||
echo " Top networks:"
|
||
head -3 "$TEMP_DIR/suspicious_networks.txt" | awk '{print " "$2" - "$1" requests"}'
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
if [ "${alert_count:-0}" -eq 0 ]; then
|
||
print_success "No critical threats detected"
|
||
fi
|
||
|
||
# QUICK STATS DASHBOARD
|
||
print_header "QUICK STATS DASHBOARD"
|
||
|
||
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
|
||
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo "0")
|
||
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo "0")
|
||
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo "0")
|
||
|
||
# Count private/internal IPs (excluded from threat analysis)
|
||
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' 2>/dev/null | wc -l || echo "0")
|
||
|
||
# Count server's own IPs in the logs
|
||
server_ip_hits=0
|
||
if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
|
||
while read -r server_ip; do
|
||
if grep -q "^$server_ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null; then
|
||
server_ip_hits=$((server_ip_hits + 1))
|
||
fi
|
||
done < "$TEMP_DIR/server_ips.txt"
|
||
fi
|
||
|
||
echo "Total Requests: $(printf "%'d" $total_requests)"
|
||
echo "Unique IPs: $(printf "%'d" $unique_ips)"
|
||
|
||
# Show breakdown if we have excluded IPs
|
||
if [ "$private_ips" -gt 0 ] || [ "$server_ip_hits" -gt 0 ]; then
|
||
excluded_total=$((private_ips + server_ip_hits))
|
||
echo " ├─ Excluded IPs: $(printf "%'d" $excluded_total)"
|
||
[ "$private_ips" -gt 0 ] && echo " │ ├─ Private/Internal: $private_ips"
|
||
[ "$server_ip_hits" -gt 0 ] && echo " │ └─ Server's own: $server_ip_hits"
|
||
echo " └─ External IPs: $(printf "%'d" $((unique_ips - excluded_total)))"
|
||
fi
|
||
|
||
echo "Domains Analyzed: $unique_domains"
|
||
echo "Bot Requests: $(printf "%'d" $bot_requests) ($(awk "BEGIN {printf \"%.1f\", ($bot_requests/$total_requests)*100}")%)"
|
||
|
||
# Show detected server IPs
|
||
if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
|
||
echo ""
|
||
echo " Server IPs Detected (excluded from threat analysis):"
|
||
while read -r server_ip; do
|
||
echo " • $server_ip"
|
||
done < "$TEMP_DIR/server_ips.txt"
|
||
fi
|
||
echo ""
|
||
|
||
# Traffic breakdown
|
||
echo "Traffic Breakdown:"
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
||
type=$(echo "$line" | awk '{print $2}' || echo "unknown")
|
||
pct=$(awk "BEGIN {printf \"%.1f\", (${count:-0}/${total_requests:-1})*100}" 2>/dev/null || echo "0.0")
|
||
|
||
case $type in
|
||
legit) echo " Legitimate Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
ai) echo " AI Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
monitor) echo " 📡 Monitoring/SEO: $(printf "%'7d" $count) ($pct%)" ;;
|
||
suspicious) echo " Suspicious Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
unidentified_bot) echo " ❓ Unidentified Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
unknown) echo " Regular Traffic: $(printf "%'7d" $count) ($pct%)" ;;
|
||
esac
|
||
done < "$TEMP_DIR/traffic_breakdown.txt"
|
||
|
||
# TIME-SERIES ANALYSIS
|
||
if [ -s "$TEMP_DIR/hourly_bot_traffic.txt" ]; then
|
||
echo ""
|
||
echo "Bot Traffic Timeline (hourly):"
|
||
max_bot_traffic=$(awk '{print $1}' "$TEMP_DIR/hourly_bot_traffic.txt" | sort -rn | head -1)
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
hour=$(echo "$line" | awk '{print $2}')
|
||
# Create simple bar chart
|
||
bar_width=$((count * 10 / max_bot_traffic))
|
||
[ "${bar_width:-0}" -eq 0 ] && [ "${count:-0}" -gt 0 ] && bar_width=1
|
||
bar=$(printf '█%.0s' $(seq 1 $bar_width))
|
||
spaces=$(printf '░%.0s' $(seq 1 $((10 - bar_width))))
|
||
|
||
# Detect spikes (>2x average)
|
||
avg_traffic=$((total_requests / 24))
|
||
spike=""
|
||
[ ${count:-0} -gt $((avg_traffic * 2)) ] && spike=" SPIKE"
|
||
|
||
# Strip leading zeros to avoid octal interpretation
|
||
hour_num=$((10#$hour))
|
||
next_hour=$((hour_num + 1))
|
||
printf " %02d:00-%02d:00: %s%s %'6d bot requests%s\n" "$hour_num" "$next_hour" "$bar" "$spaces" "$count" "$spike"
|
||
done < "$TEMP_DIR/hourly_bot_traffic.txt"
|
||
fi
|
||
|
||
# RESPONSE CODE INTELLIGENCE
|
||
if [ -s "$TEMP_DIR/response_codes.txt" ]; then
|
||
echo ""
|
||
echo "Response Code Analysis:"
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
||
code=$(echo "$line" | awk '{print $2}' || echo "000")
|
||
pct=$(awk "BEGIN {printf \"%.1f\", (${count:-0}/${total_requests:-1})*100}" 2>/dev/null || echo "0.0")
|
||
|
||
case $code in
|
||
200) echo " 200 (Success): $(printf "%'7d" $count) ($pct%) Bots are getting data" ;;
|
||
404) echo " 404 (Not Found): $(printf "%'7d" $count) ($pct%) Scanning for vulnerabilities" ;;
|
||
403) echo " 403 (Forbidden): $(printf "%'7d" $count) ($pct%) Blocked by existing rules" ;;
|
||
401) echo " 401 (Unauthorized):$(printf "%'7d" $count) ($pct%) Login attempts failing" ;;
|
||
500|502|503) echo " $code (Server Error):$(printf "%'7d" $count) ($pct%) Check if exploit triggered" ;;
|
||
301|302) echo " $code (Redirect): $(printf "%'7d" $count) ($pct%)" ;;
|
||
*) echo " $code: $(printf "%'7d" $count) ($pct%)" ;;
|
||
esac
|
||
done < "$TEMP_DIR/response_codes.txt" | head -7
|
||
fi
|
||
|
||
# FALSE POSITIVE WARNINGS
|
||
if [ -s "$TEMP_DIR/false_positives.txt" ]; then
|
||
echo ""
|
||
echo "Whitelist Recommendations (Legitimate Services):"
|
||
while read -r line; do
|
||
ip=$(echo "$line" | cut -d'|' -f1)
|
||
service=$(echo "$line" | cut -d'|' -f2)
|
||
domain=$(echo "$line" | cut -d'|' -f4)
|
||
req_count=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -c "^$ip|" || echo 0)
|
||
echo " $ip - $req_count requests - Identified as: $service"
|
||
echo " → Domain: $domain"
|
||
echo " → Action: VERIFY OWNERSHIP then whitelist"
|
||
done < "$TEMP_DIR/false_positives.txt" | head -6
|
||
fi
|
||
|
||
# NEW: HIGH-CONFIDENCE BOT FINGERPRINTS
|
||
if [ -s "$TEMP_DIR/bot_fingerprints.txt" ]; then
|
||
echo ""
|
||
print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)"
|
||
echo "These IPs show MULTIPLE bot indicators combined (not just single signal):"
|
||
echo ""
|
||
|
||
awk -F'|' '
|
||
NR <= 15 {
|
||
ip = $1
|
||
score = $2
|
||
signals = $3
|
||
|
||
# Risk level based on score
|
||
if (score >= 80) risk = "CRITICAL"
|
||
else if (score >= 70) risk = "HIGH"
|
||
else if (score >= 60) risk = "MEDIUM"
|
||
else risk = "LOW"
|
||
|
||
printf " %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals
|
||
}' "$TEMP_DIR/bot_fingerprints.txt"
|
||
|
||
total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
|
||
echo ""
|
||
echo " Total high-confidence bots detected: $total IPs"
|
||
echo ""
|
||
else
|
||
echo ""
|
||
echo " No high-confidence bot fingerprints detected (requires multiple signals)"
|
||
echo ""
|
||
fi
|
||
|
||
# NEW: DOMAIN ATTACK TARGETING ANALYSIS (what's attacking each domain)
|
||
if [ -s "$TEMP_DIR/domain_targeting.txt" ]; then
|
||
echo ""
|
||
print_header "DOMAIN ATTACK TARGETING (Which domains are under attack & from where?)"
|
||
echo ""
|
||
|
||
total_domains=$(wc -l < "$TEMP_DIR/domain_targeting.txt" 2>/dev/null || echo "0")
|
||
echo "Total domains with attacks detected: $total_domains"
|
||
echo ""
|
||
|
||
# Show top attacked domains with attack details
|
||
awk -F'|' 'NR <= 10 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
||
domain_attack_count=$(grep -F "|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0")
|
||
|
||
if [ "$domain_attack_count" -gt 0 ]; then
|
||
echo " Domain: $domain ($domain_attack_count attack attempts)"
|
||
|
||
# Get all attacks on this domain, group by type
|
||
awk -F'|' -v dom="$domain" '
|
||
$2 == dom {
|
||
ip = $1
|
||
attack_type = $5
|
||
|
||
# Validate IP format
|
||
if (match(ip, /^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/)) {
|
||
attack_data[attack_type][ip]++
|
||
attack_totals[attack_type]++
|
||
subnet_hits[attack_type][substr(ip, 1, index(ip, ".", index(ip, ".")+1)-1)]++
|
||
}
|
||
}
|
||
END {
|
||
for (attack_type in attack_totals) {
|
||
printf " └─ %s: %d attempts\n", attack_type, attack_totals[attack_type]
|
||
|
||
# Show top 3 IPs for this attack type
|
||
attack_count = 0
|
||
for (ip in attack_data[attack_type]) {
|
||
if (attack_count >= 3) break
|
||
count = attack_data[attack_type][ip]
|
||
split(ip, parts, ".")
|
||
subnet = parts[1] "." parts[2] "." parts[3] ".0/24"
|
||
printf " ├─ %s (%d reqs) [subnet: %s]\n", ip, count, subnet
|
||
attack_count++
|
||
}
|
||
}
|
||
}' "$TEMP_DIR/attack_vectors_raw.txt"
|
||
echo ""
|
||
fi
|
||
done
|
||
else
|
||
echo ""
|
||
echo " No domain attack data available (all domains may be healthy)"
|
||
echo ""
|
||
fi
|
||
|
||
# NEW: TOP URLs BEING ATTACKED
|
||
if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then
|
||
echo ""
|
||
print_header "TOP TARGETED URLs (What files/endpoints are bots hitting?)"
|
||
echo ""
|
||
|
||
# Show top URLs for top 3 most-attacked domains
|
||
urls_shown=0
|
||
awk -F'|' 'NR <= 3 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
||
local domain_file="$TEMP_DIR/domain_urls_${domain}.txt"
|
||
if [ -f "$domain_file" ] && [ -s "$domain_file" ]; then
|
||
echo " Domain: $domain"
|
||
awk -F'|' '{
|
||
url = $1
|
||
count = $2
|
||
printf " %3d requests → %s\n", count, url
|
||
}' "$domain_file" # Show all URLs, not just top 5
|
||
echo ""
|
||
fi
|
||
done
|
||
|
||
# Check if no URL data was shown
|
||
if [ "$urls_shown" -eq 0 ]; then
|
||
echo " No URL targeting data available"
|
||
echo ""
|
||
fi
|
||
else
|
||
echo ""
|
||
echo " No domain targeting data available"
|
||
echo ""
|
||
fi
|
||
|
||
# TOP 5 THREATS
|
||
print_header "TOP 5 THREATS (with recommended actions)"
|
||
|
||
echo "1. Highest Risk IPs (by threat score):"
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
counter=1
|
||
while read -r line && [ "${counter:-0}" -le 10 ]; do
|
||
score=$(echo "$line" | cut -d'|' -f1)
|
||
ip=$(echo "$line" | cut -d'|' -f2)
|
||
count=$(echo "$line" | cut -d'|' -f3)
|
||
|
||
# Determine threat level and action based on score
|
||
if [ "$score" -ge 80 ]; then
|
||
threat_level="CRITICAL"
|
||
threat_icon=""
|
||
action="BLOCK IMMEDIATELY + INVESTIGATE"
|
||
echo -e " ${RED}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
|
||
elif [ "$score" -ge 60 ]; then
|
||
threat_level="HIGH"
|
||
threat_icon=""
|
||
action="BLOCK or AGGRESSIVE RATE LIMIT"
|
||
echo -e " ${YELLOW}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
|
||
elif [ "$score" -ge 40 ]; then
|
||
threat_level="MODERATE"
|
||
threat_icon=""
|
||
action="RATE LIMIT RECOMMENDED"
|
||
echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
|
||
else
|
||
threat_level="LOW"
|
||
threat_icon=""
|
||
action="MONITOR"
|
||
echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
|
||
fi
|
||
|
||
echo " $count requests - Action: $action"
|
||
|
||
# Show which attack vectors this IP used
|
||
attack_types=""
|
||
grep -q "$ip" "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null && attack_types="${attack_types}SQL-Injection "
|
||
grep -q "$ip" "$TEMP_DIR/xss_attempts.txt" 2>/dev/null && attack_types="${attack_types}XSS "
|
||
grep -q "$ip" "$TEMP_DIR/path_traversal_attempts.txt" 2>/dev/null && attack_types="${attack_types}Path-Traversal "
|
||
grep -q "$ip" "$TEMP_DIR/rce_upload_attempts.txt" 2>/dev/null && attack_types="${attack_types}RCE/Upload "
|
||
grep -q "$ip" "$TEMP_DIR/login_bruteforce_attempts.txt" 2>/dev/null && attack_types="${attack_types}Login-Bruteforce "
|
||
grep -q "$ip" "$TEMP_DIR/suspicious_ua.txt" 2>/dev/null && attack_types="${attack_types}Scanner-UA "
|
||
grep -q "$ip" "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null && attack_types="${attack_types}DDoS-Pattern "
|
||
|
||
[ -n "$attack_types" ] && echo " Attack vectors: $attack_types"
|
||
|
||
counter=$((counter + 1))
|
||
done < "$TEMP_DIR/threat_scores.txt"
|
||
else
|
||
echo " No significant threats detected "
|
||
fi
|
||
echo ""
|
||
|
||
echo "2. Top Aggressive Bots:"
|
||
counter=1
|
||
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
||
count=$(echo "$line" | awk 'BEGIN {count=0} {print $1}')
|
||
bot=$(echo "$line" | awk 'BEGIN {f=""} {$1=""; print $0}' | xargs)
|
||
|
||
action="Allow"
|
||
if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex"; then
|
||
action="Consider blocking (aggressive)"
|
||
fi
|
||
|
||
echo " [$counter] $bot - $count requests - Action: $action"
|
||
counter=$((counter + 1))
|
||
done < "$TEMP_DIR/top_bots.txt"
|
||
echo ""
|
||
|
||
echo "3. Admin Endpoint Probing:"
|
||
if [ -s "$TEMP_DIR/admin_probes.txt" ]; then
|
||
head -3 "$TEMP_DIR/admin_probes.txt" | while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
|
||
domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
|
||
url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
|
||
printf " %s attempts - IP: %s - %s%s\n" "$count" "$ip" "$domain" "$url"
|
||
done
|
||
echo " Action: Verify legitimate admin access or block"
|
||
else
|
||
echo " None detected "
|
||
fi
|
||
echo ""
|
||
|
||
echo "4. 404 Scanners (Reconnaissance):"
|
||
if [ -s "$TEMP_DIR/404_scans.txt" ]; then
|
||
head -3 "$TEMP_DIR/404_scans.txt" | awk '$1 > 10 {
|
||
count = $1
|
||
$1 = ""
|
||
gsub(/^[[:space:]]+\|?/, "")
|
||
split($0, parts, "|")
|
||
printf " %s failed requests - IP: %s - %s%s\n", count, parts[1], parts[2], parts[3]
|
||
}'
|
||
else
|
||
echo " None detected "
|
||
fi
|
||
echo ""
|
||
|
||
echo "5. Large Data Transfers:"
|
||
if [ -s "$TEMP_DIR/large_transfers.txt" ]; then
|
||
# Calculate total bot bandwidth
|
||
total_bot_bandwidth=0
|
||
if [ -f "$TEMP_DIR/classified_bots.txt.gz" ]; then
|
||
total_bot_bandwidth=$(awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/classified_bots.txt")
|
||
fi
|
||
|
||
if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then
|
||
bot_bandwidth_mb=$(awk "BEGIN {printf \"%.0f\", $total_bot_bandwidth/1048576}")
|
||
bot_bandwidth_gb=$(awk "BEGIN {printf \"%.2f\", $total_bot_bandwidth/1073741824}")
|
||
# Estimate cost at $0.09/GB (typical CDN pricing)
|
||
estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}")
|
||
|
||
total_bandwidth=$(awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/parsed_logs.txt")
|
||
bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}")
|
||
|
||
echo ""
|
||
echo " 💰 Bandwidth Impact:"
|
||
echo " Total bot bandwidth: ${bot_bandwidth_mb} MB (${bot_bandwidth_gb} GB) - ${bot_pct}% of total"
|
||
echo " Estimated cost: \$$estimated_cost (at \$0.09/GB CDN pricing)"
|
||
fi
|
||
echo ""
|
||
echo " Top bandwidth consumers:"
|
||
|
||
head -3 "$TEMP_DIR/large_transfers.txt" | while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
||
ip=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
||
domain=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f2 || echo "unknown")
|
||
url=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f3 || echo "unknown")
|
||
size=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f4 || echo "0")
|
||
size_mb=$(awk "BEGIN {printf \"%.1f\", ${size:-0}/1048576}" 2>/dev/null || echo "0.0")
|
||
total_ip_mb=$(awk "BEGIN {printf \"%.0f\", ${size:-0} * ${count:-0} / 1048576}" 2>/dev/null || echo "0")
|
||
printf " %s transfers from %s - %.1f MB avg (%s MB total) - %s%s\n" "$count" "$ip" "$size_mb" "$total_ip_mb" "$domain" "$url"
|
||
done
|
||
echo " Action: Verify if scraping, consider serving WebP/optimized images"
|
||
else
|
||
echo " None detected "
|
||
fi
|
||
|
||
# TOP 5 TARGETED SITES
|
||
print_header "TOP 5 TARGETED SITES (with risk breakdown)"
|
||
|
||
counter=1
|
||
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
||
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
||
domain=$(echo "$line" | awk '{print $2}' || echo "unknown")
|
||
|
||
echo "[$counter] $domain - $count requests"
|
||
|
||
# Show traffic breakdown for this domain
|
||
if [ -f "$TEMP_DIR/domain_${domain}_stats.txt" ]; then
|
||
tail -n +2 "$TEMP_DIR/domain_${domain}_stats.txt" | while read -r stat_line; do
|
||
stat_count=$(echo "$stat_line" | awk '{print $1}' || echo "0")
|
||
stat_type=$(echo "$stat_line" | awk '{print $2}' || echo "unknown")
|
||
pct=$(awk "BEGIN {printf \"%.1f\", (${stat_count:-0}/${count:-1})*100}" 2>/dev/null || echo "0.0")
|
||
|
||
case $stat_type in
|
||
suspicious) echo -e " ${YELLOW}Suspicious: $stat_count ($pct%)${NC}" ;;
|
||
ai) echo " AI Bots: $stat_count ($pct%)" ;;
|
||
legit) echo " Legit Bots: $stat_count ($pct%)" ;;
|
||
unknown) echo " Regular: $stat_count ($pct%)" ;;
|
||
*) echo " $stat_type: $stat_count ($pct%)" ;;
|
||
esac
|
||
done
|
||
fi
|
||
echo ""
|
||
|
||
counter=$((counter + 1))
|
||
done < "$TEMP_DIR/top_sites.txt"
|
||
|
||
# BLOCKLIST
|
||
print_header "COPY-PASTE READY BLOCKLIST (Prioritized by Threat Score)"
|
||
|
||
echo "# Apache .htaccess format:"
|
||
echo "# Add to .htaccess in document root"
|
||
echo "# IPs sorted by risk score (highest first)"
|
||
echo ""
|
||
|
||
# Use threat scores to prioritize blocklist (exclude false positives and excluded IPs)
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
# Get IPs with score >= 60 (HIGH and CRITICAL)
|
||
awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
|
||
ip=$(echo "$entry" | cut -d'|' -f1)
|
||
score=$(echo "$entry" | cut -d'|' -f2)
|
||
|
||
# Skip excluded IPs (private, localhost, server's own)
|
||
if is_excluded_ip "$ip"; then
|
||
continue
|
||
fi
|
||
|
||
# Skip if in false positives
|
||
if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
|
||
continue
|
||
fi
|
||
|
||
echo "Deny from $ip # Risk score: $score/100"
|
||
done
|
||
else
|
||
# Fallback to old method
|
||
{
|
||
[ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
|
||
[ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
|
||
[ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
|
||
} | sort -u | head -30 | while read -r ip; do
|
||
echo "Deny from $ip"
|
||
done
|
||
fi
|
||
|
||
echo ""
|
||
echo "# cPanel User-Agent blocking (add to /etc/apache2/conf.d/includes/pre_main_global.conf):"
|
||
echo ""
|
||
echo "<IfModule mod_rewrite.c>"
|
||
echo " RewriteEngine On"
|
||
echo " RewriteCond %{HTTP_USER_AGENT} \"(nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp|metasploit)\" [NC]"
|
||
echo " RewriteRule ^ - [F,L]"
|
||
echo "</IfModule>"
|
||
echo ""
|
||
echo "# Optional: Block aggressive SEO bots (uncomment to enable)"
|
||
echo "# <IfModule mod_rewrite.c>"
|
||
echo "# RewriteEngine On"
|
||
echo "# RewriteCond %{HTTP_USER_AGENT} \"(AhrefsBot|SemrushBot|MJ12bot|DotBot|Meta-ExternalAgent|Go-http-client)\" [NC]"
|
||
echo "# RewriteRule ^ - [F,L]"
|
||
echo "# </IfModule>"
|
||
|
||
echo ""
|
||
echo "# CSF/iptables format:"
|
||
echo "# Run these commands as root:"
|
||
echo ""
|
||
|
||
# Same prioritized list for CSF
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
|
||
ip=$(echo "$entry" | cut -d'|' -f1)
|
||
score=$(echo "$entry" | cut -d'|' -f2)
|
||
|
||
# Skip excluded IPs (private, localhost, server's own)
|
||
if is_excluded_ip "$ip"; then
|
||
continue
|
||
fi
|
||
|
||
# Skip if in false positives
|
||
if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
|
||
continue
|
||
fi
|
||
|
||
echo "csf -d $ip \"Threat score: $score/100\""
|
||
done
|
||
else
|
||
# Fallback
|
||
{
|
||
[ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
|
||
[ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
|
||
[ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
|
||
} | sort -u | head -30 | while read -r ip; do
|
||
echo "csf -d $ip \"Bot/Scanner threat\""
|
||
done
|
||
fi
|
||
|
||
# SUMMARY
|
||
print_header "📋 SUMMARY & RECOMMENDATIONS"
|
||
|
||
threat_score=0
|
||
|
||
# Calculate threat score from attack vectors
|
||
[ -s "$TEMP_DIR/sqli_attempts.txt" ] && threat_score=$((threat_score + 15))
|
||
[ -s "$TEMP_DIR/xss_attempts.txt" ] && threat_score=$((threat_score + 12))
|
||
[ -s "$TEMP_DIR/path_traversal_attempts.txt" ] && threat_score=$((threat_score + 15))
|
||
[ -s "$TEMP_DIR/rce_upload_attempts.txt" ] && threat_score=$((threat_score + 20))
|
||
[ -s "$TEMP_DIR/login_bruteforce_attempts.txt" ] && threat_score=$((threat_score + 10))
|
||
[ -s "$TEMP_DIR/suspicious_ua.txt" ] && threat_score=$((threat_score + 8))
|
||
[ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && threat_score=$((threat_score + 5))
|
||
[ $(wc -l < "$TEMP_DIR/admin_probes.txt" 2>/dev/null || echo 0) -gt 10 ] && threat_score=$((threat_score + 3))
|
||
|
||
# Count high-risk IPs
|
||
high_risk_count=0
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
high_risk_count=$(awk -F'|' '$1 >= 60' "$TEMP_DIR/threat_scores.txt" | wc -l)
|
||
fi
|
||
|
||
if [ "${threat_score:-0}" -ge 25 ] || [ "${high_risk_count:-0}" -ge 5 ]; then
|
||
print_alert "THREAT LEVEL: CRITICAL - Immediate action required"
|
||
echo " Summary: Multiple attack vectors detected from $high_risk_count high-risk IPs"
|
||
echo ""
|
||
echo " Immediate Actions:"
|
||
echo " 1. ⚡ Apply the blocklist above IMMEDIATELY (prioritized by threat score)"
|
||
echo " 2. Review admin access logs for successful breaches"
|
||
echo " 3. 🛡 Enable ModSecurity WAF or Cloudflare if not already active"
|
||
echo " 4. 🔄 Update all CMS platforms and plugins urgently"
|
||
echo " 5. 🔐 Force password reset for admin accounts if login attempts detected"
|
||
echo " 6. Re-run this analysis in 1 hour to verify blocks are working"
|
||
elif [ "${threat_score:-0}" -ge 12 ] || [ "${high_risk_count:-0}" -ge 2 ]; then
|
||
print_warning "THREAT LEVEL: HIGH - Action recommended within 24 hours"
|
||
echo " Summary: Significant threat activity from $high_risk_count high-risk IPs"
|
||
echo ""
|
||
echo " Recommended Actions:"
|
||
echo " 1. Review and apply the blocklist above (focus on CRITICAL/HIGH scores)"
|
||
echo " 2. Enable rate limiting for admin endpoints"
|
||
echo " 3. Monitor logs closely for the next 24-48 hours"
|
||
echo " 4. Consider implementing fail2ban or similar IDS"
|
||
echo " 5. Review and update security plugins/modules"
|
||
elif [ "${threat_score:-0}" -ge 5 ]; then
|
||
print_warning "THREAT LEVEL: MODERATE - Routine security maintenance"
|
||
echo " Summary: Normal bot activity with some suspicious patterns"
|
||
echo ""
|
||
echo " Recommended Actions:"
|
||
echo " 1. Review suspicious IPs in the report"
|
||
echo " 2. Consider rate limiting aggressive bots"
|
||
echo " 3. Continue routine log monitoring"
|
||
echo " 4. Block aggressive SEO bots if impacting performance"
|
||
else
|
||
print_success "THREAT LEVEL: ✅ LOW - Normal operation"
|
||
echo " Summary: Minimal threat activity detected"
|
||
echo ""
|
||
echo " Recommended Actions:"
|
||
echo " 1. Continue routine log monitoring"
|
||
echo " 2. Review false positive warnings to whitelist legitimate services"
|
||
echo " 3. Consider blocking aggressive SEO bots if bandwidth is a concern"
|
||
fi
|
||
|
||
echo ""
|
||
echo "==============================================================="
|
||
echo "Report saved to: $OUTPUT_FILE"
|
||
echo "==============================================================="
|
||
}
|
||
|
||
################################################################################
|
||
# BASELINE HEALTH CHECK - Snapshot cached domain status before making changes
|
||
################################################################################
|
||
|
||
#######################################
# Snapshot the cached health status of every domain.
#
# Reads "domain|http_code|https_code|status_summary" records from
# get_all_domain_statuses, prints one status line per domain, and stores the
# same records in $TEMP_DIR/baseline_health.txt (the file that
# verify_domains_still_working later compares against).
#
# Globals:   TEMP_DIR, GREEN, YELLOW, RED, NC (all read)
# Arguments: none
# Outputs:   per-domain status lines and a summary on stdout;
#            truncates and rewrites $TEMP_DIR/baseline_health.txt
# Returns:   0, including when the status helper or its data is unavailable
#######################################
baseline_health_check() {
    print_info "Loading baseline health status from cached data..."
    echo ""

    local baseline_file="$TEMP_DIR/baseline_health.txt"

    # Start from an empty file so records from an earlier run are discarded.
    : > "$baseline_file"

    # Statuses come from get_all_domain_statuses (no live re-check is done here).
    if ! command -v get_all_domain_statuses &>/dev/null; then
        print_warning "Reference database functions not available - skipping health check"
        return 0
    fi

    local tested=0
    local working=0
    local broken=0
    # Loop fields are local so they cannot overwrite same-named variables
    # (e.g. "domain", "result") belonging to the caller.
    local domain http_status https_status result

    # "|| [ -n "$domain" ]" keeps a final record that lacks a trailing newline.
    while IFS='|' read -r domain http_status https_status result \
        || [ -n "$domain" ]; do
        [ -z "$domain" ] && continue

        tested=$((tested + 1))

        # Colours go through %b (escape sequences expanded); data goes through
        # %s so backslashes in cached values are printed literally.
        case "$result" in
            200_OK)
                working=$((working + 1))
                printf ' %b✓%b %s - HTTP:%s HTTPS:%s\n' \
                    "$GREEN" "$NC" "$domain" "$http_status" "$https_status"
                ;;
            REDIRECT)
                working=$((working + 1))
                printf ' %b→%b %s - Redirect (HTTP:%s HTTPS:%s)\n' \
                    "$YELLOW" "$NC" "$domain" "$http_status" "$https_status"
                ;;
            403_FORBIDDEN)
                broken=$((broken + 1))
                printf ' %b✗%b %s - Forbidden (HTTP:%s HTTPS:%s)\n' \
                    "$RED" "$NC" "$domain" "$http_status" "$https_status"
                ;;
            TIMEOUT|UNREACHABLE)
                broken=$((broken + 1))
                printf ' %b⏱%b %s - Timeout (unreachable)\n' \
                    "$RED" "$NC" "$domain"
                ;;
            *)
                # Any other summary is counted as an issue.
                broken=$((broken + 1))
                printf ' %b?%b %s - HTTP:%s HTTPS:%s\n' \
                    "$YELLOW" "$NC" "$domain" "$http_status" "$https_status"
                ;;
        esac

        # Store baseline: domain|http_status|https_status|result
        printf '%s|%s|%s|%s\n' \
            "$domain" "$http_status" "$https_status" "$result" >> "$baseline_file"
    done < <(get_all_domain_statuses)

    if [ "$tested" -eq 0 ]; then
        print_warning "No domain status data available in reference database"
        return 0
    fi

    echo ""
    print_success "Baseline loaded from cache: $working working, $broken with issues"
    echo ""
}
|
||
|
||
#######################################
# Compare each domain's current cached status with the stored baseline.
#
# Reads "domain|http|https|result" records from
# $TEMP_DIR/baseline_health.txt, looks up the current
# "http_code|https_code|status_summary" via get_domain_status, and reports
# domains whose status summary changed. A status is considered "working"
# when it is 200_OK or REDIRECT; every other summary counts as not working.
#
# Globals:   TEMP_DIR, GREEN, RED, NC (all read)
# Arguments: none
# Outputs:   BROKEN/FIXED notices and a summary on stdout, then a
#            "Press Enter" prompt
# Returns:   0, including when baseline data or the lookup helper is missing
#######################################
verify_domains_still_working() {
    print_info "Checking current domain status from cached data..."
    echo ""

    local baseline_file="$TEMP_DIR/baseline_health.txt"

    if [ ! -s "$baseline_file" ]; then
        print_warning "No baseline health data available"
        return 0
    fi

    if ! command -v get_domain_status &>/dev/null; then
        print_warning "Reference database functions not available - skipping verification"
        return 0
    fi

    local changes_detected=0
    local now_broken=0
    # All per-record variables are local so nothing leaks into the caller.
    local domain baseline_http baseline_https baseline_result
    local current_status http_status https_status new_result
    local was_working is_working

    while IFS='|' read -r domain baseline_http baseline_https baseline_result \
        || [ -n "$domain" ]; do
        [ -z "$domain" ] && continue

        # Assigned separately from "local" so the helper's exit status is not
        # masked; "|| true" keeps a failed lookup from aborting under set -e
        # and leaves whatever the helper printed (usually nothing).
        current_status=$(get_domain_status "$domain") || true

        # Domain not in cache - skip
        [ -z "$current_status" ] && continue

        # Parse current status: http_code|https_code|status_summary
        IFS='|' read -r http_status https_status new_result <<< "$current_status"

        [ "$baseline_result" = "$new_result" ] && continue
        changes_detected=$((changes_detected + 1))

        was_working=0
        is_working=0
        case "$baseline_result" in 200_OK|REDIRECT) was_working=1 ;; esac
        case "$new_result" in 200_OK|REDIRECT) is_working=1 ;; esac

        if [ "$was_working" -eq 1 ] && [ "$is_working" -eq 0 ]; then
            # Working before, anything else now: it got worse.
            now_broken=$((now_broken + 1))
            printf ' %b⚠ BROKEN:%b %s\n' "$RED" "$NC" "$domain"
            printf ' Before: %s (HTTP:%s HTTPS:%s)\n' \
                "$baseline_result" "$baseline_http" "$baseline_https"
            printf ' After: %s (HTTP:%s HTTPS:%s)\n' \
                "$new_result" "$http_status" "$https_status"
            printf ' %bWARNING: This domain stopped working after your changes!%b\n' \
                "$RED" "$NC"
            echo ""
        elif [ "$was_working" -eq 0 ] && [ "$is_working" -eq 1 ]; then
            # Not working before, working now: it got better.
            printf ' %b✅ FIXED:%b %s\n' "$GREEN" "$NC" "$domain"
            printf ' Before: %s\n' "$baseline_result"
            printf ' After: %s\n' "$new_result"
            echo ""
        fi
    done < "$baseline_file"

    if [ "$now_broken" -gt 0 ]; then
        echo ""
        print_alert "WARNING: $now_broken domain(s) may have stopped working!"
        echo ""
        echo "NOTE: Status is from cached data (max 1 hour old)."
        echo "If you just made changes, the cache may not reflect real-time status."
        echo ""
        echo "Recommended actions:"
        echo " 1. Review the firewall rules you just applied"
        echo " 2. Check CSF temporary blocks: csf -t"
        echo " 3. Check CSF deny list: csf -g"
        echo " 4. Manually verify domain: curl -I http://domain.com"
        echo " 5. Consider reverting changes if issues persist"
        echo ""
    elif [ "$changes_detected" -eq 0 ]; then
        print_success "All domains show same status as baseline (cache-based check)"
    else
        print_success "Some status changes detected but no domains broken (cache-based check)"
    fi

    echo ""
    # Best-effort pause: with no terminal on stdin, read hits EOF and returns
    # non-zero, which must not terminate the script under "set -e".
    read -r -p "Press Enter to continue..." || true
}
|
||
|
||
#############################################################################
|
||
# Main Execution
|
||
#############################################################################
|
||
|
||
main() {
|
||
echo ""
|
||
print_header "Starting Apache/cPanel Bot Analysis"
|
||
|
||
# InterWorx requires special log discovery (logs are in /home/user/var/domain.com/logs/)
|
||
if [ "$SYS_CONTROL_PANEL" = "interworx" ]; then
|
||
print_info "InterWorx detected - discovering domain logs..."
|
||
|
||
# Build time filter options
|
||
local find_opts=()
|
||
if [ -n "$HOURS_BACK" ]; then
|
||
local minutes=$((HOURS_BACK * 60))
|
||
find_opts+=(-mmin -"$minutes")
|
||
elif [ -n "$DAYS_BACK" ]; then
|
||
find_opts+=(-mtime -"$DAYS_BACK")
|
||
fi
|
||
|
||
# Find all transfer*.log files in InterWorx structure (includes transfer.log and transfer-ssl.log)
|
||
log_count=$(find /home/*/var/*/logs -type f -name "transfer*.log" "${find_opts[@]}" 2>/dev/null | wc -l)
|
||
|
||
if [ "$log_count" -eq 0 ]; then
|
||
# Try without time filter to see if ANY logs exist
|
||
local total_logs=$(find /home/*/var/*/logs -type f -name "transfer*.log" 2>/dev/null | wc -l)
|
||
|
||
if [ "$total_logs" -eq 0 ]; then
|
||
print_alert "Error: No InterWorx access logs found in /home/*/var/*/logs/"
|
||
echo ""
|
||
echo "Diagnostic information:"
|
||
echo " Checking for InterWorx structure:"
|
||
local iw_structure=$(find /home -maxdepth 3 -type d -path "*/var/*/logs" 2>/dev/null | head -5)
|
||
if [ -n "$iw_structure" ]; then
|
||
echo " Found InterWorx directories:"
|
||
echo "$iw_structure"
|
||
echo ""
|
||
echo " Checking for any log files:"
|
||
find /home/*/var/*/logs -type f -name "*.log" 2>/dev/null | head -10
|
||
else
|
||
echo " No InterWorx directory structure found (expected: /home/user/var/domain.com/logs/)"
|
||
fi
|
||
exit 1
|
||
else
|
||
print_alert "No logs found matching time filter (last $HOURS_BACK hours)"
|
||
echo "Total logs available: $total_logs"
|
||
echo ""
|
||
read -p "Analyze all available logs instead? [y/N]: " choice
|
||
if [[ "$choice" =~ ^[Yy] ]]; then
|
||
log_count=$total_logs
|
||
find_opts=() # Clear time filter
|
||
else
|
||
exit 0
|
||
fi
|
||
fi
|
||
fi
|
||
|
||
print_info "Found $log_count InterWorx domain log files to analyze"
|
||
|
||
# Override LOG_DIR for parse_logs function to use
|
||
export INTERWORX_MODE="yes"
|
||
export INTERWORX_FIND_OPTS="${find_opts[*]}"
|
||
else
|
||
# Standard cPanel/Plesk log discovery
|
||
# Check if log directory exists
|
||
if [ ! -d "$LOG_DIR" ]; then
|
||
print_alert "Error: Log directory not found: $LOG_DIR"
|
||
echo "Please specify the correct log directory with -l option"
|
||
exit 1
|
||
fi
|
||
|
||
# Check if logs exist
|
||
local find_opts=()
|
||
if [ -n "$HOURS_BACK" ]; then
|
||
local minutes=$((HOURS_BACK * 60))
|
||
find_opts+=(-mmin -"$minutes")
|
||
elif [ -n "$DAYS_BACK" ]; then
|
||
find_opts+=(-mtime -"$DAYS_BACK")
|
||
fi
|
||
|
||
log_count=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | wc -l)
|
||
if [ "$log_count" -eq 0 ]; then
|
||
# Try without time filter to see if ANY logs exist
|
||
local total_logs=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" 2>/dev/null | wc -l)
|
||
|
||
if [ "$total_logs" -eq 0 ]; then
|
||
print_alert "Error: No log files found in $LOG_DIR"
|
||
echo ""
|
||
echo "Diagnostic information:"
|
||
echo " Log directory: $LOG_DIR"
|
||
echo " Directory exists: $([ -d "$LOG_DIR" ] && echo "yes" || echo "no")"
|
||
if [ -d "$LOG_DIR" ]; then
|
||
echo " Total files in directory: $(find "$LOG_DIR" -type f 2>/dev/null | wc -l)"
|
||
echo " Sample files:"
|
||
find "$LOG_DIR" -type f 2>/dev/null | head -5 | sed 's/^/ /'
|
||
fi
|
||
echo ""
|
||
echo "Control panel: $SYS_CONTROL_PANEL"
|
||
exit 1
|
||
else
|
||
print_alert "No logs found matching time filter"
|
||
if [ -n "$HOURS_BACK" ]; then
|
||
echo "No logs found from the last $HOURS_BACK hours"
|
||
elif [ -n "$DAYS_BACK" ]; then
|
||
echo "No logs found from the last $DAYS_BACK days"
|
||
fi
|
||
echo "Total logs available: $total_logs"
|
||
echo ""
|
||
read -p "Analyze all available logs instead? [y/N]: " choice
|
||
if [[ "$choice" =~ ^[Yy] ]]; then
|
||
log_count=$total_logs
|
||
find_opts=() # Clear time filter
|
||
else
|
||
exit 0
|
||
fi
|
||
fi
|
||
fi
|
||
|
||
print_info "Found $log_count log files to analyze"
|
||
fi
|
||
|
||
# User filtering
|
||
if [ -n "$FILTER_USER" ]; then
|
||
print_info "Filtering logs for user: $FILTER_USER"
|
||
export user_domains=$(get_user_domains "$FILTER_USER")
|
||
if [ -z "$user_domains" ]; then
|
||
print_error "No domains found for user: $FILTER_USER"
|
||
exit 1
|
||
fi
|
||
print_info "User has $(echo "$user_domains" | wc -l) domain(s)"
|
||
else
|
||
export user_domains=""
|
||
fi
|
||
|
||
# Print time range info
|
||
if [ -n "$HOURS_BACK" ]; then
|
||
print_info "Analyzing logs from the last $HOURS_BACK hours"
|
||
elif [ -n "$DAYS_BACK" ]; then
|
||
print_info "Analyzing logs from the last $DAYS_BACK days"
|
||
fi
|
||
|
||
# Baseline health check - test all domains before analysis
|
||
baseline_health_check
|
||
|
||
# Execute analysis pipeline with error handling
|
||
parse_logs || {
|
||
print_alert "Log parsing failed"
|
||
exit 1
|
||
}
|
||
|
||
classify_bots || {
|
||
print_alert "Bot classification failed"
|
||
exit 1
|
||
}
|
||
|
||
# NEW: Enhanced analysis functions (before threats detected)
|
||
analyze_headers # Detect header-based bot patterns
|
||
analyze_entry_points # Detect suspicious entry points
|
||
analyze_url_entropy # Detect fuzzing/parameter scanning
|
||
analyze_request_timing # Detect DDoS patterns via timing
|
||
|
||
detect_server_ips
|
||
detect_threats # Must be before fingerprinting/domain targeting (creates attack_vectors_raw.txt)
|
||
analyze_success_rates # Analyze success/failure rates for better accuracy
|
||
detect_botnets
|
||
analyze_time_series
|
||
calculate_threat_scores
|
||
detect_false_positives
|
||
generate_statistics
|
||
|
||
# NEW: Fingerprinting and domain targeting analysis (after threats detected)
|
||
calculate_bot_fingerprint # Combine signals for accuracy (reduce false positives)
|
||
analyze_domain_targeting_percentage # Show which domains are being targeted
|
||
analyze_top_urls_per_domain # Show what files/endpoints are being hit
|
||
|
||
generate_comparison_report # Show trends vs previous day
|
||
|
||
# NEW: Baseline and progression analysis
|
||
save_baseline # Store current metrics for historical comparison
|
||
analyze_attack_progression # Show attack sequences and phases
|
||
|
||
generate_report
|
||
|
||
print_success "Analysis complete!"
|
||
echo ""
|
||
echo "Report location: $OUTPUT_FILE"
|
||
|
||
# Analyze threat patterns and generate recommendations
|
||
analyze_domain_threats
|
||
analyze_geographic_threats
|
||
generate_recommendations
|
||
|
||
# Ask user what to do next
|
||
show_post_analysis_menu
|
||
}
|
||
|
||
################################################################################
|
||
# DOMAIN-LEVEL THREAT ANALYSIS
|
||
################################################################################
|
||
|
||
#######################################
# Build per-domain threat statistics in a single AWK pass over the parsed logs.
#
# Globals:
#   TEMP_DIR (read) - working directory holding the intermediate files
# Reads (all '|'-delimited, inside $TEMP_DIR):
#   parsed_logs.txt        field 1 = IP, field 2 = domain (one line per request)
#   threat_scores.txt      field 1 = score, field 2 = IP
#   attack_vectors_raw.txt field 2 = domain
#   classified_bots.txt    field 2 = domain
# Writes (inside $TEMP_DIR):
#   domain_threats.txt
#       domain|total_requests|bot_requests|bot_percentage|high_risk_ip_count|attack_attempts|high_risk_ips_detail
#       where high_risk_ips_detail is a list of "ip:score:requests " entries,
#       one entry per distinct high-risk IP seen on that domain
#   domain_high_risk_ips.txt   domain|high_risk_ip_count|detail (only domains with count > 0)
#   domain_threats_sorted.txt  domain_threats.txt ordered by high_risk_ip_count, descending
#   all_domains.txt            unique domain names
#######################################
analyze_domain_threats() {
    print_info "Analyzing per-domain threat patterns..."

    # Truncate up front so both files exist even when no domain is found
    : > "$TEMP_DIR/domain_threats.txt"
    : > "$TEMP_DIR/domain_high_risk_ips.txt"

    # One pass over parsed_logs.txt; the lookup tables are loaded with getline.
    # NOTE: the file name passed to getline is held in a variable on purpose.
    # "getline < tmpdir "/x"" is parsed as "(getline < tmpdir) "/x"", which
    # reads from the directory itself and never loads the table.
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        scores_file  = tmpdir "/threat_scores.txt"
        attacks_file = tmpdir "/attack_vectors_raw.txt"
        bots_file    = tmpdir "/classified_bots.txt"
        threats_out  = tmpdir "/domain_threats.txt"
        risk_out     = tmpdir "/domain_high_risk_ips.txt"

        # High-risk IPs: score >= 70, keyed by IP
        while ((getline line < scores_file) > 0) {
            split(line, col, "|")
            if (col[1] + 0 >= 70) {
                high_risk[col[2]] = col[1]
            }
        }
        close(scores_file)

        # Attack attempts per domain
        while ((getline line < attacks_file) > 0) {
            split(line, col, "|")
            attack_counts[col[2]]++
        }
        close(attacks_file)
    }

    # Main input: one parsed request per line
    {
        ip = $1
        domain = $2

        domain_requests[domain]++

        if (ip in high_risk) {
            pair = domain SUBSEP ip
            if (!(pair in pair_requests)) {
                # First request from this IP on this domain: count the IP once
                # and remember first-seen order for the detail column
                domain_high_risk_count[domain]++
                domain_ip_order[domain] = domain_ip_order[domain] ip " "
            }
            pair_requests[pair]++
        }
    }

    END {
        # Bot requests per domain
        while ((getline line < bots_file) > 0) {
            split(line, col, "|")
            bot_counts[col[2]]++
        }
        close(bots_file)

        for (domain in domain_requests) {
            total_req = domain_requests[domain]
            bot_req = bot_counts[domain] + 0
            bot_pct = (total_req > 0) ? (bot_req / total_req * 100) : 0
            high_risk_count = domain_high_risk_count[domain] + 0
            attacks = attack_counts[domain] + 0

            # One "ip:score:requests " entry per distinct high-risk IP
            high_risk_detail = ""
            n = split(domain_ip_order[domain], ips, " ")
            for (i = 1; i <= n; i++) {
                high_risk_detail = high_risk_detail ips[i] ":" high_risk[ips[i]] ":" pair_requests[domain SUBSEP ips[i]] " "
            }

            printf "%s|%d|%d|%.1f|%d|%d|%s\n", domain, total_req, bot_req, bot_pct, high_risk_count, attacks, high_risk_detail > threats_out

            if (high_risk_count > 0) {
                printf "%s|%d|%s\n", domain, high_risk_count, high_risk_detail > risk_out
            }
        }
        close(threats_out)
        close(risk_out)
    }' "$TEMP_DIR/parsed_logs.txt"

    # Order by high-risk IP count (field 5 only), highest first
    sort -t'|' -k5,5 -rn "$TEMP_DIR/domain_threats.txt" > "$TEMP_DIR/domain_threats_sorted.txt"

    # Unique domain list
    cut -d'|' -f1 "$TEMP_DIR/domain_threats.txt" | sort -u > "$TEMP_DIR/all_domains.txt"

    print_success "Domain threat analysis complete"
}
|
||
|
||
################################################################################
|
||
# GEOGRAPHIC ANALYSIS (Country-based threat tracking)
|
||
################################################################################
|
||
|
||
#######################################
# Estimate how widely spread the high-risk IPs are.
#
# When neither geoiplookup nor mmdbinspect is on PATH, high-risk IPs
# (score >= 70 in threat_scores.txt, '|'-delimited: score|ip|...) are grouped
# by their first three dot-separated parts into "a.b.c.0/24" buckets. If more
# than 10 buckets or more than 50 high-risk IPs are found, a one-line marker
# is written to geo_needs_maxmind.txt.
# When one of the tools is present only a status message is printed; the full
# GeoIP lookup is still a TODO.
#
# Globals:
#   TEMP_DIR (read)
# Writes (inside $TEMP_DIR):
#   geo_analysis.txt        (truncated only)
#   geo_needs_maxmind.txt   (truncated; may receive the DISTRIBUTED marker)
#   high_risk_networks.txt  ("count network" lines, most frequent first)
#######################################
analyze_geographic_threats() {
    print_info "Analyzing geographic distribution of threats..."

    local scores_path="$TEMP_DIR/threat_scores.txt"
    local networks_path="$TEMP_DIR/high_risk_networks.txt"
    local marker_path="$TEMP_DIR/geo_needs_maxmind.txt"

    # Reset the output files for this run
    : > "$TEMP_DIR/geo_analysis.txt"
    : > "$marker_path"

    # Assume a lookup tool exists until both probes fail
    local geoip_ready=true
    if ! command -v geoiplookup >/dev/null 2>&1 && ! command -v mmdbinspect >/dev/null 2>&1; then
        geoip_ready=false
    fi

    if [ "$geoip_ready" = true ]; then
        print_info "Performing full geographic analysis with GeoIP..."

        # TODO: Implement full GeoIP lookups when available
        # This would lookup each high-risk IP and count by country
    else
        # No GeoIP data: fall back to counting high-risk IPs per /24 bucket
        if [ -s "$scores_path" ]; then
            awk -F'|' '$1 >= 70 {
                split($2, part, ".")
                printf "%s.%s.%s.0/24\n", part[1], part[2], part[3]
            }' "$scores_path" | sort | uniq -c | sort -rn > "$networks_path"

            local network_count=$(wc -l < "$networks_path" 2>/dev/null || echo "0")
            local total_high_risk=$(awk -F'|' '$1 >= 70' "$scores_path" | wc -l)

            # Many buckets or many IPs suggests a distributed source
            if ((network_count > 10 || total_high_risk > 50)); then
                echo "DISTRIBUTED|$network_count networks|$total_high_risk IPs|MaxMind recommended" > "$marker_path"
            fi
        fi

        print_info "Geographic analysis limited (MaxMind GeoIP2 not installed)"
    fi

    print_success "Geographic analysis complete"
}
|
||
|
||
################################################################################
|
||
# RECOMMENDATION ENGINE
|
||
################################################################################
|
||
|
||
# Print $1 (blanks removed) when it is a plain non-negative integer, else 0.
_rec_int() {
    local value="${1//[[:blank:]]/}"
    case "$value" in
        '' | *[!0-9]*) printf '0' ;;
        *) printf '%s' "$value" ;;
    esac
}

# Print the rounded percentage $1 / $2 * 100, clamped to the range 0..100.
_rec_percentage() {
    awk -v part="$1" -v total="$2" 'BEGIN {
        pct = (total > 0) ? (part / total) * 100 : 0
        if (pct > 100) pct = 100
        if (pct < 0) pct = 0
        printf "%.0f", pct
    }'
}

#######################################
# Turn the analysis results into a prioritized recommendation list.
#
# Globals:
#   TEMP_DIR      (read)
#   CSF_AVAILABLE (read) - CSF checks run only when this equals "true"
# Reads (inside $TEMP_DIR unless noted):
#   threat_scores.txt, all_domains.txt, domain_high_risk_ips.txt,
#   domain_threats_sorted.txt, attack_vectors_raw.txt, rapid_fire_ips.txt,
#   geo_needs_maxmind.txt, and /etc/csf/csf.conf
# Writes (inside $TEMP_DIR):
#   recommendations.txt  REC|number|action_type|title|priority|description
#   attack_scope.txt     scope|primary_target|percentage|affected_domains|total_domains
#
# The script runs with "set -eo pipefail", so every count below is produced
# in a way that yields exactly one integer even when nothing matches.
#######################################
generate_recommendations() {
    print_info "Generating intelligent recommendations..."

    local rec_file="$TEMP_DIR/recommendations.txt"
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local domains_file="$TEMP_DIR/all_domains.txt"
    local risk_domains_file="$TEMP_DIR/domain_high_risk_ips.txt"
    local sorted_file="$TEMP_DIR/domain_threats_sorted.txt"
    local attacks_file="$TEMP_DIR/attack_vectors_raw.txt"
    local rapid_file="$TEMP_DIR/rapid_fire_ips.txt"
    local geo_file="$TEMP_DIR/geo_needs_maxmind.txt"

    : > "$rec_file"
    local rec_count=0

    # Number of IPs with threat score >= 70
    local total_high_risk_ips=0
    if [ -s "$scores_file" ]; then
        total_high_risk_ips=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' "$scores_file" 2>/dev/null) || true
        total_high_risk_ips=$(_rec_int "$total_high_risk_ips")
    fi

    # Number of known domains, and of domains with at least one high-risk IP
    local total_domains=0
    if [ -r "$domains_file" ]; then
        total_domains=$(wc -l < "$domains_file") || true
        total_domains=$(_rec_int "$total_domains")
    fi

    local affected_domains=0
    if [ -s "$risk_domains_file" ]; then
        affected_domains=$(wc -l < "$risk_domains_file") || true
        affected_domains=$(_rec_int "$affected_domains")
    fi

    # Determine attack scope: single domain vs server-wide
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0
    local top_count=0
    local ignored=""

    if [ "$total_domains" -gt 1 ]; then
        if [ "$affected_domains" -eq 1 ]; then
            attack_scope="single_domain"
            # First line: domain|high_risk_ip_count|detail
            IFS='|' read -r primary_target top_count ignored < "$risk_domains_file" || true
            top_count=$(_rec_int "$top_count")
            if [ "$total_high_risk_ips" -gt 0 ] && [ "$top_count" -gt 0 ]; then
                primary_target_percentage=$(_rec_percentage "$top_count" "$total_high_risk_ips")
            fi
        elif [ "$affected_domains" -gt 1 ]; then
            # Server-wide unless one domain holds at least 75% of the high-risk IPs
            attack_scope="server_wide"
            local top_domain=""
            if [ -r "$sorted_file" ]; then
                # First line of the sorted file; field 5 is the high-risk IP count
                IFS='|' read -r top_domain ignored ignored ignored top_count ignored < "$sorted_file" || true
            fi
            top_count=$(_rec_int "$top_count")
            if [ "$top_count" -gt 0 ] && [ "$total_high_risk_ips" -gt 0 ]; then
                local top_percentage
                top_percentage=$(_rec_percentage "$top_count" "$total_high_risk_ips")
                if [ "$top_percentage" -ge 75 ]; then
                    attack_scope="primary_target"
                    primary_target="$top_domain"
                    primary_target_percentage=$top_percentage
                fi
            fi
        fi
    elif [ "$total_domains" -eq 1 ]; then
        attack_scope="single_server"
        IFS= read -r primary_target < "$domains_file" || true
    fi

    # RECOMMENDATION #1: IP Blocking Strategy
    if [ "$total_high_risk_ips" -gt 0 ]; then
        rec_count=$((rec_count + 1))
        if [ "$total_high_risk_ips" -le 10 ]; then
            echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 1 hour|HIGH|CSF temporary block recommended for ${total_high_risk_ips} IPs with threat score >= 70" >> "$rec_file"
        elif [ "$total_high_risk_ips" -le 50 ]; then
            echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 24 hours|HIGH|Large number of threats detected - 24hr block recommended" >> "$rec_file"
        else
            echo "REC|$rec_count|ip_block_perm|Permanently block $total_high_risk_ips high-risk IPs|CRITICAL|Severe bot attack detected - permanent blocking recommended" >> "$rec_file"
        fi
    fi

    # RECOMMENDATION #2: Connection Limit (CSF CT_LIMIT)
    # Only when CSF is installed and CT_LIMIT is enabled (not 0)
    if [ "$CSF_AVAILABLE" = true ]; then
        local current_ct_limit=""
        current_ct_limit=$(grep '^CT_LIMIT' /etc/csf/csf.conf 2>/dev/null | grep -oP '"\K[0-9]+' | head -n 1) || true
        current_ct_limit=$(_rec_int "$current_ct_limit")

        if [ "$current_ct_limit" -gt 0 ]; then
            # First field of the first line of rapid_fire_ips.txt
            local max_connections=0
            if [ -s "$rapid_file" ]; then
                max_connections=$(awk 'NR == 1 { print $1; exit }' "$rapid_file" 2>/dev/null) || true
                max_connections=$(_rec_int "$max_connections")
            fi

            if [ "$max_connections" -gt 100 ] && [ "$max_connections" -lt "$current_ct_limit" ]; then
                rec_count=$((rec_count + 1))
                # 10# keeps a zero-padded value from being read as octal
                local recommended_limit=$((10#$max_connections - 20))
                echo "REC|$rec_count|csf_ct_limit|Reduce CSF CT_LIMIT from $current_ct_limit to $recommended_limit|MEDIUM|High concurrent connections detected ($max_connections from single IP)" >> "$rec_file"
            fi
        fi
    fi

    # RECOMMENDATION #3: Domain-Specific .htaccess Protection
    if [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "primary_target" ]; then
        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|htaccess_domain|Add bot blocking to $primary_target .htaccess|HIGH|${primary_target_percentage}% of attacks target this domain" >> "$rec_file"
    fi

    # RECOMMENDATION #4: Server-wide Apache Protection
    if [ "$attack_scope" = "server_wide" ]; then
        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|apache_global|Add global bot blocking to Apache pre-virtualhost|HIGH|Attack affects $affected_domains of $total_domains domains" >> "$rec_file"
    fi

    # RECOMMENDATION #5: WordPress-specific (wp-admin / wp-login / xmlrpc attempts)
    local wp_pattern='wp-admin|wp-login|xmlrpc'
    local wp_attacks=0
    if [ -s "$attacks_file" ]; then
        # grep -c prints 0 and exits 1 when nothing matches; keep just that 0
        wp_attacks=$(grep -Eic -- "$wp_pattern" "$attacks_file" 2>/dev/null) || true
        wp_attacks=$(_rec_int "$wp_attacks")
    fi

    if [ "$wp_attacks" -gt 50 ]; then
        rec_count=$((rec_count + 1))

        # Unique domains (field 2) appearing on the matching lines
        local wp_domains=""
        wp_domains=$(grep -Ei -- "$wp_pattern" "$attacks_file" 2>/dev/null | cut -d'|' -f2 | sort -u) || true
        local wp_domain_count=0
        wp_domain_count=$(printf '%s\n' "$wp_domains" | grep -c '') || true
        wp_domain_count=$(_rec_int "$wp_domain_count")
        local wp_target_domain="${wp_domains%%$'\n'*}"

        if [ "$wp_domain_count" -eq 1 ] || [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "single_server" ]; then
            # Single domain being attacked
            echo "REC|$rec_count|wp_hardening|Harden WordPress on $wp_target_domain|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        elif [ "$attack_scope" = "primary_target" ]; then
            # One primary target but others also affected
            echo "REC|$rec_count|wp_hardening|Harden WordPress on $primary_target|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        else
            # Multiple domains with WordPress attacks
            echo "REC|$rec_count|wp_hardening|Harden WordPress across $wp_domain_count domains|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        fi
    fi

    # PORTFLOOD Protection removed - not appropriate for web servers with many sites
    # Blocking ports 80/443 based on connection count breaks legitimate traffic

    # RECOMMENDATION #7: CSF SYNFLOOD Protection (if DDoS patterns detected)
    if [ -s "$rapid_file" ]; then
        local ddos_count=0
        ddos_count=$(wc -l < "$rapid_file") || true
        ddos_count=$(_rec_int "$ddos_count")
        if [ "$ddos_count" -gt 10 ]; then
            rec_count=$((rec_count + 1))
            echo "REC|$rec_count|csf_synflood|Enable CSF SYNFLOOD protection|HIGH|$ddos_count potential DDoS sources detected" >> "$rec_file"
        fi
    fi

    # RECOMMENDATION #8: MaxMind GeoIP for Country Blocking (if distributed attack)
    if [ -s "$geo_file" ]; then
        # Marker line: DISTRIBUTED|<n> networks|<n> IPs|...
        local networks_field="" ips_field=""
        IFS='|' read -r ignored networks_field ips_field ignored < "$geo_file" || true
        local network_count
        network_count=$(_rec_int "${networks_field//[!0-9]/}")
        local ip_count
        ip_count=$(_rec_int "${ips_field//[!0-9]/}")

        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|install_maxmind|Install MaxMind GeoIP2 for country-based blocking|MEDIUM|Distributed attack from $network_count networks ($ip_count IPs) - geographic blocking recommended" >> "$rec_file"
    fi

    # Store attack scope for menu system
    echo "$attack_scope|$primary_target|$primary_target_percentage|$affected_domains|$total_domains" > "$TEMP_DIR/attack_scope.txt"

    print_success "Generated $rec_count recommendations"
}
|
||
|
||
################################################################################
|
||
# POST-ANALYSIS MENU
|
||
################################################################################
|
||
|
||
#######################################
# Print the threat summary and the recommendation list, then ask what to do.
#
# Globals:
#   TEMP_DIR (read); BOLD, RED, YELLOW, BLUE, NC (read, colour codes)
#   menu_choice (written) - the option the user typed
# Reads (inside $TEMP_DIR):
#   attack_scope.txt     scope|primary_target|percentage|affected|total
#   recommendations.txt  REC|number|action_type|title|priority|description
# Flow:
#   option 1 -> show_action_menu, option 2 -> show_detailed_recommendations,
#   anything else returns 0. With no recommendations it waits for Enter and
#   returns 0.
#######################################
show_post_analysis_menu() {
    local rule="==============================================================="

    # Defaults used when attack_scope.txt is missing or empty
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0
    local affected_domains=0
    local total_domains=0

    if [ -s "$TEMP_DIR/attack_scope.txt" ]; then
        local scope_line=$(< "$TEMP_DIR/attack_scope.txt")
        attack_scope=$(cut -d'|' -f1 <<< "$scope_line")
        primary_target=$(cut -d'|' -f2 <<< "$scope_line")
        primary_target_percentage=$(cut -d'|' -f3 <<< "$scope_line")
        affected_domains=$(cut -d'|' -f4 <<< "$scope_line")
        total_domains=$(cut -d'|' -f5 <<< "$scope_line")
    fi

    # A non-empty recommendations file means there is something to show
    local has_recommendations=false
    local rec_count=0
    if [ -s "$TEMP_DIR/recommendations.txt" ]; then
        rec_count=$(wc -l < "$TEMP_DIR/recommendations.txt")
        has_recommendations=true
    fi

    printf '\n'
    echo "$rule"
    print_header "THREAT ANALYSIS SUMMARY"
    printf '\n'

    # Describe the attack scope
    if [ "$attack_scope" = "single_domain" ]; then
        print_warning "ATTACK SCOPE: Single Domain Target"
        echo " • Primary Target: $primary_target"
        echo " • This domain is receiving 100% of high-risk traffic"
        echo " • Recommendation: Domain-specific protection"
    elif [ "$attack_scope" = "primary_target" ]; then
        print_warning "ATTACK SCOPE: Primarily Targeting One Domain"
        echo " • Primary Target: $primary_target ($primary_target_percentage% of attacks)"
        echo " • Other domains also affected: $affected_domains of $total_domains total"
        echo " • Recommendation: Focus protection on primary target"
    elif [ "$attack_scope" = "server_wide" ]; then
        print_alert "ATTACK SCOPE: Server-Wide Attack"
        echo " • Multiple domains under attack: $affected_domains of $total_domains"
        echo " • Attack is distributed across the server"
        echo " • Recommendation: Server-wide protection needed"
    elif [ "$attack_scope" = "single_server" ]; then
        print_info "ATTACK SCOPE: Single-Domain Server"
        echo " • Target: $primary_target (only domain on server)"
        echo " • Server-level protection will apply to this domain"
    else
        print_info "No significant threats detected"
    fi

    printf '\n'

    # Nothing to recommend: wait for Enter and leave
    if [ "$has_recommendations" != true ]; then
        print_success "No recommendations - your server appears secure"
        printf '\n'
        echo "Press Enter to return to main menu..."
        read
        return 0
    fi

    echo "$rule"
    print_header "RECOMMENDED ACTIONS ($rec_count recommendations)"
    printf '\n'

    # One numbered entry per recommendation, coloured by priority
    local shown=0
    local priority_color
    local priority_icon
    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        shown=$((shown + 1))

        priority_color="${NC}"
        priority_icon=" "
        case "$priority" in
            CRITICAL) priority_color="${RED}"; priority_icon="" ;;
            HIGH) priority_color="${YELLOW}"; priority_icon="" ;;
            MEDIUM) priority_color="${BLUE}"; priority_icon="" ;;
        esac

        echo -e " ${BOLD}[$shown]${NC} $priority_icon $action_title"
        echo -e " ${priority_color}Priority: $priority${NC} - $description"
        printf '\n'
    done < "$TEMP_DIR/recommendations.txt"

    echo "$rule"
    printf '\n'
    echo "What would you like to do?"
    printf '\n'
    echo " 1) Go to Take Action Menu (implement recommended actions)"
    echo " 2) Review Individual Recommendations (detailed view)"
    printf '\n'
    echo -e " ${RED}0)${NC} Back"
    printf '\n'
    read -p "Select option: " menu_choice

    if [ "$menu_choice" = "1" ]; then
        show_action_menu
    elif [ "$menu_choice" = "2" ]; then
        show_detailed_recommendations
    elif [ "$menu_choice" = "0" ]; then
        print_info "Returning to main menu..."
        return 0
    else
        print_warning "Invalid option - returning to main menu"
        return 0
    fi
}
|
||
|
||
################################################################################
|
||
# DETAILED RECOMMENDATIONS VIEWER
|
||
################################################################################
|
||
|
||
#######################################
# Print every recommendation with supporting detail, then return to the
# post-analysis menu.
#
# Globals:
#   TEMP_DIR (read); BOLD, NC (read, colour codes)
# Reads (inside $TEMP_DIR):
#   recommendations.txt        REC|number|action_type|title|priority|description
#   threat_scores.txt          score|ip|...      (for ip_block_* actions)
#   domain_threats_sorted.txt  domain|total|bot|bot_pct|high_risk|attacks|detail
#   domain_high_risk_ips.txt   domain|count|...  (for apache_global)
# Notes:
#   - Lists are limited to 10 entries inside awk itself rather than with
#     "| head", so no SIGPIPE can fail the pipeline under "set -eo pipefail".
#   - Domain statistics are selected by exact match on the domain field.
#######################################
show_detailed_recommendations() {
    clear
    print_banner "Detailed Recommendations"
    echo ""

    if [ ! -s "$TEMP_DIR/recommendations.txt" ]; then
        print_warning "No recommendations available"
        echo ""
        read -r -p "Press Enter to continue..."
        show_post_analysis_menu
        return
    fi

    local count=0
    local rec_type rec_num action_type action_title priority description
    local target_domain
    local target_re='to ([^ ]+)'
    local domain total_req bot_req bot_pct high_risk attacks ips

    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        count=$((count + 1))

        echo "==============================================================="
        echo -e "${BOLD}Recommendation #$count:${NC} $action_title"
        echo "==============================================================="
        echo ""
        echo "Priority: $priority"
        echo "Action Type: $action_type"
        echo "Description: $description"
        echo ""

        # Show specific details based on action type
        case "$action_type" in
            ip_block_temp | ip_block_perm)
                echo "Affected IPs:"
                # First 10 IPs with score >= 70; a missing file just shows nothing
                awk -F'|' '$1 >= 70 {
                    printf " • %s (score: %s)\n", $2, $1
                    if (++shown == 10) exit
                }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null || true
                ;;
            htaccess_domain)
                # The title reads "... to <domain> .htaccess"; take the word after "to "
                target_domain=""
                if [[ "$action_title" =~ $target_re ]]; then
                    target_domain="${BASH_REMATCH[1]}"
                fi
                echo "Target Domain: $target_domain"
                if [ -n "$target_domain" ] && [ -s "$TEMP_DIR/domain_threats_sorted.txt" ]; then
                    while IFS='|' read -r domain total_req bot_req bot_pct high_risk attacks ips; do
                        # Exact match only: "a.com" must not pick up "aa.com"
                        [ "$domain" = "$target_domain" ] || continue
                        echo " • Total Requests: $total_req"
                        echo " • Bot Requests: $bot_req ($bot_pct%)"
                        echo " • High-Risk IPs: $high_risk"
                        echo " • Attack Attempts: $attacks"
                    done < "$TEMP_DIR/domain_threats_sorted.txt"
                fi
                ;;
            apache_global)
                echo "Affected Domains:"
                if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then
                    awk -F'|' 'NR > 10 { exit }
                        { printf " • %s (%s high-risk IPs)\n", $1, $2 }' \
                        "$TEMP_DIR/domain_high_risk_ips.txt" || true
                fi
                ;;
        esac

        echo ""
    done < "$TEMP_DIR/recommendations.txt"

    echo "==============================================================="
    echo ""
    read -r -p "Press Enter to return to action menu..."
    show_post_analysis_menu
}
|
||
|
||
################################################################################
|
||
# ACTION MENU (IMPLEMENT RECOMMENDATIONS)
|
||
################################################################################
|
||
|
||
#######################################
# Show every available action, mark the ones the analysis recommended, and
# run the one the user selects.
#
# Globals:
#   TEMP_DIR (read); RED, NC (read, colour codes)
# Reads:
#   $TEMP_DIR/recommendations.txt  REC|number|action_type|title|priority|description
# Flow:
#   "0" goes back to show_post_analysis_menu; a number in 1..N calls
#   execute_action with the matching action type and the number; anything
#   else prints "Invalid selection" and shows this menu again.
# Returns:
#   1 when no input can be read (end of input), otherwise the status of the
#   function it hands over to.
#######################################
show_action_menu() {
    clear
    print_banner "Take Action Menu"
    echo ""

    # action_type -> 1 / priority, for everything the analysis recommended
    declare -A recommended_actions
    declare -A action_priorities
    local rec_type rec_num action_type action_title priority description

    if [ -s "$TEMP_DIR/recommendations.txt" ]; then
        while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
            # An empty key is not a valid associative-array subscript
            [ -n "$action_type" ] || continue
            recommended_actions["$action_type"]=1
            action_priorities["$action_type"]="$priority"
        done < "$TEMP_DIR/recommendations.txt"
    fi

    # Display all available actions (not just recommended ones)
    echo "All Available Actions:"
    echo ""
    echo "Legend: = Recommended by analysis"
    echo ""

    # Menu catalog: action_type|recommendation key|title|description.
    # A lone "-" prints a group separator.
    # PORTFLOOD action removed - not appropriate for web servers
    local -a catalog=(
        "ip_block_temp_1hr|ip_block_temp|Block high-risk IPs for 1 hour (CSF temporary)|Temporary firewall block, auto-expires after 1 hour"
        "ip_block_temp_24hr|ip_block_temp|Block high-risk IPs for 24 hours (CSF temporary)|Temporary firewall block, auto-expires after 24 hours"
        "ip_block_perm|ip_block_perm|Block high-risk IPs permanently (CSF permanent)|Permanent firewall block - requires manual removal"
        "-"
        "htaccess_domain|htaccess_domain|Add bot blocking to specific domain .htaccess|Domain-level protection via Apache .htaccess rules"
        "apache_global|apache_global|Add global bot blocking to Apache (all domains)|Server-wide Apache configuration, affects all sites"
        "-"
        "csf_ct_limit|csf_ct_limit|Adjust CSF connection tracking limit (CT_LIMIT)|Limit concurrent connections per IP address"
        "csf_synflood|csf_synflood|Enable CSF SYNFLOOD protection|Protect against SYN flood DDoS attacks"
        "-"
        "install_maxmind|install_maxmind|Install MaxMind GeoIP2 for country-based blocking|Enable geographic filtering with CSF CC_DENY (requires free MaxMind license)"
        "wp_hardening|wp_hardening|WordPress security hardening|Protect WordPress login and admin areas"
    )

    local count=0
    declare -a action_types
    local entry entry_type rec_key entry_title entry_desc
    for entry in "${catalog[@]}"; do
        IFS='|' read -r entry_type rec_key entry_title entry_desc <<< "$entry"
        if [ "$entry_type" = "-" ]; then
            echo ""
            echo "------------------------------------------------------------─"
            echo ""
            continue
        fi
        count=$((count + 1))
        action_types[$count]="$entry_type"
        display_action_option "$count" "$entry_type" "$entry_title" "$entry_desc" \
            "${recommended_actions[$rec_key]}" "${action_priorities[$rec_key]}"
    done

    echo ""
    echo "============================================================═"
    echo ""
    echo -e " ${RED}0)${NC} Back"
    echo ""

    local action_choice=""
    if ! read -r -p "Select action [0-$count]: " action_choice; then
        # End of input with nothing typed: stop instead of re-prompting forever
        [ -n "$action_choice" ] || return 1
    fi

    if [ "$action_choice" = "0" ]; then
        show_post_analysis_menu
        return
    fi

    # Accept only a short run of digits within 1..count. Anything else must be
    # rejected before it is used as an array subscript, because bash evaluates
    # indexed-array subscripts as arithmetic.
    if ! [[ "$action_choice" =~ ^[0-9]{1,4}$ ]] ||
        ((10#$action_choice < 1 || 10#$action_choice > count)); then
        print_warning "Invalid selection"
        sleep 2
        show_action_menu
        return
    fi

    # Normalize e.g. "03" to 3 (10# avoids octal interpretation)
    action_choice=$((10#$action_choice))

    # Execute selected action
    execute_action "${action_types[$action_choice]}" "$action_choice"
}
|
||
|
||
# Helper function to display action options
|
||
#######################################
# Print one entry of the action menu (title line plus description line).
#
# Globals:
#   RED, YELLOW, BLUE, GREEN, BOLD, NC (read, colour codes)
# Arguments:
#   $1 - menu number
#   $2 - action type (accepted for the caller's convenience; not printed)
#   $3 - title
#   $4 - description
#   $5 - non-empty when the analysis recommended this action
#   $6 - priority of the recommendation (CRITICAL, HIGH, MEDIUM or other)
# Outputs:
#   Two lines on stdout.
#######################################
display_action_option() {
    local option_no=$1
    local action_type=$2
    local heading=$3
    local detail=$4
    local recommended_flag=$5
    local level=$6

    if [ -z "$recommended_flag" ]; then
        # Plain entry: no colour, no marker
        echo -e " $option_no) $heading"
    else
        # Recommended entry: colour and marker text depend on the priority
        local tint
        local marker
        case "$level" in
            CRITICAL)
                tint="${RED}"
                marker=" RECOMMENDED [CRITICAL]"
                ;;
            HIGH)
                tint="${YELLOW}"
                marker=" RECOMMENDED [HIGH]"
                ;;
            MEDIUM)
                tint="${BLUE}"
                marker=" RECOMMENDED [MEDIUM]"
                ;;
            *)
                tint="${GREEN}"
                marker=" RECOMMENDED"
                ;;
        esac
        echo -e " ${tint}$option_no)${NC} ${BOLD}$heading${NC} ${tint}${marker}${NC}"
    fi

    echo " $detail"
}
|
||
|
||
################################################################################
|
||
# ACTION EXECUTION ENGINE
|
||
################################################################################
|
||
|
||
#######################################
# Route a selected action type to the function that carries it out.
#
# Arguments:
#   $1 - action type (e.g. ip_block_temp_1hr, csf_synflood, wp_hardening)
#   $2 - number of the selected menu entry (accepted but not used here)
# Behaviour:
#   Known types call their handler. Any other type prints a warning, waits
#   for Enter and re-opens the action menu via show_action_menu.
#######################################
execute_action() {
    local requested="$1"
    local rec_number="$2"

    # Handler command (function name plus fixed arguments) for the request
    local -a dispatch=()
    case "$requested" in
        ip_block_temp_1hr) dispatch=(execute_ip_blocking_specific "1hr") ;;
        ip_block_temp_24hr) dispatch=(execute_ip_blocking_specific "24hr") ;;
        ip_block_temp) dispatch=(execute_ip_blocking "temp") ;;
        ip_block_perm) dispatch=(execute_ip_blocking "perm") ;;
        csf_ct_limit) dispatch=(execute_csf_ct_limit) ;;
        csf_synflood) dispatch=(execute_csf_synflood) ;;
        htaccess_domain) dispatch=(execute_htaccess_domain_blocking) ;;
        apache_global) dispatch=(execute_apache_global_blocking) ;;
        install_maxmind) dispatch=(execute_install_maxmind) ;;
        wp_hardening) dispatch=(execute_wp_hardening) ;;
        rate_limiting) dispatch=(execute_rate_limiting) ;;
    esac

    if [ "${#dispatch[@]}" -gt 0 ]; then
        "${dispatch[@]}"
    else
        # Unknown type: tell the user and go back to the menu
        print_warning "Action type '$requested' not yet implemented"
        echo ""
        read -p "Press Enter to continue..."
        show_action_menu
    fi
}
|
||
|
||
#######################################
# Temporarily block every high-risk IP (threat score >= 70) through CSF for a
# fixed duration, after showing the list and asking for confirmation.
#
# The block list is built BEFORE the confirmation prompt so the count and the
# preview only contain IPs that will really be blocked: IPs accepted by
# is_excluded_ip and IPs listed in false_positives.txt are removed first.
#
# Globals:
#   TEMP_DIR      (read) directory holding threat_scores.txt (score|ip|requests)
#                 and, optionally, false_positives.txt (ip|...)
#   CSF_AVAILABLE (read) must be "true" for anything to happen
#   GREEN, RED, NC (read) colour codes used in the per-IP result lines
# Arguments:
#   $1 - "1hr" for a 3600 second block; any other value means 24 hours
# Outputs:
#   Progress and result messages on stdout; always ends in show_action_menu.
#######################################
execute_ip_blocking_specific() {
  local duration_type="$1" # "1hr" or "24hr"
  local scores_file="$TEMP_DIR/threat_scores.txt"
  local fp_file="$TEMP_DIR/false_positives.txt"
  local duration duration_text confirm
  local score ip requests _rest i
  local candidate_count=0 block_total=0 success_count=0 fail_count=0
  local -a ips_to_block=() scores_to_block=() requests_to_block=()
  local -A seen_ips=()

  clear
  print_banner "IP Blocking - CSF Temporary Block"
  echo ""

  # Check if CSF is installed
  if [ "$CSF_AVAILABLE" != true ]; then
    print_warning "CSF (ConfigServer Security & Firewall) is not installed"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  if [ ! -s "$scores_file" ]; then
    print_warning "No threat scores available"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Collect the IPs to block, remembering score and request count so they do
  # not have to be looked up again for every IP later on.
  while IFS='|' read -r score ip requests _rest || [ -n "$score" ]; do
    # Ignore blank/header/malformed rows instead of letting `[` complain.
    [[ "$score" =~ ^[0-9]+$ && -n "$ip" ]] || continue
    [ "$score" -ge 70 ] || continue

    # A repeated row for the same IP must not produce a second csf call.
    if [ -n "${seen_ips[$ip]:-}" ]; then
      continue
    fi
    seen_ips[$ip]=1
    candidate_count=$((candidate_count + 1))

    # Skip excluded IPs
    if is_excluded_ip "$ip"; then
      continue
    fi

    # Skip false positives (exact match on the first field)
    if [ -s "$fp_file" ] \
      && awk -F'|' -v ip="$ip" \
        '($1 "") == (ip "") { found = 1; exit } END { exit !found }' \
        "$fp_file" 2>/dev/null; then
      continue
    fi

    ips_to_block+=("$ip")
    scores_to_block+=("$score")
    requests_to_block+=("${requests:-0}")
  done < "$scores_file"

  block_total=${#ips_to_block[@]}

  if [ "$block_total" -eq 0 ]; then
    if [ "$candidate_count" -eq 0 ]; then
      print_info "No high-risk IPs detected (score >= 70)"
    else
      print_info "All $candidate_count high-risk IP(s) are excluded or known false positives - nothing to block"
    fi
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Set duration based on type
  if [ "$duration_type" = "1hr" ]; then
    duration=3600
    duration_text="1 hour"
  else
    duration=86400
    duration_text="24 hours"
  fi

  echo "This will block $block_total high-risk IPs for $duration_text"
  echo ""
  echo "High-risk IPs (top 10):"
  for ((i = 0; i < block_total && i < 10; i++)); do
    printf ' • %s (score: %s, %s requests)\n' \
      "${ips_to_block[i]}" "${scores_to_block[i]}" "${requests_to_block[i]}"
  done
  echo ""

  if [ "$block_total" -gt 10 ]; then
    echo " ... and $((block_total - 10)) more"
    echo ""
  fi

  read -r -p "Proceed with blocking for $duration_text? (yes/no): " confirm

  if [ "$confirm" != "yes" ]; then
    print_info "Operation cancelled"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Apply blocks
  echo ""
  print_info "Applying CSF blocks for $duration_text..."
  echo ""

  for i in "${!ips_to_block[@]}"; do
    ip=${ips_to_block[i]}
    score=${scores_to_block[i]}

    if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
      echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
      success_count=$((success_count + 1))
    else
      echo -e " ${RED}${NC} Failed to block $ip"
      fail_count=$((fail_count + 1))
    fi
  done

  echo ""
  if [ "$success_count" -gt 0 ]; then
    print_success "Successfully blocked $success_count IP(s) for $duration_text"
    echo ""
    echo "These blocks will automatically expire after $duration_text"
    echo "To view temporary blocks: csf -t"
    echo "To remove a block early: csf -tr IP"
  fi

  if [ "$fail_count" -gt 0 ]; then
    print_warning "$fail_count IP(s) failed to block - check CSF configuration"
  fi

  # Restart CSF
  print_info "Restarting CSF to apply changes..."
  if csf -r >/dev/null 2>&1; then
    print_success "CSF restarted successfully"
  else
    print_warning "CSF restart may have failed - check manually with: csf -r"
  fi

  echo ""
  # Verify domains still work after blocking
  verify_domains_still_working

  show_action_menu
}
|
||
|
||
#######################################
# Entry point for the generic "temp"/"perm" IP blocking actions.
#
# "temp" goes straight to offer_csf_blocking. Any other mode shows a
# permanent-block warning and asks for confirmation first; on "yes" it also
# hands over to offer_csf_blocking, otherwise it returns to the action menu.
#
# Arguments:
#   $1 - block mode: "temp" or "perm"
# Outputs:
#   Banner, warning and prompts on stdout.
#######################################
execute_ip_blocking() {
  local block_mode="$1" # "temp" or "perm"
  # Local so the answer does not overwrite a caller's `confirm` variable.
  local confirm

  if [ "$block_mode" = "temp" ]; then
    # Call the existing CSF blocking function
    offer_csf_blocking
    return
  fi

  # Permanent blocking
  clear
  print_banner "Permanent IP Blocking"
  echo ""
  print_alert "WARNING: Permanent blocks must be manually removed later"
  echo ""
  echo "This will permanently block all high-risk IPs (score >= 70)"
  echo ""
  read -r -p "Are you sure you want to proceed? (yes/no): " confirm

  if [ "$confirm" = "yes" ]; then
    offer_csf_blocking
  else
    print_info "Operation cancelled"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
  fi
}
|
||
|
||
#######################################
# Interactively change CT_LIMIT in the CSF configuration and restart CSF.
#
# The suggested value is the first "to <number>" found in a csf_ct_limit
# recommendation (default 100). The file is backed up before it is edited,
# the edit is verified afterwards, and a failed CSF restart is reported
# instead of being ignored.
#
# Globals:
#   TEMP_DIR      (read) directory holding recommendations.txt
#   CSF_AVAILABLE (read) must be "true"
# Arguments:
#   $1 - optional path to the CSF config file (default /etc/csf/csf.conf)
# Outputs:
#   Prompts and status messages on stdout; always ends in show_action_menu.
#######################################
execute_csf_ct_limit() {
  local csf_conf="${1:-/etc/csf/csf.conf}"
  local recommended_limit current_limit new_limit backup_file

  clear
  print_banner "Update CSF Connection Tracking Limit"
  echo ""

  # Check if CSF is installed
  if [ "$CSF_AVAILABLE" != true ]; then
    print_warning "CSF is not installed on this server"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Get recommended limit from recommendation. head keeps this a single
  # value even when several recommendations (or several "to N") match.
  recommended_limit=$(grep -F "|csf_ct_limit|" "$TEMP_DIR/recommendations.txt" 2>/dev/null \
    | grep -oP 'to \K[0-9]+' | head -n 1 || true)
  recommended_limit=${recommended_limit:-100}

  # Get current CT_LIMIT
  current_limit=$(grep '^CT_LIMIT[[:space:]]*=' "$csf_conf" 2>/dev/null \
    | grep -oP '"\K[0-9]+' | head -n 1 || true)
  current_limit=${current_limit:-unknown}

  echo "Current CT_LIMIT: $current_limit"
  echo "Recommended CT_LIMIT: $recommended_limit"
  echo ""
  echo "This will modify $csf_conf and restart CSF"
  echo ""
  read -r -p "Enter new CT_LIMIT value [$recommended_limit]: " new_limit

  # Use recommended if nothing entered
  if [ -z "$new_limit" ]; then
    new_limit=$recommended_limit
  fi

  # Validate it's a number (this also makes it safe to splice into sed)
  if ! [[ "$new_limit" =~ ^[0-9]+$ ]]; then
    print_warning "Invalid number"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Update CSF config
  print_info "Updating CT_LIMIT to $new_limit..."

  if [ ! -f "$csf_conf" ]; then
    print_warning "Could not find $csf_conf"
  elif ! grep -q '^CT_LIMIT[[:space:]]*=' "$csf_conf"; then
    # Without this check sed would change nothing and success would still
    # be reported.
    print_warning "No CT_LIMIT setting found in $csf_conf - nothing changed"
  else
    backup_file="$csf_conf.backup.$(date +%Y%m%d_%H%M%S)"
    if ! cp -p -- "$csf_conf" "$backup_file"; then
      print_warning "Could not back up $csf_conf - no changes made"
    elif sed -i "s/^CT_LIMIT[[:space:]]*=.*/CT_LIMIT = \"$new_limit\"/" "$csf_conf" \
      && grep -q "^CT_LIMIT = \"$new_limit\"" "$csf_conf"; then
      print_info "Backed up previous configuration to $backup_file"

      # Restart CSF. Tested explicitly so a failure is reported instead of
      # terminating the script when it runs with `set -e`.
      print_info "Restarting CSF..."
      if csf -r >/dev/null 2>&1; then
        print_success "CT_LIMIT updated successfully to $new_limit"
      else
        print_warning "CT_LIMIT set to $new_limit but CSF restart may have failed - check manually with: csf -r"
      fi
    else
      print_warning "Failed to update CT_LIMIT in $csf_conf"
    fi
  fi

  echo ""
  # Verify domains still work after CT_LIMIT change
  verify_domains_still_working

  show_action_menu
}
|
||
|
||
#######################################
# Append IP deny rules for one domain to that domain's .htaccess.
#
# The target domain is the first "to <domain>" found in the first
# htaccess_domain recommendation. IPs are taken from parsed_logs.txt rows that
# contain "|<domain>|" (first field = IP) and kept only when threat_scores.txt
# lists them with a score >= 70.
#
# Rules are written as a <RequireAll> block ("Require all granted" plus one
# "Require not ip" per address) because Apache 2.4 rejects negated Require
# directives that are not combined with a positive one.
#
# Globals:
#   TEMP_DIR   (read) holds recommendations.txt, parsed_logs.txt,
#              threat_scores.txt
#   SCRIPT_DIR (read) holds the .sysref reference database; the document root
#              is read from field 4 of the first "DOMAIN|<domain>|" row
# Outputs:
#   Prompts and status messages on stdout; appends to <doc_root>/.htaccess and
#   creates a timestamped backup of an existing file first.
#######################################
execute_htaccess_domain_blocking() {
  local target_domain doc_root="" htaccess_file backup_file="" confirm ip
  local -a block_ips=()

  clear
  print_banner "Add Bot Blocking to Domain .htaccess"
  echo ""

  # Get target domain from recommendation (single value even if the text
  # contains more than one "to ...").
  target_domain=$(grep -F "|htaccess_domain|" "$TEMP_DIR/recommendations.txt" 2>/dev/null \
    | head -n 1 | grep -oP 'to \K[^ ]+' | head -n 1 || true)

  if [ -z "$target_domain" ]; then
    print_warning "Could not determine target domain"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  echo "Target Domain: $target_domain"
  echo ""

  # Find document root for this domain using reference database
  if [ -s "$SCRIPT_DIR/.sysref" ]; then
    doc_root=$(grep -F "DOMAIN|$target_domain|" "$SCRIPT_DIR/.sysref" 2>/dev/null \
      | head -n 1 | cut -d'|' -f4 || true)
  fi

  if [ -z "$doc_root" ]; then
    print_warning "Document root not found in reference database"
    echo "Please enter the document root manually:"
    read -r -p "Document root: " doc_root
  else
    echo "Document root: $doc_root"
  fi

  if [ ! -d "$doc_root" ]; then
    print_warning "Document root does not exist: $doc_root"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  htaccess_file="$doc_root/.htaccess"

  echo ""
  echo "This will add bot blocking rules to: $htaccess_file"
  echo ""
  read -r -p "Proceed? (yes/no): " confirm

  if [ "$confirm" != "yes" ]; then
    print_info "Operation cancelled"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Get high-risk IPs for this domain. The grep is grouped with its own
  # `|| true` so a no-match result cannot short-circuit the rest of the
  # pipeline. The final awk keeps only IPs scored >= 70 and drops anything
  # that does not look like an IPv4/IPv6 address, since the values come from
  # log data and end up in an Apache config file.
  mapfile -t block_ips < <(
    { grep -F -- "|$target_domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true; } \
      | cut -d'|' -f1 \
      | sort -u \
      | awk -F'|' -v scores="$TEMP_DIR/threat_scores.txt" '
          BEGIN {
            while ((getline row < scores) > 0) {
              split(row, col, "|")
              if (col[1] + 0 >= 70) risky[col[2]] = 1
            }
          }
          ($1 in risky) && $1 ~ /^[0-9A-Fa-f:.]+$/
        '
  )

  if [ "${#block_ips[@]}" -eq 0 ]; then
    print_info "No high-risk IPs (score >= 70) found for $target_domain - .htaccess left unchanged"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Create backup; remember the exact name so it can be reported accurately.
  if [ -f "$htaccess_file" ]; then
    backup_file="$htaccess_file.backup.$(date +%Y%m%d_%H%M%S)"
    if ! cp -p -- "$htaccess_file" "$backup_file"; then
      print_warning "Could not back up $htaccess_file - no changes made"
      echo ""
      read -r -p "Press Enter to continue..."
      show_action_menu
      return
    fi
    print_info "Backed up existing .htaccess"
  fi

  # Generate bot blocking rules
  print_info "Adding bot blocking rules..."

  if {
    echo ""
    echo "# Bot blocking rules added by toolkit on $(date)"
    echo "# High-risk IPs (threat score >= 70)"
    echo "<IfModule mod_authz_core.c>"
    echo "  <RequireAll>"
    echo "    Require all granted"
    for ip in "${block_ips[@]}"; do
      echo "    Require not ip $ip"
    done
    echo "  </RequireAll>"
    echo "</IfModule>"
    echo ""
  } >> "$htaccess_file"; then
    print_success "Added blocking rules for ${#block_ips[@]} IPs to $htaccess_file"
  else
    print_warning "Could not write to $htaccess_file"
  fi

  echo ""
  if [ -n "$backup_file" ]; then
    echo "Backup saved to: $backup_file"
    echo ""
  fi

  # Verify domains still work after .htaccess changes
  verify_domains_still_working

  show_action_menu
}
|
||
|
||
#######################################
# Write a server-wide Apache include that denies every high-risk IP
# (threat score >= 70), validate it, and restart Apache.
#
# The rules are wrapped in <Location "/"> because Require directives are only
# valid in directory context, and in <RequireAll> with "Require all granted"
# because Apache 2.4 rejects negated Require directives on their own.
# "AuthMerging And" combines the rules with existing per-directory
# authorization instead of replacing it.
#
# The new file is checked with the first available of apachectl, apache2ctl
# or httpd (-t) before restarting; on failure the previous file is restored
# (or the new one removed) and Apache is NOT restarted.
#
# Globals:
#   TEMP_DIR (read) holds threat_scores.txt (score|ip|...)
# Arguments:
#   $1 - optional Apache include directory; when omitted, /etc/apache2/conf.d
#        and /etc/httpd/conf.d are tried in that order
# Outputs:
#   Prompts and status messages on stdout; writes <dir>/bot_blocking.conf.
#######################################
execute_apache_global_blocking() {
  local conf_dir_override="${1:-}"
  local apache_conf="" backup_file="" confirm dir ip tool configtest_cmd=""
  local -a candidate_dirs=("/etc/apache2/conf.d" "/etc/httpd/conf.d")
  local -a block_ips=()

  clear
  print_banner "Add Global Bot Blocking to Apache"
  echo ""

  print_warning "This feature will add blocking rules to Apache pre-virtualhost configuration"
  echo "This affects ALL domains on the server"
  echo ""

  # Determine Apache config location
  if [ -n "$conf_dir_override" ]; then
    candidate_dirs=("$conf_dir_override")
  fi
  for dir in "${candidate_dirs[@]}"; do
    if [ -d "$dir" ]; then
      apache_conf="$dir/bot_blocking.conf"
      break
    fi
  done

  if [ -z "$apache_conf" ]; then
    print_warning "Could not determine Apache config directory"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Unique high-risk IPs; anything that does not look like an IPv4/IPv6
  # address is dropped so it cannot end up in the Apache configuration.
  mapfile -t block_ips < <(
    awk -F'|' '$1 + 0 >= 70 && $2 ~ /^[0-9A-Fa-f:.]+$/ && !seen[$2]++ { print $2 }' \
      "$TEMP_DIR/threat_scores.txt" 2>/dev/null
  )

  if [ "${#block_ips[@]}" -eq 0 ]; then
    print_info "No high-risk IPs detected (score >= 70) - Apache configuration left unchanged"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  echo "Configuration will be written to: $apache_conf"
  echo ""
  read -r -p "Proceed? (yes/no): " confirm

  if [ "$confirm" != "yes" ]; then
    print_info "Operation cancelled"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Create backup if file exists
  if [ -f "$apache_conf" ]; then
    backup_file="$apache_conf.backup.$(date +%Y%m%d_%H%M%S)"
    if ! cp -p -- "$apache_conf" "$backup_file"; then
      print_warning "Could not back up $apache_conf - no changes made"
      echo ""
      read -r -p "Press Enter to continue..."
      show_action_menu
      return
    fi
    print_info "Backed up existing configuration"
  fi

  # Generate global blocking rules
  print_info "Generating global bot blocking configuration..."

  if ! {
    echo "# Global bot blocking rules"
    echo "# Generated by toolkit on $(date)"
    echo ""
    echo "<IfModule mod_authz_core.c>"
    echo "  <Location \"/\">"
    echo "    # Block high-risk IPs (threat score >= 70)"
    echo "    AuthMerging And"
    echo "    <RequireAll>"
    echo "      Require all granted"
    for ip in "${block_ips[@]}"; do
      echo "      Require not ip $ip"
    done
    echo "    </RequireAll>"
    echo "  </Location>"
    echo "</IfModule>"
    echo ""
  } > "$apache_conf"; then
    print_warning "Could not write $apache_conf"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  print_success "Created global blocking configuration with ${#block_ips[@]} IPs"

  # Validate before restarting: a broken include would take every site down.
  for tool in apachectl apache2ctl httpd; do
    if command -v "$tool" >/dev/null 2>&1; then
      configtest_cmd=$tool
      break
    fi
  done

  if [ -n "$configtest_cmd" ] && ! "$configtest_cmd" -t >/dev/null 2>&1; then
    if [ -n "$backup_file" ]; then
      cp -p -- "$backup_file" "$apache_conf" || true
    else
      rm -f -- "$apache_conf"
    fi
    print_warning "Apache configuration test failed - change rolled back, Apache not restarted"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  echo ""
  echo "Restarting Apache to apply changes..."

  if systemctl restart httpd 2>/dev/null || systemctl restart apache2 2>/dev/null; then
    print_success "Apache restarted successfully"
  else
    print_warning "Could not restart Apache - please restart manually"
  fi

  echo ""
  # Verify domains still work after Apache global blocking
  verify_domains_still_working

  show_action_menu
}
|
||
|
||
#######################################
# Placeholder screen for the WordPress hardening action: prints a
# "coming soon" notice with a list of manual steps, waits for Enter and
# returns to the action menu.
#######################################
execute_wp_hardening() {
  local -a manual_steps=(
    " • Install Wordfence or similar security plugin"
    " • Enable two-factor authentication"
    " • Limit login attempts"
    " • Disable XML-RPC if not needed"
    " • Use strong passwords"
  )

  clear
  print_banner "WordPress Hardening"
  printf '\n'
  print_info "WordPress hardening feature coming soon..."
  printf '\n%s\n' "Recommended manual actions:"
  printf '%s\n' "${manual_steps[@]}"
  printf '\n'
  read -p "Press Enter to continue..."
  show_action_menu
}
|
||
|
||
#######################################
# Placeholder screen for the rate-limiting action: explains that the option
# is not implemented yet, points at the alternative menu options, waits for
# Enter and returns to the action menu.
#######################################
execute_rate_limiting() {
  local -a alternatives=(
    " - IP blocking (options 1-3) - Block specific attacking IPs"
    " - CSF CT_LIMIT adjustment (option 4) - Limit connections per IP"
    " - .htaccess rules (option 5) - Domain-specific blocking"
  )

  clear
  print_banner "Enable Rate Limiting"
  printf '\n'
  print_info "Rate limiting modules like mod_evasive/mod_security can help with application-level DoS"
  printf '\n%s\n' "For better bot protection, consider:"
  printf '%s\n' "${alternatives[@]}"
  printf '\n%s\n\n' "This option (rate limiting) is currently a placeholder for future implementation."
  read -p "Press Enter to continue..."
  show_action_menu
}
|
||
|
||
# execute_csf_portflood() removed - not appropriate for web servers with 400+ sites
# Blocking ports 80/443 based on connection count would break legitimate traffic
|
||
|
||
#######################################
# Interactively enable CSF SYNFLOOD protection (SYNFLOOD="1",
# SYNFLOOD_RATE="100/s", SYNFLOOD_BURST="150") and restart CSF.
#
# The config file is backed up first, the edit is verified afterwards, and a
# failed CSF restart is reported instead of being ignored.
#
# Globals:
#   CSF_AVAILABLE (read) must be "true"
# Arguments:
#   $1 - optional path to the CSF config file (default /etc/csf/csf.conf)
# Outputs:
#   Prompts and status messages on stdout; always ends in show_action_menu.
#######################################
execute_csf_synflood() {
  local csf_conf="${1:-/etc/csf/csf.conf}"
  local current_synflood confirm backup_file

  clear
  print_banner "Enable CSF SYNFLOOD Protection"
  echo ""

  if [ "$CSF_AVAILABLE" != true ]; then
    print_warning "CSF is not installed on this server"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Get current SYNFLOOD setting
  current_synflood=$(grep '^SYNFLOOD[[:space:]]*=' "$csf_conf" 2>/dev/null \
    | grep -oP '"\K[^"]+' | head -n 1 || true)
  current_synflood=${current_synflood:-0}

  echo "Current SYNFLOOD protection: ${current_synflood}"
  echo ""
  echo "SYNFLOOD protects against SYN flood DDoS attacks by limiting"
  echo "the rate of new TCP connections."
  echo ""
  echo "Recommended settings:"
  echo " SYNFLOOD = \"1\" (enable protection)"
  echo " SYNFLOOD_RATE = \"100/s\" (100 connections per second)"
  echo " SYNFLOOD_BURST = \"150\" (allow burst of 150)"
  echo ""
  read -r -p "Enable SYNFLOOD protection? (yes/no): " confirm

  if [ "$confirm" != "yes" ]; then
    print_info "Operation cancelled"
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
    return
  fi

  # Update CSF config
  print_info "Enabling SYNFLOOD protection..."
  if [ ! -f "$csf_conf" ]; then
    print_warning "Could not find $csf_conf"
  else
    backup_file="$csf_conf.backup.$(date +%Y%m%d_%H%M%S)"
    if ! cp -p -- "$csf_conf" "$backup_file"; then
      print_warning "Could not back up $csf_conf - no changes made"
    elif sed -i \
      -e 's/^SYNFLOOD[[:space:]]*=.*/SYNFLOOD = "1"/' \
      -e 's/^SYNFLOOD_RATE[[:space:]]*=.*/SYNFLOOD_RATE = "100\/s"/' \
      -e 's/^SYNFLOOD_BURST[[:space:]]*=.*/SYNFLOOD_BURST = "150"/' \
      "$csf_conf" \
      && grep -q '^SYNFLOOD = "1"' "$csf_conf"; then
      # Restart CSF. Tested explicitly so a failure is reported instead of
      # terminating the script when it runs with `set -e`.
      print_info "Restarting CSF..."
      if csf -r >/dev/null 2>&1; then
        print_success "SYNFLOOD protection enabled"
      else
        print_warning "SYNFLOOD settings written but CSF restart may have failed - check manually with: csf -r"
      fi
    else
      # sed found no SYNFLOOD line to change (or could not write the file).
      print_warning "Could not enable SYNFLOOD in $csf_conf - setting not found or file not writable"
    fi
  fi

  echo ""
  read -r -p "Press Enter to continue..."
  show_action_menu
}
|
||
|
||
#######################################
# Show MaxMind GeoIP2 setup guidance for CSF country blocking.
#
# Prints a short "next steps" list when the mmdbinspect command is found,
# otherwise the full installation walkthrough. If
# $TEMP_DIR/high_risk_networks.txt is non-empty, its first 10 lines
# ("<count> <network>") are listed as well. Nothing is installed or modified;
# the function only prints, waits for Enter and returns to the action menu.
#######################################
execute_install_maxmind() {
  local networks_file="$TEMP_DIR/high_risk_networks.txt"
  local count network

  clear
  print_banner "Install MaxMind GeoIP2 for Country Blocking"
  printf '\n'

  # Check if already installed
  if ! command -v mmdbinspect >/dev/null 2>&1; then
    print_info "MaxMind GeoIP2 not detected"
    printf '%s\n' \
      "" \
      "To install MaxMind GeoIP2 for CSF country blocking:" \
      "" \
      "1. Sign up for free MaxMind account:" \
      " https://www.maxmind.com/en/geolite2/signup" \
      "" \
      "2. Get your license key from:" \
      " https://www.maxmind.com/en/accounts/current/license-key" \
      "" \
      "3. Install GeoIP Perl module:" \
      " yum install perl-Geo-IP" \
      " # or" \
      " cpan -i Geo::IP" \
      "" \
      "4. Test CSF GeoIP support:" \
      " /usr/local/csf/bin/csftest.pl -g" \
      "" \
      "5. Configure CC_DENY in /etc/csf/csf.conf:" \
      ' CC_DENY = "CN,RU" (example: block China & Russia)' \
      "" \
      "6. Restart CSF:" \
      " csf -r" \
      ""
  else
    print_success "MaxMind GeoIP2 tools already installed"
    printf '%s\n' \
      "" \
      "Next steps:" \
      "1. Sign up for free license at: https://www.maxmind.com/en/geolite2/signup" \
      "2. Get your license key from account page" \
      "3. Install CSF GeoIP module: /usr/local/csf/bin/csftest.pl -g" \
      "4. Configure CC_DENY in /etc/csf/csf.conf with country codes" \
      "" \
      'Example: CC_DENY = "CN,RU,KP" (block China, Russia, North Korea)' \
      ""
  fi

  # Show geographic analysis if available
  if [ -s "$networks_file" ]; then
    printf '%s\n' \
      "=========================================================══" \
      "High-Risk Networks Detected:" \
      ""
    while read count network; do
      printf '%s\n' " • $network - $count high-risk IPs"
    done < <(head -10 "$networks_file")
    printf '\n'
  fi

  read -p "Press Enter to continue..."
  show_action_menu
}
|
||
|
||
################################################################################
# INTERACTIVE CSF BLOCKING
################################################################################
|
||
|
||
#######################################
# List all high-risk IPs (threat score >= 70, highest first) and let the user
# choose how to block them through CSF: 1 hour, 24 hours, permanently, or not
# at all.
#
# IPs accepted by is_excluded_ip or listed in false_positives.txt are left
# out. The request count shown per IP is field 2 of the bot_ips.txt row whose
# first field is exactly that IP ("0" when there is none).
#
# Globals:
#   TEMP_DIR      (read) holds threat_scores.txt (score|ip|...), bot_ips.txt
#                 (ip|requests|...) and optionally false_positives.txt
#   CSF_AVAILABLE (read) must be "true"
#   RED, YELLOW, BOLD, NC (read) colour codes
# Outputs:
#   The IP list and menu on stdout; delegates to apply_csf_blocks or
#   apply_csf_permanent_blocks depending on the answer.
# Returns:
#   0 when CSF is unavailable or nothing needs blocking; otherwise the status
#   of the last command run for the selected option.
#######################################
offer_csf_blocking() {
  local scores_file="$TEMP_DIR/threat_scores.txt"
  local fp_file="$TEMP_DIR/false_positives.txt"
  local score ip requests block_choice duration duration_text i
  local count=0
  local -a high_risk_ips=() ip_scores=()

  echo ""
  echo "==============================================================="
  print_header "🛡 INTERACTIVE THREAT BLOCKING"

  # Check if CSF is installed
  if [ "$CSF_AVAILABLE" != true ]; then
    print_warning "CSF (ConfigServer Security & Firewall) is not installed"
    echo "Cannot offer automatic blocking without CSF"
    return 0
  fi

  # Get high-risk IPs (score >= 70)
  if [ -s "$scores_file" ]; then
    while IFS='|' read -r score ip; do
      # Ignore blank/header/malformed rows instead of letting `[` complain.
      [[ "$score" =~ ^[0-9]+$ && -n "$ip" ]] || continue

      # Only include scores >= 70 (HIGH and CRITICAL)
      [ "$score" -ge 70 ] || continue

      # Skip excluded IPs
      if is_excluded_ip "$ip"; then
        continue
      fi

      # Skip false positives (exact match on the first field)
      if [ -s "$fp_file" ] \
        && awk -F'|' -v ip="$ip" \
          '($1 "") == (ip "") { found = 1; exit } END { exit !found }' \
          "$fp_file" 2>/dev/null; then
        continue
      fi

      high_risk_ips+=("$ip")
      ip_scores+=("$score")
    done < <(awk -F'|' '{print $1 "|" $2}' "$scores_file" | sort -rn)
  fi

  # If no high-risk IPs, nothing to block
  if [ "${#high_risk_ips[@]}" -eq 0 ]; then
    print_info "No high-risk IPs detected (score >= 70)"
    return 0
  fi

  # Show IPs that would be blocked
  echo ""
  echo "Found ${#high_risk_ips[@]} high-risk IP(s) with threat score >= 70:"
  echo ""

  for i in "${!high_risk_ips[@]}"; do
    count=$((count + 1))
    ip="${high_risk_ips[$i]}"
    score="${ip_scores[$i]}"

    # Exact match on the IP field: a substring search for "1.2.3.4|" would
    # also hit "21.2.3.4|".
    requests=$(awk -F'|' -v ip="$ip" \
      '($1 "") == (ip "") { print $2; exit }' \
      "$TEMP_DIR/bot_ips.txt" 2>/dev/null) || requests=""
    requests=${requests:-0}

    # Color code by severity
    if [ "$score" -ge 90 ]; then
      echo -e " ${RED}[$count] $ip${NC} - Risk: ${RED}$score/100 CRITICAL${NC} ($requests requests)"
    elif [ "$score" -ge 80 ]; then
      echo -e " ${YELLOW}[$count] $ip${NC} - Risk: ${YELLOW}$score/100 HIGH${NC} ($requests requests)"
    else
      echo -e " [$count] $ip - Risk: $score/100 ELEVATED ($requests requests)"
    fi
  done

  echo ""
  echo "==============================================================="
  echo ""

  # Ask user if they want to block
  echo -e "${BOLD}Would you like to temporarily block these IPs using CSF?${NC}"
  echo ""
  echo "Options:"
  echo " 1) Block for 1 hour (temporary - auto-expires)"
  echo " 2) Block for 24 hours (temporary - auto-expires)"
  echo " 3) Block permanently (requires manual unblock)"
  echo " 4) Don't block (manual review)"
  echo ""
  read -r -p "Select option [1-4]: " block_choice

  case "$block_choice" in
    1)
      duration=3600 # 1 hour in seconds
      duration_text="1 hour"
      apply_csf_blocks "$duration" "$duration_text" "${high_risk_ips[@]}"
      ;;
    2)
      duration=86400 # 24 hours in seconds
      duration_text="24 hours"
      apply_csf_blocks "$duration" "$duration_text" "${high_risk_ips[@]}"
      ;;
    3)
      apply_csf_permanent_blocks "${high_risk_ips[@]}"
      ;;
    4)
      print_info "Skipping automatic blocking - manual review recommended"
      echo "You can block IPs manually using: csf -td IP DURATION"
      ;;
    *)
      print_warning "Invalid option - skipping blocking"
      ;;
  esac
}
|
||
|
||
apply_csf_blocks() {
|
||
local duration=$1
|
||
local duration_text=$2
|
||
shift 2
|
||
local ips=("$@")
|
||
|
||
echo ""
|
||
print_info "Applying temporary CSF blocks for $duration_text..."
|
||
echo ""
|
||
|
||
local success_count=0
|
||
local fail_count=0
|
||
|
||
for ip in "${ips[@]}"; do
|
||
# Get threat score for comment
|
||
local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
||
|
||
# Use csf -td for temporary deny
|
||
if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
|
||
echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
|
||
success_count=$((success_count + 1))
|
||
else
|
||
echo -e " ${RED}${NC} Failed to block $ip"
|
||
fail_count=$((fail_count + 1))
|
||
fi
|
||
done
|
||
|
||
echo ""
|
||
if [ "${success_count:-0}" -gt 0 ]; then
|
||
print_success "Successfully blocked $success_count IP(s) for $duration_text"
|
||
echo ""
|
||
echo "These blocks will automatically expire after $duration_text"
|
||
echo "To view temporary blocks: csf -t"
|
||
echo "To remove a block early: csf -tr IP"
|
||
fi
|
||
|
||
if [ "${fail_count:-0}" -gt 0 ]; then
|
||
print_warning "$fail_count IP(s) failed to block - check CSF configuration"
|
||
fi
|
||
|
||
# Restart CSF to apply changes
|
||
print_info "Restarting CSF to apply changes..."
|
||
if csf -r >/dev/null 2>&1; then
|
||
print_success "CSF restarted successfully"
|
||
else
|
||
print_warning "CSF restart may have failed - check manually with: csf -r"
|
||
fi
|
||
}
|
||
|
||
apply_csf_permanent_blocks() {
|
||
local ips=("$@")
|
||
|
||
echo ""
|
||
print_warning "Applying PERMANENT CSF blocks..."
|
||
echo "These will require manual removal using: csf -dr IP"
|
||
echo ""
|
||
read -p "Are you sure? This is permanent! (yes/no): " confirm
|
||
|
||
if [ "$confirm" != "yes" ]; then
|
||
print_info "Cancelled permanent blocking"
|
||
return 0
|
||
fi
|
||
|
||
echo ""
|
||
local success_count=0
|
||
local fail_count=0
|
||
|
||
for ip in "${ips[@]}"; do
|
||
local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
||
|
||
# Use csf -d for permanent deny
|
||
if csf -d "$ip" "Bot threat score: $score/100 - Permanently blocked by toolkit" >/dev/null 2>&1; then
|
||
echo -e " ${GREEN}${NC} Permanently blocked $ip (score: $score/100)"
|
||
success_count=$((success_count + 1))
|
||
else
|
||
echo -e " ${RED}${NC} Failed to block $ip"
|
||
fail_count=$((fail_count + 1))
|
||
fi
|
||
done
|
||
|
||
echo ""
|
||
if [ "${success_count:-0}" -gt 0 ]; then
|
||
print_success "Successfully blocked $success_count IP(s) permanently"
|
||
echo ""
|
||
echo "To view blocked IPs: csf -g"
|
||
echo "To remove a block: csf -dr IP"
|
||
fi
|
||
|
||
if [ "${fail_count:-0}" -gt 0 ]; then
|
||
print_warning "$fail_count IP(s) failed to block - check CSF configuration"
|
||
fi
|
||
|
||
# Restart CSF
|
||
print_info "Restarting CSF to apply changes..."
|
||
if csf -r >/dev/null 2>&1; then
|
||
print_success "CSF restarted successfully"
|
||
else
|
||
print_warning "CSF restart may have failed - check manually with: csf -r"
|
||
fi
|
||
}
|
||
|
||
# Run the script
|
||
# Script entry point: call main with all command-line arguments, each passed
# through as its own unmodified word.
main "$@"
|