#!/bin/bash set -eo pipefail ############################################################################# # Apache/cPanel Domain Log Bot & Botnet Analyzer # Version: 3.1 Enhanced (with Library Integration) # Advanced log analysis for bot activity, security threats, and botnets # # Features: # - Comprehensive bot classification (legitimate, AI, monitoring, suspicious) # - Enhanced attack vector detection (SQL injection, XSS, path traversal, # RCE/shell upload, info disclosure, login bruteforce) # - Threat scoring system (0-100 risk scores for each IP) # - Time-series analysis with hourly traffic visualization # - Response code intelligence (what are bots finding?) # - False positive detection for legitimate monitoring services # - Bandwidth cost estimation for bot traffic # - Botnet pattern analysis (coordinated attacks, DDoS detection) # - Prioritized blocklists sorted by threat severity # - Actionable reports with copy-paste ready configurations # - Performance optimized for large log files (>500k entries) # - User filtering (analyze all users or specific user) # - Auto-detects log directory based on control panel ############################################################################# # Load libraries SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" source "$SCRIPT_DIR/lib/common-functions.sh" source "$SCRIPT_DIR/lib/system-detect.sh" source "$SCRIPT_DIR/lib/user-manager.sh" source "$SCRIPT_DIR/lib/ip-reputation.sh" source "$SCRIPT_DIR/lib/bot-signatures.sh" source "$SCRIPT_DIR/lib/attack-patterns.sh" source "$SCRIPT_DIR/lib/threat-intelligence.sh" # Default configuration (auto-detected from system) LOG_DIR="${SYS_LOG_DIR:-/var/log/apache2/domlogs}" # Use toolkit's tmp directory instead of system /tmp to avoid filling it up # On large servers with 200+ domains, compressed temp files can still be 50-100MB # Using toolkit's tmp dir means: # - Won't fill up system /tmp # - Gets auto-cleaned when toolkit is removed # - Included in cleanup script (clean-and-push-toolkit.sh) TOOLKIT_TMP_DIR="$SCRIPT_DIR/tmp" mkdir -p "$TOOLKIT_TMP_DIR" 2>/dev/null # NEW: Baseline history directory (stores 30 days of historical data per domain) BASELINE_DIR="$TOOLKIT_TMP_DIR/baseline_history" mkdir -p "$BASELINE_DIR" 2>/dev/null TEMP_DIR="$TOOLKIT_TMP_DIR/bot_analysis_$$" OUTPUT_FILE="$TOOLKIT_TMP_DIR/bot_analysis_report_$(date +%Y%m%d_%H%M%S).txt" DAYS_BACK="" # Empty means all logs, otherwise filter by days HOURS_BACK="" # Empty means all logs, otherwise filter by hours FILTER_USER="" # Empty means all users, otherwise specific user # Cache CSF availability (avoid checking command_v csf 5 times) CSF_AVAILABLE=false if command -v csf >/dev/null 2>&1 && [ -f /etc/csf/csf.conf ]; then CSF_AVAILABLE=true fi # Parse command line arguments while [[ $# -gt 0 ]]; do case $1 in -d|--days) DAYS_BACK="$2" shift 2 ;; -H|--hours) HOURS_BACK="$2" shift 2 ;; -l|--log-dir) LOG_DIR="$2" shift 2 ;; -o|--output) OUTPUT_FILE="$2" shift 2 ;; -u|--user) FILTER_USER="$2" shift 2 ;; -h|--help) echo "Apache/cPanel Domain Log Bot & Botnet Analyzer v3.1" echo "" echo "Usage: $0 [-d DAYS | -H HOURS] [-u USER] [-l LOG_DIR] [-o OUTPUT_FILE]" echo "" echo "Options:" echo " -d, --days DAYS Analyze only logs from last N days (24-hour periods)" echo " -H, --hours HOURS Analyze only logs from last N hours" echo " -u, --user USER Analyze only logs for specific cPanel user" echo " -l, --log-dir DIR Custom log directory (auto-detected by default)" echo " -o, --output FILE Custom output file path" echo " -h, --help Show this help message" echo "" echo "Examples:" echo " $0 # Analyze all logs in default directory" echo " $0 -d 7 # Analyze logs from last 7 days" echo " $0 -H 6 # Analyze logs from last 6 hours" echo " $0 -l /custom/path # Use custom log directory" echo "" echo "Note: If both -d and -H are specified, only -H (hours) will be used." echo "" exit 0 ;; *) echo "Unknown option: $1" echo "Use -h for help" exit 1 ;; esac done # Interactive prompts for missing options prompt_time_range() { clear print_banner "Bot Analyzer - Time Range Selection" echo "" echo -e " ${GREEN}1)${NC} All available logs" echo -e " ${GREEN}2)${NC} Last 1 hour" echo -e " ${GREEN}3)${NC} Last 6 hours" echo -e " ${GREEN}4)${NC} Last 24 hours" echo -e " ${GREEN}5)${NC} Last 7 days" echo -e " ${GREEN}6)${NC} Last 30 days" echo -e " ${GREEN}7)${NC} Custom hours" echo -e " ${GREEN}8)${NC} Custom days" echo "" # Validate time_choice input with retry loop while true; do read -p "Select time range (1-8): " time_choice if ! [[ "$time_choice" =~ ^[1-8]$ ]]; then print_error "Invalid choice. Please enter 1-8" continue fi case $time_choice in 1) break ;; # All logs - no filter 2) HOURS_BACK=1; break ;; 3) HOURS_BACK=6; break ;; 4) HOURS_BACK=24; break ;; 5) DAYS_BACK=7; break ;; 6) DAYS_BACK=30; break ;; 7) while true; do read -p "Enter number of hours: " custom_hours if [[ "$custom_hours" =~ ^[0-9]+$ ]] && [ "$custom_hours" -gt 0 ]; then HOURS_BACK=$custom_hours break 2 # Break out of both loops else print_error "Invalid input. Please enter a positive number" fi done ;; 8) while true; do read -p "Enter number of days: " custom_days if [[ "$custom_days" =~ ^[0-9]+$ ]] && [ "$custom_days" -gt 0 ]; then DAYS_BACK=$custom_days break 2 # Break out of both loops else print_error "Invalid input. Please enter a positive number" fi done ;; esac done } prompt_user_scope() { clear print_banner "Bot Analyzer - User Scope Selection" echo "" echo -e " ${GREEN}1)${NC} All users (system-wide analysis)" echo -e " ${GREEN}2)${NC} Specific user" echo "" # Validate user_choice input with retry loop while true; do read -p "Select option (1-2): " user_choice if ! [[ "$user_choice" =~ ^[1-2]$ ]]; then print_error "Invalid choice. Please enter 1 or 2" continue fi if [ "$user_choice" = "2" ]; then echo "" local selected=$(select_user_interactive "Select user to analyze") if [ $? -eq 0 ] && [ "$selected" != "ALL" ]; then FILTER_USER="$selected" fi fi break done } # Interactive prompts for missing options # Prompt for time range if not specified if [ -z "$DAYS_BACK" ] && [ -z "$HOURS_BACK" ]; then prompt_time_range fi # Prompt for user if not specified if [ -z "$FILTER_USER" ]; then prompt_user_scope fi # Validate time filter options if [ -n "$DAYS_BACK" ] && [ -n "$HOURS_BACK" ]; then echo -e "${YELLOW}Warning: Both days and hours specified. Using hours filter only.${NC}" >&2 DAYS_BACK="" fi # Color codes for terminal output RED='\033[0;31m' YELLOW='\033[1;33m' GREEN='\033[0;32m' BLUE='\033[0;34m' CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m' # No Color # Check for required commands check_dependencies() { local missing_deps=() for cmd in awk grep sort uniq find sed head tail cut; do if ! command -v "$cmd" >/dev/null 2>&1; then missing_deps+=("$cmd") fi done if [ ${#missing_deps[@]} -gt 0 ]; then echo -e "${RED}Error: Missing required commands: ${missing_deps[*]}${NC}" >&2 exit 1 fi } # Check disk space check_disk_space() { local available_kb local check_path="$SCRIPT_DIR" available_kb=$(df "$check_path" 2>/dev/null | tail -1 | awk '{print $4}') if [ -z "$available_kb" ]; then echo -e "${YELLOW}Warning: Cannot determine available disk space for toolkit directory${NC}" >&2 return fi if [ "$available_kb" -lt 102400 ]; then # Less than 100MB echo -e "${YELLOW}Warning: Low disk space in toolkit directory: $((available_kb/1024))MB available${NC}" >&2 read -p "Continue anyway? (y/N): " -n 1 -r echo if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1 fi fi } # Run dependency checks check_dependencies check_disk_space # Create temp directory mkdir -p "$TEMP_DIR" || { echo -e "${RED}Error: Cannot create temp directory: $TEMP_DIR${NC}" >&2 exit 1 } # Cleanup on exit trap "rm -rf \"$TEMP_DIR\"" EXIT ############################################################################# # Bot Signature Database ############################################################################# # NOTE: Bot signatures now loaded from lib/bot-signatures.sh # Arrays available: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS ############################################################################# # Helper Functions ############################################################################# print_header() { echo -e "\n${CYAN}===============================================================${NC}" echo -e "${CYAN}$1${NC}" echo -e "${CYAN}===============================================================${NC}\n" } print_alert() { echo -e "${RED}$1${NC}" } print_warning() { echo -e "${YELLOW}$1${NC}" } print_info() { echo -e "${BLUE} $1${NC}" } print_success() { echo -e "${GREEN}$1${NC}" } ############################################################################# # Log Parsing Functions ############################################################################# parse_logs() { if [ "$INTERWORX_MODE" = "yes" ]; then print_info "Parsing InterWorx domain logs from: /home/*/var/*/logs/" else print_info "Parsing logs from: $LOG_DIR" fi local find_opts=() # Add time filter if specified (hours takes precedence over days) if [ -n "$HOURS_BACK" ]; then local minutes=$((HOURS_BACK * 60)) find_opts+=(-mmin -"$minutes") print_info "Filtering logs from last $HOURS_BACK hours" elif [ -n "$DAYS_BACK" ]; then find_opts+=(-mtime -"$DAYS_BACK") print_info "Filtering logs from last $DAYS_BACK days" fi # Determine log file search pattern based on control panel local log_search_path local log_search_name if [ "$INTERWORX_MODE" = "yes" ]; then # InterWorx: Official docs from https://appendix.interworx.com/current/nodeworx/general/other/log-file-locations.html # HTTP: /home/{user}/var/{domain}/logs/transfer.log # HTTPS: /home/{user}/var/{domain}/logs/transfer-ssl.log log_search_path="/home/*/var/*/logs" log_search_name="transfer*.log" else # cPanel: /var/log/apache2/domlogs/domain.com or domain.com-ssl_log # Plesk: Research verified paths from https://docs.plesk.com/en-US/obsidian/ # Apache HTTP: /var/www/vhosts/system/{domain}/logs/access_log # Apache HTTPS: /var/www/vhosts/system/{domain}/logs/access_ssl_log # nginx HTTP: /var/www/vhosts/system/{domain}/logs/proxy_access_log # nginx HTTPS: /var/www/vhosts/system/{domain}/logs/proxy_access_ssl_log # Note: /var/www/vhosts/{domain}/logs/ are hardlinks (backward compat) log_search_path="$LOG_DIR" log_search_name="*" fi # Parse all domain logs local file_count=0 local progress_interval=5 # Show progress every 5 files instead of 50 echo "" { find "$log_search_path" -type f -name "$log_search_name" ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | while read -r logfile; do # Skip empty files [ -s "$logfile" ] || continue # Extract domain name based on control panel if [ "$INTERWORX_MODE" = "yes" ]; then # InterWorx: extract from path /home/user/var/domain.com/logs/transfer*.log domain=$(echo "$logfile" | sed -n 's|^/home/.*/var/\([^/]*\)/logs/.*|\1|p') elif [ "$SYS_CONTROL_PANEL" = "plesk" ]; then # Plesk: extract from path /var/www/vhosts/system/domain.com/logs/{access_log,access_ssl_log,proxy_*} domain=$(echo "$logfile" | sed -n 's|^/var/www/vhosts/system/\([^/]*\)/logs/.*|\1|p') else # cPanel: extract from filename /var/log/apache2/domlogs/domain.com or domain.com-ssl_log domain=$(basename "$logfile" | sed 's/-ssl_log$//') fi # Skip if domain extraction failed [ -z "$domain" ] && continue # User filtering: skip domains not belonging to the specified user if [ -n "$FILTER_USER" ]; then if ! echo "$user_domains" | grep -qFx "$domain"; then continue fi fi # Show progress every N files file_count=$((file_count + 1)) if [ $((file_count % progress_interval)) -eq 0 ]; then echo -ne "\r Parsed $file_count log files... (current: $domain)" fi # Parse Apache Combined Log Format with error handling # Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT" awk -v domain="$domain" -v hours_filter="$HOURS_BACK" -v days_filter="$DAYS_BACK" ' BEGIN { # Month name to number lookup month["Jan"]=1; month["Feb"]=2; month["Mar"]=3; month["Apr"]=4 month["May"]=5; month["Jun"]=6; month["Jul"]=7; month["Aug"]=8 month["Sep"]=9; month["Oct"]=10; month["Nov"]=11; month["Dec"]=12 # Calculate cutoff timestamp in epoch seconds if (hours_filter != "") { cmd = "date -d \"" hours_filter " hours ago\" +%s 2>/dev/null || date -v-" hours_filter "H +%s 2>/dev/null" cmd | getline cutoff_epoch close(cmd) } else if (days_filter != "") { cmd = "date -d \"" days_filter " days ago\" +%s 2>/dev/null || date -v-" days_filter "d +%s 2>/dev/null" cmd | getline cutoff_epoch close(cmd) } } { # Skip empty lines and malformed entries if (NF < 10 || length($0) < 50) next # Extract IP (first field - space separated) ip = $1 # Extract timestamp (between square brackets) if (match($0, /\[([^\]]+)\]/, ts)) { timestamp = ts[1] } else { timestamp = "unknown" } # Filter by timestamp if time filter is set if ((hours_filter != "" || days_filter != "") && timestamp != "unknown" && cutoff_epoch != "") { # Extract just the date/time part (before timezone) # Format: 31/Dec/2025:10:30:15 -0500 split(timestamp, ts_parts, " ") log_ts = ts_parts[1] # Parse: dd/mmm/yyyy:HH:MM:SS split(log_ts, dt, /[\/:]/) day = dt[1] mon = month[dt[2]] year = dt[3] hour = dt[4] min = dt[5] sec = dt[6] # Convert to epoch using awk mktime (YYYY MM DD HH MM SS) # mktime is much faster than spawning date command if (mon != "") { log_epoch = mktime(year " " mon " " day " " hour " " min " " sec) # Numerical comparison of epoch seconds if (log_epoch < cutoff_epoch) { next # Skip this entry, too old } } } # Extract HTTP method, URL, and status if (match($0, /"([A-Z]+) ([^ ]+) [^"]*" ([0-9]+) ([0-9-]+)/, req)) { http_method = req[1] request_url = req[2] status = req[3] size = req[4] } else { # Fallback for malformed requests http_method = "-" request_url = "-" status = "-" size = "0" } # Extract User-Agent (last quoted string) if (match($0, /"([^"]*)"[[:space:]]*$/, ua)) { user_agent = ua[1] if (user_agent == "") user_agent = "-" } else { user_agent = "-" } # Extract additional headers for enhanced analysis referer = "-" accept_lang = "-" accept_encoding = "-" # Extract Referer header if (match($0, /"([^"]*)"[[:space:]]*"[^"]*"[[:space:]]*$/, ref)) { referer = ref[1] if (referer == "") referer = "-" } # Try to extract Accept-Language from log (if available) if (match($0, /Accept-Language: ([^ ,;]*)/i, al)) { accept_lang = al[1] } # Only output valid entries if (ip != "" && ip !~ /^[[:space:]]*$/) { print ip "|" domain "|" request_url "|" status "|" size "|" user_agent "|" http_method "|" timestamp "|" referer "|" accept_lang } }' "$logfile" 2>/dev/null done } > "$TEMP_DIR/parsed_logs.txt" # Clear the progress line echo -ne "\r\033[K" if [ ! -s "$TEMP_DIR/parsed_logs.txt" ]; then print_alert "No log entries were parsed. Check log format or permissions." return 1 fi local line_count line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0") local file_size_kb file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0") # Compress for storage (gzip saves ~90% space on text) # But we keep uncompressed version for fast analysis gzip -c "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/parsed_logs.txt.gz" & print_success "Logs parsed successfully ($line_count entries, ${file_size_kb}KB uncompressed)" return 0 } ############################################################################# # Bot Detection & Classification ############################################################################# classify_bots() { print_info "Classifying bot traffic..." # Build combined grep patterns for efficiency local legit_pattern=$(printf "%s|" "${!LEGIT_BOTS[@]}" | sed 's/|$//') local ai_pattern=$(printf "%s|" "${!AI_BOTS[@]}" | sed 's/|$//') local monitor_pattern=$(printf "%s|" "${!MONITOR_BOTS[@]}" | sed 's/|$//') local suspicious_pattern=$(printf "%s|" "${!SUSPICIOUS_BOTS[@]}" | sed 's/|$//') # Process logs with AWK for better performance awk -F'|' -v legit="$legit_pattern" -v ai="$ai_pattern" -v monitor="$monitor_pattern" -v suspicious="$suspicious_pattern" ' BEGIN { # Convert patterns to lowercase for case-insensitive matching legit_lower = tolower(legit) ai_lower = tolower(ai) monitor_lower = tolower(monitor) suspicious_lower = tolower(suspicious) } { ip = $1 domain = $2 url = $3 status = $4 size = $5 ua = $6 method = $7 timestamp = $8 ua_lower = tolower(ua) bot_type = "unknown" bot_name = "Unknown" # Check each category in priority order if (legit != "" && match(ua_lower, legit_lower)) { bot_type = "legit" # Extract actual bot name from UA split(legit, bots, "|") for (i in bots) { if (match(ua_lower, tolower(bots[i]))) { bot_name = bots[i] break } } } else if (ai != "" && match(ua_lower, ai_lower)) { bot_type = "ai" split(ai, bots, "|") for (i in bots) { if (match(ua_lower, tolower(bots[i]))) { bot_name = bots[i] break } } } else if (monitor != "" && match(ua_lower, monitor_lower)) { bot_type = "monitor" split(monitor, bots, "|") for (i in bots) { if (match(ua_lower, tolower(bots[i]))) { bot_name = bots[i] break } } } else if (suspicious != "" && match(ua_lower, suspicious_lower)) { bot_type = "suspicious" split(suspicious, bots, "|") for (i in bots) { if (match(ua_lower, tolower(bots[i]))) { bot_name = bots[i] break } } } else if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python-requests|python-urllib|java\/|scan|check|monitor/)) { # FIXED: Check for bot keywords FIRST, then verify it is not a legitimate browser # This prevents bots from bypassing detection by including browser strings # FIRST: Check if it is actually a legitimate browser with complete UA signature # Real browsers have: Mozilla/5.0 + platform + rendering engine + browser version is_real_browser = 0 # Chrome/Chromium-based: Must have Chrome/ AND (AppleWebKit OR Mobile) if (match(ua_lower, /chrome\/[0-9]/) && (match(ua_lower, /applewebkit/) || match(ua_lower, /mobile/))) { is_real_browser = 1 } else if (match(ua_lower, /firefox\/[0-9]/) && match(ua_lower, /gecko\//)) { # Firefox: Must have Firefox/ AND Gecko/ is_real_browser = 1 } else if (match(ua_lower, /safari\/[0-9]/) && match(ua_lower, /version\//) && match(ua_lower, /applewebkit/) && !match(ua_lower, /chrome/)) { # Safari: Must have Safari/ AND Version/ AND AppleWebKit (not Chrome) is_real_browser = 1 } else if (match(ua_lower, /edg\/[0-9]|edge\/[0-9]/)) { # Edge: Must have Edg/ or Edge/ is_real_browser = 1 } else if (match(ua_lower, /samsungbrowser\/[0-9]|ucbrowser\/[0-9]|opr\/[0-9]/)) { # Mobile browsers: Samsung, UC, Opera Mobile is_real_browser = 1 } # If it is a real browser, skip bot classification if (is_real_browser == 1) { next } bot_type = "unidentified_bot" # Extract first word of UA as bot name match(ua, /^[^ ]+/, name) bot_name = substr(name[0], 1, 30) } # Only print if bot_type is not "unknown" (i.e., we identified it as something) if (bot_type != "unknown") { print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name } }' < "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt" if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then print_alert "Bot classification failed" return 1 fi local classified_count classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo "0") local file_size_kb file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0") # Compress for storage in background gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" & print_success "Bot classification complete ($classified_count entries, ${file_size_kb}KB uncompressed)" return 0 } ############################################################################# # NEW: Baseline Management (historical tracking for anomaly detection) ############################################################################# save_baseline() { print_info "Storing baseline metrics for anomaly comparison..." local today=$(date +%Y%m%d) # Calculate current metrics local total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0") local unique_ips=$(awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo "0") local bot_requests=$(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo "0") local bot_pct=0 if [ "$total_requests" -gt 0 ]; then bot_pct=$((bot_requests * 100 / total_requests)) fi local sqli_attempts=$(wc -l < "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null || echo "0") local xss_attempts=$(wc -l < "$TEMP_DIR/xss_attempts.txt" 2>/dev/null || echo "0") local path_attempts=$(wc -l < "$TEMP_DIR/path_traversal_attempts.txt" 2>/dev/null || echo "0") local rce_attempts=$(wc -l < "$TEMP_DIR/rce_upload_attempts.txt" 2>/dev/null || echo "0") local login_attempts=$(wc -l < "$TEMP_DIR/login_bruteforce_attempts.txt" 2>/dev/null || echo "0") local total_attacks=$((sqli_attempts + xss_attempts + path_attempts + rce_attempts + login_attempts)) local high_risk_ips=$(awk -F'|' '$1 >= 70' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | wc -l || echo "0") # Store baseline for each domain if [ -f "$TEMP_DIR/all_domains.txt" ]; then while read -r domain; do local baseline_file="$BASELINE_DIR/${domain}_baseline.txt" # Get domain-specific metrics local domain_requests=$(grep -F "|$domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | wc -l || echo "0") local domain_attacks=$(grep -F "|$domain|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0") local domain_bots=$(grep -F "|$domain|" "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo "0") # Append to baseline history (timestamp|requests|attacks|bots|high_risk_ips) echo "$today|$domain_requests|$domain_attacks|$domain_bots|$high_risk_ips" >> "$baseline_file" # Keep only last 30 days tail -30 "$baseline_file" > "$baseline_file.tmp" && mv "$baseline_file.tmp" "$baseline_file" done < "$TEMP_DIR/all_domains.txt" fi # Store global baseline local global_baseline="$BASELINE_DIR/global_baseline.txt" echo "$today|$total_requests|$unique_ips|$bot_pct|$total_attacks|$sqli_attempts|$xss_attempts|$path_attempts|$rce_attempts|$login_attempts|$high_risk_ips" >> "$global_baseline" tail -30 "$global_baseline" > "$global_baseline.tmp" && mv "$global_baseline.tmp" "$global_baseline" print_success "Baseline stored" } get_domain_baseline() { local domain="$1" local baseline_file="$BASELINE_DIR/${domain}_baseline.txt" if [ -f "$baseline_file" ]; then cat "$baseline_file" fi } calculate_baseline_average() { local domain="$1" local metric="$2" # requests, attacks, bots, etc. local days="${3:-7}" # default 7 days local baseline_file="$BASELINE_DIR/${domain}_baseline.txt" if [ ! -f "$baseline_file" ]; then echo "0" return fi # Get last N days local col=2 # requests by default case "$metric" in attacks) col=3 ;; bots) col=4 ;; high_risk) col=5 ;; esac tail -"$days" "$baseline_file" 2>/dev/null | awk -F'|' -v col="$col" '{sum+=$col; count++} END {if (count>0) print int(sum/count); else print 0}' } ############################################################################# # NEW: Attack Progression/Timeline Analysis ############################################################################# analyze_attack_progression() { print_info "Analyzing attack progression and sequences..." # For each high-risk IP, show the sequence of attacks awk -F'|' '$1 >= 70 {print $2}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | head -20 | while read -r ip; do local progression_file="$TEMP_DIR/progression_${ip}.txt" > "$progression_file" # Extract all requests from this IP, in order grep -F "$ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' '{ print $8 "|" $3 "|" $4 "|" $6 }' | sort >> "$progression_file" # Detect attack phases local phase="reconnaissance" local phase_start=$(head -1 "$progression_file" 2>/dev/null | cut -d'|' -f1) echo "$ip|$phase|$phase_start" >> "$TEMP_DIR/attack_phases.txt" done touch "$TEMP_DIR/attack_phases.txt" print_success "Attack progression analysis complete" } ############################################################################# # Header Analysis for Bot Detection ############################################################################# analyze_headers() { print_info "Analyzing request headers for bot patterns..." # Analyze header patterns to improve bot detection accuracy awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 domain = $2 url = $3 status = $4 size = $5 ua = $6 method = $7 timestamp = $8 referer = $9 accept_lang = $10 ua_lower = tolower(ua) referer_lower = tolower(referer) # Pattern 1: Empty or missing Accept-Language (bots often have none) if (accept_lang == "-" || accept_lang == "") { empty_lang[ip]++ } # Pattern 2: All-accepting Accept-Language (bots accept everything) # Real browsers: en-US,en;q=0.9 (specific negotiation) # Bots: */* or empty if (accept_lang == "*/*" || accept_lang == "*") { accepts_all[ip]++ } # Pattern 3: Suspicious Referer patterns # Bots often have no referer or fake ones if (referer == "-" || referer == "") { no_referer[ip]++ } # Pattern 4: Referer from suspicious sources if (match(referer_lower, /badbot|crawler|scanner|nikto|nmap|metasploit|sqlmap/)) { suspicious_referer[ip]++ } # Pattern 5: Referer mismatch (referer domain != target domain) # Real users: referer usually from same domain or search engine # Bots: random referer or none if (referer != "-" && !match(referer_lower, domain)) { if (!match(referer_lower, /google|bing|yahoo|facebook|twitter|reddit|instagram/)) { cross_domain_referer[ip]++ } } # Pattern 6: HEAD requests (bot reconnaissance) # Some bots use HEAD to test server without loading content if (method == "HEAD") { head_requests[ip]++ } # Pattern 7: Options/Trace requests (security testing) # Real users never use these if (method == "OPTIONS" || method == "TRACE") { dangerous_methods[ip]++ } } END { # Flag IPs with multiple suspicious header patterns for (ip in empty_lang) { score = 0 # Assign points for suspicious header combinations if (ip in empty_lang) score += 2 if (ip in accepts_all) score += 3 if (ip in no_referer) score += 1 if (ip in suspicious_referer) score += 5 if (ip in cross_domain_referer && (ip in no_referer)) score += 2 if (ip in head_requests && (head_requests[ip] > 50)) score += 4 if (ip in dangerous_methods) score += 10 # Only flag if high header suspicion score if (score >= 8) { print ip "|header_anomaly|" score > tmpdir "/header_anomalies.txt" } } close(tmpdir "/header_anomalies.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Create file if it doesn't exist touch "$TEMP_DIR/header_anomalies.txt" print_success "Header analysis complete" } ############################################################################# # NEW: Entry Point Analysis (where bots start) ############################################################################# analyze_entry_points() { print_info "Analyzing first request patterns (bot vs. user entry points)..." # Get first request from each IP awk -F'|' -v tmpdir="$TEMP_DIR" ' BEGIN { ip_first_request[ip] = url ip_first_status[ip] = status } { ip = $1 url = $3 status = $4 # Track first request from each IP (first occurrence in sorted logs) if (!(ip in first_seen)) { first_seen[ip] = 1 ip_first_request[ip] = url ip_first_status[ip] = status } } END { for (ip in ip_first_request) { url = ip_first_request[ip] status = ip_first_status[ip] url_lower = tolower(url) # Suspicious entry points indicate bot/scanner if (match(url_lower, /wp-admin|phpmyadmin|admin|xmlrpc|shell\.php|\.env|\.git|backdoor|config\.php/)) { print ip "|admin_entry|" url "|" status > tmpdir "/suspicious_entry_points.txt" } # Legitimate entry: homepage or search else if (match(url_lower, /^\/index|^\/$|^\/search|^\/page|^\/category/)) { print ip "|normal_entry|" url > tmpdir "/normal_entry_points.txt" } # Unusual but possible: static files else if (match(url_lower, /\.(css|js|jpg|png|gif|woff|svg)$/)) { print ip "|static_entry|" url > tmpdir "/static_entry_points.txt" } } close(tmpdir "/suspicious_entry_points.txt") close(tmpdir "/normal_entry_points.txt") close(tmpdir "/static_entry_points.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Count suspicious entry points if [ -f "$TEMP_DIR/suspicious_entry_points.txt" ]; then suspicious_count=$(wc -l < "$TEMP_DIR/suspicious_entry_points.txt") print_success "Found $suspicious_count IPs with suspicious entry points" else touch "$TEMP_DIR/suspicious_entry_points.txt" fi } ############################################################################# # Threat Detection ############################################################################# detect_threats() { print_info "Detecting security threats..." # Use a single AWK pass for multiple threat detections (more efficient) awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 domain = $2 url = $3 status = $4 size = $5 ua = $6 method = $7 url_lower = tolower(url) ua_lower = tolower(ua) # SQL Injection patterns (enhanced) # FIXED: Hex pattern now requires SQL context to avoid false positives on blockchain/product IDs if (match(url_lower, /union.*select|concat\(|benchmark\(|sleep\(|waitfor|cast\(|exec\(/) || match(url_lower, /information_schema|drop table|insert into|update.*set|delete from/) || match(url_lower, /%27.*(union|select|or |and )|hex\(|unhex\(|load_file\(/) || match(url_lower, /0x[0-9a-f]+.*(union|select|into|from|where|order)/)) { print ip "|" domain "|" url "|" status "|sqli" > tmpdir "/attack_vectors_raw.txt" } # XSS patterns # FIXED: DOM-based patterns (document.cookie, .innerhtml) only flagged in query strings # This prevents false positives on documentation URLs like /docs/innerhtml-api-guide if (match(url_lower, / tmpdir "/attack_vectors_raw.txt" } # Path Traversal / LFI # FIXED: Added URL-encoded variants (%2e%2e, %5c for backslash) # FIXED: Case-insensitive hex encoding support (%5C and %5c) if (match(url_lower, /\.\.\/|\.\.\\|%2e%2e|%5c|etc\/passwd|etc\/shadow|boot\.ini|win\.ini/) || match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows(%5c|[\/\\])system32/)) { print ip "|" domain "|" url "|" status "|path_traversal" > tmpdir "/attack_vectors_raw.txt" } # Shell upload / RCE attempts # FIXED: Removed overly broad "any POST to .php" condition that caused massive false positives # Now only detects actual shell commands, known malicious files, and suspicious upload patterns if (match(url_lower, /cmd\.exe|\/bin\/bash|\/bin\/sh|phpinfo\(|system\(|exec\(|passthru\(|eval\(/) || match(url_lower, /shell\.php|c99\.php|r57\.php|r00t\.php|backdoor|webshell|cmd\.php|exploit\.php/) || match(url_lower, /base64_decode.*eval|gzinflate.*eval|assert.*\$_/) || (match(url_lower, /\.(php|phtml|php3|php4|php5|phar)\.suspected$/) && method == "POST")) { print ip "|" domain "|" url "|" status "|rce_upload" > tmpdir "/attack_vectors_raw.txt" } # Info Disclosure attempts # FIXED: Added status code validation - only flag successful access (200/301/302) # FIXED: readme pattern now only matches actual files (.txt, .html, .md) # FIXED: Added more backup file extensions and URL-encoded variants # FIXED: phpinfo now only matches .php files (not documentation URLs) # FIXED: Removed sitemap.xml.gz (intentionally public for SEO) if (match(url_lower, /\.git\/|\.env|\.sql$|\.bak$|\.old$|\.backup$|\.orig$|\.swp$|\.sav$|~$|config\.php|phpinfo\.php/) || match(url_lower, /readme\.(txt|html|md)$/) || match(url_lower, /web\.config|\.htaccess|\.htpasswd/) || match(url_lower, /database\.sql|backup\.zip|backup\.tar|dump\.sql/)) { # Only flag if successful access (200) or redirect (301/302) # Failed attempts (404/403) are just scanning, tracked separately if (status ~ /^(200|301|302)/) { print ip "|" domain "|" url "|" status "|info_disclosure" > tmpdir "/attack_vectors_raw.txt" } } # composer.json / package.json - lower severity, only if successful if (match(url_lower, /composer\.json|package\.json|package-lock\.json/) && status == "200") { print ip "|" domain "|" url "|" status "|config_exposure" > tmpdir "/attack_vectors_raw.txt" } # Login bruteforce if (match(url_lower, /wp-login\.php|xmlrpc\.php/) && method == "POST") { print ip "|" domain "|" url "|" status "|login_bruteforce" > tmpdir "/attack_vectors_raw.txt" } # Admin/sensitive endpoint probing # FIXED: Only count FAILED attempts (403/401/404) - successful logins are legitimate if (match(url_lower, /wp-admin|phpmyadmin|admin|administrator|login|wp-login|xmlrpc/) || match(url_lower, /\.env|\.git|\.sql|backup|config\./)) { # Only flag failed access attempts (403 Forbidden, 401 Unauthorized, 404 Not Found) # Successful access (200/302) means legitimate user or already compromised if (status ~ /^(403|401|404)/) { print ip "|" domain "|" url > tmpdir "/admin_probes_raw.txt" } } # 404 scanning (reconnaissance) if (status == "404" || status == "403") { print ip "|" domain "|" url "|" status > tmpdir "/404_scans_raw.txt" } # Large data transfers (potential scraping) if (size > 1000000) { print ip "|" domain "|" url "|" size > tmpdir "/large_transfers_raw.txt" } # Suspicious user agents if (match(ua_lower, /nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp/) || match(ua_lower, /metasploit| tmpdir "/suspicious_ua_raw.txt" } # Track response codes for intelligence print status > tmpdir "/response_codes_raw.txt" } ' < "$TEMP_DIR/parsed_logs.txt" # Process attack vectors by type if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then # Overall attack vectors summary awk -F'|' '{print $5}' "$TEMP_DIR/attack_vectors_raw.txt" | sort | uniq -c | sort -rn > "$TEMP_DIR/attack_types.txt" # Breakdown by attack type for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do grep -F "|$attack_type|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | \ awk -F'|' '{print $1"|"$2"|"$3"|"$4}' | \ sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt" || true done # Old sqli file for backwards compatibility if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then cp "$TEMP_DIR/sqli_attempts.txt" "$TEMP_DIR/sqli_attempts_legacy.txt" fi else touch "$TEMP_DIR/attack_types.txt" fi # Process raw data into sorted/counted results if [ -f "$TEMP_DIR/admin_probes_raw.txt" ]; then sort "$TEMP_DIR/admin_probes_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/admin_probes.txt" else touch "$TEMP_DIR/admin_probes.txt" fi if [ -f "$TEMP_DIR/404_scans_raw.txt" ]; then sort "$TEMP_DIR/404_scans_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/404_scans.txt" else touch "$TEMP_DIR/404_scans.txt" fi if [ -f "$TEMP_DIR/large_transfers_raw.txt" ]; then sort "$TEMP_DIR/large_transfers_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/large_transfers.txt" else touch "$TEMP_DIR/large_transfers.txt" fi if [ -f "$TEMP_DIR/suspicious_ua_raw.txt" ]; then sort "$TEMP_DIR/suspicious_ua_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/suspicious_ua.txt" else touch "$TEMP_DIR/suspicious_ua.txt" fi # Process response codes if [ -f "$TEMP_DIR/response_codes_raw.txt" ]; then sort "$TEMP_DIR/response_codes_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/response_codes.txt" else touch "$TEMP_DIR/response_codes.txt" fi print_success "Threat detection complete" } ############################################################################# # NEW: URL Entropy Analysis (detects fuzzing/scanning) ############################################################################# analyze_url_entropy() { print_info "Analyzing URL parameter entropy (fuzzing detection)..." # Detect IPs that generate random parameters (scanning/fuzzing behavior) awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 url = $3 url_lower = tolower(url) # Extract base path (before query string) if (match(url, /([^?]+)/, path)) { base_path = path[1] } else { base_path = url } # Extract query parameter values (not keys) if (match(url, /\?(.+)/, query)) { param_string = query[1] # Count numeric parameters if (match(param_string, /[0-9]+/)) { numeric_params[ip base_path]++ } } # Track URLs from each IP urls_per_ip[ip]++ unique_paths[ip][base_path]++ } END { # Find IPs hitting many unique paths with numeric variations for (ip in urls_per_ip) { unique_path_count = length(unique_paths[ip]) # If IP hits >20 URLs with lots of numeric params = scanning if (urls_per_ip[ip] > 20 && unique_path_count > 5) { # Likely fuzzing/parameter scanning print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count > tmpdir "/fuzzing_ips.txt" } } close(tmpdir "/fuzzing_ips.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Create file if it doesn't exist touch "$TEMP_DIR/fuzzing_ips.txt" print_success "URL entropy analysis complete" } ############################################################################# # NEW: Request Timing Analysis (DDoS & bot behavior detection) ############################################################################# analyze_request_timing() { print_info "Analyzing request timing patterns (DDoS detection)..." # Analyze timing consistency to detect bots/DDoS awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 timestamp = $8 # Parse timestamp to get seconds (simplified) if (match(timestamp, /([0-9]{2}):([0-9]{2}):([0-9]{2})/, t)) { seconds = t[1] * 3600 + t[2] * 60 + t[3] # Store timestamps for analysis if (!(ip in request_times)) { request_count[ip] = 0 request_times[ip] = "" } request_count[ip]++ request_times[ip] = request_times[ip] seconds "," } } END { # Analyze timing patterns for (ip in request_count) { count = request_count[ip] # If more than 50 requests in the log if (count > 50) { # Split times and calculate average interval split(request_times[ip], times, ",") total_intervals = 0 interval_count = 0 for (i = 2; i < length(times); i++) { if (times[i] > 0 && times[i-1] > 0) { interval = times[i] - times[i-1] if (interval < 0) interval += 86400 # Handle day boundary total_intervals += interval interval_count++ } } if (interval_count > 0) { avg_interval = total_intervals / interval_count # Very consistent timing = bot (typically 0.5-2 seconds apart) # Real users: highly variable (5-60+ seconds) if (avg_interval < 3 && count > 100) { print ip "|consistent_bot_timing|" avg_interval "|" count > tmpdir "/timing_anomalies.txt" } } } } close(tmpdir "/timing_anomalies.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Create file if it doesn't exist touch "$TEMP_DIR/timing_anomalies.txt" print_success "Request timing analysis complete" } ############################################################################# # NEW: Fingerprinting - Combine multiple signals for accuracy ############################################################################# calculate_bot_fingerprint() { print_info "Calculating bot fingerprint confidence scores (combining multiple signals)..." # Each signal contributes to confidence that an IP is a bot # Real traffic rarely has ALL signals, bots typically have multiple awk -F'|' -v tmpdir="$TEMP_DIR" ' BEGIN { # Initialize tracking arrays fingerprint_file = tmpdir "/bot_fingerprints.txt" } { ip = $1 domain = $2 url = $3 status = $4 ua = $6 referer = $9 accept_lang = $10 ua_lower = tolower(ua) # Track per-IP fingerprint components if (ip in ip_seen) { ip_seen[ip]++ } else { ip_seen[ip] = 1 } # Signal 1: Bot-like User-Agent if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python|java[^script]|perl|ruby|node\.js|headless|mechanize/)) { ua_bot_signal[ip]++ } # Signal 2: Missing/unusual Accept-Language if (accept_lang == "-" || accept_lang == "" || accept_lang == "*/*") { header_anomaly_signal[ip]++ } # Signal 3: Missing Referer (bots often dont send it) if (referer == "-" || referer == "") { missing_referer[ip]++ } # Signal 4: Successful requests indicate not just scanning if (status ~ /^(200|301|302)/) { success_requests[ip]++ } # Signal 5: Direct admin/config access (suspicious entry) if (match(url, /\/(wp-admin|phpmyadmin|admin|config\.php|\.env|\.git|\.htaccess|web\.config)/)) { admin_access[ip]++ } } END { # Calculate fingerprint scores for each IP for (ip in ip_seen) { score = 0 signal_count = 0 # Each signal adds confidence if (ip in ua_bot_signal && ua_bot_signal[ip] > 0) { score += 20 signal_count++ } if (ip in header_anomaly_signal && header_anomaly_signal[ip] > 0) { score += 15 signal_count++ } if (ip in missing_referer && missing_referer[ip] > ip_seen[ip] * 0.7) { score += 15 # 70%+ requests missing referer signal_count++ } if (ip in admin_access && admin_access[ip] > 0) { score += 20 # Targeting admin areas signal_count++ } # Reduce score if mostly getting 200 OK (might be legitimate bot) if (ip in success_requests && success_requests[ip] > ip_seen[ip] * 0.7) { score -= 10 # Legitimate traffic (70%+ success) } # Multi-signal boost (confidence increases when multiple signals align) if (signal_count >= 3) { score += 25 # Strong indicator of bot when 3+ signals present } # Normalize to 0-100 if (score > 100) score = 100 if (score < 0) score = 0 # Output fingerprint for high-confidence bots (score >= 60) if (score >= 60) { printf "%s|%d|%d\n", ip, score, signal_count > fingerprint_file } } close(fingerprint_file) } ' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true # Create file if empty touch "$TEMP_DIR/bot_fingerprints.txt" fingerprint_count=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0") print_success "Fingerprint analysis complete ($fingerprint_count high-confidence bot IPs)" } ############################################################################# # NEW: Domain Targeting Analysis - Which domains are being attacked? ############################################################################# analyze_domain_targeting_percentage() { print_info "Analyzing per-domain attack patterns (what's attacking each domain)..." # Build per-domain attack data # Format: domain|attack_type|ip|count awk -F'|' -v tmpdir="$TEMP_DIR" ' NR == FNR { # Skip attack vectors file - using parsed_logs for all data next } { # Main log processing ip = $1 domain = $2 status = $4 # Track all IPs per domain ips_per_domain[domain][ip]++ request_count_per_domain[domain]++ } END { # Output: domain|unique_ips|request_count for (domain in ips_per_domain) { ip_count = 0 for (ip in ips_per_domain[domain]) ip_count++ printf "%s|%d|%d\n", domain, ip_count, request_count_per_domain[domain] } } ' <([ -f "$TEMP_DIR/attack_vectors_raw.txt" ] && cat "$TEMP_DIR/attack_vectors_raw.txt" || echo "") "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" || true # Also create per-domain attack type breakdown # Format: domain|attack_type|ip|count if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 domain = $2 attack_type = $5 # Store as domain -> attack_type -> ip -> count attack_data[domain][attack_type][ip]++ attack_totals[domain][attack_type]++ } END { for (domain in attack_data) { for (attack_type in attack_data[domain]) { total = attack_totals[domain][attack_type] for (ip in attack_data[domain][attack_type]) { count = attack_data[domain][attack_type][ip] printf "%s|%d|%d\n", attack_type "|" ip, count, total } } } } ' < "$TEMP_DIR/attack_vectors_raw.txt" fi print_success "Domain attack pattern analysis complete" } ############################################################################# # NEW: Top URLs Analysis - What files/endpoints are bots hitting? ############################################################################# analyze_top_urls_per_domain() { print_info "Analyzing top targeted URLs per domain..." # Get list of domains from targeting analysis if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then while IFS='|' read -r domain request_count pct; do local domain_file="$TEMP_DIR/domain_urls_${domain}.txt" # Extract all URLs for this domain, sorted by frequency (no arbitrary limit) awk -F'|' -v dom="$domain" ' $2 == dom { urls[$3]++ } END { for (url in urls) { printf "%s|%d\n", url, urls[url] } } ' < "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k2 -rn > "$domain_file" done < "$TEMP_DIR/domain_targeting.txt" fi print_success "Top URLs analysis complete" } ############################################################################# # NEW: Success Rate & Behavior Analysis (Added for accuracy improvement) ############################################################################# analyze_success_rates() { print_info "Analyzing request success rates and behavior patterns..." # Calculate success rate (200/301/302 vs 404/403) for each IP awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 status = $4 # Count total requests total[ip]++ # Count successful responses if (status ~ /^(200|301|302)/) { success[ip]++ } # Count failed/blocked responses else if (status ~ /^(404|403|401)/) { failed[ip]++ } } END { for (ip in total) { success_count = (success[ip] ? success[ip] : 0) failed_count = (failed[ip] ? failed[ip] : 0) success_rate = (total[ip] > 0) ? int((success_count / total[ip]) * 100) : 0 fail_rate = (total[ip] > 0) ? int((failed_count / total[ip]) * 100) : 0 # High failure rate indicates scanning/probing if (fail_rate >= 80 && total[ip] >= 20) { print ip "|" total[ip] "|" fail_rate "|scanner" >> tmpdir "/high_failure_ips.txt" } # Very high success rate + high volume could be scraping else if (success_rate >= 90 && total[ip] >= 100) { print ip "|" total[ip] "|" success_rate "|scraper" >> tmpdir "/high_success_ips.txt" } # Output all rates for later analysis print ip "|" total[ip] "|" success_rate "|" fail_rate >> tmpdir "/ip_success_rates.txt" } close(tmpdir "/high_failure_ips.txt") close(tmpdir "/high_success_ips.txt") close(tmpdir "/ip_success_rates.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Touch files if they don't exist touch "$TEMP_DIR/high_failure_ips.txt" "$TEMP_DIR/high_success_ips.txt" "$TEMP_DIR/ip_success_rates.txt" print_success "Success rate analysis complete" } ############################################################################# # Botnet Detection ############################################################################# detect_botnets() { print_info "Analyzing for botnet patterns..." # Group IPs by similar behavior patterns # Pattern 1: Multiple IPs hitting same URLs in coordinated manner awk -F'|' '{print $1"|"$3}' < "$TEMP_DIR/parsed_logs.txt" | \ sort | uniq -c | awk '$1 > 10 {print $2}' | \ cut -d'|' -f2 | sort | uniq -c | sort -rn | \ awk '$1 > 5 {print $2}' > "$TEMP_DIR/coordinated_urls.txt" # Pattern 2: IPs with similar User-Agents hitting multiple domains awk -F'|' '{print $1"|"$6}' < "$TEMP_DIR/parsed_logs.txt" | \ sort | uniq > "$TEMP_DIR/ip_ua_pairs.txt" # Pattern 3: Detect IP ranges (Class C networks) with suspicious activity awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | \ awk -F'.' '{print $1"."$2"."$3".0/24"}' | \ sort | uniq -c | sort -rn | awk '$1 > 20' > "$TEMP_DIR/suspicious_networks.txt" # Pattern 4: Rapid fire requests (DDoS indicators) # Extract timestamp and count requests per IP per minute awk -F'|' '{ ip = $1 timestamp = $8 # Extract date/time components (handles format: DD/MMM/YYYY:HH:MM:SS) if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2})/, ts)) { # Group by hour:minute for rapid-fire detection time_key = ts[3] ts[2] ts[1] "_" ts[4] ts[5] print ip "|" time_key } }' < "$TEMP_DIR/parsed_logs.txt" | \ sort | uniq -c | \ awk '$1 > 50 {print $1 " " $2}' | \ awk -F'|' '{print $1}' | \ awk 'BEGIN {ip=""} {ip=$2; count=$1; sum[ip]+=count; max[ip]=(count>max[ip]?count:max[ip])} END {for(ip in sum) print sum[ip], ip, max[ip]}' | \ sort -rn > "$TEMP_DIR/rapid_fire_ips.txt" print_success "Botnet analysis complete" } ############################################################################# # Server IP Detection ############################################################################# detect_server_ips() { print_info "Detecting server's own IP addresses..." > "$TEMP_DIR/server_ips.txt" # Method 1: Get all IPs from network interfaces if command -v hostname >/dev/null 2>&1; then hostname -I 2>/dev/null | tr ' ' '\n' | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' >> "$TEMP_DIR/server_ips.txt" || true fi # Method 2: Parse ip addr output if command -v ip >/dev/null 2>&1; then ip addr show 2>/dev/null | grep -oP 'inet \K[\d.]+' >> "$TEMP_DIR/server_ips.txt" || true fi # Method 3: Try ifconfig as fallback if command -v ifconfig >/dev/null 2>&1; then ifconfig 2>/dev/null | grep -oP 'inet (addr:)?\K[\d.]+' >> "$TEMP_DIR/server_ips.txt" || true fi # Method 4: Get public IP from external services (with timeout) # Try multiple services for reliability for service in "ifconfig.me/ip" "icanhazip.com" "ipecho.net/plain" "api.ipify.org"; do public_ip=$(curl -s --max-time 3 "$service" 2>/dev/null | grep -oE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' || true) if [ -n "$public_ip" ]; then echo "$public_ip" >> "$TEMP_DIR/server_ips.txt" break fi done # Method 5: Check cPanel server IP if available if [ -f "/var/cpanel/mainip" ]; then cat /var/cpanel/mainip >> "$TEMP_DIR/server_ips.txt" fi # Remove duplicates and empty lines sort -u "$TEMP_DIR/server_ips.txt" | grep -v '^$' > "$TEMP_DIR/server_ips_final.txt" || true mv "$TEMP_DIR/server_ips_final.txt" "$TEMP_DIR/server_ips.txt" server_ip_count=$(wc -l < "$TEMP_DIR/server_ips.txt" 2>/dev/null || echo 0) if [ "$server_ip_count" -gt 0 ]; then print_success "Detected $server_ip_count server IP(s) - these will be excluded from threat analysis" else print_warning "Could not detect server IPs automatically - proceeding without server IP filtering" fi } # Helper function to validate IP address format is_valid_ip() { local ip="$1" # IPv4 validation if [[ "$ip" =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then local IFS='.' local -a octets=($ip) for octet in "${octets[@]}"; do if [ "$octet" -gt 255 ]; then return 1 # Invalid fi done return 0 # Valid IPv4 fi # IPv6 basic validation (simplified) if [[ "$ip" =~ ^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$ ]]; then return 0 # Valid IPv6 fi return 1 # Invalid } # Helper function to check if an IP should be excluded is_excluded_ip() { local ip="$1" # First validate IP format if ! is_valid_ip "$ip"; then return 0 # Exclude invalid IPs fi # Check if private/internal IP if [[ "$ip" =~ ^127\. ]] || \ [[ "$ip" =~ ^10\. ]] || \ [[ "$ip" =~ ^192\.168\. ]] || \ [[ "$ip" =~ ^172\.(1[6-9]|2[0-9]|3[01])\. ]] || \ [[ "$ip" =~ ^169\.254\. ]] || \ [[ "$ip" == "localhost" ]] || \ [[ "$ip" == "::1" ]]; then return 0 # True - should be excluded fi # Check if it's the server's own IP (using pre-loaded array for speed) if [ -n "${server_ips_array[$ip]}" ]; then return 0 # True - should be excluded fi return 1 # False - should not be excluded } ############################################################################# # Time-Series Analysis ############################################################################# analyze_time_series() { print_info "Analyzing time-series patterns..." # Extract hourly bot traffic awk -F'|' '$9 != "unknown" { timestamp = $8 if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) { hour = ts[4] print hour } }' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true # Extract hourly attack traffic if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then # Parse timestamps from original parsed logs for IPs in attack vectors awk -F'|' 'NR==FNR {ips[$1]=1; next} $1 in ips { timestamp = $8 if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) { hour = ts[4] print hour } }' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt" fi print_success "Time-series analysis complete" } ############################################################################# # Threat Scoring ############################################################################# calculate_threat_scores() { print_info "Calculating threat scores..." # Pre-load server IPs for fast exclusion checking (avoids grep in loop) declare -A server_ips_array if [ -f "$TEMP_DIR/server_ips.txt" ]; then while read -r ip; do [ -n "$ip" ] && server_ips_array["$ip"]=1 done < "$TEMP_DIR/server_ips.txt" fi # Pre-count requests per IP (MUCH faster than grepping for each IP) declare -A ip_request_counts while IFS='|' read -r ip rest; do ((ip_request_counts["$ip"]++)) done < "$TEMP_DIR/parsed_logs.txt" # Build hash tables from threat files for O(1) lookups # OPTIMIZATION: Use awk instead of echo|awk|cut in loops (10x faster) declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login declare -A threat_ips_suspicious threat_ips_ddos threat_admin_count threat_404_count # Parse each threat file and build hash tables (using mapfile to avoid subshells) if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then mapfile -t sqli_ips < <(awk '{print $2}' "$TEMP_DIR/sqli_attempts.txt" | cut -d'|' -f1) for ip in "${sqli_ips[@]}"; do threat_ips_sqli["$ip"]=1; done fi if [ -f "$TEMP_DIR/xss_attempts.txt" ]; then mapfile -t xss_ips < <(awk '{print $2}' "$TEMP_DIR/xss_attempts.txt" | cut -d'|' -f1) for ip in "${xss_ips[@]}"; do threat_ips_xss["$ip"]=1; done fi if [ -f "$TEMP_DIR/path_traversal_attempts.txt" ]; then mapfile -t path_ips < <(awk '{print $2}' "$TEMP_DIR/path_traversal_attempts.txt" | cut -d'|' -f1) for ip in "${path_ips[@]}"; do threat_ips_path["$ip"]=1; done fi if [ -f "$TEMP_DIR/rce_upload_attempts.txt" ]; then mapfile -t rce_ips < <(awk '{print $2}' "$TEMP_DIR/rce_upload_attempts.txt" | cut -d'|' -f1) for ip in "${rce_ips[@]}"; do threat_ips_rce["$ip"]=1; done fi if [ -f "$TEMP_DIR/login_bruteforce_attempts.txt" ]; then mapfile -t login_ips < <(awk '{print $2}' "$TEMP_DIR/login_bruteforce_attempts.txt" | cut -d'|' -f1) for ip in "${login_ips[@]}"; do threat_ips_login["$ip"]=1; done fi if [ -f "$TEMP_DIR/suspicious_ua.txt" ]; then mapfile -t susp_ips < <(awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1) for ip in "${susp_ips[@]}"; do threat_ips_suspicious["$ip"]=1; done fi if [ -f "$TEMP_DIR/rapid_fire_ips.txt" ]; then mapfile -t ddos_ips < <(awk '{print $2}' "$TEMP_DIR/rapid_fire_ips.txt") for ip in "${ddos_ips[@]}"; do threat_ips_ddos["$ip"]=1; done fi # Parse count-based threat files [ -f "$TEMP_DIR/admin_probes.txt" ] && while read -r count ip; do [ -n "$ip" ] && threat_admin_count["$ip"]=$count done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" | sed 's/|.*//') [ -f "$TEMP_DIR/404_scans.txt" ] && while read -r count ip; do [ -n "$ip" ] && threat_404_count["$ip"]=$count done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" | sed 's/|.*//') # NEW: Load bot classifications to skip volume scoring for legitimate bots declare -A legit_bot_ips if [ -f "$TEMP_DIR/classified_bots.txt" ]; then while IFS='|' read -r ip domain url status size ua method timestamp bot_type bot_name; do if [ "$bot_type" = "legit" ]; then legit_bot_ips["$ip"]=1 fi done < "$TEMP_DIR/classified_bots.txt" fi # NEW: Load success rate data for scanning/scraping detection declare -A scanner_ips scraper_ips ip_fail_rates [ -f "$TEMP_DIR/high_failure_ips.txt" ] && while IFS='|' read -r ip total fail_rate category; do scanner_ips["$ip"]=$fail_rate done < "$TEMP_DIR/high_failure_ips.txt" [ -f "$TEMP_DIR/high_success_ips.txt" ] && while IFS='|' read -r ip total success_rate category; do scraper_ips["$ip"]=$success_rate done < "$TEMP_DIR/high_success_ips.txt" # Load all fail rates for threshold checks [ -f "$TEMP_DIR/ip_success_rates.txt" ] && while IFS='|' read -r ip total success_rate fail_rate; do ip_fail_rates["$ip"]=$fail_rate done < "$TEMP_DIR/ip_success_rates.txt" # NEW: Load header anomalies declare -A header_anomalies [ -f "$TEMP_DIR/header_anomalies.txt" ] && while IFS='|' read -r ip anomaly_type score; do header_anomalies["$ip"]=$score done < "$TEMP_DIR/header_anomalies.txt" # NEW: Load suspicious entry points declare -A suspicious_entry_ips [ -f "$TEMP_DIR/suspicious_entry_points.txt" ] && while IFS='|' read -r ip entry_type url status; do suspicious_entry_ips["$ip"]=1 done < "$TEMP_DIR/suspicious_entry_points.txt" # NEW: Load fuzzing/parameter scanning IPs declare -A fuzzing_ips [ -f "$TEMP_DIR/fuzzing_ips.txt" ] && while IFS='|' read -r ip fuzz_type total_urls unique_paths; do fuzzing_ips["$ip"]=$total_urls done < "$TEMP_DIR/fuzzing_ips.txt" # NEW: Load timing anomalies (consistent bot timing) declare -A timing_anomalies [ -f "$TEMP_DIR/timing_anomalies.txt" ] && while IFS='|' read -r ip timing_type avg_interval total_reqs; do timing_anomalies["$ip"]=$avg_interval done < "$TEMP_DIR/timing_anomalies.txt" # Now calculate scores for each IP (using pre-counted requests) for ip in "${!ip_request_counts[@]}"; do # Skip excluded IPs if is_excluded_ip "$ip"; then continue fi score=0 req_count=0 if [ -n "${ip_request_counts[$ip]}" ]; then req_count=${ip_request_counts[$ip]} fi # IMPROVED: Base request volume scoring # Skip volume scoring for legitimate bots (Google, Bing, etc.) if [ -z "${legit_bot_ips[$ip]}" ]; then # Not a legitimate bot - apply volume scoring if [ "${req_count:-0}" -gt 10000 ]; then score=$((score + 10)) elif [ "${req_count:-0}" -gt 5000 ]; then score=$((score + 8)) elif [ "${req_count:-0}" -gt 1000 ]; then score=$((score + 5)) elif [ "${req_count:-0}" -gt 500 ]; then score=$((score + 3)) fi fi # NEW: Success rate analysis bonuses # High failure rate (80%+ 404/403) = scanning behavior if [ -n "${scanner_ips[$ip]}" ]; then fail_rate=0 if [ -n "${scanner_ips[$ip]}" ]; then fail_rate=${scanner_ips[$ip]} fi if [ "${fail_rate:-0}" -ge 90 ]; then score=$((score + 8)) # Very high failure rate elif [ "${fail_rate:-0}" -ge 80 ]; then score=$((score + 5)) # High failure rate fi fi # High success rate (90%+ 200/301/302) + high volume = potential scraping if [ -n "${scraper_ips[$ip]}" ]; then local safe_req_count=$((req_count + 0)) [ "$safe_req_count" -gt 500 ] && score=$((score + 7)) # Scraping behavior fi # Attack patterns [ -n "${threat_ips_sqli[$ip]}" ] && score=$((score + 15)) [ -n "${threat_ips_xss[$ip]}" ] && score=$((score + 12)) [ -n "${threat_ips_path[$ip]}" ] && score=$((score + 15)) [ -n "${threat_ips_rce[$ip]}" ] && score=$((score + 20)) [ -n "${threat_ips_login[$ip]}" ] && score=$((score + 10)) [ -n "${threat_ips_suspicious[$ip]}" ] && score=$((score + 10)) [ -n "${threat_ips_ddos[$ip]}" ] && score=$((score + 10)) # NEW: Header anomalies (strong indicator of bots) if [ -n "${header_anomalies[$ip]}" ]; then header_score=${header_anomalies[$ip]:-0} if [ "${header_score:-0}" -ge 12 ]; then score=$((score + 8)) # Multiple header suspicions elif [ "${header_score:-0}" -ge 8 ]; then score=$((score + 5)) # Moderate header anomalies fi fi # NEW: Suspicious entry point (direct jump to admin/config) if [ -n "${suspicious_entry_ips[$ip]}" ]; then score=$((score + 6)) # Direct attack attempt without probing fi # NEW: Fuzzing/parameter scanning behavior if [ -n "${fuzzing_ips[$ip]}" ]; then fuzz_requests=${fuzzing_ips[$ip]:-0} if [ "${fuzz_requests:-0}" -gt 100 ]; then score=$((score + 7)) # Aggressive fuzzing elif [ "${fuzz_requests:-0}" -gt 50 ]; then score=$((score + 4)) # Moderate fuzzing fi fi # NEW: Timing anomalies (very consistent request timing = bot) if [ -n "${timing_anomalies[$ip]}" ]; then score=$((score + 6)) # Very consistent timing indicates automation fi # Admin probing - IMPROVED: Raised threshold to 50 (only failed attempts counted) admin_count=${threat_admin_count[$ip]:-0} if [ "${admin_count:-0}" -gt 100 ]; then score=$((score + 10)) # Excessive probing elif [ "${admin_count:-0}" -gt 50 ]; then score=$((score + 5)) # Moderate probing fi # 404 scanning scan_404=${threat_404_count[$ip]:-0} [ "${scan_404:-0}" -gt 50 ] && score=$((score + 3)) # OPTIMIZATION: Skip external API calls for performance # Threat Intelligence Enrichment can be done post-analysis for high-risk IPs only # Uncommenting these will SIGNIFICANTLY slow down analysis (API calls for every IP) # # To enable threat intelligence enrichment: # 1. Uncomment the code below # 2. Ensure check_abuseipdb, get_country_code, and is_high_risk_country functions exist # 3. Be aware this will make thousands of API calls and take much longer # # local abuse_data=$(check_abuseipdb "$ip" 2>/dev/null || echo "0|0|Unknown|Unknown") # IFS='|' read -r abuse_confidence abuse_reports abuse_country abuse_isp <<< "$abuse_data" # # if [ "$abuse_confidence" -ge 75 ]; then # score=$((score + 15)) # High confidence malicious # elif [ "$abuse_confidence" -ge 50 ]; then # score=$((score + 8)) # Moderate confidence # elif [ "$abuse_confidence" -ge 25 ]; then # score=$((score + 3)) # Low confidence # fi # # local geo_country=$(get_country_code "$ip" 2>/dev/null || echo "XX") # if is_high_risk_country "$geo_country" 2>/dev/null; then # score=$((score + 5)) # High-risk country bonus # fi # Cap at 100 [ "${score:-0}" -gt 100 ] && score=100 # Only output IPs with score > 0 [ "${score:-0}" -gt 0 ] && echo "$score|$ip|$req_count" # Track in centralized IP reputation database (background process) if [ "${score:-0}" -gt 0 ]; then ( # Update IP with hit count increment_ip_hits "$ip" "$req_count" >/dev/null 2>&1 # Tag with specific attack types found [ -n "${threat_ips_sqli[$ip]}" ] && flag_ip_attack "$ip" "SQL_INJECTION" 0 "Bot analyzer: SQL injection attempts" >/dev/null 2>&1 [ -n "${threat_ips_xss[$ip]}" ] && flag_ip_attack "$ip" "XSS" 0 "Bot analyzer: XSS attempts" >/dev/null 2>&1 [ -n "${threat_ips_path[$ip]}" ] && flag_ip_attack "$ip" "PATH_TRAVERSAL" 0 "Bot analyzer: Path traversal" >/dev/null 2>&1 [ -n "${threat_ips_rce[$ip]}" ] && flag_ip_attack "$ip" "RCE" 0 "Bot analyzer: RCE/shell upload attempts" >/dev/null 2>&1 [ -n "${threat_ips_login[$ip]}" ] && flag_ip_attack "$ip" "BRUTEFORCE" 0 "Bot analyzer: Login bruteforce" >/dev/null 2>&1 [ -n "${threat_ips_ddos[$ip]}" ] && flag_ip_attack "$ip" "DDOS" 0 "Bot analyzer: Rapid-fire requests" >/dev/null 2>&1 [ -n "${threat_ips_suspicious[$ip]}" ] && flag_ip_attack "$ip" "SCANNER" 0 "Bot analyzer: Suspicious user-agent" >/dev/null 2>&1 ) & fi done | sort -t'|' -k1 -rn > "$TEMP_DIR/threat_scores.txt" # Wait for background IP reputation updates to complete (don't fail if background jobs error) wait || true print_success "Threat scores calculated and IP reputation updated" } ############################################################################# # False Positive Detection ############################################################################# detect_false_positives() { print_info "Detecting legitimate services (false positives)..." # Known monitoring service patterns and legitimate CDNs awk -F'|' '{ ip = $1 domain = $2 url = $3 ua = tolower($6) # Monitoring Services if (match(ua, /pingdom/) || match(ua, /pingdom\.com_bot/)) { print ip "|Pingdom Monitoring|" ua "|" domain } else if (match(ua, /uptimerobot/)) { print ip "|UptimeRobot Monitoring|" ua "|" domain } else if (match(ua, /statuscake/)) { print ip "|StatusCake Monitoring|" ua "|" domain } # WordPress cache preload (WP Rocket, Hummingbird) else if (match(url, /admin-ajax\.php.*cache_preload/) || match(url, /admin-ajax\.php.*wphb/)) { print ip "|WordPress Cache Preload|" ua "|" domain } # Legitimate backup services else if (match(ua, /jetpack|vaultpress|updraftplus|backwpup/)) { print ip "|Backup Service|" ua "|" domain } # NEW: Google services else if (match(ua, /googlebot|google web preview|google-read-aloud|bingbot|slurp|duckduckbot/)) { print ip "|Search Engine Bot|" ua "|" domain } # NEW: Content delivery networks (usually legit) else if (match(ua, /cloudflare|akamai|fastly|cloudfront|edgecast|maxcdn|amazon/)) { print ip "|CDN Service|" ua "|" domain } # NEW: Analytics services else if (match(ua, /googleanalytics|fbexternalhit|twitterbot|linkedinbot|pinterestbot|whatsapp|telegram/)) { print ip "|Analytics\/Social Service|" ua "|" domain } # NEW: Payment processors (legitimate POST to checkout) else if (match(url, /checkout|payment|paypal|stripe|square/) && match(ua, /paypal|stripe|square/)) { print ip "|Payment Processor|" ua "|" domain } }' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u > "$TEMP_DIR/false_positives.txt" || true print_success "False positive detection complete ($(wc -l < "$TEMP_DIR/false_positives.txt" 2>/dev/null || echo 0) legitimate services identified)" } ############################################################################# # Statistical Analysis ############################################################################# generate_statistics() { print_info "Generating statistics..." # OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs # This reads the uncompressed file ONCE instead of 4+ separate reads awk -F'|' -v tmpdir="$TEMP_DIR" ' { # Count by domain (for top sites) domains[$2]++ # Count by IP (for top IPs) ips[$1]++ # Count by domain+URL (for top URLs) urls[$2"|"$3]++ } END { # Output top sites for (domain in domains) { print domains[domain], domain > tmpdir "/top_sites_raw.txt" } # Output top IPs for (ip in ips) { print ips[ip], ip > tmpdir "/top_ips_raw.txt" } # Output top URLs for (url in urls) { print urls[url], url > tmpdir "/top_urls_raw.txt" } close(tmpdir "/top_sites_raw.txt") close(tmpdir "/top_ips_raw.txt") close(tmpdir "/top_urls_raw.txt") }' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null # Sort and limit results (files may not exist if no data) [ -f "$TEMP_DIR/top_sites_raw.txt" ] && sort -rn "$TEMP_DIR/top_sites_raw.txt" | head -5 > "$TEMP_DIR/top_sites.txt" || touch "$TEMP_DIR/top_sites.txt" [ -f "$TEMP_DIR/top_ips_raw.txt" ] && sort -rn "$TEMP_DIR/top_ips_raw.txt" | head -5 > "$TEMP_DIR/top_ips.txt" || touch "$TEMP_DIR/top_ips.txt" [ -f "$TEMP_DIR/top_urls_raw.txt" ] && sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt" || touch "$TEMP_DIR/top_urls.txt" # Top 5 bots by request count (single decompression) awk -F'|' '$9 != "unknown" {print $10}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | \ sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" || true # Traffic breakdown by bot type (single decompression) awk -F'|' '{print $9}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | \ sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" || true # Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep) if [ -f "$TEMP_DIR/all_domains.txt" ]; then # Create indexed bot traffic file (decompress once) awk -F'|' '{print $2"|"$9}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null > "$TEMP_DIR/domain_bot_types.txt" || true while read -r domain; do echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt" grep -F "$domain|" "$TEMP_DIR/domain_bot_types.txt" 2>/dev/null | cut -d'|' -f2 | \ sort | uniq -c | sort -rn >> "$TEMP_DIR/domain_${domain}_stats.txt" || true done < "$TEMP_DIR/all_domains.txt" fi print_success "Statistics generated" } ############################################################################# # NEW: Comparison Reports (detect trends) ############################################################################# generate_comparison_report() { print_info "Generating trend analysis and baseline comparison..." # Store current results for comparison with previous analysis local history_dir="$TOOLKIT_TMP_DIR/analysis_history" mkdir -p "$history_dir" local timestamp=$(date +%Y%m%d_%H%M%S) local today=$(date +%Y%m%d) local latest_report="$history_dir/latest_analysis_$today.txt" # Extract key metrics from current analysis { echo "Timestamp: $timestamp" echo "Total_Requests: $(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo 0)" echo "Unique_IPs: $(awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo 0)" echo "High_Risk_IPs: $(awk -F'|' '$1 >= 70' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | wc -l || echo 0)" echo "Attack_Vectors: $(awk -F'|' '{print $5}' "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | sort -u | wc -l || echo 0)" echo "SQL_Injection: $(wc -l < "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null || echo 0)" echo "XSS_Attempts: $(wc -l < "$TEMP_DIR/xss_attempts.txt" 2>/dev/null || echo 0)" echo "Bot_Traffic: $(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo 0)" echo "Suspected_Scanners: $(wc -l < "$TEMP_DIR/high_failure_ips.txt" 2>/dev/null || echo 0)" echo "Header_Anomalies: $(wc -l < "$TEMP_DIR/header_anomalies.txt" 2>/dev/null || echo 0)" echo "Entry_Point_Suspicious: $(wc -l < "$TEMP_DIR/suspicious_entry_points.txt" 2>/dev/null || echo 0)" echo "Fuzzing_IPs: $(wc -l < "$TEMP_DIR/fuzzing_ips.txt" 2>/dev/null || echo 0)" } > "$latest_report" # NEW: Generate baseline comparison echo "" print_header "BASELINE COMPARISON (Is this activity normal?)" local total_requests=$(grep "^Total_Requests:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0") local baseline_requests=$(calculate_baseline_average "server" "requests" 7) if [ "${baseline_requests:-0}" -gt 0 ]; then local request_pct=$((${total_requests:-0} * 100 / baseline_requests)) if [ "$request_pct" -gt 200 ]; then echo -e "${RED}🔴 ABNORMAL: Requests are $(($request_pct - 100))% above 7-day average${NC}" echo " Baseline (7-day avg): $baseline_requests requests" echo " Today: $total_requests requests" elif [ "$request_pct" -lt 50 ]; then echo "🟢 LOW: Requests are $((100 - $request_pct))% below baseline" else echo "🟡 NORMAL: Requests within expected range" fi else echo "📊 (No historical baseline yet - first analysis)" fi local high_risk=$(grep "^High_Risk_IPs:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0") local baseline_attacks=$(calculate_baseline_average "server" "high_risk" 7) if [ "${baseline_attacks:-0}" -gt 0 ]; then local attack_ratio=$((${high_risk:-0} / baseline_attacks)) if [ "$attack_ratio" -gt 3 ]; then echo -e "${RED}🔴 ABNORMAL: High-risk IPs are ${attack_ratio}x above baseline${NC}" echo " Baseline (7-day avg): $baseline_attacks high-risk IPs" echo " Today: $high_risk high-risk IPs" elif [ "$high_risk" -gt "$baseline_attacks" ]; then echo -e "${YELLOW}🟡 ELEVATED: $high_risk high-risk IPs (baseline: $baseline_attacks)${NC}" else echo "🟢 NORMAL: High-risk IPs within expected range" fi fi # Compare with previous day's analysis local yesterday=$(date -d "1 day ago" +%Y%m%d 2>/dev/null || date -v-1d +%Y%m%d 2>/dev/null) local previous_report="$history_dir/latest_analysis_${yesterday}.txt" if [ -f "$previous_report" ]; then echo "" print_header "DAY-OVER-DAY TRENDS" # Extract metrics and calculate differences local curr_high_risk=$(grep "^High_Risk_IPs:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0") local prev_high_risk=$(grep "^High_Risk_IPs:" "$previous_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0") local risk_diff=$((${curr_high_risk:-0} - ${prev_high_risk:-0})) local risk_pct=0 if [ "${prev_high_risk:-0}" -gt 0 ]; then risk_pct=$((risk_diff * 100 / prev_high_risk)) fi # Display trend if [ "$risk_diff" -gt 0 ]; then echo "⚠️ High-Risk IPs: ${curr_high_risk:-0} (↑ $risk_diff IPs, +${risk_pct}%)" elif [ "$risk_diff" -lt 0 ]; then echo "✓ High-Risk IPs: ${curr_high_risk:-0} (↓ $((risk_diff * -1)) IPs, ${risk_pct}%)" else echo "→ High-Risk IPs: ${curr_high_risk:-0} (no change)" fi # Repeat for other metrics local curr_sql=$(grep "^SQL_Injection:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0") local prev_sql=$(grep "^SQL_Injection:" "$previous_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0") local sql_diff=$((curr_sql - prev_sql)) if [ "$sql_diff" -gt 0 ]; then echo "⚠️ SQL Injection: $curr_sql (↑ $sql_diff new attempts)" elif [ "$sql_diff" -lt 0 ]; then echo "✓ SQL Injection: $curr_sql (↓ $((sql_diff * -1)) fewer)" else echo "→ SQL Injection: $curr_sql (stable)" fi # Track repeat attackers local repeat_attackers=0 if [ -f "$history_dir/known_attackers_${yesterday}.txt" ]; then repeat_attackers=$(comm -12 <(awk -F'|' '$1 >= 70 {print $2}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | sort -u) <(sort -u "$history_dir/known_attackers_${yesterday}.txt") 2>/dev/null | wc -l || echo 0) if [ "$repeat_attackers" -gt 0 ]; then echo -e "${RED}🔄 REPEAT ATTACKERS: $repeat_attackers IPs from yesterday${NC}" fi fi fi # Save current high-risk IPs for tomorrow's comparison awk -F'|' '$1 >= 70 {print $2}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | sort -u > "$history_dir/known_attackers_${today}.txt" } ############################################################################# # Report Generation ############################################################################# generate_report() { exec > >(tee "$OUTPUT_FILE") echo "===============================================================" echo " APACHE/CPANEL BOT & BOTNET ANALYSIS REPORT" echo " Generated: $(date '+%Y-%m-%d %H:%M:%S')" echo "===============================================================" # CRITICAL ALERTS SECTION print_header "CRITICAL ALERTS" alert_count=0 # Check for attack vectors if [ -s "$TEMP_DIR/attack_types.txt" ]; then print_alert "Security Attack Vectors Detected:" while read -r line; do count=$(echo "$line" | awk '{print $1}') attack_type=$(echo "$line" | awk '{print $2}') case $attack_type in sqli) echo " SQL Injection: $count attempts" ;; xss) echo " XSS Attacks: $count attempts" ;; path_traversal) echo " Path Traversal: $count attempts" ;; rce_upload) echo " RCE/Shell Upload: $count attempts" ;; info_disclosure) echo " Info Disclosure: $count attempts" ;; login_bruteforce) echo " Login Bruteforce: $count attempts" ;; esac done < "$TEMP_DIR/attack_types.txt" echo "" alert_count=$((alert_count + 1)) fi # Check for suspicious scanners if [ -s "$TEMP_DIR/suspicious_ua.txt" ]; then scanner_count=$(wc -l < "$TEMP_DIR/suspicious_ua.txt") print_alert "Malicious scanners detected: $scanner_count IPs" echo " Top scanners:" head -3 "$TEMP_DIR/suspicious_ua.txt" | while read -r line; do count=$(echo "$line" | awk '{print $1}') ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1) ua=$(echo "$line" | cut -d'|' -f2) printf " %s requests - IP: %s - UA: %s\n" "$count" "$ip" "$ua" done echo "" alert_count=$((alert_count + 1)) fi # NEW: Check for header anomalies (bot signatures) if [ -s "$TEMP_DIR/header_anomalies.txt" ]; then header_count=$(wc -l < "$TEMP_DIR/header_anomalies.txt") print_alert "Header-based bot signatures detected: $header_count IPs" echo " These IPs show suspicious header patterns (missing/unusual Accept-Language, Referer, etc.)" head -5 "$TEMP_DIR/header_anomalies.txt" | while read -r line; do ip=$(echo "$line" | awk -F'|' '{print $1}') anomaly_type=$(echo "$line" | awk -F'|' '{print $2}') score=$(echo "$line" | awk -F'|' '{print $3}') printf " • %s - Anomaly score: %s (detected: %s)\n" "$ip" "$score" "$anomaly_type" done echo "" alert_count=$((alert_count + 1)) fi # NEW: Check for suspicious entry points if [ -s "$TEMP_DIR/suspicious_entry_points.txt" ]; then entry_count=$(wc -l < "$TEMP_DIR/suspicious_entry_points.txt") print_alert "Suspicious entry points detected: $entry_count IPs" echo " These IPs skip homepage/search and go straight to admin/config:" head -5 "$TEMP_DIR/suspicious_entry_points.txt" | while read -r line; do ip=$(echo "$line" | awk -F'|' '{print $1}') url=$(echo "$line" | awk -F'|' '{print $3}') status=$(echo "$line" | awk -F'|' '{print $4}') printf " • %s → %s (HTTP %s)\n" "$ip" "$url" "$status" done echo "" alert_count=$((alert_count + 1)) fi # NEW: Check for fuzzing/scanning behavior if [ -s "$TEMP_DIR/fuzzing_ips.txt" ]; then fuzz_count=$(wc -l < "$TEMP_DIR/fuzzing_ips.txt") print_alert "Parameter fuzzing/scanning detected: $fuzz_count IPs" echo " These IPs are testing random parameters (vulnerability scanning):" head -5 "$TEMP_DIR/fuzzing_ips.txt" | while read -r line; do ip=$(echo "$line" | awk -F'|' '{print $1}') total_urls=$(echo "$line" | awk -F'|' '{print $3}') unique_paths=$(echo "$line" | awk -F'|' '{print $4}') printf " • %s - %s URLs across %s paths\n" "$ip" "$total_urls" "$unique_paths" done echo "" alert_count=$((alert_count + 1)) fi # NEW: Check for timing anomalies (bot signatures) if [ -s "$TEMP_DIR/timing_anomalies.txt" ]; then timing_count=$(wc -l < "$TEMP_DIR/timing_anomalies.txt") print_alert "Consistent timing pattern detected: $timing_count IPs" echo " These IPs show mechanical request patterns (bot behavior):" head -5 "$TEMP_DIR/timing_anomalies.txt" | while read -r line; do ip=$(echo "$line" | awk -F'|' '{print $1}') avg_interval=$(echo "$line" | awk -F'|' '{print $3}') total_reqs=$(echo "$line" | awk -F'|' '{print $4}') printf " • %s - %.1f seconds average between requests (%s total requests)\n" "$ip" "$avg_interval" "$total_reqs" done echo "" alert_count=$((alert_count + 1)) fi # Check for rapid-fire IPs (potential DDoS) if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then ddos_count=$(wc -l < "$TEMP_DIR/rapid_fire_ips.txt") print_alert "Potential DDoS sources: $ddos_count IPs with >50 req/min" echo " Top offenders:" head -3 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print " "$2" - "$1" rapid requests"}' echo "" alert_count=$((alert_count + 1)) fi # Check for suspicious networks if [ -s "$TEMP_DIR/suspicious_networks.txt" ]; then net_count=$(wc -l < "$TEMP_DIR/suspicious_networks.txt") print_alert "Suspicious networks detected: $net_count Class C ranges" echo " Top networks:" head -3 "$TEMP_DIR/suspicious_networks.txt" | awk '{print " "$2" - "$1" requests"}' echo "" alert_count=$((alert_count + 1)) fi if [ "${alert_count:-0}" -eq 0 ]; then print_success "No critical threats detected" fi # QUICK STATS DASHBOARD print_header "QUICK STATS DASHBOARD" total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0") unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo "0") unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo "0") bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo "0") # Count private/internal IPs (excluded from threat analysis) private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' 2>/dev/null | wc -l || echo "0") # Count server's own IPs in the logs server_ip_hits=0 if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then while read -r server_ip; do if grep -q "^$server_ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null; then server_ip_hits=$((server_ip_hits + 1)) fi done < "$TEMP_DIR/server_ips.txt" fi echo "Total Requests: $(printf "%'d" $total_requests)" echo "Unique IPs: $(printf "%'d" $unique_ips)" # Show breakdown if we have excluded IPs if [ "$private_ips" -gt 0 ] || [ "$server_ip_hits" -gt 0 ]; then excluded_total=$((private_ips + server_ip_hits)) echo " ├─ Excluded IPs: $(printf "%'d" $excluded_total)" [ "$private_ips" -gt 0 ] && echo " │ ├─ Private/Internal: $private_ips" [ "$server_ip_hits" -gt 0 ] && echo " │ └─ Server's own: $server_ip_hits" echo " └─ External IPs: $(printf "%'d" $((unique_ips - excluded_total)))" fi echo "Domains Analyzed: $unique_domains" echo "Bot Requests: $(printf "%'d" $bot_requests) ($(awk "BEGIN {printf \"%.1f\", ($bot_requests/$total_requests)*100}")%)" # Show detected server IPs if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then echo "" echo " Server IPs Detected (excluded from threat analysis):" while read -r server_ip; do echo " • $server_ip" done < "$TEMP_DIR/server_ips.txt" fi echo "" # Traffic breakdown echo "Traffic Breakdown:" while read -r line; do count=$(echo "$line" | awk '{print $1}' || echo "0") type=$(echo "$line" | awk '{print $2}' || echo "unknown") pct=$(awk "BEGIN {printf \"%.1f\", (${count:-0}/${total_requests:-1})*100}" 2>/dev/null || echo "0.0") case $type in legit) echo " Legitimate Bots: $(printf "%'7d" $count) ($pct%)" ;; ai) echo " AI Bots: $(printf "%'7d" $count) ($pct%)" ;; monitor) echo " 📡 Monitoring/SEO: $(printf "%'7d" $count) ($pct%)" ;; suspicious) echo " Suspicious Bots: $(printf "%'7d" $count) ($pct%)" ;; unidentified_bot) echo " ❓ Unidentified Bots: $(printf "%'7d" $count) ($pct%)" ;; unknown) echo " Regular Traffic: $(printf "%'7d" $count) ($pct%)" ;; esac done < "$TEMP_DIR/traffic_breakdown.txt" # TIME-SERIES ANALYSIS if [ -s "$TEMP_DIR/hourly_bot_traffic.txt" ]; then echo "" echo "Bot Traffic Timeline (hourly):" max_bot_traffic=$(awk '{print $1}' "$TEMP_DIR/hourly_bot_traffic.txt" | sort -rn | head -1) max_bot_traffic=${max_bot_traffic:-1} # Prevent division by zero while read -r line; do count=$(echo "$line" | awk '{print $1}') hour=$(echo "$line" | awk '{print $2}') # Create simple bar chart bar_width=$((count * 10 / max_bot_traffic)) [ "${bar_width:-0}" -eq 0 ] && [ "${count:-0}" -gt 0 ] && bar_width=1 bar=$(printf '█%.0s' $(seq 1 $bar_width)) spaces=$(printf '░%.0s' $(seq 1 $((10 - bar_width)))) # Detect spikes (>2x average) avg_traffic=$((total_requests / 24)) spike="" [ ${count:-0} -gt $((avg_traffic * 2)) ] && spike=" SPIKE" # Strip leading zeros to avoid octal interpretation hour_num=$((10#$hour)) next_hour=$((hour_num + 1)) printf " %02d:00-%02d:00: %s%s %'6d bot requests%s\n" "$hour_num" "$next_hour" "$bar" "$spaces" "$count" "$spike" done < "$TEMP_DIR/hourly_bot_traffic.txt" fi # RESPONSE CODE INTELLIGENCE if [ -s "$TEMP_DIR/response_codes.txt" ]; then echo "" echo "Response Code Analysis:" while read -r line; do count=$(echo "$line" | awk '{print $1}' || echo "0") code=$(echo "$line" | awk '{print $2}' || echo "000") pct=$(awk "BEGIN {printf \"%.1f\", (${count:-0}/${total_requests:-1})*100}" 2>/dev/null || echo "0.0") case $code in 200) echo " 200 (Success): $(printf "%'7d" $count) ($pct%) Bots are getting data" ;; 404) echo " 404 (Not Found): $(printf "%'7d" $count) ($pct%) Scanning for vulnerabilities" ;; 403) echo " 403 (Forbidden): $(printf "%'7d" $count) ($pct%) Blocked by existing rules" ;; 401) echo " 401 (Unauthorized):$(printf "%'7d" $count) ($pct%) Login attempts failing" ;; 500|502|503) echo " $code (Server Error):$(printf "%'7d" $count) ($pct%) Check if exploit triggered" ;; 301|302) echo " $code (Redirect): $(printf "%'7d" $count) ($pct%)" ;; *) echo " $code: $(printf "%'7d" $count) ($pct%)" ;; esac done < "$TEMP_DIR/response_codes.txt" | head -7 fi # FALSE POSITIVE WARNINGS if [ -s "$TEMP_DIR/false_positives.txt" ]; then echo "" echo "Whitelist Recommendations (Legitimate Services):" while read -r line; do ip=$(echo "$line" | cut -d'|' -f1) service=$(echo "$line" | cut -d'|' -f2) domain=$(echo "$line" | cut -d'|' -f4) req_count=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -c "^$ip|" || echo 0) echo " $ip - $req_count requests - Identified as: $service" echo " → Domain: $domain" echo " → Action: VERIFY OWNERSHIP then whitelist" done < "$TEMP_DIR/false_positives.txt" | head -6 fi # NEW: HIGH-CONFIDENCE BOT FINGERPRINTS if [ -s "$TEMP_DIR/bot_fingerprints.txt" ]; then echo "" print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)" echo "These IPs show MULTIPLE bot indicators combined (not just single signal):" echo "" awk -F'|' ' NR <= 15 { ip = $1 score = $2 signals = $3 # Risk level based on score if (score >= 80) risk = "CRITICAL" else if (score >= 70) risk = "HIGH" else if (score >= 60) risk = "MEDIUM" else risk = "LOW" printf " %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals }' "$TEMP_DIR/bot_fingerprints.txt" total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0") echo "" echo " Total high-confidence bots detected: $total IPs" echo "" else echo "" echo " No high-confidence bot fingerprints detected (requires multiple signals)" echo "" fi # NEW: DOMAIN ATTACK TARGETING ANALYSIS (what's attacking each domain) if [ -s "$TEMP_DIR/domain_targeting.txt" ]; then echo "" print_header "DOMAIN ATTACK TARGETING (Which domains are under attack & from where?)" echo "" total_domains=$(wc -l < "$TEMP_DIR/domain_targeting.txt" 2>/dev/null || echo "0") echo "Total domains with attacks detected: $total_domains" echo "" # Show top attacked domains with attack details awk -F'|' 'NR <= 10 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do domain_attack_count=$(grep -F "|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0") if [ "$domain_attack_count" -gt 0 ]; then echo " Domain: $domain ($domain_attack_count attack attempts)" # Get all attacks on this domain, group by type awk -F'|' -v dom="$domain" ' $2 == dom { ip = $1 attack_type = $5 # Validate IP format if (match(ip, /^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/)) { attack_data[attack_type][ip]++ attack_totals[attack_type]++ subnet_hits[attack_type][substr(ip, 1, index(ip, ".", index(ip, ".")+1)-1)]++ } } END { for (attack_type in attack_totals) { printf " └─ %s: %d attempts\n", attack_type, attack_totals[attack_type] # Show top 3 IPs for this attack type attack_count = 0 for (ip in attack_data[attack_type]) { if (attack_count >= 3) break count = attack_data[attack_type][ip] split(ip, parts, ".") subnet = parts[1] "." parts[2] "." parts[3] ".0/24" printf " ├─ %s (%d reqs) [subnet: %s]\n", ip, count, subnet attack_count++ } } }' "$TEMP_DIR/attack_vectors_raw.txt" echo "" fi done else echo "" echo " No domain attack data available (all domains may be healthy)" echo "" fi # NEW: TOP URLs BEING ATTACKED if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then echo "" print_header "TOP TARGETED URLs (What files/endpoints are bots hitting?)" echo "" # Show top URLs for top 3 most-attacked domains urls_shown=0 awk -F'|' 'NR <= 3 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do local domain_file="$TEMP_DIR/domain_urls_${domain}.txt" if [ -f "$domain_file" ] && [ -s "$domain_file" ]; then echo " Domain: $domain" awk -F'|' '{ url = $1 count = $2 printf " %3d requests → %s\n", count, url }' "$domain_file" # Show all URLs, not just top 5 echo "" fi done # Check if no URL data was shown if [ "$urls_shown" -eq 0 ]; then echo " No URL targeting data available" echo "" fi else echo "" echo " No domain targeting data available" echo "" fi # TOP 5 THREATS print_header "TOP 5 THREATS (with recommended actions)" echo "1. Highest Risk IPs (by threat score):" if [ -s "$TEMP_DIR/threat_scores.txt" ]; then counter=1 while read -r line && [ "${counter:-0}" -le 10 ]; do score=$(echo "$line" | cut -d'|' -f1) ip=$(echo "$line" | cut -d'|' -f2) count=$(echo "$line" | cut -d'|' -f3) # Determine threat level and action based on score if [ "$score" -ge 80 ]; then threat_level="CRITICAL" threat_icon="" action="BLOCK IMMEDIATELY + INVESTIGATE" echo -e " ${RED}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}" elif [ "$score" -ge 60 ]; then threat_level="HIGH" threat_icon="" action="BLOCK or AGGRESSIVE RATE LIMIT" echo -e " ${YELLOW}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}" elif [ "$score" -ge 40 ]; then threat_level="MODERATE" threat_icon="" action="RATE LIMIT RECOMMENDED" echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level" else threat_level="LOW" threat_icon="" action="MONITOR" echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level" fi echo " $count requests - Action: $action" # Show which attack vectors this IP used attack_types="" grep -q "$ip" "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null && attack_types="${attack_types}SQL-Injection " grep -q "$ip" "$TEMP_DIR/xss_attempts.txt" 2>/dev/null && attack_types="${attack_types}XSS " grep -q "$ip" "$TEMP_DIR/path_traversal_attempts.txt" 2>/dev/null && attack_types="${attack_types}Path-Traversal " grep -q "$ip" "$TEMP_DIR/rce_upload_attempts.txt" 2>/dev/null && attack_types="${attack_types}RCE/Upload " grep -q "$ip" "$TEMP_DIR/login_bruteforce_attempts.txt" 2>/dev/null && attack_types="${attack_types}Login-Bruteforce " grep -q "$ip" "$TEMP_DIR/suspicious_ua.txt" 2>/dev/null && attack_types="${attack_types}Scanner-UA " grep -q "$ip" "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null && attack_types="${attack_types}DDoS-Pattern " [ -n "$attack_types" ] && echo " Attack vectors: $attack_types" counter=$((counter + 1)) done < "$TEMP_DIR/threat_scores.txt" else echo " No significant threats detected " fi echo "" echo "2. Top Aggressive Bots:" counter=1 while read -r line && [ "${counter:-0}" -le 5 ]; do count=$(echo "$line" | awk 'BEGIN {count=0} {print $1}') bot=$(echo "$line" | awk 'BEGIN {f=""} {$1=""; print $0}' | xargs) action="Allow" if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex"; then action="Consider blocking (aggressive)" fi echo " [$counter] $bot - $count requests - Action: $action" counter=$((counter + 1)) done < "$TEMP_DIR/top_bots.txt" echo "" echo "3. Admin Endpoint Probing:" if [ -s "$TEMP_DIR/admin_probes.txt" ]; then head -3 "$TEMP_DIR/admin_probes.txt" | while read -r line; do count=$(echo "$line" | awk '{print $1}') ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1) domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2) url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3) printf " %s attempts - IP: %s - %s%s\n" "$count" "$ip" "$domain" "$url" done echo " Action: Verify legitimate admin access or block" else echo " None detected " fi echo "" echo "4. 404 Scanners (Reconnaissance):" if [ -s "$TEMP_DIR/404_scans.txt" ]; then head -3 "$TEMP_DIR/404_scans.txt" | awk '$1 > 10 { count = $1 $1 = "" gsub(/^[[:space:]]+\|?/, "") split($0, parts, "|") printf " %s failed requests - IP: %s - %s%s\n", count, parts[1], parts[2], parts[3] }' else echo " None detected " fi echo "" echo "5. Large Data Transfers:" if [ -s "$TEMP_DIR/large_transfers.txt" ]; then # Calculate total bot bandwidth total_bot_bandwidth=0 if [ -f "$TEMP_DIR/classified_bots.txt.gz" ]; then total_bot_bandwidth=$(awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/classified_bots.txt") fi if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then bot_bandwidth_mb=$(awk "BEGIN {printf \"%.0f\", $total_bot_bandwidth/1048576}") bot_bandwidth_gb=$(awk "BEGIN {printf \"%.2f\", $total_bot_bandwidth/1073741824}") # Estimate cost at $0.09/GB (typical CDN pricing) estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}") total_bandwidth=$(awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/parsed_logs.txt") bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}") echo "" echo " 💰 Bandwidth Impact:" echo " Total bot bandwidth: ${bot_bandwidth_mb} MB (${bot_bandwidth_gb} GB) - ${bot_pct}% of total" echo " Estimated cost: \$$estimated_cost (at \$0.09/GB CDN pricing)" fi echo "" echo " Top bandwidth consumers:" head -3 "$TEMP_DIR/large_transfers.txt" | while read -r line; do count=$(echo "$line" | awk '{print $1}' || echo "0") ip=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f1 || echo "unknown") domain=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f2 || echo "unknown") url=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f3 || echo "unknown") size=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f4 || echo "0") size_mb=$(awk "BEGIN {printf \"%.1f\", ${size:-0}/1048576}" 2>/dev/null || echo "0.0") total_ip_mb=$(awk "BEGIN {printf \"%.0f\", ${size:-0} * ${count:-0} / 1048576}" 2>/dev/null || echo "0") printf " %s transfers from %s - %.1f MB avg (%s MB total) - %s%s\n" "$count" "$ip" "$size_mb" "$total_ip_mb" "$domain" "$url" done echo " Action: Verify if scraping, consider serving WebP/optimized images" else echo " None detected " fi # TOP 5 TARGETED SITES print_header "TOP 5 TARGETED SITES (with risk breakdown)" counter=1 while read -r line && [ "${counter:-0}" -le 5 ]; do count=$(echo "$line" | awk '{print $1}' || echo "0") domain=$(echo "$line" | awk '{print $2}' || echo "unknown") echo "[$counter] $domain - $count requests" # Show traffic breakdown for this domain if [ -f "$TEMP_DIR/domain_${domain}_stats.txt" ]; then tail -n +2 "$TEMP_DIR/domain_${domain}_stats.txt" | while read -r stat_line; do stat_count=$(echo "$stat_line" | awk '{print $1}' || echo "0") stat_type=$(echo "$stat_line" | awk '{print $2}' || echo "unknown") pct=$(awk "BEGIN {printf \"%.1f\", (${stat_count:-0}/${count:-1})*100}" 2>/dev/null || echo "0.0") case $stat_type in suspicious) echo -e " ${YELLOW}Suspicious: $stat_count ($pct%)${NC}" ;; ai) echo " AI Bots: $stat_count ($pct%)" ;; legit) echo " Legit Bots: $stat_count ($pct%)" ;; unknown) echo " Regular: $stat_count ($pct%)" ;; *) echo " $stat_type: $stat_count ($pct%)" ;; esac done fi echo "" counter=$((counter + 1)) done < "$TEMP_DIR/top_sites.txt" # BLOCKLIST print_header "COPY-PASTE READY BLOCKLIST (Prioritized by Threat Score)" echo "# Apache .htaccess format:" echo "# Add to .htaccess in document root" echo "# IPs sorted by risk score (highest first)" echo "" # Use threat scores to prioritize blocklist (exclude false positives and excluded IPs) if [ -s "$TEMP_DIR/threat_scores.txt" ]; then # Get IPs with score >= 60 (HIGH and CRITICAL) awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do ip=$(echo "$entry" | cut -d'|' -f1) score=$(echo "$entry" | cut -d'|' -f2) # Skip excluded IPs (private, localhost, server's own) if is_excluded_ip "$ip"; then continue fi # Skip if in false positives if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then continue fi echo "Deny from $ip # Risk score: $score/100" done else # Fallback to old method { [ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1 [ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}' } | sort -u | head -30 | while read -r ip; do echo "Deny from $ip" done fi echo "" echo "# cPanel User-Agent blocking (add to /etc/apache2/conf.d/includes/pre_main_global.conf):" echo "" echo "" echo " RewriteEngine On" echo " RewriteCond %{HTTP_USER_AGENT} \"(nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp|metasploit)\" [NC]" echo " RewriteRule ^ - [F,L]" echo "" echo "" echo "# Optional: Block aggressive SEO bots (uncomment to enable)" echo "# " echo "# RewriteEngine On" echo "# RewriteCond %{HTTP_USER_AGENT} \"(AhrefsBot|SemrushBot|MJ12bot|DotBot|Meta-ExternalAgent|Go-http-client)\" [NC]" echo "# RewriteRule ^ - [F,L]" echo "# " echo "" echo "# CSF/iptables format:" echo "# Run these commands as root:" echo "" # Same prioritized list for CSF if [ -s "$TEMP_DIR/threat_scores.txt" ]; then awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do ip=$(echo "$entry" | cut -d'|' -f1) score=$(echo "$entry" | cut -d'|' -f2) # Skip excluded IPs (private, localhost, server's own) if is_excluded_ip "$ip"; then continue fi # Skip if in false positives if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then continue fi echo "csf -d $ip \"Threat score: $score/100\"" done else # Fallback { [ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1 [ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}' } | sort -u | head -30 | while read -r ip; do echo "csf -d $ip \"Bot/Scanner threat\"" done fi # SUMMARY print_header "📋 SUMMARY & RECOMMENDATIONS" threat_score=0 # Calculate threat score from attack vectors [ -s "$TEMP_DIR/sqli_attempts.txt" ] && threat_score=$((threat_score + 15)) [ -s "$TEMP_DIR/xss_attempts.txt" ] && threat_score=$((threat_score + 12)) [ -s "$TEMP_DIR/path_traversal_attempts.txt" ] && threat_score=$((threat_score + 15)) [ -s "$TEMP_DIR/rce_upload_attempts.txt" ] && threat_score=$((threat_score + 20)) [ -s "$TEMP_DIR/login_bruteforce_attempts.txt" ] && threat_score=$((threat_score + 10)) [ -s "$TEMP_DIR/suspicious_ua.txt" ] && threat_score=$((threat_score + 8)) [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && threat_score=$((threat_score + 5)) [ $(wc -l < "$TEMP_DIR/admin_probes.txt" 2>/dev/null || echo 0) -gt 10 ] && threat_score=$((threat_score + 3)) # Count high-risk IPs high_risk_count=0 if [ -s "$TEMP_DIR/threat_scores.txt" ]; then high_risk_count=$(awk -F'|' '$1 >= 60' "$TEMP_DIR/threat_scores.txt" | wc -l) fi if [ "${threat_score:-0}" -ge 25 ] || [ "${high_risk_count:-0}" -ge 5 ]; then print_alert "THREAT LEVEL: CRITICAL - Immediate action required" echo " Summary: Multiple attack vectors detected from $high_risk_count high-risk IPs" echo "" echo " Immediate Actions:" echo " 1. ⚡ Apply the blocklist above IMMEDIATELY (prioritized by threat score)" echo " 2. Review admin access logs for successful breaches" echo " 3. 🛡 Enable ModSecurity WAF or Cloudflare if not already active" echo " 4. 🔄 Update all CMS platforms and plugins urgently" echo " 5. 🔐 Force password reset for admin accounts if login attempts detected" echo " 6. Re-run this analysis in 1 hour to verify blocks are working" elif [ "${threat_score:-0}" -ge 12 ] || [ "${high_risk_count:-0}" -ge 2 ]; then print_warning "THREAT LEVEL: HIGH - Action recommended within 24 hours" echo " Summary: Significant threat activity from $high_risk_count high-risk IPs" echo "" echo " Recommended Actions:" echo " 1. Review and apply the blocklist above (focus on CRITICAL/HIGH scores)" echo " 2. Enable rate limiting for admin endpoints" echo " 3. Monitor logs closely for the next 24-48 hours" echo " 4. Consider implementing fail2ban or similar IDS" echo " 5. Review and update security plugins/modules" elif [ "${threat_score:-0}" -ge 5 ]; then print_warning "THREAT LEVEL: MODERATE - Routine security maintenance" echo " Summary: Normal bot activity with some suspicious patterns" echo "" echo " Recommended Actions:" echo " 1. Review suspicious IPs in the report" echo " 2. Consider rate limiting aggressive bots" echo " 3. Continue routine log monitoring" echo " 4. Block aggressive SEO bots if impacting performance" else print_success "THREAT LEVEL: ✅ LOW - Normal operation" echo " Summary: Minimal threat activity detected" echo "" echo " Recommended Actions:" echo " 1. Continue routine log monitoring" echo " 2. Review false positive warnings to whitelist legitimate services" echo " 3. Consider blocking aggressive SEO bots if bandwidth is a concern" fi echo "" echo "===============================================================" echo "Report saved to: $OUTPUT_FILE" echo "===============================================================" } ################################################################################ # BASELINE HEALTH CHECK - Test domains before making changes ################################################################################ baseline_health_check() { print_info "Loading baseline health status from cached data..." echo "" # Create baseline health file > "$TEMP_DIR/baseline_health.txt" # Use get_all_domain_statuses() from reference database instead of re-checking # Returns: domain|http_code|https_code|status_summary if ! command -v get_all_domain_statuses &>/dev/null; then print_warning "Reference database functions not available - skipping health check" return 0 fi local tested=0 local working=0 local broken=0 # Get all domain statuses from cached reference database while IFS='|' read -r domain http_status https_status result; do [ -z "$domain" ] && continue tested=$((tested + 1)) # Display status based on cached result if [ "$result" = "200_OK" ]; then working=$((working + 1)) echo -e " ${GREEN}✓${NC} $domain - HTTP:$http_status HTTPS:$https_status" elif [ "$result" = "REDIRECT" ]; then working=$((working + 1)) echo -e " ${YELLOW}→${NC} $domain - Redirect (HTTP:$http_status HTTPS:$https_status)" elif [ "$result" = "403_FORBIDDEN" ]; then broken=$((broken + 1)) echo -e " ${RED}✗${NC} $domain - Forbidden (HTTP:$http_status HTTPS:$https_status)" elif [ "$result" = "TIMEOUT" ] || [ "$result" = "UNREACHABLE" ]; then broken=$((broken + 1)) echo -e " ${RED}⏱${NC} $domain - Timeout (unreachable)" else broken=$((broken + 1)) echo -e " ${YELLOW}?${NC} $domain - HTTP:$http_status HTTPS:$https_status" fi # Store baseline: domain|http_status|https_status|result echo "$domain|$http_status|$https_status|$result" >> "$TEMP_DIR/baseline_health.txt" done < <(get_all_domain_statuses) if [ "$tested" -eq 0 ]; then print_warning "No domain status data available in reference database" return 0 fi echo "" print_success "Baseline loaded from cache: $working working, $broken with issues" echo "" } verify_domains_still_working() { print_info "Checking current domain status from cached data..." echo "" if [ ! -s "$TEMP_DIR/baseline_health.txt" ]; then print_warning "No baseline health data available" return 0 fi if ! command -v get_domain_status &>/dev/null; then print_warning "Reference database functions not available - skipping verification" return 0 fi local changes_detected=0 local now_broken=0 while IFS='|' read -r domain baseline_http baseline_https baseline_result; do [ -z "$domain" ] && continue # Get current status from cached reference database local current_status=$(get_domain_status "$domain") if [ -z "$current_status" ]; then # Domain not in cache - skip continue fi # Parse current status: http_code|https_code|status_summary IFS='|' read -r http_status https_status new_result <<< "$current_status" # Compare to baseline if [ "$baseline_result" != "$new_result" ]; then changes_detected=$((changes_detected + 1)) # Check if it got worse if [ "$baseline_result" = "200_OK" ] || [ "$baseline_result" = "REDIRECT" ]; then if [ "$new_result" = "403_FORBIDDEN" ] || [ "$new_result" = "TIMEOUT" ] || [ "$new_result" = "UNREACHABLE" ]; then now_broken=$((now_broken + 1)) echo -e " ${RED}⚠ BROKEN:${NC} $domain" echo -e " Before: $baseline_result (HTTP:$baseline_http HTTPS:$baseline_https)" echo -e " After: $new_result (HTTP:$http_status HTTPS:$https_status)" echo -e " ${RED}WARNING: This domain stopped working after your changes!${NC}" echo "" fi # Check if it got better elif [ "$baseline_result" = "403_FORBIDDEN" ] || [ "$baseline_result" = "TIMEOUT" ] || [ "$baseline_result" = "UNREACHABLE" ]; then if [ "$new_result" = "200_OK" ] || [ "$new_result" = "REDIRECT" ]; then echo -e " ${GREEN}✅ FIXED:${NC} $domain" echo -e " Before: $baseline_result" echo -e " After: $new_result" echo "" fi fi fi done < "$TEMP_DIR/baseline_health.txt" if [ "${now_broken:-0}" -gt 0 ]; then echo "" print_alert "WARNING: $now_broken domain(s) may have stopped working!" echo "" echo "NOTE: Status is from cached data (max 1 hour old)." echo "If you just made changes, the cache may not reflect real-time status." echo "" echo "Recommended actions:" echo " 1. Review the firewall rules you just applied" echo " 2. Check CSF temporary blocks: csf -t" echo " 3. Check CSF deny list: csf -g" echo " 4. Manually verify domain: curl -I http://domain.com" echo " 5. Consider reverting changes if issues persist" echo "" elif [ "${changes_detected:-0}" -eq 0 ]; then print_success "All domains show same status as baseline (cache-based check)" else print_success "Some status changes detected but no domains broken (cache-based check)" fi echo "" read -p "Press Enter to continue..." } ############################################################################# # Main Execution ############################################################################# main() { echo "" print_header "Starting Apache/cPanel Bot Analysis" # InterWorx requires special log discovery (logs are in /home/user/var/domain.com/logs/) if [ "$SYS_CONTROL_PANEL" = "interworx" ]; then print_info "InterWorx detected - discovering domain logs..." # Build time filter options local find_opts=() if [ -n "$HOURS_BACK" ]; then local minutes=$((HOURS_BACK * 60)) find_opts+=(-mmin -"$minutes") elif [ -n "$DAYS_BACK" ]; then find_opts+=(-mtime -"$DAYS_BACK") fi # Find all transfer*.log files in InterWorx structure (includes transfer.log and transfer-ssl.log) log_count=$(find /home/*/var/*/logs -type f -name "transfer*.log" "${find_opts[@]}" 2>/dev/null | wc -l) if [ "$log_count" -eq 0 ]; then # Try without time filter to see if ANY logs exist local total_logs=$(find /home/*/var/*/logs -type f -name "transfer*.log" 2>/dev/null | wc -l) if [ "$total_logs" -eq 0 ]; then print_alert "Error: No InterWorx access logs found in /home/*/var/*/logs/" echo "" echo "Diagnostic information:" echo " Checking for InterWorx structure:" local iw_structure=$(find /home -maxdepth 3 -type d -path "*/var/*/logs" 2>/dev/null | head -5) if [ -n "$iw_structure" ]; then echo " Found InterWorx directories:" echo "$iw_structure" echo "" echo " Checking for any log files:" find /home/*/var/*/logs -type f -name "*.log" 2>/dev/null | head -10 else echo " No InterWorx directory structure found (expected: /home/user/var/domain.com/logs/)" fi exit 1 else print_alert "No logs found matching time filter (last $HOURS_BACK hours)" echo "Total logs available: $total_logs" echo "" read -p "Analyze all available logs instead? [y/N]: " choice if [[ "$choice" =~ ^[Yy] ]]; then log_count=$total_logs find_opts=() # Clear time filter else exit 0 fi fi fi print_info "Found $log_count InterWorx domain log files to analyze" # Override LOG_DIR for parse_logs function to use export INTERWORX_MODE="yes" export INTERWORX_FIND_OPTS="${find_opts[*]}" else # Standard cPanel/Plesk log discovery # Check if log directory exists if [ ! -d "$LOG_DIR" ]; then print_alert "Error: Log directory not found: $LOG_DIR" echo "Please specify the correct log directory with -l option" exit 1 fi # Check if logs exist local find_opts=() if [ -n "$HOURS_BACK" ]; then local minutes=$((HOURS_BACK * 60)) find_opts+=(-mmin -"$minutes") elif [ -n "$DAYS_BACK" ]; then find_opts+=(-mtime -"$DAYS_BACK") fi log_count=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | wc -l) if [ "$log_count" -eq 0 ]; then # Try without time filter to see if ANY logs exist local total_logs=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" 2>/dev/null | wc -l) if [ "$total_logs" -eq 0 ]; then print_alert "Error: No log files found in $LOG_DIR" echo "" echo "Diagnostic information:" echo " Log directory: $LOG_DIR" echo " Directory exists: $([ -d "$LOG_DIR" ] && echo "yes" || echo "no")" if [ -d "$LOG_DIR" ]; then echo " Total files in directory: $(find "$LOG_DIR" -type f 2>/dev/null | wc -l)" echo " Sample files:" find "$LOG_DIR" -type f 2>/dev/null | head -5 | sed 's/^/ /' fi echo "" echo "Control panel: $SYS_CONTROL_PANEL" exit 1 else print_alert "No logs found matching time filter" if [ -n "$HOURS_BACK" ]; then echo "No logs found from the last $HOURS_BACK hours" elif [ -n "$DAYS_BACK" ]; then echo "No logs found from the last $DAYS_BACK days" fi echo "Total logs available: $total_logs" echo "" read -p "Analyze all available logs instead? [y/N]: " choice if [[ "$choice" =~ ^[Yy] ]]; then log_count=$total_logs find_opts=() # Clear time filter else exit 0 fi fi fi print_info "Found $log_count log files to analyze" fi # User filtering if [ -n "$FILTER_USER" ]; then print_info "Filtering logs for user: $FILTER_USER" export user_domains=$(get_user_domains "$FILTER_USER") if [ -z "$user_domains" ]; then print_error "No domains found for user: $FILTER_USER" exit 1 fi print_info "User has $(echo "$user_domains" | wc -l) domain(s)" else export user_domains="" fi # Print time range info if [ -n "$HOURS_BACK" ]; then print_info "Analyzing logs from the last $HOURS_BACK hours" elif [ -n "$DAYS_BACK" ]; then print_info "Analyzing logs from the last $DAYS_BACK days" fi # Baseline health check - test all domains before analysis baseline_health_check # Execute analysis pipeline with error handling parse_logs || { print_alert "Log parsing failed" exit 1 } classify_bots || { print_alert "Bot classification failed" exit 1 } # NEW: Enhanced analysis functions (before threats detected) analyze_headers # Detect header-based bot patterns analyze_entry_points # Detect suspicious entry points analyze_url_entropy # Detect fuzzing/parameter scanning analyze_request_timing # Detect DDoS patterns via timing detect_server_ips detect_threats # Must be before fingerprinting/domain targeting (creates attack_vectors_raw.txt) analyze_success_rates # Analyze success/failure rates for better accuracy detect_botnets analyze_time_series calculate_threat_scores detect_false_positives generate_statistics # NEW: Fingerprinting and domain targeting analysis (after threats detected) calculate_bot_fingerprint # Combine signals for accuracy (reduce false positives) analyze_domain_targeting_percentage # Show which domains are being targeted analyze_top_urls_per_domain # Show what files/endpoints are being hit generate_comparison_report # Show trends vs previous day # NEW: Baseline and progression analysis save_baseline # Store current metrics for historical comparison analyze_attack_progression # Show attack sequences and phases generate_report print_success "Analysis complete!" echo "" echo "Report location: $OUTPUT_FILE" # Analyze threat patterns and generate recommendations analyze_domain_threats analyze_geographic_threats generate_recommendations # Ask user what to do next show_post_analysis_menu } ################################################################################ # DOMAIN-LEVEL THREAT ANALYSIS ################################################################################ analyze_domain_threats() { print_info "Analyzing per-domain threat patterns..." # Create domain threat analysis file > "$TEMP_DIR/domain_threats.txt" > "$TEMP_DIR/domain_high_risk_ips.txt" # MASSIVE OPTIMIZATION: Single AWK pass instead of nested loops with 25,000+ greps # Old approach: O(domains × high_risk_IPs × file_size) = 83 minutes for 500 domains # New approach: O(file_size) = seconds awk -F'|' -v tmpdir="$TEMP_DIR" ' BEGIN { # Load high-risk IPs into memory while ((getline < tmpdir "/threat_scores.txt") > 0) { score = $1 ip = $2 if (score >= 70) { high_risk[ip] = score } } close(tmpdir "/threat_scores.txt") # Load attack vectors while ((getline < tmpdir "/attack_vectors_raw.txt") > 0) { domain = $2 attack_counts[domain]++ } close(tmpdir "/attack_vectors_raw.txt") } # Process parsed logs (single pass) { ip = $1 domain = $2 # Count total requests per domain domain_requests[domain]++ # Track high-risk IPs per domain if (ip in high_risk) { domain_high_risk_count[domain]++ domain_high_risk_ips[domain] = domain_high_risk_ips[domain] ip ":" high_risk[ip] ":" ++domain_ip_count[domain":"ip] " " } } END { # Now process classified bots while ((getline < tmpdir "/classified_bots.txt") > 0) { domain = $2 bot_counts[domain]++ } close(tmpdir "/classified_bots.txt") # Output results for each domain for (domain in domain_requests) { total_req = domain_requests[domain] bot_req = bot_counts[domain] + 0 bot_pct = (total_req > 0) ? (bot_req / total_req * 100) : 0 high_risk_count = domain_high_risk_count[domain] + 0 attacks = attack_counts[domain] + 0 high_risk_detail = domain_high_risk_ips[domain] # domain|total_requests|bot_requests|bot_percentage|high_risk_ip_count|attack_attempts|high_risk_ips_detail printf "%s|%d|%d|%.1f|%d|%d|%s\n", domain, total_req, bot_req, bot_pct, high_risk_count, attacks, high_risk_detail > tmpdir "/domain_threats.txt" # Track high-risk IPs per domain if (high_risk_count > 0) { printf "%s|%d|%s\n", domain, high_risk_count, high_risk_detail > tmpdir "/domain_high_risk_ips.txt" } } close(tmpdir "/domain_threats.txt") close(tmpdir "/domain_high_risk_ips.txt") }' "$TEMP_DIR/parsed_logs.txt" # Sort by high-risk IP count (descending) sort -t'|' -k5 -rn "$TEMP_DIR/domain_threats.txt" > "$TEMP_DIR/domain_threats_sorted.txt" # Get all unique domains awk -F'|' '{print $1}' "$TEMP_DIR/domain_threats.txt" | sort -u > "$TEMP_DIR/all_domains.txt" print_success "Domain threat analysis complete" } ################################################################################ # GEOGRAPHIC ANALYSIS (Country-based threat tracking) ################################################################################ analyze_geographic_threats() { print_info "Analyzing geographic distribution of threats..." # Create geographic analysis file > "$TEMP_DIR/geo_analysis.txt" > "$TEMP_DIR/geo_needs_maxmind.txt" # Check if GeoIP/MaxMind is available local has_geoip=false if command -v geoiplookup >/dev/null 2>&1 || command -v mmdbinspect >/dev/null 2>&1; then has_geoip=true fi if [ "$has_geoip" = false ]; then # Can't do full geographic analysis without GeoIP # But we can still detect if traffic looks suspicious by analyzing IP ranges # Count high-risk IPs by /24 network if [ -s "$TEMP_DIR/threat_scores.txt" ]; then awk -F'|' '$1 >= 70 { split($2, ip, ".") network = ip[1]"."ip[2]"."ip[3]".0/24" print network }' "$TEMP_DIR/threat_scores.txt" | sort | uniq -c | sort -rn > "$TEMP_DIR/high_risk_networks.txt" local network_count=$(wc -l < "$TEMP_DIR/high_risk_networks.txt" 2>/dev/null || echo "0") local total_high_risk=$(awk -F'|' '$1 >= 70' "$TEMP_DIR/threat_scores.txt" | wc -l) if [ "$network_count" -gt 10 ] || [ "$total_high_risk" -gt 50 ]; then # Multiple networks or many IPs suggests distributed attack # Recommend MaxMind for geographic blocking echo "DISTRIBUTED|$network_count networks|$total_high_risk IPs|MaxMind recommended" > "$TEMP_DIR/geo_needs_maxmind.txt" fi fi print_info "Geographic analysis limited (MaxMind GeoIP2 not installed)" else # Full geographic analysis with GeoIP print_info "Performing full geographic analysis with GeoIP..." # TODO: Implement full GeoIP lookups when available # This would lookup each high-risk IP and count by country fi print_success "Geographic analysis complete" } ################################################################################ # RECOMMENDATION ENGINE ################################################################################ generate_recommendations() { print_info "Generating intelligent recommendations..." # Initialize recommendation file > "$TEMP_DIR/recommendations.txt" local rec_count=0 # Get total unique high-risk IPs local total_high_risk_ips=0 if [ -s "$TEMP_DIR/threat_scores.txt" ]; then total_high_risk_ips=$(awk -F'|' '$1 >= 70' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | wc -l || echo "0") fi # Get total domains affected local total_domains=$(wc -l < "$TEMP_DIR/all_domains.txt" 2>/dev/null || echo "0") local affected_domains=0 if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then affected_domains=$(wc -l < "$TEMP_DIR/domain_high_risk_ips.txt" || echo "0") fi # Determine attack scope: single domain vs server-wide local attack_scope="unknown" local primary_target="" local primary_target_percentage=0 if [ "${affected_domains:-0}" -eq 1 ] && [ "${total_domains:-0}" -gt 1 ]; then attack_scope="single_domain" primary_target=$(head -1 "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null | cut -d'|' -f1) # Calculate what % of high-risk IPs are targeting this domain local domain_risk_count=$(head -1 "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null | cut -d'|' -f2 || echo "0") if [ "${total_high_risk_ips:-0}" -gt 0 ] && [ "${domain_risk_count:-0}" -gt 0 ]; then primary_target_percentage=$(awk "BEGIN {printf \"%.0f\", (${domain_risk_count:-0} / ${total_high_risk_ips:-0}) * 100}") fi elif [ "${affected_domains:-0}" -gt 1 ] && [ "${total_domains:-0}" -gt 1 ]; then # Check if one domain is getting most of the traffic local top_domain_count=$(head -1 "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | cut -d'|' -f5 || echo "0") if [ "${top_domain_count:-0}" -gt 0 ] && [ "${total_high_risk_ips:-0}" -gt 0 ]; then local top_percentage=$(awk "BEGIN {printf \"%.0f\", (${top_domain_count:-0} / ${total_high_risk_ips:-0}) * 100}") if [ "$top_percentage" -ge 75 ]; then attack_scope="primary_target" primary_target=$(head -1 "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | cut -d'|' -f1) primary_target_percentage=$top_percentage else attack_scope="server_wide" fi else attack_scope="server_wide" fi elif [ "${affected_domains:-0}" -eq "${total_domains:-0}" ] && [ "${total_domains:-0}" -gt 1 ]; then attack_scope="server_wide" elif [ "${total_domains:-0}" -eq 1 ]; then attack_scope="single_server" primary_target=$(head -1 "$TEMP_DIR/all_domains.txt" 2>/dev/null) fi # RECOMMENDATION #1: IP Blocking Strategy if [ "${total_high_risk_ips:-0}" -gt 0 ]; then rec_count=$((rec_count + 1)) if [ "${total_high_risk_ips:-0}" -le 10 ]; then echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 1 hour|HIGH|CSF temporary block recommended for ${total_high_risk_ips} IPs with threat score >= 70" >> "$TEMP_DIR/recommendations.txt" elif [ "${total_high_risk_ips:-0}" -le 50 ]; then echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 24 hours|HIGH|Large number of threats detected - 24hr block recommended" >> "$TEMP_DIR/recommendations.txt" else echo "REC|$rec_count|ip_block_perm|Permanently block $total_high_risk_ips high-risk IPs|CRITICAL|Severe bot attack detected - permanent blocking recommended" >> "$TEMP_DIR/recommendations.txt" fi fi # RECOMMENDATION #2: Connection Limit (CSF CT_LIMIT) # Only recommend if CSF is installed and CT_LIMIT is enabled if [ "$CSF_AVAILABLE" = true ]; then # Check if CT_LIMIT is enabled (not set to 0) local current_ct_limit=$(grep "^CT_LIMIT" /etc/csf/csf.conf 2>/dev/null | grep -oP '"\K[0-9]+' || echo "0") if [ "$current_ct_limit" -gt 0 ]; then # Check concurrent connections from top IPs local max_connections=0 if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then max_connections=$(head -1 "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null | awk '{print $1}' || echo "0") fi if [ "$max_connections" -gt 100 ] && [ "$max_connections" -lt "$current_ct_limit" ]; then rec_count=$((rec_count + 1)) local recommended_limit=$((max_connections - 20)) echo "REC|$rec_count|csf_ct_limit|Reduce CSF CT_LIMIT from $current_ct_limit to $recommended_limit|MEDIUM|High concurrent connections detected ($max_connections from single IP)" >> "$TEMP_DIR/recommendations.txt" fi fi fi # RECOMMENDATION #3: Domain-Specific .htaccess Protection if [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "primary_target" ]; then rec_count=$((rec_count + 1)) echo "REC|$rec_count|htaccess_domain|Add bot blocking to $primary_target .htaccess|HIGH|${primary_target_percentage}% of attacks target this domain" >> "$TEMP_DIR/recommendations.txt" fi # RECOMMENDATION #4: Server-wide Apache Protection if [ "$attack_scope" = "server_wide" ]; then rec_count=$((rec_count + 1)) echo "REC|$rec_count|apache_global|Add global bot blocking to Apache pre-virtualhost|HIGH|Attack affects $affected_domains of $total_domains domains" >> "$TEMP_DIR/recommendations.txt" fi # RECOMMENDATION #5: WordPress-specific (if attack patterns show wp-admin/wp-login attempts) local wp_attacks=0 if [ -s "$TEMP_DIR/attack_vectors_raw.txt" ]; then wp_attacks=$(grep -i "wp-admin\|wp-login\|xmlrpc" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0") fi if [ "${wp_attacks:-0}" -gt 50 ]; then rec_count=$((rec_count + 1)) # Determine which domains have WordPress local wp_domain_count=0 local wp_target_domain="" if [ -s "$TEMP_DIR/attack_vectors_raw.txt" ]; then # Get unique domains with WP attacks wp_domain_count=$(grep -i "wp-admin\|wp-login\|xmlrpc" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | cut -d'|' -f2 | sort -u | wc -l || echo "0") wp_target_domain=$(grep -i "wp-admin\|wp-login\|xmlrpc" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | cut -d'|' -f2 | sort -u | head -1 || echo "") fi # Generate appropriate recommendation based on how many domains have WordPress attacks if [ "${wp_domain_count:-0}" -eq 1 ] || [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "single_server" ]; then # Single domain being attacked echo "REC|$rec_count|wp_hardening|Harden WordPress on $wp_target_domain|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$TEMP_DIR/recommendations.txt" elif [ "$attack_scope" = "primary_target" ]; then # One primary target but others also affected echo "REC|$rec_count|wp_hardening|Harden WordPress on $primary_target|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$TEMP_DIR/recommendations.txt" else # Multiple domains with WordPress attacks echo "REC|$rec_count|wp_hardening|Harden WordPress across $wp_domain_count domains|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$TEMP_DIR/recommendations.txt" fi fi # PORTFLOOD Protection removed - not appropriate for web servers with many sites # Blocking ports 80/443 based on connection count breaks legitimate traffic # RECOMMENDATION #7: CSF SYNFLOOD Protection (if DDoS patterns detected) if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then local ddos_count=$(wc -l < "$TEMP_DIR/rapid_fire_ips.txt" || echo "0") if [ "${ddos_count:-0}" -gt 10 ]; then rec_count=$((rec_count + 1)) echo "REC|$rec_count|csf_synflood|Enable CSF SYNFLOOD protection|HIGH|$ddos_count potential DDoS sources detected" >> "$TEMP_DIR/recommendations.txt" fi fi # RECOMMENDATION #8: MaxMind GeoIP for Country Blocking (if distributed attack) if [ -s "$TEMP_DIR/geo_needs_maxmind.txt" ]; then local geo_info=$(cat "$TEMP_DIR/geo_needs_maxmind.txt") local network_count=$(echo "$geo_info" | cut -d'|' -f2 | grep -oP '\d+' || echo "0") local ip_count=$(echo "$geo_info" | cut -d'|' -f3 | grep -oP '\d+' || echo "0") rec_count=$((rec_count + 1)) echo "REC|$rec_count|install_maxmind|Install MaxMind GeoIP2 for country-based blocking|MEDIUM|Distributed attack from $network_count networks ($ip_count IPs) - geographic blocking recommended" >> "$TEMP_DIR/recommendations.txt" fi # Store attack scope for menu system echo "$attack_scope|$primary_target|$primary_target_percentage|$affected_domains|$total_domains" > "$TEMP_DIR/attack_scope.txt" print_success "Generated $rec_count recommendations" } ################################################################################ # POST-ANALYSIS MENU ################################################################################ show_post_analysis_menu() { # Load attack scope information local attack_scope="unknown" local primary_target="" local primary_target_percentage=0 local affected_domains=0 local total_domains=0 if [ -s "$TEMP_DIR/attack_scope.txt" ]; then local scope_data=$(cat "$TEMP_DIR/attack_scope.txt") attack_scope=$(echo "$scope_data" | cut -d'|' -f1) primary_target=$(echo "$scope_data" | cut -d'|' -f2) primary_target_percentage=$(echo "$scope_data" | cut -d'|' -f3) affected_domains=$(echo "$scope_data" | cut -d'|' -f4) total_domains=$(echo "$scope_data" | cut -d'|' -f5) fi # Check if there are any recommendations local has_recommendations=false local rec_count=0 if [ -s "$TEMP_DIR/recommendations.txt" ]; then has_recommendations=true rec_count=$(wc -l < "$TEMP_DIR/recommendations.txt") fi # Show menu echo "" echo "===============================================================" print_header "THREAT ANALYSIS SUMMARY" echo "" # Display attack scope case "$attack_scope" in single_domain) print_warning "ATTACK SCOPE: Single Domain Target" echo " • Primary Target: $primary_target" echo " • This domain is receiving 100% of high-risk traffic" echo " • Recommendation: Domain-specific protection" ;; primary_target) print_warning "ATTACK SCOPE: Primarily Targeting One Domain" echo " • Primary Target: $primary_target ($primary_target_percentage% of attacks)" echo " • Other domains also affected: $affected_domains of $total_domains total" echo " • Recommendation: Focus protection on primary target" ;; server_wide) print_alert "ATTACK SCOPE: Server-Wide Attack" echo " • Multiple domains under attack: $affected_domains of $total_domains" echo " • Attack is distributed across the server" echo " • Recommendation: Server-wide protection needed" ;; single_server) print_info "ATTACK SCOPE: Single-Domain Server" echo " • Target: $primary_target (only domain on server)" echo " • Server-level protection will apply to this domain" ;; *) print_info "No significant threats detected" ;; esac echo "" # Display recommendations if [ "$has_recommendations" = true ]; then echo "===============================================================" print_header "RECOMMENDED ACTIONS ($rec_count recommendations)" echo "" local count=0 while IFS='|' read -r rec_type rec_num action_type action_title priority description; do count=$((count + 1)) # Color code by priority local priority_color="" local priority_icon="" case "$priority" in CRITICAL) priority_color="${RED}" priority_icon="" ;; HIGH) priority_color="${YELLOW}" priority_icon="" ;; MEDIUM) priority_color="${BLUE}" priority_icon="" ;; *) priority_color="${NC}" priority_icon=" " ;; esac echo -e " ${BOLD}[$count]${NC} $priority_icon $action_title" echo -e " ${priority_color}Priority: $priority${NC} - $description" echo "" done < "$TEMP_DIR/recommendations.txt" echo "===============================================================" echo "" echo "What would you like to do?" echo "" echo " 1) Go to Take Action Menu (implement recommended actions)" echo " 2) Review Individual Recommendations (detailed view)" echo "" echo -e " ${RED}0)${NC} Back" echo "" read -p "Select option: " menu_choice case "$menu_choice" in 1) show_action_menu ;; 2) show_detailed_recommendations ;; 0) print_info "Returning to main menu..." return 0 ;; *) print_warning "Invalid option - returning to main menu" return 0 ;; esac else print_success "No recommendations - your server appears secure" echo "" echo "Press Enter to return to main menu..." read return 0 fi } ################################################################################ # DETAILED RECOMMENDATIONS VIEWER ################################################################################ show_detailed_recommendations() { clear print_banner "Detailed Recommendations" echo "" if [ ! -s "$TEMP_DIR/recommendations.txt" ]; then print_warning "No recommendations available" echo "" read -p "Press Enter to continue..." show_post_analysis_menu return fi local count=0 while IFS='|' read -r rec_type rec_num action_type action_title priority description; do count=$((count + 1)) echo "===============================================================" echo -e "${BOLD}Recommendation #$count:${NC} $action_title" echo "===============================================================" echo "" echo "Priority: $priority" echo "Action Type: $action_type" echo "Description: $description" echo "" # Show specific details based on action type case "$action_type" in ip_block_temp|ip_block_perm) echo "Affected IPs:" awk -F'|' '$1 >= 70 {printf " • %s (score: %s)\n", $2, $1}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | head -10 ;; htaccess_domain) local target_domain=$(echo "$action_title" | grep -oP 'to \K[^ ]+' 2>/dev/null || echo "") echo "Target Domain: $target_domain" if [ -s "$TEMP_DIR/domain_threats_sorted.txt" ]; then grep -F "$target_domain|" "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | while IFS='|' read -r domain total_req bot_req bot_pct high_risk attacks ips; do echo " • Total Requests: $total_req" echo " • Bot Requests: $bot_req ($bot_pct%)" echo " • High-Risk IPs: $high_risk" echo " • Attack Attempts: $attacks" done fi ;; apache_global) echo "Affected Domains:" if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then awk -F'|' '{printf " • %s (%s high-risk IPs)\n", $1, $2}' "$TEMP_DIR/domain_high_risk_ips.txt" | head -10 fi ;; esac echo "" done < "$TEMP_DIR/recommendations.txt" echo "===============================================================" echo "" read -p "Press Enter to return to action menu..." show_post_analysis_menu } ################################################################################ # ACTION MENU (IMPLEMENT RECOMMENDATIONS) ################################################################################ show_action_menu() { clear print_banner "Take Action Menu" echo "" # Build hash table of recommended actions with their priorities declare -A recommended_actions declare -A action_priorities declare -A action_descriptions if [ -s "$TEMP_DIR/recommendations.txt" ]; then while IFS='|' read -r rec_type rec_num action_type action_title priority description; do recommended_actions["$action_type"]=1 action_priorities["$action_type"]="$priority" action_descriptions["$action_type"]="$description" done < "$TEMP_DIR/recommendations.txt" fi # Display all available actions (not just recommended ones) echo "All Available Actions:" echo "" echo "Legend: = Recommended by analysis" echo "" local count=0 declare -a action_types declare -a action_titles declare -a action_descs # Define all possible actions # 1. IP Blocking Actions count=$((count + 1)) action_types[$count]="ip_block_temp_1hr" action_titles[$count]="Block high-risk IPs for 1 hour (CSF temporary)" action_descs[$count]="Temporary firewall block, auto-expires after 1 hour" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[ip_block_temp]}" "${action_priorities[ip_block_temp]}" count=$((count + 1)) action_types[$count]="ip_block_temp_24hr" action_titles[$count]="Block high-risk IPs for 24 hours (CSF temporary)" action_descs[$count]="Temporary firewall block, auto-expires after 24 hours" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[ip_block_temp]}" "${action_priorities[ip_block_temp]}" count=$((count + 1)) action_types[$count]="ip_block_perm" action_titles[$count]="Block high-risk IPs permanently (CSF permanent)" action_descs[$count]="Permanent firewall block - requires manual removal" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[ip_block_perm]}" "${action_priorities[ip_block_perm]}" echo "" echo "------------------------------------------------------------─" echo "" # 2. Domain/Site Protection count=$((count + 1)) action_types[$count]="htaccess_domain" action_titles[$count]="Add bot blocking to specific domain .htaccess" action_descs[$count]="Domain-level protection via Apache .htaccess rules" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[htaccess_domain]}" "${action_priorities[htaccess_domain]}" count=$((count + 1)) action_types[$count]="apache_global" action_titles[$count]="Add global bot blocking to Apache (all domains)" action_descs[$count]="Server-wide Apache configuration, affects all sites" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[apache_global]}" "${action_priorities[apache_global]}" echo "" echo "------------------------------------------------------------─" echo "" # 3. CSF Firewall Configuration count=$((count + 1)) action_types[$count]="csf_ct_limit" action_titles[$count]="Adjust CSF connection tracking limit (CT_LIMIT)" action_descs[$count]="Limit concurrent connections per IP address" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[csf_ct_limit]}" "${action_priorities[csf_ct_limit]}" # PORTFLOOD action removed - not appropriate for web servers count=$((count + 1)) action_types[$count]="csf_synflood" action_titles[$count]="Enable CSF SYNFLOOD protection" action_descs[$count]="Protect against SYN flood DDoS attacks" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[csf_synflood]}" "${action_priorities[csf_synflood]}" echo "" echo "------------------------------------------------------------─" echo "" # 4. Geographic & Application Hardening count=$((count + 1)) action_types[$count]="install_maxmind" action_titles[$count]="Install MaxMind GeoIP2 for country-based blocking" action_descs[$count]="Enable geographic filtering with CSF CC_DENY (requires free MaxMind license)" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[install_maxmind]}" "${action_priorities[install_maxmind]}" count=$((count + 1)) action_types[$count]="wp_hardening" action_titles[$count]="WordPress security hardening" action_descs[$count]="Protect WordPress login and admin areas" display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[wp_hardening]}" "${action_priorities[wp_hardening]}" echo "" echo "============================================================═" echo "" echo -e " ${RED}0)${NC} Back" echo "" read -p "Select action [0-$count]: " action_choice # Validate choice if [ "$action_choice" = "0" ]; then show_post_analysis_menu return elif [ "$action_choice" -lt 1 ] || [ "$action_choice" -gt "$count" ] 2>/dev/null; then print_warning "Invalid selection" sleep 2 show_action_menu return fi # Execute selected action local selected_type="${action_types[$action_choice]}" execute_action "$selected_type" "$action_choice" } # Helper function to display action options display_action_option() { local num=$1 local action_type=$2 local title=$3 local desc=$4 local is_recommended=$5 local priority=$6 # Show recommendation marker and priority if recommended if [ -n "$is_recommended" ]; then case "$priority" in CRITICAL) echo -e " ${RED}$num)${NC} ${BOLD}$title${NC} ${RED} RECOMMENDED [CRITICAL]${NC}" ;; HIGH) echo -e " ${YELLOW}$num)${NC} ${BOLD}$title${NC} ${YELLOW} RECOMMENDED [HIGH]${NC}" ;; MEDIUM) echo -e " ${BLUE}$num)${NC} ${BOLD}$title${NC} ${BLUE} RECOMMENDED [MEDIUM]${NC}" ;; *) echo -e " ${GREEN}$num)${NC} ${BOLD}$title${NC} ${GREEN} RECOMMENDED${NC}" ;; esac else echo -e " $num) $title" fi echo " $desc" } ################################################################################ # ACTION EXECUTION ENGINE ################################################################################ execute_action() { local action_type="$1" local rec_number="$2" case "$action_type" in ip_block_temp_1hr) execute_ip_blocking_specific "1hr" ;; ip_block_temp_24hr) execute_ip_blocking_specific "24hr" ;; ip_block_temp) execute_ip_blocking "temp" ;; ip_block_perm) execute_ip_blocking "perm" ;; csf_ct_limit) execute_csf_ct_limit ;; csf_synflood) execute_csf_synflood ;; htaccess_domain) execute_htaccess_domain_blocking ;; apache_global) execute_apache_global_blocking ;; install_maxmind) execute_install_maxmind ;; wp_hardening) execute_wp_hardening ;; rate_limiting) execute_rate_limiting ;; *) print_warning "Action type '$action_type' not yet implemented" echo "" read -p "Press Enter to continue..." show_action_menu ;; esac } execute_ip_blocking_specific() { local duration_type="$1" # "1hr" or "24hr" clear print_banner "IP Blocking - CSF Temporary Block" echo "" # Check if CSF is installed if [ "$CSF_AVAILABLE" != true ]; then print_warning "CSF (ConfigServer Security & Firewall) is not installed" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Get high-risk IPs if [ ! -s "$TEMP_DIR/threat_scores.txt" ]; then print_warning "No threat scores available" echo "" read -p "Press Enter to continue..." show_action_menu return fi local high_risk_count=$(awk -F'|' '$1 >= 70' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | wc -l || echo "0") if [ "$high_risk_count" -eq 0 ]; then print_info "No high-risk IPs detected (score >= 70)" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Set duration based on type local duration local duration_text if [ "$duration_type" = "1hr" ]; then duration=3600 duration_text="1 hour" else duration=86400 duration_text="24 hours" fi echo "This will block $high_risk_count high-risk IPs for $duration_text" echo "" echo "High-risk IPs (top 10):" awk -F'|' '$1 >= 70 {printf " • %s (score: %s, %s requests)\n", $2, $1, $3}' "$TEMP_DIR/threat_scores.txt" | head -10 echo "" if [ "$high_risk_count" -gt 10 ]; then echo " ... and $((high_risk_count - 10)) more" echo "" fi read -p "Proceed with blocking for $duration_text? (yes/no): " confirm if [ "$confirm" != "yes" ]; then print_info "Operation cancelled" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Collect IPs to block local -a ips_to_block while IFS='|' read -r score ip requests; do if [ "$score" -ge 70 ]; then # Skip excluded IPs if is_excluded_ip "$ip"; then continue fi # Skip false positives if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then continue fi ips_to_block+=("$ip") fi done < "$TEMP_DIR/threat_scores.txt" # Apply blocks echo "" print_info "Applying CSF blocks for $duration_text..." echo "" local success_count=0 local fail_count=0 for ip in "${ips_to_block[@]}"; do local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown") if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)" success_count=$((success_count + 1)) else echo -e " ${RED}${NC} Failed to block $ip" fail_count=$((fail_count + 1)) fi done echo "" if [ "${success_count:-0}" -gt 0 ]; then print_success "Successfully blocked $success_count IP(s) for $duration_text" echo "" echo "These blocks will automatically expire after $duration_text" echo "To view temporary blocks: csf -t" echo "To remove a block early: csf -tr IP" fi if [ "${fail_count:-0}" -gt 0 ]; then print_warning "$fail_count IP(s) failed to block - check CSF configuration" fi # Restart CSF print_info "Restarting CSF to apply changes..." if csf -r >/dev/null 2>&1; then print_success "CSF restarted successfully" else print_warning "CSF restart may have failed - check manually with: csf -r" fi echo "" # Verify domains still work after blocking verify_domains_still_working show_action_menu } execute_ip_blocking() { local block_mode="$1" # "temp" or "perm" if [ "$block_mode" = "temp" ]; then # Call the existing CSF blocking function offer_csf_blocking else # Permanent blocking clear print_banner "Permanent IP Blocking" echo "" print_alert "WARNING: Permanent blocks must be manually removed later" echo "" echo "This will permanently block all high-risk IPs (score >= 70)" echo "" read -p "Are you sure you want to proceed? (yes/no): " confirm if [ "$confirm" = "yes" ]; then offer_csf_blocking else print_info "Operation cancelled" echo "" read -p "Press Enter to continue..." show_action_menu fi fi } execute_csf_ct_limit() { clear print_banner "Update CSF Connection Tracking Limit" echo "" # Check if CSF is installed if [ "$CSF_AVAILABLE" != true ]; then print_warning "CSF is not installed on this server" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Get recommended limit from recommendation local recommended_limit=$(grep "|csf_ct_limit|" "$TEMP_DIR/recommendations.txt" 2>/dev/null | grep -oP 'to \K[0-9]+' || echo "100") # Get current CT_LIMIT local current_limit=$(grep "^CT_LIMIT" /etc/csf/csf.conf 2>/dev/null | grep -oP '"\K[0-9]+' || echo "unknown") echo "Current CT_LIMIT: $current_limit" echo "Recommended CT_LIMIT: $recommended_limit" echo "" echo "This will modify /etc/csf/csf.conf and restart CSF" echo "" read -p "Enter new CT_LIMIT value [$recommended_limit]: " new_limit # Use recommended if nothing entered [ -z "$new_limit" ] && new_limit=$recommended_limit # Validate it's a number if ! [[ "$new_limit" =~ ^[0-9]+$ ]]; then print_warning "Invalid number" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Update CSF config print_info "Updating CT_LIMIT to $new_limit..." if [ -f /etc/csf/csf.conf ]; then sed -i "s/^CT_LIMIT = .*/CT_LIMIT = \"$new_limit\"/" /etc/csf/csf.conf # Restart CSF print_info "Restarting CSF..." csf -r >/dev/null 2>&1 print_success "CT_LIMIT updated successfully to $new_limit" else print_warning "Could not find /etc/csf/csf.conf" fi echo "" # Verify domains still work after CT_LIMIT change verify_domains_still_working show_action_menu } execute_htaccess_domain_blocking() { clear print_banner "Add Bot Blocking to Domain .htaccess" echo "" # Get target domain from recommendation local target_domain=$(grep "|htaccess_domain|" "$TEMP_DIR/recommendations.txt" 2>/dev/null | head -1 | grep -oP 'to \K[^ ]+' || echo "") if [ -z "$target_domain" ]; then print_warning "Could not determine target domain" echo "" read -p "Press Enter to continue..." show_action_menu return fi echo "Target Domain: $target_domain" echo "" # Find document root for this domain using reference database local doc_root="" if [ -s "$SCRIPT_DIR/.sysref" ]; then doc_root=$(grep -F "DOMAIN|$target_domain|" "$SCRIPT_DIR/.sysref" 2>/dev/null | head -1 | cut -d'|' -f4 || echo "") fi if [ -z "$doc_root" ]; then print_warning "Document root not found in reference database" echo "Please enter the document root manually:" read -p "Document root: " doc_root else echo "Document root: $doc_root" fi if [ ! -d "$doc_root" ]; then print_warning "Document root does not exist: $doc_root" echo "" read -p "Press Enter to continue..." show_action_menu return fi local htaccess_file="$doc_root/.htaccess" echo "" echo "This will add bot blocking rules to: $htaccess_file" echo "" read -p "Proceed? (yes/no): " confirm if [ "$confirm" != "yes" ]; then print_info "Operation cancelled" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Create backup if [ -f "$htaccess_file" ]; then cp "$htaccess_file" "$htaccess_file.backup.$(date +%Y%m%d_%H%M%S)" print_info "Backed up existing .htaccess" fi # Generate bot blocking rules print_info "Adding bot blocking rules..." # Get high-risk IPs for this domain local block_ips=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -F "|$target_domain|" 2>/dev/null || true | cut -d'|' -f1 | sort -u | while read ip; do # Check if this IP has high threat score if grep -q -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null; then local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "0") if [ "${score:-0}" -ge 70 ]; then echo "$ip" fi fi done || true) # Add rules to .htaccess { echo "" echo "# Bot blocking rules added by toolkit on $(date)" echo "# High-risk IPs (threat score >= 70)" echo "" for ip in $block_ips; do echo " Require not ip $ip" done echo "" echo "" } >> "$htaccess_file" local block_count=$(echo "$block_ips" | wc -w) print_success "Added blocking rules for $block_count IPs to $htaccess_file" echo "" echo "Backup saved to: $htaccess_file.backup.$(date +%Y%m%d_%H%M%S)" echo "" # Verify domains still work after .htaccess changes verify_domains_still_working show_action_menu } execute_apache_global_blocking() { clear print_banner "Add Global Bot Blocking to Apache" echo "" print_warning "This feature will add blocking rules to Apache pre-virtualhost configuration" echo "This affects ALL domains on the server" echo "" # Determine Apache config location local apache_conf="" if [ -d "/etc/apache2/conf.d" ]; then apache_conf="/etc/apache2/conf.d/bot_blocking.conf" elif [ -d "/etc/httpd/conf.d" ]; then apache_conf="/etc/httpd/conf.d/bot_blocking.conf" else print_warning "Could not determine Apache config directory" echo "" read -p "Press Enter to continue..." show_action_menu return fi echo "Configuration will be written to: $apache_conf" echo "" read -p "Proceed? (yes/no): " confirm if [ "$confirm" != "yes" ]; then print_info "Operation cancelled" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Create backup if file exists if [ -f "$apache_conf" ]; then cp "$apache_conf" "$apache_conf.backup.$(date +%Y%m%d_%H%M%S)" print_info "Backed up existing configuration" fi # Generate global blocking rules print_info "Generating global bot blocking configuration..." { echo "# Global bot blocking rules" echo "# Generated by toolkit on $(date)" echo "" echo "" echo " # Block high-risk IPs (threat score >= 70)" awk -F'|' '$1 >= 70 {print " Require not ip " $2}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null echo "" echo "" } > "$apache_conf" local block_count=$(awk -F'|' '$1 >= 70' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | wc -l) print_success "Created global blocking configuration with $block_count IPs" echo "" echo "Restarting Apache to apply changes..." if systemctl restart httpd 2>/dev/null || systemctl restart apache2 2>/dev/null; then print_success "Apache restarted successfully" else print_warning "Could not restart Apache - please restart manually" fi echo "" # Verify domains still work after Apache global blocking verify_domains_still_working show_action_menu } execute_wp_hardening() { clear print_banner "WordPress Hardening" echo "" print_info "WordPress hardening feature coming soon..." echo "" echo "Recommended manual actions:" echo " • Install Wordfence or similar security plugin" echo " • Enable two-factor authentication" echo " • Limit login attempts" echo " • Disable XML-RPC if not needed" echo " • Use strong passwords" echo "" read -p "Press Enter to continue..." show_action_menu } execute_rate_limiting() { clear print_banner "Enable Rate Limiting" echo "" print_info "Rate limiting modules like mod_evasive/mod_security can help with application-level DoS" echo "" echo "For better bot protection, consider:" echo " - IP blocking (options 1-3) - Block specific attacking IPs" echo " - CSF CT_LIMIT adjustment (option 4) - Limit connections per IP" echo " - .htaccess rules (option 5) - Domain-specific blocking" echo "" echo "This option (rate limiting) is currently a placeholder for future implementation." echo "" read -p "Press Enter to continue..." show_action_menu } # execute_csf_portflood() removed - not appropriate for web servers with 400+ sites # Blocking ports 80/443 based on connection count would break legitimate traffic execute_csf_synflood() { clear print_banner "Enable CSF SYNFLOOD Protection" echo "" if [ "$CSF_AVAILABLE" != true ]; then print_warning "CSF is not installed on this server" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Get current SYNFLOOD setting local current_synflood=$(grep "^SYNFLOOD = " /etc/csf/csf.conf 2>/dev/null | grep -oP '"\K[^"]+' || echo "0") echo "Current SYNFLOOD protection: ${current_synflood}" echo "" echo "SYNFLOOD protects against SYN flood DDoS attacks by limiting" echo "the rate of new TCP connections." echo "" echo "Recommended settings:" echo " SYNFLOOD = \"1\" (enable protection)" echo " SYNFLOOD_RATE = \"100/s\" (100 connections per second)" echo " SYNFLOOD_BURST = \"150\" (allow burst of 150)" echo "" read -p "Enable SYNFLOOD protection? (yes/no): " confirm if [ "$confirm" != "yes" ]; then print_info "Operation cancelled" echo "" read -p "Press Enter to continue..." show_action_menu return fi # Update CSF config print_info "Enabling SYNFLOOD protection..." if [ -f /etc/csf/csf.conf ]; then sed -i 's/^SYNFLOOD = .*/SYNFLOOD = "1"/' /etc/csf/csf.conf sed -i 's/^SYNFLOOD_RATE = .*/SYNFLOOD_RATE = "100\/s"/' /etc/csf/csf.conf sed -i 's/^SYNFLOOD_BURST = .*/SYNFLOOD_BURST = "150"/' /etc/csf/csf.conf # Restart CSF print_info "Restarting CSF..." csf -r >/dev/null 2>&1 print_success "SYNFLOOD protection enabled" else print_warning "Could not find /etc/csf/csf.conf" fi echo "" read -p "Press Enter to continue..." show_action_menu } execute_install_maxmind() { clear print_banner "Install MaxMind GeoIP2 for Country Blocking" echo "" # Check if already installed if command -v mmdbinspect >/dev/null 2>&1; then print_success "MaxMind GeoIP2 tools already installed" echo "" echo "Next steps:" echo "1. Sign up for free license at: https://www.maxmind.com/en/geolite2/signup" echo "2. Get your license key from account page" echo "3. Install CSF GeoIP module: /usr/local/csf/bin/csftest.pl -g" echo "4. Configure CC_DENY in /etc/csf/csf.conf with country codes" echo "" echo "Example: CC_DENY = \"CN,RU,KP\" (block China, Russia, North Korea)" echo "" else print_info "MaxMind GeoIP2 not detected" echo "" echo "To install MaxMind GeoIP2 for CSF country blocking:" echo "" echo "1. Sign up for free MaxMind account:" echo " https://www.maxmind.com/en/geolite2/signup" echo "" echo "2. Get your license key from:" echo " https://www.maxmind.com/en/accounts/current/license-key" echo "" echo "3. Install GeoIP Perl module:" echo " yum install perl-Geo-IP" echo " # or" echo " cpan -i Geo::IP" echo "" echo "4. Test CSF GeoIP support:" echo " /usr/local/csf/bin/csftest.pl -g" echo "" echo "5. Configure CC_DENY in /etc/csf/csf.conf:" echo " CC_DENY = \"CN,RU\" (example: block China & Russia)" echo "" echo "6. Restart CSF:" echo " csf -r" echo "" fi # Show geographic analysis if available if [ -s "$TEMP_DIR/high_risk_networks.txt" ]; then echo "=========================================================══" echo "High-Risk Networks Detected:" echo "" head -10 "$TEMP_DIR/high_risk_networks.txt" | while read count network; do echo " • $network - $count high-risk IPs" done echo "" fi read -p "Press Enter to continue..." show_action_menu } ################################################################################ # INTERACTIVE CSF BLOCKING ################################################################################ offer_csf_blocking() { echo "" echo "===============================================================" print_header "🛡 INTERACTIVE THREAT BLOCKING" # Check if CSF is installed if [ "$CSF_AVAILABLE" != true ]; then print_warning "CSF (ConfigServer Security & Firewall) is not installed" echo "Cannot offer automatic blocking without CSF" return 0 fi # Get high-risk IPs (score >= 70) local high_risk_ips=() local ip_scores=() if [ -s "$TEMP_DIR/threat_scores.txt" ]; then while read -r line; do local score=$(echo "$line" | cut -d'|' -f1) local ip=$(echo "$line" | cut -d'|' -f2) # Only include scores >= 70 (HIGH and CRITICAL) if [ "$score" -ge 70 ]; then # Skip excluded IPs if is_excluded_ip "$ip"; then continue fi # Skip false positives if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then continue fi high_risk_ips+=("$ip") ip_scores+=("$score") fi done < <(awk -F'|' '{print $1 "|" $2}' "$TEMP_DIR/threat_scores.txt" | sort -rn) fi # If no high-risk IPs, nothing to block if [ ${#high_risk_ips[@]} -eq 0 ]; then print_info "No high-risk IPs detected (score >= 70)" return 0 fi # Show IPs that would be blocked echo "" echo "Found ${#high_risk_ips[@]} high-risk IP(s) with threat score >= 70:" echo "" local count=0 for i in "${!high_risk_ips[@]}"; do count=$((count + 1)) local ip="${high_risk_ips[$i]}" local score="${ip_scores[$i]}" local requests=$(grep -F "$ip|" "$TEMP_DIR/bot_ips.txt" 2>/dev/null | cut -d'|' -f2 || echo "0") # Color code by severity if [ "$score" -ge 90 ]; then echo -e " ${RED}[$count] $ip${NC} - Risk: ${RED}$score/100 CRITICAL${NC} ($requests requests)" elif [ "$score" -ge 80 ]; then echo -e " ${YELLOW}[$count] $ip${NC} - Risk: ${YELLOW}$score/100 HIGH${NC} ($requests requests)" else echo -e " [$count] $ip - Risk: $score/100 ELEVATED ($requests requests)" fi done echo "" echo "===============================================================" echo "" # Ask user if they want to block echo -e "${BOLD}Would you like to temporarily block these IPs using CSF?${NC}" echo "" echo "Options:" echo " 1) Block for 1 hour (temporary - auto-expires)" echo " 2) Block for 24 hours (temporary - auto-expires)" echo " 3) Block permanently (requires manual unblock)" echo " 4) Don't block (manual review)" echo "" read -p "Select option [1-4]: " block_choice case "$block_choice" in 1) local duration=3600 # 1 hour in seconds local duration_text="1 hour" apply_csf_blocks "$duration" "$duration_text" "${high_risk_ips[@]}" ;; 2) local duration=86400 # 24 hours in seconds local duration_text="24 hours" apply_csf_blocks "$duration" "$duration_text" "${high_risk_ips[@]}" ;; 3) apply_csf_permanent_blocks "${high_risk_ips[@]}" ;; 4) print_info "Skipping automatic blocking - manual review recommended" echo "You can block IPs manually using: csf -td IP DURATION" ;; *) print_warning "Invalid option - skipping blocking" ;; esac } apply_csf_blocks() { local duration=$1 local duration_text=$2 shift 2 local ips=("$@") echo "" print_info "Applying temporary CSF blocks for $duration_text..." echo "" local success_count=0 local fail_count=0 for ip in "${ips[@]}"; do # Get threat score for comment local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown") # Use csf -td for temporary deny if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)" success_count=$((success_count + 1)) else echo -e " ${RED}${NC} Failed to block $ip" fail_count=$((fail_count + 1)) fi done echo "" if [ "${success_count:-0}" -gt 0 ]; then print_success "Successfully blocked $success_count IP(s) for $duration_text" echo "" echo "These blocks will automatically expire after $duration_text" echo "To view temporary blocks: csf -t" echo "To remove a block early: csf -tr IP" fi if [ "${fail_count:-0}" -gt 0 ]; then print_warning "$fail_count IP(s) failed to block - check CSF configuration" fi # Restart CSF to apply changes print_info "Restarting CSF to apply changes..." if csf -r >/dev/null 2>&1; then print_success "CSF restarted successfully" else print_warning "CSF restart may have failed - check manually with: csf -r" fi } apply_csf_permanent_blocks() { local ips=("$@") echo "" print_warning "Applying PERMANENT CSF blocks..." echo "These will require manual removal using: csf -dr IP" echo "" read -p "Are you sure? This is permanent! (yes/no): " confirm if [ "$confirm" != "yes" ]; then print_info "Cancelled permanent blocking" return 0 fi echo "" local success_count=0 local fail_count=0 for ip in "${ips[@]}"; do local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown") # Use csf -d for permanent deny if csf -d "$ip" "Bot threat score: $score/100 - Permanently blocked by toolkit" >/dev/null 2>&1; then echo -e " ${GREEN}${NC} Permanently blocked $ip (score: $score/100)" success_count=$((success_count + 1)) else echo -e " ${RED}${NC} Failed to block $ip" fail_count=$((fail_count + 1)) fi done echo "" if [ "${success_count:-0}" -gt 0 ]; then print_success "Successfully blocked $success_count IP(s) permanently" echo "" echo "To view blocked IPs: csf -g" echo "To remove a block: csf -dr IP" fi if [ "${fail_count:-0}" -gt 0 ]; then print_warning "$fail_count IP(s) failed to block - check CSF configuration" fi # Restart CSF print_info "Restarting CSF to apply changes..." if csf -r >/dev/null 2>&1; then print_success "CSF restarted successfully" else print_warning "CSF restart may have failed - check manually with: csf -r" fi } # Run the script main "$@"