Linux-Server-Management-Too…/modules/security/bot-analyzer.sh

#!/bin/bash
set -eo pipefail

#############################################################################
# Apache/cPanel Domain Log Bot & Botnet Analyzer
# Version: 3.1 Enhanced (with Library Integration)
# Advanced log analysis for bot activity, security threats, and botnets
#
# Features:
# - Comprehensive bot classification (legitimate, AI, monitoring, suspicious)
# - Enhanced attack vector detection (SQL injection, XSS, path traversal,
#   RCE/shell upload, info disclosure, login bruteforce)
# - Threat scoring system (0-100 risk scores for each IP)
# - Time-series analysis with hourly traffic visualization
# - Response code intelligence (what are bots finding?)
# - False positive detection for legitimate monitoring services
# - Bandwidth cost estimation for bot traffic
# - Botnet pattern analysis (coordinated attacks, DDoS detection)
# - Prioritized blocklists sorted by threat severity
# - Actionable reports with copy-paste ready configurations
# - Performance optimized for large log files (>500k entries)
# - User filtering (analyze all users or specific user)
# - Auto-detects log directory based on control panel
#############################################################################

# Load libraries
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$SCRIPT_DIR/lib/common-functions.sh"
source "$SCRIPT_DIR/lib/system-detect.sh"
source "$SCRIPT_DIR/lib/user-manager.sh"
source "$SCRIPT_DIR/lib/ip-reputation.sh"
source "$SCRIPT_DIR/lib/bot-signatures.sh"
source "$SCRIPT_DIR/lib/attack-patterns.sh"
source "$SCRIPT_DIR/lib/threat-intelligence.sh"

# ---------------------------------------------------------------------------
# Runtime configuration
# ---------------------------------------------------------------------------

# Log directory: SYS_LOG_DIR when it is set and non-empty, otherwise the
# Apache domlogs path.
LOG_DIR="${SYS_LOG_DIR:-/var/log/apache2/domlogs}"

# Scratch space lives inside the toolkit tree rather than the system /tmp.
# Intermediate files for servers with 200+ domains can reach 50-100MB even
# when compressed, so keeping them here:
#   - keeps the system /tmp from filling up
#   - removes them together with the toolkit
#   - lets the cleanup script (clean-and-push-toolkit.sh) pick them up
TOOLKIT_TMP_DIR="${SCRIPT_DIR}/tmp"
mkdir -p "${TOOLKIT_TMP_DIR}" 2>/dev/null

# Per-domain history used for baseline comparison (30 entries are kept).
BASELINE_DIR="${TOOLKIT_TMP_DIR}/baseline_history"
mkdir -p "${BASELINE_DIR}" 2>/dev/null

# Per-run working directory (keyed by PID) and timestamped report path.
TEMP_DIR="${TOOLKIT_TMP_DIR}/bot_analysis_$$"
OUTPUT_FILE="${TOOLKIT_TMP_DIR}/bot_analysis_report_$(date +%Y%m%d_%H%M%S).txt"

# Filters; an empty value means "no restriction".
DAYS_BACK=""     # limit to the last N days
HOURS_BACK=""    # limit to the last N hours
FILTER_USER=""   # limit to one user

# Probe for CSF once and remember the answer so later code can test the flag
# instead of repeating the lookup.
if [ -f /etc/csf/csf.conf ] && command -v csf >/dev/null 2>&1; then
    CSF_AVAILABLE=true
else
    CSF_AVAILABLE=false
fi

# Parse command line arguments
#
# Parsing lives in a function so that option values can be validated before
# they are stored. DAYS_BACK / HOURS_BACK are later used in shell arithmetic
# and for time filtering, so only positive whole numbers are accepted and
# they are normalized to base 10 ("08" becomes 8).

#######################################
# Print the usage text to stdout.
#######################################
_ba_usage() {
    cat <<EOF
Apache/cPanel Domain Log Bot & Botnet Analyzer v3.1

Usage: $0 [-d DAYS | -H HOURS] [-u USER] [-l LOG_DIR] [-o OUTPUT_FILE]

Options:
  -d, --days DAYS       Analyze only logs from last N days (24-hour periods)
  -H, --hours HOURS     Analyze only logs from last N hours
  -u, --user USER       Analyze only logs for specific cPanel user
  -l, --log-dir DIR     Custom log directory (auto-detected by default)
  -o, --output FILE     Custom output file path
  -h, --help            Show this help message

Examples:
  $0                    # Analyze all logs in default directory
  $0 -d 7               # Analyze logs from last 7 days
  $0 -H 6               # Analyze logs from last 6 hours
  $0 -l /custom/path    # Use custom log directory

Note: If both -d and -H are specified, only -H (hours) will be used.

EOF
}

#######################################
# Report a command-line error on stderr and exit with status 1.
# Arguments: $1 - message
#######################################
_ba_option_error() {
    echo "Error: $1" >&2
    echo "Use -h for help" >&2
    exit 1
}

#######################################
# Ensure the option in $1 is followed by a value.
# Without this check `shift 2` fails on a lone trailing option, which exits
# silently under `set -e` and never advances the loop without it.
# Arguments: the remaining command-line words, option first
#######################################
_ba_need_value() {
    [[ $# -ge 2 ]] || _ba_option_error "option $1 requires a value"
}

#######################################
# Ensure a value is a positive whole number.
# Arguments: $1 - option name (for the message), $2 - value to check
#######################################
_ba_check_positive_int() {
    # The numeric test also rejects values too large for the shell to hold.
    if ! [[ "$2" =~ ^[0-9]+$ ]] || ! [ "$2" -gt 0 ] 2>/dev/null; then
        _ba_option_error "option $1 expects a positive whole number, got '$2'"
    fi
}

#######################################
# Parse the analyzer's command-line options.
# Globals written: DAYS_BACK, HOURS_BACK, LOG_DIR, OUTPUT_FILE, FILTER_USER
# Arguments: the script's command-line words
# Exits: 0 after printing help, 1 on an unknown option or a bad value
#######################################
parse_cli_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -d|--days)
                _ba_need_value "$@"
                _ba_check_positive_int "$1" "$2"
                DAYS_BACK=$((10#$2))
                shift 2
                ;;
            -H|--hours)
                _ba_need_value "$@"
                _ba_check_positive_int "$1" "$2"
                HOURS_BACK=$((10#$2))
                shift 2
                ;;
            -l|--log-dir)
                _ba_need_value "$@"
                LOG_DIR="$2"
                shift 2
                ;;
            -o|--output)
                _ba_need_value "$@"
                OUTPUT_FILE="$2"
                shift 2
                ;;
            -u|--user)
                _ba_need_value "$@"
                FILTER_USER="$2"
                shift 2
                ;;
            -h|--help)
                _ba_usage
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                echo "Use -h for help" >&2
                exit 1
                ;;
        esac
    done
}

parse_cli_args "$@"
# The inline loop this replaces consumed every argument; clear the positional
# parameters so code further down still sees an empty argument list.
set --

# Interactive prompts for missing options
#######################################
# Ask the operator which time window to analyze.
# Globals written: HOURS_BACK or DAYS_BACK (neither for "all logs");
#   time_choice, custom_hours and custom_days also stay global, as before.
# Returns: 0 once a valid choice was made, 1 if stdin ends first.
#######################################
prompt_time_range() {
    clear
    print_banner "Bot Analyzer - Time Range Selection"
    echo ""
    echo -e "  ${GREEN}1)${NC} All available logs"
    echo -e "  ${GREEN}2)${NC} Last 1 hour"
    echo -e "  ${GREEN}3)${NC} Last 6 hours"
    echo -e "  ${GREEN}4)${NC} Last 24 hours"
    echo -e "  ${GREEN}5)${NC} Last 7 days"
    echo -e "  ${GREEN}6)${NC} Last 30 days"
    echo -e "  ${GREEN}7)${NC} Custom hours"
    echo -e "  ${GREEN}8)${NC} Custom days"
    echo ""

    while true; do
        # A failed read means stdin reached EOF; stop rather than re-prompt
        # forever. -r keeps backslashes in the input literal.
        read -r -p "Select time range (1-8): " time_choice || return 1

        case "$time_choice" in
            1) return 0 ;;                  # all logs - no filter
            2) HOURS_BACK=1;  return 0 ;;
            3) HOURS_BACK=6;  return 0 ;;
            4) HOURS_BACK=24; return 0 ;;
            5) DAYS_BACK=7;   return 0 ;;
            6) DAYS_BACK=30;  return 0 ;;
            7)
                while true; do
                    read -r -p "Enter number of hours: " custom_hours || return 1
                    if [[ "$custom_hours" =~ ^[0-9]+$ ]] && [ "$custom_hours" -gt 0 ] 2>/dev/null; then
                        # Force base 10: "08" would otherwise be rejected
                        # as invalid octal by later $(( )) arithmetic.
                        HOURS_BACK=$((10#$custom_hours))
                        return 0
                    fi
                    print_error "Invalid input. Please enter a positive number"
                done
                ;;
            8)
                while true; do
                    read -r -p "Enter number of days: " custom_days || return 1
                    if [[ "$custom_days" =~ ^[0-9]+$ ]] && [ "$custom_days" -gt 0 ] 2>/dev/null; then
                        DAYS_BACK=$((10#$custom_days))
                        return 0
                    fi
                    print_error "Invalid input. Please enter a positive number"
                done
                ;;
            *)
                print_error "Invalid choice. Please enter 1-8"
                ;;
        esac
    done
}

#######################################
# Ask whether to analyze every user or a single one.
# Globals written: FILTER_USER (only when a specific user was picked);
#   user_choice stays global, as before.
# Returns: 0 once a valid choice was made, 1 if stdin ends first.
#######################################
prompt_user_scope() {
    clear
    print_banner "Bot Analyzer - User Scope Selection"
    echo ""
    echo -e "  ${GREEN}1)${NC} All users (system-wide analysis)"
    echo -e "  ${GREEN}2)${NC} Specific user"
    echo ""

    local selected

    while true; do
        # A failed read means stdin reached EOF; stop rather than loop forever.
        read -r -p "Select option (1-2): " user_choice || return 1

        case "$user_choice" in
            1)
                return 0
                ;;
            2)
                echo ""
                # Test the selector's own exit status. Assigning inside
                # `local` would report the status of `local` (always 0) and
                # accept whatever a failed or cancelled selection printed.
                if selected=$(select_user_interactive "Select user to analyze") \
                    && [ "$selected" != "ALL" ]; then
                    FILTER_USER="$selected"
                fi
                return 0
                ;;
            *)
                print_error "Invalid choice. Please enter 1 or 2"
                ;;
        esac
    done
}

# Ask the operator for anything the command line left unspecified.

# No time window given at all -> show the time range menu.
if [[ -z "${DAYS_BACK}${HOURS_BACK}" ]]; then
    prompt_time_range
fi

# No user filter given -> show the user scope menu.
if [[ -z "$FILTER_USER" ]]; then
    prompt_user_scope
fi

# When both windows are set, the hours filter wins and the days filter is
# dropped, with a warning on stderr.
if [[ -n "$DAYS_BACK" && -n "$HOURS_BACK" ]]; then
    echo -e "${YELLOW}Warning: Both days and hours specified. Using hours filter only.${NC}" >&2
    DAYS_BACK=""
fi

# ANSI color escape sequences for terminal output.
# They are stored as literal backslash text and only become real escape
# characters when printed through `echo -e`.
NC="\033[0m"            # reset / no color
BOLD="\033[1m"
RED="\033[0;31m"
GREEN="\033[0;32m"
BLUE="\033[0;34m"
CYAN="\033[0;36m"
YELLOW="\033[1;33m"

# Check for required commands
#######################################
# Verify that every external tool the analyzer relies on is installed.
# Outputs: on failure, one error line on stderr naming the missing tools.
# Exits: status 1 when at least one tool is missing; returns 0 otherwise.
#######################################
check_dependencies() {
    # Keep the loop variable local so the check cannot overwrite a variable
    # of the same name belonging to the caller.
    local tool
    local missing_deps=()

    for tool in awk grep sort uniq find sed head tail cut; do
        command -v "$tool" >/dev/null 2>&1 || missing_deps+=("$tool")
    done

    if [ "${#missing_deps[@]}" -gt 0 ]; then
        echo -e "${RED}Error: Missing required commands: ${missing_deps[*]}${NC}" >&2
        exit 1
    fi
}

# Check disk space
#######################################
# Warn when the filesystem holding the toolkit has under 100MB free and let
# the operator decide whether to continue.
# Globals read: SCRIPT_DIR
# Outputs: warnings on stderr; a confirmation prompt when space is low.
# Returns: 0 when space is sufficient, unknown, or the operator accepts.
# Exits: status 1 when space is low and the answer is not y/Y (EOF counts
#   as "no").
#######################################
check_disk_space() {
    local check_path="$SCRIPT_DIR"
    local available_kb

    # -P keeps each filesystem on one line and -k forces 1024-byte units, so
    # the fourth column of the last row is the free space in KB.
    # The `|| ...` keeps a failing df from aborting the script under
    # `set -e -o pipefail` before the warning below can be shown.
    available_kb=$(df -Pk -- "$check_path" 2>/dev/null \
        | awk 'NR > 1 { kb = $4 } END { print kb }') || available_kb=""

    if ! [[ "$available_kb" =~ ^[0-9]+$ ]]; then
        echo -e "${YELLOW}Warning: Cannot determine available disk space for toolkit directory${NC}" >&2
        return 0
    fi

    if [ "$available_kb" -lt 102400 ]; then  # Less than 100MB
        echo -e "${YELLOW}Warning: Low disk space in toolkit directory: $((available_kb/1024))MB available${NC}" >&2
        read -p "Continue anyway? (y/N): " -n 1 -r || REPLY=""
        echo
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            exit 1
        fi
    fi
}

# Run dependency checks
check_dependencies
check_disk_space

# Create the per-run working directory; nothing below can work without it.
if ! mkdir -p "$TEMP_DIR"; then
    echo -e "${RED}Error: Cannot create temp directory: $TEMP_DIR${NC}" >&2
    exit 1
fi

# Cleanup on exit.
# The path is still expanded now, so a later change to TEMP_DIR cannot
# redirect the delete. It is shell-quoted with %q instead of hand-written
# \"...\" so that a quote, dollar sign or backtick in the install path is not
# re-interpreted when the trap string is evaluated. `--` stops rm from
# treating the path as an option.
trap "rm -rf -- $(printf '%q' "$TEMP_DIR")" EXIT

#############################################################################
# Bot Signature Database
#############################################################################
# NOTE: Bot signatures now loaded from lib/bot-signatures.sh
# Arrays available: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS

#############################################################################
# Helper Functions
#############################################################################

print_header() {
    echo -e "\n${CYAN}===============================================================${NC}"
    echo -e "${CYAN}$1${NC}"
    echo -e "${CYAN}===============================================================${NC}\n"
}

print_alert() {
    echo -e "${RED}$1${NC}"
}

print_warning() {
    echo -e "${YELLOW}$1${NC}"
}

print_info() {
    echo -e "${BLUE}  $1${NC}"
}

print_success() {
    echo -e "${GREEN}$1${NC}"
}

#############################################################################
# Log Parsing Functions
#############################################################################

#######################################
# Parse every matching access log into one pipe-delimited working file.
#
# Each request becomes one line of $TEMP_DIR/parsed_logs.txt:
#   ip|domain|url|status|size|user_agent|method|timestamp|referer|accept_lang
# A gzip copy (parsed_logs.txt.gz) is produced by a background job.
#
# Globals read: INTERWORX_MODE, SYS_CONTROL_PANEL, LOG_DIR, HOURS_BACK,
#   DAYS_BACK, FILTER_USER, user_domains (newline-separated domain list,
#   set elsewhere), TEMP_DIR
# Outputs: progress on stderr, status messages via print_* helpers.
# Returns: 0 when at least one entry was parsed, 1 otherwise (also when the
#   time filter value is not a whole number).
#######################################
parse_logs() {
    local parsed_file="$TEMP_DIR/parsed_logs.txt"

    if [ "$INTERWORX_MODE" = "yes" ]; then
        print_info "Parsing InterWorx domain logs from: /home/*/var/*/logs/"
    else
        print_info "Parsing logs from: $LOG_DIR"
    fi

    # ---- Time window (hours takes precedence over days) -------------------
    local find_opts=()
    local cutoff_epoch=""
    local window="${HOURS_BACK:-$DAYS_BACK}"

    if [ -n "$window" ] && ! [[ "$window" =~ ^[0-9]+$ ]]; then
        print_alert "Invalid time filter value: $window"
        return 1
    fi

    # The cutoff is computed once here, as "now minus N hours / N 24-hour
    # periods", instead of spawning `date` from awk for every log file.
    # 10# forces base 10 so values like "08" are not read as octal.
    if [ -n "$HOURS_BACK" ]; then
        find_opts+=(-mmin "-$((10#$HOURS_BACK * 60))")
        cutoff_epoch=$(( $(date +%s) - 10#$HOURS_BACK * 3600 ))
        print_info "Filtering logs from last $HOURS_BACK hours"
    elif [ -n "$DAYS_BACK" ]; then
        find_opts+=(-mtime "-$((10#$DAYS_BACK))")
        cutoff_epoch=$(( $(date +%s) - 10#$DAYS_BACK * 86400 ))
        print_info "Filtering logs from last $DAYS_BACK days"
    fi

    # ---- Where to look, depending on the control panel --------------------
    local log_roots=()
    local log_search_name
    if [ "$INTERWORX_MODE" = "yes" ]; then
        # InterWorx: Official docs from https://appendix.interworx.com/current/nodeworx/general/other/log-file-locations.html
        # HTTP:  /home/{user}/var/{domain}/logs/transfer.log
        # HTTPS: /home/{user}/var/{domain}/logs/transfer-ssl.log
        # The wildcard has to be expanded by the shell here: find does not
        # expand wildcards in its starting points, and a quoted variable
        # holding the pattern is never globbed.
        local candidate
        for candidate in /home/*/var/*/logs; do
            if [ -d "$candidate" ]; then
                log_roots+=("$candidate")
            fi
        done
        log_search_name="transfer*.log"
    else
        # cPanel: /var/log/apache2/domlogs/domain.com or domain.com-ssl_log
        # Plesk: Research verified paths from https://docs.plesk.com/en-US/obsidian/
        #   Apache HTTP:  /var/www/vhosts/system/{domain}/logs/access_log
        #   Apache HTTPS: /var/www/vhosts/system/{domain}/logs/access_ssl_log
        #   nginx HTTP:   /var/www/vhosts/system/{domain}/logs/proxy_access_log
        #   nginx HTTPS:  /var/www/vhosts/system/{domain}/logs/proxy_access_ssl_log
        # Note: /var/www/vhosts/{domain}/logs/ are hardlinks (backward compat)
        log_roots=("$LOG_DIR")
        log_search_name="*"
    fi

    # ---- Parse all domain logs --------------------------------------------
    local logfile domain
    local file_count=0
    local progress_interval=5  # Show progress every 5 files
    echo ""
    : > "$parsed_file"

    # With no starting point find would search the current directory, so it
    # is skipped entirely when nothing matched.
    if [ "${#log_roots[@]}" -gt 0 ]; then
        # Best effort: an unreadable directory or file must not abort the
        # whole run; the emptiness check below decides success.
        find "${log_roots[@]}" -type f -name "$log_search_name" \
            ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" \
            "${find_opts[@]}" 2>/dev/null \
        | while IFS= read -r logfile; do
            # Skip empty files
            [ -s "$logfile" ] || continue

            # Extract domain name based on control panel
            if [ "$INTERWORX_MODE" = "yes" ]; then
                # /home/user/var/domain.com/logs/transfer*.log
                domain=$(printf '%s\n' "$logfile" | sed -n 's|^/home/.*/var/\([^/]*\)/logs/.*|\1|p')
            elif [ "$SYS_CONTROL_PANEL" = "plesk" ]; then
                # /var/www/vhosts/system/domain.com/logs/{access_log,...}
                domain=$(printf '%s\n' "$logfile" | sed -n 's|^/var/www/vhosts/system/\([^/]*\)/logs/.*|\1|p')
            else
                # cPanel: file name is domain.com or domain.com-ssl_log
                domain=${logfile##*/}
                domain=${domain%-ssl_log}
            fi

            # Skip if domain extraction failed
            [ -n "$domain" ] || continue

            # User filtering: skip domains not belonging to the specified user
            if [ -n "$FILTER_USER" ]; then
                if ! grep -qFx -- "$domain" <<< "$user_domains"; then
                    continue
                fi
            fi

            # Progress goes to stderr: stdout of this loop is the data file,
            # and anything printed there would be fused with a log record.
            file_count=$((file_count + 1))
            if [ $((file_count % progress_interval)) -eq 0 ]; then
                echo -ne "\r  Parsed $file_count log files... (current: $domain)" >&2
            fi

            # Parse Apache Combined Log Format
            # Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT"
            # The program sticks to POSIX awk (RSTART/RLENGTH, no 3-argument
            # match, no mktime) so it does not depend on gawk.
            awk -v domain="$domain" -v cutoff_epoch="$cutoff_epoch" '
            # Replace the field delimiter inside a value so client-supplied
            # text cannot shift the columns of the output record.
            function pipe_safe(s) {
                gsub(/[|]/, "%7C", s)
                return s
            }
            # s starts right after an opening quote: return text before the next quote.
            function quoted_prefix(s) {
                return substr(s, 1, index(s, "\"") - 1)
            }
            # Convert "dd/Mon/yyyy:HH:MM:SS +zzzz" to epoch seconds using the
            # UTC offset written in the entry (treated as UTC if absent).
            # Returns "" when the stamp cannot be parsed.
            function to_epoch(stamp,    parts, dt, mon, a, y, m, jdn, secs, off, sign) {
                split(stamp, parts, " ")
                if (split(parts[1], dt, /[\/:]/) < 6) return ""
                if (!(dt[2] in month)) return ""
                mon = month[dt[2]]
                a = int((14 - mon) / 12)
                y = dt[3] + 4800 - a
                m = mon + 12 * a - 3
                jdn = dt[1] + int((153 * m + 2) / 5) + 365 * y + int(y / 4) - int(y / 100) + int(y / 400) - 32045
                secs = (jdn - 2440588) * 86400 + dt[4] * 3600 + dt[5] * 60 + dt[6]
                off = parts[2]
                if (off ~ /^[+-][0-9][0-9][0-9][0-9]$/) {
                    sign = (substr(off, 1, 1) == "-") ? -1 : 1
                    secs -= sign * (substr(off, 2, 2) * 3600 + substr(off, 4, 2) * 60)
                }
                return secs
            }
            BEGIN {
                # Month name to number lookup
                month["Jan"]=1; month["Feb"]=2; month["Mar"]=3; month["Apr"]=4
                month["May"]=5; month["Jun"]=6; month["Jul"]=7; month["Aug"]=8
                month["Sep"]=9; month["Oct"]=10; month["Nov"]=11; month["Dec"]=12
            }
            {
                # Skip empty lines and malformed entries
                if (NF < 10 || length($0) < 50) next

                # IP is the first space-separated field
                ip = $1

                # Timestamp is the text between square brackets
                timestamp = "unknown"
                if (match($0, /\[[^]]+\]/)) {
                    timestamp = substr($0, RSTART + 1, RLENGTH - 2)
                }

                # Drop entries older than the cutoff; entries whose time
                # cannot be determined are kept.
                if (cutoff_epoch != "" && timestamp != "unknown") {
                    log_epoch = to_epoch(timestamp)
                    if (log_epoch != "" && log_epoch < cutoff_epoch + 0) next
                }

                # Request line, status and size. The matched text is
                #   METHOD URL PROTOCOL" STATUS SIZE
                # and is taken apart left to right.
                http_method = "-"; request_url = "-"; status = "-"; size = "0"
                if (match($0, /"[A-Z]+ [^ ]+ [^"]*" [0-9]+ [0-9-]+/)) {
                    req = substr($0, RSTART + 1, RLENGTH - 1)
                    sp = index(req, " ")
                    http_method = substr(req, 1, sp - 1)
                    req = substr(req, sp + 1)
                    sp = index(req, " ")
                    request_url = substr(req, 1, sp - 1)
                    req = substr(req, sp + 1)
                    req = substr(req, index(req, "\"") + 2)
                    sp = index(req, " ")
                    status = substr(req, 1, sp - 1)
                    size = substr(req, sp + 1)
                }

                # User-Agent: the last quoted string on the line
                user_agent = "-"
                if (match($0, /"[^"]*"[[:space:]]*$/)) {
                    user_agent = quoted_prefix(substr($0, RSTART + 1))
                    if (user_agent == "") user_agent = "-"
                }

                # Referer: the quoted string just before the User-Agent
                referer = "-"
                if (match($0, /"[^"]*"[[:space:]]*"[^"]*"[[:space:]]*$/)) {
                    referer = quoted_prefix(substr($0, RSTART + 1))
                    if (referer == "") referer = "-"
                }

                # Accept-Language, only present in custom log formats.
                # awk has no /i flag, so case is spelled out in the pattern;
                # 17 is the length of the "Accept-Language: " prefix.
                accept_lang = "-"
                if (match($0, /[Aa][Cc][Cc][Ee][Pp][Tt]-[Ll][Aa][Nn][Gg][Uu][Aa][Gg][Ee]: [^ ,;]*/)) {
                    accept_lang = substr($0, RSTART + 17, RLENGTH - 17)
                    if (accept_lang == "") accept_lang = "-"
                }

                # Only output valid entries
                if (ip != "" && ip !~ /^[[:space:]]*$/) {
                    print pipe_safe(ip) "|" domain "|" pipe_safe(request_url) "|" status "|" size "|" pipe_safe(user_agent) "|" http_method "|" pipe_safe(timestamp) "|" pipe_safe(referer) "|" pipe_safe(accept_lang)
                }
            }' "$logfile" 2>/dev/null || true
        done > "$parsed_file" || true
    fi

    # Clear the progress line
    echo -ne "\r\033[K" >&2

    if [ ! -s "$parsed_file" ]; then
        print_alert "No log entries were parsed. Check log format or permissions."
        return 1
    fi

    local line_count file_size_kb
    line_count=$(wc -l < "$parsed_file")
    line_count=$((line_count))   # strips padding some wc implementations add
    file_size_kb=$(du -k "$parsed_file" 2>/dev/null | cut -f1) || file_size_kb=""
    [ -n "$file_size_kb" ] || file_size_kb="0"

    # Compress for storage (gzip saves ~90% space on text)
    # But we keep uncompressed version for fast analysis
    gzip -c "$parsed_file" > "$parsed_file.gz" &

    print_success "Logs parsed successfully ($line_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}

#############################################################################
# Bot Detection & Classification
#############################################################################

#######################################
# Join the given names with "|" to form a regex alternation.
# Arguments: the names (none yields an empty string)
#######################################
_classify_join_names() {
    local IFS='|'
    printf '%s' "$*"
}

#######################################
# Classify parsed requests by user agent.
#
# Reads $TEMP_DIR/parsed_logs.txt and writes $TEMP_DIR/classified_bots.txt
# with the first eight input fields plus bot_type and bot_name:
#   ip|domain|url|status|size|ua|method|timestamp|bot_type|bot_name
# Requests that match no category are left out. Categories are tried in the
# order legit, ai, monitor, suspicious, then generic bot keywords.
#
# Globals read: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS (their
#   keys are used as case-insensitive patterns), TEMP_DIR
# Returns: 0 when at least one request was classified, 1 otherwise.
#######################################
classify_bots() {
    print_info "Classifying bot traffic..."

    # One alternation pattern per category, built from the array keys.
    local legit_pattern ai_pattern monitor_pattern suspicious_pattern
    legit_pattern=$(_classify_join_names "${!LEGIT_BOTS[@]}")
    ai_pattern=$(_classify_join_names "${!AI_BOTS[@]}")
    monitor_pattern=$(_classify_join_names "${!MONITOR_BOTS[@]}")
    suspicious_pattern=$(_classify_join_names "${!SUSPICIOUS_BOTS[@]}")

    # Process logs with AWK for better performance. Only POSIX awk features
    # are used (no 3-argument match), so gawk is not required.
    awk -F'|' -v legit="$legit_pattern" -v ai="$ai_pattern" -v monitor="$monitor_pattern" -v suspicious="$suspicious_pattern" '
    # Split a "|" list once; keep original and lowercase spellings.
    function load_names(list, names, names_lc,    n, i) {
        n = split(list, names, "|")
        for (i = 1; i <= n; i++) names_lc[i] = tolower(names[i])
        return n
    }
    # Name whose pattern matches the longest part of the user agent, so the
    # most specific signature wins regardless of list order.
    function best_name(ua_lc, names, names_lc, n,    i, best, best_len) {
        best = "Unknown"
        best_len = 0
        for (i = 1; i <= n; i++) {
            if (names_lc[i] != "" && match(ua_lc, names_lc[i]) && RLENGTH > best_len) {
                best = names[i]
                best_len = RLENGTH
            }
        }
        return best
    }
    # A complete browser signature: engine plus browser version tokens.
    function is_real_browser(u) {
        # Chrome/Chromium-based: Chrome/ AND (AppleWebKit OR Mobile)
        if (u ~ /chrome\/[0-9]/ && (u ~ /applewebkit/ || u ~ /mobile/)) return 1
        # Firefox: Firefox/ AND Gecko/
        if (u ~ /firefox\/[0-9]/ && u ~ /gecko\//) return 1
        # Safari: Safari/ AND Version/ AND AppleWebKit, but not Chrome
        if (u ~ /safari\/[0-9]/ && u ~ /version\// && u ~ /applewebkit/ && u !~ /chrome/) return 1
        # Edge: Edg/ or Edge/
        if (u ~ /edg\/[0-9]|edge\/[0-9]/) return 1
        # Mobile browsers: Samsung, UC, Opera Mobile
        if (u ~ /samsungbrowser\/[0-9]|ucbrowser\/[0-9]|opr\/[0-9]/) return 1
        return 0
    }
    BEGIN {
        # Lowercase everything once for case-insensitive matching
        legit_lower = tolower(legit)
        ai_lower = tolower(ai)
        monitor_lower = tolower(monitor)
        suspicious_lower = tolower(suspicious)

        n_legit = load_names(legit, legit_names, legit_names_lc)
        n_ai = load_names(ai, ai_names, ai_names_lc)
        n_monitor = load_names(monitor, monitor_names, monitor_names_lc)
        n_suspicious = load_names(suspicious, suspicious_names, suspicious_names_lc)
    }
    {
        ua = $6
        ua_lower = tolower(ua)

        bot_type = "unknown"
        bot_name = "Unknown"

        # Check each category in priority order
        if (legit != "" && match(ua_lower, legit_lower)) {
            bot_type = "legit"
            bot_name = best_name(ua_lower, legit_names, legit_names_lc, n_legit)
        } else if (ai != "" && match(ua_lower, ai_lower)) {
            bot_type = "ai"
            bot_name = best_name(ua_lower, ai_names, ai_names_lc, n_ai)
        } else if (monitor != "" && match(ua_lower, monitor_lower)) {
            bot_type = "monitor"
            bot_name = best_name(ua_lower, monitor_names, monitor_names_lc, n_monitor)
        } else if (suspicious != "" && match(ua_lower, suspicious_lower)) {
            bot_type = "suspicious"
            bot_name = best_name(ua_lower, suspicious_names, suspicious_names_lc, n_suspicious)
        } else if (ua_lower ~ /bot|crawler|spider|scraper|curl|wget|python-requests|python-urllib|java\/|scan|check|monitor/) {
            # Generic bot keyword found. A user agent that also carries a
            # complete browser signature is treated as a browser and skipped.
            if (is_real_browser(ua_lower)) next

            bot_type = "unidentified_bot"
            # First word of the UA, capped at 30 characters, is the name
            bot_name = ""
            if (match(ua, /^[^ ]+/)) {
                bot_name = substr(substr(ua, RSTART, RLENGTH), 1, 30)
            }
        }

        # Only print requests that were identified as something
        if (bot_type != "unknown") {
            print $1 "|" $2 "|" $3 "|" $4 "|" $5 "|" ua "|" $7 "|" $8 "|" bot_type "|" bot_name
        }
    }' < "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"

    if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
        print_alert "Bot classification failed"
        return 1
    fi

    local classified_count file_size_kb
    classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
    classified_count=$((classified_count))   # strips padding some wc implementations add
    file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1) || file_size_kb=""
    [ -n "$file_size_kb" ] || file_size_kb="0"

    # Compress for storage in background
    gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" &

    print_success "Bot classification complete ($classified_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}

#############################################################################
# NEW: Baseline Management (historical tracking for anomaly detection)
#############################################################################

#######################################
# Print the number of lines in a file, or 0 when it is missing/unreadable.
# Arguments: $1 - file path
#######################################
_baseline_count_lines() {
    local n=0
    if [ -r "$1" ]; then
        n=$(wc -l < "$1") || n=0
    fi
    # Arithmetic expansion strips padding some wc implementations add.
    echo "$((n))"
}

#######################################
# Append the metrics of this run to the baseline history files.
#
# Per domain ($BASELINE_DIR/<domain>_baseline.txt):
#   date|requests|attacks|bots|high_risk_ips
# Global ($BASELINE_DIR/global_baseline.txt):
#   date|requests|unique_ips|bot_pct|total_attacks|sqli|xss|path|rce|login|high_risk_ips
# Each file is trimmed to its last 30 lines.
# NOTE(review): every run appends a line, so several runs on one day use up
# several of the 30 slots - confirm whether one line per day is intended.
#
# Globals read: TEMP_DIR, BASELINE_DIR
# Inputs under TEMP_DIR (each optional, missing counts as 0): parsed_logs.txt,
#   classified_bots.txt, attack_vectors_raw.txt, threat_scores.txt,
#   all_domains.txt and the *_attempts.txt files. The per-domain counts use
#   field 2 of the pipe-delimited files as the domain.
#######################################
save_baseline() {
    print_info "Storing baseline metrics for anomaly comparison..."

    local today
    today=$(date +%Y%m%d)

    local parsed="$TEMP_DIR/parsed_logs.txt"
    local bots="$TEMP_DIR/classified_bots.txt"
    local attacks="$TEMP_DIR/attack_vectors_raw.txt"
    local scores="$TEMP_DIR/threat_scores.txt"
    local domains="$TEMP_DIR/all_domains.txt"

    # ---- Run-wide metrics -------------------------------------------------
    # Counts come from a single command each. The earlier
    # `cmd | wc -l || echo 0` form printed "0" twice under pipefail whenever
    # the first command found nothing, which put broken rows in the history.
    local total_requests bot_requests
    local unique_ips=0 bot_pct=0 high_risk_ips=0
    total_requests=$(_baseline_count_lines "$parsed")
    bot_requests=$(_baseline_count_lines "$bots")
    if [ -r "$parsed" ]; then
        unique_ips=$(awk -F'|' '!seen[$1]++ { n++ } END { print n + 0 }' "$parsed") || unique_ips=0
    fi
    if [ "$total_requests" -gt 0 ]; then
        bot_pct=$((bot_requests * 100 / total_requests))
    fi

    local sqli_attempts xss_attempts path_attempts rce_attempts login_attempts
    sqli_attempts=$(_baseline_count_lines "$TEMP_DIR/sqli_attempts.txt")
    xss_attempts=$(_baseline_count_lines "$TEMP_DIR/xss_attempts.txt")
    path_attempts=$(_baseline_count_lines "$TEMP_DIR/path_traversal_attempts.txt")
    rce_attempts=$(_baseline_count_lines "$TEMP_DIR/rce_upload_attempts.txt")
    login_attempts=$(_baseline_count_lines "$TEMP_DIR/login_bruteforce_attempts.txt")
    local total_attacks=$((sqli_attempts + xss_attempts + path_attempts + rce_attempts + login_attempts))

    if [ -r "$scores" ]; then
        high_risk_ips=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' "$scores") || high_risk_ips=0
    fi

    # ---- Per-domain history -----------------------------------------------
    if [ -f "$domains" ]; then
        # One awk pass tallies every domain at once (instead of three greps
        # per domain) and compares the domain field exactly, so the dots in
        # "a.com" no longer match arbitrary characters. `kind=...` operands
        # tell awk which input it is reading.
        local awk_inputs=()
        if [ -r "$parsed" ];  then awk_inputs+=(kind=req "$parsed");  fi
        if [ -r "$attacks" ]; then awk_inputs+=(kind=atk "$attacks"); fi
        if [ -r "$bots" ];    then awk_inputs+=(kind=bot "$bots");    fi
        awk_inputs+=(kind=list "$domains")

        local domain domain_requests domain_attacks domain_bots baseline_file
        while IFS='|' read -r domain domain_requests domain_attacks domain_bots; do
            [ -n "$domain" ] || continue
            baseline_file="$BASELINE_DIR/${domain}_baseline.txt"

            # Append to baseline history (timestamp|requests|attacks|bots|high_risk_ips)
            echo "$today|$domain_requests|$domain_attacks|$domain_bots|$high_risk_ips" >> "$baseline_file"

            # Keep only the last 30 entries
            tail -n 30 "$baseline_file" > "$baseline_file.tmp" && mv "$baseline_file.tmp" "$baseline_file"
        done < <(awk -F'|' '
            kind == "req"  { req[$2]++; next }
            kind == "atk"  { atk[$2]++; next }
            kind == "bot"  { bot[$2]++; next }
            kind == "list" { printf "%s|%d|%d|%d\n", $0, req[$0], atk[$0], bot[$0] }
        ' "${awk_inputs[@]}")
    fi

    # ---- Global history ---------------------------------------------------
    local global_baseline="$BASELINE_DIR/global_baseline.txt"
    echo "$today|$total_requests|$unique_ips|$bot_pct|$total_attacks|$sqli_attempts|$xss_attempts|$path_attempts|$rce_attempts|$login_attempts|$high_risk_ips" >> "$global_baseline"
    tail -n 30 "$global_baseline" > "$global_baseline.tmp" && mv "$global_baseline.tmp" "$global_baseline"

    print_success "Baseline stored"
}

#######################################
# Print the stored baseline history of one domain.
# Globals read: BASELINE_DIR
# Arguments: $1 - domain name
# Outputs: the contents of <domain>_baseline.txt; nothing when the file
#   does not exist.
#######################################
get_domain_baseline() {
    local history="${BASELINE_DIR}/${1}_baseline.txt"

    # No history yet is not an error: print nothing.
    [ -f "$history" ] || return 0

    cat "$history"
}

#######################################
# Print the integer average of one metric over a domain's latest history.
# Globals read: BASELINE_DIR
# Arguments:
#   $1 - domain name
#   $2 - metric: attacks, bots or high_risk; anything else means requests
#   $3 - number of most recent history lines to average (default 7)
# Outputs: the average, truncated to an integer; 0 when the domain has no
#   history file or no lines.
#######################################
calculate_baseline_average() {
    local domain="$1" metric="$2" days="${3:-7}"
    local history="$BASELINE_DIR/${domain}_baseline.txt"
    local field

    if [ ! -f "$history" ]; then
        echo "0"
        return
    fi

    # Map the metric name to its column in date|requests|attacks|bots|high_risk
    case "$metric" in
        attacks)   field=3 ;;
        bots)      field=4 ;;
        high_risk) field=5 ;;
        *)         field=2 ;;
    esac

    tail -"$days" "$history" 2>/dev/null | awk -F'|' -v col="$field" '
        { total += $col; rows++ }
        END {
            avg = 0
            if (rows > 0) avg = int(total / rows)
            print avg
        }'
}

#############################################################################
# NEW: Attack Progression/Timeline Analysis
#############################################################################

#######################################
# Build a chronological request list for each high-risk IP and record when
# its activity started.
#
# For up to 20 IPs whose score (field 1 of threat_scores.txt) is >= 70:
#   $TEMP_DIR/progression_<ip>.txt   timestamp|url|status|user_agent lines,
#                                    oldest first
#   $TEMP_DIR/attack_phases.txt      ip|phase|first_timestamp (appended)
# The phase is always "reconnaissance" at present.
#
# Globals read: TEMP_DIR
#######################################
analyze_attack_progression() {
    print_info "Analyzing attack progression and sequences..."

    local tab=$'\t'
    local ip progression_file phase phase_start

    # awk stops after 20 IPs on its own; piping into `head` could kill awk
    # with SIGPIPE, which counts as a failure under pipefail.
    while IFS= read -r ip; do
        [ -n "$ip" ] || continue
        progression_file="$TEMP_DIR/progression_${ip}.txt"

        # Select the requests of this IP by exact comparison (the dots of an
        # address are not regex wildcards here) and order them by time.
        # Log timestamps start with the day of month, so sorting the text
        # itself would put 01/Feb before 31/Jan; a yyyymmddHHMMSS key is
        # added for sorting and cut off again afterwards.
        # NOTE(review): the key ignores the UTC offset - assumes all entries
        # were logged with the same offset; confirm.
        awk -F'|' -v ip="$ip" '
            BEGIN {
                mon["Jan"]=1; mon["Feb"]=2; mon["Mar"]=3; mon["Apr"]=4
                mon["May"]=5; mon["Jun"]=6; mon["Jul"]=7; mon["Aug"]=8
                mon["Sep"]=9; mon["Oct"]=10; mon["Nov"]=11; mon["Dec"]=12
            }
            $1 == ip {
                # Entries without a parsable time sort after all others
                key = "99999999999999"
                if (split($8, t, /[\/: ]/) >= 6 && (t[2] in mon)) {
                    key = sprintf("%04d%02d%02d%02d%02d%02d", t[3], mon[t[2]], t[1], t[4], t[5], t[6])
                }
                print key "\t" $8 "|" $3 "|" $4 "|" $6
            }' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null \
            | LC_ALL=C sort -s -t "$tab" -k1,1 \
            | cut -f2- > "$progression_file"

        # Detect attack phases
        phase="reconnaissance"
        phase_start=""
        if [ -s "$progression_file" ]; then
            IFS='|' read -r phase_start _ < "$progression_file" || true
        fi

        echo "$ip|$phase|$phase_start" >> "$TEMP_DIR/attack_phases.txt"
    done < <(awk -F'|' '$1 >= 70 { print $2; if (++n >= 20) exit }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null)

    # Make sure the file exists even when no IP qualified
    touch "$TEMP_DIR/attack_phases.txt"
    print_success "Attack progression analysis complete"
}

#############################################################################
# Header Analysis for Bot Detection
#############################################################################

#######################################
# Score every client IP on suspicious request-header patterns.
#
# Reads $TEMP_DIR/parsed_logs.txt (fields used: 1 ip, 2 domain, 7 method,
# 9 referer, 10 accept_lang) and writes one line per IP whose score is 8 or
# more to $TEMP_DIR/header_anomalies.txt:
#   ip|header_anomaly|score
# The output file always exists afterwards, possibly empty.
#
# Score components (each counted once per IP):
#   +2  at least one request without Accept-Language
#   +3  at least one request with a catch-all Accept-Language ("*" or "*/*")
#   +1  at least one request without Referer
#   +5  Referer naming a scanner/attack tool
#   +2  both an off-site Referer and a missing Referer were seen
#   +4  more than 50 HEAD requests
#   +10 any OPTIONS or TRACE request
#
# Globals read: TEMP_DIR
#######################################
analyze_headers() {
    print_info "Analyzing request headers for bot patterns..."

    local anomalies_file="$TEMP_DIR/header_anomalies.txt"

    # Start from an empty file so the result never contains stale lines.
    : > "$anomalies_file"

    awk -F'|' -v outfile="$anomalies_file" '
    {
        ip = $1
        domain_lower = tolower($2)
        method = $7
        referer = $9
        accept_lang = $10
        referer_lower = tolower(referer)

        # Remember every IP: scoring must cover all of them, not only the
        # ones that happened to send a request without Accept-Language.
        seen[ip] = 1

        # Pattern 1: empty or missing Accept-Language
        if (accept_lang == "-" || accept_lang == "") {
            empty_lang[ip]++
        }

        # Pattern 2: catch-all Accept-Language
        # (browsers send a specific list such as en-US,en;q=0.9)
        if (accept_lang == "*/*" || accept_lang == "*") {
            accepts_all[ip]++
        }

        # Pattern 3: no Referer
        if (referer == "-" || referer == "") {
            no_referer[ip]++
        }

        # Pattern 4: Referer naming a scanner or attack tool
        if (referer_lower ~ /badbot|crawler|scanner|nikto|nmap|metasploit|sqlmap/) {
            suspicious_referer[ip]++
        }

        # Pattern 5: Referer from another site that is not a well-known
        # search engine or social network. The domain is compared as plain
        # lowercase text, so letter case and the dots in the name cannot
        # change the outcome. An empty domain counts as a match.
        if (referer != "-") {
            same_site = (domain_lower == "" || index(referer_lower, domain_lower) > 0)
            if (!same_site && referer_lower !~ /google|bing|yahoo|facebook|twitter|reddit|instagram/) {
                cross_domain_referer[ip]++
            }
        }

        # Pattern 6: HEAD requests (probing without fetching content)
        if (method == "HEAD") {
            head_requests[ip]++
        }

        # Pattern 7: OPTIONS/TRACE requests (security testing)
        if (method == "OPTIONS" || method == "TRACE") {
            dangerous_methods[ip]++
        }
    }
    END {
        for (ip in seen) {
            score = 0

            if (ip in empty_lang) score += 2
            if (ip in accepts_all) score += 3
            if (ip in no_referer) score += 1
            if (ip in suspicious_referer) score += 5
            if ((ip in cross_domain_referer) && (ip in no_referer)) score += 2
            if ((ip in head_requests) && head_requests[ip] > 50) score += 4
            if (ip in dangerous_methods) score += 10

            # Only flag if high header suspicion score
            if (score >= 8) {
                print ip "|header_anomaly|" score > outfile
            }
        }
        close(outfile)
    }' < "$TEMP_DIR/parsed_logs.txt"

    print_success "Header analysis complete"
}

#############################################################################
# NEW: Entry Point Analysis (where bots start)
#############################################################################

# Classify the very first request seen from every client IP.
#
# Globals:   TEMP_DIR (read); suspicious_count (written - deliberately left
#            global, as before, in case a later step reads it)
# Input:     $TEMP_DIR/parsed_logs.txt, pipe-delimited ($1=ip, $3=url, $4=status)
# Outputs:   $TEMP_DIR/suspicious_entry_points.txt   ip|admin_entry|url|status
#            $TEMP_DIR/normal_entry_points.txt       ip|normal_entry|url
#            $TEMP_DIR/static_entry_points.txt       ip|static_entry|url
#            All three files exist afterwards, possibly empty.
analyze_entry_points() {
    print_info "Analyzing first request patterns (bot vs. user entry points)..."

    local suspicious_file="$TEMP_DIR/suspicious_entry_points.txt"
    local normal_file="$TEMP_DIR/normal_entry_points.txt"
    local static_file="$TEMP_DIR/static_entry_points.txt"

    # Start from empty files so results of an earlier run in the same
    # directory can never be mistaken for results of this one.
    : > "$suspicious_file"
    : > "$normal_file"
    : > "$static_file"

    awk -F'|' -v suspicious_out="$suspicious_file" \
              -v normal_out="$normal_file" \
              -v static_out="$static_file" '
    # Remember only the first line of each IP (input order = log order)
    !($1 in first_url) {
        first_url[$1] = $3
        first_status[$1] = $4
    }
    END {
        for (ip in first_url) {
            url = first_url[ip]
            url_lower = tolower(url)

            # Suspicious entry points indicate bot/scanner
            if (url_lower ~ /wp-admin|phpmyadmin|admin|xmlrpc|shell\.php|\.env|\.git|backdoor|config\.php/) {
                print ip "|admin_entry|" url "|" first_status[ip] > suspicious_out
            }
            # Legitimate entry: homepage or search
            else if (url_lower ~ /^\/index|^\/$|^\/search|^\/page|^\/category/) {
                print ip "|normal_entry|" url > normal_out
            }
            # Unusual but possible: static files
            else if (url_lower ~ /\.(css|js|jpg|png|gif|woff|svg)$/) {
                print ip "|static_entry|" url > static_out
            }
        }
        close(suspicious_out)
        close(normal_out)
        close(static_out)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Always report, including the "0 found" case
    suspicious_count=$(wc -l < "$suspicious_file")
    print_success "Found $suspicious_count IPs with suspicious entry points"
}

#############################################################################
# Threat Detection
#############################################################################

detect_threats() {
    print_info "Detecting security threats..."

    # Use a single AWK pass for multiple threat detections (more efficient)
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1
        domain = $2
        url = $3
        status = $4
        size = $5
        ua = $6
        method = $7
        url_lower = tolower(url)
        ua_lower = tolower(ua)

        # SQL Injection patterns (enhanced)
        # FIXED: Hex pattern now requires SQL context to avoid false positives on blockchain/product IDs
        if (match(url_lower, /union.*select|concat\(|benchmark\(|sleep\(|waitfor|cast\(|exec\(/) ||
            match(url_lower, /information_schema|drop table|insert into|update.*set|delete from/) ||
            match(url_lower, /%27.*(union|select|or |and )|hex\(|unhex\(|load_file\(/) ||
            match(url_lower, /0x[0-9a-f]+.*(union|select|into|from|where|order)/)) {
            print ip "|" domain "|" url "|" status "|sqli" > tmpdir "/attack_vectors_raw.txt"
        }

        # XSS patterns
        # FIXED: DOM-based patterns (document.cookie, .innerhtml) only flagged in query strings
        # This prevents false positives on documentation URLs like /docs/innerhtml-api-guide
        if (match(url_lower, /<script|javascript:|onerror=|onload=|<iframe|eval\(|alert\(/) ||
            match(url_lower, /\?.*(document\.cookie|document\.write|\.innerhtml)/)) {
            print ip "|" domain "|" url "|" status "|xss" > tmpdir "/attack_vectors_raw.txt"
        }

        # Path Traversal / LFI
        # FIXED: Added URL-encoded variants (%2e%2e, %5c for backslash)
        # FIXED: Case-insensitive hex encoding support (%5C and %5c)
        if (match(url_lower, /\.\.\/|\.\.\\|%2e%2e|%5c|etc\/passwd|etc\/shadow|boot\.ini|win\.ini/) ||
            match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows(%5c|[\/\\])system32/)) {
            print ip "|" domain "|" url "|" status "|path_traversal" > tmpdir "/attack_vectors_raw.txt"
        }

        # Shell upload / RCE attempts
        # FIXED: Removed overly broad "any POST to .php" condition that caused massive false positives
        # Now only detects actual shell commands, known malicious files, and suspicious upload patterns
        if (match(url_lower, /cmd\.exe|\/bin\/bash|\/bin\/sh|phpinfo\(|system\(|exec\(|passthru\(|eval\(/) ||
            match(url_lower, /shell\.php|c99\.php|r57\.php|r00t\.php|backdoor|webshell|cmd\.php|exploit\.php/) ||
            match(url_lower, /base64_decode.*eval|gzinflate.*eval|assert.*\$_/) ||
            (match(url_lower, /\.(php|phtml|php3|php4|php5|phar)\.suspected$/) && method == "POST")) {
            print ip "|" domain "|" url "|" status "|rce_upload" > tmpdir "/attack_vectors_raw.txt"
        }

        # Info Disclosure attempts
        # FIXED: Added status code validation - only flag successful access (200/301/302)
        # FIXED: readme pattern now only matches actual files (.txt, .html, .md)
        # FIXED: Added more backup file extensions and URL-encoded variants
        # FIXED: phpinfo now only matches .php files (not documentation URLs)
        # FIXED: Removed sitemap.xml.gz (intentionally public for SEO)
        if (match(url_lower, /\.git\/|\.env|\.sql$|\.bak$|\.old$|\.backup$|\.orig$|\.swp$|\.sav$|~$|config\.php|phpinfo\.php/) ||
            match(url_lower, /readme\.(txt|html|md)$/) ||
            match(url_lower, /web\.config|\.htaccess|\.htpasswd/) ||
            match(url_lower, /database\.sql|backup\.zip|backup\.tar|dump\.sql/)) {
            # Only flag if successful access (200) or redirect (301/302)
            # Failed attempts (404/403) are just scanning, tracked separately
            if (status ~ /^(200|301|302)/) {
                print ip "|" domain "|" url "|" status "|info_disclosure" > tmpdir "/attack_vectors_raw.txt"
            }
        }

        # composer.json / package.json - lower severity, only if successful
        if (match(url_lower, /composer\.json|package\.json|package-lock\.json/) && status == "200") {
            print ip "|" domain "|" url "|" status "|config_exposure" > tmpdir "/attack_vectors_raw.txt"
        }

        # Login bruteforce
        if (match(url_lower, /wp-login\.php|xmlrpc\.php/) && method == "POST") {
            print ip "|" domain "|" url "|" status "|login_bruteforce" > tmpdir "/attack_vectors_raw.txt"
        }

        # Admin/sensitive endpoint probing
        # FIXED: Only count FAILED attempts (403/401/404) - successful logins are legitimate
        if (match(url_lower, /wp-admin|phpmyadmin|admin|administrator|login|wp-login|xmlrpc/) ||
            match(url_lower, /\.env|\.git|\.sql|backup|config\./)) {
            # Only flag failed access attempts (403 Forbidden, 401 Unauthorized, 404 Not Found)
            # Successful access (200/302) means legitimate user or already compromised
            if (status ~ /^(403|401|404)/) {
                print ip "|" domain "|" url > tmpdir "/admin_probes_raw.txt"
            }
        }

        # 404 scanning (reconnaissance)
        if (status == "404" || status == "403") {
            print ip "|" domain "|" url "|" status > tmpdir "/404_scans_raw.txt"
        }

        # Large data transfers (potential scraping)
        if (size > 1000000) {
            print ip "|" domain "|" url "|" size > tmpdir "/large_transfers_raw.txt"
        }

        # Suspicious user agents
        if (match(ua_lower, /nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp/) ||
            match(ua_lower, /metasploit|<script|null|python-requests|go-http-client/)) {
            print ip "|" ua > tmpdir "/suspicious_ua_raw.txt"
        }

        # Track response codes for intelligence
        print status > tmpdir "/response_codes_raw.txt"
    }
    ' < "$TEMP_DIR/parsed_logs.txt"

    # Process attack vectors by type
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # Overall attack vectors summary
        awk -F'|' '{print $5}' "$TEMP_DIR/attack_vectors_raw.txt" | sort | uniq -c | sort -rn > "$TEMP_DIR/attack_types.txt"

        # Breakdown by attack type
        for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do
            grep "|$attack_type$" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | \
                awk -F'|' '{print $1"|"$2"|"$3"|"$4}' | \
                sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt" || true
        done

        # Old sqli file for backwards compatibility
        if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then
            cp "$TEMP_DIR/sqli_attempts.txt" "$TEMP_DIR/sqli_attempts_legacy.txt"
        fi
    else
        touch "$TEMP_DIR/attack_types.txt"
    fi

    # Process raw data into sorted/counted results
    if [ -f "$TEMP_DIR/admin_probes_raw.txt" ]; then
        sort "$TEMP_DIR/admin_probes_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/admin_probes.txt"
    else
        touch "$TEMP_DIR/admin_probes.txt"
    fi

    if [ -f "$TEMP_DIR/404_scans_raw.txt" ]; then
        sort "$TEMP_DIR/404_scans_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/404_scans.txt"
    else
        touch "$TEMP_DIR/404_scans.txt"
    fi

    if [ -f "$TEMP_DIR/large_transfers_raw.txt" ]; then
        sort "$TEMP_DIR/large_transfers_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/large_transfers.txt"
    else
        touch "$TEMP_DIR/large_transfers.txt"
    fi

    if [ -f "$TEMP_DIR/suspicious_ua_raw.txt" ]; then
        sort "$TEMP_DIR/suspicious_ua_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/suspicious_ua.txt"
    else
        touch "$TEMP_DIR/suspicious_ua.txt"
    fi

    # Process response codes
    if [ -f "$TEMP_DIR/response_codes_raw.txt" ]; then
        sort "$TEMP_DIR/response_codes_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/response_codes.txt"
    else
        touch "$TEMP_DIR/response_codes.txt"
    fi

    print_success "Threat detection complete"
}

#############################################################################
# NEW: URL Entropy Analysis (detects fuzzing/scanning)
#############################################################################

# Flag IPs that spray requests over many different paths (fuzzing/scanning).
#
# An IP is reported when it sent more than 20 requests AND touched more than
# 5 distinct base paths (URL without its query string). Query-string contents
# are not evaluated; the previous per-parameter counters were never used in
# the decision and have been removed.
#
# Globals:  TEMP_DIR (read)
# Input:    $TEMP_DIR/parsed_logs.txt, pipe-delimited ($1=ip, $3=url)
# Output:   $TEMP_DIR/fuzzing_ips.txt   ip|parameter_fuzzing|requests|distinct_paths
analyze_url_entropy() {
    print_info "Analyzing URL parameter entropy (fuzzing detection)..."

    local result="$TEMP_DIR/fuzzing_ips.txt"

    # Drop leftovers of an earlier run; awk only truncates when it writes
    rm -f "$result"

    # Plain POSIX awk: no match() capture arrays, no arrays of arrays
    awk -F'|' -v out="$result" '
    {
        ip = $1
        url = $3

        # Base path = everything before the first "?"
        qpos = index(url, "?")
        base_path = (qpos > 0) ? substr(url, 1, qpos - 1) : url

        requests[ip]++
        if (!((ip, base_path) in seen_path)) {
            seen_path[ip, base_path] = 1
            distinct_paths[ip]++
        }
    }
    END {
        for (ip in requests) {
            if (requests[ip] > 20 && distinct_paths[ip] > 5) {
                # Likely fuzzing/parameter scanning
                print ip "|parameter_fuzzing|" requests[ip] "|" distinct_paths[ip] > out
            }
        }
        close(out)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
    touch "$result"
    print_success "URL entropy analysis complete"
}

#############################################################################
# NEW: Request Timing Analysis (DDoS & bot behavior detection)
#############################################################################

# Flag IPs whose requests arrive at machine-like short intervals.
#
# For every IP with more than 100 timestamped requests the average gap between
# consecutive requests (in log order) is computed from the time of day; an
# average below 3 seconds is reported.
#
# Globals:  TEMP_DIR (read)
# Input:    $TEMP_DIR/parsed_logs.txt, pipe-delimited ($1=ip, $8=timestamp)
# Output:   $TEMP_DIR/timing_anomalies.txt   ip|consistent_bot_timing|avg_gap|requests
analyze_request_timing() {
    print_info "Analyzing request timing patterns (DDoS detection)..."

    local result="$TEMP_DIR/timing_anomalies.txt"

    # Drop leftovers of an earlier run; awk only truncates when it writes
    rm -f "$result"

    awk -F'|' -v out="$result" '
    {
        ip = $1
        stamp = $8
        clock = ""

        # In "DD/Mon/YYYY:HH:MM:SS" a bare HH:MM:SS pattern would match
        # "YY:HH:MM" (tail of the year), so anchor on the 4-digit year first
        # and fall back to a bare HH:MM:SS only for other layouts.
        if (match(stamp, /[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            clock = substr(stamp, RSTART + 5, 8)
        } else if (match(stamp, /[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            clock = substr(stamp, RSTART, 8)
        }

        if (clock != "") {
            seconds = substr(clock, 1, 2) * 3600 + substr(clock, 4, 2) * 60 + substr(clock, 7, 2)
            request_count[ip]++
            request_times[ip] = request_times[ip] seconds ","
        }
    }
    END {
        for (ip in request_count) {
            count = request_count[ip]

            # Only high-volume clients can be reported
            if (count <= 100) continue

            # The list ends with a comma, so the last split element is empty
            n = split(request_times[ip], times, ",") - 1

            total_gap = 0
            gaps = 0
            for (i = 2; i <= n; i++) {
                gap = times[i] - times[i - 1]
                if (gap < 0) gap += 86400  # Handle day boundary
                total_gap += gap
                gaps++
            }

            if (gaps > 0) {
                avg_interval = total_gap / gaps

                # Very consistent timing = bot (typically 0.5-2 seconds apart)
                # Real users: highly variable (5-60+ seconds)
                if (avg_interval < 3) {
                    print ip "|consistent_bot_timing|" avg_interval "|" count > out
                }
            }
        }
        close(out)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
    touch "$result"
    print_success "Request timing analysis complete"
}

#############################################################################
# NEW: Fingerprinting - Combine multiple signals for accuracy
#############################################################################

# Combine several per-IP signals into a 0-100 bot confidence score.
#
# Signals (points): bot-like User-Agent (20), missing/wildcard Accept-Language
# (15), referer missing on more than 70% of requests (15), request to an
# admin/config path (20). Three or more signals add 25; more than 70%
# successful responses subtract 10. IPs scoring 60 or more are written out.
#
# Globals:  TEMP_DIR (read); fingerprint_count (written - left global as before)
# Input:    $TEMP_DIR/parsed_logs.txt, pipe-delimited
#           ($1=ip $3=url $4=status $6=user agent $9=referer $10=accept-language)
# Output:   $TEMP_DIR/bot_fingerprints.txt   ip|score|signal_count
calculate_bot_fingerprint() {
    print_info "Calculating bot fingerprint confidence scores (combining multiple signals)..."

    local result="$TEMP_DIR/bot_fingerprints.txt"

    # Drop leftovers of an earlier run; awk only truncates when it writes
    rm -f "$result"

    # Each signal contributes to confidence that an IP is a bot
    # Real traffic rarely has ALL signals, bots typically have multiple
    awk -F'|' -v out="$result" '
    {
        ip = $1
        url = $3
        status = $4
        referer = $9
        accept_lang = $10
        ua_lower = tolower($6)

        ip_seen[ip]++

        # Signal 1: Bot-like User-Agent.
        # "javascript" must not count as "java". awk has no lookahead and a
        # bracket set like [^script] only excludes single characters, so the
        # word is blanked out before testing for plain "java".
        gsub(/javascript/, " ", ua_lower)
        if (ua_lower ~ /bot|crawler|spider|scraper|curl|wget|python|java|perl|ruby|node\.js|headless|mechanize/) {
            ua_bot_signal[ip]++
        }

        # Signal 2: Missing/unusual Accept-Language
        if (accept_lang == "-" || accept_lang == "" || accept_lang == "*/*") {
            header_anomaly_signal[ip]++
        }

        # Signal 3: Missing Referer (bots often omit it)
        if (referer == "-" || referer == "") {
            missing_referer[ip]++
        }

        # Signal 4: Successful requests indicate not just scanning
        if (status ~ /^(200|301|302)/) {
            success_requests[ip]++
        }

        # Signal 5: Direct admin/config access (suspicious entry)
        if (url ~ /\/(wp-admin|phpmyadmin|admin|config\.php|\.env|\.git|\.htaccess|web\.config)/) {
            admin_access[ip]++
        }
    }
    END {
        for (ip in ip_seen) {
            score = 0
            signal_count = 0

            if (ua_bot_signal[ip] > 0) {
                score += 20
                signal_count++
            }

            if (header_anomaly_signal[ip] > 0) {
                score += 15
                signal_count++
            }

            # 70%+ requests missing referer
            if (missing_referer[ip] > ip_seen[ip] * 0.7) {
                score += 15
                signal_count++
            }

            # Targeting admin areas
            if (admin_access[ip] > 0) {
                score += 20
                signal_count++
            }

            # Mostly successful responses: might be a legitimate bot
            if (success_requests[ip] > ip_seen[ip] * 0.7) {
                score -= 10
            }

            # Multi-signal boost (confidence increases when signals align)
            if (signal_count >= 3) {
                score += 25
            }

            # Normalize to 0-100
            if (score > 100) score = 100
            if (score < 0) score = 0

            # Output fingerprint for high-confidence bots (score >= 60)
            if (score >= 60) {
                printf "%s|%d|%d\n", ip, score, signal_count > out
            }
        }
        close(out)
    }
    ' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if empty
    touch "$result"
    fingerprint_count=$(wc -l < "$result" 2>/dev/null || echo "0")
    print_success "Fingerprint analysis complete ($fingerprint_count high-confidence bot IPs)"
}

#############################################################################
# NEW: Domain Targeting Analysis - Which domains are being attacked?
#############################################################################

# Summarise traffic and attacks per domain.
#
# Globals:  TEMP_DIR (read)
# Inputs:   $TEMP_DIR/parsed_logs.txt         ($1=ip, $2=domain)
#           $TEMP_DIR/attack_vectors_raw.txt  ip|domain|url|status|type (optional)
# Outputs:  $TEMP_DIR/domain_targeting.txt
#               domain|unique_ips|request_count, busiest domain first
#           $TEMP_DIR/domain_attacks_<domain>.txt (one per attacked domain)
#               attack_type|ip|hits_by_this_ip|hits_of_this_type_on_domain
#
# Previously the traffic summary also read the attack file only to skip it via
# "NR == FNR", which discarded the whole log whenever that file was empty or
# missing, and the per-domain breakdown was printed to stdout because the awk
# program never received tmpdir and never used its file name.
analyze_domain_targeting_percentage() {
    print_info "Analyzing per-domain attack patterns (what's attacking each domain)..."

    local attacks="$TEMP_DIR/attack_vectors_raw.txt"

    # Traffic per domain, from the parsed log only
    awk -F'|' '
    {
        requests[$2]++
        if (!(($2, $1) in seen_ip)) {
            seen_ip[$2, $1] = 1
            unique_ips[$2]++
        }
    }
    END {
        # Output: domain|unique_ips|request_count
        for (domain in requests) {
            printf "%s|%d|%d\n", domain, unique_ips[domain], requests[domain]
        }
    }
    ' "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt"

    # Per-domain attack type breakdown; nothing to do without recorded attacks
    if [ -s "$attacks" ]; then
        awk -F'|' '
        {
            # $2=domain, $5=attack type, $1=ip
            hits_by_ip[$2, $5, $1]++
            hits_by_type[$2, $5]++
        }
        END {
            for (key in hits_by_ip) {
                split(key, part, SUBSEP)
                print part[1] "|" part[2] "|" part[3] "|" hits_by_ip[key] "|" hits_by_type[part[1], part[2]]
            }
        }
        ' "$attacks" | \
            LC_ALL=C sort -t'|' -k1,1 | \
            awk -F'|' -v tmpdir="$TEMP_DIR" '
            {
                # Input is grouped by domain (byte-wise sort), so only one
                # output file is open at any time.
                domain_file = tmpdir "/domain_attacks_" $1 ".txt"
                if (domain_file != current) {
                    if (current != "") close(current)
                    current = domain_file
                }
                print $2 "|" $3 "|" $4 "|" $5 > domain_file
            }'
    fi

    print_success "Domain attack pattern analysis complete"
}

#############################################################################
# NEW: Top URLs Analysis - What files/endpoints are bots hitting?
#############################################################################

# Build, for every domain listed in domain_targeting.txt, the list of requested
# URLs with their hit counts, most requested first.
#
# Globals:  TEMP_DIR (read)
# Inputs:   $TEMP_DIR/domain_targeting.txt  (first pipe-delimited field = domain)
#           $TEMP_DIR/parsed_logs.txt       ($2=domain, $3=url)
# Output:   $TEMP_DIR/domain_urls_<domain>.txt   url|count
#
# The parsed log is read once for all domains instead of once per domain, so
# the run time no longer grows with the number of domains. The trade-off is
# that the URL counters of all domains are held in memory together.
analyze_top_urls_per_domain() {
    print_info "Analyzing top targeted URLs per domain..."

    local targets="$TEMP_DIR/domain_targeting.txt"
    local domain _rest

    # Get list of domains from targeting analysis
    if [ -f "$targets" ]; then
        # Every listed domain gets a result file, even without matching URLs
        while IFS='|' read -r domain _rest; do
            : > "$TEMP_DIR/domain_urls_${domain}.txt"
        done < "$targets"

        # Stage 1: count (domain, url) pairs for the wanted domains.
        #          "pass" is set on the command line before each file, which
        #          stays correct even when the first file is empty.
        # Stage 2: group lines by domain (byte-wise, so groups are contiguous).
        # Stage 3: write each group to its file, one open file at a time.
        awk -F'|' '
        pass == 1 { wanted[$1] = 1; next }
        ($2 in wanted) { hits[$2, $3]++ }
        END {
            for (key in hits) {
                split(key, part, SUBSEP)
                print part[1] "|" part[2] "|" hits[key]
            }
        }
        ' pass=1 "$targets" pass=2 "$TEMP_DIR/parsed_logs.txt" | \
            LC_ALL=C sort -t'|' -k1,1 | \
            awk -F'|' -v tmpdir="$TEMP_DIR" '
            {
                url_file = tmpdir "/domain_urls_" $1 ".txt"
                if (url_file != current) {
                    if (current != "") close(current)
                    current = url_file
                }
                print $2 "|" $3 > url_file
            }'

        # Same ordering as before: by hit count, highest first
        while IFS='|' read -r domain _rest; do
            sort -t'|' -k2 -rn -o "$TEMP_DIR/domain_urls_${domain}.txt" \
                "$TEMP_DIR/domain_urls_${domain}.txt"
        done < "$targets"
    fi

    print_success "Top URLs analysis complete"
}

#############################################################################
# NEW: Success Rate & Behavior Analysis (Added for accuracy improvement)
#############################################################################

# Compute per-IP success and failure percentages and flag extreme profiles.
#
# Success = status starting with 200/301/302, failure = 404/403/401.
# Percentages are truncated to whole numbers.
#
# Globals:  TEMP_DIR (read)
# Input:    $TEMP_DIR/parsed_logs.txt, pipe-delimited ($1=ip, $4=status)
# Outputs:  $TEMP_DIR/high_failure_ips.txt  ip|total|fail_rate|scanner
#               (fail rate >= 80% and at least 20 requests)
#           $TEMP_DIR/high_success_ips.txt  ip|total|success_rate|scraper
#               (otherwise, success rate >= 90% and at least 100 requests)
#           $TEMP_DIR/ip_success_rates.txt  ip|total|success_rate|fail_rate
analyze_success_rates() {
    print_info "Analyzing request success rates and behavior patterns..."

    local scanners_file="$TEMP_DIR/high_failure_ips.txt"
    local scrapers_file="$TEMP_DIR/high_success_ips.txt"
    local rates_file="$TEMP_DIR/ip_success_rates.txt"

    # Each run describes the whole parsed log, so start from empty files.
    # Appending (as before) duplicated every record when the analysis ran
    # again on the same directory.
    : > "$scanners_file"
    : > "$scrapers_file"
    : > "$rates_file"

    awk -F'|' -v scanners_out="$scanners_file" \
              -v scrapers_out="$scrapers_file" \
              -v rates_out="$rates_file" '
    {
        total[$1]++

        if ($4 ~ /^(200|301|302)/) {
            success[$1]++
        } else if ($4 ~ /^(404|403|401)/) {
            failed[$1]++
        }
    }
    END {
        for (ip in total) {
            success_count = success[ip] + 0
            failed_count = failed[ip] + 0
            success_rate = int((success_count / total[ip]) * 100)
            fail_rate = int((failed_count / total[ip]) * 100)

            # High failure rate indicates scanning/probing
            if (fail_rate >= 80 && total[ip] >= 20) {
                print ip "|" total[ip] "|" fail_rate "|scanner" > scanners_out
            }
            # Very high success rate + high volume could be scraping
            else if (success_rate >= 90 && total[ip] >= 100) {
                print ip "|" total[ip] "|" success_rate "|scraper" > scrapers_out
            }

            # Output all rates for later analysis
            print ip "|" total[ip] "|" success_rate "|" fail_rate > rates_out
        }
        close(scanners_out)
        close(scrapers_out)
        close(rates_out)
    }' < "$TEMP_DIR/parsed_logs.txt"

    print_success "Success rate analysis complete"
}

#############################################################################
# Botnet Detection
#############################################################################

# Look for traffic patterns typical of coordinated (botnet) activity.
#
# Globals:  TEMP_DIR (read)
# Input:    $TEMP_DIR/parsed_logs.txt, pipe-delimited
#           ($1=ip $3=url $6=user agent $8=timestamp with DD/Mon/YYYY:HH:MM)
# Outputs (under $TEMP_DIR):
#   coordinated_urls.txt     URLs requested more than 10 times each by more
#                            than 5 different IPs
#   ip_ua_pairs.txt          distinct "ip|user agent" pairs
#   suspicious_networks.txt  "count a.b.c.0/24" for IPv4 /24 networks with
#                            more than 20 requests
#   rapid_fire_ips.txt       "total ip peak" for IPs exceeding 50 requests in
#                            a single minute (total = sum over such minutes,
#                            peak = busiest minute), highest total first
detect_botnets() {
    print_info "Analyzing for botnet patterns..."

    local logs="$TEMP_DIR/parsed_logs.txt"

    # Pattern 1: Multiple IPs hitting same URLs in coordinated manner.
    # The "uniq -c" count prefix is stripped with sub() rather than by
    # printing $2, so URLs containing whitespace are kept intact.
    awk -F'|' '{print $1"|"$3}' "$logs" | \
        sort | uniq -c | \
        awk '$1 > 10 { sub(/^ *[0-9]+ /, ""); print }' | \
        cut -d'|' -f2 | sort | uniq -c | sort -rn | \
        awk '$1 > 5 { sub(/^ *[0-9]+ /, ""); print }' > "$TEMP_DIR/coordinated_urls.txt"

    # Pattern 2: IPs with similar User-Agents hitting multiple domains
    awk -F'|' '{print $1"|"$6}' "$logs" | \
        sort | uniq > "$TEMP_DIR/ip_ua_pairs.txt"

    # Pattern 3: Detect IP ranges (Class C networks) with suspicious activity.
    # Only dotted-quad IPv4 addresses have a /24; anything else (e.g. IPv6)
    # used to be turned into a meaningless "network" and is skipped now.
    awk -F'|' '$1 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {
        split($1, octet, ".")
        print octet[1] "." octet[2] "." octet[3] ".0/24"
    }' "$logs" | \
        sort | uniq -c | sort -rn | awk '$1 > 20' > "$TEMP_DIR/suspicious_networks.txt"

    # Pattern 4: Rapid fire requests (DDoS indicators)
    # Count requests per IP per minute, keep minutes above 50 requests and
    # aggregate them per IP. Written without gawk-only match() capture arrays.
    awk -F'|' '
    match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]/) {
        # Offsets inside "DD/Mon/YYYY:HH:MM" -> key YYYYMonDD_HHMM
        minute_key = substr($8, RSTART + 7, 4) substr($8, RSTART + 3, 3) substr($8, RSTART, 2) \
                     "_" substr($8, RSTART + 12, 2) substr($8, RSTART + 15, 2)
        print $1 "|" minute_key
    }' "$logs" | \
        sort | uniq -c | \
        awk '$1 > 50 {
            split($2, pair, "|")
            ip = pair[1]
            total[ip] += $1
            if ($1 > peak[ip]) peak[ip] = $1
        }
        END {
            for (ip in total) print total[ip], ip, peak[ip]
        }' | \
        sort -rn > "$TEMP_DIR/rapid_fire_ips.txt"

    print_success "Botnet analysis complete"
}

#############################################################################
# Server IP Detection
#############################################################################

# Collect the addresses that belong to this server so they can be excluded
# from threat analysis.
#
# Sources, all best-effort: "hostname -I", "ip addr", "ifconfig", one public
# "what is my IP" web service, and /var/cpanel/mainip.
#
# Globals:  TEMP_DIR (read); service, public_ip, server_ip_count (written -
#           left global as before, other parts of the script may read them)
# Output:   $TEMP_DIR/server_ips.txt   one address per line, sorted, unique
detect_server_ips() {
    print_info "Detecting server's own IP addresses..."

    local ip_list="$TEMP_DIR/server_ips.txt"

    : > "$ip_list"

    # Method 1: Get all IPs from network interfaces
    if command -v hostname >/dev/null 2>&1; then
        hostname -I 2>/dev/null | tr ' ' '\n' | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' >> "$ip_list" || true
    fi

    # Method 2: Parse ip addr output
    if command -v ip >/dev/null 2>&1; then
        ip addr show 2>/dev/null | grep -oP 'inet \K[\d.]+' >> "$ip_list" || true
    fi

    # Method 3: Try ifconfig as fallback
    if command -v ifconfig >/dev/null 2>&1; then
        ifconfig 2>/dev/null | grep -oP 'inet (addr:)?\K[\d.]+' >> "$ip_list" || true
    fi

    # Method 4: Get public IP from external services (with timeout)
    # HTTPS only: whatever address comes back is exempted from threat
    # analysis, so a reply spoofed over plain HTTP could whitelist an attacker.
    # The first service that answers with a bare IPv4 address wins.
    if command -v curl >/dev/null 2>&1; then
        for service in "https://ifconfig.me/ip" "https://icanhazip.com" \
                       "https://ipecho.net/plain" "https://api.ipify.org"; do
            public_ip=$(curl -s --max-time 3 "$service" 2>/dev/null | grep -oE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' || true)
            if [ -n "$public_ip" ]; then
                echo "$public_ip" >> "$ip_list"
                break
            fi
        done
    fi

    # Method 5: Check cPanel server IP if available
    # The extra newline covers a file without a trailing one; blank lines are
    # removed below. An unreadable file must not abort the whole analysis.
    if [ -f "/var/cpanel/mainip" ]; then
        { cat /var/cpanel/mainip 2>/dev/null; echo; } >> "$ip_list" || true
    fi

    # Remove duplicates and empty lines
    sort -u "$ip_list" | grep -v '^$' > "$TEMP_DIR/server_ips_final.txt" || true
    mv "$TEMP_DIR/server_ips_final.txt" "$ip_list"

    server_ip_count=$(wc -l < "$ip_list" 2>/dev/null || echo 0)

    if [ "$server_ip_count" -gt 0 ]; then
        print_success "Detected $server_ip_count server IP(s) - these will be excluded from threat analysis"
    else
        print_warning "Could not detect server IPs automatically - proceeding without server IP filtering"
    fi
}

# Helper function to validate IP address format
# Arguments: $1 - candidate address
# Returns:   0 if $1 is a dotted-quad IPv4 address with every octet <= 255,
#            or passes the simplified IPv6 shape check; 1 otherwise
is_valid_ip() {
    local ip="$1"
    local octet  # keep the loop variable out of the global scope

    # IPv4 validation
    if [[ "$ip" =~ ^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$ ]]; then
        for octet in "${BASH_REMATCH[@]:1}"; do
            # 10# forces base 10 so octets like "08" are not parsed as octal
            if (( 10#$octet > 255 )); then
                return 1  # Invalid
            fi
        done
        return 0  # Valid IPv4
    fi

    # IPv6 basic validation (simplified): 2-7 colon-terminated groups of up
    # to four hex digits plus a final group. This checks shape only.
    if [[ "$ip" =~ ^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$ ]]; then
        return 0  # Valid IPv6
    fi

    return 1  # Invalid
}

# Helper function to check if an IP should be excluded
# Globals:   TEMP_DIR (read)
# Arguments: $1 - address taken from a log line
# Returns:   0 (exclude) when $1 is not a valid address, is loopback/private/
#            link-local (IPv4 or IPv6), or is listed in
#            $TEMP_DIR/server_ips.txt; 1 (keep) otherwise
is_excluded_ip() {
    local ip="$1"
    local lowered

    # First validate IP format; anything else ("localhost", garbage) is
    # excluded right here
    if ! is_valid_ip "$ip"; then
        return 0  # Exclude invalid IPs
    fi

    # IPv6 hex digits may be upper case
    lowered="${ip,,}"

    # Check if private/internal IP
    case "$lowered" in
        127.*|10.*|192.168.*|169.254.*)
            return 0 ;;  # loopback, RFC 1918, link-local
        172.1[6-9].*|172.2[0-9].*|172.3[01].*)
            return 0 ;;  # 172.16.0.0/12
        ::1)
            return 0 ;;  # IPv6 loopback
        fe[89ab][0-9a-f]:*)
            return 0 ;;  # IPv6 link-local, fe80::/10
        f[cd][0-9a-f][0-9a-f]:*)
            return 0 ;;  # IPv6 unique local, fc00::/7
    esac

    # Check if it's the server's own IP
    if [ -f "$TEMP_DIR/server_ips.txt" ]; then
        if grep -qFx -- "$ip" "$TEMP_DIR/server_ips.txt" 2>/dev/null; then
            return 0  # True - should be excluded
        fi
    fi

    return 1  # False - should not be excluded
}

#############################################################################
# Time-Series Analysis
#############################################################################

# Build per-hour request histograms for bot traffic and for attacker traffic.
#
# Globals:  TEMP_DIR (read)
# Inputs:   $TEMP_DIR/classified_bots.txt     ($8=timestamp, $9=bot type)
#           $TEMP_DIR/attack_vectors_raw.txt  ($1=ip), optional
#           $TEMP_DIR/parsed_logs.txt         ($1=ip, $8=timestamp)
# Outputs:  $TEMP_DIR/hourly_bot_traffic.txt     "count HH" for requests whose
#               bot type is not "unknown"
#           $TEMP_DIR/hourly_attack_traffic.txt  "count HH" for ALL requests of
#               IPs that appear in the attack file
#           Both files always exist afterwards (possibly empty).
analyze_time_series() {
    print_info "Analyzing time-series patterns..."

    local bots="$TEMP_DIR/classified_bots.txt"
    local attacks="$TEMP_DIR/attack_vectors_raw.txt"

    # Extract hourly bot traffic.
    # The hour sits 12 characters after the start of "DD/Mon/YYYY:HH:MM:SS";
    # using RSTART avoids the gawk-only capture-array form of match().
    if [ -f "$bots" ]; then
        awk -F'|' '
        $9 != "unknown" && match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/) {
            print substr($8, RSTART + 12, 2)
        }' "$bots" | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
    else
        # A missing classification file means no bot traffic, not a fatal error
        : > "$TEMP_DIR/hourly_bot_traffic.txt"
    fi

    # Extract hourly attack traffic
    if [ -s "$attacks" ]; then
        # "pass" is set on the command line before each file, which stays
        # correct even if the first file were empty (unlike NR==FNR)
        awk -F'|' '
        pass == 1 { attackers[$1] = 1; next }
        ($1 in attackers) && match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/) {
            print substr($8, RSTART + 12, 2)
        }' pass=1 "$attacks" pass=2 "$TEMP_DIR/parsed_logs.txt" | \
            sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
    else
        : > "$TEMP_DIR/hourly_attack_traffic.txt"
    fi

    print_success "Time-series analysis complete"
}

#############################################################################
# Threat Scoring
#############################################################################

# Normalize a metric that may be blank, decimal (e.g. "85.5") or otherwise
# non-numeric into a plain non-negative integer that is safe for `[ -ge ]`.
# Arguments: $1 - name of the caller's variable to assign
#            $2 - raw value
_threat_score_to_int() {
    local _raw=${2%%.*}
    [[ "$_raw" =~ ^[0-9]+$ ]] || _raw=0
    printf -v "$1" '%s' "$_raw"
}

#######################################
# Compute a 0-100 threat score for every non-excluded IP in parsed_logs.txt
# and record the offenders in the IP reputation database.
# Globals:   TEMP_DIR (read)
# Inputs:    $TEMP_DIR/parsed_logs.txt plus the optional detector outputs
#            (sqli_attempts.txt, header_anomalies.txt, ...) when present
# Outputs:   $TEMP_DIR/threat_scores.txt, lines "score|ip|request_count",
#            highest score first; only IPs with score > 0 are listed
# Calls:     is_excluded_ip, increment_ip_hits, flag_ip_attack
#######################################
calculate_threat_scores() {
    print_info "Calculating threat scores..."

    # Everything is local so loop variables do not leak into the report code.
    local ip count key idx row
    local sig_file sig_weight sig_tag sig_note
    local total rate category anomaly_type anomaly_score entry_type url status
    local fuzz_type total_urls unique_paths timing_type avg_interval total_reqs
    local score req_count fail_rate header_score fuzz_requests admin_count scan_404
    local -a matched=()
    local max_jobs=32      # upper bound on concurrent reputation updates
    local active_jobs=0

    # Pre-count requests per IP in a single awk pass. A bash-side
    # ((counter++)) returns status 1 on the first increment, which aborts
    # the script under `set -e`, and is far slower on large logs.
    local -A ip_request_counts=()
    while IFS='|' read -r ip count; do
        [ -n "$ip" ] || continue
        ip_request_counts["$ip"]=$count
    done < <(awk -F'|' '$1 != "" { hits[$1]++ }
                        END { for (addr in hits) print addr "|" hits[addr] }' \
                 "$TEMP_DIR/parsed_logs.txt")

    # Attack signal table: source file | score weight | reputation tag | note.
    # Row order is the order in which tags are written to the reputation DB.
    local -a signal_rows=(
        "sqli_attempts.txt|15|SQL_INJECTION|Bot analyzer: SQL injection attempts"
        "xss_attempts.txt|12|XSS|Bot analyzer: XSS attempts"
        "path_traversal_attempts.txt|15|PATH_TRAVERSAL|Bot analyzer: Path traversal"
        "rce_upload_attempts.txt|20|RCE|Bot analyzer: RCE/shell upload attempts"
        "login_bruteforce_attempts.txt|10|BRUTEFORCE|Bot analyzer: Login bruteforce"
        "rapid_fire_ips.txt|10|DDOS|Bot analyzer: Rapid-fire requests"
        "suspicious_ua.txt|10|SCANNER|Bot analyzer: Suspicious user-agent"
    )
    local -a sig_weights=() sig_tags=() sig_notes=()
    local -A signal_hits=()     # key: "<row index>|<ip>"

    # Each signal file holds "count ip[|details]" lines; keep only the IP.
    idx=0
    for row in "${signal_rows[@]}"; do
        IFS='|' read -r sig_file sig_weight sig_tag sig_note <<< "$row"
        sig_weights[idx]=$sig_weight
        sig_tags[idx]=$sig_tag
        sig_notes[idx]=$sig_note
        if [ -f "$TEMP_DIR/$sig_file" ]; then
            while read -r ip; do
                # Blank lines would otherwise produce a bad array subscript.
                [ -n "$ip" ] || continue
                key="$idx|$ip"
                signal_hits["$key"]=1
            done < <(awk '{ sub(/\|.*/, "", $2); print $2 }' "$TEMP_DIR/$sig_file")
        fi
        idx=$((idx + 1))
    done

    # Count-based threat files ("count ip|..." lines)
    local -A threat_admin_count=() threat_404_count=()
    if [ -f "$TEMP_DIR/admin_probes.txt" ]; then
        while read -r count ip; do
            [ -n "$ip" ] || continue
            threat_admin_count["$ip"]=$count
        done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" | sed 's/|.*//')
    fi

    if [ -f "$TEMP_DIR/404_scans.txt" ]; then
        while read -r count ip; do
            [ -n "$ip" ] || continue
            threat_404_count["$ip"]=$count
        done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" | sed 's/|.*//')
    fi

    # Legitimate bots (field 9 == "legit") are exempt from volume scoring.
    # awk emits each such IP once, so bash never walks the full file.
    local -A legit_bot_ips=()
    if [ -f "$TEMP_DIR/classified_bots.txt" ]; then
        while IFS= read -r ip; do
            [ -n "$ip" ] || continue
            legit_bot_ips["$ip"]=1
        done < <(awk -F'|' '$9 == "legit" && !seen[$1]++ { print $1 }' \
                     "$TEMP_DIR/classified_bots.txt")
    fi

    # Success-rate data for scanning/scraping detection
    local -A scanner_ips=() scraper_ips=()
    if [ -f "$TEMP_DIR/high_failure_ips.txt" ]; then
        while IFS='|' read -r ip total rate category; do
            [ -n "$ip" ] || continue
            scanner_ips["$ip"]=$rate
        done < "$TEMP_DIR/high_failure_ips.txt"
    fi

    if [ -f "$TEMP_DIR/high_success_ips.txt" ]; then
        while IFS='|' read -r ip total rate category; do
            [ -n "$ip" ] || continue
            scraper_ips["$ip"]=$rate
        done < "$TEMP_DIR/high_success_ips.txt"
    fi

    # Header anomalies (ip|anomaly_type|score)
    local -A header_anomalies=()
    if [ -f "$TEMP_DIR/header_anomalies.txt" ]; then
        while IFS='|' read -r ip anomaly_type anomaly_score; do
            [ -n "$ip" ] || continue
            header_anomalies["$ip"]=$anomaly_score
        done < "$TEMP_DIR/header_anomalies.txt"
    fi

    # Suspicious entry points (ip|entry_type|url|status)
    local -A suspicious_entry_ips=()
    if [ -f "$TEMP_DIR/suspicious_entry_points.txt" ]; then
        while IFS='|' read -r ip entry_type url status; do
            [ -n "$ip" ] || continue
            suspicious_entry_ips["$ip"]=1
        done < "$TEMP_DIR/suspicious_entry_points.txt"
    fi

    # Fuzzing/parameter scanning IPs (ip|fuzz_type|total_urls|unique_paths)
    local -A fuzzing_ips=()
    if [ -f "$TEMP_DIR/fuzzing_ips.txt" ]; then
        while IFS='|' read -r ip fuzz_type total_urls unique_paths; do
            [ -n "$ip" ] || continue
            fuzzing_ips["$ip"]=$total_urls
        done < "$TEMP_DIR/fuzzing_ips.txt"
    fi

    # Timing anomalies (ip|timing_type|avg_interval|total_reqs)
    local -A timing_anomalies=()
    if [ -f "$TEMP_DIR/timing_anomalies.txt" ]; then
        while IFS='|' read -r ip timing_type avg_interval total_reqs; do
            [ -n "$ip" ] || continue
            timing_anomalies["$ip"]=$avg_interval
        done < "$TEMP_DIR/timing_anomalies.txt"
    fi

    # Score every IP. The group below is the left side of a pipeline, so it
    # runs in a subshell: the background reputation jobs are children of that
    # subshell and must be waited for inside it.
    {
        for ip in "${!ip_request_counts[@]}"; do
            # Skip excluded IPs
            if is_excluded_ip "$ip"; then
                continue
            fi

            score=0
            req_count=${ip_request_counts[$ip]}

            # Base request volume scoring, skipped for legitimate bots
            # (Google, Bing, etc.)
            if [ -z "${legit_bot_ips[$ip]:-}" ]; then
                if [ "$req_count" -gt 10000 ]; then score=$((score + 10))
                elif [ "$req_count" -gt 5000 ]; then score=$((score + 8))
                elif [ "$req_count" -gt 1000 ]; then score=$((score + 5))
                elif [ "$req_count" -gt 500 ]; then score=$((score + 3))
                fi
            fi

            # High failure rate (80%+ 404/403) = scanning behavior
            if [ -n "${scanner_ips[$ip]:-}" ]; then
                _threat_score_to_int fail_rate "${scanner_ips[$ip]}"
                if [ "$fail_rate" -ge 90 ]; then
                    score=$((score + 8))  # Very high failure rate
                elif [ "$fail_rate" -ge 80 ]; then
                    score=$((score + 5))  # High failure rate
                fi
            fi

            # High success rate + high volume = potential scraping
            if [ -n "${scraper_ips[$ip]:-}" ] && [ "$req_count" -gt 500 ]; then
                score=$((score + 7))
            fi

            # Attack patterns: add each matching signal's weight and remember
            # which rows matched so the same set can be tagged below.
            matched=()
            for idx in "${!sig_weights[@]}"; do
                key="$idx|$ip"
                if [ -n "${signal_hits[$key]:-}" ]; then
                    score=$((score + sig_weights[idx]))
                    matched+=("$idx")
                fi
            done

            # Header anomalies (strong indicator of bots)
            if [ -n "${header_anomalies[$ip]:-}" ]; then
                _threat_score_to_int header_score "${header_anomalies[$ip]}"
                if [ "$header_score" -ge 12 ]; then
                    score=$((score + 8))  # Multiple header suspicions
                elif [ "$header_score" -ge 8 ]; then
                    score=$((score + 5))  # Moderate header anomalies
                fi
            fi

            # Suspicious entry point (direct jump to admin/config)
            if [ -n "${suspicious_entry_ips[$ip]:-}" ]; then
                score=$((score + 6))
            fi

            # Fuzzing/parameter scanning behavior
            if [ -n "${fuzzing_ips[$ip]:-}" ]; then
                _threat_score_to_int fuzz_requests "${fuzzing_ips[$ip]}"
                if [ "$fuzz_requests" -gt 100 ]; then
                    score=$((score + 7))  # Aggressive fuzzing
                elif [ "$fuzz_requests" -gt 50 ]; then
                    score=$((score + 4))  # Moderate fuzzing
                fi
            fi

            # Timing anomalies (very consistent request timing = automation)
            if [ -n "${timing_anomalies[$ip]:-}" ]; then
                score=$((score + 6))
            fi

            # Admin probing
            _threat_score_to_int admin_count "${threat_admin_count[$ip]:-0}"
            if [ "$admin_count" -gt 100 ]; then
                score=$((score + 10))  # Excessive probing
            elif [ "$admin_count" -gt 50 ]; then
                score=$((score + 5))   # Moderate probing
            fi

            # 404 scanning
            _threat_score_to_int scan_404 "${threat_404_count[$ip]:-0}"
            if [ "$scan_404" -gt 50 ]; then
                score=$((score + 3))
            fi

            # OPTIMIZATION: Skip external API calls for performance
            # Threat Intelligence Enrichment can be done post-analysis for high-risk IPs only
            # Uncommenting these will SIGNIFICANTLY slow down analysis (API calls for every IP)
            #
            # To enable threat intelligence enrichment:
            # 1. Uncomment the code below
            # 2. Ensure check_abuseipdb, get_country_code, and is_high_risk_country functions exist
            # 3. Be aware this will make thousands of API calls and take much longer
            #
            # local abuse_data=$(check_abuseipdb "$ip" 2>/dev/null || echo "0|0|Unknown|Unknown")
            # IFS='|' read -r abuse_confidence abuse_reports abuse_country abuse_isp <<< "$abuse_data"
            #
            # if [ "$abuse_confidence" -ge 75 ]; then
            #     score=$((score + 15))  # High confidence malicious
            # elif [ "$abuse_confidence" -ge 50 ]; then
            #     score=$((score + 8))   # Moderate confidence
            # elif [ "$abuse_confidence" -ge 25 ]; then
            #     score=$((score + 3))   # Low confidence
            # fi
            #
            # local geo_country=$(get_country_code "$ip" 2>/dev/null || echo "XX")
            # if is_high_risk_country "$geo_country" 2>/dev/null; then
            #     score=$((score + 5))   # High-risk country bonus
            # fi

            # Cap at 100
            if [ "$score" -gt 100 ]; then
                score=100
            fi

            # Only IPs with score > 0 are reported and tracked
            if [ "$score" -gt 0 ]; then
                printf '%s|%s|%s\n' "$score" "$ip" "$req_count"

                # Best-effort reputation update in the background. Output is
                # detached from the sort pipe, and each call is guarded so one
                # failure does not skip the remaining tags under `set -e`.
                (
                    increment_ip_hits "$ip" "$req_count" || true
                    for idx in "${matched[@]}"; do
                        flag_ip_attack "$ip" "${sig_tags[idx]}" 0 "${sig_notes[idx]}" || true
                    done
                ) >/dev/null 2>&1 &

                # Throttle: never keep more than max_jobs updates in flight.
                active_jobs=$((active_jobs + 1))
                if [ "$active_jobs" -ge "$max_jobs" ]; then
                    wait
                    active_jobs=0
                fi
            fi
        done

        # Barrier: all reputation updates finish before the pipeline ends.
        wait
    } | sort -t'|' -k1 -rn > "$TEMP_DIR/threat_scores.txt"

    print_success "Threat scores calculated and IP reputation updated"
}

#############################################################################
# False Positive Detection
#############################################################################

#######################################
# Identify requests that come from known legitimate services (monitoring,
# search engines, CDNs, social previews, backup tools, payment processors)
# so they can be suggested for whitelisting.
# Globals:   TEMP_DIR (read)
# Inputs:    $TEMP_DIR/parsed_logs.txt ('|' separated; field 1 = IP,
#            2 = domain, 3 = URL, 6 = user agent)
# Outputs:   $TEMP_DIR/false_positives.txt with unique lines
#            "ip|service label|lower-cased user agent|domain"
#######################################
detect_false_positives() {
    print_info "Detecting legitimate services (false positives)..."

    # First matching rule wins; rules are checked in the order listed.
    awk -F'|' '
    {
        ip = $1
        domain = $2
        url = $3
        ua = tolower($6)
        service = ""

        # Monitoring services
        if (ua ~ /pingdom/)
            service = "Pingdom Monitoring"
        else if (ua ~ /uptimerobot/)
            service = "UptimeRobot Monitoring"
        else if (ua ~ /statuscake/)
            service = "StatusCake Monitoring"
        # WordPress cache preload (WP Rocket, Hummingbird)
        else if (url ~ /admin-ajax\.php.*cache_preload/ || url ~ /admin-ajax\.php.*wphb/)
            service = "WordPress Cache Preload"
        # Legitimate backup services
        else if (ua ~ /jetpack|vaultpress|updraftplus|backwpup/)
            service = "Backup Service"
        # Search engine crawlers
        else if (ua ~ /googlebot|google web preview|google-read-aloud|bingbot|slurp|duckduckbot/)
            service = "Search Engine Bot"
        # Content delivery networks (usually legit)
        else if (ua ~ /cloudflare|akamai|fastly|cloudfront|edgecast|maxcdn|amazon/)
            service = "CDN Service"
        # Analytics and social link previews. The Facebook crawler identifies
        # itself as facebookexternalhit; fbexternalhit is kept for
        # backward compatibility.
        else if (ua ~ /googleanalytics|facebookexternalhit|fbexternalhit|twitterbot|linkedinbot|pinterestbot|whatsapp|telegram/)
            service = "Analytics/Social Service"
        # Payment processors (legitimate POST to checkout)
        else if (url ~ /checkout|payment|paypal|stripe|square/ && ua ~ /paypal|stripe|square/)
            service = "Payment Processor"

        if (service != "")
            print ip "|" service "|" ua "|" domain
    }' "$TEMP_DIR/parsed_logs.txt" | sort -u > "$TEMP_DIR/false_positives.txt"

    print_success "False positive detection complete ($(wc -l < "$TEMP_DIR/false_positives.txt") legitimate services identified)"
}

#############################################################################
# Statistical Analysis
#############################################################################

#######################################
# Build the "top N" and breakdown files used by the report.
# Globals:   TEMP_DIR (read)
# Inputs:    $TEMP_DIR/parsed_logs.txt, $TEMP_DIR/classified_bots.txt and,
#            optionally, $TEMP_DIR/all_domains.txt (one domain per line)
# Outputs:   top_sites.txt, top_ips.txt, top_urls.txt, top_bots.txt,
#            traffic_breakdown.txt, domain_bot_types.txt and one
#            domain_<domain>_stats.txt per listed domain, all in $TEMP_DIR
#######################################
generate_statistics() {
    print_info "Generating statistics..."

    local sites_raw="$TEMP_DIR/top_sites_raw.txt"
    local ips_raw="$TEMP_DIR/top_ips_raw.txt"
    local urls_raw="$TEMP_DIR/top_urls_raw.txt"

    # Create the raw files up front: awk only opens them when it has at least
    # one record to print, and the sorts below fail on a missing file.
    : > "$sites_raw"
    : > "$ips_raw"
    : > "$urls_raw"

    # Single pass over the parsed logs produces all three raw counters.
    awk -F'|' -v sites_out="$sites_raw" -v ips_out="$ips_raw" -v urls_out="$urls_raw" '
    {
        domains[$2]++           # requests per domain
        ips[$1]++               # requests per IP
        urls[$2 "|" $3]++       # requests per domain+URL
    }
    END {
        for (d in domains) print domains[d], d > sites_out
        for (a in ips)     print ips[a], a     > ips_out
        for (u in urls)    print urls[u], u    > urls_out
        close(sites_out)
        close(ips_out)
        close(urls_out)
    }' "$TEMP_DIR/parsed_logs.txt"

    # Keep the top five of each. `awk NR <= 5` reads its whole input, unlike
    # `head -5`, which exits early and can kill sort with SIGPIPE; under
    # `set -o pipefail` that turns into a fatal pipeline status of 141.
    sort -rn "$sites_raw" | awk 'NR <= 5' > "$TEMP_DIR/top_sites.txt"
    sort -rn "$ips_raw"   | awk 'NR <= 5' > "$TEMP_DIR/top_ips.txt"
    sort -rn "$urls_raw"  | awk 'NR <= 5' > "$TEMP_DIR/top_urls.txt"

    # Top 5 bots by request count (field 9 = bot type, field 10 = bot name)
    awk -F'|' '$9 != "unknown" {print $10}' "$TEMP_DIR/classified_bots.txt" \
        | sort | uniq -c | sort -rn | awk 'NR <= 5' > "$TEMP_DIR/top_bots.txt"

    # Traffic breakdown by bot type
    awk -F'|' '{print $9}' "$TEMP_DIR/classified_bots.txt" \
        | sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt"

    # Per-domain traffic sources
    if [ -f "$TEMP_DIR/all_domains.txt" ]; then
        local domain domain_re stats_file

        # Index of "domain|bot_type" pairs, built once and grepped per domain
        awk -F'|' '{print $2"|"$9}' "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/domain_bot_types.txt"

        while read -r domain; do
            stats_file="$TEMP_DIR/domain_${domain}_stats.txt"
            printf '%s\n' "$domain" > "$stats_file"

            # Escape regex metacharacters so "a.com" does not also match
            # "axcom"; the domain must match literally.
            domain_re=$(printf '%s' "$domain" | sed 's/[][\.*^$]/\\&/g')

            # A domain with no classified traffic makes grep exit 1; that is
            # expected, hence the trailing `|| true`.
            grep "^${domain_re}|" "$TEMP_DIR/domain_bot_types.txt" 2>/dev/null | cut -d'|' -f2 | \
                sort | uniq -c | sort -rn >> "$stats_file" || true
        done < "$TEMP_DIR/all_domains.txt"
    fi

    print_success "Statistics generated"
}

#############################################################################
# NEW: Comparison Reports (detect trends)
#############################################################################

# Print $1 as a non-negative base-10 integer; blank, decimal or otherwise
# non-numeric input is truncated or mapped to 0.
_comparison_to_int() {
    local value=${1//[[:space:]]/}
    value=${value%%.*}
    [[ "$value" =~ ^[0-9]+$ ]] || value=0
    printf '%s\n' "$((10#$value))"
}

# Print the number of lines in file $1, or 0 when it is missing/unreadable.
_comparison_line_count() {
    local lines=0
    if [ -r "$1" ]; then
        lines=$(wc -l < "$1") || lines=0
    fi
    _comparison_to_int "$lines"
}

# Run awk program $2 (field separator "|") over file $1 and print its single
# numeric result; prints 0 when the file is unreadable or awk fails.
_comparison_awk_metric() {
    local result=0
    if [ -r "$1" ]; then
        result=$(awk -F'|' "$2" "$1" 2>/dev/null) || result=0
    fi
    _comparison_to_int "$result"
}

# Print the integer stored under "Key: value" for key $2 in report file $1
# (0 when the file or the key is missing).
_comparison_report_metric() {
    local value=""
    if [ -r "$1" ]; then
        value=$(awk -F':' -v key="$2" '$1 == key { print $2; exit }' "$1" 2>/dev/null) || value=""
    fi
    _comparison_to_int "$value"
}

#######################################
# Snapshot today's key metrics, compare them with the 7-day baseline and with
# yesterday's snapshot, and print the findings.
# Globals:   TEMP_DIR, TOOLKIT_TMP_DIR, RED, YELLOW, NC (read)
# Calls:     calculate_baseline_average, print_info, print_header
# Outputs:   comparison text on stdout;
#            $TOOLKIT_TMP_DIR/analysis_history/latest_analysis_<date>.txt
#            $TOOLKIT_TMP_DIR/analysis_history/known_attackers_<date>.txt
#######################################
generate_comparison_report() {
    print_info "Generating trend analysis and baseline comparison..."

    # Store current results for comparison with previous analysis
    local history_dir="$TOOLKIT_TMP_DIR/analysis_history"
    mkdir -p "$history_dir"

    local timestamp today latest_report
    timestamp=$(date +%Y%m%d_%H%M%S)
    today=$(date +%Y%m%d)
    latest_report="$history_dir/latest_analysis_$today.txt"

    # Each metric is written exactly once as "Key: <integer>". The helpers
    # print 0 for missing inputs, so a failed pipeline can never add a second
    # stray "0" line to the history file.
    {
        echo "Timestamp: $timestamp"
        echo "Total_Requests: $(_comparison_line_count "$TEMP_DIR/parsed_logs.txt")"
        echo "Unique_IPs: $(_comparison_awk_metric "$TEMP_DIR/parsed_logs.txt" '!seen[$1]++ { n++ } END { print n + 0 }')"
        echo "High_Risk_IPs: $(_comparison_awk_metric "$TEMP_DIR/threat_scores.txt" '$1 >= 70 { n++ } END { print n + 0 }')"
        echo "Attack_Vectors: $(_comparison_awk_metric "$TEMP_DIR/attack_vectors_raw.txt" '!seen[$5]++ { n++ } END { print n + 0 }')"
        echo "SQL_Injection: $(_comparison_line_count "$TEMP_DIR/sqli_attempts.txt")"
        echo "XSS_Attempts: $(_comparison_line_count "$TEMP_DIR/xss_attempts.txt")"
        echo "Bot_Traffic: $(_comparison_line_count "$TEMP_DIR/classified_bots.txt")"
        echo "Suspected_Scanners: $(_comparison_line_count "$TEMP_DIR/high_failure_ips.txt")"
        echo "Header_Anomalies: $(_comparison_line_count "$TEMP_DIR/header_anomalies.txt")"
        echo "Entry_Point_Suspicious: $(_comparison_line_count "$TEMP_DIR/suspicious_entry_points.txt")"
        echo "Fuzzing_IPs: $(_comparison_line_count "$TEMP_DIR/fuzzing_ips.txt")"
    } > "$latest_report"

    # Baseline comparison
    echo ""
    print_header "BASELINE COMPARISON (Is this activity normal?)"

    local total_requests baseline_requests
    total_requests=$(_comparison_report_metric "$latest_report" "Total_Requests")
    # A failing or non-numeric baseline is treated as "no baseline yet".
    baseline_requests=$(calculate_baseline_average "server" "requests" 7) || baseline_requests=0
    baseline_requests=$(_comparison_to_int "$baseline_requests")

    if [ "$baseline_requests" -gt 0 ]; then
        local request_pct=$((total_requests * 100 / baseline_requests))
        if [ "$request_pct" -gt 200 ]; then
            echo -e "${RED}🔴 ABNORMAL: Requests are $((request_pct - 100))% above 7-day average${NC}"
            echo "   Baseline (7-day avg): $baseline_requests requests"
            echo "   Today: $total_requests requests"
        elif [ "$request_pct" -lt 50 ]; then
            echo "🟢 LOW: Requests are $((100 - request_pct))% below baseline"
        else
            echo "🟡 NORMAL: Requests within expected range"
        fi
    else
        echo "📊 (No historical baseline yet - first analysis)"
    fi

    local high_risk baseline_attacks
    high_risk=$(_comparison_report_metric "$latest_report" "High_Risk_IPs")
    baseline_attacks=$(calculate_baseline_average "server" "high_risk" 7) || baseline_attacks=0
    baseline_attacks=$(_comparison_to_int "$baseline_attacks")

    if [ "$baseline_attacks" -gt 0 ]; then
        local attack_ratio=$((high_risk / baseline_attacks))
        if [ "$attack_ratio" -gt 3 ]; then
            echo -e "${RED}🔴 ABNORMAL: High-risk IPs are ${attack_ratio}x above baseline${NC}"
            echo "   Baseline (7-day avg): $baseline_attacks high-risk IPs"
            echo "   Today: $high_risk high-risk IPs"
        elif [ "$high_risk" -gt "$baseline_attacks" ]; then
            echo -e "${YELLOW}🟡 ELEVATED: $high_risk high-risk IPs (baseline: $baseline_attacks)${NC}"
        else
            echo "🟢 NORMAL: High-risk IPs within expected range"
        fi
    fi

    # Compare with previous day's analysis (GNU date first, BSD date fallback)
    local yesterday previous_report
    yesterday=$(date -d "1 day ago" +%Y%m%d 2>/dev/null || date -v-1d +%Y%m%d 2>/dev/null) || yesterday=""
    previous_report="$history_dir/latest_analysis_${yesterday}.txt"

    if [ -n "$yesterday" ] && [ -f "$previous_report" ]; then
        echo ""
        print_header "DAY-OVER-DAY TRENDS"

        # A metric missing from an older report reads as 0 instead of
        # producing an empty arithmetic operand.
        local curr_high_risk prev_high_risk
        curr_high_risk=$(_comparison_report_metric "$latest_report" "High_Risk_IPs")
        prev_high_risk=$(_comparison_report_metric "$previous_report" "High_Risk_IPs")
        local risk_diff=$((curr_high_risk - prev_high_risk))
        local risk_pct=0

        if [ "$prev_high_risk" -gt 0 ]; then
            risk_pct=$((risk_diff * 100 / prev_high_risk))
        fi

        # Display trend
        if [ "$risk_diff" -gt 0 ]; then
            echo "⚠️  High-Risk IPs: $curr_high_risk (↑ $risk_diff IPs, +${risk_pct}%)"
        elif [ "$risk_diff" -lt 0 ]; then
            echo "✓  High-Risk IPs: $curr_high_risk (↓ $((risk_diff * -1)) IPs, ${risk_pct}%)"
        else
            echo "→  High-Risk IPs: $curr_high_risk (no change)"
        fi

        local curr_sql prev_sql
        curr_sql=$(_comparison_report_metric "$latest_report" "SQL_Injection")
        prev_sql=$(_comparison_report_metric "$previous_report" "SQL_Injection")
        local sql_diff=$((curr_sql - prev_sql))

        if [ "$sql_diff" -gt 0 ]; then
            echo "⚠️  SQL Injection: $curr_sql (↑ $sql_diff new attempts)"
        elif [ "$sql_diff" -lt 0 ]; then
            echo "✓  SQL Injection: $curr_sql (↓ $((sql_diff * -1)) fewer)"
        else
            echo "→  SQL Injection: $curr_sql (stable)"
        fi

        # Track repeat attackers: today's high-risk IPs also listed yesterday
        local repeat_attackers=0
        if [ -f "$history_dir/known_attackers_${yesterday}.txt" ]; then
            repeat_attackers=$(comm -12 \
                <(awk -F'|' '$1 >= 70 {print $2}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | sort -u) \
                <(sort -u "$history_dir/known_attackers_${yesterday}.txt") 2>/dev/null | wc -l) || repeat_attackers=0
            repeat_attackers=$(_comparison_to_int "$repeat_attackers")
            if [ "$repeat_attackers" -gt 0 ]; then
                echo -e "${RED}🔄 REPEAT ATTACKERS: $repeat_attackers IPs from yesterday${NC}"
            fi
        fi
    fi

    # Save current high-risk IPs for tomorrow's comparison. Without a scores
    # file an empty list is written, so the function still returns success
    # under `set -o pipefail`.
    if [ -r "$TEMP_DIR/threat_scores.txt" ]; then
        awk -F'|' '$1 >= 70 {print $2}' "$TEMP_DIR/threat_scores.txt" | sort -u > "$history_dir/known_attackers_${today}.txt"
    else
        : > "$history_dir/known_attackers_${today}.txt"
    fi
}

#############################################################################
# Report Generation
#############################################################################

generate_report() {
    exec > >(tee "$OUTPUT_FILE")

    echo "==============================================================="
    echo "     APACHE/CPANEL BOT & BOTNET ANALYSIS REPORT"
    echo "     Generated: $(date '+%Y-%m-%d %H:%M:%S')"
    echo "==============================================================="

    # CRITICAL ALERTS SECTION
    print_header "CRITICAL ALERTS"

    alert_count=0

    # Check for attack vectors
    if [ -s "$TEMP_DIR/attack_types.txt" ]; then
        print_alert "Security Attack Vectors Detected:"
        while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            attack_type=$(echo "$line" | awk '{print $2}')

            case $attack_type in
                sqli) echo "   SQL Injection:       $count attempts" ;;
                xss) echo "   XSS Attacks:         $count attempts" ;;
                path_traversal) echo "   Path Traversal:      $count attempts" ;;
                rce_upload) echo "   RCE/Shell Upload:    $count attempts" ;;
                info_disclosure) echo "   Info Disclosure:     $count attempts" ;;
                login_bruteforce) echo "   Login Bruteforce:    $count attempts" ;;
            esac
        done < "$TEMP_DIR/attack_types.txt"
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # Check for suspicious scanners
    if [ -s "$TEMP_DIR/suspicious_ua.txt" ]; then
        scanner_count=$(wc -l < "$TEMP_DIR/suspicious_ua.txt")
        print_alert "Malicious scanners detected: $scanner_count IPs"
        echo "   Top scanners:"
        head -3 "$TEMP_DIR/suspicious_ua.txt" | while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
            ua=$(echo "$line" | cut -d'|' -f2)
            printf "   %s requests - IP: %s - UA: %s\n" "$count" "$ip" "$ua"
        done
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # NEW: Check for header anomalies (bot signatures)
    if [ -s "$TEMP_DIR/header_anomalies.txt" ]; then
        header_count=$(wc -l < "$TEMP_DIR/header_anomalies.txt")
        print_alert "Header-based bot signatures detected: $header_count IPs"
        echo "   These IPs show suspicious header patterns (missing/unusual Accept-Language, Referer, etc.)"
        head -5 "$TEMP_DIR/header_anomalies.txt" | while read -r line; do
            ip=$(echo "$line" | awk -F'|' '{print $1}')
            anomaly_type=$(echo "$line" | awk -F'|' '{print $2}')
            score=$(echo "$line" | awk -F'|' '{print $3}')
            printf "   • %s - Anomaly score: %s (detected: %s)\n" "$ip" "$score" "$anomaly_type"
        done
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # NEW: Check for suspicious entry points
    if [ -s "$TEMP_DIR/suspicious_entry_points.txt" ]; then
        entry_count=$(wc -l < "$TEMP_DIR/suspicious_entry_points.txt")
        print_alert "Suspicious entry points detected: $entry_count IPs"
        echo "   These IPs skip homepage/search and go straight to admin/config:"
        head -5 "$TEMP_DIR/suspicious_entry_points.txt" | while read -r line; do
            ip=$(echo "$line" | awk -F'|' '{print $1}')
            url=$(echo "$line" | awk -F'|' '{print $3}')
            status=$(echo "$line" | awk -F'|' '{print $4}')
            printf "   • %s → %s (HTTP %s)\n" "$ip" "$url" "$status"
        done
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # NEW: Check for fuzzing/scanning behavior
    if [ -s "$TEMP_DIR/fuzzing_ips.txt" ]; then
        fuzz_count=$(wc -l < "$TEMP_DIR/fuzzing_ips.txt")
        print_alert "Parameter fuzzing/scanning detected: $fuzz_count IPs"
        echo "   These IPs are testing random parameters (vulnerability scanning):"
        head -5 "$TEMP_DIR/fuzzing_ips.txt" | while read -r line; do
            ip=$(echo "$line" | awk -F'|' '{print $1}')
            total_urls=$(echo "$line" | awk -F'|' '{print $3}')
            unique_paths=$(echo "$line" | awk -F'|' '{print $4}')
            printf "   • %s - %s URLs across %s paths\n" "$ip" "$total_urls" "$unique_paths"
        done
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # NEW: Check for timing anomalies (bot signatures)
    if [ -s "$TEMP_DIR/timing_anomalies.txt" ]; then
        timing_count=$(wc -l < "$TEMP_DIR/timing_anomalies.txt")
        print_alert "Consistent timing pattern detected: $timing_count IPs"
        echo "   These IPs show mechanical request patterns (bot behavior):"
        head -5 "$TEMP_DIR/timing_anomalies.txt" | while read -r line; do
            ip=$(echo "$line" | awk -F'|' '{print $1}')
            avg_interval=$(echo "$line" | awk -F'|' '{print $3}')
            total_reqs=$(echo "$line" | awk -F'|' '{print $4}')
            printf "   • %s - %.1f seconds average between requests (%s total requests)\n" "$ip" "$avg_interval" "$total_reqs"
        done
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # Check for rapid-fire IPs (potential DDoS)
    if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
        ddos_count=$(wc -l < "$TEMP_DIR/rapid_fire_ips.txt")
        print_alert "Potential DDoS sources: $ddos_count IPs with >50 req/min"
        echo "   Top offenders:"
        head -3 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print "   "$2" - "$1" rapid requests"}'
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # Check for suspicious networks
    if [ -s "$TEMP_DIR/suspicious_networks.txt" ]; then
        net_count=$(wc -l < "$TEMP_DIR/suspicious_networks.txt")
        print_alert "Suspicious networks detected: $net_count Class C ranges"
        echo "   Top networks:"
        head -3 "$TEMP_DIR/suspicious_networks.txt" | awk '{print "   "$2" - "$1" requests"}'
        echo ""
        alert_count=$((alert_count + 1))
    fi

    if [ "${alert_count:-0}" -eq 0 ]; then
        print_success "No critical threats detected"
    fi

    # QUICK STATS DASHBOARD
    print_header "QUICK STATS DASHBOARD"

    total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
    unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
    unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
    bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" | wc -l)

    # Count private/internal IPs (excluded from threat analysis)
    private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)

    # Count server's own IPs in the logs
    server_ip_hits=0
    if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
        while read -r server_ip; do
            if cat "$TEMP_DIR/parsed_logs.txt" | grep -q "^$server_ip|" 2>/dev/null; then
                server_ip_hits=$((server_ip_hits + 1))
            fi
        done < "$TEMP_DIR/server_ips.txt"
    fi

    echo "Total Requests:      $(printf "%'d" $total_requests)"
    echo "Unique IPs:          $(printf "%'d" $unique_ips)"

    # Show breakdown if we have excluded IPs
    if [ "$private_ips" -gt 0 ] || [ "$server_ip_hits" -gt 0 ]; then
        excluded_total=$((private_ips + server_ip_hits))
        echo "  ├─ Excluded IPs:     $(printf "%'d" $excluded_total)"
        [ "$private_ips" -gt 0 ] && echo "  │   ├─ Private/Internal: $private_ips"
        [ "$server_ip_hits" -gt 0 ] && echo "  │   └─ Server's own: $server_ip_hits"
        echo "  └─ External IPs:     $(printf "%'d" $((unique_ips - excluded_total)))"
    fi

    echo "Domains Analyzed:    $unique_domains"
    echo "Bot Requests:        $(printf "%'d" $bot_requests) ($(awk "BEGIN {printf \"%.1f\", ($bot_requests/$total_requests)*100}")%)"

    # Show detected server IPs
    if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
        echo ""
        echo "  Server IPs Detected (excluded from threat analysis):"
        while read -r server_ip; do
            echo "  • $server_ip"
        done < "$TEMP_DIR/server_ips.txt"
    fi
    echo ""

    # Traffic breakdown
    echo "Traffic Breakdown:"
    while read -r line; do
        count=$(echo "$line" | awk '{print $1}')
        type=$(echo "$line" | awk '{print $2}')
        pct=$(awk "BEGIN {printf \"%.1f\", ($count/$total_requests)*100}")

        case $type in
            legit) echo "  Legitimate Bots:    $(printf "%'7d" $count) ($pct%)" ;;
            ai) echo "  AI Bots:            $(printf "%'7d" $count) ($pct%)" ;;
            monitor) echo "  📡 Monitoring/SEO:     $(printf "%'7d" $count) ($pct%)" ;;
            suspicious) echo "  Suspicious Bots:   $(printf "%'7d" $count) ($pct%)" ;;
            unidentified_bot) echo "  ❓ Unidentified Bots: $(printf "%'7d" $count) ($pct%)" ;;
            unknown) echo "  Regular Traffic:    $(printf "%'7d" $count) ($pct%)" ;;
        esac
    done < "$TEMP_DIR/traffic_breakdown.txt"

    # TIME-SERIES ANALYSIS
    if [ -s "$TEMP_DIR/hourly_bot_traffic.txt" ]; then
        echo ""
        echo "Bot Traffic Timeline (hourly):"
        max_bot_traffic=$(awk '{print $1}' "$TEMP_DIR/hourly_bot_traffic.txt" | sort -rn | head -1)
        while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            hour=$(echo "$line" | awk '{print $2}')
            # Create simple bar chart
            bar_width=$((count * 10 / max_bot_traffic))
            [ "${bar_width:-0}" -eq 0 ] && [ "${count:-0}" -gt 0 ] && bar_width=1
            bar=$(printf '█%.0s' $(seq 1 $bar_width))
            spaces=$(printf '░%.0s' $(seq 1 $((10 - bar_width))))

            # Detect spikes (>2x average)
            avg_traffic=$((total_requests / 24))
            spike=""
            [ ${count:-0} -gt $((avg_traffic * 2)) ] && spike=" SPIKE"

            # Strip leading zeros to avoid octal interpretation
            hour_num=$((10#$hour))
            next_hour=$((hour_num + 1))
            printf "  %02d:00-%02d:00: %s%s %'6d bot requests%s\n" "$hour_num" "$next_hour" "$bar" "$spaces" "$count" "$spike"
        done < "$TEMP_DIR/hourly_bot_traffic.txt"
    fi

    # RESPONSE CODE INTELLIGENCE
    if [ -s "$TEMP_DIR/response_codes.txt" ]; then
        echo ""
        echo "Response Code Analysis:"
        while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            code=$(echo "$line" | awk '{print $2}')
            pct=$(awk "BEGIN {printf \"%.1f\", ($count/$total_requests)*100}")

            case $code in
                200) echo "  200 (Success):     $(printf "%'7d" $count) ($pct%) Bots are getting data" ;;
                404) echo "  404 (Not Found):   $(printf "%'7d" $count) ($pct%) Scanning for vulnerabilities" ;;
                403) echo "  403 (Forbidden):   $(printf "%'7d" $count) ($pct%) Blocked by existing rules" ;;
                401) echo "  401 (Unauthorized):$(printf "%'7d" $count) ($pct%) Login attempts failing" ;;
                500|502|503) echo "  $code (Server Error):$(printf "%'7d" $count) ($pct%) Check if exploit triggered" ;;
                301|302) echo "  $code (Redirect):    $(printf "%'7d" $count) ($pct%)" ;;
                *) echo "  $code:             $(printf "%'7d" $count) ($pct%)" ;;
            esac
        done < "$TEMP_DIR/response_codes.txt" | head -7
    fi

    # FALSE POSITIVE WARNINGS
    if [ -s "$TEMP_DIR/false_positives.txt" ]; then
        echo ""
        echo "Whitelist Recommendations (Legitimate Services):"
        while read -r line; do
            ip=$(echo "$line" | cut -d'|' -f1)
            service=$(echo "$line" | cut -d'|' -f2)
            domain=$(echo "$line" | cut -d'|' -f4)
            req_count=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -c "^$ip|" || echo 0)
            echo "  $ip - $req_count requests - Identified as: $service"
            echo "    → Domain: $domain"
            echo "    → Action: VERIFY OWNERSHIP then whitelist"
        done < "$TEMP_DIR/false_positives.txt" | head -6
    fi

    # NEW: HIGH-CONFIDENCE BOT FINGERPRINTS
    if [ -s "$TEMP_DIR/bot_fingerprints.txt" ]; then
        echo ""
        print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)"
        echo "These IPs show MULTIPLE bot indicators combined (not just single signal):"
        echo ""

        awk -F'|' '
        NR <= 15 {
            ip = $1
            score = $2
            signals = $3

            # Risk level based on score
            if (score >= 80) risk = "CRITICAL"
            else if (score >= 70) risk = "HIGH"
            else if (score >= 60) risk = "MEDIUM"
            else risk = "LOW"

            printf "  %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals
        }' "$TEMP_DIR/bot_fingerprints.txt"

        total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
        echo ""
        echo "  Total high-confidence bots detected: $total IPs"
        echo ""
    else
        echo ""
        echo "  No high-confidence bot fingerprints detected (requires multiple signals)"
        echo ""
    fi

    # NEW: DOMAIN ATTACK TARGETING ANALYSIS (what's attacking each domain)
    if [ -s "$TEMP_DIR/domain_targeting.txt" ]; then
        echo ""
        print_header "DOMAIN ATTACK TARGETING (Which domains are under attack & from where?)"
        echo ""

        total_domains=$(wc -l < "$TEMP_DIR/domain_targeting.txt" 2>/dev/null || echo "0")
        echo "Total domains with attacks detected: $total_domains"
        echo ""

        # Show top attacked domains with attack details
        awk -F'|' 'NR <= 10 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
            domain_attack_count=$(grep "^[^|]*|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0")

            if [ "$domain_attack_count" -gt 0 ]; then
                echo "  Domain: $domain ($domain_attack_count attack attempts)"

                # Get all attacks on this domain, group by type
                awk -F'|' -v dom="$domain" '
                $2 == dom {
                    ip = $1
                    attack_type = $5

                    # Validate IP format
                    if (match(ip, /^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/)) {
                        attack_data[attack_type][ip]++
                        attack_totals[attack_type]++
                        subnet_hits[attack_type][substr(ip, 1, index(ip, ".", index(ip, ".")+1)-1)]++
                    }
                }
                END {
                    for (attack_type in attack_totals) {
                        printf "    └─ %s: %d attempts\n", attack_type, attack_totals[attack_type]

                        # Show top 3 IPs for this attack type
                        attack_count = 0
                        for (ip in attack_data[attack_type]) {
                            if (attack_count >= 3) break
                            count = attack_data[attack_type][ip]
                            split(ip, parts, ".")
                            subnet = parts[1] "." parts[2] "." parts[3] ".0/24"
                            printf "       ├─ %s (%d reqs) [subnet: %s]\n", ip, count, subnet
                            attack_count++
                        }
                    }
                }' "$TEMP_DIR/attack_vectors_raw.txt"
                echo ""
            fi
        done
    else
        echo ""
        echo "  No domain attack data available (all domains may be healthy)"
        echo ""
    fi

    # NEW: TOP URLs BEING ATTACKED
    if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then
        echo ""
        print_header "TOP TARGETED URLs (What files/endpoints are bots hitting?)"
        echo ""

        # Show top URLs for top 3 most-attacked domains
        urls_shown=0
        awk -F'|' 'NR <= 3 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
            local domain_file="$TEMP_DIR/domain_urls_${domain}.txt"
            if [ -f "$domain_file" ] && [ -s "$domain_file" ]; then
                echo "  Domain: $domain"
                awk -F'|' '{
                    url = $1
                    count = $2
                    printf "    %3d requests → %s\n", count, url
                }' "$domain_file"  # Show all URLs, not just top 5
                echo ""
            fi
        done

        # Check if no URL data was shown
        if [ "$urls_shown" -eq 0 ]; then
            echo "  No URL targeting data available"
            echo ""
        fi
    else
        echo ""
        echo "  No domain targeting data available"
        echo ""
    fi

    # TOP 5 THREATS
    print_header "TOP 5 THREATS (with recommended actions)"

    echo "1. Highest Risk IPs (by threat score):"
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        counter=1
        while read -r line && [ "${counter:-0}" -le 10 ]; do
            score=$(echo "$line" | cut -d'|' -f1)
            ip=$(echo "$line" | cut -d'|' -f2)
            count=$(echo "$line" | cut -d'|' -f3)

            # Determine threat level and action based on score
            if [ "$score" -ge 80 ]; then
                threat_level="CRITICAL"
                threat_icon=""
                action="BLOCK IMMEDIATELY + INVESTIGATE"
                echo -e "   ${RED}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
            elif [ "$score" -ge 60 ]; then
                threat_level="HIGH"
                threat_icon=""
                action="BLOCK or AGGRESSIVE RATE LIMIT"
                echo -e "   ${YELLOW}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
            elif [ "$score" -ge 40 ]; then
                threat_level="MODERATE"
                threat_icon=""
                action="RATE LIMIT RECOMMENDED"
                echo "   [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
            else
                threat_level="LOW"
                threat_icon=""
                action="MONITOR"
                echo "   [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
            fi

            echo "      $count requests - Action: $action"

            # Show which attack vectors this IP used
            attack_types=""
            grep -q "$ip" "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null && attack_types="${attack_types}SQL-Injection "
            grep -q "$ip" "$TEMP_DIR/xss_attempts.txt" 2>/dev/null && attack_types="${attack_types}XSS "
            grep -q "$ip" "$TEMP_DIR/path_traversal_attempts.txt" 2>/dev/null && attack_types="${attack_types}Path-Traversal "
            grep -q "$ip" "$TEMP_DIR/rce_upload_attempts.txt" 2>/dev/null && attack_types="${attack_types}RCE/Upload "
            grep -q "$ip" "$TEMP_DIR/login_bruteforce_attempts.txt" 2>/dev/null && attack_types="${attack_types}Login-Bruteforce "
            grep -q "$ip" "$TEMP_DIR/suspicious_ua.txt" 2>/dev/null && attack_types="${attack_types}Scanner-UA "
            grep -q "$ip" "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null && attack_types="${attack_types}DDoS-Pattern "

            [ -n "$attack_types" ] && echo "      Attack vectors: $attack_types"

            counter=$((counter + 1))
        done < "$TEMP_DIR/threat_scores.txt"
    else
        echo "   No significant threats detected "
    fi
    echo ""

    echo "2. Top Aggressive Bots:"
    counter=1
    while read -r line && [ "${counter:-0}" -le 5 ]; do
        count=$(echo "$line" | awk 'BEGIN {count=0} {print $1}')
        bot=$(echo "$line" | awk 'BEGIN {f=""} {$1=""; print $0}' | xargs)

        action="Allow"
        if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex"; then
            action="Consider blocking (aggressive)"
        fi

        echo "   [$counter] $bot - $count requests - Action: $action"
        counter=$((counter + 1))
    done < "$TEMP_DIR/top_bots.txt"
    echo ""

    echo "3. Admin Endpoint Probing:"
    if [ -s "$TEMP_DIR/admin_probes.txt" ]; then
        head -3 "$TEMP_DIR/admin_probes.txt" | while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
            domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
            url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
            printf "   %s attempts - IP: %s - %s%s\n" "$count" "$ip" "$domain" "$url"
        done
        echo "   Action: Verify legitimate admin access or block"
    else
        echo "   None detected "
    fi
    echo ""

    echo "4. 404 Scanners (Reconnaissance):"
    if [ -s "$TEMP_DIR/404_scans.txt" ]; then
        head -3 "$TEMP_DIR/404_scans.txt" | awk '$1 > 10 {
            count = $1
            $1 = ""
            gsub(/^[[:space:]]+\|?/, "")
            split($0, parts, "|")
            printf "   %s failed requests - IP: %s - %s%s\n", count, parts[1], parts[2], parts[3]
        }'
    else
        echo "   None detected "
    fi
    echo ""

    echo "5. Large Data Transfers:"
    if [ -s "$TEMP_DIR/large_transfers.txt" ]; then
        # Calculate total bot bandwidth
        total_bot_bandwidth=0
        if [ -f "$TEMP_DIR/classified_bots.txt.gz" ]; then
            total_bot_bandwidth=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
        fi

        if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then
            bot_bandwidth_mb=$(awk "BEGIN {printf \"%.0f\", $total_bot_bandwidth/1048576}")
            bot_bandwidth_gb=$(awk "BEGIN {printf \"%.2f\", $total_bot_bandwidth/1073741824}")
            # Estimate cost at $0.09/GB (typical CDN pricing)
            estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}")

            total_bandwidth=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
            bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}")

            echo ""
            echo "   💰 Bandwidth Impact:"
            echo "      Total bot bandwidth: ${bot_bandwidth_mb} MB (${bot_bandwidth_gb} GB) - ${bot_pct}% of total"
            echo "      Estimated cost: \$$estimated_cost (at \$0.09/GB CDN pricing)"
        fi
        echo ""
        echo "   Top bandwidth consumers:"

        head -3 "$TEMP_DIR/large_transfers.txt" | while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
            domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
            url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
            size=$(echo "$line" | awk '{print $2}' | cut -d'|' -f4)
            size_mb=$(awk "BEGIN {printf \"%.1f\", $size/1048576}")
            total_ip_mb=$(awk "BEGIN {printf \"%.0f\", $size * $count / 1048576}")
            printf "      %s transfers from %s - %.1f MB avg (%s MB total) - %s%s\n" "$count" "$ip" "$size_mb" "$total_ip_mb" "$domain" "$url"
        done
        echo "      Action: Verify if scraping, consider serving WebP/optimized images"
    else
        echo "   None detected "
    fi

    # TOP 5 TARGETED SITES
    print_header "TOP 5 TARGETED SITES (with risk breakdown)"

    counter=1
    while read -r line && [ "${counter:-0}" -le 5 ]; do
        count=$(echo "$line" | awk '{print $1}')
        domain=$(echo "$line" | awk '{print $2}')

        echo "[$counter] $domain - $count requests"

        # Show traffic breakdown for this domain
        if [ -f "$TEMP_DIR/domain_${domain}_stats.txt" ]; then
            tail -n +2 "$TEMP_DIR/domain_${domain}_stats.txt" | while read -r stat_line; do
                stat_count=$(echo "$stat_line" | awk '{print $1}')
                stat_type=$(echo "$stat_line" | awk '{print $2}')
                pct=$(awk "BEGIN {printf \"%.1f\", ($stat_count/$count)*100}")

                case $stat_type in
                    suspicious) echo -e "      ${YELLOW}Suspicious: $stat_count ($pct%)${NC}" ;;
                    ai) echo "      AI Bots: $stat_count ($pct%)" ;;
                    legit) echo "      Legit Bots: $stat_count ($pct%)" ;;
                    unknown) echo "      Regular: $stat_count ($pct%)" ;;
                    *) echo "      $stat_type: $stat_count ($pct%)" ;;
                esac
            done
        fi
        echo ""

        counter=$((counter + 1))
    done < "$TEMP_DIR/top_sites.txt"

    # BLOCKLIST
    print_header "COPY-PASTE READY BLOCKLIST (Prioritized by Threat Score)"

    echo "# Apache .htaccess format:"
    echo "# Add to .htaccess in document root"
    echo "# IPs sorted by risk score (highest first)"
    echo ""

    # Use threat scores to prioritize blocklist (exclude false positives and excluded IPs)
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        # Get IPs with score >= 60 (HIGH and CRITICAL)
        awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
            ip=$(echo "$entry" | cut -d'|' -f1)
            score=$(echo "$entry" | cut -d'|' -f2)

            # Skip excluded IPs (private, localhost, server's own)
            if is_excluded_ip "$ip"; then
                continue
            fi

            # Skip if in false positives
            if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
                continue
            fi

            echo "Deny from $ip  # Risk score: $score/100"
        done
    else
        # Fallback to old method
        {
            [ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
            [ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
            [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
        } | sort -u | head -30 | while read -r ip; do
            echo "Deny from $ip"
        done
    fi

    echo ""
    echo "# cPanel User-Agent blocking (add to /etc/apache2/conf.d/includes/pre_main_global.conf):"
    echo ""
    echo "<IfModule mod_rewrite.c>"
    echo "    RewriteEngine On"
    echo "    RewriteCond %{HTTP_USER_AGENT} \"(nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp|metasploit)\" [NC]"
    echo "    RewriteRule ^ - [F,L]"
    echo "</IfModule>"
    echo ""
    echo "# Optional: Block aggressive SEO bots (uncomment to enable)"
    echo "# <IfModule mod_rewrite.c>"
    echo "#     RewriteEngine On"
    echo "#     RewriteCond %{HTTP_USER_AGENT} \"(AhrefsBot|SemrushBot|MJ12bot|DotBot|Meta-ExternalAgent|Go-http-client)\" [NC]"
    echo "#     RewriteRule ^ - [F,L]"
    echo "# </IfModule>"

    echo ""
    echo "# CSF/iptables format:"
    echo "# Run these commands as root:"
    echo ""

    # Same prioritized list for CSF
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
            ip=$(echo "$entry" | cut -d'|' -f1)
            score=$(echo "$entry" | cut -d'|' -f2)

            # Skip excluded IPs (private, localhost, server's own)
            if is_excluded_ip "$ip"; then
                continue
            fi

            # Skip if in false positives
            if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
                continue
            fi

            echo "csf -d $ip \"Threat score: $score/100\""
        done
    else
        # Fallback
        {
            [ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
            [ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
            [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
        } | sort -u | head -30 | while read -r ip; do
            echo "csf -d $ip \"Bot/Scanner threat\""
        done
    fi

    # SUMMARY
    print_header "📋 SUMMARY & RECOMMENDATIONS"

    threat_score=0

    # Calculate threat score from attack vectors
    [ -s "$TEMP_DIR/sqli_attempts.txt" ] && threat_score=$((threat_score + 15))
    [ -s "$TEMP_DIR/xss_attempts.txt" ] && threat_score=$((threat_score + 12))
    [ -s "$TEMP_DIR/path_traversal_attempts.txt" ] && threat_score=$((threat_score + 15))
    [ -s "$TEMP_DIR/rce_upload_attempts.txt" ] && threat_score=$((threat_score + 20))
    [ -s "$TEMP_DIR/login_bruteforce_attempts.txt" ] && threat_score=$((threat_score + 10))
    [ -s "$TEMP_DIR/suspicious_ua.txt" ] && threat_score=$((threat_score + 8))
    [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && threat_score=$((threat_score + 5))
    [ $(wc -l < "$TEMP_DIR/admin_probes.txt" 2>/dev/null || echo 0) -gt 10 ] && threat_score=$((threat_score + 3))

    # Count high-risk IPs
    high_risk_count=0
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        high_risk_count=$(awk -F'|' '$1 >= 60' "$TEMP_DIR/threat_scores.txt" | wc -l)
    fi

    if [ "${threat_score:-0}" -ge 25 ] || [ "${high_risk_count:-0}" -ge 5 ]; then
        print_alert "THREAT LEVEL: CRITICAL - Immediate action required"
        echo "   Summary: Multiple attack vectors detected from $high_risk_count high-risk IPs"
        echo ""
        echo "   Immediate Actions:"
        echo "   1. ⚡ Apply the blocklist above IMMEDIATELY (prioritized by threat score)"
        echo "   2. Review admin access logs for successful breaches"
        echo "   3. 🛡  Enable ModSecurity WAF or Cloudflare if not already active"
        echo "   4. 🔄 Update all CMS platforms and plugins urgently"
        echo "   5. 🔐 Force password reset for admin accounts if login attempts detected"
        echo "   6. Re-run this analysis in 1 hour to verify blocks are working"
    elif [ "${threat_score:-0}" -ge 12 ] || [ "${high_risk_count:-0}" -ge 2 ]; then
        print_warning "THREAT LEVEL: HIGH - Action recommended within 24 hours"
        echo "   Summary: Significant threat activity from $high_risk_count high-risk IPs"
        echo ""
        echo "   Recommended Actions:"
        echo "   1. Review and apply the blocklist above (focus on CRITICAL/HIGH scores)"
        echo "   2. Enable rate limiting for admin endpoints"
        echo "   3. Monitor logs closely for the next 24-48 hours"
        echo "   4. Consider implementing fail2ban or similar IDS"
        echo "   5. Review and update security plugins/modules"
    elif [ "${threat_score:-0}" -ge 5 ]; then
        print_warning "THREAT LEVEL: MODERATE - Routine security maintenance"
        echo "   Summary: Normal bot activity with some suspicious patterns"
        echo ""
        echo "   Recommended Actions:"
        echo "   1. Review suspicious IPs in the report"
        echo "   2. Consider rate limiting aggressive bots"
        echo "   3. Continue routine log monitoring"
        echo "   4. Block aggressive SEO bots if impacting performance"
    else
        print_success "THREAT LEVEL: ✅ LOW - Normal operation"
        echo "   Summary: Minimal threat activity detected"
        echo ""
        echo "   Recommended Actions:"
        echo "   1. Continue routine log monitoring"
        echo "   2. Review false positive warnings to whitelist legitimate services"
        echo "   3. Consider blocking aggressive SEO bots if bandwidth is a concern"
    fi

    echo ""
    echo "==============================================================="
    echo "Report saved to: $OUTPUT_FILE"
    echo "==============================================================="
}

################################################################################
# BASELINE HEALTH CHECK - Snapshot cached domain status before making changes
################################################################################

# Record the cached status of every domain as a "before" snapshot.
#
# Reads `domain|http_code|https_code|status_summary` records from
# get_all_domain_statuses, prints one status line per domain and stores the
# same records in $TEMP_DIR/baseline_health.txt (truncated on every call).
#
# Globals:  TEMP_DIR (read); GREEN, YELLOW, RED, NC (read; presumably colour
#           escape codes provided by the sourced libraries)
# Outputs:  per-domain status lines and a summary on stdout
# Returns:  0, also when the status function or its data is unavailable
baseline_health_check() {
    # Record fields are local so the loop cannot clobber same-named globals
    # (e.g. $domain) that other parts of the script rely on.
    local domain http_status https_status result
    local tested=0
    local working=0
    local broken=0

    print_info "Loading baseline health status from cached data..."
    echo ""

    # Create (or truncate) the baseline health file
    > "$TEMP_DIR/baseline_health.txt"

    # Use get_all_domain_statuses() from reference database instead of re-checking
    # Returns: domain|http_code|https_code|status_summary
    if ! command -v get_all_domain_statuses &>/dev/null; then
        print_warning "Reference database functions not available - skipping health check"
        return 0
    fi

    # `|| [ -n "$domain" ]` keeps a final record that lacks a trailing newline;
    # plain `read` would return non-zero for it and the record would be dropped.
    while IFS='|' read -r domain http_status https_status result || [ -n "$domain" ]; do
        [ -z "$domain" ] && continue

        tested=$((tested + 1))

        # Display status based on cached result. Anything that is not an
        # explicit OK/redirect is counted as "with issues".
        case "$result" in
            200_OK)
                working=$((working + 1))
                echo -e "  ${GREEN}✓${NC} $domain - HTTP:$http_status HTTPS:$https_status"
                ;;
            REDIRECT)
                working=$((working + 1))
                echo -e "  ${YELLOW}→${NC} $domain - Redirect (HTTP:$http_status HTTPS:$https_status)"
                ;;
            403_FORBIDDEN)
                broken=$((broken + 1))
                echo -e "  ${RED}✗${NC} $domain - Forbidden (HTTP:$http_status HTTPS:$https_status)"
                ;;
            TIMEOUT|UNREACHABLE)
                broken=$((broken + 1))
                echo -e "  ${RED}⏱${NC} $domain - Timeout (unreachable)"
                ;;
            *)
                broken=$((broken + 1))
                echo -e "  ${YELLOW}?${NC} $domain - HTTP:$http_status HTTPS:$https_status"
                ;;
        esac

        # Store baseline: domain|http_status|https_status|result
        echo "$domain|$http_status|$https_status|$result" >> "$TEMP_DIR/baseline_health.txt"

    done < <(get_all_domain_statuses)

    if [ "$tested" -eq 0 ]; then
        print_warning "No domain status data available in reference database"
        return 0
    fi

    echo ""
    print_success "Baseline loaded from cache: $working working, $broken with issues"
    echo ""
}

# Compare each domain's current cached status against the stored baseline.
#
# Reads `domain|http|https|result` records from
# $TEMP_DIR/baseline_health.txt, looks up the current status of each domain
# via get_domain_status (expected format: http_code|https_code|status_summary)
# and reports domains that moved from a working state (200_OK/REDIRECT) to
# 403_FORBIDDEN/TIMEOUT/UNREACHABLE, or the other way round.
#
# Globals:  TEMP_DIR (read); GREEN, RED, NC (read; presumably colour escape
#           codes provided by the sourced libraries)
# Outputs:  findings and a summary on stdout, then waits for Enter on stdin
# Returns:  0 (a missing baseline, missing lookup function or EOF on stdin
#           are not treated as errors)
verify_domains_still_working() {
    # Everything the loop assigns is local so the function cannot clobber
    # same-named globals (e.g. $domain) used elsewhere in the script.
    local domain baseline_http baseline_https baseline_result
    local current_status http_status https_status new_result
    local changes_detected=0
    local now_broken=0

    print_info "Checking current domain status from cached data..."
    echo ""

    if [ ! -s "$TEMP_DIR/baseline_health.txt" ]; then
        print_warning "No baseline health data available"
        return 0
    fi

    if ! command -v get_domain_status &>/dev/null; then
        print_warning "Reference database functions not available - skipping verification"
        return 0
    fi

    while IFS='|' read -r domain baseline_http baseline_https baseline_result; do
        [ -z "$domain" ] && continue

        # Get current status from cached reference database.
        # A failing lookup is tolerated on purpose (`|| true`): under `set -e`
        # it must not abort the script, an empty result is handled below.
        current_status=$(get_domain_status "$domain") || true

        if [ -z "$current_status" ]; then
            # Domain not in cache - skip
            continue
        fi

        # Parse current status: http_code|https_code|status_summary
        IFS='|' read -r http_status https_status new_result <<< "$current_status"

        # Compare to baseline
        if [ "$baseline_result" != "$new_result" ]; then
            changes_detected=$((changes_detected + 1))

            # Check if it got worse
            if [ "$baseline_result" = "200_OK" ] || [ "$baseline_result" = "REDIRECT" ]; then
                if [ "$new_result" = "403_FORBIDDEN" ] || [ "$new_result" = "TIMEOUT" ] || [ "$new_result" = "UNREACHABLE" ]; then
                    now_broken=$((now_broken + 1))
                    echo -e "  ${RED}⚠ BROKEN:${NC} $domain"
                    echo -e "      Before: $baseline_result (HTTP:$baseline_http HTTPS:$baseline_https)"
                    echo -e "      After:  $new_result (HTTP:$http_status HTTPS:$https_status)"
                    echo -e "      ${RED}WARNING: This domain stopped working after your changes!${NC}"
                    echo ""
                fi
            # Check if it got better
            elif [ "$baseline_result" = "403_FORBIDDEN" ] || [ "$baseline_result" = "TIMEOUT" ] || [ "$baseline_result" = "UNREACHABLE" ]; then
                if [ "$new_result" = "200_OK" ] || [ "$new_result" = "REDIRECT" ]; then
                    echo -e "  ${GREEN}✅ FIXED:${NC} $domain"
                    echo -e "      Before: $baseline_result"
                    echo -e "      After:  $new_result"
                    echo ""
                fi
            fi
        fi
    done < "$TEMP_DIR/baseline_health.txt"

    if [ "${now_broken:-0}" -gt 0 ]; then
        echo ""
        print_alert "WARNING: $now_broken domain(s) may have stopped working!"
        echo ""
        echo "NOTE: Status is from cached data (max 1 hour old)."
        echo "If you just made changes, the cache may not reflect real-time status."
        echo ""
        echo "Recommended actions:"
        echo "  1. Review the firewall rules you just applied"
        echo "  2. Check CSF temporary blocks: csf -t"
        echo "  3. Check CSF deny list: csf -g"
        echo "  4. Manually verify domain: curl -I http://domain.com"
        echo "  5. Consider reverting changes if issues persist"
        echo ""
    elif [ "${changes_detected:-0}" -eq 0 ]; then
        print_success "All domains show same status as baseline (cache-based check)"
    else
        print_success "Some status changes detected but no domains broken (cache-based check)"
    fi

    echo ""
    # `read` returns non-zero at EOF (cron, piped or closed stdin). Without the
    # guard that status would become the function's return value and abort
    # the whole script under `set -e`.
    read -r -p "Press Enter to continue..." || true
}

#############################################################################
# Main Execution
#############################################################################

main() {
    echo ""
    print_header "Starting Apache/cPanel Bot Analysis"

    # InterWorx requires special log discovery (logs are in /home/user/var/domain.com/logs/)
    if [ "$SYS_CONTROL_PANEL" = "interworx" ]; then
        print_info "InterWorx detected - discovering domain logs..."

        # Build time filter options
        local find_opts=()
        if [ -n "$HOURS_BACK" ]; then
            local minutes=$((HOURS_BACK * 60))
            find_opts+=(-mmin -"$minutes")
        elif [ -n "$DAYS_BACK" ]; then
            find_opts+=(-mtime -"$DAYS_BACK")
        fi

        # Find all transfer*.log files in InterWorx structure (includes transfer.log and transfer-ssl.log)
        log_count=$(find /home/*/var/*/logs -type f -name "transfer*.log" "${find_opts[@]}" 2>/dev/null | wc -l)

        if [ "$log_count" -eq 0 ]; then
            # Try without time filter to see if ANY logs exist
            local total_logs=$(find /home/*/var/*/logs -type f -name "transfer*.log" 2>/dev/null | wc -l)

            if [ "$total_logs" -eq 0 ]; then
                print_alert "Error: No InterWorx access logs found in /home/*/var/*/logs/"
                echo ""
                echo "Diagnostic information:"
                echo "  Checking for InterWorx structure:"
                local iw_structure=$(find /home -maxdepth 3 -type d -path "*/var/*/logs" 2>/dev/null | head -5)
                if [ -n "$iw_structure" ]; then
                    echo "  Found InterWorx directories:"
                    echo "$iw_structure"
                    echo ""
                    echo "  Checking for any log files:"
                    find /home/*/var/*/logs -type f -name "*.log" 2>/dev/null | head -10
                else
                    echo "  No InterWorx directory structure found (expected: /home/user/var/domain.com/logs/)"
                fi
                exit 1
            else
                print_alert "No logs found matching time filter (last $HOURS_BACK hours)"
                echo "Total logs available: $total_logs"
                echo ""
                read -p "Analyze all available logs instead? [y/N]: " choice
                if [[ "$choice" =~ ^[Yy] ]]; then
                    log_count=$total_logs
                    find_opts=()  # Clear time filter
                else
                    exit 0
                fi
            fi
        fi

        print_info "Found $log_count InterWorx domain log files to analyze"

        # Override LOG_DIR for parse_logs function to use
        export INTERWORX_MODE="yes"
        export INTERWORX_FIND_OPTS="${find_opts[*]}"
    else
        # Standard cPanel/Plesk log discovery
        # Check if log directory exists
        if [ ! -d "$LOG_DIR" ]; then
            print_alert "Error: Log directory not found: $LOG_DIR"
            echo "Please specify the correct log directory with -l option"
            exit 1
        fi

        # Check if logs exist
        local find_opts=()
        if [ -n "$HOURS_BACK" ]; then
            local minutes=$((HOURS_BACK * 60))
            find_opts+=(-mmin -"$minutes")
        elif [ -n "$DAYS_BACK" ]; then
            find_opts+=(-mtime -"$DAYS_BACK")
        fi

        log_count=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | wc -l)
        if [ "$log_count" -eq 0 ]; then
            # Try without time filter to see if ANY logs exist
            local total_logs=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" 2>/dev/null | wc -l)

            if [ "$total_logs" -eq 0 ]; then
                print_alert "Error: No log files found in $LOG_DIR"
                echo ""
                echo "Diagnostic information:"
                echo "  Log directory: $LOG_DIR"
                echo "  Directory exists: $([ -d "$LOG_DIR" ] && echo "yes" || echo "no")"
                if [ -d "$LOG_DIR" ]; then
                    echo "  Total files in directory: $(find "$LOG_DIR" -type f 2>/dev/null | wc -l)"
                    echo "  Sample files:"
                    find "$LOG_DIR" -type f 2>/dev/null | head -5 | sed 's/^/    /'
                fi
                echo ""
                echo "Control panel: $SYS_CONTROL_PANEL"
                exit 1
            else
                print_alert "No logs found matching time filter"
                if [ -n "$HOURS_BACK" ]; then
                    echo "No logs found from the last $HOURS_BACK hours"
                elif [ -n "$DAYS_BACK" ]; then
                    echo "No logs found from the last $DAYS_BACK days"
                fi
                echo "Total logs available: $total_logs"
                echo ""
                read -p "Analyze all available logs instead? [y/N]: " choice
                if [[ "$choice" =~ ^[Yy] ]]; then
                    log_count=$total_logs
                    find_opts=()  # Clear time filter
                else
                    exit 0
                fi
            fi
        fi

        print_info "Found $log_count log files to analyze"
    fi

    # User filtering
    if [ -n "$FILTER_USER" ]; then
        print_info "Filtering logs for user: $FILTER_USER"
        export user_domains=$(get_user_domains "$FILTER_USER")
        if [ -z "$user_domains" ]; then
            print_error "No domains found for user: $FILTER_USER"
            exit 1
        fi
        print_info "User has $(echo "$user_domains" | wc -l) domain(s)"
    else
        export user_domains=""
    fi

    # Print time range info
    if [ -n "$HOURS_BACK" ]; then
        print_info "Analyzing logs from the last $HOURS_BACK hours"
    elif [ -n "$DAYS_BACK" ]; then
        print_info "Analyzing logs from the last $DAYS_BACK days"
    fi

    # Baseline health check - test all domains before analysis
    baseline_health_check

    # Execute analysis pipeline with error handling
    parse_logs || {
        print_alert "Log parsing failed"
        exit 1
    }

    classify_bots || {
        print_alert "Bot classification failed"
        exit 1
    }

    # NEW: Enhanced analysis functions (before threats detected)
    analyze_headers        # Detect header-based bot patterns
    analyze_entry_points   # Detect suspicious entry points
    analyze_url_entropy    # Detect fuzzing/parameter scanning
    analyze_request_timing # Detect DDoS patterns via timing

    detect_server_ips
    detect_threats         # Must be before fingerprinting/domain targeting (creates attack_vectors_raw.txt)
    analyze_success_rates  # Analyze success/failure rates for better accuracy
    detect_botnets
    analyze_time_series
    calculate_threat_scores
    detect_false_positives
    generate_statistics

    # NEW: Fingerprinting and domain targeting analysis (after threats detected)
    calculate_bot_fingerprint       # Combine signals for accuracy (reduce false positives)
    analyze_domain_targeting_percentage  # Show which domains are being targeted
    analyze_top_urls_per_domain     # Show what files/endpoints are being hit

    generate_comparison_report  # Show trends vs previous day

    # NEW: Baseline and progression analysis
    save_baseline              # Store current metrics for historical comparison
    analyze_attack_progression # Show attack sequences and phases

    generate_report

    print_success "Analysis complete!"
    echo ""
    echo "Report location: $OUTPUT_FILE"

    # Analyze threat patterns and generate recommendations
    analyze_domain_threats
    analyze_geographic_threats
    generate_recommendations

    # Ask user what to do next
    show_post_analysis_menu
}

################################################################################
# DOMAIN-LEVEL THREAT ANALYSIS
################################################################################

# Build per-domain threat statistics from the parsed logs in one AWK pass.
#
# Globals:
#   TEMP_DIR (read) - working directory holding the files below
# Inputs (all '|'-delimited, under $TEMP_DIR):
#   threat_scores.txt       - score in field 1, IP in field 2
#   attack_vectors_raw.txt  - domain in field 2
#   classified_bots.txt     - domain in field 2
#   parsed_logs.txt         - IP in field 1, domain in field 2
# Outputs (under $TEMP_DIR):
#   domain_threats.txt        - domain|total_requests|bot_requests|bot_percentage|
#                               high_risk_ip_count|attack_attempts|high_risk_ips_detail
#   domain_high_risk_ips.txt  - domain|high_risk_ip_count|high_risk_ips_detail
#                               (only domains with at least one high-risk IP)
#   domain_threats_sorted.txt - domain_threats.txt sorted by field 5, descending
#   all_domains.txt           - unique domain names
#
# high_risk_ip_count is the number of DISTINCT IPs with score >= 70 seen on the
# domain. high_risk_ips_detail holds one "ip:score:hits " entry per such IP, in
# order of first appearance, where hits is that IP's request count on the domain.
analyze_domain_threats() {
    print_info "Analyzing per-domain threat patterns..."

    # Start from empty result files; AWK only writes when it has rows to emit
    > "$TEMP_DIR/domain_threats.txt"
    > "$TEMP_DIR/domain_high_risk_ips.txt"

    # Single pass over parsed_logs.txt: the lookup tables are loaded into memory
    # first, so cost is O(file_size) rather than one grep per domain/IP pair.
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        scores_file  = tmpdir "/threat_scores.txt"
        attacks_file = tmpdir "/attack_vectors_raw.txt"
        bots_file    = tmpdir "/classified_bots.txt"
        threats_out  = tmpdir "/domain_threats.txt"
        risky_out    = tmpdir "/domain_high_risk_ips.txt"

        # High-risk IPs (score >= 70) keyed by IP
        while ((getline < scores_file) > 0) {
            if ($1 >= 70) {
                high_risk[$2] = $1
            }
        }
        close(scores_file)

        # Attack attempts per domain
        while ((getline < attacks_file) > 0) {
            attack_counts[$2]++
        }
        close(attacks_file)

        # Bot requests per domain
        while ((getline < bots_file) > 0) {
            bot_counts[$2]++
        }
        close(bots_file)
    }

    # One parsed log line per request
    {
        ip = $1
        domain = $2

        domain_requests[domain]++

        if (ip in high_risk) {
            # Count each (domain, IP) pair once; remember first-seen order
            if (!((domain, ip) in pair_hits)) {
                slot = ++domain_high_risk_count[domain]
                domain_ip_at[domain, slot] = ip
            }
            pair_hits[domain, ip]++
        }
    }

    END {
        for (domain in domain_requests) {
            total_req = domain_requests[domain]
            bot_req = bot_counts[domain] + 0
            bot_pct = (total_req > 0) ? (bot_req / total_req * 100) : 0
            high_risk_count = domain_high_risk_count[domain] + 0
            attacks = attack_counts[domain] + 0

            # One "ip:score:hits " entry per distinct high-risk IP
            high_risk_detail = ""
            for (slot = 1; slot <= high_risk_count; slot++) {
                ip = domain_ip_at[domain, slot]
                high_risk_detail = high_risk_detail ip ":" high_risk[ip] ":" pair_hits[domain, ip] " "
            }

            printf "%s|%d|%d|%.1f|%d|%d|%s\n", domain, total_req, bot_req, bot_pct, high_risk_count, attacks, high_risk_detail > threats_out

            if (high_risk_count > 0) {
                printf "%s|%d|%s\n", domain, high_risk_count, high_risk_detail > risky_out
            }
        }
        close(threats_out)
        close(risky_out)
    }' "$TEMP_DIR/parsed_logs.txt"

    # Sort by high-risk IP count (descending); key limited to field 5
    sort -t'|' -k5,5 -rn "$TEMP_DIR/domain_threats.txt" > "$TEMP_DIR/domain_threats_sorted.txt"

    # Get all unique domains
    awk -F'|' '{print $1}' "$TEMP_DIR/domain_threats.txt" | sort -u > "$TEMP_DIR/all_domains.txt"

    print_success "Domain threat analysis complete"
}

################################################################################
# GEOGRAPHIC ANALYSIS (Country-based threat tracking)
################################################################################

# Summarise where high-risk traffic (threat score >= 70) originates.
#
# Globals:
#   TEMP_DIR (read) - working directory
# Reads:   $TEMP_DIR/threat_scores.txt (score|ip)
# Writes:  $TEMP_DIR/geo_analysis.txt        (created empty)
#          $TEMP_DIR/geo_needs_maxmind.txt   (one DISTRIBUTED|... line when the
#                                             attack looks distributed)
#          $TEMP_DIR/high_risk_networks.txt  (uniq -c of /24 networks; only on the
#                                             no-GeoIP path with a non-empty score file)
analyze_geographic_threats() {
    print_info "Analyzing geographic distribution of threats..."

    local scores_file="$TEMP_DIR/threat_scores.txt"
    local networks_file="$TEMP_DIR/high_risk_networks.txt"
    local maxmind_hint="$TEMP_DIR/geo_needs_maxmind.txt"

    # Reset the output files for this run
    : > "$TEMP_DIR/geo_analysis.txt"
    : > "$maxmind_hint"

    # A GeoIP lookup tool counts as available when either CLI is on PATH
    local has_geoip=true
    if ! command -v geoiplookup >/dev/null 2>&1 && ! command -v mmdbinspect >/dev/null 2>&1; then
        has_geoip=false
    fi

    if [ "$has_geoip" = true ]; then
        # Full geographic analysis with GeoIP
        print_info "Performing full geographic analysis with GeoIP..."

        # TODO: Implement full GeoIP lookups when available
        # This would lookup each high-risk IP and count by country
    else
        # Without GeoIP, approximate "distributed" by bucketing the high-risk
        # IPs into /24 networks (first three dot-separated parts of the address)
        if [ -s "$scores_file" ]; then
            awk -F'|' '$1 >= 70 {
                split($2, octet, ".")
                print octet[1] "." octet[2] "." octet[3] ".0/24"
            }' "$scores_file" | sort | uniq -c | sort -rn > "$networks_file"

            local network_count=$(wc -l < "$networks_file" 2>/dev/null || echo "0")
            local total_high_risk=$(awk -F'|' '$1 >= 70' "$scores_file" | wc -l)

            # More than 10 networks or more than 50 IPs: flag it so the
            # recommendation engine can suggest MaxMind
            if [ "$network_count" -gt 10 ] || [ "$total_high_risk" -gt 50 ]; then
                echo "DISTRIBUTED|$network_count networks|$total_high_risk IPs|MaxMind recommended" > "$maxmind_hint"
            fi
        fi

        print_info "Geographic analysis limited (MaxMind GeoIP2 not installed)"
    fi

    print_success "Geographic analysis complete"
}

################################################################################
# RECOMMENDATION ENGINE
################################################################################

# Print part/whole as a whole-number percentage clamped to 0-100.
# Non-numeric input or a zero denominator yields 0. Values are handed to AWK
# with -v so they are never interpreted as program text.
#   $1 - numerator, $2 - denominator
_recommendation_share_pct() {
    local part="$1"
    local whole="$2"

    [[ "$part" =~ ^[0-9]+$ ]] || part=0
    [[ "$whole" =~ ^[0-9]+$ ]] || whole=0

    awk -v part="$part" -v whole="$whole" 'BEGIN {
        pct = (whole > 0) ? (part / whole) * 100 : 0
        if (pct > 100) pct = 100
        printf "%.0f", pct
    }'
}

# Turn the analysis artefacts in $TEMP_DIR into a prioritised recommendation list.
#
# Globals:
#   TEMP_DIR (read), CSF_AVAILABLE (read)
# Reads (under $TEMP_DIR, when present):
#   threat_scores.txt, all_domains.txt, domain_high_risk_ips.txt,
#   domain_threats_sorted.txt, attack_vectors_raw.txt, rapid_fire_ips.txt,
#   geo_needs_maxmind.txt; plus /etc/csf/csf.conf when CSF_AVAILABLE is true
# Writes:
#   recommendations.txt - REC|number|action_type|title|priority|description
#   attack_scope.txt    - scope|primary_target|percentage|affected_domains|total_domains
#
# Every count parsed from a file is validated as an integer before it reaches a
# numeric test, and no "pipeline || echo 0" fallbacks are used: under pipefail
# those can emit two values when an early stage exits non-zero.
generate_recommendations() {
    print_info "Generating intelligent recommendations..."

    local rec_file="$TEMP_DIR/recommendations.txt"
    local attack_file="$TEMP_DIR/attack_vectors_raw.txt"
    local rapid_file="$TEMP_DIR/rapid_fire_ips.txt"
    local wp_pattern='wp-admin|wp-login|xmlrpc'
    local rec_count=0

    # Initialize recommendation file
    > "$rec_file"

    # Total high-risk IPs (threat score >= 70)
    local total_high_risk_ips=0
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        total_high_risk_ips=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null) || total_high_risk_ips=0
    fi
    [[ "$total_high_risk_ips" =~ ^[0-9]+$ ]] || total_high_risk_ips=0

    # Total domains seen vs. domains with at least one high-risk IP
    local total_domains=0
    if [ -f "$TEMP_DIR/all_domains.txt" ]; then
        total_domains=$(awk 'END { print NR }' "$TEMP_DIR/all_domains.txt" 2>/dev/null) || total_domains=0
    fi
    [[ "$total_domains" =~ ^[0-9]+$ ]] || total_domains=0

    local affected_domains=0
    if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then
        affected_domains=$(awk 'END { print NR }' "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null) || affected_domains=0
    fi
    [[ "$affected_domains" =~ ^[0-9]+$ ]] || affected_domains=0

    # Determine attack scope: single domain vs server-wide
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0
    local top_domain=""
    local top_count=0
    local top_percentage=0

    if [ "$affected_domains" -eq 1 ] && [ "$total_domains" -gt 1 ]; then
        # Exactly one of several domains has high-risk IPs
        attack_scope="single_domain"
        IFS='|' read -r primary_target top_count _ < "$TEMP_DIR/domain_high_risk_ips.txt" || true
        primary_target_percentage=$(_recommendation_share_pct "$top_count" "$total_high_risk_ips")
    elif [ "$affected_domains" -gt 1 ] && [ "$total_domains" -gt 1 ]; then
        # Several domains affected: server-wide unless the top domain (first row
        # of the sorted file, count in field 5) carries at least 75% of the IPs.
        # This branch also covers "every domain affected", so no separate case
        # for affected == total is needed.
        attack_scope="server_wide"
        if [ -r "$TEMP_DIR/domain_threats_sorted.txt" ]; then
            IFS='|' read -r top_domain _ _ _ top_count _ < "$TEMP_DIR/domain_threats_sorted.txt" || true
        fi
        [[ "$top_count" =~ ^[0-9]+$ ]] || top_count=0
        if [ "$top_count" -gt 0 ] && [ "$total_high_risk_ips" -gt 0 ]; then
            top_percentage=$(_recommendation_share_pct "$top_count" "$total_high_risk_ips")
            if [ "$top_percentage" -ge 75 ]; then
                attack_scope="primary_target"
                primary_target="$top_domain"
                primary_target_percentage="$top_percentage"
            fi
        fi
    elif [ "$total_domains" -eq 1 ]; then
        attack_scope="single_server"
        primary_target=$(head -n 1 "$TEMP_DIR/all_domains.txt" 2>/dev/null) || primary_target=""
    fi

    # RECOMMENDATION #1: IP Blocking Strategy (duration scales with IP count)
    if [ "$total_high_risk_ips" -gt 0 ]; then
        rec_count=$((rec_count + 1))
        if [ "$total_high_risk_ips" -le 10 ]; then
            echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 1 hour|HIGH|CSF temporary block recommended for ${total_high_risk_ips} IPs with threat score >= 70" >> "$rec_file"
        elif [ "$total_high_risk_ips" -le 50 ]; then
            echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 24 hours|HIGH|Large number of threats detected - 24hr block recommended" >> "$rec_file"
        else
            echo "REC|$rec_count|ip_block_perm|Permanently block $total_high_risk_ips high-risk IPs|CRITICAL|Severe bot attack detected - permanent blocking recommended" >> "$rec_file"
        fi
    fi

    # RECOMMENDATION #2: Connection Limit (CSF CT_LIMIT)
    # Only recommend if CSF is installed and CT_LIMIT is enabled (not 0)
    if [ "$CSF_AVAILABLE" = true ]; then
        local current_ct_limit
        current_ct_limit=$(grep "^CT_LIMIT" /etc/csf/csf.conf 2>/dev/null | grep -oP '"\K[0-9]+' | head -n 1) || current_ct_limit=0
        [[ "$current_ct_limit" =~ ^[0-9]+$ ]] || current_ct_limit=0

        if [ "$current_ct_limit" -gt 0 ]; then
            # First whitespace-separated token on the first line of rapid_fire_ips.txt
            local max_connections=0
            if [ -s "$rapid_file" ]; then
                max_connections=$(awk 'NR == 1 { print $1; exit }' "$rapid_file" 2>/dev/null) || max_connections=0
            fi
            [[ "$max_connections" =~ ^[0-9]+$ ]] || max_connections=0

            if [ "$max_connections" -gt 100 ] && [ "$max_connections" -lt "$current_ct_limit" ]; then
                rec_count=$((rec_count + 1))
                local recommended_limit=$((max_connections - 20))
                echo "REC|$rec_count|csf_ct_limit|Reduce CSF CT_LIMIT from $current_ct_limit to $recommended_limit|MEDIUM|High concurrent connections detected ($max_connections from single IP)" >> "$rec_file"
            fi
        fi
    fi

    # RECOMMENDATION #3: Domain-Specific .htaccess Protection
    if [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "primary_target" ]; then
        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|htaccess_domain|Add bot blocking to $primary_target .htaccess|HIGH|${primary_target_percentage}% of attacks target this domain" >> "$rec_file"
    fi

    # RECOMMENDATION #4: Server-wide Apache Protection
    if [ "$attack_scope" = "server_wide" ]; then
        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|apache_global|Add global bot blocking to Apache pre-virtualhost|HIGH|Attack affects $affected_domains of $total_domains domains" >> "$rec_file"
    fi

    # RECOMMENDATION #5: WordPress-specific (wp-admin / wp-login / xmlrpc hits)
    local wp_attacks=0
    if [ -s "$attack_file" ]; then
        # grep -c prints the count itself and exits 1 on zero matches
        wp_attacks=$(grep -ciE "$wp_pattern" "$attack_file" 2>/dev/null || true)
    fi
    [[ "$wp_attacks" =~ ^[0-9]+$ ]] || wp_attacks=0

    if [ "$wp_attacks" -gt 50 ]; then
        rec_count=$((rec_count + 1))

        # Distinct domains (field 2) with WordPress hits, and the first of them in sort order
        local wp_domain_count=0
        local wp_target_domain=""
        read -r wp_domain_count wp_target_domain < <(
            grep -iE "$wp_pattern" "$attack_file" 2>/dev/null \
                | cut -d'|' -f2 \
                | sort -u \
                | awk 'NR == 1 { first = $0 } END { print NR + 0, first }'
        ) || true
        [[ "$wp_domain_count" =~ ^[0-9]+$ ]] || wp_domain_count=0

        if [ "$wp_domain_count" -eq 1 ] || [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "single_server" ]; then
            # Single domain being attacked
            echo "REC|$rec_count|wp_hardening|Harden WordPress on $wp_target_domain|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        elif [ "$attack_scope" = "primary_target" ]; then
            # One primary target but others also affected
            echo "REC|$rec_count|wp_hardening|Harden WordPress on $primary_target|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        else
            # Multiple domains with WordPress attacks
            echo "REC|$rec_count|wp_hardening|Harden WordPress across $wp_domain_count domains|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        fi
    fi

    # PORTFLOOD Protection removed - not appropriate for web servers with many sites
    # Blocking ports 80/443 based on connection count breaks legitimate traffic

    # RECOMMENDATION #7: CSF SYNFLOOD Protection (if DDoS patterns detected)
    if [ -s "$rapid_file" ]; then
        local ddos_count
        ddos_count=$(awk 'END { print NR }' "$rapid_file" 2>/dev/null) || ddos_count=0
        [[ "$ddos_count" =~ ^[0-9]+$ ]] || ddos_count=0
        if [ "$ddos_count" -gt 10 ]; then
            rec_count=$((rec_count + 1))
            echo "REC|$rec_count|csf_synflood|Enable CSF SYNFLOOD protection|HIGH|$ddos_count potential DDoS sources detected" >> "$rec_file"
        fi
    fi

    # RECOMMENDATION #8: MaxMind GeoIP for Country Blocking (if distributed attack)
    if [ -s "$TEMP_DIR/geo_needs_maxmind.txt" ]; then
        # Fields 2 and 3 carry a number followed by a label, e.g. "12 networks"
        local geo_networks=""
        local geo_ips=""
        local network_count=0
        local ip_count=0
        IFS='|' read -r _ geo_networks geo_ips _ < "$TEMP_DIR/geo_needs_maxmind.txt" || true
        if [[ "$geo_networks" =~ [0-9]+ ]]; then
            network_count="${BASH_REMATCH[0]}"
        fi
        if [[ "$geo_ips" =~ [0-9]+ ]]; then
            ip_count="${BASH_REMATCH[0]}"
        fi

        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|install_maxmind|Install MaxMind GeoIP2 for country-based blocking|MEDIUM|Distributed attack from $network_count networks ($ip_count IPs) - geographic blocking recommended" >> "$rec_file"
    fi

    # Store attack scope for menu system
    echo "$attack_scope|$primary_target|$primary_target_percentage|$affected_domains|$total_domains" > "$TEMP_DIR/attack_scope.txt"

    print_success "Generated $rec_count recommendations"
}

################################################################################
# POST-ANALYSIS MENU
################################################################################

# Print the threat summary and recommendation list, then prompt for the next step.
#
# Globals:
#   TEMP_DIR (read); RED, YELLOW, BLUE, BOLD, NC (read, colour codes);
#   menu_choice (written by the prompt)
# Reads:   $TEMP_DIR/attack_scope.txt
#            (scope|primary_target|percentage|affected_domains|total_domains)
#          $TEMP_DIR/recommendations.txt
#            (REC|number|action_type|title|priority|description)
# Prompt:  1 -> show_action_menu, 2 -> show_detailed_recommendations,
#          anything else returns 0
show_post_analysis_menu() {
    # Scope details, with defaults for when attack_scope.txt is missing or empty
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0
    local affected_domains=0
    local total_domains=0

    if [ -s "$TEMP_DIR/attack_scope.txt" ]; then
        # Five pipe-separated fields; the trailing _ swallows anything extra
        IFS='|' read -r attack_scope primary_target primary_target_percentage affected_domains total_domains _ \
            < "$TEMP_DIR/attack_scope.txt" || true
    fi

    # Recommendations exist when the file is non-empty
    local has_recommendations=false
    local rec_count=0
    if [ -s "$TEMP_DIR/recommendations.txt" ]; then
        has_recommendations=true
        rec_count=$(wc -l < "$TEMP_DIR/recommendations.txt")
    fi

    echo ""
    echo "==============================================================="
    print_header "THREAT ANALYSIS SUMMARY"
    echo ""

    # One summary paragraph per attack scope
    if [ "$attack_scope" = "single_domain" ]; then
        print_warning "ATTACK SCOPE: Single Domain Target"
        echo "  • Primary Target: $primary_target"
        echo "  • This domain is receiving 100% of high-risk traffic"
        echo "  • Recommendation: Domain-specific protection"
    elif [ "$attack_scope" = "primary_target" ]; then
        print_warning "ATTACK SCOPE: Primarily Targeting One Domain"
        echo "  • Primary Target: $primary_target ($primary_target_percentage% of attacks)"
        echo "  • Other domains also affected: $affected_domains of $total_domains total"
        echo "  • Recommendation: Focus protection on primary target"
    elif [ "$attack_scope" = "server_wide" ]; then
        print_alert "ATTACK SCOPE: Server-Wide Attack"
        echo "  • Multiple domains under attack: $affected_domains of $total_domains"
        echo "  • Attack is distributed across the server"
        echo "  • Recommendation: Server-wide protection needed"
    elif [ "$attack_scope" = "single_server" ]; then
        print_info "ATTACK SCOPE: Single-Domain Server"
        echo "  • Target: $primary_target (only domain on server)"
        echo "  • Server-level protection will apply to this domain"
    else
        print_info "No significant threats detected"
    fi

    echo ""

    # Nothing to recommend: acknowledge and go back
    if [ "$has_recommendations" != true ]; then
        print_success "No recommendations - your server appears secure"
        echo ""
        echo "Press Enter to return to main menu..."
        read
        return 0
    fi

    echo "==============================================================="
    print_header "RECOMMENDED ACTIONS ($rec_count recommendations)"
    echo ""

    # List each recommendation, coloured by priority
    local count=0
    local priority_color
    local priority_icon
    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        count=$((count + 1))

        # Known priorities get a colour and an empty icon; unknown ones fall
        # back to no colour and a one-space placeholder
        priority_color="${NC}"
        priority_icon=""
        case "$priority" in
            CRITICAL) priority_color="${RED}" ;;
            HIGH)     priority_color="${YELLOW}" ;;
            MEDIUM)   priority_color="${BLUE}" ;;
            *)        priority_icon=" " ;;
        esac

        echo -e "  ${BOLD}[$count]${NC} $priority_icon $action_title"
        echo -e "      ${priority_color}Priority: $priority${NC} - $description"
        echo ""
    done < "$TEMP_DIR/recommendations.txt"

    echo "==============================================================="
    echo ""
    echo "What would you like to do?"
    echo ""
    echo "  1) Go to Take Action Menu (implement recommended actions)"
    echo "  2) Review Individual Recommendations (detailed view)"
    echo ""
    echo -e "  ${RED}0)${NC} Back"
    echo ""
    read -p "Select option: " menu_choice

    if [ "$menu_choice" = "1" ]; then
        show_action_menu
    elif [ "$menu_choice" = "2" ]; then
        show_detailed_recommendations
    elif [ "$menu_choice" = "0" ]; then
        print_info "Returning to main menu..."
        return 0
    else
        print_warning "Invalid option - returning to main menu"
        return 0
    fi
}

################################################################################
# DETAILED RECOMMENDATIONS VIEWER
################################################################################

# Show every recommendation with its supporting data, then return to the
# post-analysis menu.
#
# Globals:
#   TEMP_DIR (read); BOLD, NC (read, colour codes)
# Reads:   $TEMP_DIR/recommendations.txt
#            (REC|number|action_type|title|priority|description)
#          $TEMP_DIR/threat_scores.txt (score|ip)
#          $TEMP_DIR/domain_threats_sorted.txt
#          $TEMP_DIR/domain_high_risk_ips.txt
#
# Supporting-data lookups are best-effort. Each is one AWK call guarded with
# "|| true", so a missing file or an absent domain cannot abort the script
# under "set -e"/pipefail, and the 10-row limits are applied inside AWK rather
# than through "| head", which could end the producer with SIGPIPE.
show_detailed_recommendations() {
    clear
    print_banner "Detailed Recommendations"
    echo ""

    if [ ! -s "$TEMP_DIR/recommendations.txt" ]; then
        print_warning "No recommendations available"
        echo ""
        read -r -p "Press Enter to continue..." || true
        show_post_analysis_menu
        return
    fi

    local count=0
    local target_domain=""
    # The htaccess recommendation title embeds the domain after "to "
    local title_domain_re='to ([^ ]+)'

    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        count=$((count + 1))

        echo "==============================================================="
        echo -e "${BOLD}Recommendation #$count:${NC} $action_title"
        echo "==============================================================="
        echo ""
        echo "Priority: $priority"
        echo "Action Type: $action_type"
        echo "Description: $description"
        echo ""

        # Show specific details based on action type
        case "$action_type" in
            ip_block_temp|ip_block_perm)
                echo "Affected IPs:"
                # First 10 IPs with score >= 70
                awk -F'|' '$1 >= 70 {
                    printf "  • %s (score: %s)\n", $2, $1
                    if (++shown >= 10) exit
                }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null || true
                ;;
            htaccess_domain)
                target_domain=""
                if [[ "$action_title" =~ $title_domain_re ]]; then
                    target_domain="${BASH_REMATCH[1]}"
                fi
                echo "Target Domain: $target_domain"
                if [ -s "$TEMP_DIR/domain_threats_sorted.txt" ]; then
                    # Exact string match on the domain column: dots in the
                    # domain name must not act as regex wildcards
                    awk -F'|' -v wanted="$target_domain" '($1 "") == (wanted "") {
                        printf "  • Total Requests: %s\n", $2
                        printf "  • Bot Requests: %s (%s%%)\n", $3, $4
                        printf "  • High-Risk IPs: %s\n", $5
                        printf "  • Attack Attempts: %s\n", $6
                    }' "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null || true
                fi
                ;;
            apache_global)
                echo "Affected Domains:"
                if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then
                    # First 10 affected domains
                    awk -F'|' '{
                        printf "  • %s (%s high-risk IPs)\n", $1, $2
                        if (NR >= 10) exit
                    }' "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null || true
                fi
                ;;
        esac

        echo ""
    done < "$TEMP_DIR/recommendations.txt"

    echo "==============================================================="
    echo ""
    read -r -p "Press Enter to return to action menu..." || true
    show_post_analysis_menu
}

################################################################################
# ACTION MENU (IMPLEMENT RECOMMENDATIONS)
################################################################################

# List every available remediation action, mark the ones the analysis
# recommended, and dispatch the selection to execute_action.
#
# Globals:
#   TEMP_DIR (read); RED, NC (read, colour codes);
#   action_choice (written by the prompt)
# Reads:   $TEMP_DIR/recommendations.txt
#            (REC|number|action_type|title|priority|description)
#
# The selection must be a plain decimal number before it is used anywhere.
# Anything else is rejected up front, because the value later serves as an
# indexed-array subscript (an arithmetic context). Leading zeros are
# normalised so that "08" is read as 8 rather than as invalid octal.
show_action_menu() {
    clear
    print_banner "Take Action Menu"
    echo ""

    # Lookup tables of recommended actions, keyed by recommendation action type
    declare -A recommended_actions
    declare -A action_priorities
    declare -A action_descriptions

    if [ -s "$TEMP_DIR/recommendations.txt" ]; then
        while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
            # An empty key is not a valid associative-array subscript
            [ -n "$action_type" ] || continue
            recommended_actions["$action_type"]=1
            action_priorities["$action_type"]="$priority"
            action_descriptions["$action_type"]="$description"
        done < "$TEMP_DIR/recommendations.txt"
    fi

    # Display all available actions (not just recommended ones)
    echo "All Available Actions:"
    echo ""
    echo "Legend:  = Recommended by analysis"
    echo ""

    # Menu table, 1-indexed so the array index equals the number shown.
    # action_rec_keys maps each entry to the recommendation type that flags it:
    # both temporary-block durations are flagged by "ip_block_temp".
    declare -a action_types
    declare -a action_titles
    declare -a action_descs
    declare -a action_rec_keys

    action_types=(
        [1]="ip_block_temp_1hr"
        [2]="ip_block_temp_24hr"
        [3]="ip_block_perm"
        [4]="htaccess_domain"
        [5]="apache_global"
        [6]="csf_ct_limit"
        [7]="csf_synflood"
        [8]="install_maxmind"
        [9]="wp_hardening"
    )
    action_rec_keys=(
        [1]="ip_block_temp"
        [2]="ip_block_temp"
        [3]="ip_block_perm"
        [4]="htaccess_domain"
        [5]="apache_global"
        [6]="csf_ct_limit"
        [7]="csf_synflood"
        [8]="install_maxmind"
        [9]="wp_hardening"
    )
    action_titles=(
        [1]="Block high-risk IPs for 1 hour (CSF temporary)"
        [2]="Block high-risk IPs for 24 hours (CSF temporary)"
        [3]="Block high-risk IPs permanently (CSF permanent)"
        [4]="Add bot blocking to specific domain .htaccess"
        [5]="Add global bot blocking to Apache (all domains)"
        [6]="Adjust CSF connection tracking limit (CT_LIMIT)"
        [7]="Enable CSF SYNFLOOD protection"
        [8]="Install MaxMind GeoIP2 for country-based blocking"
        [9]="WordPress security hardening"
    )
    action_descs=(
        [1]="Temporary firewall block, auto-expires after 1 hour"
        [2]="Temporary firewall block, auto-expires after 24 hours"
        [3]="Permanent firewall block - requires manual removal"
        [4]="Domain-level protection via Apache .htaccess rules"
        [5]="Server-wide Apache configuration, affects all sites"
        [6]="Limit concurrent connections per IP address"
        [7]="Protect against SYN flood DDoS attacks"
        [8]="Enable geographic filtering with CSF CC_DENY (requires free MaxMind license)"
        [9]="Protect WordPress login and admin areas"
    )

    local count=${#action_types[@]}
    local idx
    local rec_key

    for ((idx = 1; idx <= count; idx++)); do
        rec_key="${action_rec_keys[$idx]}"
        display_action_option "$idx" "${action_types[$idx]}" "${action_titles[$idx]}" "${action_descs[$idx]}" "${recommended_actions[$rec_key]}" "${action_priorities[$rec_key]}"

        # Divider after each group: IP blocking (1-3), domain/site (4-5), CSF (6-7)
        case "$idx" in
            3|5|7)
                echo ""
                echo "------------------------------------------------------------─"
                echo ""
                ;;
        esac
    done

    echo ""
    echo "============================================================═"
    echo ""
    echo -e "  ${RED}0)${NC} Back"
    echo ""
    # On end-of-input with nothing typed, behave like "Back" instead of looping
    read -r -p "Select action [0-$count]: " action_choice || [ -n "$action_choice" ] || action_choice="0"

    # Accept digits only (length-bounded so the value cannot overflow)
    if [[ ! "$action_choice" =~ ^[0-9]{1,9}$ ]]; then
        print_warning "Invalid selection"
        sleep 2
        show_action_menu
        return
    fi

    # Force base 10 so leading zeros are not read as octal
    action_choice=$((10#$action_choice))

    if [ "$action_choice" -eq 0 ]; then
        show_post_analysis_menu
        return
    fi

    if [ "$action_choice" -gt "$count" ]; then
        print_warning "Invalid selection"
        sleep 2
        show_action_menu
        return
    fi

    # Execute selected action
    local selected_type="${action_types[$action_choice]}"
    execute_action "$selected_type" "$action_choice"
}

# Helper function to display action options
# Render one numbered entry of the action menu.
#   $1 - menu number
#   $2 - action type (accepted for call symmetry; not used when rendering)
#   $3 - title
#   $4 - description, printed indented on the following line
#   $5 - non-empty when the analysis recommended this action
#   $6 - recommendation priority (CRITICAL, HIGH, MEDIUM, or anything else)
# Globals: RED, YELLOW, BLUE, GREEN, BOLD, NC (read, colour codes)
display_action_option() {
    local num="$1"
    local action_type="$2"
    local title="$3"
    local desc="$4"
    local is_recommended="$5"
    local priority="$6"

    if [ -z "$is_recommended" ]; then
        # Plain entry: no colour, no marker
        echo -e "  $num) $title"
    else
        # Choose the colour and marker text for this priority, then print once
        local tint
        local marker
        case "$priority" in
            CRITICAL) tint="${RED}";    marker=" RECOMMENDED [CRITICAL]" ;;
            HIGH)     tint="${YELLOW}"; marker=" RECOMMENDED [HIGH]" ;;
            MEDIUM)   tint="${BLUE}";   marker=" RECOMMENDED [MEDIUM]" ;;
            *)        tint="${GREEN}";  marker=" RECOMMENDED" ;;
        esac
        echo -e "  ${tint}$num)${NC} ${BOLD}$title${NC} ${tint}${marker}${NC}"
    fi

    echo "      $desc"
}

################################################################################
# ACTION EXECUTION ENGINE
################################################################################

# Run the handler that implements a menu action.
#   $1 - action type identifier
#   $2 - number the user selected (stored, not otherwise used here)
# An unrecognised action type prints a warning, waits for Enter and reopens
# the action menu.
execute_action() {
    local action_type="$1"
    local rec_number="$2"

    # Resolve the action type to a handler command line (function plus arguments)
    local -a action_cmd=()
    case "$action_type" in
        ip_block_temp_1hr)  action_cmd=(execute_ip_blocking_specific "1hr") ;;
        ip_block_temp_24hr) action_cmd=(execute_ip_blocking_specific "24hr") ;;
        ip_block_temp)      action_cmd=(execute_ip_blocking "temp") ;;
        ip_block_perm)      action_cmd=(execute_ip_blocking "perm") ;;
        csf_ct_limit)       action_cmd=(execute_csf_ct_limit) ;;
        csf_synflood)       action_cmd=(execute_csf_synflood) ;;
        htaccess_domain)    action_cmd=(execute_htaccess_domain_blocking) ;;
        apache_global)      action_cmd=(execute_apache_global_blocking) ;;
        install_maxmind)    action_cmd=(execute_install_maxmind) ;;
        wp_hardening)       action_cmd=(execute_wp_hardening) ;;
        rate_limiting)      action_cmd=(execute_rate_limiting) ;;
    esac

    # No handler resolved: report it and go back to the menu
    if [ "${#action_cmd[@]}" -eq 0 ]; then
        print_warning "Action type '$action_type' not yet implemented"
        echo ""
        read -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    "${action_cmd[@]}"
}

# Temporarily block all high-risk IPs (threat score >= 70) with CSF.
#
# Arguments:
#   $1 - "1hr" blocks for 3600 seconds; any other value blocks for 86400 seconds
# Reads:
#   $TEMP_DIR/threat_scores.txt    one "score|ip[|requests]" record per line
#   $TEMP_DIR/false_positives.txt  optional; IPs with a line starting "ip|" are skipped
# IPs for which is_excluded_ip succeeds are never blocked.
# Every exit path ends by calling show_action_menu.
execute_ip_blocking_specific() {
    local duration_type="$1"  # "1hr" or "24hr"
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"
    local confirm

    clear
    print_banner "IP Blocking - CSF Temporary Block"
    echo ""

    # Check if CSF is installed
    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF (ConfigServer Security & Firewall) is not installed"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get high-risk IPs
    if [ ! -s "$scores_file" ]; then
        print_warning "No threat scores available"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Build the block list BEFORE prompting so that the preview, the announced
    # count and the applied blocks all describe the same set of addresses.
    # Each score is stored next to its IP, so it never has to be looked up
    # again (the old per-IP grep broke on two-field records).
    local -a ips_to_block=() block_scores=() block_requests=()
    local score ip requests
    local flagged=0
    while IFS='|' read -r score ip requests; do
        [[ "$score" =~ ^[0-9]+$ ]] || continue
        [ "$score" -ge 70 ] || continue
        [ -n "$ip" ] || continue
        flagged=$((flagged + 1))

        # Skip excluded IPs
        if is_excluded_ip "$ip"; then
            continue
        fi
        # Skip false positives
        if [ -s "$fp_file" ] && grep -q "^$ip|" "$fp_file" 2>/dev/null; then
            continue
        fi

        ips_to_block+=("$ip")
        block_scores+=("$score")
        block_requests+=("$requests")
    done < "$scores_file"

    local total=${#ips_to_block[@]}

    if [ "$total" -eq 0 ]; then
        if [ "$flagged" -gt 0 ]; then
            print_info "All $flagged high-risk IP(s) are excluded or known false positives - nothing to block"
        else
            print_info "No high-risk IPs detected (score >= 70)"
        fi
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Set duration based on type
    local duration
    local duration_text
    if [ "$duration_type" = "1hr" ]; then
        duration=3600
        duration_text="1 hour"
    else
        duration=86400
        duration_text="24 hours"
    fi

    echo "This will block $total high-risk IPs for $duration_text"
    echo ""
    echo "High-risk IPs (top 10):"
    local i
    for ((i = 0; i < total && i < 10; i++)); do
        printf '  • %s (score: %s, %s requests)\n' \
            "${ips_to_block[i]}" "${block_scores[i]}" "${block_requests[i]}"
    done
    echo ""

    if [ "$total" -gt 10 ]; then
        echo "  ... and $((total - 10)) more"
        echo ""
    fi

    read -r -p "Proceed with blocking for $duration_text? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Apply blocks
    echo ""
    print_info "Applying CSF blocks for $duration_text..."
    echo ""

    local success_count=0
    local fail_count=0

    for i in "${!ips_to_block[@]}"; do
        ip="${ips_to_block[$i]}"
        score="${block_scores[$i]}"

        if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
            echo -e "  ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e "  ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done

    echo ""
    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) for $duration_text"
        echo ""
        echo "These blocks will automatically expire after $duration_text"
        echo "To view temporary blocks: csf -t"
        echo "To remove a block early: csf -tr IP"
    fi

    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF only when something actually changed
    if [ "$success_count" -gt 0 ]; then
        print_info "Restarting CSF to apply changes..."
        if csf -r >/dev/null 2>&1; then
            print_success "CSF restarted successfully"
        else
            print_warning "CSF restart may have failed - check manually with: csf -r"
        fi
    fi

    echo ""
    # Verify domains still work after blocking
    verify_domains_still_working

    show_action_menu
}

# Entry point for the generic IP-blocking menu actions.
# Arguments:
#   $1 - "temp" opens the interactive CSF blocking flow directly; any other
#        value shows a permanent-block warning and asks for confirmation first
# A declined confirmation waits for Enter and reopens the action menu.
execute_ip_blocking() {
    local block_mode="$1"  # "temp" or "perm"

    # Temporary mode needs no extra confirmation
    if [ "$block_mode" = "temp" ]; then
        offer_csf_blocking
        return
    fi

    # Permanent mode: warn first, then ask
    clear
    print_banner "Permanent IP Blocking"
    echo ""
    print_alert "WARNING: Permanent blocks must be manually removed later"
    echo ""
    echo "This will permanently block all high-risk IPs (score >= 70)"
    echo ""
    read -p "Are you sure you want to proceed? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    offer_csf_blocking
}

# Interactively change CT_LIMIT (connection-tracking limit) in the CSF
# configuration and restart CSF.
#
# Arguments:
#   $1 - optional path of the CSF configuration file
#        (default: /etc/csf/csf.conf)
# Reads:
#   $TEMP_DIR/recommendations.txt - the first "|csf_ct_limit|" line containing
#   "to <number>" supplies the suggested value; 100 is used otherwise
#
# The configuration is backed up before it is edited. A failed edit or a
# failed CSF restart is reported as a warning instead of aborting the script
# (the file runs under `set -e`). Always returns via show_action_menu.
execute_csf_ct_limit() {
    local csf_conf="${1:-/etc/csf/csf.conf}"
    local recommendations="$TEMP_DIR/recommendations.txt"
    local new_limit

    clear
    print_banner "Update CSF Connection Tracking Limit"
    echo ""

    # Check if CSF is installed
    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF is not installed on this server"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get recommended limit from recommendation (first match only, so the
    # default offered at the prompt is always a single number)
    local recommended_limit=""
    if [ -r "$recommendations" ]; then
        recommended_limit=$(awk 'index($0, "|csf_ct_limit|") && match($0, /to [0-9]+/) {
            print substr($0, RSTART + 3, RLENGTH - 3)
            exit
        }' "$recommendations" 2>/dev/null || true)
    fi
    [[ "$recommended_limit" =~ ^[0-9]+$ ]] || recommended_limit=100

    # Get current CT_LIMIT
    local current_limit=""
    if [ -r "$csf_conf" ]; then
        current_limit=$(awk -F'"' '/^CT_LIMIT = / { print $2; exit }' "$csf_conf" 2>/dev/null || true)
    fi
    [ -n "$current_limit" ] || current_limit="unknown"

    echo "Current CT_LIMIT: $current_limit"
    echo "Recommended CT_LIMIT: $recommended_limit"
    echo ""
    echo "This will modify $csf_conf and restart CSF"
    echo ""
    read -r -p "Enter new CT_LIMIT value [$recommended_limit]: " new_limit

    # Use recommended if nothing entered
    if [ -z "$new_limit" ]; then
        new_limit=$recommended_limit
    fi

    # Validate it's a number
    if ! [[ "$new_limit" =~ ^[0-9]+$ ]]; then
        print_warning "Invalid number"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Update CSF config
    print_info "Updating CT_LIMIT to $new_limit..."

    if [ ! -f "$csf_conf" ]; then
        print_warning "Could not find $csf_conf"
    elif ! grep -q '^CT_LIMIT = ' "$csf_conf"; then
        # sed would silently do nothing here; say so instead of claiming success
        print_warning "No CT_LIMIT setting found in $csf_conf - nothing was changed"
    else
        local backup="$csf_conf.backup.$(date +%Y%m%d_%H%M%S)"
        if ! cp -p -- "$csf_conf" "$backup"; then
            print_warning "Could not back up $csf_conf - nothing was changed"
        elif ! sed -i "s/^CT_LIMIT = .*/CT_LIMIT = \"$new_limit\"/" "$csf_conf"; then
            print_warning "Failed to update $csf_conf (backup: $backup)"
        else
            print_success "CT_LIMIT updated successfully to $new_limit"
            echo "Backup saved to: $backup"

            # Restart CSF
            print_info "Restarting CSF..."
            if csf -r >/dev/null 2>&1; then
                print_success "CSF restarted successfully"
            else
                print_warning "CSF restart may have failed - check manually with: csf -r"
            fi
        fi
    fi

    echo ""
    # Verify domains still work after CT_LIMIT change
    verify_domains_still_working

    show_action_menu
}

# Add "Require not ip" rules for a domain's high-risk IPs to its .htaccess.
#
# Reads:
#   $TEMP_DIR/recommendations.txt  first "|htaccess_domain|" line; the token
#                                  after "to " is taken as the target domain
#   $SCRIPT_DIR/.sysref            "DOMAIN|<domain>|...|<docroot>" records
#   $TEMP_DIR/threat_scores.txt    "score|ip[|...]" records
#   $TEMP_DIR/parsed_logs.txt      "ip|domain|..." records
#   $TEMP_DIR/false_positives.txt  optional "ip|..." records to skip
#
# An IP is blocked when it appears in the domain's log records, scores >= 70,
# is not excluded (is_excluded_ip) and is not a known false positive.
#
# The rules are written between BEGIN/END marker comments inside
# <RequireAll> together with "Require all granted", because a negated Require
# is only valid in that form. A block written by an earlier run is replaced
# instead of duplicated. If the file already carries its own Require
# directives the edit is refused: a second authorization block would be
# OR-combined with them and could open up a protected site.
# Always returns via show_action_menu.
execute_htaccess_domain_blocking() {
    local begin_marker="# BEGIN toolkit bot blocking"
    local end_marker="# END toolkit bot blocking"
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local logs_file="$TEMP_DIR/parsed_logs.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"
    local recommendations="$TEMP_DIR/recommendations.txt"
    local confirm

    clear
    print_banner "Add Bot Blocking to Domain .htaccess"
    echo ""

    # Get target domain from recommendation
    local target_domain=""
    if [ -r "$recommendations" ]; then
        target_domain=$(awk 'index($0, "|htaccess_domain|") {
            if (match($0, /to [^ ]+/)) print substr($0, RSTART + 3, RLENGTH - 3)
            exit
        }' "$recommendations" 2>/dev/null || true)
    fi

    if [ -z "$target_domain" ]; then
        print_warning "Could not determine target domain"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    echo "Target Domain: $target_domain"
    echo ""

    # Find document root for this domain using reference database
    # (exact field comparison, so dots in the domain are not regex wildcards)
    local doc_root=""
    if [ -s "$SCRIPT_DIR/.sysref" ]; then
        doc_root=$(awk -F'|' -v d="$target_domain" \
            '$1 == "DOMAIN" && $2 == d { print $4; exit }' \
            "$SCRIPT_DIR/.sysref" 2>/dev/null || true)
    fi

    if [ -z "$doc_root" ]; then
        print_warning "Document root not found in reference database"
        echo "Please enter the document root manually:"
        read -r -p "Document root: " doc_root
    else
        echo "Document root: $doc_root"
    fi

    if [ ! -d "$doc_root" ]; then
        print_warning "Document root does not exist: $doc_root"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    local htaccess_file="$doc_root/.htaccess"

    # Collect the high-risk IPs seen on this domain. One awk pass joins the
    # score table with the log records.
    local -a block_ips=()
    local ip
    if [ -s "$scores_file" ] && [ -s "$logs_file" ]; then
        while IFS= read -r ip; do
            # Log-derived data is untrusted: accept address characters only
            [[ "$ip" =~ ^[0-9A-Fa-f:.]+$ ]] || continue
            if is_excluded_ip "$ip"; then
                continue
            fi
            if [ -s "$fp_file" ] && grep -q "^$ip|" "$fp_file" 2>/dev/null; then
                continue
            fi
            block_ips+=("$ip")
        done < <(awk -F'|' -v domain="$target_domain" '
            NR == FNR { if ($1 + 0 >= 70) risky[$2] = 1; next }
            $2 == domain && ($1 in risky) { print $1 }
        ' "$scores_file" "$logs_file" 2>/dev/null | sort -u)
    fi

    if [ "${#block_ips[@]}" -eq 0 ]; then
        print_info "No high-risk IPs (score >= 70) were seen on $target_domain - nothing to add"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Refuse to edit a file that has authorization rules of its own
    # (anything outside a block this function wrote earlier).
    if [ -f "$htaccess_file" ] && awk -v b="$begin_marker" -v e="$end_marker" '
            $0 == b { skip = 1; next }
            $0 == e { skip = 0; next }
            !skip && tolower($0) ~ /^[ \t]*<?require/ { found = 1; exit }
            END { exit !found }
        ' "$htaccess_file"; then
        print_warning "$htaccess_file already contains Require directives"
        echo "Automatic rules would be OR-combined with them and could weaken"
        echo "existing access control. Please add these rules manually inside"
        echo "the existing authorization block:"
        printf '    Require not ip %s\n' "${block_ips[@]}"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    echo ""
    echo "This will add bot blocking rules for ${#block_ips[@]} IP(s) to: $htaccess_file"
    echo ""
    read -r -p "Proceed? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Create backup (remember its name so the message below reports the real file)
    local backup_file=""
    local created=1
    if [ -f "$htaccess_file" ]; then
        created=0
        backup_file="$htaccess_file.backup.$(date +%Y%m%d_%H%M%S)"
        if ! cp -p -- "$htaccess_file" "$backup_file"; then
            print_warning "Could not back up $htaccess_file - nothing was changed"
            echo ""
            read -r -p "Press Enter to continue..."
            show_action_menu
            return
        fi
        print_info "Backed up existing .htaccess"

        # Drop the block written by a previous run so rules are replaced
        if grep -qxF -- "$begin_marker" "$htaccess_file"; then
            if ! sed -i "/^${begin_marker}\$/,/^${end_marker}\$/d" "$htaccess_file"; then
                print_warning "Could not remove the previous toolkit rules from $htaccess_file"
                echo ""
                read -r -p "Press Enter to continue..."
                show_action_menu
                return
            fi
        fi
    fi

    # Add rules to .htaccess
    print_info "Adding bot blocking rules..."

    if ! {
        # Leading blank line keeps the block off an unterminated last line
        echo ""
        echo "$begin_marker"
        echo "# Bot blocking rules added by toolkit on $(date)"
        echo "# High-risk IPs (threat score >= 70)"
        echo "<IfModule mod_authz_core.c>"
        echo "    <RequireAll>"
        echo "        Require all granted"
        printf '        Require not ip %s\n' "${block_ips[@]}"
        echo "    </RequireAll>"
        echo "</IfModule>"
        echo "$end_marker"
    } >> "$htaccess_file"; then
        print_warning "Could not write to $htaccess_file"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # A file created here should belong to the site, not to the invoking user
    if [ "$created" -eq 1 ]; then
        chown --reference="$doc_root" -- "$htaccess_file" 2>/dev/null || true
    fi

    print_success "Added blocking rules for ${#block_ips[@]} IPs to $htaccess_file"
    echo ""
    if [ -n "$backup_file" ]; then
        echo "Backup saved to: $backup_file"
        echo ""
    fi

    # Verify domains still work after .htaccess changes
    verify_domains_still_working

    show_action_menu
}

# Validate the Apache configuration with whichever control binary is present.
# Returns 0 when the test passes or when no tester can be found (the
# configuration then cannot be checked before the restart), 1 when it fails.
_bot_blocking_apache_configtest() {
    local tester
    for tester in apachectl apache2ctl httpd; do
        if command -v "$tester" >/dev/null 2>&1; then
            "$tester" -t >/dev/null 2>&1 && return 0
            return 1
        fi
    done
    return 0
}

# Write a server-wide Apache include that denies all high-risk IPs
# (threat score >= 70) and restart Apache.
#
# Arguments:
#   $1 - optional directory for bot_blocking.conf (default: the first of
#        /etc/apache2/conf.d and /etc/httpd/conf.d that exists)
# Reads:
#   $TEMP_DIR/threat_scores.txt    "score|ip[|...]" records
#   $TEMP_DIR/false_positives.txt  optional "ip|..." records to skip
#
# Require directives are not permitted at server-config level, so the rules
# are wrapped in <Location "/">. "AuthMerging And" combines them with each
# site's own authorization instead of replacing it, and <RequireAll> with
# "Require all granted" makes the negated rules valid. The new file is
# syntax-checked before Apache is restarted and rolled back if the check
# fails. Excluded IPs (is_excluded_ip) and known false positives are skipped.
# Always returns via show_action_menu.
execute_apache_global_blocking() {
    local conf_dir="${1:-}"
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"
    local confirm

    clear
    print_banner "Add Global Bot Blocking to Apache"
    echo ""

    print_warning "This feature will add blocking rules to Apache pre-virtualhost configuration"
    echo "This affects ALL domains on the server"
    echo ""

    # Determine Apache config location
    if [ -z "$conf_dir" ]; then
        if [ -d "/etc/apache2/conf.d" ]; then
            conf_dir="/etc/apache2/conf.d"
        elif [ -d "/etc/httpd/conf.d" ]; then
            conf_dir="/etc/httpd/conf.d"
        fi
    fi

    if [ -z "$conf_dir" ] || [ ! -d "$conf_dir" ]; then
        print_warning "Could not determine Apache config directory"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    local apache_conf="$conf_dir/bot_blocking.conf"

    # Collect the addresses to block
    local -a block_ips=()
    local score ip _rest
    if [ -s "$scores_file" ]; then
        while IFS='|' read -r score ip _rest; do
            [[ "$score" =~ ^[0-9]+$ ]] || continue
            [ "$score" -ge 70 ] || continue
            # Only address-shaped tokens may reach the Apache configuration
            [[ "$ip" =~ ^[0-9A-Fa-f:.]+$ ]] || continue
            if is_excluded_ip "$ip"; then
                continue
            fi
            if [ -s "$fp_file" ] && grep -q "^$ip|" "$fp_file" 2>/dev/null; then
                continue
            fi
            block_ips+=("$ip")
        done < "$scores_file"
    fi

    if [ "${#block_ips[@]}" -eq 0 ]; then
        print_info "No high-risk IPs detected (score >= 70)"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    echo "Configuration will be written to: $apache_conf"
    echo "High-risk IPs to block: ${#block_ips[@]}"
    echo ""
    read -r -p "Proceed? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Create backup if file exists
    local backup_file=""
    if [ -f "$apache_conf" ]; then
        backup_file="$apache_conf.backup.$(date +%Y%m%d_%H%M%S)"
        if ! cp -p -- "$apache_conf" "$backup_file"; then
            print_warning "Could not back up $apache_conf - nothing was changed"
            echo ""
            read -r -p "Press Enter to continue..."
            show_action_menu
            return
        fi
        print_info "Backed up existing configuration"
    fi

    # Generate global blocking rules
    print_info "Generating global bot blocking configuration..."

    if ! {
        echo "# Global bot blocking rules"
        echo "# Generated by toolkit on $(date)"
        echo ""
        echo "<IfModule mod_authz_core.c>"
        echo "    <Location \"/\">"
        echo "        # Add to, rather than replace, each site's own access rules"
        echo "        AuthMerging And"
        echo "        <RequireAll>"
        echo "            Require all granted"
        echo "            # Block high-risk IPs (threat score >= 70)"
        printf '            Require not ip %s\n' "${block_ips[@]}"
        echo "        </RequireAll>"
        echo "    </Location>"
        echo "</IfModule>"
        echo ""
    } > "$apache_conf"; then
        print_warning "Could not write $apache_conf"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Never restart Apache on a configuration it would reject
    if ! _bot_blocking_apache_configtest; then
        if [ -n "$backup_file" ]; then
            cp -p -- "$backup_file" "$apache_conf" \
                || print_warning "Could not restore $backup_file - restore it manually"
        else
            rm -f -- "$apache_conf"
        fi
        print_warning "Apache rejected the new configuration - change rolled back, Apache was not restarted"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    print_success "Created global blocking configuration with ${#block_ips[@]} IPs"

    echo ""
    echo "Restarting Apache to apply changes..."

    if systemctl restart httpd 2>/dev/null || systemctl restart apache2 2>/dev/null; then
        print_success "Apache restarted successfully"
    else
        print_warning "Could not restart Apache - please restart manually"
    fi

    echo ""
    # Verify domains still work after Apache global blocking
    verify_domains_still_working

    show_action_menu
}

# Placeholder screen for WordPress hardening: prints a list of manual
# recommendations, waits for Enter and reopens the action menu.
execute_wp_hardening() {
    clear
    print_banner "WordPress Hardening"
    echo ""
    print_info "WordPress hardening feature coming soon..."

    # Static advice text, one argument per output line
    printf '%s\n' \
        "" \
        "Recommended manual actions:" \
        "  • Install Wordfence or similar security plugin" \
        "  • Enable two-factor authentication" \
        "  • Limit login attempts" \
        "  • Disable XML-RPC if not needed" \
        "  • Use strong passwords" \
        ""

    read -p "Press Enter to continue..."
    show_action_menu
}

# Placeholder screen for rate limiting: points the user at the other menu
# options, waits for Enter and reopens the action menu.
execute_rate_limiting() {
    clear
    print_banner "Enable Rate Limiting"
    echo ""
    print_info "Rate limiting modules like mod_evasive/mod_security can help with application-level DoS"

    # Static guidance text, one argument per output line
    printf '%s\n' \
        "" \
        "For better bot protection, consider:" \
        "  - IP blocking (options 1-3) - Block specific attacking IPs" \
        "  - CSF CT_LIMIT adjustment (option 4) - Limit connections per IP" \
        "  - .htaccess rules (option 5) - Domain-specific blocking" \
        "" \
        "This option (rate limiting) is currently a placeholder for future implementation." \
        ""

    read -p "Press Enter to continue..."
    show_action_menu
}

# execute_csf_portflood() removed - not appropriate for web servers with 400+ sites
# Blocking ports 80/443 based on connection count would break legitimate traffic

# Interactively enable CSF SYNFLOOD protection (SYNFLOOD = "1",
# SYNFLOOD_RATE = "100/s", SYNFLOOD_BURST = "150") and restart CSF.
#
# Arguments:
#   $1 - optional path of the CSF configuration file
#        (default: /etc/csf/csf.conf)
#
# The configuration is backed up before it is edited. Missing settings, a
# failed edit or a failed CSF restart are reported as warnings instead of
# being reported as success or aborting the script (the file runs under
# `set -e`). Always returns via show_action_menu.
execute_csf_synflood() {
    local csf_conf="${1:-/etc/csf/csf.conf}"
    local confirm

    clear
    print_banner "Enable CSF SYNFLOOD Protection"
    echo ""

    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF is not installed on this server"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get current SYNFLOOD setting
    local current_synflood=""
    if [ -r "$csf_conf" ]; then
        current_synflood=$(awk -F'"' '/^SYNFLOOD = / { print $2; exit }' "$csf_conf" 2>/dev/null || true)
    fi
    [ -n "$current_synflood" ] || current_synflood="0"

    echo "Current SYNFLOOD protection: ${current_synflood}"
    echo ""
    echo "SYNFLOOD protects against SYN flood DDoS attacks by limiting"
    echo "the rate of new TCP connections."
    echo ""
    echo "Recommended settings:"
    echo "  SYNFLOOD = \"1\"          (enable protection)"
    echo "  SYNFLOOD_RATE = \"100/s\"   (100 connections per second)"
    echo "  SYNFLOOD_BURST = \"150\"    (allow burst of 150)"
    echo ""
    read -r -p "Enable SYNFLOOD protection? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Update CSF config
    print_info "Enabling SYNFLOOD protection..."

    # sed silently skips settings that are not present, so look for them first
    local key missing=""
    if [ -f "$csf_conf" ]; then
        for key in SYNFLOOD SYNFLOOD_RATE SYNFLOOD_BURST; do
            grep -q "^$key = " "$csf_conf" || missing="$missing $key"
        done
    fi

    if [ ! -f "$csf_conf" ]; then
        print_warning "Could not find $csf_conf"
    elif [ -n "$missing" ]; then
        print_warning "Setting(s) not found in $csf_conf:$missing - nothing was changed"
    else
        local backup="$csf_conf.backup.$(date +%Y%m%d_%H%M%S)"
        if ! cp -p -- "$csf_conf" "$backup"; then
            print_warning "Could not back up $csf_conf - nothing was changed"
        elif ! sed -i \
                -e 's/^SYNFLOOD = .*/SYNFLOOD = "1"/' \
                -e 's/^SYNFLOOD_RATE = .*/SYNFLOOD_RATE = "100\/s"/' \
                -e 's/^SYNFLOOD_BURST = .*/SYNFLOOD_BURST = "150"/' \
                "$csf_conf"; then
            print_warning "Failed to update $csf_conf (backup: $backup)"
        else
            print_success "SYNFLOOD protection enabled"
            echo "Backup saved to: $backup"

            # Restart CSF
            print_info "Restarting CSF..."
            if csf -r >/dev/null 2>&1; then
                print_success "CSF restarted successfully"
            else
                print_warning "CSF restart may have failed - check manually with: csf -r"
            fi
        fi
    fi

    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
}

# Show setup instructions for MaxMind GeoIP2 country blocking with CSF.
# The text differs depending on whether the mmdbinspect command is found.
# When $TEMP_DIR/high_risk_networks.txt is non-empty, its first ten
# "<count> <network>" lines are listed as well. Waits for Enter and reopens
# the action menu.
execute_install_maxmind() {
    local networks_file="$TEMP_DIR/high_risk_networks.txt"

    clear
    print_banner "Install MaxMind GeoIP2 for Country Blocking"
    echo ""

    if ! command -v mmdbinspect >/dev/null 2>&1; then
        # Tools missing: full installation walkthrough
        print_info "MaxMind GeoIP2 not detected"
        printf '%s\n' \
            "" \
            "To install MaxMind GeoIP2 for CSF country blocking:" \
            "" \
            "1. Sign up for free MaxMind account:" \
            "   https://www.maxmind.com/en/geolite2/signup" \
            "" \
            "2. Get your license key from:" \
            "   https://www.maxmind.com/en/accounts/current/license-key" \
            "" \
            "3. Install GeoIP Perl module:" \
            "   yum install perl-Geo-IP" \
            "   # or" \
            "   cpan -i Geo::IP" \
            "" \
            "4. Test CSF GeoIP support:" \
            "   /usr/local/csf/bin/csftest.pl -g" \
            "" \
            "5. Configure CC_DENY in /etc/csf/csf.conf:" \
            '   CC_DENY = "CN,RU"  (example: block China & Russia)' \
            "" \
            "6. Restart CSF:" \
            "   csf -r" \
            ""
    else
        # Tools present: only the remaining configuration steps
        print_success "MaxMind GeoIP2 tools already installed"
        printf '%s\n' \
            "" \
            "Next steps:" \
            "1. Sign up for free license at: https://www.maxmind.com/en/geolite2/signup" \
            "2. Get your license key from account page" \
            "3. Install CSF GeoIP module: /usr/local/csf/bin/csftest.pl -g" \
            "4. Configure CC_DENY in /etc/csf/csf.conf with country codes" \
            "" \
            'Example: CC_DENY = "CN,RU,KP"  (block China, Russia, North Korea)' \
            ""
    fi

    # Show geographic analysis if available
    if [ -s "$networks_file" ]; then
        echo "=========================================================══"
        echo "High-Risk Networks Detected:"
        echo ""
        head -10 "$networks_file" | while read hits cidr; do
            echo "  • $cidr - $hits high-risk IPs"
        done
        echo ""
    fi

    read -p "Press Enter to continue..."
    show_action_menu
}

################################################################################
# INTERACTIVE CSF BLOCKING
################################################################################

# Interactive CSF blocking flow.
# Lists every IP from $TEMP_DIR/threat_scores.txt whose score is >= 70
# (highest first), leaving out IPs accepted by is_excluded_ip and IPs that
# have a line starting with "ip|" in $TEMP_DIR/false_positives.txt. The user
# then chooses a 1-hour block, a 24-hour block, a permanent block or no
# action. Returns 0 without prompting when CSF is unavailable or nothing
# qualifies.
offer_csf_blocking() {
    echo ""
    echo "==============================================================="
    print_header "🛡  INTERACTIVE THREAT BLOCKING"

    # Nothing can be offered without CSF
    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF (ConfigServer Security & Firewall) is not installed"
        echo "Cannot offer automatic blocking without CSF"
        return 0
    fi

    # Parallel arrays: address and its score share the same index
    local high_risk_ips=()
    local ip_scores=()

    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        local line score ip
        while read -r line; do
            # Each record is "score|ip"; split it without spawning processes
            score="${line%%|*}"
            ip="${line#*|}"
            ip="${ip%%|*}"

            # Only HIGH and CRITICAL scores qualify
            [ "$score" -ge 70 ] || continue

            # Whitelisted addresses are never offered
            if is_excluded_ip "$ip"; then
                continue
            fi

            # Neither are known false positives
            if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
                continue
            fi

            high_risk_ips+=("$ip")
            ip_scores+=("$score")
        done < <(awk -F'|' '{print $1 "|" $2}' "$TEMP_DIR/threat_scores.txt" | sort -rn)
    fi

    # Nothing qualified
    if [ ${#high_risk_ips[@]} -eq 0 ]; then
        print_info "No high-risk IPs detected (score >= 70)"
        return 0
    fi

    # Preview of the candidates
    echo ""
    echo "Found ${#high_risk_ips[@]} high-risk IP(s) with threat score >= 70:"
    echo ""

    local count=0
    for i in "${!high_risk_ips[@]}"; do
        count=$((count + 1))
        local ip="${high_risk_ips[$i]}"
        local score="${ip_scores[$i]}"
        local requests=$(grep "^$ip|" "$TEMP_DIR/bot_ips.txt" 2>/dev/null | cut -d'|' -f2 || echo "0")

        # Severity bands: 70-79 elevated, 80-89 high, 90+ critical
        if [ "$score" -lt 80 ]; then
            echo -e "  [$count] $ip - Risk: $score/100 ELEVATED ($requests requests)"
        elif [ "$score" -lt 90 ]; then
            echo -e "  ${YELLOW}[$count] $ip${NC} - Risk: ${YELLOW}$score/100  HIGH${NC} ($requests requests)"
        else
            echo -e "  ${RED}[$count] $ip${NC} - Risk: ${RED}$score/100 CRITICAL${NC} ($requests requests)"
        fi
    done

    printf '%s\n' \
        "" \
        "===============================================================" \
        ""

    # Let the user pick what to do with the list
    echo -e "${BOLD}Would you like to temporarily block these IPs using CSF?${NC}"
    printf '%s\n' \
        "" \
        "Options:" \
        "  1) Block for 1 hour (temporary - auto-expires)" \
        "  2) Block for 24 hours (temporary - auto-expires)" \
        "  3) Block permanently (requires manual unblock)" \
        "  4) Don't block (manual review)" \
        ""
    read -p "Select option [1-4]: " block_choice

    case "$block_choice" in
        1)
            # 1 hour in seconds
            apply_csf_blocks "3600" "1 hour" "${high_risk_ips[@]}"
            ;;
        2)
            # 24 hours in seconds
            apply_csf_blocks "86400" "24 hours" "${high_risk_ips[@]}"
            ;;
        3)
            apply_csf_permanent_blocks "${high_risk_ips[@]}"
            ;;
        4)
            print_info "Skipping automatic blocking - manual review recommended"
            echo "You can block IPs manually using: csf -td IP DURATION"
            ;;
        *)
            print_warning "Invalid option - skipping blocking"
            ;;
    esac
}

# Apply temporary CSF blocks (csf -td) to a list of IPs.
#
# Arguments:
#   $1   - block duration in seconds
#   $2   - human-readable duration used in messages (e.g. "1 hour")
#   $3.. - IP addresses to block
# Reads:
#   $TEMP_DIR/threat_scores.txt - "score|ip[|...]" records; the score goes
#   into the CSF comment ("unknown" when the IP has no record)
#
# The score is looked up by comparing the second field exactly, so it works
# whether or not a record carries extra fields after the IP. CSF is restarted
# only when at least one block was applied.
apply_csf_blocks() {
    local duration=$1
    local duration_text=$2
    shift 2
    local ips=("$@")
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local ip score

    if [ "${#ips[@]}" -eq 0 ]; then
        print_info "No IPs to block"
        return 0
    fi

    echo ""
    print_info "Applying temporary CSF blocks for $duration_text..."
    echo ""

    local success_count=0
    local fail_count=0

    for ip in "${ips[@]}"; do
        # Get threat score for comment
        score=$(awk -F'|' -v ip="$ip" '($2 "") == ip { print $1; exit }' "$scores_file" 2>/dev/null || true)
        [ -n "$score" ] || score="unknown"

        # Use csf -td for temporary deny
        if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
            echo -e "  ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e "  ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done

    echo ""
    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) for $duration_text"
        echo ""
        echo "These blocks will automatically expire after $duration_text"
        echo "To view temporary blocks: csf -t"
        echo "To remove a block early: csf -tr IP"
    fi

    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF to apply changes (pointless when nothing was blocked)
    if [ "$success_count" -gt 0 ]; then
        print_info "Restarting CSF to apply changes..."
        if csf -r >/dev/null 2>&1; then
            print_success "CSF restarted successfully"
        else
            print_warning "CSF restart may have failed - check manually with: csf -r"
        fi
    fi
}

# Apply permanent CSF blocks (csf -d) to a list of IPs after an explicit
# "yes" confirmation.
#
# Arguments:
#   $@ - IP addresses to block
# Reads:
#   $TEMP_DIR/threat_scores.txt - "score|ip[|...]" records; the score goes
#   into the CSF comment ("unknown" when the IP has no record)
#
# The score is looked up by comparing the second field exactly, so it works
# whether or not a record carries extra fields after the IP. CSF is restarted
# only when at least one block was applied. Returns 0 when cancelled.
apply_csf_permanent_blocks() {
    local ips=("$@")
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local ip score confirm

    if [ "${#ips[@]}" -eq 0 ]; then
        print_info "No IPs to block"
        return 0
    fi

    echo ""
    print_warning "Applying PERMANENT CSF blocks..."
    echo "These will require manual removal using: csf -dr IP"
    echo ""
    read -r -p "Are you sure? This is permanent! (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Cancelled permanent blocking"
        return 0
    fi

    echo ""
    local success_count=0
    local fail_count=0

    for ip in "${ips[@]}"; do
        # Get threat score for comment
        score=$(awk -F'|' -v ip="$ip" '($2 "") == ip { print $1; exit }' "$scores_file" 2>/dev/null || true)
        [ -n "$score" ] || score="unknown"

        # Use csf -d for permanent deny
        if csf -d "$ip" "Bot threat score: $score/100 - Permanently blocked by toolkit" >/dev/null 2>&1; then
            echo -e "  ${GREEN}${NC} Permanently blocked $ip (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e "  ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done

    echo ""
    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) permanently"
        echo ""
        echo "To view blocked IPs: csf -g"
        echo "To remove a block: csf -dr IP"
    fi

    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF (pointless when nothing was blocked)
    if [ "$success_count" -gt 0 ]; then
        print_info "Restarting CSF to apply changes..."
        if csf -r >/dev/null 2>&1; then
            print_success "CSF restarted successfully"
        else
            print_warning "CSF restart may have failed - check manually with: csf -r"
        fi
    fi
}

# Entry point: call main with the script's command-line arguments unchanged
main "$@"