Files
Linux-Server-Management-Too…/modules/security/bot-analyzer.sh
T
cschantz 69ee59e4be Fix remaining AWK-UNINIT issues in bot-analyzer and network analysis
modules/security/bot-analyzer.sh:
- Line 863: Initialize ip="" for rapid fire IP analysis
- Line 1564: Initialize variables in bot detection awk

modules/performance/network-bandwidth-analyzer.sh:
- Line 237: Initialize sum=0 for bandwidth calculation

modules/security/optimize-ct-limit.sh:
- Line 244: Initialize s=0 for request aggregation

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-02-07 02:50:34 -05:00

3631 lines
141 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#############################################################################
# Apache/cPanel Domain Log Bot & Botnet Analyzer
# Version: 3.1 Enhanced (with Library Integration)
# Advanced log analysis for bot activity, security threats, and botnets
#
# Features:
# - Comprehensive bot classification (legitimate, AI, monitoring, suspicious)
# - Enhanced attack vector detection (SQL injection, XSS, path traversal,
# RCE/shell upload, info disclosure, login bruteforce)
# - Threat scoring system (0-100 risk scores for each IP)
# - Time-series analysis with hourly traffic visualization
# - Response code intelligence (what are bots finding?)
# - False positive detection for legitimate monitoring services
# - Bandwidth cost estimation for bot traffic
# - Botnet pattern analysis (coordinated attacks, DDoS detection)
# - Prioritized blocklists sorted by threat severity
# - Actionable reports with copy-paste ready configurations
# - Performance optimized for large log files (>500k entries)
# - User filtering (analyze all users or specific user)
# - Auto-detects log directory based on control panel
#############################################################################
# Pull in the shared toolkit libraries. The toolkit root is two directories
# above the directory that holds this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
for _bot_analyzer_lib in common-functions system-detect user-manager \
    ip-reputation bot-signatures attack-patterns threat-intelligence; do
    source "$SCRIPT_DIR/lib/${_bot_analyzer_lib}.sh"
done
unset _bot_analyzer_lib

# Default log directory: SYS_LOG_DIR when it is set (presumably by
# lib/system-detect.sh - confirm), otherwise the cPanel domlogs location.
LOG_DIR=${SYS_LOG_DIR:-/var/log/apache2/domlogs}

# Scratch files live in the toolkit's own tmp directory rather than the system
# /tmp. On large servers (200+ domains) even compressed temp files can reach
# 50-100MB, and keeping them here means:
#   - the system /tmp is never filled up
#   - they disappear when the toolkit is removed
#   - they are covered by the cleanup script (clean-and-push-toolkit.sh)
TOOLKIT_TMP_DIR=$SCRIPT_DIR/tmp
mkdir -p "$TOOLKIT_TMP_DIR" 2>/dev/null
TEMP_DIR=$TOOLKIT_TMP_DIR/bot_analysis_$$
OUTPUT_FILE=$TOOLKIT_TMP_DIR/bot_analysis_report_$(date +%Y%m%d_%H%M%S).txt

# Filters; an empty value means "no restriction".
DAYS_BACK=      # number of days to look back
HOURS_BACK=     # number of hours to look back
FILTER_USER=    # single user to analyze
# Parse command line arguments

# Abort when a value-taking option is the last word on the command line.
# Without this guard "shift 2" fails without shifting anything and the
# parsing loop below never terminates.
#   $1 - option as typed, $2 - number of arguments still unparsed ($#)
_bot_require_value() {
    if [ "$2" -lt 2 ]; then
        echo "Option $1 requires an argument" >&2
        echo "Use -h for help" >&2
        exit 1
    fi
}

# Abort unless the value is a non-negative integer (the same rule the
# interactive prompts apply to custom hour/day counts).
#   $1 - option as typed, $2 - value supplied for it
_bot_require_count() {
    if [[ ! "$2" =~ ^[0-9]+$ ]]; then
        echo "Option $1 expects a whole number, got: $2" >&2
        echo "Use -h for help" >&2
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    case $1 in
        -d|--days)
            _bot_require_value "$1" "$#"
            _bot_require_count "$1" "$2"
            # 10# forces base 10 so that input like "08" is not read as octal
            DAYS_BACK=$((10#$2))
            shift 2
            ;;
        -H|--hours)
            _bot_require_value "$1" "$#"
            _bot_require_count "$1" "$2"
            HOURS_BACK=$((10#$2))
            shift 2
            ;;
        -l|--log-dir)
            _bot_require_value "$1" "$#"
            LOG_DIR="$2"
            shift 2
            ;;
        -o|--output)
            _bot_require_value "$1" "$#"
            OUTPUT_FILE="$2"
            shift 2
            ;;
        -u|--user)
            _bot_require_value "$1" "$#"
            FILTER_USER="$2"
            shift 2
            ;;
        -h|--help)
            echo "Apache/cPanel Domain Log Bot & Botnet Analyzer v3.1"
            echo ""
            echo "Usage: $0 [-d DAYS | -H HOURS] [-u USER] [-l LOG_DIR] [-o OUTPUT_FILE]"
            echo ""
            echo "Options:"
            echo " -d, --days DAYS Analyze only logs from last N days (24-hour periods)"
            echo " -H, --hours HOURS Analyze only logs from last N hours"
            echo " -u, --user USER Analyze only logs for specific cPanel user"
            echo " -l, --log-dir DIR Custom log directory (auto-detected by default)"
            echo " -o, --output FILE Custom output file path"
            echo " -h, --help Show this help message"
            echo ""
            echo "Examples:"
            echo " $0 # Analyze all logs in default directory"
            echo " $0 -d 7 # Analyze logs from last 7 days"
            echo " $0 -H 6 # Analyze logs from last 6 hours"
            echo " $0 -l /custom/path # Use custom log directory"
            echo ""
            echo "Note: If both -d and -H are specified, only -H (hours) will be used."
            echo ""
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use -h for help"
            exit 1
            ;;
    esac
done
# Interactive prompts for missing options

#######################################
# Ask the operator which time window to analyze.
# Globals:   HOURS_BACK / DAYS_BACK (written for every choice except
#            "all logs" and invalid input); GREEN, NC (read)
# Inputs:    reads the menu choice, and for choices 7/8 a number, from stdin
# Outputs:   menu on stdout; complaints via print_error / print_warning
#######################################
prompt_time_range() {
    local time_choice custom_hours custom_days

    clear
    print_banner "Bot Analyzer - Time Range Selection"
    echo ""
    echo -e " ${GREEN}1)${NC} All available logs"
    echo -e " ${GREEN}2)${NC} Last 1 hour"
    echo -e " ${GREEN}3)${NC} Last 6 hours"
    echo -e " ${GREEN}4)${NC} Last 24 hours"
    echo -e " ${GREEN}5)${NC} Last 7 days"
    echo -e " ${GREEN}6)${NC} Last 30 days"
    echo -e " ${GREEN}7)${NC} Custom hours"
    echo -e " ${GREEN}8)${NC} Custom days"
    echo ""
    read -r -p "Select time range (1-8): " time_choice

    case "$time_choice" in
        1) ;; # All logs - no filter
        2) HOURS_BACK=1 ;;
        3) HOURS_BACK=6 ;;
        4) HOURS_BACK=24 ;;
        5) DAYS_BACK=7 ;;
        6) DAYS_BACK=30 ;;
        7)
            read -r -p "Enter number of hours: " custom_hours
            if [[ "$custom_hours" =~ ^[0-9]+$ ]]; then
                # Force base 10: "08" would otherwise be rejected as invalid
                # octal when HOURS_BACK is later used in $(( ... )).
                HOURS_BACK=$((10#$custom_hours))
            else
                print_error "Invalid input, using all logs"
            fi
            ;;
        8)
            read -r -p "Enter number of days: " custom_days
            if [[ "$custom_days" =~ ^[0-9]+$ ]]; then
                DAYS_BACK=$((10#$custom_days))
            else
                print_error "Invalid input, using all logs"
            fi
            ;;
        *)
            print_warning "Invalid choice, using all logs"
            ;;
    esac
}
#######################################
# Ask whether to analyze every user or a single one.
# Globals:   FILTER_USER (written when a specific user is picked);
#            GREEN, NC (read)
# Inputs:    reads the menu choice from stdin; the user picker itself is
#            delegated to select_user_interactive
#######################################
prompt_user_scope() {
    local user_choice selected

    clear
    print_banner "Bot Analyzer - User Scope Selection"
    echo ""
    echo -e " ${GREEN}1)${NC} All users (system-wide analysis)"
    echo -e " ${GREEN}2)${NC} Specific user"
    echo ""
    read -r -p "Select option (1-2): " user_choice

    if [ "$user_choice" = "2" ]; then
        echo ""
        # Assign separately from "local": "local var=$(cmd)" reports the
        # status of "local" itself, which hid a failed or cancelled selection.
        if selected=$(select_user_interactive "Select user to analyze") \
            && [ "$selected" != "ALL" ]; then
            FILTER_USER="$selected"
        fi
    fi
}
# Fall back to interactive prompts for anything the command line left open.

# No time window given at all -> ask for one.
if [[ -z "$DAYS_BACK" && -z "$HOURS_BACK" ]]; then
    prompt_time_range
fi

# No user given -> ask for the scope.
[[ -n "$FILTER_USER" ]] || prompt_user_scope

# Days and hours are mutually exclusive; hours win.
if [[ -n "$DAYS_BACK" && -n "$HOURS_BACK" ]]; then
    echo -e "${YELLOW}Warning: Both days and hours specified. Using hours filter only.${NC}" >&2
    DAYS_BACK=""
fi
# Color codes for terminal output
# These are backslash escape sequences; they only turn into real ANSI codes
# when printed with "echo -e" (as the print_* helpers in this file do).
# NOTE(review): the interactive prompts and the days/hours warning earlier in
# this file already reference GREEN/YELLOW/NC before these assignments run -
# presumably lib/common-functions.sh defines them as well; confirm.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m' # No Color
#######################################
# Check for required commands.
# Verifies that every external tool in the list below can be found.
# Globals:   RED, NC (read)
# Outputs:   error listing the missing tools on stderr
# Returns:   0 when everything is present; exits the script with status 1
#            otherwise
#######################################
check_dependencies() {
    local -a missing_deps=()
    local cmd   # kept local so a caller's own "cmd" variable is not clobbered

    for cmd in awk grep sort uniq find sed head tail cut; do
        command -v "$cmd" >/dev/null 2>&1 || missing_deps+=("$cmd")
    done

    if [ "${#missing_deps[@]}" -gt 0 ]; then
        echo -e "${RED}Error: Missing required commands: ${missing_deps[*]}${NC}" >&2
        exit 1
    fi
    return 0
}
#######################################
# Check disk space.
# Warns when the filesystem holding the toolkit has less than 100MB free and
# lets the operator decide whether to continue.
# Globals:   SCRIPT_DIR, YELLOW, NC (read)
# Inputs:    a single keypress from stdin when space is low
# Outputs:   warnings on stderr
# Returns:   0 to continue; exits the script with status 1 when the operator
#            does not answer y/Y
#######################################
check_disk_space() {
    local available_kb
    local check_path="$SCRIPT_DIR"

    # -P keeps each filesystem on one line (long device names otherwise wrap
    # and shift the columns); -k pins the unit to 1024-byte blocks regardless
    # of BLOCKSIZE/POSIXLY_CORRECT in the environment.
    available_kb=$(df -Pk "$check_path" 2>/dev/null | awk 'NR == 2 {print $4}')

    # Treat anything that is not a plain number like "unknown" instead of
    # feeding it to an integer comparison.
    if [[ ! "$available_kb" =~ ^[0-9]+$ ]]; then
        echo -e "${YELLOW}Warning: Cannot determine available disk space for toolkit directory${NC}" >&2
        return 0
    fi

    if [ "$available_kb" -lt 102400 ]; then # Less than 100MB
        echo -e "${YELLOW}Warning: Low disk space in toolkit directory: $((available_kb/1024))MB available${NC}" >&2
        read -p "Continue anyway? (y/N): " -n 1 -r
        echo
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            exit 1
        fi
    fi
    return 0
}
# Run dependency checks
check_dependencies
check_disk_space

# Create temp directory
mkdir -p "$TEMP_DIR" || {
    echo -e "${RED}Error: Cannot create temp directory: $TEMP_DIR${NC}" >&2
    exit 1
}

# Cleanup on exit.
# The trap body is single-quoted so the path is expanded when the trap fires
# instead of being pasted into a command string that is parsed a second time
# (a path containing quotes, "$" or backticks would otherwise be re-evaluated
# as shell code). ":?" refuses to run rm if TEMP_DIR is ever empty or unset.
trap 'rm -rf -- "${TEMP_DIR:?}"' EXIT
#############################################################################
# Bot Signature Database
#############################################################################
# NOTE: Bot signatures now loaded from lib/bot-signatures.sh
# Arrays available: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS
#############################################################################
# Helper Functions
#############################################################################
# Print a section title framed by two horizontal rules, with a blank line
# before and after. $1 is the title; CYAN/NC supply the colour.
print_header() {
    local title="$1"
    local rule="==============================================================="

    echo -e "\n${CYAN}${rule}${NC}"
    echo -e "${CYAN}${title}${NC}"
    echo -e "${CYAN}${rule}${NC}\n"
}
# Print $1 in the alert colour (RED), followed by a colour reset.
print_alert() {
    local message="$1"
    echo -e "${RED}${message}${NC}"
}
# Print $1 in the warning colour (YELLOW), followed by a colour reset.
print_warning() {
    local message="$1"
    echo -e "${YELLOW}${message}${NC}"
}
# Print $1 in the informational colour (BLUE), indented by one space.
print_info() {
    local message="$1"
    echo -e "${BLUE} ${message}${NC}"
}
# Print $1 in the success colour (GREEN), followed by a colour reset.
print_success() {
    local message="$1"
    echo -e "${GREEN}${message}${NC}"
}
#############################################################################
# Log Parsing Functions
#############################################################################
#######################################
# Collect the access logs and normalise every request into one line of
#   ip|domain|url|status|size|user_agent|method|timestamp
# written to $TEMP_DIR/parsed_logs.txt (a gzip copy is produced in the
# background).
# Globals read: INTERWORX_MODE, SYS_CONTROL_PANEL, LOG_DIR, HOURS_BACK,
#               DAYS_BACK, FILTER_USER, TEMP_DIR, user_domains
# Outputs:      status messages via print_*; progress on stderr
# Returns:      0 on success, 1 when no entry could be parsed
#
# NOTE(review): user_domains is only read here (one domain per line is what
# the grep -Fx below expects); it is presumably filled in by the caller when
# FILTER_USER is set - confirm.
#######################################
parse_logs() {
    if [ "$INTERWORX_MODE" = "yes" ]; then
        print_info "Parsing InterWorx domain logs from: /home/*/var/*/logs/"
    else
        print_info "Parsing logs from: $LOG_DIR"
    fi

    # Time filter (hours take precedence over days). find_opts narrows the
    # file list by modification time; cutoff_epoch is the per-entry cutoff.
    # It is computed once here rather than by spawning "date" for every log
    # file. GNU date is tried first, BSD date (-v) second; if both fail the
    # per-entry filter is simply skipped.
    local -a find_opts=()
    local cutoff_epoch=""
    if [ -n "$HOURS_BACK" ]; then
        local minutes=$((HOURS_BACK * 60))
        find_opts+=(-mmin -"$minutes")
        cutoff_epoch=$(date -d "$HOURS_BACK hours ago" +%s 2>/dev/null \
            || date -v-"${HOURS_BACK}H" +%s 2>/dev/null)
        print_info "Filtering logs from last $HOURS_BACK hours"
    elif [ -n "$DAYS_BACK" ]; then
        find_opts+=(-mtime -"$DAYS_BACK")
        cutoff_epoch=$(date -d "$DAYS_BACK days ago" +%s 2>/dev/null \
            || date -v-"${DAYS_BACK}d" +%s 2>/dev/null)
        print_info "Filtering logs from last $DAYS_BACK days"
    fi

    # Determine log file search roots and name pattern based on control panel
    local -a log_search_paths=()
    local log_search_name
    if [ "$INTERWORX_MODE" = "yes" ]; then
        # InterWorx: Official docs from https://appendix.interworx.com/current/nodeworx/general/other/log-file-locations.html
        # HTTP:  /home/{user}/var/{domain}/logs/transfer.log
        # HTTPS: /home/{user}/var/{domain}/logs/transfer-ssl.log
        # The glob has to be expanded by the shell: handing the quoted
        # pattern to find makes it look for a directory literally named "*".
        local candidate
        for candidate in /home/*/var/*/logs; do
            [ -d "$candidate" ] && log_search_paths+=("$candidate")
        done
        log_search_name="transfer*.log"
    else
        # cPanel: /var/log/apache2/domlogs/domain.com or domain.com-ssl_log
        # Plesk: Research verified paths from https://docs.plesk.com/en-US/obsidian/
        #   Apache HTTP:  /var/www/vhosts/system/{domain}/logs/access_log
        #   Apache HTTPS: /var/www/vhosts/system/{domain}/logs/access_ssl_log
        #   nginx HTTP:   /var/www/vhosts/system/{domain}/logs/proxy_access_log
        #   nginx HTTPS:  /var/www/vhosts/system/{domain}/logs/proxy_access_ssl_log
        # Note: /var/www/vhosts/{domain}/logs/ are hardlinks (backward compat)
        log_search_paths=("$LOG_DIR")
        log_search_name="*"
    fi

    # Parse all domain logs
    local file_count=0
    local progress_interval=5 # Show progress every 5 files instead of 50
    local logfile domain
    echo ""
    {
        # Without any search root find would default to the current
        # directory, so it is only run when there is something to search.
        if [ "${#log_search_paths[@]}" -gt 0 ]; then
            find "${log_search_paths[@]}" -type f -name "$log_search_name" \
                ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" \
                "${find_opts[@]}" 2>/dev/null
        fi | while IFS= read -r logfile; do
            # Skip empty files
            [ -s "$logfile" ] || continue

            # Extract domain name based on control panel
            if [ "$INTERWORX_MODE" = "yes" ]; then
                # InterWorx: extract from path /home/user/var/domain.com/logs/transfer*.log
                domain=$(printf '%s\n' "$logfile" | sed -n 's|^/home/.*/var/\([^/]*\)/logs/.*|\1|p')
            elif [ "$SYS_CONTROL_PANEL" = "plesk" ]; then
                # Plesk: extract from path /var/www/vhosts/system/domain.com/logs/{access_log,access_ssl_log,proxy_*}
                domain=$(printf '%s\n' "$logfile" | sed -n 's|^/var/www/vhosts/system/\([^/]*\)/logs/.*|\1|p')
            else
                # cPanel: extract from filename /var/log/apache2/domlogs/domain.com or domain.com-ssl_log
                domain=$(basename "$logfile" | sed 's/-ssl_log$//')
            fi

            # Skip if domain extraction failed
            [ -z "$domain" ] && continue

            # User filtering: skip domains not belonging to the specified user
            if [ -n "$FILTER_USER" ]; then
                if ! grep -qFx -- "$domain" <<< "$user_domains"; then
                    continue
                fi
            fi

            # Show progress every N files. This block's stdout is redirected
            # into parsed_logs.txt, so progress must go to stderr or it ends
            # up glued to the front of a data line.
            file_count=$((file_count + 1))
            if [ $((file_count % progress_interval)) -eq 0 ]; then
                echo -ne "\r Parsed $file_count log files... (current: $domain)" >&2
            fi

            # Parse Apache Combined Log Format with error handling
            # Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT"
            # Only POSIX two-argument match() is used so the program does not
            # depend on gawk; mktime() is still required for time filtering.
            awk -v domain="$domain" -v cutoff_epoch="$cutoff_epoch" '
            BEGIN {
                # Month name to number lookup
                month["Jan"]=1; month["Feb"]=2; month["Mar"]=3; month["Apr"]=4
                month["May"]=5; month["Jun"]=6; month["Jul"]=7; month["Aug"]=8
                month["Sep"]=9; month["Oct"]=10; month["Nov"]=11; month["Dec"]=12
            }
            {
                # Skip empty lines and malformed entries
                if (NF < 10 || length($0) < 50) next

                # Extract IP (first field - space separated)
                ip = $1

                # Extract timestamp (text between the first pair of square brackets)
                if (match($0, /\[[^]]+\]/)) {
                    timestamp = substr($0, RSTART + 1, RLENGTH - 2)
                } else {
                    timestamp = "unknown"
                }

                # Drop entries older than the cutoff when a time filter is active
                if (cutoff_epoch != "" && timestamp != "unknown") {
                    # Format: 31/Dec/2025:10:30:15 -0500 (timezone part is ignored)
                    split(timestamp, ts_parts, " ")
                    # Parse: dd/mmm/yyyy:HH:MM:SS
                    split(ts_parts[1], dt, /[\/:]/)
                    mon = month[dt[2]]
                    if (mon != "") {
                        # mktime takes "YYYY MM DD HH MM SS" and is much
                        # faster than spawning a date command per entry
                        log_epoch = mktime(dt[3] " " mon " " dt[1] " " dt[4] " " dt[5] " " dt[6])
                        if (log_epoch < cutoff_epoch) {
                            next # Skip this entry, too old
                        }
                    }
                }

                # Extract HTTP method, URL, status and size. Method and URL
                # are the first two space-separated words of the match,
                # status and size the last two.
                if (match($0, /"[A-Z]+ [^ ]+ [^"]*" [0-9]+ [0-9-]+/)) {
                    request = substr($0, RSTART, RLENGTH)
                    word_count = split(request, word, "[ ]")
                    http_method = substr(word[1], 2)
                    request_url = word[2]
                    status = word[word_count - 1]
                    size = word[word_count]
                } else {
                    # Fallback for malformed requests
                    http_method = "-"
                    request_url = "-"
                    status = "-"
                    size = "0"
                }

                # Extract User-Agent (last quoted string on the line)
                if (match($0, /"[^"]*"[[:space:]]*$/)) {
                    user_agent = substr($0, RSTART + 1, RLENGTH - 1)
                    sub(/"[[:space:]]*$/, "", user_agent)
                    if (user_agent == "") user_agent = "-"
                } else {
                    user_agent = "-"
                }

                # Only output valid entries
                if (ip != "" && ip !~ /^[[:space:]]*$/) {
                    print ip "|" domain "|" request_url "|" status "|" size "|" user_agent "|" http_method "|" timestamp
                }
            }' "$logfile" 2>/dev/null
        done
    } > "$TEMP_DIR/parsed_logs.txt"

    # Clear the progress line
    echo -ne "\r\033[K" >&2

    if [ ! -s "$TEMP_DIR/parsed_logs.txt" ]; then
        print_alert "No log entries were parsed. Check log format or permissions."
        return 1
    fi

    local line_count
    line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
    local file_size_kb
    file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" | cut -f1)

    # Compress for storage (gzip saves ~90% space on text)
    # But we keep uncompressed version for fast analysis
    gzip -c "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/parsed_logs.txt.gz" &

    print_success "Logs parsed successfully ($line_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}
#############################################################################
# Bot Detection & Classification
#############################################################################
#######################################
# Tag every parsed request whose User-Agent looks like a bot.
# Reads  $TEMP_DIR/parsed_logs.txt and writes $TEMP_DIR/classified_bots.txt;
# each output line is the input line with "|bot_type|bot_name" appended.
# bot_type is one of legit, ai, monitor, suspicious, unidentified_bot.
# Requests that match no category, or that carry a complete browser
# signature, are dropped.
# Globals read: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS (the array
#               KEYS are used as signatures - presumably associative arrays
#               from lib/bot-signatures.sh; confirm), TEMP_DIR
# Returns:      0 on success, 1 when the result file is empty
#######################################
classify_bots() {
    print_info "Classifying bot traffic..."

    # Join the signature names of each category into one alternation.
    # Declared separately from the assignment so a failing pipeline is not
    # masked by the exit status of "local".
    local legit_pattern ai_pattern monitor_pattern suspicious_pattern
    legit_pattern=$(printf "%s|" "${!LEGIT_BOTS[@]}" | sed 's/|$//')
    ai_pattern=$(printf "%s|" "${!AI_BOTS[@]}" | sed 's/|$//')
    monitor_pattern=$(printf "%s|" "${!MONITOR_BOTS[@]}" | sed 's/|$//')
    suspicious_pattern=$(printf "%s|" "${!SUSPICIOUS_BOTS[@]}" | sed 's/|$//')

    # Process logs with AWK for better performance
    awk -F'|' -v legit="$legit_pattern" -v ai="$ai_pattern" -v monitor="$monitor_pattern" -v suspicious="$suspicious_pattern" '
    # Return the first signature (in list order) that occurs in the
    # lower-cased user agent, or "Unknown" when none does.
    function first_match(ua_lc, names, count,    i) {
        for (i = 1; i <= count; i++) {
            if (match(ua_lc, tolower(names[i]))) {
                return names[i]
            }
        }
        return "Unknown"
    }
    BEGIN {
        # Convert patterns to lowercase for case-insensitive matching
        legit_lower = tolower(legit)
        ai_lower = tolower(ai)
        monitor_lower = tolower(monitor)
        suspicious_lower = tolower(suspicious)

        # Split the signature lists once instead of once per log line
        n_legit = split(legit, legit_names, "|")
        n_ai = split(ai, ai_names, "|")
        n_monitor = split(monitor, monitor_names, "|")
        n_suspicious = split(suspicious, suspicious_names, "|")
    }
    {
        ip = $1
        domain = $2
        url = $3
        status = $4
        size = $5
        ua = $6
        method = $7
        timestamp = $8
        ua_lower = tolower(ua)
        bot_type = "unknown"
        bot_name = "Unknown"

        # Check each category in priority order
        if (legit != "" && match(ua_lower, legit_lower)) {
            bot_type = "legit"
            bot_name = first_match(ua_lower, legit_names, n_legit)
        } else if (ai != "" && match(ua_lower, ai_lower)) {
            bot_type = "ai"
            bot_name = first_match(ua_lower, ai_names, n_ai)
        } else if (monitor != "" && match(ua_lower, monitor_lower)) {
            bot_type = "monitor"
            bot_name = first_match(ua_lower, monitor_names, n_monitor)
        } else if (suspicious != "" && match(ua_lower, suspicious_lower)) {
            bot_type = "suspicious"
            bot_name = first_match(ua_lower, suspicious_names, n_suspicious)
        } else if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python-requests|python-urllib|java\/|scan|check|monitor/)) {
            # FIXED: Check for bot keywords FIRST, then verify it is not a legitimate browser
            # This prevents bots from bypassing detection by including browser strings
            # Real browsers have: Mozilla/5.0 + platform + rendering engine + browser version
            is_real_browser = 0
            if (match(ua_lower, /chrome\/[0-9]/) && (match(ua_lower, /applewebkit/) || match(ua_lower, /mobile/))) {
                # Chrome/Chromium-based: Must have Chrome/ AND (AppleWebKit OR Mobile)
                is_real_browser = 1
            } else if (match(ua_lower, /firefox\/[0-9]/) && match(ua_lower, /gecko\//)) {
                # Firefox: Must have Firefox/ AND Gecko/
                is_real_browser = 1
            } else if (match(ua_lower, /safari\/[0-9]/) && match(ua_lower, /version\//) && match(ua_lower, /applewebkit/) && !match(ua_lower, /chrome/)) {
                # Safari: Must have Safari/ AND Version/ AND AppleWebKit (not Chrome)
                is_real_browser = 1
            } else if (match(ua_lower, /edg\/[0-9]|edge\/[0-9]/)) {
                # Edge: Must have Edg/ or Edge/
                is_real_browser = 1
            } else if (match(ua_lower, /samsungbrowser\/[0-9]|ucbrowser\/[0-9]|opr\/[0-9]/)) {
                # Mobile browsers: Samsung, UC, Opera Mobile
                is_real_browser = 1
            }

            # If it is a real browser, skip bot classification
            if (is_real_browser == 1) {
                next
            }

            bot_type = "unidentified_bot"
            # Use the first word of the UA (at most 30 characters) as bot name
            bot_name = ""
            if (match(ua, /^[^ ]+/)) {
                bot_name = substr(substr(ua, RSTART, RLENGTH), 1, 30)
            }
        }

        # Only print if bot_type is not "unknown" (i.e., we identified it as something)
        if (bot_type != "unknown") {
            print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
        }
    }' < "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"

    if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
        print_alert "Bot classification failed"
        return 1
    fi

    local classified_count
    classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
    local file_size_kb
    file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" | cut -f1)

    # Compress for storage in background
    gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" &

    print_success "Bot classification complete ($classified_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}
#############################################################################
# Threat Detection
#############################################################################
#######################################
# Scan every parsed request for attack signatures in a single awk pass and
# summarise the findings.
# Reads:   $TEMP_DIR/parsed_logs.txt
# Writes (all under $TEMP_DIR):
#   attack_vectors_raw.txt   ip|domain|url|status|type, one line per hit
#   attack_types.txt         hit count per attack type
#   <type>_attempts.txt      counted ip|domain|url|status for sqli, xss,
#                            path_traversal, rce_upload, info_disclosure and
#                            login_bruteforce (only when any attack was seen)
#   admin_probes.txt, 404_scans.txt, large_transfers.txt,
#   suspicious_ua.txt, response_codes.txt   counted summaries (always created)
# NOTE(review): config_exposure hits are counted in attack_types.txt but get
# no per-type file - confirm that is intentional.
#######################################
detect_threats() {
    print_info "Detecting security threats..."

    # Use a single AWK pass for multiple threat detections (more efficient).
    # The output directory is handed over with -v instead of being spliced
    # into the program text, so a quote character in the path cannot break
    # the awk source.
    awk -F'|' -v out_dir="$TEMP_DIR" '
    BEGIN {
        attack_file   = out_dir "/attack_vectors_raw.txt"
        probe_file    = out_dir "/admin_probes_raw.txt"
        scan_file     = out_dir "/404_scans_raw.txt"
        transfer_file = out_dir "/large_transfers_raw.txt"
        agent_file    = out_dir "/suspicious_ua_raw.txt"
        code_file     = out_dir "/response_codes_raw.txt"
    }
    {
        ip = $1
        domain = $2
        url = $3
        status = $4
        size = $5
        ua = $6
        method = $7
        url_lower = tolower(url)
        ua_lower = tolower(ua)

        # SQL Injection patterns (enhanced)
        # FIXED: Hex pattern now requires SQL context to avoid false positives on blockchain/product IDs
        if (match(url_lower, /union.*select|concat\(|benchmark\(|sleep\(|waitfor|cast\(|exec\(/) ||
            match(url_lower, /information_schema|drop table|insert into|update.*set|delete from/) ||
            match(url_lower, /%27.*(union|select|or |and )|hex\(|unhex\(|load_file\(/) ||
            match(url_lower, /0x[0-9a-f]+.*(union|select|into|from|where|order)/)) {
            print ip "|" domain "|" url "|" status "|sqli" > attack_file
        }

        # XSS patterns
        # FIXED: DOM-based patterns (document.cookie, .innerhtml) only flagged in query strings
        # This prevents false positives on documentation URLs like /docs/innerhtml-api-guide
        if (match(url_lower, /<script|javascript:|onerror=|onload=|<iframe|eval\(|alert\(/) ||
            match(url_lower, /\?.*(document\.cookie|document\.write|\.innerhtml)/)) {
            print ip "|" domain "|" url "|" status "|xss" > attack_file
        }

        # Path Traversal / LFI
        # FIXED: Added URL-encoded variants (%2e%2e, %5c for backslash)
        # FIXED: Case-insensitive hex encoding support (%5C and %5c)
        if (match(url_lower, /\.\.\/|\.\.\\|%2e%2e|%5c|etc\/passwd|etc\/shadow|boot\.ini|win\.ini/) ||
            match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows(%5c|[\/\\])system32/)) {
            print ip "|" domain "|" url "|" status "|path_traversal" > attack_file
        }

        # Shell upload / RCE attempts
        # FIXED: Removed overly broad "any POST to .php" condition that caused massive false positives
        # Now only detects actual shell commands, known malicious files, and suspicious upload patterns
        if (match(url_lower, /cmd\.exe|\/bin\/bash|\/bin\/sh|phpinfo\(|system\(|exec\(|passthru\(|eval\(/) ||
            match(url_lower, /shell\.php|c99\.php|r57\.php|r00t\.php|backdoor|webshell|cmd\.php|exploit\.php/) ||
            match(url_lower, /base64_decode.*eval|gzinflate.*eval|assert.*\$_/) ||
            (match(url_lower, /\.(php|phtml|php3|php4|php5|phar)\.suspected$/) && method == "POST")) {
            print ip "|" domain "|" url "|" status "|rce_upload" > attack_file
        }

        # Info Disclosure attempts
        # FIXED: Added status code validation - only flag successful access (200/301/302)
        # FIXED: readme pattern now only matches actual files (.txt, .html, .md)
        # FIXED: Added more backup file extensions and URL-encoded variants
        # FIXED: phpinfo now only matches .php files (not documentation URLs)
        # FIXED: Removed sitemap.xml.gz (intentionally public for SEO)
        if (match(url_lower, /\.git\/|\.env|\.sql$|\.bak$|\.old$|\.backup$|\.orig$|\.swp$|\.sav$|~$|config\.php|phpinfo\.php/) ||
            match(url_lower, /readme\.(txt|html|md)$/) ||
            match(url_lower, /web\.config|\.htaccess|\.htpasswd/) ||
            match(url_lower, /database\.sql|backup\.zip|backup\.tar|dump\.sql/)) {
            # Only flag if successful access (200) or redirect (301/302)
            # Failed attempts (404/403) are just scanning, tracked separately
            if (status ~ /^(200|301|302)/) {
                print ip "|" domain "|" url "|" status "|info_disclosure" > attack_file
            }
        }

        # composer.json / package.json - lower severity, only if successful
        if (match(url_lower, /composer\.json|package\.json|package-lock\.json/) && status == "200") {
            print ip "|" domain "|" url "|" status "|config_exposure" > attack_file
        }

        # Login bruteforce
        if (match(url_lower, /wp-login\.php|xmlrpc\.php/) && method == "POST") {
            print ip "|" domain "|" url "|" status "|login_bruteforce" > attack_file
        }

        # Admin/sensitive endpoint probing
        # FIXED: Only count FAILED attempts (403/401/404) - successful logins are legitimate
        if (match(url_lower, /wp-admin|phpmyadmin|admin|administrator|login|wp-login|xmlrpc/) ||
            match(url_lower, /\.env|\.git|\.sql|backup|config\./)) {
            # Only flag failed access attempts (403 Forbidden, 401 Unauthorized, 404 Not Found)
            # Successful access (200/302) means legitimate user or already compromised
            if (status ~ /^(403|401|404)/) {
                print ip "|" domain "|" url > probe_file
            }
        }

        # 404 scanning (reconnaissance)
        if (status == "404" || status == "403") {
            print ip "|" domain "|" url "|" status > scan_file
        }

        # Large data transfers (potential scraping)
        if (size > 1000000) {
            print ip "|" domain "|" url "|" size > transfer_file
        }

        # Suspicious user agents
        if (match(ua_lower, /nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp/) ||
            match(ua_lower, /metasploit|<script|null|python-requests|go-http-client/)) {
            print ip "|" ua > agent_file
        }

        # Track response codes for intelligence
        print status > code_file
    }
    ' < "$TEMP_DIR/parsed_logs.txt"

    # Process attack vectors by type
    local attack_type
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # Overall attack vectors summary
        awk -F'|' '{print $5}' "$TEMP_DIR/attack_vectors_raw.txt" | sort | uniq -c | sort -rn > "$TEMP_DIR/attack_types.txt"

        # Breakdown by attack type
        for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do
            grep "|$attack_type$" "$TEMP_DIR/attack_vectors_raw.txt" | \
                awk -F'|' '{print $1"|"$2"|"$3"|"$4}' | \
                sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt"
        done

        # Old sqli file for backwards compatibility
        if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then
            cp "$TEMP_DIR/sqli_attempts.txt" "$TEMP_DIR/sqli_attempts_legacy.txt"
        fi
    else
        touch "$TEMP_DIR/attack_types.txt"
    fi

    # Turn each remaining raw file into a counted, most-frequent-first
    # summary; an empty summary is created when awk never wrote the raw file.
    local summary
    for summary in admin_probes 404_scans large_transfers suspicious_ua response_codes; do
        if [ -f "$TEMP_DIR/${summary}_raw.txt" ]; then
            sort "$TEMP_DIR/${summary}_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/${summary}.txt"
        else
            touch "$TEMP_DIR/${summary}.txt"
        fi
    done

    print_success "Threat detection complete"
}
#############################################################################
# NEW: Success Rate & Behavior Analysis (Added for accuracy improvement)
#############################################################################
#######################################
# Work out, per client IP, how many requests succeeded and how many failed.
# Reads:   $TEMP_DIR/parsed_logs.txt (ip in field 1, status in field 4)
# Writes (under $TEMP_DIR, always created):
#   ip_success_rates.txt  ip|total|success%|fail%   for every IP
#   high_failure_ips.txt  ip|total|fail%|scanner    fail% >= 80, >= 20 requests
#   high_success_ips.txt  ip|total|success%|scraper success% >= 90, >= 100
#                         requests (only when the scanner rule did not match)
# Percentages are truncated to whole numbers.
#######################################
analyze_success_rates() {
    print_info "Analyzing request success rates and behavior patterns..."

    # awk reads the file through a single redirect (the previous version fed
    # it both a pipe and a process substitution) and receives the output
    # directory via -v rather than having it spliced into the program text.
    awk -F'|' -v out_dir="$TEMP_DIR" '
    BEGIN {
        failure_file = out_dir "/high_failure_ips.txt"
        success_file = out_dir "/high_success_ips.txt"
        rates_file   = out_dir "/ip_success_rates.txt"
    }
    {
        ip = $1
        status = $4

        # Count total requests
        total[ip]++

        if (status ~ /^(200|301|302)/) {
            # Successful responses
            success[ip]++
        } else if (status ~ /^(404|403|401)/) {
            # Failed/blocked responses
            failed[ip]++
        }
    }
    END {
        for (ip in total) {
            success_count = (success[ip] ? success[ip] : 0)
            failed_count = (failed[ip] ? failed[ip] : 0)
            success_rate = (total[ip] > 0) ? int((success_count / total[ip]) * 100) : 0
            fail_rate = (total[ip] > 0) ? int((failed_count / total[ip]) * 100) : 0

            if (fail_rate >= 80 && total[ip] >= 20) {
                # High failure rate indicates scanning/probing
                print ip "|" total[ip] "|" fail_rate "|scanner" > failure_file
            } else if (success_rate >= 90 && total[ip] >= 100) {
                # Very high success rate + high volume could be scraping
                print ip "|" total[ip] "|" success_rate "|scraper" > success_file
            }

            # Output all rates for later analysis
            print ip "|" total[ip] "|" success_rate "|" fail_rate > rates_file
        }
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Make sure all three files exist even when awk had nothing to write
    touch "$TEMP_DIR/high_failure_ips.txt" "$TEMP_DIR/high_success_ips.txt" "$TEMP_DIR/ip_success_rates.txt"

    print_success "Success rate analysis complete"
}
#############################################################################
# Botnet Detection
#############################################################################
#######################################
# Look for signs of coordinated activity in the parsed requests.
# Reads:   $TEMP_DIR/parsed_logs.txt
# Writes (under $TEMP_DIR):
#   coordinated_urls.txt     URLs that more than 5 IPs each requested more
#                            than 10 times
#   ip_ua_pairs.txt          distinct ip|user_agent combinations
#   suspicious_networks.txt  "count a.b.c.0/24" for IPv4 /24 ranges with more
#                            than 20 requests
#   rapid_fire_ips.txt       "total ip peak" for IPs with more than 50
#                            requests inside one minute; total sums only
#                            those busy minutes, peak is the busiest one
#######################################
detect_botnets() {
    print_info "Analyzing for botnet patterns..."

    local parsed="$TEMP_DIR/parsed_logs.txt"

    # Pattern 1: Multiple IPs hitting same URLs in coordinated manner
    awk -F'|' '{print $1"|"$3}' < "$parsed" | \
        sort | uniq -c | awk '$1 > 10 {print $2}' | \
        cut -d'|' -f2 | sort | uniq -c | sort -rn | \
        awk '$1 > 5 {print $2}' > "$TEMP_DIR/coordinated_urls.txt"

    # Pattern 2: IPs with similar User-Agents hitting multiple domains
    awk -F'|' '{print $1"|"$6}' < "$parsed" | \
        sort -u > "$TEMP_DIR/ip_ua_pairs.txt"

    # Pattern 3: Detect IP ranges (Class C networks) with suspicious activity.
    # Only dotted-quad IPv4 addresses are grouped; an IPv6 address has no
    # dots and previously produced entries such as "2001:db8::1...0/24".
    awk -F'|' '$1 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {
        split($1, octet, ".")
        print octet[1] "." octet[2] "." octet[3] ".0/24"
    }' < "$parsed" | \
        sort | uniq -c | sort -rn | awk '$1 > 20' > "$TEMP_DIR/suspicious_networks.txt"

    # Pattern 4: Rapid fire requests (DDoS indicators)
    # Count requests per IP per minute. The timestamp (DD/MMM/YYYY:HH:MM...)
    # is located with POSIX two-argument match() and cut apart by position,
    # so no gawk extensions are needed.
    awk -F'|' '{
        if (match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            stamp = substr($8, RSTART, RLENGTH)
            # Key layout: YYYY MMM DD "_" HH MM
            time_key = substr(stamp, 8, 4) substr(stamp, 4, 3) substr(stamp, 1, 2) "_" substr(stamp, 13, 2) substr(stamp, 16, 2)
            print $1 "|" time_key
        }
    }' < "$parsed" | \
        sort | uniq -c | \
        awk '$1 > 50 {
            # $1 = requests in that minute, $2 = ip|minute
            ip = $2
            sub(/\|.*/, "", ip)
            sum[ip] += $1
            if ($1 > max[ip]) max[ip] = $1
        }
        END {
            for (ip in sum) print sum[ip], ip, max[ip]
        }' | \
        sort -rn > "$TEMP_DIR/rapid_fire_ips.txt"

    print_success "Botnet analysis complete"
}
#############################################################################
# Server IP Detection
#############################################################################
#######################################
# Collect the IPv4 addresses that belong to this server so they can be left
# out of the threat analysis.
# Sources: hostname -I, ip addr, ifconfig, one public "what is my IP"
#          service, and /var/cpanel/mainip when present.
# Writes:  $TEMP_DIR/server_ips.txt (sorted, unique, no blank lines)
# Outputs: result message via print_success / print_warning
#######################################
detect_server_ips() {
    print_info "Detecting server's own IP addresses..."

    local ip_list="$TEMP_DIR/server_ips.txt"
    local service public_ip server_ip_count
    # Pulls the address that follows "inet" (with or without the legacy
    # "addr:" prefix) out of ip/ifconfig output; awk replaces grep -P so no
    # PCRE-enabled grep is required.
    local inet_filter='$1 == "inet" {
        addr = $2
        sub(/^addr:/, "", addr)
        if (match(addr, /^[0-9.]+/)) print substr(addr, 1, RLENGTH)
    }'

    : > "$ip_list"

    # Method 1: Get all IPs from network interfaces
    if command -v hostname >/dev/null 2>&1; then
        hostname -I 2>/dev/null | tr ' ' '\n' | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' >> "$ip_list"
    fi

    # Method 2: Parse ip addr output
    if command -v ip >/dev/null 2>&1; then
        ip addr show 2>/dev/null | awk "$inet_filter" >> "$ip_list"
    fi

    # Method 3: Try ifconfig as fallback
    if command -v ifconfig >/dev/null 2>&1; then
        ifconfig 2>/dev/null | awk "$inet_filter" >> "$ip_list"
    fi

    # Method 4: Get public IP from external services (with timeout).
    # Several services are tried for reliability; the first valid answer
    # wins. HTTPS is used because the answer decides which IP is excluded
    # from analysis and must not be trivially spoofable in transit.
    if command -v curl >/dev/null 2>&1; then
        for service in "ifconfig.me/ip" "icanhazip.com" "ipecho.net/plain" "api.ipify.org"; do
            public_ip=$(curl -s --max-time 3 "https://$service" 2>/dev/null | grep -oE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$')
            if [ -n "$public_ip" ]; then
                echo "$public_ip" >> "$ip_list"
                break
            fi
        done
    fi

    # Method 5: Check cPanel server IP if available
    if [ -f "/var/cpanel/mainip" ]; then
        cat /var/cpanel/mainip >> "$ip_list"
    fi

    # Remove duplicates and empty lines
    sort -u "$ip_list" | grep -v '^$' > "$TEMP_DIR/server_ips_final.txt"
    mv "$TEMP_DIR/server_ips_final.txt" "$ip_list"

    server_ip_count=$(wc -l < "$ip_list" 2>/dev/null || echo 0)
    if [ "$server_ip_count" -gt 0 ]; then
        print_success "Detected $server_ip_count server IP(s) - these will be excluded from threat analysis"
    else
        print_warning "Could not detect server IPs automatically - proceeding without server IP filtering"
    fi
}
#######################################
# Helper function to validate IP address format.
# Arguments: $1 - candidate address
# Returns:   0 for a dotted-quad IPv4 address whose octets are all <= 255,
#            or for a string that passes the simplified IPv6 shape check
#            (2-7 colon-terminated groups of up to four hex digits plus a
#            final group); 1 otherwise
#######################################
is_valid_ip() {
    local ip="$1"
    local octet   # kept local so callers' variables are not overwritten

    # IPv4 validation
    if [[ "$ip" =~ ^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$ ]]; then
        for octet in "${BASH_REMATCH[@]:1}"; do
            # 10# keeps octets with leading zeros (e.g. "08") decimal
            (( 10#$octet <= 255 )) || return 1
        done
        return 0 # Valid IPv4
    fi

    # IPv6 basic validation (simplified)
    if [[ "$ip" =~ ^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$ ]]; then
        return 0 # Valid IPv6
    fi

    return 1 # Invalid
}
#######################################
# Helper function to check if an IP should be excluded from analysis.
# Arguments: $1 - address to test
# Returns:   0 (exclude) when the address is malformed, is loopback, private
#            or link-local, or is listed in $TEMP_DIR/server_ips.txt;
#            1 (keep) otherwise
#######################################
is_excluded_ip() {
    local ip="$1"

    # Anything that is not a well-formed address is excluded outright
    is_valid_ip "$ip" || return 0

    # Loopback, private and link-local prefixes
    case "$ip" in
        127.*|10.*|192.168.*|169.254.*|localhost|::1)
            return 0
            ;;
    esac

    # 172.16.0.0 - 172.31.255.255
    if [[ "$ip" =~ ^172\.(1[6-9]|2[0-9]|3[01])\. ]]; then
        return 0
    fi

    # The server's own addresses, when they have been collected
    if [ -f "$TEMP_DIR/server_ips.txt" ] \
        && grep -qFx "$ip" "$TEMP_DIR/server_ips.txt" 2>/dev/null; then
        return 0
    fi

    return 1
}
#############################################################################
# Time-Series Analysis
#############################################################################
#######################################
# Build hourly histograms of bot traffic and attack traffic.
# Globals:
#   TEMP_DIR (read)
# Reads:
#   $TEMP_DIR/classified_bots.txt     '|'-separated; field 8 = timestamp,
#                                     field 9 = bot type
#   $TEMP_DIR/attack_vectors_raw.txt  optional; field 1 = IP
#   $TEMP_DIR/parsed_logs.txt         field 1 = IP, field 8 = timestamp
# Writes (both in `uniq -c` format: "<count> <HH>"):
#   $TEMP_DIR/hourly_bot_traffic.txt
#   $TEMP_DIR/hourly_attack_traffic.txt (only when attack_vectors_raw.txt exists)
#######################################
analyze_time_series() {
    print_info "Analyzing time-series patterns..."

    # Shared awk helper. The original relied on match(s, re, array), which is a
    # gawk-only extension; this form uses only RSTART/substr and bracket
    # expressions, so it also runs on mawk and BusyBox awk.
    local hour_fn='
function emit_hour(ts) {
    # Timestamp shape is dd/Mon/yyyy:HH:MM:SS, so HH starts 12 characters in
    if (match(ts, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
        print substr(ts, RSTART + 12, 2)
    }
}'

    # Hourly bot traffic: every classified request that is not "unknown"
    awk -F'|' "${hour_fn}"'
$9 != "unknown" { emit_hour($8) }' "$TEMP_DIR/classified_bots.txt" \
        | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"

    # Hourly attack traffic: requests from any IP present in the attack file
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        awk -F'|' "${hour_fn}"'
NR == FNR { ips[$1] = 1; next }
$1 in ips { emit_hour($8) }' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" \
            | sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
    fi

    print_success "Time-series analysis complete"
}
#############################################################################
# Threat Scoring
#############################################################################
#######################################
# Print the IP column of a "<count> <ip>[|extra...]" threat file.
# Arguments:
#   $1 - path to the threat file (may be missing)
# Outputs:
#   one non-empty IP per line; nothing when the file does not exist
#######################################
_threat_file_ips() {
    [ -f "$1" ] || return 0
    awk '{ ip = $2; sub(/\|.*/, "", ip); if (ip != "") print ip }' "$1"
}

#######################################
# Print "<total> <ip>" for a "<count> <ip>[|extra...]" threat file.
# These files carry one line per ip|domain|url combination, so the counts of
# all lines belonging to the same IP are summed.
# Arguments:
#   $1 - path to the threat file (may be missing)
#######################################
_threat_file_ip_totals() {
    [ -f "$1" ] || return 0
    awk '{ ip = $2; sub(/\|.*/, "", ip); if (ip != "") total[ip] += $1 }
         END { for (ip in total) printf "%d %s\n", total[ip], ip }' "$1"
}

#######################################
# Compute a 0-100 threat score for every non-excluded IP in the parsed log.
# Globals:
#   TEMP_DIR (read)
# Reads (all under $TEMP_DIR, all optional except parsed_logs.txt):
#   parsed_logs.txt, classified_bots.txt, sqli_attempts.txt, xss_attempts.txt,
#   path_traversal_attempts.txt, rce_upload_attempts.txt,
#   login_bruteforce_attempts.txt, suspicious_ua.txt, rapid_fire_ips.txt,
#   admin_probes.txt, 404_scans.txt, high_failure_ips.txt, high_success_ips.txt
# Writes:
#   $TEMP_DIR/threat_scores.txt - "score|ip|request_count", highest score first,
#   only IPs with score > 0
# Side effects:
#   calls increment_ip_hits / flag_ip_attack in background subshells for every
#   scored IP and waits for them before returning
#######################################
calculate_threat_scores() {
    print_info "Calculating threat scores..."

    local ip count total rate category
    local score req_count fail_rate admin_count scan_404
    local unsorted_scores="$TEMP_DIR/threat_scores_unsorted.txt"

    # Pre-count requests per IP in a single awk pass. The keys come from log
    # data, so they are only ever used in plain assignments; evaluating them
    # inside (( )) would re-expand the subscript and could run embedded
    # command substitutions.
    declare -A ip_request_counts
    while IFS='|' read -r count ip; do
        ip_request_counts["$ip"]=$count
    done < <(awk -F'|' '$1 != "" { hits[$1]++ } END { for (ip in hits) print hits[ip] "|" ip }' \
        "$TEMP_DIR/parsed_logs.txt")

    # Build hash tables from threat files for O(1) lookups
    declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
    declare -A threat_ips_suspicious threat_ips_ddos threat_admin_count threat_404_count

    while IFS= read -r ip; do
        threat_ips_sqli["$ip"]=1
    done < <(_threat_file_ips "$TEMP_DIR/sqli_attempts.txt")
    while IFS= read -r ip; do
        threat_ips_xss["$ip"]=1
    done < <(_threat_file_ips "$TEMP_DIR/xss_attempts.txt")
    while IFS= read -r ip; do
        threat_ips_path["$ip"]=1
    done < <(_threat_file_ips "$TEMP_DIR/path_traversal_attempts.txt")
    while IFS= read -r ip; do
        threat_ips_rce["$ip"]=1
    done < <(_threat_file_ips "$TEMP_DIR/rce_upload_attempts.txt")
    while IFS= read -r ip; do
        threat_ips_login["$ip"]=1
    done < <(_threat_file_ips "$TEMP_DIR/login_bruteforce_attempts.txt")
    while IFS= read -r ip; do
        threat_ips_suspicious["$ip"]=1
    done < <(_threat_file_ips "$TEMP_DIR/suspicious_ua.txt")
    while IFS= read -r ip; do
        threat_ips_ddos["$ip"]=1
    done < <(_threat_file_ips "$TEMP_DIR/rapid_fire_ips.txt")

    # Count-based threat files: total per IP across all of its lines
    while read -r count ip; do
        threat_admin_count["$ip"]=$count
    done < <(_threat_file_ip_totals "$TEMP_DIR/admin_probes.txt")
    while read -r count ip; do
        threat_404_count["$ip"]=$count
    done < <(_threat_file_ip_totals "$TEMP_DIR/404_scans.txt")

    # Bot classifications: legitimate bots are exempt from volume scoring
    declare -A legit_bot_ips
    if [ -f "$TEMP_DIR/classified_bots.txt" ]; then
        while IFS= read -r ip; do
            legit_bot_ips["$ip"]=1
        done < <(awk -F'|' '$9 == "legit" && $1 != "" && !seen[$1]++ { print $1 }' \
            "$TEMP_DIR/classified_bots.txt")
    fi

    # Success-rate data for scanning/scraping detection
    declare -A scanner_ips scraper_ips
    if [ -f "$TEMP_DIR/high_failure_ips.txt" ]; then
        while IFS='|' read -r ip total rate category; do
            if [ -n "$ip" ]; then
                scanner_ips["$ip"]=$rate
            fi
        done < "$TEMP_DIR/high_failure_ips.txt"
    fi
    if [ -f "$TEMP_DIR/high_success_ips.txt" ]; then
        while IFS='|' read -r ip total rate category; do
            if [ -n "$ip" ]; then
                scraper_ips["$ip"]=$rate
            fi
        done < "$TEMP_DIR/high_success_ips.txt"
    fi

    # Score every IP. The loop runs in the current shell (output redirected to
    # a scratch file) so the background reputation jobs are real children of
    # this shell and the `wait` below genuinely blocks on them.
    for ip in "${!ip_request_counts[@]}"; do
        # Skip excluded IPs
        if is_excluded_ip "$ip"; then
            continue
        fi

        score=0
        req_count=${ip_request_counts[$ip]}

        # Base request volume scoring
        # Skip volume scoring for legitimate bots (Google, Bing, etc.)
        if [ -z "${legit_bot_ips[$ip]:-}" ]; then
            if [ "$req_count" -gt 10000 ]; then score=$((score + 10))
            elif [ "$req_count" -gt 5000 ]; then score=$((score + 8))
            elif [ "$req_count" -gt 1000 ]; then score=$((score + 5))
            elif [ "$req_count" -gt 500 ]; then score=$((score + 3))
            fi
        fi

        # High failure rate (80%+ 404/403) = scanning behavior.
        # Any fractional part is dropped so values like "87.5" still compare.
        fail_rate=${scanner_ips[$ip]:-}
        fail_rate=${fail_rate%%.*}
        if [[ "$fail_rate" =~ ^[0-9]+$ ]]; then
            if (( 10#$fail_rate >= 90 )); then
                score=$((score + 8)) # Very high failure rate
            elif (( 10#$fail_rate >= 80 )); then
                score=$((score + 5)) # High failure rate
            fi
        fi

        # High success rate (90%+ 200/301/302) + high volume = potential scraping
        if [ -n "${scraper_ips[$ip]:-}" ] && [ "$req_count" -gt 500 ]; then
            score=$((score + 7)) # Scraping behavior
        fi

        # Attack patterns
        [ -n "${threat_ips_sqli[$ip]:-}" ] && score=$((score + 15))
        [ -n "${threat_ips_xss[$ip]:-}" ] && score=$((score + 12))
        [ -n "${threat_ips_path[$ip]:-}" ] && score=$((score + 15))
        [ -n "${threat_ips_rce[$ip]:-}" ] && score=$((score + 20))
        [ -n "${threat_ips_login[$ip]:-}" ] && score=$((score + 10))
        [ -n "${threat_ips_suspicious[$ip]:-}" ] && score=$((score + 10))
        [ -n "${threat_ips_ddos[$ip]:-}" ] && score=$((score + 10))

        # Admin probing (thresholds: >100 excessive, >50 moderate)
        admin_count=${threat_admin_count[$ip]:-0}
        if [ "$admin_count" -gt 100 ] 2>/dev/null; then
            score=$((score + 10)) # Excessive probing
        elif [ "$admin_count" -gt 50 ] 2>/dev/null; then
            score=$((score + 5)) # Moderate probing
        fi

        # 404 scanning
        scan_404=${threat_404_count[$ip]:-0}
        if [ "$scan_404" -gt 50 ] 2>/dev/null; then
            score=$((score + 3))
        fi

        # OPTIMIZATION: Skip external API calls for performance
        # Threat Intelligence Enrichment can be done post-analysis for high-risk IPs only
        # Uncommenting these will SIGNIFICANTLY slow down analysis (API calls for every IP)
        #
        # To enable threat intelligence enrichment:
        # 1. Uncomment the code below
        # 2. Ensure check_abuseipdb, get_country_code, and is_high_risk_country functions exist
        # 3. Be aware this will make thousands of API calls and take much longer
        #
        # local abuse_data=$(check_abuseipdb "$ip" 2>/dev/null || echo "0|0|Unknown|Unknown")
        # IFS='|' read -r abuse_confidence abuse_reports abuse_country abuse_isp <<< "$abuse_data"
        #
        # if [ "$abuse_confidence" -ge 75 ]; then
        #     score=$((score + 15)) # High confidence malicious
        # elif [ "$abuse_confidence" -ge 50 ]; then
        #     score=$((score + 8)) # Moderate confidence
        # elif [ "$abuse_confidence" -ge 25 ]; then
        #     score=$((score + 3)) # Low confidence
        # fi
        #
        # local geo_country=$(get_country_code "$ip" 2>/dev/null || echo "XX")
        # if is_high_risk_country "$geo_country" 2>/dev/null; then
        #     score=$((score + 5)) # High-risk country bonus
        # fi

        # Cap at 100
        if [ "$score" -gt 100 ]; then
            score=100
        fi

        # Only IPs with score > 0 are reported and tracked
        if [ "$score" -gt 0 ]; then
            echo "$score|$ip|$req_count"

            # Track in centralized IP reputation database (background process)
            (
                # Update IP with hit count
                increment_ip_hits "$ip" "$req_count"
                # Tag with specific attack types found
                [ -n "${threat_ips_sqli[$ip]:-}" ] && flag_ip_attack "$ip" "SQL_INJECTION" 0 "Bot analyzer: SQL injection attempts"
                [ -n "${threat_ips_xss[$ip]:-}" ] && flag_ip_attack "$ip" "XSS" 0 "Bot analyzer: XSS attempts"
                [ -n "${threat_ips_path[$ip]:-}" ] && flag_ip_attack "$ip" "PATH_TRAVERSAL" 0 "Bot analyzer: Path traversal"
                [ -n "${threat_ips_rce[$ip]:-}" ] && flag_ip_attack "$ip" "RCE" 0 "Bot analyzer: RCE/shell upload attempts"
                [ -n "${threat_ips_login[$ip]:-}" ] && flag_ip_attack "$ip" "BRUTEFORCE" 0 "Bot analyzer: Login bruteforce"
                [ -n "${threat_ips_ddos[$ip]:-}" ] && flag_ip_attack "$ip" "DDOS" 0 "Bot analyzer: Rapid-fire requests"
                [ -n "${threat_ips_suspicious[$ip]:-}" ] && flag_ip_attack "$ip" "SCANNER" 0 "Bot analyzer: Suspicious user-agent"
            ) >/dev/null 2>&1 &
        fi
    done > "$unsorted_scores"

    # Wait for background IP reputation updates to complete
    wait

    sort -t'|' -k1 -rn "$unsorted_scores" > "$TEMP_DIR/threat_scores.txt"
    rm -f -- "$unsorted_scores"

    print_success "Threat scores calculated and IP reputation updated"
}
#############################################################################
# False Positive Detection
#############################################################################
#######################################
# Flag requests that belong to well-known benign services so they can be
# offered as whitelist candidates instead of threats.
# Globals:
#   TEMP_DIR (read)
# Reads:
#   $TEMP_DIR/parsed_logs.txt - '|'-separated; 1 = IP, 2 = domain, 3 = URL,
#                               6 = user-agent
# Writes:
#   $TEMP_DIR/false_positives.txt - unique "ip|service|lowercased-ua|domain"
#######################################
detect_false_positives() {
    print_info "Detecting legitimate services (false positives)..."

    # First matching rule wins; the order below is significant
    awk -F'|' '
    {
        agent = tolower($6)
        label = ""

        if (agent ~ /pingdom/) {
            label = "Pingdom Monitoring"
        } else if (agent ~ /uptimerobot/) {
            label = "UptimeRobot Monitoring"
        } else if (agent ~ /statuscake/) {
            label = "StatusCake Monitoring"
        } else if ($3 ~ /admin-ajax\.php.*cache_preload/ || $3 ~ /admin-ajax\.php.*wphb/) {
            # WordPress cache preload (WP Rocket, Hummingbird)
            label = "WordPress Cache Preload"
        } else if (agent ~ /jetpack|vaultpress|updraftplus/) {
            label = "Backup Service"
        }

        if (label != "") {
            print $1 "|" label "|" agent "|" $2
        }
    }' "$TEMP_DIR/parsed_logs.txt" | sort -u > "$TEMP_DIR/false_positives.txt"

    print_success "False positive detection complete"
}
#############################################################################
# Statistical Analysis
#############################################################################
#######################################
# Produce the "top N" and breakdown files consumed by the report.
# Globals:
#   TEMP_DIR (read)
# Reads:
#   $TEMP_DIR/parsed_logs.txt      1 = IP, 2 = domain, 3 = URL
#   $TEMP_DIR/classified_bots.txt  2 = domain, 9 = bot type, 10 = bot name
#   $TEMP_DIR/all_domains.txt      optional; one domain per line
# Writes (under $TEMP_DIR):
#   top_sites.txt, top_ips.txt, top_urls.txt  - top 5, "<count> <key>"
#   top_bots.txt, traffic_breakdown.txt       - `uniq -c` format
#   domain_bot_types.txt                      - "domain|bot_type" per request
#   domain_<domain>_stats.txt                 - domain name on line 1, then
#                                               per-type counts for that domain
#######################################
generate_statistics() {
    print_info "Generating statistics..."

    local domain

    # awk only opens these files when it has something to write; create them up
    # front so an empty log yields empty results instead of sort errors
    : > "$TEMP_DIR/top_sites_raw.txt"
    : > "$TEMP_DIR/top_ips_raw.txt"
    : > "$TEMP_DIR/top_urls_raw.txt"

    # Single pass over the parsed log for all three counters.
    # The output directory is passed with -v rather than spliced into the
    # program text, so it can never be parsed as awk code.
    awk -F'|' -v outdir="$TEMP_DIR" '
    {
        domains[$2]++           # requests per domain
        ips[$1]++               # requests per IP
        urls[$2 "|" $3]++       # requests per domain+URL
    }
    END {
        sites_file = outdir "/top_sites_raw.txt"
        ips_file   = outdir "/top_ips_raw.txt"
        urls_file  = outdir "/top_urls_raw.txt"
        for (d in domains) print domains[d], d > sites_file
        for (i in ips)     print ips[i], i     > ips_file
        for (u in urls)    print urls[u], u    > urls_file
    }' "$TEMP_DIR/parsed_logs.txt"

    # Sort and limit results
    sort -rn "$TEMP_DIR/top_sites_raw.txt" | head -5 > "$TEMP_DIR/top_sites.txt"
    sort -rn "$TEMP_DIR/top_ips_raw.txt" | head -5 > "$TEMP_DIR/top_ips.txt"
    sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt"

    # Top 5 bots by request count
    awk -F'|' '$9 != "unknown" {print $10}' "$TEMP_DIR/classified_bots.txt" \
        | sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt"

    # Traffic breakdown by bot type
    awk -F'|' '{print $9}' "$TEMP_DIR/classified_bots.txt" \
        | sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt"

    # Per-domain traffic sources
    if [ -f "$TEMP_DIR/all_domains.txt" ]; then
        # Narrow index of domain|type so each per-domain pass reads less data
        awk -F'|' '{print $2"|"$9}' "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/domain_bot_types.txt"

        while read -r domain; do
            [ -n "$domain" ] || continue
            {
                printf '%s\n' "$domain"
                # Exact string comparison: a regex here would let the dots in
                # "a.com" match unrelated domains such as "axcom"
                awk -F'|' -v want="$domain" '($1 "") == want { print $2 }' "$TEMP_DIR/domain_bot_types.txt" \
                    | sort | uniq -c | sort -rn
            } > "$TEMP_DIR/domain_${domain}_stats.txt"
        done < "$TEMP_DIR/all_domains.txt"
    fi

    print_success "Statistics generated"
}
#############################################################################
# Report Generation
#############################################################################
#######################################
# Print part/whole as a percentage with one decimal place.
# Prints 0.0 instead of dividing by zero when whole is empty or zero.
# Arguments: $1 - part, $2 - whole
#######################################
_report_pct() {
    awk -v part="${1:-0}" -v whole="${2:-0}" \
        'BEGIN { printf "%.1f", (whole > 0 ? (part / whole) * 100 : 0) }'
}

#######################################
# Print string $1 repeated $2 times (nothing when $2 <= 0).
#######################################
_report_repeat() {
    local out="" i
    for (( i = 0; i < $2; i++ )); do
        out+="$1"
    done
    printf '%s' "$out"
}

#######################################
# Succeed when a "<count> <ip>[|extra...]" file has a line for exactly IP $1.
# Arguments: $1 - IP, $2 - file (may be missing)
#######################################
_report_threat_file_has_ip() {
    [ -f "$2" ] || return 1
    awk -v want="$1" '
        { ip = $2; sub(/\|.*/, "", ip); if (ip == want) { hit = 1; exit } }
        END { exit !hit }' "$2" 2>/dev/null
}

#######################################
# Succeed when a "|"-separated file has a line whose first field is exactly $1.
# Arguments: $1 - IP, $2 - file (may be missing)
#######################################
_report_pipe_file_has_ip() {
    [ -f "$2" ] || return 1
    awk -F'|' -v want="$1" '
        ($1 "") == want { hit = 1; exit }
        END { exit !hit }' "$2" 2>/dev/null
}

#######################################
# Render the full analysis report to stdout and to $OUTPUT_FILE.
# Globals:
#   TEMP_DIR, OUTPUT_FILE, RED, YELLOW, NC (read)
#   Working variables (alert_count, total_requests, threat_score,
#   high_risk_count, ...) are intentionally left global exactly as before.
#   NOTE(review): confirm whether later code reads any of them before
#   localising.
# Reads (under $TEMP_DIR): attack_types.txt, suspicious_ua.txt,
#   rapid_fire_ips.txt, suspicious_networks.txt, parsed_logs.txt,
#   classified_bots.txt, server_ips.txt, traffic_breakdown.txt,
#   hourly_bot_traffic.txt, response_codes.txt, false_positives.txt,
#   threat_scores.txt, the *_attempts.txt files, top_bots.txt,
#   admin_probes.txt, 404_scans.txt, large_transfers.txt, top_sites.txt,
#   domain_<domain>_stats.txt, attack_vectors_raw.txt
# Side effects:
#   Redirects the shell's stdout through `tee "$OUTPUT_FILE"` for the rest of
#   the process; the redirect is not undone here.
#######################################
generate_report() {
    exec > >(tee "$OUTPUT_FILE")

    echo "==============================================================="
    echo " APACHE/CPANEL BOT & BOTNET ANALYSIS REPORT"
    echo " Generated: $(date '+%Y-%m-%d %H:%M:%S')"
    echo "==============================================================="

    # CRITICAL ALERTS SECTION
    print_header "CRITICAL ALERTS"
    alert_count=0

    # Check for attack vectors
    if [ -s "$TEMP_DIR/attack_types.txt" ]; then
        print_alert "Security Attack Vectors Detected:"
        while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            attack_type=$(echo "$line" | awk '{print $2}')
            case $attack_type in
                sqli) echo " SQL Injection: $count attempts" ;;
                xss) echo " XSS Attacks: $count attempts" ;;
                path_traversal) echo " Path Traversal: $count attempts" ;;
                rce_upload) echo " RCE/Shell Upload: $count attempts" ;;
                info_disclosure) echo " Info Disclosure: $count attempts" ;;
                login_bruteforce) echo " Login Bruteforce: $count attempts" ;;
            esac
        done < "$TEMP_DIR/attack_types.txt"
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # Check for suspicious scanners
    if [ -s "$TEMP_DIR/suspicious_ua.txt" ]; then
        scanner_count=$(wc -l < "$TEMP_DIR/suspicious_ua.txt")
        print_alert "Malicious scanners detected: $scanner_count IPs"
        echo " Top scanners:"
        head -3 "$TEMP_DIR/suspicious_ua.txt" | while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
            ua=$(echo "$line" | cut -d'|' -f2)
            printf " %s requests - IP: %s - UA: %s\n" "$count" "$ip" "$ua"
        done
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # Check for rapid-fire IPs (potential DDoS)
    if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
        ddos_count=$(wc -l < "$TEMP_DIR/rapid_fire_ips.txt")
        print_alert "Potential DDoS sources: $ddos_count IPs with >50 req/min"
        echo " Top offenders:"
        head -3 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print " "$2" - "$1" rapid requests"}'
        echo ""
        alert_count=$((alert_count + 1))
    fi

    # Check for suspicious networks
    if [ -s "$TEMP_DIR/suspicious_networks.txt" ]; then
        net_count=$(wc -l < "$TEMP_DIR/suspicious_networks.txt")
        print_alert "Suspicious networks detected: $net_count Class C ranges"
        echo " Top networks:"
        head -3 "$TEMP_DIR/suspicious_networks.txt" | awk '{print " "$2" - "$1" requests"}'
        echo ""
        alert_count=$((alert_count + 1))
    fi

    if [ "${alert_count:-0}" -eq 0 ]; then
        print_success "No critical threats detected"
    fi

    # QUICK STATS DASHBOARD
    print_header "QUICK STATS DASHBOARD"
    total_requests=$(cat "$TEMP_DIR/parsed_logs.txt" | wc -l)
    unique_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | wc -l)
    unique_domains=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $2}' | sort -u | wc -l)
    bot_requests=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown"' | wc -l)

    # Count private/internal IPs (excluded from threat analysis)
    private_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' | wc -l)

    # Count server's own IPs in the logs (exact first-field match, not a regex)
    server_ip_hits=0
    if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
        while read -r server_ip; do
            if _report_pipe_file_has_ip "$server_ip" "$TEMP_DIR/parsed_logs.txt"; then
                server_ip_hits=$((server_ip_hits + 1))
            fi
        done < "$TEMP_DIR/server_ips.txt"
    fi

    echo "Total Requests: $(printf "%'d" "$total_requests")"
    echo "Unique IPs: $(printf "%'d" "$unique_ips")"

    # Show breakdown if we have excluded IPs
    if [ "$private_ips" -gt 0 ] || [ "$server_ip_hits" -gt 0 ]; then
        excluded_total=$((private_ips + server_ip_hits))
        echo " ├─ Excluded IPs: $(printf "%'d" "$excluded_total")"
        [ "$private_ips" -gt 0 ] && echo " │ ├─ Private/Internal: $private_ips"
        [ "$server_ip_hits" -gt 0 ] && echo " │ └─ Server's own: $server_ip_hits"
        echo " └─ External IPs: $(printf "%'d" $((unique_ips - excluded_total)))"
    fi

    echo "Domains Analyzed: $unique_domains"
    echo "Bot Requests: $(printf "%'d" "$bot_requests") ($(_report_pct "$bot_requests" "$total_requests")%)"

    # Show detected server IPs
    if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
        echo ""
        echo " Server IPs Detected (excluded from threat analysis):"
        while read -r server_ip; do
            echo "$server_ip"
        done < "$TEMP_DIR/server_ips.txt"
    fi
    echo ""

    # Traffic breakdown
    echo "Traffic Breakdown:"
    while read -r line; do
        count=$(echo "$line" | awk '{print $1}')
        type=$(echo "$line" | awk '{print $2}')
        pct=$(_report_pct "$count" "$total_requests")
        case $type in
            legit) echo " Legitimate Bots: $(printf "%'7d" "$count") ($pct%)" ;;
            ai) echo " AI Bots: $(printf "%'7d" "$count") ($pct%)" ;;
            monitor) echo " 📡 Monitoring/SEO: $(printf "%'7d" "$count") ($pct%)" ;;
            suspicious) echo " Suspicious Bots: $(printf "%'7d" "$count") ($pct%)" ;;
            unidentified_bot) echo " ❓ Unidentified Bots: $(printf "%'7d" "$count") ($pct%)" ;;
            unknown) echo " Regular Traffic: $(printf "%'7d" "$count") ($pct%)" ;;
        esac
    done < "$TEMP_DIR/traffic_breakdown.txt"

    # TIME-SERIES ANALYSIS
    if [ -s "$TEMP_DIR/hourly_bot_traffic.txt" ]; then
        echo ""
        echo "Bot Traffic Timeline (hourly):"
        max_bot_traffic=$(awk '{print $1}' "$TEMP_DIR/hourly_bot_traffic.txt" | sort -rn | head -1)
        # Spike threshold is loop-invariant: more than 2x the hourly average
        avg_traffic=$((total_requests / 24))
        while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            hour=$(echo "$line" | awk '{print $2}')

            # Create simple bar chart (always exactly 10 cells wide).
            # Built with an explicit loop: `printf fmt $(seq 1 0)` would still
            # print the format once and add a stray cell to a full bar.
            bar_width=$((count * 10 / max_bot_traffic))
            [ "${bar_width:-0}" -eq 0 ] && [ "${count:-0}" -gt 0 ] && bar_width=1
            bar=$(_report_repeat '█' "$bar_width")
            spaces=$(_report_repeat '░' "$((10 - bar_width))")

            # Detect spikes (>2x average)
            spike=""
            [ "${count:-0}" -gt $((avg_traffic * 2)) ] && spike=" SPIKE"

            # Strip leading zeros to avoid octal interpretation
            hour_num=$((10#$hour))
            next_hour=$((hour_num + 1))
            printf " %02d:00-%02d:00: %s%s %'6d bot requests%s\n" "$hour_num" "$next_hour" "$bar" "$spaces" "$count" "$spike"
        done < "$TEMP_DIR/hourly_bot_traffic.txt"
    fi

    # RESPONSE CODE INTELLIGENCE
    if [ -s "$TEMP_DIR/response_codes.txt" ]; then
        echo ""
        echo "Response Code Analysis:"
        while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            code=$(echo "$line" | awk '{print $2}')
            pct=$(_report_pct "$count" "$total_requests")
            case $code in
                200) echo " 200 (Success): $(printf "%'7d" "$count") ($pct%) Bots are getting data" ;;
                404) echo " 404 (Not Found): $(printf "%'7d" "$count") ($pct%) Scanning for vulnerabilities" ;;
                403) echo " 403 (Forbidden): $(printf "%'7d" "$count") ($pct%) Blocked by existing rules" ;;
                401) echo " 401 (Unauthorized):$(printf "%'7d" "$count") ($pct%) Login attempts failing" ;;
                500|502|503) echo " $code (Server Error):$(printf "%'7d" "$count") ($pct%) Check if exploit triggered" ;;
                301|302) echo " $code (Redirect): $(printf "%'7d" "$count") ($pct%)" ;;
                *) echo " $code: $(printf "%'7d" "$count") ($pct%)" ;;
            esac
        done < "$TEMP_DIR/response_codes.txt" | head -7
    fi

    # FALSE POSITIVE WARNINGS
    if [ -s "$TEMP_DIR/false_positives.txt" ]; then
        echo ""
        echo "Whitelist Recommendations (Legitimate Services):"
        while read -r line; do
            ip=$(echo "$line" | cut -d'|' -f1)
            service=$(echo "$line" | cut -d'|' -f2)
            domain=$(echo "$line" | cut -d'|' -f4)
            # Exact-IP count; always prints a single number, including 0
            req_count=$(awk -F'|' -v want="$ip" '($1 "") == want { n++ } END { print n + 0 }' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null)
            echo " $ip - ${req_count:-0} requests - Identified as: $service"
            echo " → Domain: $domain"
            echo " → Action: VERIFY OWNERSHIP then whitelist"
        done < "$TEMP_DIR/false_positives.txt" | head -6
    fi

    # TOP 5 THREATS
    print_header "TOP 5 THREATS (with recommended actions)"
    echo "1. Highest Risk IPs (by threat score):"
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        counter=1
        while read -r line && [ "${counter:-0}" -le 10 ]; do
            score=$(echo "$line" | cut -d'|' -f1)
            ip=$(echo "$line" | cut -d'|' -f2)
            count=$(echo "$line" | cut -d'|' -f3)

            # Determine threat level and action based on score
            if [ "$score" -ge 80 ]; then
                threat_level="CRITICAL"
                threat_icon=""
                action="BLOCK IMMEDIATELY + INVESTIGATE"
                echo -e " ${RED}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
            elif [ "$score" -ge 60 ]; then
                threat_level="HIGH"
                threat_icon=""
                action="BLOCK or AGGRESSIVE RATE LIMIT"
                echo -e " ${YELLOW}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
            elif [ "$score" -ge 40 ]; then
                threat_level="MODERATE"
                threat_icon=""
                action="RATE LIMIT RECOMMENDED"
                echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
            else
                threat_level="LOW"
                threat_icon=""
                action="MONITOR"
                echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
            fi
            echo " $count requests - Action: $action"

            # Show which attack vectors this IP used. Matching is on the exact
            # IP column so 1.2.3.4 is never credited with 11.2.3.45's attacks.
            attack_types=""
            _report_threat_file_has_ip "$ip" "$TEMP_DIR/sqli_attempts.txt" && attack_types="${attack_types}SQL-Injection "
            _report_threat_file_has_ip "$ip" "$TEMP_DIR/xss_attempts.txt" && attack_types="${attack_types}XSS "
            _report_threat_file_has_ip "$ip" "$TEMP_DIR/path_traversal_attempts.txt" && attack_types="${attack_types}Path-Traversal "
            _report_threat_file_has_ip "$ip" "$TEMP_DIR/rce_upload_attempts.txt" && attack_types="${attack_types}RCE/Upload "
            _report_threat_file_has_ip "$ip" "$TEMP_DIR/login_bruteforce_attempts.txt" && attack_types="${attack_types}Login-Bruteforce "
            _report_threat_file_has_ip "$ip" "$TEMP_DIR/suspicious_ua.txt" && attack_types="${attack_types}Scanner-UA "
            _report_threat_file_has_ip "$ip" "$TEMP_DIR/rapid_fire_ips.txt" && attack_types="${attack_types}DDoS-Pattern "
            [ -n "$attack_types" ] && echo " Attack vectors: $attack_types"
            counter=$((counter + 1))
        done < "$TEMP_DIR/threat_scores.txt"
    else
        echo " No significant threats detected "
    fi
    echo ""

    echo "2. Top Aggressive Bots:"
    counter=1
    while read -r line && [ "${counter:-0}" -le 5 ]; do
        count=$(echo "$line" | awk 'BEGIN {count=0} {print $1}')
        bot=$(echo "$line" | awk 'BEGIN {f=""} {$1=""; print $0}' | xargs)
        action="Allow"
        if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex"; then
            action="Consider blocking (aggressive)"
        fi
        echo " [$counter] $bot - $count requests - Action: $action"
        counter=$((counter + 1))
    done < "$TEMP_DIR/top_bots.txt"
    echo ""

    echo "3. Admin Endpoint Probing:"
    if [ -s "$TEMP_DIR/admin_probes.txt" ]; then
        head -3 "$TEMP_DIR/admin_probes.txt" | while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
            domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
            url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
            printf " %s attempts - IP: %s - %s%s\n" "$count" "$ip" "$domain" "$url"
        done
        echo " Action: Verify legitimate admin access or block"
    else
        echo " None detected "
    fi
    echo ""

    echo "4. 404 Scanners (Reconnaissance):"
    if [ -s "$TEMP_DIR/404_scans.txt" ]; then
        head -3 "$TEMP_DIR/404_scans.txt" | awk '$1 > 10 {
            count = $1
            $1 = ""
            gsub(/^[[:space:]]+\|?/, "")
            split($0, parts, "|")
            printf " %s failed requests - IP: %s - %s%s\n", count, parts[1], parts[2], parts[3]
        }'
    else
        echo " None detected "
    fi
    echo ""

    echo "5. Large Data Transfers:"
    if [ -s "$TEMP_DIR/large_transfers.txt" ]; then
        # Calculate total bot bandwidth. The existence check targets the file
        # that is actually read (the previous check looked for a .gz sibling).
        # %.0f keeps large sums out of exponent notation for the -gt test.
        total_bot_bandwidth=0
        if [ -f "$TEMP_DIR/classified_bots.txt" ]; then
            total_bot_bandwidth=$(awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {printf "%.0f", sum}' "$TEMP_DIR/classified_bots.txt")
        fi

        if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then
            bot_bandwidth_mb=$(awk "BEGIN {printf \"%.0f\", $total_bot_bandwidth/1048576}")
            bot_bandwidth_gb=$(awk "BEGIN {printf \"%.2f\", $total_bot_bandwidth/1073741824}")
            # Estimate cost at $0.09/GB (typical CDN pricing)
            estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}")
            total_bandwidth=$(awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {printf "%.0f", sum}' "$TEMP_DIR/parsed_logs.txt")
            bot_pct=$(_report_pct "$total_bot_bandwidth" "$total_bandwidth")
            echo ""
            echo " 💰 Bandwidth Impact:"
            echo " Total bot bandwidth: ${bot_bandwidth_mb} MB (${bot_bandwidth_gb} GB) - ${bot_pct}% of total"
            echo " Estimated cost: \$$estimated_cost (at \$0.09/GB CDN pricing)"
        fi

        echo ""
        echo " Top bandwidth consumers:"
        head -3 "$TEMP_DIR/large_transfers.txt" | while read -r line; do
            count=$(echo "$line" | awk '{print $1}')
            ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
            domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
            url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
            size=$(echo "$line" | awk '{print $2}' | cut -d'|' -f4)
            # Values go in via -v so an empty field cannot break the awk program
            size_mb=$(awk -v s="${size:-0}" 'BEGIN {printf "%.1f", s/1048576}')
            total_ip_mb=$(awk -v s="${size:-0}" -v c="${count:-0}" 'BEGIN {printf "%.0f", s * c / 1048576}')
            printf " %s transfers from %s - %.1f MB avg (%s MB total) - %s%s\n" "$count" "$ip" "$size_mb" "$total_ip_mb" "$domain" "$url"
        done
        echo " Action: Verify if scraping, consider serving WebP/optimized images"
    else
        echo " None detected "
    fi

    # TOP 5 TARGETED SITES
    print_header "TOP 5 TARGETED SITES (with risk breakdown)"
    counter=1
    while read -r line && [ "${counter:-0}" -le 5 ]; do
        count=$(echo "$line" | awk '{print $1}')
        domain=$(echo "$line" | awk '{print $2}')
        echo "[$counter] $domain - $count requests"

        # Show traffic breakdown for this domain
        if [ -f "$TEMP_DIR/domain_${domain}_stats.txt" ]; then
            tail -n +2 "$TEMP_DIR/domain_${domain}_stats.txt" | while read -r stat_line; do
                stat_count=$(echo "$stat_line" | awk '{print $1}')
                stat_type=$(echo "$stat_line" | awk '{print $2}')
                pct=$(_report_pct "$stat_count" "$count")
                case $stat_type in
                    suspicious) echo -e " ${YELLOW}Suspicious: $stat_count ($pct%)${NC}" ;;
                    ai) echo " AI Bots: $stat_count ($pct%)" ;;
                    legit) echo " Legit Bots: $stat_count ($pct%)" ;;
                    unknown) echo " Regular: $stat_count ($pct%)" ;;
                    *) echo " $stat_type: $stat_count ($pct%)" ;;
                esac
            done
        fi
        echo ""
        counter=$((counter + 1))
    done < "$TEMP_DIR/top_sites.txt"

    # BLOCKLIST
    print_header "COPY-PASTE READY BLOCKLIST (Prioritized by Threat Score)"
    echo "# Apache .htaccess format:"
    echo "# Add to .htaccess in document root"
    echo "# IPs sorted by risk score (highest first)"
    echo ""

    # Use threat scores to prioritize blocklist (exclude false positives and excluded IPs)
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        # Get IPs with score >= 60 (HIGH and CRITICAL)
        awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
            ip=$(echo "$entry" | cut -d'|' -f1)
            score=$(echo "$entry" | cut -d'|' -f2)
            # Skip excluded IPs (private, localhost, server's own)
            if is_excluded_ip "$ip"; then
                continue
            fi
            # Skip if in false positives
            if [ -s "$TEMP_DIR/false_positives.txt" ] && _report_pipe_file_has_ip "$ip" "$TEMP_DIR/false_positives.txt"; then
                continue
            fi
            echo "Deny from $ip # Risk score: $score/100"
        done
    else
        # Fallback to old method
        {
            [ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
            [ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
            [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
        } | sort -u | head -30 | while read -r ip; do
            echo "Deny from $ip"
        done
    fi

    echo ""
    echo "# cPanel User-Agent blocking (add to /etc/apache2/conf.d/includes/pre_main_global.conf):"
    echo ""
    echo "<IfModule mod_rewrite.c>"
    echo " RewriteEngine On"
    echo " RewriteCond %{HTTP_USER_AGENT} \"(nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp|metasploit)\" [NC]"
    echo " RewriteRule ^ - [F,L]"
    echo "</IfModule>"
    echo ""
    echo "# Optional: Block aggressive SEO bots (uncomment to enable)"
    echo "# <IfModule mod_rewrite.c>"
    echo "# RewriteEngine On"
    echo "# RewriteCond %{HTTP_USER_AGENT} \"(AhrefsBot|SemrushBot|MJ12bot|DotBot|Meta-ExternalAgent|Go-http-client)\" [NC]"
    echo "# RewriteRule ^ - [F,L]"
    echo "# </IfModule>"
    echo ""
    echo "# CSF/iptables format:"
    echo "# Run these commands as root:"
    echo ""

    # Same prioritized list for CSF
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
            ip=$(echo "$entry" | cut -d'|' -f1)
            score=$(echo "$entry" | cut -d'|' -f2)
            # Skip excluded IPs (private, localhost, server's own)
            if is_excluded_ip "$ip"; then
                continue
            fi
            # Skip if in false positives
            if [ -s "$TEMP_DIR/false_positives.txt" ] && _report_pipe_file_has_ip "$ip" "$TEMP_DIR/false_positives.txt"; then
                continue
            fi
            echo "csf -d $ip \"Threat score: $score/100\""
        done
    else
        # Fallback
        {
            [ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
            [ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
            [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
        } | sort -u | head -30 | while read -r ip; do
            echo "csf -d $ip \"Bot/Scanner threat\""
        done
    fi

    # SUMMARY
    print_header "📋 SUMMARY & RECOMMENDATIONS"
    threat_score=0

    # Calculate threat score from attack vectors
    [ -s "$TEMP_DIR/sqli_attempts.txt" ] && threat_score=$((threat_score + 15))
    [ -s "$TEMP_DIR/xss_attempts.txt" ] && threat_score=$((threat_score + 12))
    [ -s "$TEMP_DIR/path_traversal_attempts.txt" ] && threat_score=$((threat_score + 15))
    [ -s "$TEMP_DIR/rce_upload_attempts.txt" ] && threat_score=$((threat_score + 20))
    [ -s "$TEMP_DIR/login_bruteforce_attempts.txt" ] && threat_score=$((threat_score + 10))
    [ -s "$TEMP_DIR/suspicious_ua.txt" ] && threat_score=$((threat_score + 8))
    [ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && threat_score=$((threat_score + 5))
    if [ -f "$TEMP_DIR/admin_probes.txt" ] && [ "$(wc -l < "$TEMP_DIR/admin_probes.txt")" -gt 10 ]; then
        threat_score=$((threat_score + 3))
    fi

    # Count high-risk IPs
    high_risk_count=0
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        high_risk_count=$(awk -F'|' '$1 >= 60' "$TEMP_DIR/threat_scores.txt" | wc -l)
    fi

    if [ "${threat_score:-0}" -ge 25 ] || [ "${high_risk_count:-0}" -ge 5 ]; then
        print_alert "THREAT LEVEL: CRITICAL - Immediate action required"
        echo " Summary: Multiple attack vectors detected from $high_risk_count high-risk IPs"
        echo ""
        echo " Immediate Actions:"
        echo " 1. ⚡ Apply the blocklist above IMMEDIATELY (prioritized by threat score)"
        echo " 2. Review admin access logs for successful breaches"
        echo " 3. 🛡 Enable ModSecurity WAF or Cloudflare if not already active"
        echo " 4. 🔄 Update all CMS platforms and plugins urgently"
        echo " 5. 🔐 Force password reset for admin accounts if login attempts detected"
        echo " 6. Re-run this analysis in 1 hour to verify blocks are working"
    elif [ "${threat_score:-0}" -ge 12 ] || [ "${high_risk_count:-0}" -ge 2 ]; then
        print_warning "THREAT LEVEL: HIGH - Action recommended within 24 hours"
        echo " Summary: Significant threat activity from $high_risk_count high-risk IPs"
        echo ""
        echo " Recommended Actions:"
        echo " 1. Review and apply the blocklist above (focus on CRITICAL/HIGH scores)"
        echo " 2. Enable rate limiting for admin endpoints"
        echo " 3. Monitor logs closely for the next 24-48 hours"
        echo " 4. Consider implementing fail2ban or similar IDS"
        echo " 5. Review and update security plugins/modules"
    elif [ "${threat_score:-0}" -ge 5 ]; then
        print_warning "THREAT LEVEL: MODERATE - Routine security maintenance"
        echo " Summary: Normal bot activity with some suspicious patterns"
        echo ""
        echo " Recommended Actions:"
        echo " 1. Review suspicious IPs in the report"
        echo " 2. Consider rate limiting aggressive bots"
        echo " 3. Continue routine log monitoring"
        echo " 4. Block aggressive SEO bots if impacting performance"
    else
        print_success "THREAT LEVEL: ✅ LOW - Normal operation"
        echo " Summary: Minimal threat activity detected"
        echo ""
        echo " Recommended Actions:"
        echo " 1. Continue routine log monitoring"
        echo " 2. Review false positive warnings to whitelist legitimate services"
        echo " 3. Consider blocking aggressive SEO bots if bandwidth is a concern"
    fi

    echo ""
    echo "==============================================================="
    echo "Report saved to: $OUTPUT_FILE"
    echo "==============================================================="
}
################################################################################
# BASELINE HEALTH CHECK - Test domains before making changes
################################################################################
# Record the cached HTTP/HTTPS status of every domain as the "before" baseline.
#
# Reads "domain|http_code|https_code|status_summary" records from
# get_all_domain_statuses, prints one coloured status line per domain and
# appends each record to $TEMP_DIR/baseline_health.txt (truncated first).
# Skips the check with a warning when get_all_domain_statuses is not defined
# or returns no records.
#
# Globals read: TEMP_DIR, GREEN, YELLOW, RED, NC
# Note: the read targets (domain, http_status, https_status, result) are not
#       declared local, so they stay set in the caller's scope afterwards.
baseline_health_check() {
    print_info "Loading baseline health status from cached data..."
    echo ""

    # Always start from an empty baseline, even if the check is skipped below
    local baseline_file="$TEMP_DIR/baseline_health.txt"
    : > "$baseline_file"

    # The status data comes from the reference database helpers; without
    # them there is nothing to record.
    if ! command -v get_all_domain_statuses &>/dev/null; then
        print_warning "Reference database functions not available - skipping health check"
        return 0
    fi

    local seen=0 healthy=0 unhealthy=0

    while IFS='|' read -r domain http_status https_status result; do
        if [ -z "$domain" ]; then
            continue
        fi
        (( seen += 1 ))

        # 200_OK and REDIRECT count as working; everything else as an issue
        case "$result" in
            200_OK)
                (( healthy += 1 ))
                echo -e " ${GREEN}${NC} $domain - HTTP:$http_status HTTPS:$https_status"
                ;;
            REDIRECT)
                (( healthy += 1 ))
                echo -e " ${YELLOW}${NC} $domain - Redirect (HTTP:$http_status HTTPS:$https_status)"
                ;;
            403_FORBIDDEN)
                (( unhealthy += 1 ))
                echo -e " ${RED}${NC} $domain - Forbidden (HTTP:$http_status HTTPS:$https_status)"
                ;;
            TIMEOUT|UNREACHABLE)
                (( unhealthy += 1 ))
                echo -e " ${RED}${NC} $domain - Timeout (unreachable)"
                ;;
            *)
                (( unhealthy += 1 ))
                echo -e " ${YELLOW}?${NC} $domain - HTTP:$http_status HTTPS:$https_status"
                ;;
        esac

        # Baseline record format: domain|http_status|https_status|result
        echo "$domain|$http_status|$https_status|$result" >> "$baseline_file"
    done < <(get_all_domain_statuses)

    if (( seen == 0 )); then
        print_warning "No domain status data available in reference database"
        return 0
    fi

    echo ""
    print_success "Baseline loaded from cache: $healthy working, $unhealthy with issues"
    echo ""
}
# Compare the current cached status of every baseline domain with the status
# recorded in $TEMP_DIR/baseline_health.txt and report regressions/fixes.
#
# A domain is reported as BROKEN when it went from 200_OK/REDIRECT to
# 403_FORBIDDEN/TIMEOUT/UNREACHABLE, and as FIXED for the opposite transition.
# Domains for which get_domain_status returns nothing are skipped.
#
# Globals read: TEMP_DIR, RED, GREEN, NC
# Returns: 0 when the check is skipped (no baseline / no helper); otherwise
#          the status of the final "Press Enter" read.
verify_domains_still_working() {
    print_info "Checking current domain status from cached data..."
    echo ""

    if [ ! -s "$TEMP_DIR/baseline_health.txt" ]; then
        print_warning "No baseline health data available"
        return 0
    fi

    if ! command -v get_domain_status &>/dev/null; then
        print_warning "Reference database functions not available - skipping verification"
        return 0
    fi

    local changes_detected=0
    local now_broken=0
    # Keep every loop/read target local so nothing leaks into global scope
    local domain baseline_http baseline_https baseline_result
    local current_status http_status https_status new_result

    while IFS='|' read -r domain baseline_http baseline_https baseline_result; do
        [ -z "$domain" ] && continue

        # Current status from the cached reference database
        current_status=$(get_domain_status "$domain")
        if [ -z "$current_status" ]; then
            # Domain not in cache - skip
            continue
        fi

        # Current status format: http_code|https_code|status_summary
        IFS='|' read -r http_status https_status new_result <<< "$current_status"

        # Compare to baseline
        if [ "$baseline_result" != "$new_result" ]; then
            changes_detected=$((changes_detected + 1))

            # Check if it got worse
            if [ "$baseline_result" = "200_OK" ] || [ "$baseline_result" = "REDIRECT" ]; then
                if [ "$new_result" = "403_FORBIDDEN" ] || [ "$new_result" = "TIMEOUT" ] || [ "$new_result" = "UNREACHABLE" ]; then
                    now_broken=$((now_broken + 1))
                    echo -e " ${RED}⚠ BROKEN:${NC} $domain"
                    echo -e " Before: $baseline_result (HTTP:$baseline_http HTTPS:$baseline_https)"
                    echo -e " After: $new_result (HTTP:$http_status HTTPS:$https_status)"
                    echo -e " ${RED}WARNING: This domain stopped working after your changes!${NC}"
                    echo ""
                fi
            # Check if it got better
            elif [ "$baseline_result" = "403_FORBIDDEN" ] || [ "$baseline_result" = "TIMEOUT" ] || [ "$baseline_result" = "UNREACHABLE" ]; then
                if [ "$new_result" = "200_OK" ] || [ "$new_result" = "REDIRECT" ]; then
                    echo -e " ${GREEN}✅ FIXED:${NC} $domain"
                    echo -e " Before: $baseline_result"
                    echo -e " After: $new_result"
                    echo ""
                fi
            fi
        fi
    done < "$TEMP_DIR/baseline_health.txt"

    if [ "${now_broken:-0}" -gt 0 ]; then
        echo ""
        print_alert "WARNING: $now_broken domain(s) may have stopped working!"
        echo ""
        echo "NOTE: Status is from cached data (max 1 hour old)."
        echo "If you just made changes, the cache may not reflect real-time status."
        echo ""
        echo "Recommended actions:"
        echo " 1. Review the firewall rules you just applied"
        echo " 2. Check CSF temporary blocks: csf -t"
        echo " 3. Check CSF deny list: csf -g"
        echo " 4. Manually verify domain: curl -I http://domain.com"
        echo " 5. Consider reverting changes if issues persist"
        echo ""
    elif [ "${changes_detected:-0}" -eq 0 ]; then
        print_success "All domains show same status as baseline (cache-based check)"
    else
        print_success "Some status changes detected but no domains broken (cache-based check)"
    fi

    echo ""
    read -r -p "Press Enter to continue..."
}
#############################################################################
# Main Execution
#############################################################################
# Entry point: locate access logs for the active control panel, apply the
# optional user/time filters, run the analysis pipeline and finally hand
# over to the interactive post-analysis menu.
#
# Globals read:    SYS_CONTROL_PANEL, LOG_DIR, HOURS_BACK, DAYS_BACK,
#                  FILTER_USER, OUTPUT_FILE
# Globals written: log_count, choice, user_domains (exported) and, in
#                  InterWorx mode, INTERWORX_MODE / INTERWORX_FIND_OPTS
#                  (exported)
# Exits: 1 when the log directory, log files or the user's domains cannot be
#        found, or when parse_logs / classify_bots fail;
#        0 when the user declines to analyze logs outside the time filter.
main() {
    echo ""
    print_header "Starting Apache/cPanel Bot Analysis"

    # InterWorx requires special log discovery (logs are in /home/user/var/domain.com/logs/)
    if [ "$SYS_CONTROL_PANEL" = "interworx" ]; then
        print_info "InterWorx detected - discovering domain logs..."

        # Build time filter options
        local find_opts=()
        if [ -n "$HOURS_BACK" ]; then
            local minutes=$((HOURS_BACK * 60))
            find_opts+=(-mmin -"$minutes")
        elif [ -n "$DAYS_BACK" ]; then
            find_opts+=(-mtime -"$DAYS_BACK")
        fi

        # Find all transfer*.log files in InterWorx structure (includes transfer.log and transfer-ssl.log)
        log_count=$(find /home/*/var/*/logs -type f -name "transfer*.log" "${find_opts[@]}" 2>/dev/null | wc -l)

        if [ "$log_count" -eq 0 ]; then
            # Try without time filter to see if ANY logs exist
            local total_logs
            total_logs=$(find /home/*/var/*/logs -type f -name "transfer*.log" 2>/dev/null | wc -l)

            if [ "$total_logs" -eq 0 ]; then
                print_alert "Error: No InterWorx access logs found in /home/*/var/*/logs/"
                echo ""
                echo "Diagnostic information:"
                echo " Checking for InterWorx structure:"
                # /home/<user>/var/<domain>/logs sits four levels below /home,
                # so the search has to descend to depth 4 to see it.
                local iw_structure
                iw_structure=$(find /home -maxdepth 4 -type d -path "*/var/*/logs" 2>/dev/null | head -5)
                if [ -n "$iw_structure" ]; then
                    echo " Found InterWorx directories:"
                    echo "$iw_structure"
                    echo ""
                    echo " Checking for any log files:"
                    find /home/*/var/*/logs -type f -name "*.log" 2>/dev/null | head -10
                else
                    echo " No InterWorx directory structure found (expected: /home/user/var/domain.com/logs/)"
                fi
                exit 1
            else
                # Name the filter that was actually applied (hours or days)
                if [ -n "$HOURS_BACK" ]; then
                    print_alert "No logs found matching time filter (last $HOURS_BACK hours)"
                else
                    print_alert "No logs found matching time filter (last $DAYS_BACK days)"
                fi
                echo "Total logs available: $total_logs"
                echo ""
                read -r -p "Analyze all available logs instead? [y/N]: " choice
                if [[ "$choice" =~ ^[Yy] ]]; then
                    log_count=$total_logs
                    find_opts=() # Clear time filter
                else
                    exit 0
                fi
            fi
        fi

        print_info "Found $log_count InterWorx domain log files to analyze"

        # Exported for the log parsing stage (presumably parse_logs - it is
        # defined outside this section)
        export INTERWORX_MODE="yes"
        export INTERWORX_FIND_OPTS="${find_opts[*]}"
    else
        # Standard cPanel/Plesk log discovery
        # Check if log directory exists
        if [ ! -d "$LOG_DIR" ]; then
            print_alert "Error: Log directory not found: $LOG_DIR"
            echo "Please specify the correct log directory with -l option"
            exit 1
        fi

        # Check if logs exist
        local find_opts=()
        if [ -n "$HOURS_BACK" ]; then
            local minutes=$((HOURS_BACK * 60))
            find_opts+=(-mmin -"$minutes")
        elif [ -n "$DAYS_BACK" ]; then
            find_opts+=(-mtime -"$DAYS_BACK")
        fi

        log_count=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | wc -l)

        if [ "$log_count" -eq 0 ]; then
            # Try without time filter to see if ANY logs exist
            local total_logs
            total_logs=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" 2>/dev/null | wc -l)

            if [ "$total_logs" -eq 0 ]; then
                print_alert "Error: No log files found in $LOG_DIR"
                echo ""
                echo "Diagnostic information:"
                echo " Log directory: $LOG_DIR"
                echo " Directory exists: $([ -d "$LOG_DIR" ] && echo "yes" || echo "no")"
                if [ -d "$LOG_DIR" ]; then
                    echo " Total files in directory: $(find "$LOG_DIR" -type f 2>/dev/null | wc -l)"
                    echo " Sample files:"
                    find "$LOG_DIR" -type f 2>/dev/null | head -5 | sed 's/^/ /'
                fi
                echo ""
                echo "Control panel: $SYS_CONTROL_PANEL"
                exit 1
            else
                print_alert "No logs found matching time filter"
                if [ -n "$HOURS_BACK" ]; then
                    echo "No logs found from the last $HOURS_BACK hours"
                elif [ -n "$DAYS_BACK" ]; then
                    echo "No logs found from the last $DAYS_BACK days"
                fi
                echo "Total logs available: $total_logs"
                echo ""
                read -r -p "Analyze all available logs instead? [y/N]: " choice
                if [[ "$choice" =~ ^[Yy] ]]; then
                    log_count=$total_logs
                    find_opts=() # Clear time filter
                else
                    exit 0
                fi
            fi
        fi

        print_info "Found $log_count log files to analyze"
    fi

    # User filtering
    if [ -n "$FILTER_USER" ]; then
        print_info "Filtering logs for user: $FILTER_USER"
        user_domains=$(get_user_domains "$FILTER_USER")
        export user_domains
        if [ -z "$user_domains" ]; then
            print_error "No domains found for user: $FILTER_USER"
            exit 1
        fi
        print_info "User has $(echo "$user_domains" | wc -l) domain(s)"
    else
        export user_domains=""
    fi

    # Print time range info
    if [ -n "$HOURS_BACK" ]; then
        print_info "Analyzing logs from the last $HOURS_BACK hours"
    elif [ -n "$DAYS_BACK" ]; then
        print_info "Analyzing logs from the last $DAYS_BACK days"
    fi

    # Baseline health check - record domain status before analysis
    baseline_health_check

    # Execute analysis pipeline; parsing and classification are mandatory
    parse_logs || {
        print_alert "Log parsing failed"
        exit 1
    }
    classify_bots || {
        print_alert "Bot classification failed"
        exit 1
    }
    detect_server_ips
    detect_threats
    analyze_success_rates # NEW: Analyze success/failure rates for better accuracy
    detect_botnets
    analyze_time_series
    calculate_threat_scores
    detect_false_positives
    generate_statistics
    generate_report

    print_success "Analysis complete!"
    echo ""
    echo "Report location: $OUTPUT_FILE"

    # Analyze threat patterns and generate recommendations
    analyze_domain_threats
    analyze_geographic_threats
    generate_recommendations

    # Ask user what to do next
    show_post_analysis_menu
}
################################################################################
# DOMAIN-LEVEL THREAT ANALYSIS
################################################################################
# Build per-domain threat statistics in a single awk pass over the parsed logs.
#
# Inputs (all under $TEMP_DIR, '|'-separated):
#   parsed_logs.txt        - field 1 = client IP, field 2 = domain
#   threat_scores.txt      - field 1 = score, field 2 = IP (score >= 70 = high risk)
#   attack_vectors_raw.txt - field 2 = domain
#   classified_bots.txt    - field 2 = domain
# Outputs (under $TEMP_DIR):
#   domain_threats.txt        - domain|total_requests|bot_requests|bot_percentage|
#                               high_risk_ip_count|attack_attempts|high_risk_ips_detail
#   domain_high_risk_ips.txt  - domain|high_risk_ip_count|high_risk_ips_detail
#                               (only domains with at least one high-risk IP)
#   domain_threats_sorted.txt - domain_threats.txt sorted by high_risk_ip_count, descending
#   all_domains.txt           - unique domain names
#
# high_risk_ip_count is the number of DISTINCT high-risk IPs seen on the
# domain, and high_risk_ips_detail holds one "ip:score:hits " entry per such
# IP (in order of first appearance), where hits is that IP's request count on
# the domain.
analyze_domain_threats() {
    print_info "Analyzing per-domain threat patterns..."

    # Start from empty result files so stale data never survives a re-run
    > "$TEMP_DIR/domain_threats.txt"
    > "$TEMP_DIR/domain_high_risk_ips.txt"

    # Single pass: the lookup tables are loaded in BEGIN/END, so the cost is
    # linear in the size of the input files rather than domains x IPs greps.
    awk -F'|' -v dir="$TEMP_DIR" '
    BEGIN {
        scores_file  = dir "/threat_scores.txt"
        attacks_file = dir "/attack_vectors_raw.txt"
        bots_file    = dir "/classified_bots.txt"
        threats_out  = dir "/domain_threats.txt"
        risk_out     = dir "/domain_high_risk_ips.txt"

        # High-risk IPs: score (field 1) of 70 or more, keyed by IP (field 2)
        while ((getline < scores_file) > 0) {
            if ($1 >= 70) {
                high_risk[$2] = $1
            }
        }
        close(scores_file)

        # Attack attempts per domain
        while ((getline < attacks_file) > 0) {
            attack_counts[$2]++
        }
        close(attacks_file)
    }

    # One record of parsed_logs.txt per request
    {
        ip = $1
        domain = $2

        domain_requests[domain]++

        if (ip in high_risk) {
            # Count each IP once per domain; remember first-seen order so the
            # detail string is stable across awk implementations
            if (++ip_hits[domain, ip] == 1) {
                n = ++unique_ips[domain]
                ip_order[domain, n] = ip
            }
        }
    }

    END {
        # Bot requests per domain
        while ((getline < bots_file) > 0) {
            bot_counts[$2]++
        }
        close(bots_file)

        for (domain in domain_requests) {
            total_req = domain_requests[domain]
            bot_req = bot_counts[domain] + 0
            bot_pct = (total_req > 0) ? (bot_req / total_req * 100) : 0
            high_risk_count = unique_ips[domain] + 0
            attacks = attack_counts[domain] + 0

            detail = ""
            for (i = 1; i <= high_risk_count; i++) {
                ip = ip_order[domain, i]
                detail = detail ip ":" high_risk[ip] ":" ip_hits[domain, ip] " "
            }

            printf "%s|%d|%d|%.1f|%d|%d|%s\n", domain, total_req, bot_req, bot_pct, high_risk_count, attacks, detail > threats_out

            if (high_risk_count > 0) {
                printf "%s|%d|%s\n", domain, high_risk_count, detail > risk_out
            }
        }
    }' "$TEMP_DIR/parsed_logs.txt"

    # Sort by high-risk IP count (descending)
    sort -t'|' -k5,5 -rn "$TEMP_DIR/domain_threats.txt" > "$TEMP_DIR/domain_threats_sorted.txt"

    # Get all unique domains
    awk -F'|' '{print $1}' "$TEMP_DIR/domain_threats.txt" | sort -u > "$TEMP_DIR/all_domains.txt"

    print_success "Domain threat analysis complete"
}
################################################################################
# GEOGRAPHIC ANALYSIS (Country-based threat tracking)
################################################################################
# Estimate how widely distributed the high-risk traffic is.
#
# With geoiplookup or mmdbinspect installed only a notice is printed (the
# per-country lookup is still a TODO). Without them, high-risk IPs (score >= 70
# in $TEMP_DIR/threat_scores.txt) are grouped by their first three dot-separated
# components into $TEMP_DIR/high_risk_networks.txt; when more than 10 networks
# or more than 50 high-risk IPs are found, a marker line is written to
# $TEMP_DIR/geo_needs_maxmind.txt.
#
# Globals read: TEMP_DIR
analyze_geographic_threats() {
    print_info "Analyzing geographic distribution of threats..."

    # Reset both result files
    : > "$TEMP_DIR/geo_analysis.txt"
    : > "$TEMP_DIR/geo_needs_maxmind.txt"

    if command -v geoiplookup >/dev/null 2>&1 || command -v mmdbinspect >/dev/null 2>&1; then
        # Full geographic analysis with GeoIP
        print_info "Performing full geographic analysis with GeoIP..."
        # TODO: Implement full GeoIP lookups when available
        # This would lookup each high-risk IP and count by country
    else
        # No GeoIP tooling: fall back to spotting spread across /24 networks
        local scores="$TEMP_DIR/threat_scores.txt"

        if [ -s "$scores" ]; then
            local networks="$TEMP_DIR/high_risk_networks.txt"

            awk -F'|' '$1 >= 70 {
                split($2, octet, ".")
                printf "%s.%s.%s.0/24\n", octet[1], octet[2], octet[3]
            }' "$scores" | sort | uniq -c | sort -rn > "$networks"

            local net_total risky_total
            net_total=$(wc -l < "$networks" 2>/dev/null || echo "0")
            risky_total=$(awk -F'|' '$1 >= 70' "$scores" | wc -l)

            # Many networks or many IPs suggests a distributed attack, which
            # is where country-level (MaxMind) blocking becomes worthwhile
            if [ "$net_total" -gt 10 ] || [ "$risky_total" -gt 50 ]; then
                echo "DISTRIBUTED|$net_total networks|$risky_total IPs|MaxMind recommended" > "$TEMP_DIR/geo_needs_maxmind.txt"
            fi
        fi

        print_info "Geographic analysis limited (MaxMind GeoIP2 not installed)"
    fi

    print_success "Geographic analysis complete"
}
################################################################################
# RECOMMENDATION ENGINE
################################################################################
# Print the number of lines in file $1, or 0 when the file is not readable.
_rec_count_lines() {
    if [ -r "$1" ]; then
        wc -l < "$1"
    else
        echo 0
    fi
}

# Print $1 as a whole-number percentage of $2; prints 0 when a value is empty
# or the total is not positive.
_rec_percentage() {
    awk -v part="${1:-0}" -v total="${2:-0}" \
        'BEGIN { printf "%.0f", (total + 0 > 0) ? ((part + 0) / total) * 100 : 0 }'
}

# Turn the analysis results in $TEMP_DIR into a prioritized action list.
#
# Reads:  threat_scores.txt, all_domains.txt, domain_high_risk_ips.txt,
#         domain_threats_sorted.txt, rapid_fire_ips.txt,
#         attack_vectors_raw.txt, geo_needs_maxmind.txt and, when CSF is
#         installed, /etc/csf/csf.conf
# Writes: recommendations.txt - one line per recommendation:
#             REC|number|action_type|title|priority|description
#         attack_scope.txt    - scope|primary_target|percentage|affected|total
#
# Attack scope values:
#   single_domain  - exactly one of several domains has high-risk IPs
#   primary_target - several affected, the top one holds >= 75% of them
#   server_wide    - several affected, none dominates
#   single_server  - the server hosts only one domain
#   unknown        - none of the above (e.g. no affected domains)
generate_recommendations() {
    print_info "Generating intelligent recommendations..."

    local rec_file="$TEMP_DIR/recommendations.txt"
    > "$rec_file"
    local rec_count=0

    # Total unique high-risk IPs (threat score >= 70)
    local total_high_risk_ips=0
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        total_high_risk_ips=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null)
        total_high_risk_ips=${total_high_risk_ips:-0}
    fi

    # Domains seen in the logs vs. domains that have high-risk IPs
    local total_domains
    total_domains=$(_rec_count_lines "$TEMP_DIR/all_domains.txt")
    local affected_domains=0
    if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then
        affected_domains=$(_rec_count_lines "$TEMP_DIR/domain_high_risk_ips.txt")
    fi

    # Determine attack scope: single domain vs server-wide
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0

    if [ "${affected_domains:-0}" -eq 1 ] && [ "${total_domains:-0}" -gt 1 ]; then
        attack_scope="single_domain"
        primary_target=$(head -1 "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null | cut -d'|' -f1)
        # Share of all high-risk IPs that target this domain
        local domain_risk_count
        domain_risk_count=$(head -1 "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null | cut -d'|' -f2)
        if [ "${total_high_risk_ips:-0}" -gt 0 ]; then
            primary_target_percentage=$(_rec_percentage "$domain_risk_count" "$total_high_risk_ips")
        fi
    elif [ "${affected_domains:-0}" -gt 1 ] && [ "${total_domains:-0}" -gt 1 ]; then
        # Server-wide unless the top domain clearly dominates
        attack_scope="server_wide"
        local top_domain_count
        top_domain_count=$(head -1 "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | cut -d'|' -f5)
        if [ "${top_domain_count:-0}" -gt 0 ] && [ "${total_high_risk_ips:-0}" -gt 0 ]; then
            local top_percentage
            top_percentage=$(_rec_percentage "$top_domain_count" "$total_high_risk_ips")
            if [ "$top_percentage" -ge 75 ]; then
                attack_scope="primary_target"
                primary_target=$(head -1 "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | cut -d'|' -f1)
                primary_target_percentage=$top_percentage
            fi
        fi
    elif [ "${total_domains:-0}" -eq 1 ]; then
        attack_scope="single_server"
        primary_target=$(head -1 "$TEMP_DIR/all_domains.txt" 2>/dev/null)
    fi

    # RECOMMENDATION #1: IP Blocking Strategy (duration scales with IP count)
    if [ "${total_high_risk_ips:-0}" -gt 0 ]; then
        rec_count=$((rec_count + 1))
        if [ "${total_high_risk_ips:-0}" -le 10 ]; then
            echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 1 hour|HIGH|CSF temporary block recommended for ${total_high_risk_ips} IPs with threat score >= 70" >> "$rec_file"
        elif [ "${total_high_risk_ips:-0}" -le 50 ]; then
            echo "REC|$rec_count|ip_block_temp|Block $total_high_risk_ips high-risk IPs for 24 hours|HIGH|Large number of threats detected - 24hr block recommended" >> "$rec_file"
        else
            echo "REC|$rec_count|ip_block_perm|Permanently block $total_high_risk_ips high-risk IPs|CRITICAL|Severe bot attack detected - permanent blocking recommended" >> "$rec_file"
        fi
    fi

    # RECOMMENDATION #2: Connection Limit (CSF CT_LIMIT)
    # Only recommend if CSF is installed and CT_LIMIT is enabled (not 0)
    if command -v csf >/dev/null 2>&1 && [ -f /etc/csf/csf.conf ]; then
        local current_ct_limit
        current_ct_limit=$(grep "^CT_LIMIT" /etc/csf/csf.conf 2>/dev/null | grep -oP '"\K[0-9]+' | head -1)
        # Anything that is not a plain number is treated as "disabled"
        [[ "$current_ct_limit" =~ ^[0-9]+$ ]] || current_ct_limit=0

        if [ "$current_ct_limit" -gt 0 ]; then
            # First field of the first rapid_fire_ips.txt line is used as the
            # highest per-IP count
            local max_connections=0
            if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
                max_connections=$(head -1 "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null | awk '{print $1}')
                [[ "$max_connections" =~ ^[0-9]+$ ]] || max_connections=0
            fi

            if [ "$max_connections" -gt 100 ] && [ "$max_connections" -lt "$current_ct_limit" ]; then
                rec_count=$((rec_count + 1))
                local recommended_limit=$((max_connections - 20))
                echo "REC|$rec_count|csf_ct_limit|Reduce CSF CT_LIMIT from $current_ct_limit to $recommended_limit|MEDIUM|High concurrent connections detected ($max_connections from single IP)" >> "$rec_file"
            fi
        fi
    fi

    # RECOMMENDATION #3: Domain-Specific .htaccess Protection
    if [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "primary_target" ]; then
        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|htaccess_domain|Add bot blocking to $primary_target .htaccess|HIGH|${primary_target_percentage}% of attacks target this domain" >> "$rec_file"
    fi

    # RECOMMENDATION #4: Server-wide Apache Protection
    if [ "$attack_scope" = "server_wide" ]; then
        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|apache_global|Add global bot blocking to Apache pre-virtualhost|HIGH|Attack affects $affected_domains of $total_domains domains" >> "$rec_file"
    fi

    # RECOMMENDATION #5: WordPress-specific (wp-admin/wp-login/xmlrpc attempts)
    local wp_pattern='wp-admin\|wp-login\|xmlrpc'
    local wp_attacks=0
    if [ -s "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        wp_attacks=$(grep -ci "$wp_pattern" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null)
        wp_attacks=${wp_attacks:-0}
    fi

    if [ "${wp_attacks:-0}" -gt 50 ]; then
        rec_count=$((rec_count + 1))

        # Unique domains (field 2) that received WordPress attacks
        local wp_domains wp_domain_count wp_target_domain
        wp_domains=$(grep -i "$wp_pattern" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | cut -d'|' -f2 | sort -u)
        wp_domain_count=$(printf '%s\n' "$wp_domains" | wc -l)
        wp_target_domain=$(printf '%s\n' "$wp_domains" | head -1)

        if [ "${wp_domain_count:-0}" -eq 1 ] || [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "single_server" ]; then
            # Single domain being attacked
            echo "REC|$rec_count|wp_hardening|Harden WordPress on $wp_target_domain|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        elif [ "$attack_scope" = "primary_target" ]; then
            # One primary target but others also affected
            echo "REC|$rec_count|wp_hardening|Harden WordPress on $primary_target|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        else
            # Multiple domains with WordPress attacks
            echo "REC|$rec_count|wp_hardening|Harden WordPress across $wp_domain_count domains|HIGH|$wp_attacks WordPress login/admin attempts detected" >> "$rec_file"
        fi
    fi

    # PORTFLOOD Protection removed - not appropriate for web servers with many sites
    # Blocking ports 80/443 based on connection count breaks legitimate traffic

    # RECOMMENDATION #7: CSF SYNFLOOD Protection (if DDoS patterns detected)
    if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
        local ddos_count
        ddos_count=$(_rec_count_lines "$TEMP_DIR/rapid_fire_ips.txt")
        if [ "${ddos_count:-0}" -gt 10 ]; then
            rec_count=$((rec_count + 1))
            echo "REC|$rec_count|csf_synflood|Enable CSF SYNFLOOD protection|HIGH|$ddos_count potential DDoS sources detected" >> "$rec_file"
        fi
    fi

    # RECOMMENDATION #8: MaxMind GeoIP for Country Blocking (if distributed attack)
    if [ -s "$TEMP_DIR/geo_needs_maxmind.txt" ]; then
        # Marker format: DISTRIBUTED|<n> networks|<m> IPs|MaxMind recommended
        local geo_nets geo_ips network_count ip_count
        IFS='|' read -r _ geo_nets geo_ips _ < "$TEMP_DIR/geo_needs_maxmind.txt"
        network_count=${geo_nets//[!0-9]/}
        ip_count=${geo_ips//[!0-9]/}
        rec_count=$((rec_count + 1))
        echo "REC|$rec_count|install_maxmind|Install MaxMind GeoIP2 for country-based blocking|MEDIUM|Distributed attack from ${network_count:-0} networks (${ip_count:-0} IPs) - geographic blocking recommended" >> "$rec_file"
    fi

    # Store attack scope for menu system
    echo "$attack_scope|$primary_target|$primary_target_percentage|$affected_domains|$total_domains" > "$TEMP_DIR/attack_scope.txt"

    print_success "Generated $rec_count recommendations"
}
################################################################################
# POST-ANALYSIS MENU
################################################################################
# Print the threat summary (attack scope plus the recommendation list) and
# let the user choose between the action menu and the detailed view.
#
# Reads: $TEMP_DIR/attack_scope.txt   (scope|primary_target|percentage|affected|total)
#        $TEMP_DIR/recommendations.txt (REC|number|action_type|title|priority|description)
# Globals read: TEMP_DIR, RED, YELLOW, BLUE, BOLD, NC
# Returns: 0 for "Back", an invalid choice or when there are no
#          recommendations; otherwise the status of the selected sub-menu.
show_post_analysis_menu() {
    # Defaults apply when no scope file is present
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0
    local affected_domains=0
    local total_domains=0

    if [ -s "$TEMP_DIR/attack_scope.txt" ]; then
        local scope_line
        scope_line=$(cat "$TEMP_DIR/attack_scope.txt")
        attack_scope=$(cut -d'|' -f1 <<< "$scope_line")
        primary_target=$(cut -d'|' -f2 <<< "$scope_line")
        primary_target_percentage=$(cut -d'|' -f3 <<< "$scope_line")
        affected_domains=$(cut -d'|' -f4 <<< "$scope_line")
        total_domains=$(cut -d'|' -f5 <<< "$scope_line")
    fi

    # A non-empty recommendations file means there is something to act on
    local has_recommendations=false
    local rec_count=0
    if [ -s "$TEMP_DIR/recommendations.txt" ]; then
        has_recommendations=true
        rec_count=$(wc -l < "$TEMP_DIR/recommendations.txt")
    fi

    echo ""
    echo "==============================================================="
    print_header "THREAT ANALYSIS SUMMARY"
    echo ""

    # Attack scope summary
    case "$attack_scope" in
        single_domain)
            print_warning "ATTACK SCOPE: Single Domain Target"
            echo " • Primary Target: $primary_target"
            echo " • This domain is receiving 100% of high-risk traffic"
            echo " • Recommendation: Domain-specific protection"
            ;;
        primary_target)
            print_warning "ATTACK SCOPE: Primarily Targeting One Domain"
            echo " • Primary Target: $primary_target ($primary_target_percentage% of attacks)"
            echo " • Other domains also affected: $affected_domains of $total_domains total"
            echo " • Recommendation: Focus protection on primary target"
            ;;
        server_wide)
            print_alert "ATTACK SCOPE: Server-Wide Attack"
            echo " • Multiple domains under attack: $affected_domains of $total_domains"
            echo " • Attack is distributed across the server"
            echo " • Recommendation: Server-wide protection needed"
            ;;
        single_server)
            print_info "ATTACK SCOPE: Single-Domain Server"
            echo " • Target: $primary_target (only domain on server)"
            echo " • Server-level protection will apply to this domain"
            ;;
        *)
            print_info "No significant threats detected"
            ;;
    esac
    echo ""

    # Nothing to recommend: wait for Enter and go back
    if [ "$has_recommendations" != true ]; then
        print_success "No recommendations - your server appears secure"
        echo ""
        echo "Press Enter to return to main menu..."
        read
        return 0
    fi

    echo "==============================================================="
    print_header "RECOMMENDED ACTIONS ($rec_count recommendations)"
    echo ""

    # One numbered entry per recommendation, coloured by priority
    local idx=0
    local tint icon
    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        idx=$((idx + 1))

        tint=""
        icon=""
        case "$priority" in
            CRITICAL) tint="${RED}";    icon="" ;;
            HIGH)     tint="${YELLOW}"; icon="" ;;
            MEDIUM)   tint="${BLUE}";   icon="" ;;
            *)        tint="${NC}";     icon=" " ;;
        esac

        echo -e " ${BOLD}[$idx]${NC} $icon $action_title"
        echo -e " ${tint}Priority: $priority${NC} - $description"
        echo ""
    done < "$TEMP_DIR/recommendations.txt"

    echo "==============================================================="
    echo ""
    echo "What would you like to do?"
    echo ""
    echo " 1) Go to Take Action Menu (implement recommended actions)"
    echo " 2) Review Individual Recommendations (detailed view)"
    echo ""
    echo -e " ${RED}0)${NC} Back"
    echo ""
    read -p "Select option: " menu_choice

    if [ "$menu_choice" = "1" ]; then
        show_action_menu
    elif [ "$menu_choice" = "2" ]; then
        show_detailed_recommendations
    elif [ "$menu_choice" = "0" ]; then
        print_info "Returning to main menu..."
        return 0
    else
        print_warning "Invalid option - returning to main menu"
        return 0
    fi
}
################################################################################
# DETAILED RECOMMENDATIONS VIEWER
################################################################################
# Show every recommendation from $TEMP_DIR/recommendations.txt with the
# supporting data for its action type, then return to the post-analysis menu.
#
# Per action type:
#   ip_block_temp / ip_block_perm - first 10 IPs with score >= 70 from threat_scores.txt
#   htaccess_domain               - request/bot/attack figures of the target
#                                   domain from domain_threats_sorted.txt
#   apache_global                 - first 10 entries of domain_high_risk_ips.txt
#
# Globals read: TEMP_DIR, BOLD, NC
show_detailed_recommendations() {
    clear
    print_banner "Detailed Recommendations"
    echo ""

    if [ ! -s "$TEMP_DIR/recommendations.txt" ]; then
        print_warning "No recommendations available"
        echo ""
        read -r -p "Press Enter to continue..."
        show_post_analysis_menu
        return
    fi

    local count=0
    local rec_type rec_num action_type action_title priority description
    local target_domain

    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        count=$((count + 1))
        echo "==============================================================="
        echo -e "${BOLD}Recommendation #$count:${NC} $action_title"
        echo "==============================================================="
        echo ""
        echo "Priority: $priority"
        echo "Action Type: $action_type"
        echo "Description: $description"
        echo ""

        # Show specific details based on action type
        case "$action_type" in
            ip_block_temp|ip_block_perm)
                echo "Affected IPs:"
                awk -F'|' '$1 >= 70 {printf " • %s (score: %s)\n", $2, $1}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | head -10
                ;;
            htaccess_domain)
                # The domain is the word following "to " in the title
                target_domain=$(echo "$action_title" | grep -oP 'to \K[^ ]+' 2>/dev/null)
                echo "Target Domain: $target_domain"
                if [ -s "$TEMP_DIR/domain_threats_sorted.txt" ]; then
                    # Compare the domain field as a literal string: a regex
                    # match would let the dots in "a.com" match any character
                    awk -F'|' -v want="$target_domain" '($1 "") == (want "") {
                        printf " • Total Requests: %s\n", $2
                        printf " • Bot Requests: %s (%s%%)\n", $3, $4
                        printf " • High-Risk IPs: %s\n", $5
                        printf " • Attack Attempts: %s\n", $6
                    }' "$TEMP_DIR/domain_threats_sorted.txt"
                fi
                ;;
            apache_global)
                echo "Affected Domains:"
                if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then
                    awk -F'|' '{printf " • %s (%s high-risk IPs)\n", $1, $2}' "$TEMP_DIR/domain_high_risk_ips.txt" | head -10
                fi
                ;;
        esac
        echo ""
    done < "$TEMP_DIR/recommendations.txt"

    echo "==============================================================="
    echo ""
    read -r -p "Press Enter to return to action menu..."
    show_post_analysis_menu
}
################################################################################
# ACTION MENU (IMPLEMENT RECOMMENDATIONS)
################################################################################
show_action_menu() {
clear
print_banner "Take Action Menu"
echo ""
# Build hash table of recommended actions with their priorities
declare -A recommended_actions
declare -A action_priorities
declare -A action_descriptions
if [ -s "$TEMP_DIR/recommendations.txt" ]; then
while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
recommended_actions["$action_type"]=1
action_priorities["$action_type"]="$priority"
action_descriptions["$action_type"]="$description"
done < "$TEMP_DIR/recommendations.txt"
fi
# Display all available actions (not just recommended ones)
echo "All Available Actions:"
echo ""
echo "Legend: = Recommended by analysis"
echo ""
local count=0
declare -a action_types
declare -a action_titles
declare -a action_descs
# Define all possible actions
# 1. IP Blocking Actions
count=$((count + 1))
action_types[$count]="ip_block_temp_1hr"
action_titles[$count]="Block high-risk IPs for 1 hour (CSF temporary)"
action_descs[$count]="Temporary firewall block, auto-expires after 1 hour"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[ip_block_temp]}" "${action_priorities[ip_block_temp]}"
count=$((count + 1))
action_types[$count]="ip_block_temp_24hr"
action_titles[$count]="Block high-risk IPs for 24 hours (CSF temporary)"
action_descs[$count]="Temporary firewall block, auto-expires after 24 hours"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[ip_block_temp]}" "${action_priorities[ip_block_temp]}"
count=$((count + 1))
action_types[$count]="ip_block_perm"
action_titles[$count]="Block high-risk IPs permanently (CSF permanent)"
action_descs[$count]="Permanent firewall block - requires manual removal"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[ip_block_perm]}" "${action_priorities[ip_block_perm]}"
echo ""
echo "------------------------------------------------------------─"
echo ""
# 2. Domain/Site Protection
count=$((count + 1))
action_types[$count]="htaccess_domain"
action_titles[$count]="Add bot blocking to specific domain .htaccess"
action_descs[$count]="Domain-level protection via Apache .htaccess rules"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[htaccess_domain]}" "${action_priorities[htaccess_domain]}"
count=$((count + 1))
action_types[$count]="apache_global"
action_titles[$count]="Add global bot blocking to Apache (all domains)"
action_descs[$count]="Server-wide Apache configuration, affects all sites"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[apache_global]}" "${action_priorities[apache_global]}"
echo ""
echo "------------------------------------------------------------─"
echo ""
# 3. CSF Firewall Configuration
count=$((count + 1))
action_types[$count]="csf_ct_limit"
action_titles[$count]="Adjust CSF connection tracking limit (CT_LIMIT)"
action_descs[$count]="Limit concurrent connections per IP address"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[csf_ct_limit]}" "${action_priorities[csf_ct_limit]}"
# PORTFLOOD action removed - not appropriate for web servers
count=$((count + 1))
action_types[$count]="csf_synflood"
action_titles[$count]="Enable CSF SYNFLOOD protection"
action_descs[$count]="Protect against SYN flood DDoS attacks"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[csf_synflood]}" "${action_priorities[csf_synflood]}"
echo ""
echo "------------------------------------------------------------─"
echo ""
# 4. Geographic & Application Hardening
count=$((count + 1))
action_types[$count]="install_maxmind"
action_titles[$count]="Install MaxMind GeoIP2 for country-based blocking"
action_descs[$count]="Enable geographic filtering with CSF CC_DENY (requires free MaxMind license)"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[install_maxmind]}" "${action_priorities[install_maxmind]}"
count=$((count + 1))
action_types[$count]="wp_hardening"
action_titles[$count]="WordPress security hardening"
action_descs[$count]="Protect WordPress login and admin areas"
display_action_option $count "${action_types[$count]}" "${action_titles[$count]}" "${action_descs[$count]}" "${recommended_actions[wp_hardening]}" "${action_priorities[wp_hardening]}"
echo ""
echo "============================================================═"
echo ""
echo -e " ${RED}0)${NC} Back"
echo ""
read -p "Select action [0-$count]: " action_choice
# Validate choice
if [ "$action_choice" = "0" ]; then
show_post_analysis_menu
return
elif [ "$action_choice" -lt 1 ] || [ "$action_choice" -gt "$count" ] 2>/dev/null; then
print_warning "Invalid selection"
sleep 2
show_action_menu
return
fi
# Execute selected action
local selected_type="${action_types[$action_choice]}"
execute_action "$selected_type" "$action_choice"
}
# Print one numbered entry of the action menu, followed by its description.
#
# Arguments:
#   $1 - menu number shown in front of the entry
#   $2 - action type identifier (accepted for call compatibility, not printed)
#   $3 - entry title
#   $4 - one-line description printed below the title
#   $5 - non-empty when the action is recommended
#   $6 - recommendation priority: CRITICAL, HIGH, MEDIUM, or anything else
display_action_option() {
    local num="$1"
    local action_type="$2"
    local title="$3"
    local desc="$4"
    local is_recommended="$5"
    local priority="$6"
    local color tag

    if [ -z "$is_recommended" ]; then
        # Plain, uncoloured entry for actions that were not recommended
        echo -e " $num) $title"
    else
        # The priority only decides the colour and the trailing marker text
        case "$priority" in
            CRITICAL)
                color="$RED"
                tag=" RECOMMENDED [CRITICAL]"
                ;;
            HIGH)
                color="$YELLOW"
                tag=" RECOMMENDED [HIGH]"
                ;;
            MEDIUM)
                color="$BLUE"
                tag=" RECOMMENDED [MEDIUM]"
                ;;
            *)
                color="$GREEN"
                tag=" RECOMMENDED"
                ;;
        esac
        echo -e " ${color}$num)${NC} ${BOLD}$title${NC} ${color}${tag}${NC}"
    fi

    echo " $desc"
}
################################################################################
# ACTION EXECUTION ENGINE
################################################################################
# Dispatch a menu selection to the function that implements it.
#
# Arguments:
#   $1 - action type identifier chosen in the action menu
#   $2 - menu number of the selection (stored but not used here)
# Unknown action types print a warning, wait for Enter and reopen the menu.
execute_action() {
    local action_type="$1"
    local rec_number="$2"
    local -a action_cmd=()

    # Map the action type to the command (plus argument) that handles it
    case "$action_type" in
        ip_block_temp_1hr)  action_cmd=(execute_ip_blocking_specific "1hr") ;;
        ip_block_temp_24hr) action_cmd=(execute_ip_blocking_specific "24hr") ;;
        ip_block_temp)      action_cmd=(execute_ip_blocking "temp") ;;
        ip_block_perm)      action_cmd=(execute_ip_blocking "perm") ;;
        csf_ct_limit)       action_cmd=(execute_csf_ct_limit) ;;
        csf_synflood)       action_cmd=(execute_csf_synflood) ;;
        htaccess_domain)    action_cmd=(execute_htaccess_domain_blocking) ;;
        apache_global)      action_cmd=(execute_apache_global_blocking) ;;
        install_maxmind)    action_cmd=(execute_install_maxmind) ;;
        wp_hardening)       action_cmd=(execute_wp_hardening) ;;
        rate_limiting)      action_cmd=(execute_rate_limiting) ;;
    esac

    if [ "${#action_cmd[@]}" -gt 0 ]; then
        "${action_cmd[@]}"
        return
    fi

    # No handler registered for this action type
    print_warning "Action type '$action_type' not yet implemented"
    echo ""
    read -p "Press Enter to continue..."
    show_action_menu
}
# Temporarily block every high-risk IP (threat score >= 70) through CSF.
#
# Arguments:
#   $1 - "1hr" for a one-hour block; any other value selects 24 hours
# Reads:
#   $TEMP_DIR/threat_scores.txt   - '|'-separated records; fields 1-3 are used
#                                   as score, IP and request count
#   $TEMP_DIR/false_positives.txt - optional; records beginning with "ip|"
# IPs for which is_excluded_ip succeeds, or that are listed as false
# positives, are never blocked. Always ends by reopening the action menu.
execute_ip_blocking_specific() {
    local duration_type="$1" # "1hr" or "24hr"
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"

    clear
    print_banner "IP Blocking - CSF Temporary Block"
    echo ""

    # Check if CSF is installed
    if ! command -v csf >/dev/null 2>&1; then
        print_warning "CSF (ConfigServer Security & Firewall) is not installed"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get high-risk IPs
    if [ ! -s "$scores_file" ]; then
        print_warning "No threat scores available"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Count inside awk so the result is always a number, even for no matches
    local high_risk_count
    high_risk_count=$(awk -F'|' '$1 ~ /^[0-9]+$/ && $1 + 0 >= 70 { n++ } END { print n + 0 }' "$scores_file" 2>/dev/null)
    high_risk_count=${high_risk_count:-0}
    if [ "$high_risk_count" -eq 0 ]; then
        print_info "No high-risk IPs detected (score >= 70)"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Set duration based on type
    local duration duration_text
    if [ "$duration_type" = "1hr" ]; then
        duration=3600
        duration_text="1 hour"
    else
        duration=86400
        duration_text="24 hours"
    fi

    echo "This will block $high_risk_count high-risk IPs for $duration_text"
    echo ""
    echo "High-risk IPs (top 10):"
    awk -F'|' '$1 ~ /^[0-9]+$/ && $1 + 0 >= 70 {printf " • %s (score: %s, %s requests)\n", $2, $1, $3}' "$scores_file" | head -10
    echo ""
    if [ "$high_risk_count" -gt 10 ]; then
        echo " ... and $((high_risk_count - 10)) more"
        echo ""
    fi

    local confirm
    read -r -p "Proceed with blocking for $duration_text? (yes/no): " confirm
    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Collect IPs to block. The score is kept next to each IP so it does not
    # have to be searched for again when the block comment is written.
    local -a ips_to_block=() scores_to_block=()
    local score ip _rest
    while IFS='|' read -r score ip _rest; do
        # Ignore malformed records instead of letting the numeric test fail
        [[ "$score" =~ ^[0-9]+$ ]] || continue
        [ "$score" -ge 70 ] || continue
        [ -n "$ip" ] || continue
        # Skip excluded IPs
        if is_excluded_ip "$ip"; then
            continue
        fi
        # Skip false positives (literal "ip|" prefix match, no regex)
        if [ -s "$fp_file" ] &&
            awk -v ip="$ip" 'index($0, ip "|") == 1 { found = 1; exit } END { exit !found }' "$fp_file" 2>/dev/null; then
            continue
        fi
        ips_to_block+=("$ip")
        scores_to_block+=("$score")
    done < "$scores_file"

    # Nothing left after exclusions: leave the firewall untouched
    if [ "${#ips_to_block[@]}" -eq 0 ]; then
        echo ""
        print_info "All high-risk IPs are excluded or known false positives - nothing to block"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Apply blocks
    echo ""
    print_info "Applying CSF blocks for $duration_text..."
    echo ""
    local success_count=0
    local fail_count=0
    local i
    for i in "${!ips_to_block[@]}"; do
        ip="${ips_to_block[$i]}"
        score="${scores_to_block[$i]}"
        if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
            echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e " ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done
    echo ""

    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) for $duration_text"
        echo ""
        echo "These blocks will automatically expire after $duration_text"
        echo "To view temporary blocks: csf -t"
        echo "To remove a block early: csf -tr IP"
    fi
    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF
    print_info "Restarting CSF to apply changes..."
    if csf -r >/dev/null 2>&1; then
        print_success "CSF restarted successfully"
    else
        print_warning "CSF restart may have failed - check manually with: csf -r"
    fi
    echo ""

    # Verify domains still work after blocking
    verify_domains_still_working
    show_action_menu
}
# Entry point for the generic IP blocking actions.
#
# Arguments:
#   $1 - "temp" goes straight to the interactive CSF blocking dialog; any
#        other value shows a permanent-block warning and asks for
#        confirmation before opening that same dialog.
execute_ip_blocking() {
    local block_mode="$1" # "temp" or "perm"

    case "$block_mode" in
        temp)
            # Call the existing CSF blocking function
            offer_csf_blocking
            return
            ;;
    esac

    # Permanent blocking
    clear
    print_banner "Permanent IP Blocking"
    echo ""
    print_alert "WARNING: Permanent blocks must be manually removed later"
    echo ""
    echo "This will permanently block all high-risk IPs (score >= 70)"
    echo ""
    read -p "Are you sure you want to proceed? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    offer_csf_blocking
}
# Interactively change CT_LIMIT in the CSF configuration and restart CSF.
#
# Arguments:
#   $1 - optional path of the CSF configuration file
#        (default: /etc/csf/csf.conf)
# Reads:
#   $TEMP_DIR/recommendations.txt - the "|csf_ct_limit|" record supplies the
#                                   suggested value ("to <number>"); 100 is
#                                   used when none is found
# Success is only reported when the CT_LIMIT line exists, the edit succeeded
# and "csf -r" returned success. Always ends by reopening the action menu.
execute_csf_ct_limit() {
    local csf_conf="${1:-/etc/csf/csf.conf}"

    clear
    print_banner "Update CSF Connection Tracking Limit"
    echo ""

    # Check if CSF is installed
    if ! command -v csf >/dev/null 2>&1; then
        print_warning "CSF is not installed on this server"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get recommended limit from recommendation (first match only, so the
    # default offered at the prompt is always a single number)
    local recommended_limit
    recommended_limit=$(grep "|csf_ct_limit|" "$TEMP_DIR/recommendations.txt" 2>/dev/null | grep -oP 'to \K[0-9]+' | head -1)
    recommended_limit=${recommended_limit:-100}

    # Get current CT_LIMIT
    local current_limit
    current_limit=$(grep "^CT_LIMIT" "$csf_conf" 2>/dev/null | grep -oP '"\K[0-9]+' | head -1)
    current_limit=${current_limit:-unknown}

    echo "Current CT_LIMIT: $current_limit"
    echo "Recommended CT_LIMIT: $recommended_limit"
    echo ""
    echo "This will modify $csf_conf and restart CSF"
    echo ""

    local new_limit
    read -r -p "Enter new CT_LIMIT value [$recommended_limit]: " new_limit
    # Use recommended if nothing entered
    [ -z "$new_limit" ] && new_limit=$recommended_limit

    # Validate it's a number (this also keeps the sed replacement safe)
    if ! [[ "$new_limit" =~ ^[0-9]+$ ]]; then
        print_warning "Invalid number"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Update CSF config
    print_info "Updating CT_LIMIT to $new_limit..."
    if [ ! -f "$csf_conf" ]; then
        print_warning "Could not find $csf_conf"
    elif ! grep -q '^CT_LIMIT = ' "$csf_conf"; then
        # sed would silently change nothing; do not claim success
        print_warning "No CT_LIMIT setting found in $csf_conf - nothing changed"
    elif ! sed -i "s/^CT_LIMIT = .*/CT_LIMIT = \"$new_limit\"/" "$csf_conf"; then
        print_warning "Failed to update $csf_conf"
    else
        # Restart CSF
        print_info "Restarting CSF..."
        if csf -r >/dev/null 2>&1; then
            print_success "CT_LIMIT updated successfully to $new_limit"
        else
            print_warning "CT_LIMIT set to $new_limit but CSF restart may have failed - check manually with: csf -r"
        fi
    fi
    echo ""

    # Verify domains still work after CT_LIMIT change
    verify_domains_still_working
    show_action_menu
}
# Append IP deny rules for one domain to that domain's .htaccess file.
#
# Reads:
#   $TEMP_DIR/recommendations.txt - the "|htaccess_domain|" record names the
#                                   target domain ("to <domain>")
#   $SCRIPT_DIR/.sysref           - "DOMAIN|<domain>|..|<docroot>" records;
#                                   the document root is asked for when absent
#   $TEMP_DIR/parsed_logs.txt     - '|'-separated; field 1 IP, field 2 domain
#   $TEMP_DIR/threat_scores.txt   - '|'-separated; field 1 score, field 2 IP
#   $TEMP_DIR/false_positives.txt - optional; records beginning with "ip|"
# Only IPs that hit the target domain with a score >= 70 are written, and
# excluded / false-positive IPs are skipped. Nothing is modified when no IP
# qualifies. Always ends by reopening the action menu.
execute_htaccess_domain_blocking() {
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local parsed_logs="$TEMP_DIR/parsed_logs.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"

    clear
    print_banner "Add Bot Blocking to Domain .htaccess"
    echo ""

    # Get target domain from recommendation
    local target_domain
    target_domain=$(grep "|htaccess_domain|" "$TEMP_DIR/recommendations.txt" 2>/dev/null | head -1 | grep -oP 'to \K[^ ]+' | head -1)
    if [ -z "$target_domain" ]; then
        print_warning "Could not determine target domain"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi
    echo "Target Domain: $target_domain"
    echo ""

    # Find document root for this domain using reference database.
    # Exact field comparison: the dots of the domain must not act as wildcards.
    local doc_root=""
    if [ -s "$SCRIPT_DIR/.sysref" ]; then
        doc_root=$(awk -F'|' -v d="$target_domain" '$1 == "DOMAIN" && $2 == d { print $4; exit }' "$SCRIPT_DIR/.sysref" 2>/dev/null)
    fi
    if [ -z "$doc_root" ]; then
        print_warning "Document root not found in reference database"
        echo "Please enter the document root manually:"
        read -r -p "Document root: " doc_root
    else
        echo "Document root: $doc_root"
    fi
    if [ ! -d "$doc_root" ]; then
        print_warning "Document root does not exist: $doc_root"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    local htaccess_file="$doc_root/.htaccess"
    echo ""
    echo "This will add bot blocking rules to: $htaccess_file"
    echo ""
    local confirm
    read -r -p "Proceed? (yes/no): " confirm
    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get high-risk IPs for this domain: first load every IP scored >= 70,
    # then keep the log entries of the target domain whose IP is in that set.
    # The character filter keeps anything that is not address-like out of
    # the generated Apache directives.
    local -a block_ips=()
    local ip
    if [ -s "$scores_file" ] && [ -s "$parsed_logs" ]; then
        while IFS= read -r ip; do
            # Skip excluded IPs
            if is_excluded_ip "$ip"; then
                continue
            fi
            # Skip false positives
            if [ -s "$fp_file" ] &&
                awk -v ip="$ip" 'index($0, ip "|") == 1 { found = 1; exit } END { exit !found }' "$fp_file" 2>/dev/null; then
                continue
            fi
            block_ips+=("$ip")
        done < <(awk -F'|' -v domain="$target_domain" '
            FILENAME == ARGV[1] {
                if ($1 ~ /^[0-9]+$/ && $1 + 0 >= 70) risky[$2] = 1
                next
            }
            $2 == domain && ($1 in risky) && $1 ~ /^[0-9A-Fa-f:.]+$/ { print $1 }
        ' "$scores_file" "$parsed_logs" 2>/dev/null | sort -u)
    fi

    if [ "${#block_ips[@]}" -eq 0 ]; then
        print_info "No high-risk IPs (score >= 70) found for $target_domain - .htaccess left unchanged"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Create backup; the name is remembered so the path reported afterwards is
    # the file that was really written
    local backup_file=""
    if [ -f "$htaccess_file" ]; then
        backup_file="$htaccess_file.backup.$(date +%Y%m%d_%H%M%S)"
        if ! cp -- "$htaccess_file" "$backup_file"; then
            print_warning "Could not back up $htaccess_file - no changes made"
            echo ""
            read -r -p "Press Enter to continue..."
            show_action_menu
            return
        fi
        print_info "Backed up existing .htaccess"
    fi

    # Generate bot blocking rules
    print_info "Adding bot blocking rules..."
    # A negated Require cannot stand alone: Apache 2.4 rejects it unless it
    # sits in <RequireAll> next to a positive Require.
    # NOTE(review): if this .htaccess already carries its own Require
    # directives, check how they combine with this block.
    if ! {
        echo ""
        echo "# Bot blocking rules added by toolkit on $(date)"
        echo "# High-risk IPs (threat score >= 70)"
        echo "<IfModule mod_authz_core.c>"
        echo "    <RequireAll>"
        echo "        Require all granted"
        for ip in "${block_ips[@]}"; do
            echo "        Require not ip $ip"
        done
        echo "    </RequireAll>"
        echo "</IfModule>"
        echo ""
    } >> "$htaccess_file"; then
        print_warning "Could not write to $htaccess_file"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    print_success "Added blocking rules for ${#block_ips[@]} IPs to $htaccess_file"
    echo ""
    if [ -n "$backup_file" ]; then
        echo "Backup saved to: $backup_file"
        echo ""
    fi

    # Verify domains still work after .htaccess changes
    verify_domains_still_working
    show_action_menu
}
# Write a server-wide Apache configuration file that denies every high-risk
# IP (threat score >= 70), then restart Apache.
#
# Arguments:
#   $1 - optional path of the configuration file to write; by default
#        bot_blocking.conf inside /etc/apache2/conf.d or /etc/httpd/conf.d
# Reads:
#   $TEMP_DIR/threat_scores.txt   - '|'-separated; field 1 score, field 2 IP
#   $TEMP_DIR/false_positives.txt - optional; records beginning with "ip|"
# When apachectl, apache2ctl or httpd is available the new configuration is
# syntax-checked first; on failure the file is rolled back and Apache is not
# restarted. Always ends by reopening the action menu.
execute_apache_global_blocking() {
    local apache_conf="${1:-}"
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"

    clear
    print_banner "Add Global Bot Blocking to Apache"
    echo ""
    print_warning "This feature will add blocking rules to Apache pre-virtualhost configuration"
    echo "This affects ALL domains on the server"
    echo ""

    # Determine Apache config location
    if [ -z "$apache_conf" ]; then
        if [ -d "/etc/apache2/conf.d" ]; then
            apache_conf="/etc/apache2/conf.d/bot_blocking.conf"
        elif [ -d "/etc/httpd/conf.d" ]; then
            apache_conf="/etc/httpd/conf.d/bot_blocking.conf"
        else
            print_warning "Could not determine Apache config directory"
            echo ""
            read -r -p "Press Enter to continue..."
            show_action_menu
            return
        fi
    fi

    echo "Configuration will be written to: $apache_conf"
    echo ""
    local confirm
    read -r -p "Proceed? (yes/no): " confirm
    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Create backup if file exists
    local backup_file=""
    if [ -f "$apache_conf" ]; then
        backup_file="$apache_conf.backup.$(date +%Y%m%d_%H%M%S)"
        if ! cp -- "$apache_conf" "$backup_file"; then
            print_warning "Could not back up $apache_conf - no changes made"
            echo ""
            read -r -p "Press Enter to continue..."
            show_action_menu
            return
        fi
        print_info "Backed up existing configuration"
    fi

    # Collect the IPs to deny. Only address-like values are accepted so that
    # nothing else can end up inside an Apache directive.
    local -a block_ips=()
    local ip
    if [ -s "$scores_file" ]; then
        while IFS= read -r ip; do
            # Skip excluded IPs
            if is_excluded_ip "$ip"; then
                continue
            fi
            # Skip false positives
            if [ -s "$fp_file" ] &&
                awk -v ip="$ip" 'index($0, ip "|") == 1 { found = 1; exit } END { exit !found }' "$fp_file" 2>/dev/null; then
                continue
            fi
            block_ips+=("$ip")
        done < <(awk -F'|' '
            $1 ~ /^[0-9]+(\.[0-9]+)?$/ && $1 + 0 >= 70 &&
            $2 ~ /^[0-9A-Fa-f:.]+(\/[0-9]+)?$/ && !seen[$2]++ { print $2 }
        ' "$scores_file" 2>/dev/null)
    fi

    # Generate global blocking rules
    print_info "Generating global bot blocking configuration..."
    # Require is only allowed in directory context, so the rules are wrapped
    # in <Location "/">. "AuthMerging And" combines them with whatever
    # authorization already applies instead of replacing it, and the negated
    # Require lines need a positive partner inside <RequireAll>.
    if ! {
        echo "# Global bot blocking rules"
        echo "# Generated by toolkit on $(date)"
        echo ""
        echo "<IfModule mod_authz_core.c>"
        echo "    <Location \"/\">"
        echo "        AuthMerging And"
        echo "        <RequireAll>"
        echo "            Require all granted"
        echo "            # Block high-risk IPs (threat score >= 70)"
        for ip in "${block_ips[@]}"; do
            echo "            Require not ip $ip"
        done
        echo "        </RequireAll>"
        echo "    </Location>"
        echo "</IfModule>"
        echo ""
    } > "$apache_conf"; then
        print_warning "Could not write $apache_conf"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi
    print_success "Created global blocking configuration with ${#block_ips[@]} IPs"
    echo ""

    # Syntax-check before restarting: a broken file would stop Apache from
    # coming back up for every site.
    local tester="" candidate
    for candidate in apachectl apache2ctl httpd; do
        if command -v "$candidate" >/dev/null 2>&1; then
            tester="$candidate"
            break
        fi
    done
    if [ -n "$tester" ] && ! "$tester" -t >/dev/null 2>&1; then
        if [ -n "$backup_file" ]; then
            cp -- "$backup_file" "$apache_conf"
        else
            rm -f -- "$apache_conf"
        fi
        print_warning "Apache configuration test failed - changes rolled back, Apache was not restarted"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    echo "Restarting Apache to apply changes..."
    if systemctl restart httpd 2>/dev/null || systemctl restart apache2 2>/dev/null; then
        print_success "Apache restarted successfully"
    else
        print_warning "Could not restart Apache - please restart manually"
    fi
    echo ""

    # Verify domains still work after Apache global blocking
    verify_domains_still_working
    show_action_menu
}
# Placeholder screen for WordPress hardening: prints a list of recommended
# manual steps, waits for Enter and reopens the action menu.
execute_wp_hardening() {
    clear
    print_banner "WordPress Hardening"
    printf '\n'
    print_info "WordPress hardening feature coming soon..."

    # One argument per output line; empty arguments produce the blank lines
    printf '%s\n' \
        "" \
        "Recommended manual actions:" \
        " • Install Wordfence or similar security plugin" \
        " • Enable two-factor authentication" \
        " • Limit login attempts" \
        " • Disable XML-RPC if not needed" \
        " • Use strong passwords" \
        ""

    read -p "Press Enter to continue..."
    show_action_menu
}
# Placeholder screen for rate limiting: points to the other menu actions,
# waits for Enter and reopens the action menu.
execute_rate_limiting() {
    clear
    print_banner "Enable Rate Limiting"
    printf '\n'
    print_info "Rate limiting modules like mod_evasive/mod_security can help with application-level DoS"

    # One argument per output line; empty arguments produce the blank lines
    printf '%s\n' \
        "" \
        "For better bot protection, consider:" \
        " - IP blocking (options 1-3) - Block specific attacking IPs" \
        " - CSF CT_LIMIT adjustment (option 4) - Limit connections per IP" \
        " - .htaccess rules (option 5) - Domain-specific blocking" \
        "" \
        "This option (rate limiting) is currently a placeholder for future implementation." \
        ""

    read -p "Press Enter to continue..."
    show_action_menu
}
# execute_csf_portflood() removed - not appropriate for web servers with 400+ sites
# Blocking ports 80/443 based on connection count would break legitimate traffic
# Interactively enable CSF SYNFLOOD protection (SYNFLOOD = "1",
# SYNFLOOD_RATE = "100/s", SYNFLOOD_BURST = "150") and restart CSF.
#
# Arguments:
#   $1 - optional path of the CSF configuration file
#        (default: /etc/csf/csf.conf)
# Success is only reported when all three settings exist in the file, the
# edit succeeded and "csf -r" returned success. Always ends by reopening the
# action menu.
execute_csf_synflood() {
    local csf_conf="${1:-/etc/csf/csf.conf}"

    clear
    print_banner "Enable CSF SYNFLOOD Protection"
    echo ""

    if ! command -v csf >/dev/null 2>&1; then
        print_warning "CSF is not installed on this server"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get current SYNFLOOD setting
    local current_synflood
    current_synflood=$(grep "^SYNFLOOD = " "$csf_conf" 2>/dev/null | grep -oP '"\K[^"]+' | head -1)
    current_synflood=${current_synflood:-0}

    echo "Current SYNFLOOD protection: ${current_synflood}"
    echo ""
    echo "SYNFLOOD protects against SYN flood DDoS attacks by limiting"
    echo "the rate of new TCP connections."
    echo ""
    echo "Recommended settings:"
    echo " SYNFLOOD = \"1\" (enable protection)"
    echo " SYNFLOOD_RATE = \"100/s\" (100 connections per second)"
    echo " SYNFLOOD_BURST = \"150\" (allow burst of 150)"
    echo ""

    local confirm
    read -r -p "Enable SYNFLOOD protection? (yes/no): " confirm
    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Update CSF config
    print_info "Enabling SYNFLOOD protection..."
    if [ ! -f "$csf_conf" ]; then
        print_warning "Could not find $csf_conf"
    else
        # sed silently changes nothing for a missing key, so check first
        local key missing=""
        for key in SYNFLOOD SYNFLOOD_RATE SYNFLOOD_BURST; do
            grep -q "^$key = " "$csf_conf" || missing="$missing $key"
        done

        if [ -n "$missing" ]; then
            print_warning "Setting(s) not found in $csf_conf:$missing - nothing changed"
        elif ! sed -i \
            -e 's/^SYNFLOOD = .*/SYNFLOOD = "1"/' \
            -e 's/^SYNFLOOD_RATE = .*/SYNFLOOD_RATE = "100\/s"/' \
            -e 's/^SYNFLOOD_BURST = .*/SYNFLOOD_BURST = "150"/' \
            "$csf_conf"; then
            print_warning "Failed to update $csf_conf"
        else
            # Restart CSF
            print_info "Restarting CSF..."
            if csf -r >/dev/null 2>&1; then
                print_success "SYNFLOOD protection enabled"
            else
                print_warning "SYNFLOOD settings written but CSF restart may have failed - check manually with: csf -r"
            fi
        fi
    fi
    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
}
# Show instructions for setting up MaxMind GeoIP2 country blocking with CSF.
#
# The wording depends on whether the mmdbinspect command is available. When
# $TEMP_DIR/high_risk_networks.txt is non-empty, its first ten
# "<count> <network>" lines are listed as well. Nothing is installed here;
# the function only prints, waits for Enter and reopens the action menu.
execute_install_maxmind() {
    clear
    print_banner "Install MaxMind GeoIP2 for Country Blocking"
    printf '\n'

    # Check if already installed
    if ! command -v mmdbinspect >/dev/null 2>&1; then
        print_info "MaxMind GeoIP2 not detected"
        printf '%s\n' \
            "" \
            "To install MaxMind GeoIP2 for CSF country blocking:" \
            "" \
            "1. Sign up for free MaxMind account:" \
            " https://www.maxmind.com/en/geolite2/signup" \
            "" \
            "2. Get your license key from:" \
            " https://www.maxmind.com/en/accounts/current/license-key" \
            "" \
            "3. Install GeoIP Perl module:" \
            " yum install perl-Geo-IP" \
            " # or" \
            " cpan -i Geo::IP" \
            "" \
            "4. Test CSF GeoIP support:" \
            " /usr/local/csf/bin/csftest.pl -g" \
            "" \
            "5. Configure CC_DENY in /etc/csf/csf.conf:" \
            " CC_DENY = \"CN,RU\" (example: block China & Russia)" \
            "" \
            "6. Restart CSF:" \
            " csf -r" \
            ""
    else
        print_success "MaxMind GeoIP2 tools already installed"
        printf '%s\n' \
            "" \
            "Next steps:" \
            "1. Sign up for free license at: https://www.maxmind.com/en/geolite2/signup" \
            "2. Get your license key from account page" \
            "3. Install CSF GeoIP module: /usr/local/csf/bin/csftest.pl -g" \
            "4. Configure CC_DENY in /etc/csf/csf.conf with country codes" \
            "" \
            "Example: CC_DENY = \"CN,RU,KP\" (block China, Russia, North Korea)" \
            ""
    fi

    # Show geographic analysis if available
    if [ -s "$TEMP_DIR/high_risk_networks.txt" ]; then
        printf '%s\n' \
            "=========================================================══" \
            "High-Risk Networks Detected:" \
            ""
        # Runs in a pipeline so the loop variables stay inside the subshell
        head -10 "$TEMP_DIR/high_risk_networks.txt" | while read hits net; do
            echo "$net - $hits high-risk IPs"
        done
        printf '\n'
    fi

    read -p "Press Enter to continue..."
    show_action_menu
}
################################################################################
# INTERACTIVE CSF BLOCKING
################################################################################
# List every high-risk IP (threat score >= 70) and let the user choose how to
# block them with CSF: 1 hour, 24 hours, permanently, or not at all.
#
# Reads:
#   $TEMP_DIR/threat_scores.txt   - '|'-separated; field 1 score, field 2 IP
#   $TEMP_DIR/false_positives.txt - optional; records beginning with "ip|"
#   $TEMP_DIR/bot_ips.txt         - optional; "ip|requests|..." records used
#                                   for the request count shown per IP
# IPs for which is_excluded_ip succeeds and listed false positives are left
# out. Returns 0 without prompting when CSF is missing or nothing qualifies.
offer_csf_blocking() {
    echo ""
    echo "==============================================================="
    print_header "🛡 INTERACTIVE THREAT BLOCKING"

    # Check if CSF is installed
    if ! command -v csf >/dev/null 2>&1; then
        print_warning "CSF (ConfigServer Security & Firewall) is not installed"
        echo "Cannot offer automatic blocking without CSF"
        return 0
    fi

    # Get high-risk IPs (score >= 70), highest score first
    local high_risk_ips=()
    local ip_scores=()
    local score ip
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        while IFS='|' read -r score ip; do
            # Ignore malformed records instead of letting the numeric test fail
            [[ "$score" =~ ^[0-9]+$ ]] || continue
            # Only include scores >= 70 (HIGH and CRITICAL)
            [ "$score" -ge 70 ] || continue
            [ -n "$ip" ] || continue
            # Skip excluded IPs
            if is_excluded_ip "$ip"; then
                continue
            fi
            # Skip false positives
            if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
                continue
            fi
            high_risk_ips+=("$ip")
            ip_scores+=("$score")
        done < <(awk -F'|' '{print $1 "|" $2}' "$TEMP_DIR/threat_scores.txt" | sort -rn)
    fi

    # If no high-risk IPs, nothing to block
    if [ ${#high_risk_ips[@]} -eq 0 ]; then
        print_info "No high-risk IPs detected (score >= 70)"
        return 0
    fi

    # Show IPs that would be blocked
    echo ""
    echo "Found ${#high_risk_ips[@]} high-risk IP(s) with threat score >= 70:"
    echo ""
    local count=0
    local i requests
    for i in "${!high_risk_ips[@]}"; do
        count=$((count + 1))
        ip="${high_risk_ips[$i]}"
        score="${ip_scores[$i]}"
        # First matching record only; default to 0 when the IP is not listed
        requests=$(awk -F'|' -v ip="$ip" '$1 == ip { print $2; exit }' "$TEMP_DIR/bot_ips.txt" 2>/dev/null)
        requests=${requests:-0}
        # Color code by severity
        if [ "$score" -ge 90 ]; then
            echo -e " ${RED}[$count] $ip${NC} - Risk: ${RED}$score/100 CRITICAL${NC} ($requests requests)"
        elif [ "$score" -ge 80 ]; then
            echo -e " ${YELLOW}[$count] $ip${NC} - Risk: ${YELLOW}$score/100 HIGH${NC} ($requests requests)"
        else
            echo -e " [$count] $ip - Risk: $score/100 ELEVATED ($requests requests)"
        fi
    done
    echo ""
    echo "==============================================================="
    echo ""

    # Ask user if they want to block
    echo -e "${BOLD}Would you like to temporarily block these IPs using CSF?${NC}"
    echo ""
    echo "Options:"
    echo " 1) Block for 1 hour (temporary - auto-expires)"
    echo " 2) Block for 24 hours (temporary - auto-expires)"
    echo " 3) Block permanently (requires manual unblock)"
    echo " 4) Don't block (manual review)"
    echo ""
    local block_choice
    read -r -p "Select option [1-4]: " block_choice

    case "$block_choice" in
        1)
            # 1 hour in seconds
            apply_csf_blocks 3600 "1 hour" "${high_risk_ips[@]}"
            ;;
        2)
            # 24 hours in seconds
            apply_csf_blocks 86400 "24 hours" "${high_risk_ips[@]}"
            ;;
        3)
            apply_csf_permanent_blocks "${high_risk_ips[@]}"
            ;;
        4)
            print_info "Skipping automatic blocking - manual review recommended"
            echo "You can block IPs manually using: csf -td IP DURATION"
            ;;
        *)
            print_warning "Invalid option - skipping blocking"
            ;;
    esac
}
# Temporarily deny a list of IPs with "csf -td", then restart CSF.
#
# Arguments:
#   $1  - block duration in seconds, passed to csf
#   $2  - human-readable form of the duration, used in messages
#   $3+ - IP addresses to block
# Reads:
#   $TEMP_DIR/threat_scores.txt - '|'-separated; field 1 score, field 2 IP.
#                                 The score goes into the CSF comment, or
#                                 "unknown" when the IP is not listed.
apply_csf_blocks() {
    local duration=$1
    local duration_text=$2
    shift 2
    local ips=("$@")

    echo ""
    print_info "Applying temporary CSF blocks for $duration_text..."
    echo ""

    local success_count=0
    local fail_count=0
    local ip score
    for ip in "${ips[@]}"; do
        # Get threat score for comment. The IP is compared against field 2
        # exactly, so the lookup works whatever fields follow it, and the
        # default really applies when nothing is found.
        score=$(awk -F'|' -v ip="$ip" '$2 == ip { print $1; exit }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null)
        score=${score:-unknown}

        # Use csf -td for temporary deny
        if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
            echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e " ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done
    echo ""

    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) for $duration_text"
        echo ""
        echo "These blocks will automatically expire after $duration_text"
        echo "To view temporary blocks: csf -t"
        echo "To remove a block early: csf -tr IP"
    fi
    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF to apply changes
    print_info "Restarting CSF to apply changes..."
    if csf -r >/dev/null 2>&1; then
        print_success "CSF restarted successfully"
    else
        print_warning "CSF restart may have failed - check manually with: csf -r"
    fi
}
# Permanently deny a list of IPs with "csf -d" after an explicit "yes"
# confirmation, then restart CSF.
#
# Arguments:
#   $@ - IP addresses to block
# Reads:
#   $TEMP_DIR/threat_scores.txt - '|'-separated; field 1 score, field 2 IP.
#                                 The score goes into the CSF comment, or
#                                 "unknown" when the IP is not listed.
# Returns 0 without touching CSF when the confirmation is not "yes".
apply_csf_permanent_blocks() {
    local ips=("$@")

    echo ""
    print_warning "Applying PERMANENT CSF blocks..."
    echo "These will require manual removal using: csf -dr IP"
    echo ""
    local confirm
    read -r -p "Are you sure? This is permanent! (yes/no): " confirm
    if [ "$confirm" != "yes" ]; then
        print_info "Cancelled permanent blocking"
        return 0
    fi
    echo ""

    local success_count=0
    local fail_count=0
    local ip score
    for ip in "${ips[@]}"; do
        # The IP is compared against field 2 exactly, so the lookup works
        # whatever fields follow it, and the default really applies when
        # nothing is found.
        score=$(awk -F'|' -v ip="$ip" '$2 == ip { print $1; exit }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null)
        score=${score:-unknown}

        # Use csf -d for permanent deny
        if csf -d "$ip" "Bot threat score: $score/100 - Permanently blocked by toolkit" >/dev/null 2>&1; then
            echo -e " ${GREEN}${NC} Permanently blocked $ip (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e " ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done
    echo ""

    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) permanently"
        echo ""
        echo "To view blocked IPs: csf -g"
        echo "To remove a block: csf -dr IP"
    fi
    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF
    print_info "Restarting CSF to apply changes..."
    if csf -r >/dev/null 2>&1; then
        print_success "CSF restarted successfully"
    else
        print_warning "CSF restart may have failed - check manually with: csf -r"
    fi
}
# Script entry point: call main with the command-line arguments passed through
# unchanged. Placed last so every function above is defined before it runs.
main "$@"