5902ea990d
Line 2131: Changed repeat attacker detection from grep -Fx -f to comm -12 - Problem: Using grep -F with pattern file from process substitution is unsafe - Solution: Use comm command which is designed for set intersection operations - From: grep -Fx -f <(awk ...) known_attackers.txt - To: comm -12 <(awk ... | sort -u) <(sort -u known_attackers.txt) - Effect: Same logic but cleaner and safer IP comparison This fixes QA CRITICAL issue at line 2131.
4668 lines
182 KiB
Bash
Executable File
4668 lines
182 KiB
Bash
Executable File
#!/bin/bash
|
||
set -eo pipefail
|
||
|
||
#############################################################################
|
||
# Apache/cPanel Domain Log Bot & Botnet Analyzer
|
||
# Version: 3.1 Enhanced (with Library Integration)
|
||
# Advanced log analysis for bot activity, security threats, and botnets
|
||
#
|
||
# Features:
|
||
# - Comprehensive bot classification (legitimate, AI, monitoring, suspicious)
|
||
# - Enhanced attack vector detection (SQL injection, XSS, path traversal,
|
||
# RCE/shell upload, info disclosure, login bruteforce)
|
||
# - Threat scoring system (0-100 risk scores for each IP)
|
||
# - Time-series analysis with hourly traffic visualization
|
||
# - Response code intelligence (what are bots finding?)
|
||
# - False positive detection for legitimate monitoring services
|
||
# - Bandwidth cost estimation for bot traffic
|
||
# - Botnet pattern analysis (coordinated attacks, DDoS detection)
|
||
# - Prioritized blocklists sorted by threat severity
|
||
# - Actionable reports with copy-paste ready configurations
|
||
# - Performance optimized for large log files (>500k entries)
|
||
# - User filtering (analyze all users or specific user)
|
||
# - Auto-detects log directory based on control panel
|
||
#############################################################################
|
||
|
||
# Load libraries
|
||
# Toolkit root is two directories above this script; every shared library
# is sourced from its lib/ directory, in this order.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
for _toolkit_lib in \
    common-functions.sh \
    system-detect.sh \
    user-manager.sh \
    ip-reputation.sh \
    bot-signatures.sh \
    attack-patterns.sh \
    threat-intelligence.sh; do
    source "$SCRIPT_DIR/lib/$_toolkit_lib"
done
unset _toolkit_lib

# Default configuration (auto-detected from system)
LOG_DIR="${SYS_LOG_DIR:-/var/log/apache2/domlogs}"

# Use toolkit's tmp directory instead of system /tmp to avoid filling it up.
# On large servers with 200+ domains, compressed temp files can still be
# 50-100MB. Using toolkit's tmp dir means:
# - Won't fill up system /tmp
# - Gets auto-cleaned when toolkit is removed
# - Included in cleanup script (clean-and-push-toolkit.sh)
TOOLKIT_TMP_DIR="$SCRIPT_DIR/tmp"

# Baseline history directory (stores 30 days of historical data per domain)
BASELINE_DIR="$TOOLKIT_TMP_DIR/baseline_history"

mkdir -p "$TOOLKIT_TMP_DIR" 2>/dev/null
mkdir -p "$BASELINE_DIR" 2>/dev/null

# Per-run scratch directory and default report path
TEMP_DIR="$TOOLKIT_TMP_DIR/bot_analysis_$$"
OUTPUT_FILE="$TOOLKIT_TMP_DIR/bot_analysis_report_$(date +%Y%m%d_%H%M%S).txt"

# Filters: an empty value means "no restriction"
DAYS_BACK=""    # otherwise: number of days to look back
HOURS_BACK=""   # otherwise: number of hours to look back
FILTER_USER=""  # otherwise: a single user to analyze

# Cache CSF availability once so later code does not repeat the lookup
CSF_AVAILABLE=false
if [ -f /etc/csf/csf.conf ] && command -v csf >/dev/null 2>&1; then
    CSF_AVAILABLE=true
fi
|
||
|
||
# Parse command line arguments
|
||
#######################################
# Abort with a message when an option that needs a value is the last word
# on the command line.
# Arguments: $1 - option name, $2 - number of words left (option included)
#######################################
_cli_require_value() {
    if [ "$2" -lt 2 ]; then
        echo "Error: option $1 requires a value" >&2
        echo "Use -h for help" >&2
        exit 1
    fi
}

#######################################
# Abort unless the value is a positive whole number.
# Arguments: $1 - option name, $2 - value supplied by the user
#######################################
_cli_require_positive_int() {
    # 10# forces base 10 so values such as "08" are not read as octal
    if ! [[ "$2" =~ ^[0-9]+$ ]] || [ "$((10#$2))" -le 0 ]; then
        echo "Error: option $1 expects a positive whole number, got '$2'" >&2
        echo "Use -h for help" >&2
        exit 1
    fi
}

#######################################
# Parse command line arguments.
# Globals written: DAYS_BACK, HOURS_BACK, LOG_DIR, OUTPUT_FILE, FILTER_USER
# Arguments: the script's positional parameters
# Exits: 0 after printing help; 1 on an unknown option, a missing value, or
#        a non-numeric -d/-H value
#######################################
_parse_cli_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -d|--days)
                _cli_require_value "$1" "$#"
                _cli_require_positive_int "$1" "$2"
                DAYS_BACK=$((10#$2))
                shift 2
                ;;
            -H|--hours)
                _cli_require_value "$1" "$#"
                _cli_require_positive_int "$1" "$2"
                HOURS_BACK=$((10#$2))
                shift 2
                ;;
            -l|--log-dir)
                _cli_require_value "$1" "$#"
                LOG_DIR="$2"
                shift 2
                ;;
            -o|--output)
                _cli_require_value "$1" "$#"
                OUTPUT_FILE="$2"
                shift 2
                ;;
            -u|--user)
                _cli_require_value "$1" "$#"
                FILTER_USER="$2"
                shift 2
                ;;
            -h|--help)
                echo "Apache/cPanel Domain Log Bot & Botnet Analyzer v3.1"
                echo ""
                echo "Usage: $0 [-d DAYS | -H HOURS] [-u USER] [-l LOG_DIR] [-o OUTPUT_FILE]"
                echo ""
                echo "Options:"
                echo " -d, --days DAYS Analyze only logs from last N days (24-hour periods)"
                echo " -H, --hours HOURS Analyze only logs from last N hours"
                echo " -u, --user USER Analyze only logs for specific cPanel user"
                echo " -l, --log-dir DIR Custom log directory (auto-detected by default)"
                echo " -o, --output FILE Custom output file path"
                echo " -h, --help Show this help message"
                echo ""
                echo "Examples:"
                echo " $0 # Analyze all logs in default directory"
                echo " $0 -d 7 # Analyze logs from last 7 days"
                echo " $0 -H 6 # Analyze logs from last 6 hours"
                echo " $0 -l /custom/path # Use custom log directory"
                echo ""
                echo "Note: If both -d and -H are specified, only -H (hours) will be used."
                echo ""
                exit 0
                ;;
            *)
                echo "Unknown option: $1"
                echo "Use -h for help"
                exit 1
                ;;
        esac
    done
}

_parse_cli_args "$@"
|
||
|
||
# Interactive prompts for missing options
|
||
#######################################
# Interactively ask which time window to analyze.
# Globals written: HOURS_BACK or DAYS_BACK (neither for "all logs");
#                  time_choice, custom_hours, custom_days hold the raw input
# Returns: 0 once a valid choice was made, 1 when stdin reaches end-of-file
#          before a valid choice (instead of re-prompting forever)
#######################################
prompt_time_range() {
    clear
    print_banner "Bot Analyzer - Time Range Selection"
    echo ""
    echo -e " ${GREEN}1)${NC} All available logs"
    echo -e " ${GREEN}2)${NC} Last 1 hour"
    echo -e " ${GREEN}3)${NC} Last 6 hours"
    echo -e " ${GREEN}4)${NC} Last 24 hours"
    echo -e " ${GREEN}5)${NC} Last 7 days"
    echo -e " ${GREEN}6)${NC} Last 30 days"
    echo -e " ${GREEN}7)${NC} Custom hours"
    echo -e " ${GREEN}8)${NC} Custom days"
    echo ""

    # Re-prompt until the answer is valid
    while true; do
        if ! read -r -p "Select time range (1-8): " time_choice; then
            print_error "No input available. Cannot select a time range"
            return 1
        fi

        case "$time_choice" in
            1) return 0 ;; # All logs - no filter
            2) HOURS_BACK=1; return 0 ;;
            3) HOURS_BACK=6; return 0 ;;
            4) HOURS_BACK=24; return 0 ;;
            5) DAYS_BACK=7; return 0 ;;
            6) DAYS_BACK=30; return 0 ;;
            7)
                while true; do
                    if ! read -r -p "Enter number of hours: " custom_hours; then
                        print_error "No input available. Cannot select a time range"
                        return 1
                    fi
                    # 10# forces base 10 so "08" is stored as 8, not rejected
                    # later as an invalid octal number
                    if [[ "$custom_hours" =~ ^[0-9]+$ ]] && [ "$((10#$custom_hours))" -gt 0 ]; then
                        HOURS_BACK=$((10#$custom_hours))
                        return 0
                    fi
                    print_error "Invalid input. Please enter a positive number"
                done
                ;;
            8)
                while true; do
                    if ! read -r -p "Enter number of days: " custom_days; then
                        print_error "No input available. Cannot select a time range"
                        return 1
                    fi
                    if [[ "$custom_days" =~ ^[0-9]+$ ]] && [ "$((10#$custom_days))" -gt 0 ]; then
                        DAYS_BACK=$((10#$custom_days))
                        return 0
                    fi
                    print_error "Invalid input. Please enter a positive number"
                done
                ;;
            *)
                print_error "Invalid choice. Please enter 1-8"
                ;;
        esac
    done
}
|
||
|
||
#######################################
# Interactively ask whether to analyze all users or a single one.
# Globals written: FILTER_USER (only when a specific user was picked);
#                  user_choice holds the raw menu input
# Returns: 0 once a valid menu choice was made, 1 when stdin reaches
#          end-of-file before a valid choice
#######################################
prompt_user_scope() {
    local selected

    clear
    print_banner "Bot Analyzer - User Scope Selection"
    echo ""
    echo -e " ${GREEN}1)${NC} All users (system-wide analysis)"
    echo -e " ${GREEN}2)${NC} Specific user"
    echo ""

    # Re-prompt until the answer is valid
    while true; do
        if ! read -r -p "Select option (1-2): " user_choice; then
            print_error "No input available. Cannot select a user scope"
            return 1
        fi

        case "$user_choice" in
            1)
                return 0
                ;;
            2)
                echo ""
                # Assign separately from "local": combined, the status seen
                # afterwards is that of "local" (always 0) and a failed or
                # cancelled selection would still be accepted.
                if selected=$(select_user_interactive "Select user to analyze") \
                    && [ "$selected" != "ALL" ]; then
                    FILTER_USER="$selected"
                fi
                return 0
                ;;
            *)
                print_error "Invalid choice. Please enter 1 or 2"
                ;;
        esac
    done
}
|
||
|
||
# Interactive prompts for missing options
|
||
# Prompt for time range if not specified
|
||
# Ask for a time range only when neither a day nor an hour window is set
if [[ -z "$DAYS_BACK" && -z "$HOURS_BACK" ]]; then
    prompt_time_range
fi

# Ask for the user scope only when no user filter is set
if [[ -z "$FILTER_USER" ]]; then
    prompt_user_scope
fi

# When both windows are set, the hour window wins and the day window is cleared
if [[ -n "$DAYS_BACK" && -n "$HOURS_BACK" ]]; then
    echo -e "${YELLOW}Warning: Both days and hours specified. Using hours filter only.${NC}" >&2
    DAYS_BACK=""
fi
|
||
|
||
# Color codes for terminal output
|
||
# ANSI escape sequences, stored unexpanded (they are rendered by "echo -e")
NC='\033[0m'          # reset / no color
BOLD='\033[1m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
|
||
|
||
# Check for required commands
|
||
#######################################
# Verify that the external tools this script relies on are usable.
# Globals read: RED, NC
# Outputs: an error message on stderr when something is missing
# Exits: 1 when a command is missing or awk lacks the required GNU features
#######################################
check_dependencies() {
    local -a missing_deps=()
    local cmd

    for cmd in awk grep sort uniq find sed head tail cut; do
        if ! command -v "$cmd" >/dev/null 2>&1; then
            missing_deps+=("$cmd")
        fi
    done

    if [ "${#missing_deps[@]}" -gt 0 ]; then
        echo -e "${RED}Error: Missing required commands: ${missing_deps[*]}${NC}" >&2
        exit 1
    fi

    # The log parser uses the 3-argument form of match() and mktime(), both
    # GNU awk extensions. Other awk implementations reject the program, and
    # because the parser hides awk errors the only symptom would be an empty
    # result, so probe for the features up front.
    if ! awk 'BEGIN {
            if (!match("a1", /([0-9])/, m) || m[1] != "1") exit 1
            if (mktime("2020 01 01 00 00 00") < 0) exit 1
        }' >/dev/null 2>&1; then
        echo -e "${RED}Error: GNU awk (gawk) is required: awk must support 3-argument match() and mktime()${NC}" >&2
        exit 1
    fi
}
|
||
|
||
# Check disk space
|
||
#######################################
# Warn when the filesystem holding the toolkit has less than 100MB free and
# let the operator decide whether to continue.
# Globals read: SCRIPT_DIR, YELLOW, NC
# Outputs: warnings on stderr; may prompt on the terminal
# Returns: 0 when there is enough space, the operator agreed to continue,
#          or the free space could not be determined
# Exits: 1 when space is low and the operator does not answer y/Y
#######################################
check_disk_space() {
    local available_kb
    local check_path="$SCRIPT_DIR"

    # -P keeps each filesystem on a single line (long device names otherwise
    # wrap and shift the columns); -k fixes the unit to 1024-byte blocks.
    # The "|| ..." keeps a failing df from aborting the script under
    # "set -e -o pipefail" before the warning below can be shown.
    available_kb=$(df -Pk "$check_path" 2>/dev/null | awk 'NR == 2 { print $4 }') \
        || available_kb=""

    if ! [[ "$available_kb" =~ ^[0-9]+$ ]]; then
        echo -e "${YELLOW}Warning: Cannot determine available disk space for toolkit directory${NC}" >&2
        return 0
    fi

    if [ "$available_kb" -lt 102400 ]; then # Less than 100MB
        echo -e "${YELLOW}Warning: Low disk space in toolkit directory: $((available_kb/1024))MB available${NC}" >&2
        read -p "Continue anyway? (y/N): " -n 1 -r
        echo
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            exit 1
        fi
    fi
}
|
||
|
||
# Run dependency checks
|
||
check_dependencies
check_disk_space

# Create temp directory
if ! mkdir -p "$TEMP_DIR"; then
    echo -e "${RED}Error: Cannot create temp directory: $TEMP_DIR${NC}" >&2
    exit 1
fi

# Cleanup on exit. The directory name is captured now, as before, but is
# shell-quoted with %q so a path containing quotes, "$" or backticks cannot
# break or alter the command the trap runs later.
printf -v _bot_analysis_cleanup_cmd 'rm -rf -- %q' "$TEMP_DIR"
trap "$_bot_analysis_cleanup_cmd" EXIT
unset _bot_analysis_cleanup_cmd
|
||
|
||
#############################################################################
|
||
# Bot Signature Database
|
||
#############################################################################
|
||
# NOTE: Bot signatures now loaded from lib/bot-signatures.sh
|
||
# Arrays available: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS
|
||
|
||
#############################################################################
|
||
# Helper Functions
|
||
#############################################################################
|
||
|
||
# Print a section title framed by two rules, in cyan, with a blank line
# before and after.
# Arguments: $1 - title text
print_header() {
    local rule="==============================================================="
    echo -e "\n${CYAN}${rule}${NC}\n${CYAN}$1${NC}\n${CYAN}${rule}${NC}\n"
}
|
||
|
||
# Print a message in red.
# Arguments: $1 - message text
print_alert() {
    local message=$1
    echo -e "${RED}${message}${NC}"
}
|
||
|
||
# Print a message in yellow.
# Arguments: $1 - message text
print_warning() {
    local message=$1
    echo -e "${YELLOW}${message}${NC}"
}
|
||
|
||
# Print a message in blue, preceded by a single space.
# Arguments: $1 - message text
print_info() {
    local message=$1
    echo -e "${BLUE} ${message}${NC}"
}
|
||
|
||
# Print a message in green.
# Arguments: $1 - message text
print_success() {
    local message=$1
    echo -e "${GREEN}${message}${NC}"
}
|
||
|
||
#############################################################################
|
||
# Log Parsing Functions
|
||
#############################################################################
|
||
|
||
#######################################
# Parse access logs into a pipe-delimited work file.
#
# Each output record is:
#   ip|domain|url|status|size|user_agent|method|timestamp|referer|accept_lang
#
# Globals read: INTERWORX_MODE, SYS_CONTROL_PANEL, LOG_DIR, HOURS_BACK,
#               DAYS_BACK, FILTER_USER, TEMP_DIR, and user_domains (a
#               newline-separated domain list that is not set in this
#               function and must be provided by the caller when
#               FILTER_USER is used)
# Outputs: progress and status messages on stdout;
#          writes $TEMP_DIR/parsed_logs.txt and, in the background,
#          $TEMP_DIR/parsed_logs.txt.gz
# Returns: 0 when at least one entry was parsed, 1 otherwise
# Requires: GNU awk (3-argument match() and mktime())
#######################################
parse_logs() {
    if [ "$INTERWORX_MODE" = "yes" ]; then
        print_info "Parsing InterWorx domain logs from: /home/*/var/*/logs/"
    else
        print_info "Parsing logs from: $LOG_DIR"
    fi

    local -a find_opts=()
    local cutoff_epoch=""

    # Add time filter if specified (hours takes precedence over days).
    # The cutoff is computed once here and handed to awk as a number, instead
    # of building a shell command inside awk for every log file.
    if [ -n "$HOURS_BACK" ]; then
        find_opts+=(-mmin "-$((HOURS_BACK * 60))")
        cutoff_epoch=$(date -d "$HOURS_BACK hours ago" +%s 2>/dev/null \
            || date -v-"${HOURS_BACK}"H +%s 2>/dev/null) || cutoff_epoch=""
        print_info "Filtering logs from last $HOURS_BACK hours"
    elif [ -n "$DAYS_BACK" ]; then
        find_opts+=(-mtime "-$DAYS_BACK")
        cutoff_epoch=$(date -d "$DAYS_BACK days ago" +%s 2>/dev/null \
            || date -v-"${DAYS_BACK}"d +%s 2>/dev/null) || cutoff_epoch=""
        print_info "Filtering logs from last $DAYS_BACK days"
    fi

    # Determine log file search locations based on control panel
    local -a log_search_roots
    local log_search_name
    if [ "$INTERWORX_MODE" = "yes" ]; then
        # InterWorx: Official docs from https://appendix.interworx.com/current/nodeworx/general/other/log-file-locations.html
        # HTTP:  /home/{user}/var/{domain}/logs/transfer.log
        # HTTPS: /home/{user}/var/{domain}/logs/transfer-ssl.log
        # The glob is deliberately unquoted so the shell expands it; find
        # does not expand wildcards in its starting points.
        log_search_roots=(/home/*/var/*/logs)
        log_search_name="transfer*.log"
    else
        # cPanel: /var/log/apache2/domlogs/domain.com or domain.com-ssl_log
        # Plesk: Research verified paths from https://docs.plesk.com/en-US/obsidian/
        #   Apache HTTP:  /var/www/vhosts/system/{domain}/logs/access_log
        #   Apache HTTPS: /var/www/vhosts/system/{domain}/logs/access_ssl_log
        #   nginx HTTP:   /var/www/vhosts/system/{domain}/logs/proxy_access_log
        #   nginx HTTPS:  /var/www/vhosts/system/{domain}/logs/proxy_access_ssl_log
        #   Note: /var/www/vhosts/{domain}/logs/ are hardlinks (backward compat)
        log_search_roots=("$LOG_DIR")
        log_search_name="*"
    fi

    # Parse all domain logs
    local file_count=0
    local progress_interval=5 # Show progress every 5 files
    local logfile domain
    echo ""
    {
        find "${log_search_roots[@]}" -type f -name "$log_search_name" ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | while read -r logfile; do
            # Skip empty files
            [ -s "$logfile" ] || continue

            # Extract domain name based on control panel
            if [ "$INTERWORX_MODE" = "yes" ]; then
                # InterWorx: /home/user/var/domain.com/logs/transfer*.log
                domain=$(echo "$logfile" | sed -n 's|^/home/.*/var/\([^/]*\)/logs/.*|\1|p')
            elif [ "$SYS_CONTROL_PANEL" = "plesk" ]; then
                # Plesk: /var/www/vhosts/system/domain.com/logs/{access_log,access_ssl_log,proxy_*}
                domain=$(echo "$logfile" | sed -n 's|^/var/www/vhosts/system/\([^/]*\)/logs/.*|\1|p')
            else
                # cPanel: file name is domain.com or domain.com-ssl_log
                domain=${logfile##*/}
                domain=${domain%-ssl_log}
            fi

            # Skip if domain extraction failed
            [ -z "$domain" ] && continue

            # User filtering: skip domains not belonging to the specified user
            if [ -n "$FILTER_USER" ]; then
                if ! printf '%s\n' "$user_domains" | grep -qFx -- "$domain"; then
                    continue
                fi
            fi

            # Show progress every N files. It is written to fd 3 (the stdout
            # of this function) because fd 1 inside this group is the parsed
            # data file and must receive log records only.
            file_count=$((file_count + 1))
            if [ $((file_count % progress_interval)) -eq 0 ]; then
                echo -ne "\r Parsed $file_count log files... (current: $domain)" >&3
            fi

            # Parse Apache Combined Log Format
            # Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT"
            awk -v domain="$domain" -v cutoff_epoch="$cutoff_epoch" '
            BEGIN {
                # Month name to number lookup
                month["Jan"]=1; month["Feb"]=2; month["Mar"]=3; month["Apr"]=4
                month["May"]=5; month["Jun"]=6; month["Jul"]=7; month["Aug"]=8
                month["Sep"]=9; month["Oct"]=10; month["Nov"]=11; month["Dec"]=12
            }
            {
                # Skip empty lines and malformed entries
                if (NF < 10 || length($0) < 50) next

                # Client address is the first space-separated field
                ip = $1

                # Timestamp is the text between square brackets
                if (match($0, /\[([^\]]+)\]/, ts)) {
                    timestamp = ts[1]
                } else {
                    timestamp = "unknown"
                }

                # Drop entries older than the cutoff (only when a filter is set)
                if (cutoff_epoch != "" && timestamp != "unknown") {
                    # Timestamp looks like 31/Dec/2025:10:30:15 -0500;
                    # keep the part before the timezone and split it
                    split(timestamp, ts_parts, " ")
                    split(ts_parts[1], dt, /[\/:]/)
                    mon = month[dt[2]]

                    if (mon != "") {
                        # mktime takes "YYYY MM DD HH MM SS"
                        log_epoch = mktime(dt[3] " " mon " " dt[1] " " dt[4] " " dt[5] " " dt[6])
                        if (log_epoch < cutoff_epoch) {
                            next
                        }
                    }
                }

                # Extract HTTP method, URL, status and size
                if (match($0, /"([A-Z]+) ([^ ]+) [^"]*" ([0-9]+) ([0-9-]+)/, req)) {
                    http_method = req[1]
                    request_url = req[2]
                    status = req[3]
                    size = req[4]
                } else {
                    # Fallback for malformed requests
                    http_method = "-"
                    request_url = "-"
                    status = "-"
                    size = "0"
                }

                # User-Agent is the last quoted string on the line
                if (match($0, /"([^"]*)"[[:space:]]*$/, ua)) {
                    user_agent = ua[1]
                    if (user_agent == "") user_agent = "-"
                } else {
                    user_agent = "-"
                }

                referer = "-"
                accept_lang = "-"

                # Referer is the quoted string just before the User-Agent
                if (match($0, /"([^"]*)"[[:space:]]*"[^"]*"[[:space:]]*$/, ref)) {
                    referer = ref[1]
                    if (referer == "") referer = "-"
                }

                # Accept-Language, if the log format includes it. awk has no
                # /i regex flag, so case-insensitivity is spelled out.
                if (match($0, /[Aa][Cc][Cc][Ee][Pp][Tt]-[Ll][Aa][Nn][Gg][Uu][Aa][Gg][Ee]: ([^ ,;]*)/, al)) {
                    accept_lang = al[1]
                }

                # Only output valid entries
                if (ip != "" && ip !~ /^[[:space:]]*$/) {
                    print ip "|" domain "|" request_url "|" status "|" size "|" user_agent "|" http_method "|" timestamp "|" referer "|" accept_lang
                }
            }' "$logfile" 2>/dev/null
        done
    } 3>&1 > "$TEMP_DIR/parsed_logs.txt"

    # Clear the progress line
    echo -ne "\r\033[K"

    if [ ! -s "$TEMP_DIR/parsed_logs.txt" ]; then
        print_alert "No log entries were parsed. Check log format or permissions."
        return 1
    fi

    local line_count
    line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
    local file_size_kb
    file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")

    # Compress a copy in the background; the uncompressed file is kept for
    # fast analysis
    gzip -c "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/parsed_logs.txt.gz" &

    print_success "Logs parsed successfully ($line_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}
|
||
|
||
#############################################################################
|
||
# Bot Detection & Classification
|
||
#############################################################################
|
||
|
||
#######################################
# Classify parsed requests by User-Agent.
#
# Reads  $TEMP_DIR/parsed_logs.txt and writes, for every request recognised
# as a bot, the first eight input fields followed by bot_type|bot_name to
# $TEMP_DIR/classified_bots.txt (plus a gzip copy made in the background).
#
# Globals read: TEMP_DIR and the keys of LEGIT_BOTS, AI_BOTS, MONITOR_BOTS,
#               SUSPICIOUS_BOTS
# Returns: 0 on success, 1 when no request was classified
#######################################
classify_bots() {
    print_info "Classifying bot traffic..."

    # Join the signature names of each category into one alternation
    local legit_pattern ai_pattern monitor_pattern suspicious_pattern
    local saved_ifs=$IFS
    IFS='|'
    legit_pattern="${!LEGIT_BOTS[*]}"
    ai_pattern="${!AI_BOTS[*]}"
    monitor_pattern="${!MONITOR_BOTS[*]}"
    suspicious_pattern="${!SUSPICIOUS_BOTS[*]}"
    IFS=$saved_ifs

    awk -F'|' -v legit="$legit_pattern" -v ai="$ai_pattern" -v monitor="$monitor_pattern" -v suspicious="$suspicious_pattern" '
    # Return the signature from the "|"-separated list that matches the
    # lowercased user agent, or "Unknown" when none of them does.
    function signature_name(list, agent_lc,    names, k) {
        split(list, names, "|")
        for (k in names) {
            if (match(agent_lc, tolower(names[k]))) {
                return names[k]
            }
        }
        return "Unknown"
    }

    # 1 when the user agent carries a complete browser signature, else 0.
    function is_real_browser(agent_lc) {
        # Chrome/Chromium-based: Chrome/ AND (AppleWebKit OR Mobile)
        if (agent_lc ~ /chrome\/[0-9]/ && (agent_lc ~ /applewebkit/ || agent_lc ~ /mobile/)) return 1
        # Firefox: Firefox/ AND Gecko/
        if (agent_lc ~ /firefox\/[0-9]/ && agent_lc ~ /gecko\//) return 1
        # Safari: Safari/ AND Version/ AND AppleWebKit (not Chrome)
        if (agent_lc ~ /safari\/[0-9]/ && agent_lc ~ /version\// && agent_lc ~ /applewebkit/ && agent_lc !~ /chrome/) return 1
        # Edge: Edg/ or Edge/
        if (agent_lc ~ /edg\/[0-9]|edge\/[0-9]/) return 1
        # Mobile browsers: Samsung, UC, Opera Mobile
        if (agent_lc ~ /samsungbrowser\/[0-9]|ucbrowser\/[0-9]|opr\/[0-9]/) return 1
        return 0
    }

    BEGIN {
        # Lowercase the category patterns once for case-insensitive matching
        legit_lc = tolower(legit)
        ai_lc = tolower(ai)
        monitor_lc = tolower(monitor)
        suspicious_lc = tolower(suspicious)
    }

    {
        agent = $6
        agent_lc = tolower(agent)
        bot_type = "unknown"
        bot_name = "Unknown"

        # Categories are tried in priority order
        if (legit != "" && match(agent_lc, legit_lc)) {
            bot_type = "legit"
            bot_name = signature_name(legit, agent_lc)
        } else if (ai != "" && match(agent_lc, ai_lc)) {
            bot_type = "ai"
            bot_name = signature_name(ai, agent_lc)
        } else if (monitor != "" && match(agent_lc, monitor_lc)) {
            bot_type = "monitor"
            bot_name = signature_name(monitor, agent_lc)
        } else if (suspicious != "" && match(agent_lc, suspicious_lc)) {
            bot_type = "suspicious"
            bot_name = signature_name(suspicious, agent_lc)
        } else if (agent_lc ~ /bot|crawler|spider|scraper|curl|wget|python-requests|python-urllib|java\/|scan|check|monitor/) {
            # A generic bot keyword is present. Requests whose agent still
            # has a complete browser signature are dropped here.
            if (is_real_browser(agent_lc)) {
                next
            }

            bot_type = "unidentified_bot"
            # Bot name is the first word of the user agent, at most 30 chars
            if (match(agent, /^[^ ]+/)) {
                bot_name = substr(substr(agent, RSTART, RLENGTH), 1, 30)
            } else {
                bot_name = ""
            }
        }

        # Emit only requests that were identified as some kind of bot
        if (bot_type != "unknown") {
            print $1 "|" $2 "|" $3 "|" $4 "|" $5 "|" agent "|" $7 "|" $8 "|" bot_type "|" bot_name
        }
    }' < "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"

    if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
        print_alert "Bot classification failed"
        return 1
    fi

    local classified_count
    classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
    local file_size_kb
    file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")

    # Compress a copy in the background
    gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" &

    print_success "Bot classification complete ($classified_count entries, ${file_size_kb}KB uncompressed)"
    return 0
}
|
||
|
||
#############################################################################
|
||
# NEW: Baseline Management (historical tracking for anomaly detection)
|
||
#############################################################################
|
||
|
||
# Print the number of lines in file $1, or 0 when it is missing/unreadable.
_baseline_line_count() {
    if [ -r "$1" ]; then
        awk 'END { print NR }' "$1"
    else
        echo 0
    fi
}

# Print how many records of file $1 have a second "|" field equal to $2,
# or 0 when the file is missing/unreadable.
_baseline_domain_count() {
    if [ -r "$1" ]; then
        awk -F'|' -v wanted="$2" '$2 == wanted { n++ } END { print n + 0 }' "$1"
    else
        echo 0
    fi
}

#######################################
# Append today's metrics to the per-domain and global baseline history and
# trim every history file to its last 30 entries.
#
# Per-domain row: date|requests|attacks|bots|high_risk_ips
# Global row:     date|requests|unique_ips|bot_pct|total_attacks|sqli|xss|
#                 path_traversal|rce|login|high_risk_ips
#
# Every count yields exactly one number. The earlier "cmd | wc -l || echo 0"
# form printed two lines ("0" twice) under "set -o pipefail" whenever the
# first command found nothing, which corrupted the stored rows. Domains are
# compared literally rather than used as a regular expression.
#
# Globals read: TEMP_DIR, BASELINE_DIR
#######################################
save_baseline() {
    print_info "Storing baseline metrics for anomaly comparison..."

    local today
    today=$(date +%Y%m%d)

    # Calculate current metrics
    local total_requests unique_ips=0 bot_requests bot_pct=0
    total_requests=$(_baseline_line_count "$TEMP_DIR/parsed_logs.txt")
    bot_requests=$(_baseline_line_count "$TEMP_DIR/classified_bots.txt")
    if [ -r "$TEMP_DIR/parsed_logs.txt" ]; then
        unique_ips=$(awk -F'|' '!seen[$1]++ { n++ } END { print n + 0 }' "$TEMP_DIR/parsed_logs.txt")
    fi
    if [ "$total_requests" -gt 0 ]; then
        bot_pct=$((bot_requests * 100 / total_requests))
    fi

    local sqli_attempts xss_attempts path_attempts rce_attempts login_attempts
    sqli_attempts=$(_baseline_line_count "$TEMP_DIR/sqli_attempts.txt")
    xss_attempts=$(_baseline_line_count "$TEMP_DIR/xss_attempts.txt")
    path_attempts=$(_baseline_line_count "$TEMP_DIR/path_traversal_attempts.txt")
    rce_attempts=$(_baseline_line_count "$TEMP_DIR/rce_upload_attempts.txt")
    login_attempts=$(_baseline_line_count "$TEMP_DIR/login_bruteforce_attempts.txt")
    local total_attacks=$((sqli_attempts + xss_attempts + path_attempts + rce_attempts + login_attempts))

    # IPs whose score (first field of threat_scores.txt) is 70 or more
    local high_risk_ips=0
    if [ -r "$TEMP_DIR/threat_scores.txt" ]; then
        high_risk_ips=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' "$TEMP_DIR/threat_scores.txt")
    fi

    # Store baseline for each domain
    if [ -f "$TEMP_DIR/all_domains.txt" ]; then
        local domain baseline_file domain_requests domain_attacks domain_bots
        while read -r domain; do
            [ -n "$domain" ] || continue
            baseline_file="$BASELINE_DIR/${domain}_baseline.txt"

            # Domain-specific metrics
            domain_requests=$(_baseline_domain_count "$TEMP_DIR/parsed_logs.txt" "$domain")
            domain_attacks=$(_baseline_domain_count "$TEMP_DIR/attack_vectors_raw.txt" "$domain")
            domain_bots=$(_baseline_domain_count "$TEMP_DIR/classified_bots.txt" "$domain")

            echo "$today|$domain_requests|$domain_attacks|$domain_bots|$high_risk_ips" >> "$baseline_file"

            # Keep only the last 30 entries
            tail -n 30 "$baseline_file" > "$baseline_file.tmp" && mv "$baseline_file.tmp" "$baseline_file"
        done < "$TEMP_DIR/all_domains.txt"
    fi

    # Store global baseline
    local global_baseline="$BASELINE_DIR/global_baseline.txt"
    echo "$today|$total_requests|$unique_ips|$bot_pct|$total_attacks|$sqli_attempts|$xss_attempts|$path_attempts|$rce_attempts|$login_attempts|$high_risk_ips" >> "$global_baseline"
    tail -n 30 "$global_baseline" > "$global_baseline.tmp" && mv "$global_baseline.tmp" "$global_baseline"

    print_success "Baseline stored"
}
|
||
|
||
# Print the stored baseline history of a domain; prints nothing when the
# domain has no history file.
# Arguments: $1 - domain name
# Globals read: BASELINE_DIR
get_domain_baseline() {
    local history_path="$BASELINE_DIR/${1}_baseline.txt"

    [ -f "$history_path" ] || return 0
    cat "$history_path"
}
|
||
|
||
# Print the integer average of one metric over the most recent entries of a
# domain's baseline history; prints 0 when there is no history.
# Arguments: $1 - domain name
#            $2 - metric: attacks, bots, high_risk; anything else means
#                 requests
#            $3 - number of most recent entries to average (default 7)
# Globals read: BASELINE_DIR
calculate_baseline_average() {
    local domain="$1"
    local metric="$2"
    local days="${3:-7}"
    local history_path="$BASELINE_DIR/${domain}_baseline.txt"
    local field

    if [ ! -f "$history_path" ]; then
        echo "0"
        return
    fi

    # Map the metric name to its column in the history file
    case "$metric" in
        attacks)   field=3 ;;
        bots)      field=4 ;;
        high_risk) field=5 ;;
        *)         field=2 ;; # requests
    esac

    tail -"$days" "$history_path" 2>/dev/null \
        | awk -F'|' -v col="$field" '
            { total += $col; rows++ }
            END {
                average = 0
                if (rows > 0) average = int(total / rows)
                print average
            }'
}
|
||
|
||
#############################################################################
|
||
# NEW: Attack Progression/Timeline Analysis
|
||
#############################################################################
|
||
|
||
#######################################
# For up to 20 IPs scoring 70 or more in threat_scores.txt (score|ip),
# write that IP's requests, sorted, to $TEMP_DIR/progression_<ip>.txt as
# timestamp|url|status|user_agent and append ip|phase|first_timestamp to
# $TEMP_DIR/attack_phases.txt.
#
# IPs are compared literally. Used as a regular expression, the dots in
# "1.2.3.4" also matched other addresses such as "1.213.4".
#
# Note: the sort is textual, so it is chronological only within timestamps
# that sort correctly as text.
#
# Globals read: TEMP_DIR
#######################################
analyze_attack_progression() {
    print_info "Analyzing attack progression and sequences..."

    local scores_file="$TEMP_DIR/threat_scores.txt"
    local requests_file="$TEMP_DIR/parsed_logs.txt"
    local phases_file="$TEMP_DIR/attack_phases.txt"
    local ip progression_file phase phase_start

    if [ -r "$scores_file" ] && [ -r "$requests_file" ]; then
        while IFS= read -r ip; do
            [ -n "$ip" ] || continue
            progression_file="$TEMP_DIR/progression_${ip}.txt"

            # All requests from this IP, in sorted order
            awk -F'|' -v wanted="$ip" '$1 == wanted { print $8 "|" $3 "|" $4 "|" $6 }' \
                "$requests_file" | sort > "$progression_file"

            # Detect attack phases
            phase="reconnaissance"
            phase_start=$(head -n 1 "$progression_file" | cut -d'|' -f1)

            echo "$ip|$phase|$phase_start" >> "$phases_file"
        done < <(awk -F'|' '$1 >= 70 { print $2; if (++picked == 20) exit }' "$scores_file")
    fi

    # Make sure the phases file exists even when no IP qualified
    touch "$phases_file"
    print_success "Attack progression analysis complete"
}
|
||
|
||
#############################################################################
|
||
# Header Analysis for Bot Detection
|
||
#############################################################################
|
||
|
||
#######################################
# Score every client IP by suspicious request-header patterns and write the
# IPs scoring 8 or more to $TEMP_DIR/header_anomalies.txt as
# ip|header_anomaly|score.
#
# Points: empty Accept-Language 2, accept-all Accept-Language 3, missing
# referer 1, referer naming a scanner 5, foreign referer together with
# missing referer 2, more than 50 HEAD requests 4, OPTIONS/TRACE 10.
#
# Every IP seen in the log is scored. Scoring only the IPs that had an empty
# Accept-Language at least once skipped, for example, an IP that always
# sends the header but issues TRACE requests.
#
# Globals read: TEMP_DIR (input: $TEMP_DIR/parsed_logs.txt)
#######################################
analyze_headers() {
    print_info "Analyzing request headers for bot patterns..."

    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        # Build the path once; concatenating on the right of ">" is ambiguous
        anomalies_file = tmpdir "/header_anomalies.txt"
    }
    {
        ip = $1
        domain_lower = tolower($2)
        method = $7
        referer = $9
        accept_lang = $10
        referer_lower = tolower(referer)

        seen[ip] = 1

        # Pattern 1: empty or missing Accept-Language
        if (accept_lang == "-" || accept_lang == "") {
            empty_lang[ip]++
        }

        # Pattern 2: Accept-Language that accepts everything
        if (accept_lang == "*/*" || accept_lang == "*") {
            accepts_all[ip]++
        }

        # Pattern 3: no referer
        if (referer == "-" || referer == "") {
            no_referer[ip]++
        }

        # Pattern 4: referer naming a scanner or crawler
        if (referer_lower ~ /badbot|crawler|scanner|nikto|nmap|metasploit|sqlmap/) {
            suspicious_referer[ip]++
        }

        # Pattern 5: referer that names neither the target domain nor a
        # well-known search/social site. The domain is compared as literal,
        # lowercased text rather than as a regular expression.
        same_site = (domain_lower == "" || index(referer_lower, domain_lower) > 0)
        if (referer != "-" && !same_site) {
            if (referer_lower !~ /google|bing|yahoo|facebook|twitter|reddit|instagram/) {
                cross_domain_referer[ip]++
            }
        }

        # Pattern 6: HEAD requests
        if (method == "HEAD") {
            head_requests[ip]++
        }

        # Pattern 7: OPTIONS/TRACE requests
        if (method == "OPTIONS" || method == "TRACE") {
            dangerous_methods[ip]++
        }
    }
    END {
        for (ip in seen) {
            score = 0

            if (ip in empty_lang) score += 2
            if (ip in accepts_all) score += 3
            if (ip in no_referer) score += 1
            if (ip in suspicious_referer) score += 5
            if (ip in cross_domain_referer && (ip in no_referer)) score += 2
            if (ip in head_requests && (head_requests[ip] > 50)) score += 4
            if (ip in dangerous_methods) score += 10

            # Only flag a high header suspicion score
            if (score >= 8) {
                print ip "|header_anomaly|" score > anomalies_file
            }
        }
        close(anomalies_file)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Make sure the result file exists even when nothing was flagged
    touch "$TEMP_DIR/header_anomalies.txt"
    print_success "Header analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Entry Point Analysis (where bots start)
|
||
#############################################################################
|
||
|
||
#######################################
# Classify the first request seen from every IP (in file order) and write
#   $TEMP_DIR/suspicious_entry_points.txt  ip|admin_entry|url|status
#   $TEMP_DIR/normal_entry_points.txt      ip|normal_entry|url
#   $TEMP_DIR/static_entry_points.txt      ip|static_entry|url
# A first request matching none of the three patterns is not recorded.
#
# The awk program no longer has a BEGIN block that indexed the arrays with
# variables not yet assigned; that only created a meaningless empty-key
# record for END to iterate.
#
# Globals read: TEMP_DIR (input: $TEMP_DIR/parsed_logs.txt)
# Globals written: suspicious_count (only when suspicious entries exist)
#######################################
analyze_entry_points() {
    print_info "Analyzing first request patterns (bot vs. user entry points)..."

    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        # Build the paths once; concatenating on the right of ">" is ambiguous
        suspicious_file = tmpdir "/suspicious_entry_points.txt"
        normal_file = tmpdir "/normal_entry_points.txt"
        static_file = tmpdir "/static_entry_points.txt"
    }
    # Remember only the first request of each IP
    !($1 in first_url) {
        first_url[$1] = $3
        first_status[$1] = $4
    }
    END {
        for (ip in first_url) {
            url = first_url[ip]
            url_lower = tolower(url)

            if (url_lower ~ /wp-admin|phpmyadmin|admin|xmlrpc|shell\.php|\.env|\.git|backdoor|config\.php/) {
                # Admin panels, secrets and shells: typical scanner targets
                print ip "|admin_entry|" url "|" first_status[ip] > suspicious_file
            } else if (url_lower ~ /^\/index|^\/$|^\/search|^\/page|^\/category/) {
                # Homepage, search or listing pages
                print ip "|normal_entry|" url > normal_file
            } else if (url_lower ~ /\.(css|js|jpg|png|gif|woff|svg)$/) {
                # Static assets: unusual but possible
                print ip "|static_entry|" url > static_file
            }
        }
        close(suspicious_file)
        close(normal_file)
        close(static_file)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Count suspicious entry points
    if [ -f "$TEMP_DIR/suspicious_entry_points.txt" ]; then
        suspicious_count=$(wc -l < "$TEMP_DIR/suspicious_entry_points.txt")
        print_success "Found $suspicious_count IPs with suspicious entry points"
    else
        touch "$TEMP_DIR/suspicious_entry_points.txt"
    fi
}
|
||
|
||
#############################################################################
|
||
# Threat Detection
|
||
#############################################################################
|
||
|
||
# Scan every parsed request for attack signatures and reconnaissance
# behaviour in one awk pass, then summarise each category into a counted,
# descending-sorted file.
#
# Globals:
#   TEMP_DIR (read) - working directory holding parsed_logs.txt
# Inputs:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited records; fields used:
#     1=ip 2=domain 3=url 4=status 5=size 6=user-agent 7=method
# Outputs (all under $TEMP_DIR):
#   attack_vectors_raw.txt    ip|domain|url|status|type, one line per hit
#                             (absent when nothing matched)
#   attack_types.txt          hit count per attack type (always created)
#   <type>_attempts.txt       counted ip|domain|url|status for sqli, xss,
#                             path_traversal, rce_upload, info_disclosure and
#                             login_bruteforce (only when hits exist)
#   sqli_attempts_legacy.txt  copy of sqli_attempts.txt
#   admin_probes.txt, 404_scans.txt, large_transfers.txt, suspicious_ua.txt,
#   response_codes.txt        "uniq -c" style counts (always created)
detect_threats() {
    print_info "Detecting security threats..."

    # Use a single AWK pass for multiple threat detections (more efficient)
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        # Output names are resolved once; redirecting to a variable avoids the
        # unparenthesised "print > a b" form that awks parse differently.
        attack_file   = tmpdir "/attack_vectors_raw.txt"
        probe_file    = tmpdir "/admin_probes_raw.txt"
        scan_file     = tmpdir "/404_scans_raw.txt"
        transfer_file = tmpdir "/large_transfers_raw.txt"
        ua_file       = tmpdir "/suspicious_ua_raw.txt"
        status_file   = tmpdir "/response_codes_raw.txt"
    }
    {
        ip = $1
        domain = $2
        url = $3
        status = $4
        size = $5
        ua = $6
        method = $7
        url_lower = tolower(url)
        ua_lower = tolower(ua)

        # SQL Injection patterns (enhanced)
        # Hex pattern requires SQL context to avoid false positives on blockchain/product IDs
        if (match(url_lower, /union.*select|concat\(|benchmark\(|sleep\(|waitfor|cast\(|exec\(/) ||
            match(url_lower, /information_schema|drop table|insert into|update.*set|delete from/) ||
            match(url_lower, /%27.*(union|select|or |and )|hex\(|unhex\(|load_file\(/) ||
            match(url_lower, /0x[0-9a-f]+.*(union|select|into|from|where|order)/)) {
            print ip "|" domain "|" url "|" status "|sqli" > attack_file
        }

        # XSS patterns
        # DOM-based patterns (document.cookie, .innerhtml) are only flagged in query strings,
        # which prevents false positives on documentation URLs like /docs/innerhtml-api-guide
        if (match(url_lower, /<script|javascript:|onerror=|onload=|<iframe|eval\(|alert\(/) ||
            match(url_lower, /\?.*(document\.cookie|document\.write|\.innerhtml)/)) {
            print ip "|" domain "|" url "|" status "|xss" > attack_file
        }

        # Path Traversal / LFI, including URL-encoded variants (%2e%2e, %5c for backslash)
        if (match(url_lower, /\.\.\/|\.\.\\|%2e%2e|%5c|etc\/passwd|etc\/shadow|boot\.ini|win\.ini/) ||
            match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows(%5c|[\/\\])system32/)) {
            print ip "|" domain "|" url "|" status "|path_traversal" > attack_file
        }

        # Shell upload / RCE attempts
        # Only actual shell commands, known malicious files, and suspicious upload patterns;
        # a blanket "any POST to .php" rule caused massive false positives
        if (match(url_lower, /cmd\.exe|\/bin\/bash|\/bin\/sh|phpinfo\(|system\(|exec\(|passthru\(|eval\(/) ||
            match(url_lower, /shell\.php|c99\.php|r57\.php|r00t\.php|backdoor|webshell|cmd\.php|exploit\.php/) ||
            match(url_lower, /base64_decode.*eval|gzinflate.*eval|assert.*\$_/) ||
            (match(url_lower, /\.(php|phtml|php3|php4|php5|phar)\.suspected$/) && method == "POST")) {
            print ip "|" domain "|" url "|" status "|rce_upload" > attack_file
        }

        # Info Disclosure attempts
        # readme only matches actual files (.txt, .html, .md); phpinfo only matches .php files;
        # sitemap.xml.gz is intentionally not listed (public for SEO)
        if (match(url_lower, /\.git\/|\.env|\.sql$|\.bak$|\.old$|\.backup$|\.orig$|\.swp$|\.sav$|~$|config\.php|phpinfo\.php/) ||
            match(url_lower, /readme\.(txt|html|md)$/) ||
            match(url_lower, /web\.config|\.htaccess|\.htpasswd/) ||
            match(url_lower, /database\.sql|backup\.zip|backup\.tar|dump\.sql/)) {
            # Only flag if successful access (200) or redirect (301/302)
            # Failed attempts (404/403) are just scanning, tracked separately
            if (status ~ /^(200|301|302)/) {
                print ip "|" domain "|" url "|" status "|info_disclosure" > attack_file
            }
        }

        # composer.json / package.json - lower severity, only if successful
        if (match(url_lower, /composer\.json|package\.json|package-lock\.json/) && status == "200") {
            print ip "|" domain "|" url "|" status "|config_exposure" > attack_file
        }

        # Login bruteforce
        if (match(url_lower, /wp-login\.php|xmlrpc\.php/) && method == "POST") {
            print ip "|" domain "|" url "|" status "|login_bruteforce" > attack_file
        }

        # Admin/sensitive endpoint probing
        if (match(url_lower, /wp-admin|phpmyadmin|admin|administrator|login|wp-login|xmlrpc/) ||
            match(url_lower, /\.env|\.git|\.sql|backup|config\./)) {
            # Only flag failed access attempts (403 Forbidden, 401 Unauthorized, 404 Not Found)
            # Successful access (200/302) means legitimate user or already compromised
            if (status ~ /^(403|401|404)/) {
                print ip "|" domain "|" url > probe_file
            }
        }

        # 404 scanning (reconnaissance)
        if (status == "404" || status == "403") {
            print ip "|" domain "|" url "|" status > scan_file
        }

        # Large data transfers (potential scraping)
        if (size > 1000000) {
            print ip "|" domain "|" url "|" size > transfer_file
        }

        # Suspicious user agents
        if (match(ua_lower, /nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp/) ||
            match(ua_lower, /metasploit|<script|null|python-requests|go-http-client/)) {
            print ip "|" ua > ua_file
        }

        # Track response codes for intelligence
        print status > status_file
    }
    ' < "$TEMP_DIR/parsed_logs.txt"

    local attack_type summary_name

    # Process attack vectors by type
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # Overall attack vectors summary
        awk -F'|' '{print $5}' "$TEMP_DIR/attack_vectors_raw.txt" | sort | uniq -c | sort -rn > "$TEMP_DIR/attack_types.txt"

        # Breakdown by attack type. grep exits non-zero when a type has no
        # hits, so the pipeline is allowed to fail and leaves an empty file.
        for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do
            grep "|$attack_type$" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | \
                awk -F'|' '{print $1"|"$2"|"$3"|"$4}' | \
                sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt" || true
        done

        # Old sqli file for backwards compatibility
        if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then
            cp "$TEMP_DIR/sqli_attempts.txt" "$TEMP_DIR/sqli_attempts_legacy.txt"
        fi
    else
        touch "$TEMP_DIR/attack_types.txt"
    fi

    # Turn each raw capture into a counted, descending-sorted summary; create
    # an empty summary when the awk pass produced no raw file for it.
    for summary_name in admin_probes 404_scans large_transfers suspicious_ua response_codes; do
        if [ -f "$TEMP_DIR/${summary_name}_raw.txt" ]; then
            sort "$TEMP_DIR/${summary_name}_raw.txt" | uniq -c | sort -rn > "$TEMP_DIR/${summary_name}.txt"
        else
            touch "$TEMP_DIR/${summary_name}.txt"
        fi
    done

    print_success "Threat detection complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: URL Entropy Analysis (detects fuzzing/scanning)
|
||
#############################################################################
|
||
|
||
# Flag IPs that request many URLs spread over several distinct paths, which
# is treated here as parameter fuzzing / scanning.
#
# An IP is reported when it made more than 20 requests AND touched more than
# 5 distinct base paths (URL text before the first "?").
#
# Globals:
#   TEMP_DIR (read) - working directory holding parsed_logs.txt
# Inputs:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited records; field 1 = IP,
#                               field 3 = URL
# Outputs:
#   $TEMP_DIR/fuzzing_ips.txt - ip|parameter_fuzzing|requests|unique_paths
#                               (always created, possibly empty)
analyze_url_entropy() {
    print_info "Analyzing URL parameter entropy (fuzzing detection)..."

    # Written without gawk-only features (3-argument match, arrays of arrays)
    # so the analysis also runs under mawk / busybox awk.
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        fuzz_file = tmpdir "/fuzzing_ips.txt"
    }
    {
        ip  = $1
        url = $3

        # Base path = everything before the query string
        query_pos = index(url, "?")
        base_path = (query_pos > 0) ? substr(url, 1, query_pos - 1) : url

        urls_per_ip[ip]++

        # Count each (ip, path) pair once to get distinct paths per IP
        if (!((ip, base_path) in path_seen)) {
            path_seen[ip, base_path] = 1
            unique_path_count[ip]++
        }
    }
    END {
        for (ip in urls_per_ip) {
            if (urls_per_ip[ip] > 20 && unique_path_count[ip] > 5) {
                # Likely fuzzing/parameter scanning
                print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count[ip] > fuzz_file
            }
        }
        close(fuzz_file)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
    touch "$TEMP_DIR/fuzzing_ips.txt"
    print_success "URL entropy analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Request Timing Analysis (DDoS & bot behavior detection)
|
||
#############################################################################
|
||
|
||
# Flag IPs whose requests arrive at a very short average interval.
#
# An IP is reported when it made more than 100 timestamped requests and the
# mean gap between consecutive requests (in file order) is under 3 seconds.
# Only the time of day is used; a negative gap is treated as a day rollover.
#
# Globals:
#   TEMP_DIR (read) - working directory holding parsed_logs.txt
# Inputs:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited records; field 1 = IP,
#                               field 8 = timestamp containing HH:MM:SS
# Outputs:
#   $TEMP_DIR/timing_anomalies.txt - ip|consistent_bot_timing|avg_gap|requests
#                                    (always created, possibly empty)
analyze_request_timing() {
    print_info "Analyzing request timing patterns (DDoS detection)..."

    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        anomaly_file = tmpdir "/timing_anomalies.txt"
    }
    {
        ip = $1
        timestamp = $8

        # Locate HH:MM:SS. In DD/Mon/YYYY:HH:MM:SS a bare NN:NN:NN search hits
        # "YY:HH:MM" first (last two year digits), so the four-digit year form
        # is tried first and the clock is taken from just after "YYYY:".
        if (match(timestamp, /[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            clock = substr(timestamp, RSTART + 5, 8)
        } else if (match(timestamp, /[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            clock = substr(timestamp, RSTART, 8)
        } else {
            next
        }

        split(clock, hms, ":")
        seconds = hms[1] * 3600 + hms[2] * 60 + hms[3]

        # Accumulate gaps as records stream past instead of storing every
        # timestamp; this also keeps requests made at exactly 00:00:00.
        if (ip in last_seconds) {
            gap = seconds - last_seconds[ip]
            if (gap < 0) gap += 86400  # Handle day boundary
            gap_total[ip] += gap
            gap_count[ip]++
        }
        last_seconds[ip] = seconds
        request_count[ip]++
    }
    END {
        for (ip in request_count) {
            count = request_count[ip]

            if (count > 100 && gap_count[ip] > 0) {
                avg_interval = gap_total[ip] / gap_count[ip]

                # Short average gap at high volume = automated client
                if (avg_interval < 3) {
                    print ip "|consistent_bot_timing|" avg_interval "|" count > anomaly_file
                }
            }
        }
        close(anomaly_file)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
    touch "$TEMP_DIR/timing_anomalies.txt"
    print_success "Request timing analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Fingerprinting - Combine multiple signals for accuracy
|
||
#############################################################################
|
||
|
||
# Combine several per-IP signals into a bot-confidence score and record the
# IPs that reach a score of 60 or more.
#
# Scoring per IP:
#   +20  at least one request with a bot-like User-Agent
#   +15  at least one request with Accept-Language "-", "" or "*/*"
#   +15  more than 70% of requests have no Referer
#   +20  at least one request to an admin/config path
#   +25  three or more of the signals above are present
#   -10  more than 70% of requests returned 200/301/302
# The result is clamped to 0..100.
#
# Globals:
#   TEMP_DIR (read)           - working directory holding parsed_logs.txt
#   fingerprint_count (write) - number of IPs written to the output file
# Inputs:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited records; fields used:
#     1=ip 3=url 4=status 6=user-agent 9=referer 10=accept-language
# Outputs:
#   $TEMP_DIR/bot_fingerprints.txt - ip|score|signal_count
#                                    (always created, possibly empty)
calculate_bot_fingerprint() {
    print_info "Calculating bot fingerprint confidence scores (combining multiple signals)..."

    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        fingerprint_file = tmpdir "/bot_fingerprints.txt"
    }
    {
        ip = $1
        url = $3
        status = $4
        ua = $6
        referer = $9
        accept_lang = $10

        ua_lower = tolower(ua)

        # Total requests per IP
        ip_seen[ip]++

        # Signal 1: Bot-like User-Agent.
        # "javascript" is blanked out first so the plain "java" keyword matches
        # Java clients (including a UA that ends in "java") but not browsers
        # that merely mention JavaScript. awk has no negative lookahead, and a
        # bracket expression such as [^script] only excludes single characters.
        ua_scan = ua_lower
        gsub(/javascript/, " ", ua_scan)
        if (match(ua_scan, /bot|crawler|spider|scraper|curl|wget|python|java|perl|ruby|node\.js|headless|mechanize/)) {
            ua_bot_signal[ip]++
        }

        # Signal 2: Missing/unusual Accept-Language
        if (accept_lang == "-" || accept_lang == "" || accept_lang == "*/*") {
            header_anomaly_signal[ip]++
        }

        # Signal 3: Missing Referer (bots often dont send it)
        if (referer == "-" || referer == "") {
            missing_referer[ip]++
        }

        # Signal 4: Successful requests indicate not just scanning
        if (status ~ /^(200|301|302)/) {
            success_requests[ip]++
        }

        # Signal 5: Direct admin/config access (suspicious entry)
        if (match(url, /\/(wp-admin|phpmyadmin|admin|config\.php|\.env|\.git|\.htaccess|web\.config)/)) {
            admin_access[ip]++
        }
    }
    END {
        for (ip in ip_seen) {
            score = 0
            signal_count = 0

            # Each signal adds confidence
            if (ip in ua_bot_signal && ua_bot_signal[ip] > 0) {
                score += 20
                signal_count++
            }

            if (ip in header_anomaly_signal && header_anomaly_signal[ip] > 0) {
                score += 15
                signal_count++
            }

            if (ip in missing_referer && missing_referer[ip] > ip_seen[ip] * 0.7) {
                score += 15  # 70%+ requests missing referer
                signal_count++
            }

            if (ip in admin_access && admin_access[ip] > 0) {
                score += 20  # Targeting admin areas
                signal_count++
            }

            # Reduce score if mostly getting 200 OK (might be legitimate bot)
            if (ip in success_requests && success_requests[ip] > ip_seen[ip] * 0.7) {
                score -= 10  # Legitimate traffic (70%+ success)
            }

            # Multi-signal boost (confidence increases when multiple signals align)
            if (signal_count >= 3) {
                score += 25  # Strong indicator of bot when 3+ signals present
            }

            # Normalize to 0-100
            if (score > 100) score = 100
            if (score < 0) score = 0

            # Output fingerprint for high-confidence bots (score >= 60)
            if (score >= 60) {
                printf "%s|%d|%d\n", ip, score, signal_count > fingerprint_file
            }
        }
        close(fingerprint_file)
    }
    ' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if empty
    touch "$TEMP_DIR/bot_fingerprints.txt"
    fingerprint_count=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
    print_success "Fingerprint analysis complete ($fingerprint_count high-confidence bot IPs)"
}
|
||
|
||
#############################################################################
|
||
# NEW: Domain Targeting Analysis - Which domains are being attacked?
|
||
#############################################################################
|
||
|
||
# Summarise traffic and attack activity per domain.
#
# Globals:
#   TEMP_DIR (read) - working directory holding the input files
# Inputs:
#   $TEMP_DIR/parsed_logs.txt        - pipe-delimited records; field 1 = IP,
#                                      field 2 = domain
#   $TEMP_DIR/attack_vectors_raw.txt - optional; ip|domain|url|status|type
# Outputs (under $TEMP_DIR):
#   domain_targeting.txt        domain|unique_ips|request_count, sorted by
#                               request count descending
#   domain_attacks_<domain>.txt attack_type|ip|count|total_for_type, one file
#                               per attacked domain (only when attack data
#                               exists)
analyze_domain_targeting_percentage() {
    print_info "Analyzing per-domain attack patterns (what's attacking each domain)..."

    # Per-domain request and unique-IP totals come from parsed_logs.txt only.
    # The attack file is deliberately not passed to this awk: an
    # "NR == FNR { next }" skip of a first file discards the whole second
    # file whenever the first one is empty or missing.
    awk -F'|' '
    {
        domain = $2
        requests[domain]++

        # Count each (domain, ip) pair once
        if (!((domain, $1) in pair_seen)) {
            pair_seen[domain, $1] = 1
            unique_ips[domain]++
        }
    }
    END {
        # Output: domain|unique_ips|request_count
        for (domain in requests) {
            printf "%s|%d|%d\n", domain, unique_ips[domain], requests[domain]
        }
    }
    ' "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt"

    # Per-domain attack type breakdown. Skipped when there is no attack data,
    # so a clean log does not abort the run under "set -e".
    if [ -s "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # Stage 1 counts hits per domain|type|ip and per domain|type.
        # Stage 2 sorts so each domain is contiguous.
        # Stage 3 writes each domain to its own file, closing the previous one
        # so only a single output file is open at a time.
        awk -F'|' '
        {
            hits[$2 "|" $5 "|" $1]++
            totals[$2 "|" $5]++
        }
        END {
            for (key in hits) {
                split(key, part, "|")
                printf "%s|%d|%d\n", key, hits[key], totals[part[1] "|" part[2]]
            }
        }
        ' "$TEMP_DIR/attack_vectors_raw.txt" | LC_ALL=C sort | awk -F'|' -v tmpdir="$TEMP_DIR" '
        {
            domain_file = tmpdir "/domain_attacks_" $1 ".txt"
            if (domain_file != open_file) {
                if (open_file != "") close(open_file)
                open_file = domain_file
            }
            print $2 "|" $3 "|" $4 "|" $5 > domain_file
        }
        '
    fi

    print_success "Domain attack pattern analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Top URLs Analysis - What files/endpoints are bots hitting?
|
||
#############################################################################
|
||
|
||
# For every domain listed in domain_targeting.txt, write the URLs requested on
# that domain together with their hit counts, most requested first.
#
# Globals:
#   TEMP_DIR (read) - working directory holding the input files
# Inputs:
#   $TEMP_DIR/domain_targeting.txt - first pipe-delimited field is the domain;
#                                    the remaining fields are ignored here
#   $TEMP_DIR/parsed_logs.txt      - pipe-delimited records; field 2 = domain,
#                                    field 3 = URL
# Outputs:
#   $TEMP_DIR/domain_urls_<domain>.txt - url|count, one file per domain
analyze_top_urls_per_domain() {
    print_info "Analyzing top targeted URLs per domain..."

    # Loop variables are local so reading the list cannot overwrite
    # same-named globals (e.g. request_count) used elsewhere in the script.
    local domain _unique_ips _requests domain_file

    # Get list of domains from targeting analysis
    if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then
        while IFS='|' read -r domain _unique_ips _requests; do
            domain_file="$TEMP_DIR/domain_urls_${domain}.txt"

            # Extract all URLs for this domain, sorted by frequency (no arbitrary limit)
            awk -F'|' -v dom="$domain" '
            $2 == dom {
                urls[$3]++
            }
            END {
                for (url in urls) {
                    printf "%s|%d\n", url, urls[url]
                }
            }
            ' < "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k2 -rn > "$domain_file"
        done < "$TEMP_DIR/domain_targeting.txt"
    fi

    print_success "Top URLs analysis complete"
}
|
||
|
||
#############################################################################
|
||
# NEW: Success Rate & Behavior Analysis (Added for accuracy improvement)
|
||
#############################################################################
|
||
|
||
# Compute, per IP, the share of successful (200/301/302) and failed
# (404/403/401) responses and flag likely scanners and scrapers.
#
#   scanner: fail rate >= 80% with at least 20 requests
#   scraper: success rate >= 90% with at least 100 requests (only evaluated
#            when the IP was not already classed as a scanner)
#
# Globals:
#   TEMP_DIR (read) - working directory holding parsed_logs.txt
# Inputs:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited records; field 1 = IP,
#                               field 4 = status
# Outputs (under $TEMP_DIR; appended to, and always present afterwards):
#   high_failure_ips.txt  ip|total|fail_rate|scanner
#   high_success_ips.txt  ip|total|success_rate|scraper
#   ip_success_rates.txt  ip|total|success_rate|fail_rate
analyze_success_rates() {
    print_info "Analyzing request success rates and behavior patterns..."

    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        scanner_file = tmpdir "/high_failure_ips.txt"
        scraper_file = tmpdir "/high_success_ips.txt"
        rates_file   = tmpdir "/ip_success_rates.txt"
    }
    {
        ip = $1
        status = $4

        # Count total requests
        total[ip]++

        # Count successful responses
        if (status ~ /^(200|301|302)/) {
            success[ip]++
        }
        # Count failed/blocked responses
        else if (status ~ /^(404|403|401)/) {
            failed[ip]++
        }
    }
    END {
        for (ip in total) {
            # Multiply before dividing: (count / total) * 100 can land just
            # below the true integer in floating point (57 of 100 -> 56).
            # total[ip] is at least 1 for every key, so no zero division.
            success_rate = int(success[ip] * 100 / total[ip])
            fail_rate    = int(failed[ip] * 100 / total[ip])

            # High failure rate indicates scanning/probing
            if (fail_rate >= 80 && total[ip] >= 20) {
                print ip "|" total[ip] "|" fail_rate "|scanner" >> scanner_file
            }
            # Very high success rate + high volume could be scraping
            else if (success_rate >= 90 && total[ip] >= 100) {
                print ip "|" total[ip] "|" success_rate "|scraper" >> scraper_file
            }

            # Output all rates for later analysis
            print ip "|" total[ip] "|" success_rate "|" fail_rate >> rates_file
        }
        close(scanner_file)
        close(scraper_file)
        close(rates_file)
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Touch files if they don't exist
    touch "$TEMP_DIR/high_failure_ips.txt" "$TEMP_DIR/high_success_ips.txt" "$TEMP_DIR/ip_success_rates.txt"

    print_success "Success rate analysis complete"
}
|
||
|
||
#############################################################################
|
||
# Botnet Detection
|
||
#############################################################################
|
||
|
||
# Derive coordinated-activity indicators from the parsed request log.
#
# Globals:
#   TEMP_DIR (read) - working directory holding parsed_logs.txt
# Inputs:
#   $TEMP_DIR/parsed_logs.txt - pipe-delimited records; fields used:
#     1=ip 3=url 6=user-agent 8=timestamp (DD/Mon/YYYY:HH:MM:SS)
# Outputs (under $TEMP_DIR):
#   coordinated_urls.txt     URLs hit more than 10 times each by more than
#                            5 different IPs
#   ip_ua_pairs.txt          distinct ip|user-agent pairs
#   suspicious_networks.txt  "count a.b.c.0/24" for IPv4 /24 networks with
#                            more than 20 requests
#   rapid_fire_ips.txt       "total ip peak" for IPs exceeding 50 requests in
#                            any single minute; total sums those minutes,
#                            peak is the busiest one
detect_botnets() {
    print_info "Analyzing for botnet patterns..."

    # Pattern 1: Multiple IPs hitting same URLs in coordinated manner
    awk -F'|' '{print $1"|"$3}' < "$TEMP_DIR/parsed_logs.txt" | \
        sort | uniq -c | awk '$1 > 10 {print $2}' | \
        cut -d'|' -f2 | sort | uniq -c | sort -rn | \
        awk '$1 > 5 {print $2}' > "$TEMP_DIR/coordinated_urls.txt"

    # Pattern 2: IPs with similar User-Agents hitting multiple domains
    awk -F'|' '{print $1"|"$6}' < "$TEMP_DIR/parsed_logs.txt" | \
        sort | uniq > "$TEMP_DIR/ip_ua_pairs.txt"

    # Pattern 3: Detect IP ranges (Class C networks) with suspicious activity.
    # Only dotted-quad addresses are grouped; splitting an IPv6 address on "."
    # would otherwise emit a meaningless "<address>...0/24" network.
    awk -F'|' '$1 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {
        split($1, octet, ".")
        print octet[1] "." octet[2] "." octet[3] ".0/24"
    }' < "$TEMP_DIR/parsed_logs.txt" | \
        sort | uniq -c | sort -rn | awk '$1 > 20' > "$TEMP_DIR/suspicious_networks.txt"

    # Pattern 4: Rapid fire requests (DDoS indicators)
    # Count requests per IP per minute, keep only minutes above 50 requests,
    # then report the sum and the peak of those minutes per IP. Written
    # without the gawk-only 3-argument match() so any awk can run it.
    awk -F'|' '
    {
        # Date plus HH:MM identifies the minute (DD/Mon/YYYY:HH:MM)
        if (match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
            per_minute[$1 "|" substr($8, RSTART, RLENGTH)]++
        }
    }
    END {
        for (key in per_minute) {
            hits = per_minute[key]
            if (hits > 50) {
                ip = substr(key, 1, index(key, "|") - 1)
                burst_total[ip] += hits
                if (hits > burst_peak[ip]) burst_peak[ip] = hits
            }
        }
        for (ip in burst_total) print burst_total[ip], ip, burst_peak[ip]
    }' < "$TEMP_DIR/parsed_logs.txt" | \
        sort -rn > "$TEMP_DIR/rapid_fire_ips.txt"

    print_success "Botnet analysis complete"
}
|
||
|
||
#############################################################################
|
||
# Server IP Detection
|
||
#############################################################################
|
||
|
||
# Gather the addresses that belong to this server into
# $TEMP_DIR/server_ips.txt (one per line, de-duplicated, no blank lines).
#
# Sources, in order: `hostname -I`, `ip addr show`, `ifconfig`, the first
# public-IP lookup service that answers with a dotted quad, and
# /var/cpanel/mainip when that file exists. Every probe is best-effort: a
# missing tool or a failed lookup is ignored.
#
# Globals:
#   TEMP_DIR (read)               - working directory for the result file
#   service, public_ip (write)    - scratch values of the public-IP lookup
#   server_ip_count (write)       - number of addresses collected
detect_server_ips() {
    print_info "Detecting server's own IP addresses..."

    local ip_list="$TEMP_DIR/server_ips.txt"
    local deduped_list="$TEMP_DIR/server_ips_final.txt"

    # Start from an empty list
    : > "$ip_list"

    # Source 1: addresses reported by hostname
    if command -v hostname >/dev/null 2>&1; then
        hostname -I 2>/dev/null | tr ' ' '\n' | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' >> "$ip_list" || true
    fi

    # Source 2: addresses listed by iproute2
    if command -v ip >/dev/null 2>&1; then
        ip addr show 2>/dev/null | grep -oP 'inet \K[\d.]+' >> "$ip_list" || true
    fi

    # Source 3: addresses listed by ifconfig (fallback)
    if command -v ifconfig >/dev/null 2>&1; then
        ifconfig 2>/dev/null | grep -oP 'inet (addr:)?\K[\d.]+' >> "$ip_list" || true
    fi

    # Source 4: public address as seen from outside; stop at the first
    # service that returns something shaped like an IPv4 address
    for service in "ifconfig.me/ip" "icanhazip.com" "ipecho.net/plain" "api.ipify.org"; do
        public_ip=$(curl -s --max-time 3 "$service" 2>/dev/null | grep -oE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' || true)
        if [ -z "$public_ip" ]; then
            continue
        fi
        echo "$public_ip" >> "$ip_list"
        break
    done

    # Source 5: main IP recorded by cPanel
    if [[ -f /var/cpanel/mainip ]]; then
        cat /var/cpanel/mainip >> "$ip_list"
    fi

    # Drop blank lines and duplicates, then replace the working list
    grep -v '^$' "$ip_list" | sort -u > "$deduped_list" || true
    mv "$deduped_list" "$ip_list"

    server_ip_count=$(wc -l < "$ip_list" 2>/dev/null || echo 0)

    if [ "$server_ip_count" -le 0 ]; then
        print_warning "Could not detect server IPs automatically - proceeding without server IP filtering"
    else
        print_success "Detected $server_ip_count server IP(s) - these will be excluded from threat analysis"
    fi
}
|
||
|
||
# Helper function to validate IP address format
|
||
# Check whether a string looks like an IP address.
#
# Arguments:
#   $1 - candidate address
# Returns:
#   0 if $1 is a dotted-quad IPv4 address with every octet <= 255, or matches
#     the simplified IPv6 pattern (2-7 colon-terminated groups of up to four
#     hex digits followed by a final group); 1 otherwise.
is_valid_ip() {
    local ip="$1"
    local octet
    local -a octets

    # IPv4 validation
    if [[ "$ip" =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
        # Split on dots for this one read only, so IFS is not changed for the
        # rest of the function and no unquoted expansion is needed.
        IFS='.' read -r -a octets <<< "$ip"
        for octet in "${octets[@]}"; do
            if [ "$octet" -gt 255 ]; then
                return 1  # Invalid
            fi
        done
        return 0  # Valid IPv4
    fi

    # IPv6 basic validation (simplified)
    if [[ "$ip" =~ ^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$ ]]; then
        return 0  # Valid IPv6
    fi

    return 1  # Invalid
}
|
||
|
||
# Helper function to check if an IP should be excluded
|
||
# Decide whether an address should be left out of threat analysis.
#
# Arguments:
#   $1 - candidate address
# Globals:
#   TEMP_DIR (read) - $TEMP_DIR/server_ips.txt, when present, lists the
#                     server's own addresses, one per line
# Returns:
#   0 (exclude) when $1 fails is_valid_ip, is loopback / private / link-local
#     (IPv4 or IPv6), or is listed in server_ips.txt; 1 (keep) otherwise.
is_excluded_ip() {
    local ip="$1"
    local server_ip

    # First validate IP format. Anything that is not an address (including
    # the literal "localhost") is excluded here.
    if ! is_valid_ip "$ip"; then
        return 0  # Exclude invalid IPs
    fi

    # IPv4 loopback, RFC 1918 private ranges, IPv4 link-local, IPv6 loopback
    if [[ "$ip" =~ ^127\. ]] || \
       [[ "$ip" =~ ^10\. ]] || \
       [[ "$ip" =~ ^192\.168\. ]] || \
       [[ "$ip" =~ ^172\.(1[6-9]|2[0-9]|3[01])\. ]] || \
       [[ "$ip" =~ ^169\.254\. ]] || \
       [[ "$ip" == "::1" ]]; then
        return 0  # True - should be excluded
    fi

    # IPv6 counterparts of the private checks above:
    # link-local fe80::/10 and unique-local fc00::/7
    if [[ "$ip" =~ ^[fF][eE][89aAbB][0-9a-fA-F]: ]] || \
       [[ "$ip" =~ ^[fF][cCdD][0-9a-fA-F]{2}: ]]; then
        return 0  # True - should be excluded
    fi

    # Check if it's the server's own IP. The list is short, so it is compared
    # line by line in the shell rather than forking grep for every address.
    if [ -f "$TEMP_DIR/server_ips.txt" ]; then
        while IFS= read -r server_ip || [ -n "$server_ip" ]; do
            if [ "$server_ip" = "$ip" ]; then
                return 0  # True - should be excluded
            fi
        done < "$TEMP_DIR/server_ips.txt"
    fi

    return 1  # False - should not be excluded
}
|
||
|
||
#############################################################################
|
||
# Time-Series Analysis
|
||
#############################################################################
|
||
|
||
# Build per-hour request histograms for classified bots and for IPs that
# appear in the attack-vector list.
#
# Globals:
#   TEMP_DIR (read) - working directory holding the input files
# Inputs:
#   $TEMP_DIR/classified_bots.txt    - optional; pipe-delimited, field 8 =
#                                      timestamp, field 9 = bot type
#   $TEMP_DIR/attack_vectors_raw.txt - optional; field 1 = attacking IP
#   $TEMP_DIR/parsed_logs.txt        - pipe-delimited, field 1 = IP,
#                                      field 8 = timestamp
#   Timestamps are expected as DD/Mon/YYYY:HH:MM:SS.
# Outputs (under $TEMP_DIR, "uniq -c" style: count then two-digit hour):
#   hourly_bot_traffic.txt    requests whose bot type is not "unknown"
#                             (always created)
#   hourly_attack_traffic.txt all requests from IPs present in the attack
#                             list (only when that list exists)
analyze_time_series() {
    print_info "Analyzing time-series patterns..."

    # Extract hourly bot traffic. The classification file is treated as
    # optional so a missing file yields an empty histogram instead of
    # aborting the run under "set -e" / pipefail.
    if [ -f "$TEMP_DIR/classified_bots.txt" ]; then
        awk -F'|' '$9 != "unknown" {
            # DD/Mon/YYYY: is 12 characters, so the hour starts at offset 12
            if (match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
                print substr($8, RSTART + 12, 2)
            }
        }' "$TEMP_DIR/classified_bots.txt" | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
    else
        : > "$TEMP_DIR/hourly_bot_traffic.txt"
    fi

    # Extract hourly attack traffic
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # First file loads the attacking IPs; second file contributes the
        # timestamps of every request made by those IPs.
        awk -F'|' 'NR==FNR {ips[$1]=1; next} $1 in ips {
            if (match($8, /[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/)) {
                print substr($8, RSTART + 12, 2)
            }
        }' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
    fi

    print_success "Time-series analysis complete"
}
|
||
|
||
#############################################################################
|
||
# Threat Scoring
|
||
#############################################################################
|
||
|
||
calculate_threat_scores() {
|
||
print_info "Calculating threat scores..."
|
||
|
||
# Pre-count requests per IP (MUCH faster than grepping for each IP)
|
||
declare -A ip_request_counts
|
||
while IFS='|' read -r ip rest; do
|
||
((ip_request_counts["$ip"]++))
|
||
done < <(cat "$TEMP_DIR/parsed_logs.txt")
|
||
|
||
# Build hash tables from threat files for O(1) lookups
|
||
# OPTIMIZATION: Use awk instead of echo|awk|cut in loops (10x faster)
|
||
declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
|
||
declare -A threat_ips_suspicious threat_ips_ddos threat_admin_count threat_404_count
|
||
|
||
# Parse each threat file and build hash tables (optimized with awk)
|
||
[ -f "$TEMP_DIR/sqli_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_sqli["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/sqli_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/xss_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_xss["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/xss_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/path_traversal_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_path["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/path_traversal_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/rce_upload_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_rce["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/rce_upload_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/login_bruteforce_attempts.txt" ] && while read -r ip; do
|
||
threat_ips_login["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/login_bruteforce_attempts.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/suspicious_ua.txt" ] && while read -r ip; do
|
||
threat_ips_suspicious["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1)
|
||
|
||
[ -f "$TEMP_DIR/rapid_fire_ips.txt" ] && while read -r ip; do
|
||
threat_ips_ddos["$ip"]=1
|
||
done < <(awk '{print $2}' "$TEMP_DIR/rapid_fire_ips.txt")
|
||
|
||
# Parse count-based threat files
|
||
[ -f "$TEMP_DIR/admin_probes.txt" ] && while read -r count ip; do
|
||
[ -n "$ip" ] && threat_admin_count["$ip"]=$count
|
||
done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" | sed 's/|.*//')
|
||
|
||
[ -f "$TEMP_DIR/404_scans.txt" ] && while read -r count ip; do
|
||
[ -n "$ip" ] && threat_404_count["$ip"]=$count
|
||
done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" | sed 's/|.*//')
|
||
|
||
# NEW: Load bot classifications to skip volume scoring for legitimate bots
|
||
declare -A legit_bot_ips
|
||
if [ -f "$TEMP_DIR/classified_bots.txt" ]; then
|
||
while IFS='|' read -r ip domain url status size ua method timestamp bot_type bot_name; do
|
||
if [ "$bot_type" = "legit" ]; then
|
||
legit_bot_ips["$ip"]=1
|
||
fi
|
||
done < "$TEMP_DIR/classified_bots.txt"
|
||
fi
|
||
|
||
# NEW: Load success rate data for scanning/scraping detection
|
||
declare -A scanner_ips scraper_ips ip_fail_rates
|
||
[ -f "$TEMP_DIR/high_failure_ips.txt" ] && while IFS='|' read -r ip total fail_rate category; do
|
||
scanner_ips["$ip"]=$fail_rate
|
||
done < "$TEMP_DIR/high_failure_ips.txt"
|
||
|
||
[ -f "$TEMP_DIR/high_success_ips.txt" ] && while IFS='|' read -r ip total success_rate category; do
|
||
scraper_ips["$ip"]=$success_rate
|
||
done < "$TEMP_DIR/high_success_ips.txt"
|
||
|
||
# Load all fail rates for threshold checks
|
||
[ -f "$TEMP_DIR/ip_success_rates.txt" ] && while IFS='|' read -r ip total success_rate fail_rate; do
|
||
ip_fail_rates["$ip"]=$fail_rate
|
||
done < "$TEMP_DIR/ip_success_rates.txt"
|
||
|
||
# NEW: Load header anomalies
|
||
declare -A header_anomalies
|
||
[ -f "$TEMP_DIR/header_anomalies.txt" ] && while IFS='|' read -r ip anomaly_type score; do
|
||
header_anomalies["$ip"]=$score
|
||
done < "$TEMP_DIR/header_anomalies.txt"
|
||
|
||
# NEW: Load suspicious entry points
|
||
declare -A suspicious_entry_ips
|
||
[ -f "$TEMP_DIR/suspicious_entry_points.txt" ] && while IFS='|' read -r ip entry_type url status; do
|
||
suspicious_entry_ips["$ip"]=1
|
||
done < "$TEMP_DIR/suspicious_entry_points.txt"
|
||
|
||
# NEW: Load fuzzing/parameter scanning IPs
|
||
declare -A fuzzing_ips
|
||
[ -f "$TEMP_DIR/fuzzing_ips.txt" ] && while IFS='|' read -r ip fuzz_type total_urls unique_paths; do
|
||
fuzzing_ips["$ip"]=$total_urls
|
||
done < "$TEMP_DIR/fuzzing_ips.txt"
|
||
|
||
# NEW: Load timing anomalies (consistent bot timing)
|
||
declare -A timing_anomalies
|
||
[ -f "$TEMP_DIR/timing_anomalies.txt" ] && while IFS='|' read -r ip timing_type avg_interval total_reqs; do
|
||
timing_anomalies["$ip"]=$avg_interval
|
||
done < "$TEMP_DIR/timing_anomalies.txt"
|
||
|
||
# Now calculate scores for each IP (using pre-counted requests)
|
||
for ip in "${!ip_request_counts[@]}"; do
|
||
# Skip excluded IPs
|
||
if is_excluded_ip "$ip"; then
|
||
continue
|
||
fi
|
||
|
||
score=0
|
||
req_count=${ip_request_counts[$ip]}
|
||
|
||
# IMPROVED: Base request volume scoring
|
||
# Skip volume scoring for legitimate bots (Google, Bing, etc.)
|
||
if [ -z "${legit_bot_ips[$ip]}" ]; then
|
||
# Not a legitimate bot - apply volume scoring
|
||
if [ "$req_count" -gt 10000 ]; then score=$((score + 10))
|
||
elif [ "$req_count" -gt 5000 ]; then score=$((score + 8))
|
||
elif [ "$req_count" -gt 1000 ]; then score=$((score + 5))
|
||
elif [ "$req_count" -gt 500 ]; then score=$((score + 3))
|
||
fi
|
||
fi
|
||
|
||
# NEW: Success rate analysis bonuses
|
||
# High failure rate (80%+ 404/403) = scanning behavior
|
||
if [ -n "${scanner_ips[$ip]}" ]; then
|
||
fail_rate=${scanner_ips[$ip]}
|
||
if [ "$fail_rate" -ge 90 ]; then
|
||
score=$((score + 8)) # Very high failure rate
|
||
elif [ "$fail_rate" -ge 80 ]; then
|
||
score=$((score + 5)) # High failure rate
|
||
fi
|
||
fi
|
||
|
||
# High success rate (90%+ 200/301/302) + high volume = potential scraping
|
||
if [ -n "${scraper_ips[$ip]}" ] && [ "$req_count" -gt 500 ]; then
|
||
score=$((score + 7)) # Scraping behavior
|
||
fi
|
||
|
||
# Attack patterns
|
||
[ -n "${threat_ips_sqli[$ip]}" ] && score=$((score + 15))
|
||
[ -n "${threat_ips_xss[$ip]}" ] && score=$((score + 12))
|
||
[ -n "${threat_ips_path[$ip]}" ] && score=$((score + 15))
|
||
[ -n "${threat_ips_rce[$ip]}" ] && score=$((score + 20))
|
||
[ -n "${threat_ips_login[$ip]}" ] && score=$((score + 10))
|
||
[ -n "${threat_ips_suspicious[$ip]}" ] && score=$((score + 10))
|
||
[ -n "${threat_ips_ddos[$ip]}" ] && score=$((score + 10))
|
||
|
||
# NEW: Header anomalies (strong indicator of bots)
|
||
if [ -n "${header_anomalies[$ip]}" ]; then
|
||
header_score=${header_anomalies[$ip]}
|
||
if [ "$header_score" -ge 12 ]; then
|
||
score=$((score + 8)) # Multiple header suspicions
|
||
elif [ "$header_score" -ge 8 ]; then
|
||
score=$((score + 5)) # Moderate header anomalies
|
||
fi
|
||
fi
|
||
|
||
# NEW: Suspicious entry point (direct jump to admin/config)
|
||
if [ -n "${suspicious_entry_ips[$ip]}" ]; then
|
||
score=$((score + 6)) # Direct attack attempt without probing
|
||
fi
|
||
|
||
# NEW: Fuzzing/parameter scanning behavior
|
||
if [ -n "${fuzzing_ips[$ip]}" ]; then
|
||
fuzz_requests=${fuzzing_ips[$ip]}
|
||
if [ "$fuzz_requests" -gt 100 ]; then
|
||
score=$((score + 7)) # Aggressive fuzzing
|
||
elif [ "$fuzz_requests" -gt 50 ]; then
|
||
score=$((score + 4)) # Moderate fuzzing
|
||
fi
|
||
fi
|
||
|
||
# NEW: Timing anomalies (very consistent request timing = bot)
|
||
if [ -n "${timing_anomalies[$ip]}" ]; then
|
||
score=$((score + 6)) # Very consistent timing indicates automation
|
||
fi
|
||
|
||
# Admin probing - IMPROVED: Raised threshold to 50 (only failed attempts counted)
|
||
admin_count=${threat_admin_count[$ip]:-0}
|
||
if [ "$admin_count" -gt 100 ] 2>/dev/null; then
|
||
score=$((score + 10)) # Excessive probing
|
||
elif [ "$admin_count" -gt 50 ] 2>/dev/null; then
|
||
score=$((score + 5)) # Moderate probing
|
||
fi
|
||
|
||
# 404 scanning
|
||
scan_404=${threat_404_count[$ip]:-0}
|
||
[ "$scan_404" -gt 50 ] 2>/dev/null && score=$((score + 3))
|
||
|
||
# OPTIMIZATION: Skip external API calls for performance
|
||
# Threat Intelligence Enrichment can be done post-analysis for high-risk IPs only
|
||
# Uncommenting these will SIGNIFICANTLY slow down analysis (API calls for every IP)
|
||
#
|
||
# To enable threat intelligence enrichment:
|
||
# 1. Uncomment the code below
|
||
# 2. Ensure check_abuseipdb, get_country_code, and is_high_risk_country functions exist
|
||
# 3. Be aware this will make thousands of API calls and take much longer
|
||
#
|
||
# local abuse_data=$(check_abuseipdb "$ip" 2>/dev/null || echo "0|0|Unknown|Unknown")
|
||
# IFS='|' read -r abuse_confidence abuse_reports abuse_country abuse_isp <<< "$abuse_data"
|
||
#
|
||
# if [ "$abuse_confidence" -ge 75 ]; then
|
||
# score=$((score + 15)) # High confidence malicious
|
||
# elif [ "$abuse_confidence" -ge 50 ]; then
|
||
# score=$((score + 8)) # Moderate confidence
|
||
# elif [ "$abuse_confidence" -ge 25 ]; then
|
||
# score=$((score + 3)) # Low confidence
|
||
# fi
|
||
#
|
||
# local geo_country=$(get_country_code "$ip" 2>/dev/null || echo "XX")
|
||
# if is_high_risk_country "$geo_country" 2>/dev/null; then
|
||
# score=$((score + 5)) # High-risk country bonus
|
||
# fi
|
||
|
||
# Cap at 100
|
||
[ "${score:-0}" -gt 100 ] && score=100
|
||
|
||
# Only output IPs with score > 0
|
||
[ "${score:-0}" -gt 0 ] && echo "$score|$ip|$req_count"
|
||
|
||
# Track in centralized IP reputation database (background process)
|
||
if [ "${score:-0}" -gt 0 ]; then
|
||
(
|
||
# Update IP with hit count
|
||
increment_ip_hits "$ip" "$req_count" >/dev/null 2>&1
|
||
|
||
# Tag with specific attack types found
|
||
[ -n "${threat_ips_sqli[$ip]}" ] && flag_ip_attack "$ip" "SQL_INJECTION" 0 "Bot analyzer: SQL injection attempts" >/dev/null 2>&1
|
||
[ -n "${threat_ips_xss[$ip]}" ] && flag_ip_attack "$ip" "XSS" 0 "Bot analyzer: XSS attempts" >/dev/null 2>&1
|
||
[ -n "${threat_ips_path[$ip]}" ] && flag_ip_attack "$ip" "PATH_TRAVERSAL" 0 "Bot analyzer: Path traversal" >/dev/null 2>&1
|
||
[ -n "${threat_ips_rce[$ip]}" ] && flag_ip_attack "$ip" "RCE" 0 "Bot analyzer: RCE/shell upload attempts" >/dev/null 2>&1
|
||
[ -n "${threat_ips_login[$ip]}" ] && flag_ip_attack "$ip" "BRUTEFORCE" 0 "Bot analyzer: Login bruteforce" >/dev/null 2>&1
|
||
[ -n "${threat_ips_ddos[$ip]}" ] && flag_ip_attack "$ip" "DDOS" 0 "Bot analyzer: Rapid-fire requests" >/dev/null 2>&1
|
||
[ -n "${threat_ips_suspicious[$ip]}" ] && flag_ip_attack "$ip" "SCANNER" 0 "Bot analyzer: Suspicious user-agent" >/dev/null 2>&1
|
||
) &
|
||
fi
|
||
done | sort -t'|' -k1 -rn > "$TEMP_DIR/threat_scores.txt"
|
||
|
||
# Wait for background IP reputation updates to complete
|
||
wait
|
||
|
||
print_success "Threat scores calculated and IP reputation updated"
|
||
}
|
||
|
||
#############################################################################
# False Positive Detection
#############################################################################
|
||
|
||
detect_false_positives() {
|
||
print_info "Detecting legitimate services (false positives)..."
|
||
|
||
# Known monitoring service patterns and legitimate CDNs
|
||
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{
|
||
ip = $1
|
||
domain = $2
|
||
url = $3
|
||
ua = tolower($6)
|
||
|
||
# Monitoring Services
|
||
if (match(ua, /pingdom/) || match(ua, /pingdom\.com_bot/)) {
|
||
print ip "|Pingdom Monitoring|" ua "|" domain
|
||
}
|
||
else if (match(ua, /uptimerobot/)) {
|
||
print ip "|UptimeRobot Monitoring|" ua "|" domain
|
||
}
|
||
else if (match(ua, /statuscake/)) {
|
||
print ip "|StatusCake Monitoring|" ua "|" domain
|
||
}
|
||
# WordPress cache preload (WP Rocket, Hummingbird)
|
||
else if (match(url, /admin-ajax\.php.*cache_preload/) || match(url, /admin-ajax\.php.*wphb/)) {
|
||
print ip "|WordPress Cache Preload|" ua "|" domain
|
||
}
|
||
# Legitimate backup services
|
||
else if (match(ua, /jetpack|vaultpress|updraftplus|backwpup/)) {
|
||
print ip "|Backup Service|" ua "|" domain
|
||
}
|
||
# NEW: Google services
|
||
else if (match(ua, /googlebot|google web preview|google-read-aloud|bingbot|slurp|duckduckbot/)) {
|
||
print ip "|Search Engine Bot|" ua "|" domain
|
||
}
|
||
# NEW: Content delivery networks (usually legit)
|
||
else if (match(ua, /cloudflare|akamai|fastly|cloudfront|edgecast|maxcdn|amazon/)) {
|
||
print ip "|CDN Service|" ua "|" domain
|
||
}
|
||
# NEW: Analytics services
|
||
else if (match(ua, /googleanalytics|fbexternalhit|twitterbot|linkedinbot|pinterestbot|whatsapp|telegram/)) {
|
||
print ip "|Analytics\/Social Service|" ua "|" domain
|
||
}
|
||
# NEW: Payment processors (legitimate POST to checkout)
|
||
else if (match(url, /checkout|payment|paypal|stripe|square/) && match(ua, /paypal|stripe|square/)) {
|
||
print ip "|Payment Processor|" ua "|" domain
|
||
}
|
||
}' | sort -u > "$TEMP_DIR/false_positives.txt"
|
||
|
||
print_success "False positive detection complete ($(wc -l < "$TEMP_DIR/false_positives.txt") legitimate services identified)"
|
||
}
|
||
|
||
#############################################################################
# Statistical Analysis
#############################################################################
|
||
|
||
generate_statistics() {
|
||
print_info "Generating statistics..."
|
||
|
||
# OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs
|
||
# This reads the uncompressed file ONCE instead of 4+ separate reads
|
||
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" '
|
||
{
|
||
# Count by domain (for top sites)
|
||
domains[$2]++
|
||
|
||
# Count by IP (for top IPs)
|
||
ips[$1]++
|
||
|
||
# Count by domain+URL (for top URLs)
|
||
urls[$2"|"$3]++
|
||
}
|
||
END {
|
||
# Output top sites
|
||
for (domain in domains) {
|
||
print domains[domain], domain > tmpdir "/top_sites_raw.txt"
|
||
}
|
||
|
||
# Output top IPs
|
||
for (ip in ips) {
|
||
print ips[ip], ip > tmpdir "/top_ips_raw.txt"
|
||
}
|
||
|
||
# Output top URLs
|
||
for (url in urls) {
|
||
print urls[url], url > tmpdir "/top_urls_raw.txt"
|
||
}
|
||
close(tmpdir "/top_sites_raw.txt")
|
||
close(tmpdir "/top_ips_raw.txt")
|
||
close(tmpdir "/top_urls_raw.txt")
|
||
}'
|
||
|
||
# Sort and limit results
|
||
sort -rn "$TEMP_DIR/top_sites_raw.txt" | head -5 > "$TEMP_DIR/top_sites.txt"
|
||
sort -rn "$TEMP_DIR/top_ips_raw.txt" | head -5 > "$TEMP_DIR/top_ips.txt"
|
||
sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt"
|
||
|
||
# Top 5 bots by request count (single decompression)
|
||
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" {print $10}' | \
|
||
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt"
|
||
|
||
# Traffic breakdown by bot type (single decompression)
|
||
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $9}' | \
|
||
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt"
|
||
|
||
# Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep)
|
||
if [ -f "$TEMP_DIR/all_domains.txt" ]; then
|
||
# Create indexed bot traffic file (decompress once)
|
||
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt"
|
||
|
||
while read -r domain; do
|
||
echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"
|
||
grep "^$domain|" "$TEMP_DIR/domain_bot_types.txt" 2>/dev/null | cut -d'|' -f2 | \
|
||
sort | uniq -c | sort -rn >> "$TEMP_DIR/domain_${domain}_stats.txt" || true
|
||
done < "$TEMP_DIR/all_domains.txt"
|
||
fi
|
||
|
||
print_success "Statistics generated"
|
||
}
|
||
|
||
#############################################################################
# NEW: Comparison Reports (detect trends)
#############################################################################
|
||
|
||
generate_comparison_report() {
|
||
print_info "Generating trend analysis and baseline comparison..."
|
||
|
||
# Store current results for comparison with previous analysis
|
||
local history_dir="$TOOLKIT_TMP_DIR/analysis_history"
|
||
mkdir -p "$history_dir"
|
||
|
||
local timestamp=$(date +%Y%m%d_%H%M%S)
|
||
local today=$(date +%Y%m%d)
|
||
local latest_report="$history_dir/latest_analysis_$today.txt"
|
||
|
||
# Extract key metrics from current analysis
|
||
{
|
||
echo "Timestamp: $timestamp"
|
||
echo "Total_Requests: $(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo 0)"
|
||
echo "Unique_IPs: $(awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo 0)"
|
||
echo "High_Risk_IPs: $(awk -F'|' '$1 >= 70' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | wc -l || echo 0)"
|
||
echo "Attack_Vectors: $(awk -F'|' '{print $5}' "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | sort -u | wc -l || echo 0)"
|
||
echo "SQL_Injection: $(wc -l < "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null || echo 0)"
|
||
echo "XSS_Attempts: $(wc -l < "$TEMP_DIR/xss_attempts.txt" 2>/dev/null || echo 0)"
|
||
echo "Bot_Traffic: $(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo 0)"
|
||
echo "Suspected_Scanners: $(wc -l < "$TEMP_DIR/high_failure_ips.txt" 2>/dev/null || echo 0)"
|
||
echo "Header_Anomalies: $(wc -l < "$TEMP_DIR/header_anomalies.txt" 2>/dev/null || echo 0)"
|
||
echo "Entry_Point_Suspicious: $(wc -l < "$TEMP_DIR/suspicious_entry_points.txt" 2>/dev/null || echo 0)"
|
||
echo "Fuzzing_IPs: $(wc -l < "$TEMP_DIR/fuzzing_ips.txt" 2>/dev/null || echo 0)"
|
||
} > "$latest_report"
|
||
|
||
# NEW: Generate baseline comparison
|
||
echo ""
|
||
print_header "BASELINE COMPARISON (Is this activity normal?)"
|
||
|
||
local total_requests=$(grep "^Total_Requests:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
||
local baseline_requests=$(calculate_baseline_average "server" "requests" 7)
|
||
|
||
if [ "$baseline_requests" -gt 0 ]; then
|
||
local request_pct=$((total_requests * 100 / baseline_requests))
|
||
if [ "$request_pct" -gt 200 ]; then
|
||
echo -e "${RED}🔴 ABNORMAL: Requests are $(($request_pct - 100))% above 7-day average${NC}"
|
||
echo " Baseline (7-day avg): $baseline_requests requests"
|
||
echo " Today: $total_requests requests"
|
||
elif [ "$request_pct" -lt 50 ]; then
|
||
echo "🟢 LOW: Requests are $(($((100 - $request_pct))))% below baseline"
|
||
else
|
||
echo "🟡 NORMAL: Requests within expected range"
|
||
fi
|
||
else
|
||
echo "📊 (No historical baseline yet - first analysis)"
|
||
fi
|
||
|
||
local high_risk=$(grep "^High_Risk_IPs:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
||
local baseline_attacks=$(calculate_baseline_average "server" "high_risk" 7)
|
||
|
||
if [ "$baseline_attacks" -gt 0 ]; then
|
||
local attack_ratio=$((high_risk / baseline_attacks))
|
||
if [ "$attack_ratio" -gt 3 ]; then
|
||
echo -e "${RED}🔴 ABNORMAL: High-risk IPs are ${attack_ratio}x above baseline${NC}"
|
||
echo " Baseline (7-day avg): $baseline_attacks high-risk IPs"
|
||
echo " Today: $high_risk high-risk IPs"
|
||
elif [ "$high_risk" -gt "$baseline_attacks" ]; then
|
||
echo -e "${YELLOW}🟡 ELEVATED: $high_risk high-risk IPs (baseline: $baseline_attacks)${NC}"
|
||
else
|
||
echo "🟢 NORMAL: High-risk IPs within expected range"
|
||
fi
|
||
fi
|
||
|
||
# Compare with previous day's analysis
|
||
local yesterday=$(date -d "1 day ago" +%Y%m%d 2>/dev/null || date -v-1d +%Y%m%d 2>/dev/null)
|
||
local previous_report="$history_dir/latest_analysis_${yesterday}.txt"
|
||
|
||
if [ -f "$previous_report" ]; then
|
||
echo ""
|
||
print_header "DAY-OVER-DAY TRENDS"
|
||
|
||
# Extract metrics and calculate differences
|
||
local curr_high_risk=$(grep "^High_Risk_IPs:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
||
local prev_high_risk=$(grep "^High_Risk_IPs:" "$previous_report" | cut -d: -f2 | tr -d ' ')
|
||
local risk_diff=$((curr_high_risk - prev_high_risk))
|
||
local risk_pct=0
|
||
|
||
if [ "$prev_high_risk" -gt 0 ]; then
|
||
risk_pct=$((risk_diff * 100 / prev_high_risk))
|
||
fi
|
||
|
||
# Display trend
|
||
if [ "$risk_diff" -gt 0 ]; then
|
||
echo "⚠️ High-Risk IPs: $curr_high_risk (↑ $risk_diff IPs, +${risk_pct}%)"
|
||
elif [ "$risk_diff" -lt 0 ]; then
|
||
echo "✓ High-Risk IPs: $curr_high_risk (↓ $((risk_diff * -1)) IPs, ${risk_pct}%)"
|
||
else
|
||
echo "→ High-Risk IPs: $curr_high_risk (no change)"
|
||
fi
|
||
|
||
# Repeat for other metrics
|
||
local curr_sql=$(grep "^SQL_Injection:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
||
local prev_sql=$(grep "^SQL_Injection:" "$previous_report" | cut -d: -f2 | tr -d ' ')
|
||
local sql_diff=$((curr_sql - prev_sql))
|
||
|
||
if [ "$sql_diff" -gt 0 ]; then
|
||
echo "⚠️ SQL Injection: $curr_sql (↑ $sql_diff new attempts)"
|
||
elif [ "$sql_diff" -lt 0 ]; then
|
||
echo "✓ SQL Injection: $curr_sql (↓ $((sql_diff * -1)) fewer)"
|
||
else
|
||
echo "→ SQL Injection: $curr_sql (stable)"
|
||
fi
|
||
|
||
# Track repeat attackers
|
||
local repeat_attackers=0
|
||
if [ -f "$history_dir/known_attackers_${yesterday}.txt" ]; then
|
||
repeat_attackers=$(comm -12 <(awk -F'|' '$1 >= 70 {print $2}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | sort -u) <(sort -u "$history_dir/known_attackers_${yesterday}.txt") 2>/dev/null | wc -l || echo 0)
|
||
if [ "$repeat_attackers" -gt 0 ]; then
|
||
echo -e "${RED}🔄 REPEAT ATTACKERS: $repeat_attackers IPs from yesterday${NC}"
|
||
fi
|
||
fi
|
||
fi
|
||
|
||
# Save current high-risk IPs for tomorrow's comparison
|
||
awk -F'|' '$1 >= 70 {print $2}' "$TEMP_DIR/threat_scores.txt" 2>/dev/null | sort -u > "$history_dir/known_attackers_${today}.txt"
|
||
}
|
||
|
||
#############################################################################
# Report Generation
#############################################################################
|
||
|
||
generate_report() {
|
||
exec > >(tee "$OUTPUT_FILE")
|
||
|
||
echo "==============================================================="
|
||
echo " APACHE/CPANEL BOT & BOTNET ANALYSIS REPORT"
|
||
echo " Generated: $(date '+%Y-%m-%d %H:%M:%S')"
|
||
echo "==============================================================="
|
||
|
||
# CRITICAL ALERTS SECTION
|
||
print_header "CRITICAL ALERTS"
|
||
|
||
alert_count=0
|
||
|
||
# Check for attack vectors
|
||
if [ -s "$TEMP_DIR/attack_types.txt" ]; then
|
||
print_alert "Security Attack Vectors Detected:"
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
attack_type=$(echo "$line" | awk '{print $2}')
|
||
|
||
case $attack_type in
|
||
sqli) echo " SQL Injection: $count attempts" ;;
|
||
xss) echo " XSS Attacks: $count attempts" ;;
|
||
path_traversal) echo " Path Traversal: $count attempts" ;;
|
||
rce_upload) echo " RCE/Shell Upload: $count attempts" ;;
|
||
info_disclosure) echo " Info Disclosure: $count attempts" ;;
|
||
login_bruteforce) echo " Login Bruteforce: $count attempts" ;;
|
||
esac
|
||
done < "$TEMP_DIR/attack_types.txt"
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# Check for suspicious scanners
|
||
if [ -s "$TEMP_DIR/suspicious_ua.txt" ]; then
|
||
scanner_count=$(wc -l < "$TEMP_DIR/suspicious_ua.txt")
|
||
print_alert "Malicious scanners detected: $scanner_count IPs"
|
||
echo " Top scanners:"
|
||
head -3 "$TEMP_DIR/suspicious_ua.txt" | while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
|
||
ua=$(echo "$line" | cut -d'|' -f2)
|
||
printf " %s requests - IP: %s - UA: %s\n" "$count" "$ip" "$ua"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for header anomalies (bot signatures)
|
||
if [ -s "$TEMP_DIR/header_anomalies.txt" ]; then
|
||
header_count=$(wc -l < "$TEMP_DIR/header_anomalies.txt")
|
||
print_alert "Header-based bot signatures detected: $header_count IPs"
|
||
echo " These IPs show suspicious header patterns (missing/unusual Accept-Language, Referer, etc.)"
|
||
head -5 "$TEMP_DIR/header_anomalies.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
anomaly_type=$(echo "$line" | awk -F'|' '{print $2}')
|
||
score=$(echo "$line" | awk -F'|' '{print $3}')
|
||
printf " • %s - Anomaly score: %s (detected: %s)\n" "$ip" "$score" "$anomaly_type"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for suspicious entry points
|
||
if [ -s "$TEMP_DIR/suspicious_entry_points.txt" ]; then
|
||
entry_count=$(wc -l < "$TEMP_DIR/suspicious_entry_points.txt")
|
||
print_alert "Suspicious entry points detected: $entry_count IPs"
|
||
echo " These IPs skip homepage/search and go straight to admin/config:"
|
||
head -5 "$TEMP_DIR/suspicious_entry_points.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
url=$(echo "$line" | awk -F'|' '{print $3}')
|
||
status=$(echo "$line" | awk -F'|' '{print $4}')
|
||
printf " • %s → %s (HTTP %s)\n" "$ip" "$url" "$status"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for fuzzing/scanning behavior
|
||
if [ -s "$TEMP_DIR/fuzzing_ips.txt" ]; then
|
||
fuzz_count=$(wc -l < "$TEMP_DIR/fuzzing_ips.txt")
|
||
print_alert "Parameter fuzzing/scanning detected: $fuzz_count IPs"
|
||
echo " These IPs are testing random parameters (vulnerability scanning):"
|
||
head -5 "$TEMP_DIR/fuzzing_ips.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
total_urls=$(echo "$line" | awk -F'|' '{print $3}')
|
||
unique_paths=$(echo "$line" | awk -F'|' '{print $4}')
|
||
printf " • %s - %s URLs across %s paths\n" "$ip" "$total_urls" "$unique_paths"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# NEW: Check for timing anomalies (bot signatures)
|
||
if [ -s "$TEMP_DIR/timing_anomalies.txt" ]; then
|
||
timing_count=$(wc -l < "$TEMP_DIR/timing_anomalies.txt")
|
||
print_alert "Consistent timing pattern detected: $timing_count IPs"
|
||
echo " These IPs show mechanical request patterns (bot behavior):"
|
||
head -5 "$TEMP_DIR/timing_anomalies.txt" | while read -r line; do
|
||
ip=$(echo "$line" | awk -F'|' '{print $1}')
|
||
avg_interval=$(echo "$line" | awk -F'|' '{print $3}')
|
||
total_reqs=$(echo "$line" | awk -F'|' '{print $4}')
|
||
printf " • %s - %.1f seconds average between requests (%s total requests)\n" "$ip" "$avg_interval" "$total_reqs"
|
||
done
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# Check for rapid-fire IPs (potential DDoS)
|
||
if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
|
||
ddos_count=$(wc -l < "$TEMP_DIR/rapid_fire_ips.txt")
|
||
print_alert "Potential DDoS sources: $ddos_count IPs with >50 req/min"
|
||
echo " Top offenders:"
|
||
head -3 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print " "$2" - "$1" rapid requests"}'
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
# Check for suspicious networks
|
||
if [ -s "$TEMP_DIR/suspicious_networks.txt" ]; then
|
||
net_count=$(wc -l < "$TEMP_DIR/suspicious_networks.txt")
|
||
print_alert "Suspicious networks detected: $net_count Class C ranges"
|
||
echo " Top networks:"
|
||
head -3 "$TEMP_DIR/suspicious_networks.txt" | awk '{print " "$2" - "$1" requests"}'
|
||
echo ""
|
||
alert_count=$((alert_count + 1))
|
||
fi
|
||
|
||
if [ "${alert_count:-0}" -eq 0 ]; then
|
||
print_success "No critical threats detected"
|
||
fi
|
||
|
||
# QUICK STATS DASHBOARD
|
||
print_header "QUICK STATS DASHBOARD"
|
||
|
||
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
||
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
||
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
||
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" | wc -l)
|
||
|
||
# Count private/internal IPs (excluded from threat analysis)
|
||
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)
|
||
|
||
# Count server's own IPs in the logs
|
||
server_ip_hits=0
|
||
if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
|
||
while read -r server_ip; do
|
||
if cat "$TEMP_DIR/parsed_logs.txt" | grep -q "^$server_ip|" 2>/dev/null; then
|
||
server_ip_hits=$((server_ip_hits + 1))
|
||
fi
|
||
done < "$TEMP_DIR/server_ips.txt"
|
||
fi
|
||
|
||
echo "Total Requests: $(printf "%'d" $total_requests)"
|
||
echo "Unique IPs: $(printf "%'d" $unique_ips)"
|
||
|
||
# Show breakdown if we have excluded IPs
|
||
if [ "$private_ips" -gt 0 ] || [ "$server_ip_hits" -gt 0 ]; then
|
||
excluded_total=$((private_ips + server_ip_hits))
|
||
echo " ├─ Excluded IPs: $(printf "%'d" $excluded_total)"
|
||
[ "$private_ips" -gt 0 ] && echo " │ ├─ Private/Internal: $private_ips"
|
||
[ "$server_ip_hits" -gt 0 ] && echo " │ └─ Server's own: $server_ip_hits"
|
||
echo " └─ External IPs: $(printf "%'d" $((unique_ips - excluded_total)))"
|
||
fi
|
||
|
||
echo "Domains Analyzed: $unique_domains"
|
||
echo "Bot Requests: $(printf "%'d" $bot_requests) ($(awk "BEGIN {printf \"%.1f\", ($bot_requests/$total_requests)*100}")%)"
|
||
|
||
# Show detected server IPs
|
||
if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
|
||
echo ""
|
||
echo " Server IPs Detected (excluded from threat analysis):"
|
||
while read -r server_ip; do
|
||
echo " • $server_ip"
|
||
done < "$TEMP_DIR/server_ips.txt"
|
||
fi
|
||
echo ""
|
||
|
||
# Traffic breakdown
|
||
echo "Traffic Breakdown:"
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
type=$(echo "$line" | awk '{print $2}')
|
||
pct=$(awk "BEGIN {printf \"%.1f\", ($count/$total_requests)*100}")
|
||
|
||
case $type in
|
||
legit) echo " Legitimate Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
ai) echo " AI Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
monitor) echo " 📡 Monitoring/SEO: $(printf "%'7d" $count) ($pct%)" ;;
|
||
suspicious) echo " Suspicious Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
unidentified_bot) echo " ❓ Unidentified Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
||
unknown) echo " Regular Traffic: $(printf "%'7d" $count) ($pct%)" ;;
|
||
esac
|
||
done < "$TEMP_DIR/traffic_breakdown.txt"
|
||
|
||
# TIME-SERIES ANALYSIS
|
||
if [ -s "$TEMP_DIR/hourly_bot_traffic.txt" ]; then
|
||
echo ""
|
||
echo "Bot Traffic Timeline (hourly):"
|
||
max_bot_traffic=$(awk '{print $1}' "$TEMP_DIR/hourly_bot_traffic.txt" | sort -rn | head -1)
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
hour=$(echo "$line" | awk '{print $2}')
|
||
# Create simple bar chart
|
||
bar_width=$((count * 10 / max_bot_traffic))
|
||
[ "${bar_width:-0}" -eq 0 ] && [ "${count:-0}" -gt 0 ] && bar_width=1
|
||
bar=$(printf '█%.0s' $(seq 1 $bar_width))
|
||
spaces=$(printf '░%.0s' $(seq 1 $((10 - bar_width))))
|
||
|
||
# Detect spikes (>2x average)
|
||
avg_traffic=$((total_requests / 24))
|
||
spike=""
|
||
[ ${count:-0} -gt $((avg_traffic * 2)) ] && spike=" SPIKE"
|
||
|
||
# Strip leading zeros to avoid octal interpretation
|
||
hour_num=$((10#$hour))
|
||
next_hour=$((hour_num + 1))
|
||
printf " %02d:00-%02d:00: %s%s %'6d bot requests%s\n" "$hour_num" "$next_hour" "$bar" "$spaces" "$count" "$spike"
|
||
done < "$TEMP_DIR/hourly_bot_traffic.txt"
|
||
fi
|
||
|
||
# RESPONSE CODE INTELLIGENCE
|
||
if [ -s "$TEMP_DIR/response_codes.txt" ]; then
|
||
echo ""
|
||
echo "Response Code Analysis:"
|
||
while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
code=$(echo "$line" | awk '{print $2}')
|
||
pct=$(awk "BEGIN {printf \"%.1f\", ($count/$total_requests)*100}")
|
||
|
||
case $code in
|
||
200) echo " 200 (Success): $(printf "%'7d" $count) ($pct%) Bots are getting data" ;;
|
||
404) echo " 404 (Not Found): $(printf "%'7d" $count) ($pct%) Scanning for vulnerabilities" ;;
|
||
403) echo " 403 (Forbidden): $(printf "%'7d" $count) ($pct%) Blocked by existing rules" ;;
|
||
401) echo " 401 (Unauthorized):$(printf "%'7d" $count) ($pct%) Login attempts failing" ;;
|
||
500|502|503) echo " $code (Server Error):$(printf "%'7d" $count) ($pct%) Check if exploit triggered" ;;
|
||
301|302) echo " $code (Redirect): $(printf "%'7d" $count) ($pct%)" ;;
|
||
*) echo " $code: $(printf "%'7d" $count) ($pct%)" ;;
|
||
esac
|
||
done < "$TEMP_DIR/response_codes.txt" | head -7
|
||
fi
|
||
|
||
# FALSE POSITIVE WARNINGS
|
||
if [ -s "$TEMP_DIR/false_positives.txt" ]; then
|
||
echo ""
|
||
echo "Whitelist Recommendations (Legitimate Services):"
|
||
while read -r line; do
|
||
ip=$(echo "$line" | cut -d'|' -f1)
|
||
service=$(echo "$line" | cut -d'|' -f2)
|
||
domain=$(echo "$line" | cut -d'|' -f4)
|
||
req_count=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -c "^$ip|" || echo 0)
|
||
echo " $ip - $req_count requests - Identified as: $service"
|
||
echo " → Domain: $domain"
|
||
echo " → Action: VERIFY OWNERSHIP then whitelist"
|
||
done < "$TEMP_DIR/false_positives.txt" | head -6
|
||
fi
|
||
|
||
# NEW: HIGH-CONFIDENCE BOT FINGERPRINTS
|
||
if [ -s "$TEMP_DIR/bot_fingerprints.txt" ]; then
|
||
echo ""
|
||
print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)"
|
||
echo "These IPs show MULTIPLE bot indicators combined (not just single signal):"
|
||
echo ""
|
||
|
||
awk -F'|' '
|
||
NR <= 15 {
|
||
ip = $1
|
||
score = $2
|
||
signals = $3
|
||
|
||
# Risk level based on score
|
||
if (score >= 80) risk = "CRITICAL"
|
||
else if (score >= 70) risk = "HIGH"
|
||
else if (score >= 60) risk = "MEDIUM"
|
||
else risk = "LOW"
|
||
|
||
printf " %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals
|
||
}' "$TEMP_DIR/bot_fingerprints.txt"
|
||
|
||
total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
|
||
echo ""
|
||
echo " Total high-confidence bots detected: $total IPs"
|
||
echo ""
|
||
else
|
||
echo ""
|
||
echo " No high-confidence bot fingerprints detected (requires multiple signals)"
|
||
echo ""
|
||
fi
|
||
|
||
# NEW: DOMAIN ATTACK TARGETING ANALYSIS (what's attacking each domain)
|
||
if [ -s "$TEMP_DIR/domain_targeting.txt" ]; then
|
||
echo ""
|
||
print_header "DOMAIN ATTACK TARGETING (Which domains are under attack & from where?)"
|
||
echo ""
|
||
|
||
total_domains=$(wc -l < "$TEMP_DIR/domain_targeting.txt" 2>/dev/null || echo "0")
|
||
echo "Total domains with attacks detected: $total_domains"
|
||
echo ""
|
||
|
||
# Show top attacked domains with attack details
|
||
awk -F'|' 'NR <= 10 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
||
domain_attack_count=$(grep "^[^|]*|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0")
|
||
|
||
if [ "$domain_attack_count" -gt 0 ]; then
|
||
echo " Domain: $domain ($domain_attack_count attack attempts)"
|
||
|
||
# Get all attacks on this domain, group by type
|
||
awk -F'|' -v dom="$domain" '
|
||
$2 == dom {
|
||
ip = $1
|
||
attack_type = $5
|
||
|
||
# Validate IP format
|
||
if (match(ip, /^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/)) {
|
||
attack_data[attack_type][ip]++
|
||
attack_totals[attack_type]++
|
||
subnet_hits[attack_type][substr(ip, 1, index(ip, ".", index(ip, ".")+1)-1)]++
|
||
}
|
||
}
|
||
END {
|
||
for (attack_type in attack_totals) {
|
||
printf " └─ %s: %d attempts\n", attack_type, attack_totals[attack_type]
|
||
|
||
# Show top 3 IPs for this attack type
|
||
attack_count = 0
|
||
for (ip in attack_data[attack_type]) {
|
||
if (attack_count >= 3) break
|
||
count = attack_data[attack_type][ip]
|
||
split(ip, parts, ".")
|
||
subnet = parts[1] "." parts[2] "." parts[3] ".0/24"
|
||
printf " ├─ %s (%d reqs) [subnet: %s]\n", ip, count, subnet
|
||
attack_count++
|
||
}
|
||
}
|
||
}' "$TEMP_DIR/attack_vectors_raw.txt"
|
||
echo ""
|
||
fi
|
||
done
|
||
else
|
||
echo ""
|
||
echo " No domain attack data available (all domains may be healthy)"
|
||
echo ""
|
||
fi
|
||
|
||
# NEW: TOP URLs BEING ATTACKED
|
||
if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then
|
||
echo ""
|
||
print_header "TOP TARGETED URLs (What files/endpoints are bots hitting?)"
|
||
echo ""
|
||
|
||
# Show top URLs for top 3 most-attacked domains
|
||
urls_shown=0
|
||
awk -F'|' 'NR <= 3 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
||
local domain_file="$TEMP_DIR/domain_urls_${domain}.txt"
|
||
if [ -f "$domain_file" ] && [ -s "$domain_file" ]; then
|
||
echo " Domain: $domain"
|
||
awk -F'|' '{
|
||
url = $1
|
||
count = $2
|
||
printf " %3d requests → %s\n", count, url
|
||
}' "$domain_file" # Show all URLs, not just top 5
|
||
echo ""
|
||
fi
|
||
done
|
||
|
||
# Check if no URL data was shown
|
||
if [ "$urls_shown" -eq 0 ]; then
|
||
echo " No URL targeting data available"
|
||
echo ""
|
||
fi
|
||
else
|
||
echo ""
|
||
echo " No domain targeting data available"
|
||
echo ""
|
||
fi
|
||
|
||
# TOP 5 THREATS
|
||
print_header "TOP 5 THREATS (with recommended actions)"
|
||
|
||
echo "1. Highest Risk IPs (by threat score):"
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
counter=1
|
||
while read -r line && [ "${counter:-0}" -le 10 ]; do
|
||
score=$(echo "$line" | cut -d'|' -f1)
|
||
ip=$(echo "$line" | cut -d'|' -f2)
|
||
count=$(echo "$line" | cut -d'|' -f3)
|
||
|
||
# Determine threat level and action based on score
|
||
if [ "$score" -ge 80 ]; then
|
||
threat_level="CRITICAL"
|
||
threat_icon=""
|
||
action="BLOCK IMMEDIATELY + INVESTIGATE"
|
||
echo -e " ${RED}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
|
||
elif [ "$score" -ge 60 ]; then
|
||
threat_level="HIGH"
|
||
threat_icon=""
|
||
action="BLOCK or AGGRESSIVE RATE LIMIT"
|
||
echo -e " ${YELLOW}[$counter] $ip - RISK: $score/100 $threat_icon $threat_level${NC}"
|
||
elif [ "$score" -ge 40 ]; then
|
||
threat_level="MODERATE"
|
||
threat_icon=""
|
||
action="RATE LIMIT RECOMMENDED"
|
||
echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
|
||
else
|
||
threat_level="LOW"
|
||
threat_icon=""
|
||
action="MONITOR"
|
||
echo " [$counter] $ip - RISK: $score/100 $threat_icon $threat_level"
|
||
fi
|
||
|
||
echo " $count requests - Action: $action"
|
||
|
||
# Show which attack vectors this IP used
|
||
attack_types=""
|
||
grep -q "$ip" "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null && attack_types="${attack_types}SQL-Injection "
|
||
grep -q "$ip" "$TEMP_DIR/xss_attempts.txt" 2>/dev/null && attack_types="${attack_types}XSS "
|
||
grep -q "$ip" "$TEMP_DIR/path_traversal_attempts.txt" 2>/dev/null && attack_types="${attack_types}Path-Traversal "
|
||
grep -q "$ip" "$TEMP_DIR/rce_upload_attempts.txt" 2>/dev/null && attack_types="${attack_types}RCE/Upload "
|
||
grep -q "$ip" "$TEMP_DIR/login_bruteforce_attempts.txt" 2>/dev/null && attack_types="${attack_types}Login-Bruteforce "
|
||
grep -q "$ip" "$TEMP_DIR/suspicious_ua.txt" 2>/dev/null && attack_types="${attack_types}Scanner-UA "
|
||
grep -q "$ip" "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null && attack_types="${attack_types}DDoS-Pattern "
|
||
|
||
[ -n "$attack_types" ] && echo " Attack vectors: $attack_types"
|
||
|
||
counter=$((counter + 1))
|
||
done < "$TEMP_DIR/threat_scores.txt"
|
||
else
|
||
echo " No significant threats detected "
|
||
fi
|
||
echo ""
|
||
|
||
echo "2. Top Aggressive Bots:"
|
||
counter=1
|
||
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
||
count=$(echo "$line" | awk 'BEGIN {count=0} {print $1}')
|
||
bot=$(echo "$line" | awk 'BEGIN {f=""} {$1=""; print $0}' | xargs)
|
||
|
||
action="Allow"
|
||
if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex"; then
|
||
action="Consider blocking (aggressive)"
|
||
fi
|
||
|
||
echo " [$counter] $bot - $count requests - Action: $action"
|
||
counter=$((counter + 1))
|
||
done < "$TEMP_DIR/top_bots.txt"
|
||
echo ""
|
||
|
||
echo "3. Admin Endpoint Probing:"
|
||
if [ -s "$TEMP_DIR/admin_probes.txt" ]; then
|
||
head -3 "$TEMP_DIR/admin_probes.txt" | while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
|
||
domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
|
||
url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
|
||
printf " %s attempts - IP: %s - %s%s\n" "$count" "$ip" "$domain" "$url"
|
||
done
|
||
echo " Action: Verify legitimate admin access or block"
|
||
else
|
||
echo " None detected "
|
||
fi
|
||
echo ""
|
||
|
||
echo "4. 404 Scanners (Reconnaissance):"
|
||
if [ -s "$TEMP_DIR/404_scans.txt" ]; then
|
||
head -3 "$TEMP_DIR/404_scans.txt" | awk '$1 > 10 {
|
||
count = $1
|
||
$1 = ""
|
||
gsub(/^[[:space:]]+\|?/, "")
|
||
split($0, parts, "|")
|
||
printf " %s failed requests - IP: %s - %s%s\n", count, parts[1], parts[2], parts[3]
|
||
}'
|
||
else
|
||
echo " None detected "
|
||
fi
|
||
echo ""
|
||
|
||
echo "5. Large Data Transfers:"
|
||
if [ -s "$TEMP_DIR/large_transfers.txt" ]; then
|
||
# Calculate total bot bandwidth
|
||
total_bot_bandwidth=0
|
||
if [ -f "$TEMP_DIR/classified_bots.txt.gz" ]; then
|
||
total_bot_bandwidth=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
|
||
fi
|
||
|
||
if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then
|
||
bot_bandwidth_mb=$(awk "BEGIN {printf \"%.0f\", $total_bot_bandwidth/1048576}")
|
||
bot_bandwidth_gb=$(awk "BEGIN {printf \"%.2f\", $total_bot_bandwidth/1073741824}")
|
||
# Estimate cost at $0.09/GB (typical CDN pricing)
|
||
estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}")
|
||
|
||
total_bandwidth=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
|
||
bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}")
|
||
|
||
echo ""
|
||
echo " 💰 Bandwidth Impact:"
|
||
echo " Total bot bandwidth: ${bot_bandwidth_mb} MB (${bot_bandwidth_gb} GB) - ${bot_pct}% of total"
|
||
echo " Estimated cost: \$$estimated_cost (at \$0.09/GB CDN pricing)"
|
||
fi
|
||
echo ""
|
||
echo " Top bandwidth consumers:"
|
||
|
||
head -3 "$TEMP_DIR/large_transfers.txt" | while read -r line; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
|
||
domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
|
||
url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
|
||
size=$(echo "$line" | awk '{print $2}' | cut -d'|' -f4)
|
||
size_mb=$(awk "BEGIN {printf \"%.1f\", $size/1048576}")
|
||
total_ip_mb=$(awk "BEGIN {printf \"%.0f\", $size * $count / 1048576}")
|
||
printf " %s transfers from %s - %.1f MB avg (%s MB total) - %s%s\n" "$count" "$ip" "$size_mb" "$total_ip_mb" "$domain" "$url"
|
||
done
|
||
echo " Action: Verify if scraping, consider serving WebP/optimized images"
|
||
else
|
||
echo " None detected "
|
||
fi
|
||
|
||
# TOP 5 TARGETED SITES
|
||
print_header "TOP 5 TARGETED SITES (with risk breakdown)"
|
||
|
||
counter=1
|
||
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
||
count=$(echo "$line" | awk '{print $1}')
|
||
domain=$(echo "$line" | awk '{print $2}')
|
||
|
||
echo "[$counter] $domain - $count requests"
|
||
|
||
# Show traffic breakdown for this domain
|
||
if [ -f "$TEMP_DIR/domain_${domain}_stats.txt" ]; then
|
||
tail -n +2 "$TEMP_DIR/domain_${domain}_stats.txt" | while read -r stat_line; do
|
||
stat_count=$(echo "$stat_line" | awk '{print $1}')
|
||
stat_type=$(echo "$stat_line" | awk '{print $2}')
|
||
pct=$(awk "BEGIN {printf \"%.1f\", ($stat_count/$count)*100}")
|
||
|
||
case $stat_type in
|
||
suspicious) echo -e " ${YELLOW}Suspicious: $stat_count ($pct%)${NC}" ;;
|
||
ai) echo " AI Bots: $stat_count ($pct%)" ;;
|
||
legit) echo " Legit Bots: $stat_count ($pct%)" ;;
|
||
unknown) echo " Regular: $stat_count ($pct%)" ;;
|
||
*) echo " $stat_type: $stat_count ($pct%)" ;;
|
||
esac
|
||
done
|
||
fi
|
||
echo ""
|
||
|
||
counter=$((counter + 1))
|
||
done < "$TEMP_DIR/top_sites.txt"
|
||
|
||
# BLOCKLIST
|
||
print_header "COPY-PASTE READY BLOCKLIST (Prioritized by Threat Score)"
|
||
|
||
echo "# Apache .htaccess format:"
|
||
echo "# Add to .htaccess in document root"
|
||
echo "# IPs sorted by risk score (highest first)"
|
||
echo ""
|
||
|
||
# Use threat scores to prioritize blocklist (exclude false positives and excluded IPs)
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
# Get IPs with score >= 60 (HIGH and CRITICAL)
|
||
awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
|
||
ip=$(echo "$entry" | cut -d'|' -f1)
|
||
score=$(echo "$entry" | cut -d'|' -f2)
|
||
|
||
# Skip excluded IPs (private, localhost, server's own)
|
||
if is_excluded_ip "$ip"; then
|
||
continue
|
||
fi
|
||
|
||
# Skip if in false positives
|
||
if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
|
||
continue
|
||
fi
|
||
|
||
echo "Deny from $ip # Risk score: $score/100"
|
||
done
|
||
else
|
||
# Fallback to old method
|
||
{
|
||
[ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
|
||
[ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
|
||
[ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
|
||
} | sort -u | head -30 | while read -r ip; do
|
||
echo "Deny from $ip"
|
||
done
|
||
fi
|
||
|
||
echo ""
|
||
echo "# cPanel User-Agent blocking (add to /etc/apache2/conf.d/includes/pre_main_global.conf):"
|
||
echo ""
|
||
echo "<IfModule mod_rewrite.c>"
|
||
echo " RewriteEngine On"
|
||
echo " RewriteCond %{HTTP_USER_AGENT} \"(nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp|metasploit)\" [NC]"
|
||
echo " RewriteRule ^ - [F,L]"
|
||
echo "</IfModule>"
|
||
echo ""
|
||
echo "# Optional: Block aggressive SEO bots (uncomment to enable)"
|
||
echo "# <IfModule mod_rewrite.c>"
|
||
echo "# RewriteEngine On"
|
||
echo "# RewriteCond %{HTTP_USER_AGENT} \"(AhrefsBot|SemrushBot|MJ12bot|DotBot|Meta-ExternalAgent|Go-http-client)\" [NC]"
|
||
echo "# RewriteRule ^ - [F,L]"
|
||
echo "# </IfModule>"
|
||
|
||
echo ""
|
||
echo "# CSF/iptables format:"
|
||
echo "# Run these commands as root:"
|
||
echo ""
|
||
|
||
# Same prioritized list for CSF
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
awk -F'|' '$1 >= 60 {print $2 "|" $1}' "$TEMP_DIR/threat_scores.txt" | head -30 | while read -r entry; do
|
||
ip=$(echo "$entry" | cut -d'|' -f1)
|
||
score=$(echo "$entry" | cut -d'|' -f2)
|
||
|
||
# Skip excluded IPs (private, localhost, server's own)
|
||
if is_excluded_ip "$ip"; then
|
||
continue
|
||
fi
|
||
|
||
# Skip if in false positives
|
||
if [ -s "$TEMP_DIR/false_positives.txt" ] && grep -q "^$ip|" "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
|
||
continue
|
||
fi
|
||
|
||
echo "csf -d $ip \"Threat score: $score/100\""
|
||
done
|
||
else
|
||
# Fallback
|
||
{
|
||
[ -s "$TEMP_DIR/suspicious_ua.txt" ] && awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1
|
||
[ -s "$TEMP_DIR/attack_vectors_raw.txt" ] && awk -F'|' '{print $1}' "$TEMP_DIR/attack_vectors_raw.txt" | sort -u
|
||
[ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && head -10 "$TEMP_DIR/rapid_fire_ips.txt" | awk '{print $2}'
|
||
} | sort -u | head -30 | while read -r ip; do
|
||
echo "csf -d $ip \"Bot/Scanner threat\""
|
||
done
|
||
fi
|
||
|
||
# SUMMARY
|
||
print_header "📋 SUMMARY & RECOMMENDATIONS"
|
||
|
||
threat_score=0
|
||
|
||
# Calculate threat score from attack vectors
|
||
[ -s "$TEMP_DIR/sqli_attempts.txt" ] && threat_score=$((threat_score + 15))
|
||
[ -s "$TEMP_DIR/xss_attempts.txt" ] && threat_score=$((threat_score + 12))
|
||
[ -s "$TEMP_DIR/path_traversal_attempts.txt" ] && threat_score=$((threat_score + 15))
|
||
[ -s "$TEMP_DIR/rce_upload_attempts.txt" ] && threat_score=$((threat_score + 20))
|
||
[ -s "$TEMP_DIR/login_bruteforce_attempts.txt" ] && threat_score=$((threat_score + 10))
|
||
[ -s "$TEMP_DIR/suspicious_ua.txt" ] && threat_score=$((threat_score + 8))
|
||
[ -s "$TEMP_DIR/rapid_fire_ips.txt" ] && threat_score=$((threat_score + 5))
|
||
[ $(wc -l < "$TEMP_DIR/admin_probes.txt" 2>/dev/null || echo 0) -gt 10 ] && threat_score=$((threat_score + 3))
|
||
|
||
# Count high-risk IPs
|
||
high_risk_count=0
|
||
if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
|
||
high_risk_count=$(awk -F'|' '$1 >= 60' "$TEMP_DIR/threat_scores.txt" | wc -l)
|
||
fi
|
||
|
||
if [ "${threat_score:-0}" -ge 25 ] || [ "${high_risk_count:-0}" -ge 5 ]; then
|
||
print_alert "THREAT LEVEL: CRITICAL - Immediate action required"
|
||
echo " Summary: Multiple attack vectors detected from $high_risk_count high-risk IPs"
|
||
echo ""
|
||
echo " Immediate Actions:"
|
||
echo " 1. ⚡ Apply the blocklist above IMMEDIATELY (prioritized by threat score)"
|
||
echo " 2. Review admin access logs for successful breaches"
|
||
echo " 3. 🛡 Enable ModSecurity WAF or Cloudflare if not already active"
|
||
echo " 4. 🔄 Update all CMS platforms and plugins urgently"
|
||
echo " 5. 🔐 Force password reset for admin accounts if login attempts detected"
|
||
echo " 6. Re-run this analysis in 1 hour to verify blocks are working"
|
||
elif [ "${threat_score:-0}" -ge 12 ] || [ "${high_risk_count:-0}" -ge 2 ]; then
|
||
print_warning "THREAT LEVEL: HIGH - Action recommended within 24 hours"
|
||
echo " Summary: Significant threat activity from $high_risk_count high-risk IPs"
|
||
echo ""
|
||
echo " Recommended Actions:"
|
||
echo " 1. Review and apply the blocklist above (focus on CRITICAL/HIGH scores)"
|
||
echo " 2. Enable rate limiting for admin endpoints"
|
||
echo " 3. Monitor logs closely for the next 24-48 hours"
|
||
echo " 4. Consider implementing fail2ban or similar IDS"
|
||
echo " 5. Review and update security plugins/modules"
|
||
elif [ "${threat_score:-0}" -ge 5 ]; then
|
||
print_warning "THREAT LEVEL: MODERATE - Routine security maintenance"
|
||
echo " Summary: Normal bot activity with some suspicious patterns"
|
||
echo ""
|
||
echo " Recommended Actions:"
|
||
echo " 1. Review suspicious IPs in the report"
|
||
echo " 2. Consider rate limiting aggressive bots"
|
||
echo " 3. Continue routine log monitoring"
|
||
echo " 4. Block aggressive SEO bots if impacting performance"
|
||
else
|
||
print_success "THREAT LEVEL: ✅ LOW - Normal operation"
|
||
echo " Summary: Minimal threat activity detected"
|
||
echo ""
|
||
echo " Recommended Actions:"
|
||
echo " 1. Continue routine log monitoring"
|
||
echo " 2. Review false positive warnings to whitelist legitimate services"
|
||
echo " 3. Consider blocking aggressive SEO bots if bandwidth is a concern"
|
||
fi
|
||
|
||
echo ""
|
||
echo "==============================================================="
|
||
echo "Report saved to: $OUTPUT_FILE"
|
||
echo "==============================================================="
|
||
}
|
||
|
||
################################################################################
|
||
# BASELINE HEALTH CHECK - Test domains before making changes
|
||
################################################################################
|
||
|
||
# Record the pre-change health status of every domain, using the cached
# reference database rather than probing the domains again.
#
# Globals:   TEMP_DIR (read); GREEN, YELLOW, RED, NC (read, colour codes)
# Calls:     get_all_domain_statuses, expected to emit one line per domain
#            as domain|http_code|https_code|status_summary
# Outputs:   one human-readable status line per domain on stdout;
#            (re)creates $TEMP_DIR/baseline_health.txt with one
#            domain|http_status|https_status|result line per domain
# Returns:   0 early when the lookup helper is missing or yields no rows
baseline_health_check() {
    print_info "Loading baseline health status from cached data..."
    echo ""

    # Start from an empty baseline file so stale rows never survive a re-run
    > "$TEMP_DIR/baseline_health.txt"

    if ! command -v get_all_domain_statuses &>/dev/null; then
        print_warning "Reference database functions not available - skipping health check"
        return 0
    fi

    local tested=0
    local working=0
    local broken=0
    # Loop fields are local so they cannot clobber same-named globals
    local domain http_status https_status result

    # Process substitution (not a pipe) keeps the counters in this shell
    while IFS='|' read -r domain http_status https_status result; do
        [ -z "$domain" ] && continue

        tested=$((tested + 1))

        case "$result" in
            200_OK)
                working=$((working + 1))
                echo -e " ${GREEN}✓${NC} $domain - HTTP:$http_status HTTPS:$https_status"
                ;;
            REDIRECT)
                # A redirect still counts as a working domain
                working=$((working + 1))
                echo -e " ${YELLOW}→${NC} $domain - Redirect (HTTP:$http_status HTTPS:$https_status)"
                ;;
            403_FORBIDDEN)
                broken=$((broken + 1))
                echo -e " ${RED}✗${NC} $domain - Forbidden (HTTP:$http_status HTTPS:$https_status)"
                ;;
            TIMEOUT|UNREACHABLE)
                broken=$((broken + 1))
                echo -e " ${RED}⏱${NC} $domain - Timeout (unreachable)"
                ;;
            *)
                # Any other summary is counted as an issue
                broken=$((broken + 1))
                echo -e " ${YELLOW}?${NC} $domain - HTTP:$http_status HTTPS:$https_status"
                ;;
        esac

        # Store baseline: domain|http_status|https_status|result
        echo "$domain|$http_status|$https_status|$result" >> "$TEMP_DIR/baseline_health.txt"
    done < <(get_all_domain_statuses)

    if [ "$tested" -eq 0 ]; then
        print_warning "No domain status data available in reference database"
        return 0
    fi

    echo ""
    print_success "Baseline loaded from cache: $working working, $broken with issues"
    echo ""
}
|
||
|
||
# Compare each domain's current cached status against the stored baseline
# and report domains that went from working to broken, or from broken to
# working.
#
# Globals:   TEMP_DIR (read); GREEN, RED, NC (read, colour codes)
# Inputs:    $TEMP_DIR/baseline_health.txt, one
#            domain|http_status|https_status|result line per domain
# Calls:     get_domain_status <domain>, whose output is parsed as
#            http_code|https_code|status_summary
# Outputs:   findings and a summary on stdout, then waits for Enter
# Returns:   0 early when there is no baseline or no lookup helper
verify_domains_still_working() {
    print_info "Checking current domain status from cached data..."
    echo ""

    if [ ! -s "$TEMP_DIR/baseline_health.txt" ]; then
        print_warning "No baseline health data available"
        return 0
    fi

    if ! command -v get_domain_status &>/dev/null; then
        print_warning "Reference database functions not available - skipping verification"
        return 0
    fi

    local changes_detected=0
    local now_broken=0
    local domain baseline_http baseline_https baseline_result
    local current_status http_status https_status new_result

    while IFS='|' read -r domain baseline_http baseline_https baseline_result; do
        [ -z "$domain" ] && continue

        # A failed lookup is tolerated (treated like "not in cache") so it
        # cannot abort the whole script under `set -e`.
        current_status=$(get_domain_status "$domain") || true

        if [ -z "$current_status" ]; then
            # Domain not in cache - skip
            continue
        fi

        # Parse current status: http_code|https_code|status_summary
        IFS='|' read -r http_status https_status new_result <<< "$current_status"

        # Nothing to report when the summary is unchanged
        [ "$baseline_result" = "$new_result" ] && continue

        changes_detected=$((changes_detected + 1))

        case "$baseline_result" in
            200_OK|REDIRECT)
                # Was working: did it get worse?
                case "$new_result" in
                    403_FORBIDDEN|TIMEOUT|UNREACHABLE)
                        now_broken=$((now_broken + 1))
                        echo -e " ${RED}⚠ BROKEN:${NC} $domain"
                        echo -e " Before: $baseline_result (HTTP:$baseline_http HTTPS:$baseline_https)"
                        echo -e " After: $new_result (HTTP:$http_status HTTPS:$https_status)"
                        echo -e " ${RED}WARNING: This domain stopped working after your changes!${NC}"
                        echo ""
                        ;;
                esac
                ;;
            403_FORBIDDEN|TIMEOUT|UNREACHABLE)
                # Was broken: did it get better?
                case "$new_result" in
                    200_OK|REDIRECT)
                        echo -e " ${GREEN}✅ FIXED:${NC} $domain"
                        echo -e " Before: $baseline_result"
                        echo -e " After: $new_result"
                        echo ""
                        ;;
                esac
                ;;
        esac
    done < "$TEMP_DIR/baseline_health.txt"

    if [ "${now_broken:-0}" -gt 0 ]; then
        echo ""
        print_alert "WARNING: $now_broken domain(s) may have stopped working!"
        echo ""
        echo "NOTE: Status is from cached data (max 1 hour old)."
        echo "If you just made changes, the cache may not reflect real-time status."
        echo ""
        echo "Recommended actions:"
        echo " 1. Review the firewall rules you just applied"
        echo " 2. Check CSF temporary blocks: csf -t"
        echo " 3. Check CSF deny list: csf -g"
        echo " 4. Manually verify domain: curl -I http://domain.com"
        echo " 5. Consider reverting changes if issues persist"
        echo ""
    elif [ "${changes_detected:-0}" -eq 0 ]; then
        print_success "All domains show same status as baseline (cache-based check)"
    else
        print_success "Some status changes detected but no domains broken (cache-based check)"
    fi

    echo ""
    # EOF on stdin (non-interactive run) must not fail the function and
    # trip `set -e`; -r keeps backslashes in the reply literal.
    read -r -p "Press Enter to continue..." || true
}
|
||
|
||
#############################################################################
|
||
# Main Execution
|
||
#############################################################################
|
||
|
||
# Entry point: locate the access logs for the detected control panel,
# apply the optional time and user filters, run the analysis pipeline in
# its required order, then hand over to the post-analysis menu.
#
# Globals read:    SYS_CONTROL_PANEL, LOG_DIR, HOURS_BACK, DAYS_BACK,
#                  FILTER_USER, OUTPUT_FILE
# Globals written: log_count (left global, as before), and the exported
#                  INTERWORX_MODE, INTERWORX_FIND_OPTS, user_domains
# Exits:           1 when no logs or user domains are found or a mandatory
#                  pipeline stage fails; 0 when the user declines to
#                  analyse all logs (EOF at the prompt counts as "no")
main() {
    echo ""
    print_header "Starting Apache/cPanel Bot Analysis"

    local find_opts=()
    local minutes total_logs iw_structure choice

    # Time-window filter for find; identical for both discovery modes
    if [ -n "$HOURS_BACK" ]; then
        minutes=$((HOURS_BACK * 60))
        find_opts+=(-mmin "-$minutes")
    elif [ -n "$DAYS_BACK" ]; then
        find_opts+=(-mtime "-$DAYS_BACK")
    fi

    # InterWorx requires special log discovery (logs are in /home/user/var/domain.com/logs/)
    if [ "$SYS_CONTROL_PANEL" = "interworx" ]; then
        print_info "InterWorx detected - discovering domain logs..."

        # Find all transfer*.log files (includes transfer.log and transfer-ssl.log).
        # `|| true`: with pipefail a non-zero find (e.g. the glob matched
        # nothing) would otherwise abort the script under `set -e` before
        # the diagnostics below can be shown; wc still supplies the count.
        log_count=$(find /home/*/var/*/logs -type f -name "transfer*.log" "${find_opts[@]}" 2>/dev/null | wc -l) || true

        if [ "$log_count" -eq 0 ]; then
            # Try without time filter to see if ANY logs exist
            total_logs=$(find /home/*/var/*/logs -type f -name "transfer*.log" 2>/dev/null | wc -l) || true

            if [ "$total_logs" -eq 0 ]; then
                print_alert "Error: No InterWorx access logs found in /home/*/var/*/logs/"
                echo ""
                echo "Diagnostic information:"
                echo " Checking for InterWorx structure:"
                # /home/<user>/var/<domain>/logs sits four levels below /home,
                # so the search must descend to depth 4 to be able to match.
                iw_structure=$(find /home -maxdepth 4 -type d -path "*/var/*/logs" 2>/dev/null | head -5) || true
                if [ -n "$iw_structure" ]; then
                    echo " Found InterWorx directories:"
                    echo "$iw_structure"
                    echo ""
                    echo " Checking for any log files:"
                    find /home/*/var/*/logs -type f -name "*.log" 2>/dev/null | head -10 || true
                else
                    echo " No InterWorx directory structure found (expected: /home/user/var/domain.com/logs/)"
                fi
                exit 1
            else
                # Name the filter that was actually applied
                if [ -n "$HOURS_BACK" ]; then
                    print_alert "No logs found matching time filter (last $HOURS_BACK hours)"
                else
                    print_alert "No logs found matching time filter (last $DAYS_BACK days)"
                fi
                echo "Total logs available: $total_logs"
                echo ""
                # EOF on stdin is treated as the default answer "N"
                read -r -p "Analyze all available logs instead? [y/N]: " choice || choice=""
                if [[ "$choice" =~ ^[Yy] ]]; then
                    log_count=$total_logs
                    find_opts=() # Clear time filter
                else
                    exit 0
                fi
            fi
        fi

        print_info "Found $log_count InterWorx domain log files to analyze"

        # Exported for the log parsing stage
        export INTERWORX_MODE="yes"
        export INTERWORX_FIND_OPTS="${find_opts[*]}"
    else
        # Standard cPanel/Plesk log discovery
        if [ ! -d "$LOG_DIR" ]; then
            print_alert "Error: Log directory not found: $LOG_DIR"
            echo "Please specify the correct log directory with -l option"
            exit 1
        fi

        # Same errexit/pipefail guard as above (find may hit unreadable dirs)
        log_count=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | wc -l) || true

        if [ "$log_count" -eq 0 ]; then
            # Try without time filter to see if ANY logs exist
            total_logs=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" 2>/dev/null | wc -l) || true

            if [ "$total_logs" -eq 0 ]; then
                print_alert "Error: No log files found in $LOG_DIR"
                echo ""
                echo "Diagnostic information:"
                echo " Log directory: $LOG_DIR"
                echo " Directory exists: $([ -d "$LOG_DIR" ] && echo "yes" || echo "no")"
                if [ -d "$LOG_DIR" ]; then
                    echo " Total files in directory: $(find "$LOG_DIR" -type f 2>/dev/null | wc -l)"
                    echo " Sample files:"
                    find "$LOG_DIR" -type f 2>/dev/null | head -5 | sed 's/^/ /' || true
                fi
                echo ""
                echo "Control panel: $SYS_CONTROL_PANEL"
                exit 1
            else
                print_alert "No logs found matching time filter"
                if [ -n "$HOURS_BACK" ]; then
                    echo "No logs found from the last $HOURS_BACK hours"
                elif [ -n "$DAYS_BACK" ]; then
                    echo "No logs found from the last $DAYS_BACK days"
                fi
                echo "Total logs available: $total_logs"
                echo ""
                # EOF on stdin is treated as the default answer "N"
                read -r -p "Analyze all available logs instead? [y/N]: " choice || choice=""
                if [[ "$choice" =~ ^[Yy] ]]; then
                    log_count=$total_logs
                    find_opts=() # Clear time filter
                else
                    exit 0
                fi
            fi
        fi

        print_info "Found $log_count log files to analyze"
    fi

    # User filtering
    if [ -n "$FILTER_USER" ]; then
        print_info "Filtering logs for user: $FILTER_USER"
        # Assign first, export second; a failed lookup falls through to the
        # explicit emptiness check below instead of tripping `set -e`.
        user_domains=$(get_user_domains "$FILTER_USER") || true
        export user_domains
        if [ -z "$user_domains" ]; then
            print_error "No domains found for user: $FILTER_USER"
            exit 1
        fi
        print_info "User has $(echo "$user_domains" | wc -l) domain(s)"
    else
        export user_domains=""
    fi

    # Print time range info
    if [ -n "$HOURS_BACK" ]; then
        print_info "Analyzing logs from the last $HOURS_BACK hours"
    elif [ -n "$DAYS_BACK" ]; then
        print_info "Analyzing logs from the last $DAYS_BACK days"
    fi

    # Baseline health check before analysis
    baseline_health_check

    # Mandatory stages: abort with a message when either fails
    parse_logs || {
        print_alert "Log parsing failed"
        exit 1
    }

    classify_bots || {
        print_alert "Bot classification failed"
        exit 1
    }

    # Enhanced analysis functions (before threats detected)
    analyze_headers          # Detect header-based bot patterns
    analyze_entry_points     # Detect suspicious entry points
    analyze_url_entropy      # Detect fuzzing/parameter scanning
    analyze_request_timing   # Detect DDoS patterns via timing

    detect_server_ips
    detect_threats           # Must be before fingerprinting/domain targeting (creates attack_vectors_raw.txt)
    analyze_success_rates    # Analyze success/failure rates for better accuracy
    detect_botnets
    analyze_time_series
    calculate_threat_scores
    detect_false_positives
    generate_statistics

    # Fingerprinting and domain targeting analysis (after threats detected)
    calculate_bot_fingerprint            # Combine signals for accuracy (reduce false positives)
    analyze_domain_targeting_percentage  # Show which domains are being targeted
    analyze_top_urls_per_domain          # Show what files/endpoints are being hit

    generate_comparison_report           # Show trends vs previous day

    # Baseline and progression analysis
    save_baseline                # Store current metrics for historical comparison
    analyze_attack_progression   # Show attack sequences and phases

    generate_report

    print_success "Analysis complete!"
    echo ""
    echo "Report location: $OUTPUT_FILE"

    # Analyze threat patterns and generate recommendations
    analyze_domain_threats
    analyze_geographic_threats
    generate_recommendations

    # Ask user what to do next
    show_post_analysis_menu
}
|
||
|
||
################################################################################
|
||
# DOMAIN-LEVEL THREAT ANALYSIS
|
||
################################################################################
|
||
|
||
# Build per-domain threat summaries in a single awk pass over the parsed
# logs.
#
# Globals:  TEMP_DIR (read)
# Inputs (all '|'-separated, under $TEMP_DIR):
#   threat_scores.txt      field 1 = score, field 2 = IP (score >= 70 is
#                          treated as high-risk here)
#   attack_vectors_raw.txt field 2 = domain
#   classified_bots.txt    field 2 = domain
#   parsed_logs.txt        field 1 = IP, field 2 = domain
# Outputs (under $TEMP_DIR):
#   domain_threats.txt        domain|total_requests|bot_requests|
#                             bot_percentage|high_risk_ip_count|
#                             attack_attempts|high_risk_ips_detail
#   domain_high_risk_ips.txt  domain|high_risk_ip_count|high_risk_ips_detail
#                             (only domains with at least one such IP)
#   domain_threats_sorted.txt domain_threats.txt, high-risk count descending
#   all_domains.txt           unique domain names
#   high_risk_ips_detail is a list of "ip:score:requests " tokens, one per
#   distinct high-risk IP, in first-seen order.
analyze_domain_threats() {
    print_info "Analyzing per-domain threat patterns..."

    # Start from empty result files
    > "$TEMP_DIR/domain_threats.txt"
    > "$TEMP_DIR/domain_high_risk_ips.txt"

    # One pass over the parsed logs; the lookup tables are loaded up front.
    # File names are built into variables first: an unparenthesized
    # concatenation after a getline redirection is ambiguous and would bind
    # the redirection to tmpdir alone, leaving the tables empty.
    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        scores_file  = tmpdir "/threat_scores.txt"
        attacks_file = tmpdir "/attack_vectors_raw.txt"
        bots_file    = tmpdir "/classified_bots.txt"
        threats_out  = tmpdir "/domain_threats.txt"
        risky_out    = tmpdir "/domain_high_risk_ips.txt"

        # High-risk IPs: score (field 1) of 70 or more, keyed by IP (field 2)
        while ((getline line < scores_file) > 0) {
            split(line, f, "|")
            if (f[1] + 0 >= 70) {
                high_risk[f[2]] = f[1]
            }
        }
        close(scores_file)

        # Attack attempts per domain (field 2)
        while ((getline line < attacks_file) > 0) {
            split(line, f, "|")
            attack_counts[f[2]]++
        }
        close(attacks_file)

        # Classified bot requests per domain (field 2)
        while ((getline line < bots_file) > 0) {
            split(line, f, "|")
            bot_counts[f[2]]++
        }
        close(bots_file)
    }

    # Every parsed log line
    {
        ip = $1
        domain = $2

        domain_requests[domain]++

        if (ip in high_risk) {
            # Register each high-risk IP once per domain, in first-seen
            # order, and keep a per-(domain, IP) request tally.
            if (!((domain, ip) in ip_hits)) {
                slot = ++risky_ip_count[domain]
                risky_ip_at[domain, slot] = ip
            }
            ip_hits[domain, ip]++
        }
    }

    END {
        for (domain in domain_requests) {
            total_req = domain_requests[domain]
            bot_req = bot_counts[domain] + 0
            bot_pct = (total_req > 0) ? (bot_req / total_req * 100) : 0
            risky = risky_ip_count[domain] + 0
            attacks = attack_counts[domain] + 0

            # One "ip:score:requests " token per distinct high-risk IP
            detail = ""
            for (i = 1; i <= risky; i++) {
                ip = risky_ip_at[domain, i]
                detail = detail ip ":" high_risk[ip] ":" ip_hits[domain, ip] " "
            }

            printf "%s|%d|%d|%.1f|%d|%d|%s\n", domain, total_req, bot_req, bot_pct, risky, attacks, detail > threats_out

            if (risky > 0) {
                printf "%s|%d|%s\n", domain, risky, detail > risky_out
            }
        }
        close(threats_out)
        close(risky_out)
    }' "$TEMP_DIR/parsed_logs.txt"

    # Sort by high-risk IP count (descending)
    sort -t'|' -k5 -rn "$TEMP_DIR/domain_threats.txt" > "$TEMP_DIR/domain_threats_sorted.txt"

    # Get all unique domains
    awk -F'|' '{print $1}' "$TEMP_DIR/domain_threats.txt" | sort -u > "$TEMP_DIR/all_domains.txt"

    print_success "Domain threat analysis complete"
}
|
||
|
||
################################################################################
|
||
# GEOGRAPHIC ANALYSIS (Country-based threat tracking)
|
||
################################################################################
|
||
|
||
# Summarise where high-risk traffic comes from. Without a GeoIP lookup
# tool, high-risk IPv4 addresses (score >= 70) are grouped into /24
# networks and a widely spread attack is flagged. The GeoIP-backed branch
# is still a placeholder.
#
# Globals:  TEMP_DIR (read)
# Inputs:   $TEMP_DIR/threat_scores.txt ('|'-separated; field 1 = score,
#           field 2 = IP)
# Outputs (under $TEMP_DIR):
#   geo_analysis.txt        truncated; nothing is written to it here
#   geo_needs_maxmind.txt   truncated; receives one DISTRIBUTED|... line
#                           when more than 10 networks or more than 50
#                           high-risk IPs are seen
#   high_risk_networks.txt  "count network/24" lines, most frequent first
#                           (only written when there are threat scores and
#                           no GeoIP tool)
analyze_geographic_threats() {
    print_info "Analyzing geographic distribution of threats..."

    # Start from empty result files
    > "$TEMP_DIR/geo_analysis.txt"
    > "$TEMP_DIR/geo_needs_maxmind.txt"

    # Check if GeoIP/MaxMind tooling is available
    local has_geoip=false
    if command -v geoiplookup >/dev/null 2>&1 || command -v mmdbinspect >/dev/null 2>&1; then
        has_geoip=true
    fi

    if [ "$has_geoip" = false ]; then
        # No GeoIP: fall back to spotting spread across IP ranges
        if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
            # Count high-risk IPs by /24 network. Only dotted-quad IPv4
            # addresses are grouped; anything else (e.g. IPv6) has no
            # meaningful a.b.c.0/24 form and would yield a bogus network.
            awk -F'|' '$1 >= 70 && $2 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {
                split($2, octet, ".")
                print octet[1] "." octet[2] "." octet[3] ".0/24"
            }' "$TEMP_DIR/threat_scores.txt" | sort | uniq -c | sort -rn > "$TEMP_DIR/high_risk_networks.txt"

            # awk prints bare integers, so the interpolated counts below
            # carry no padding
            local network_count total_high_risk
            network_count=$(awk 'END {print NR}' "$TEMP_DIR/high_risk_networks.txt")
            total_high_risk=$(awk -F'|' '$1 >= 70 {n++} END {print n + 0}' "$TEMP_DIR/threat_scores.txt")

            if [ "$network_count" -gt 10 ] || [ "$total_high_risk" -gt 50 ]; then
                # Many networks or many IPs suggests a distributed attack;
                # recommend MaxMind for geographic blocking
                echo "DISTRIBUTED|$network_count networks|$total_high_risk IPs|MaxMind recommended" > "$TEMP_DIR/geo_needs_maxmind.txt"
            fi
        fi

        print_info "Geographic analysis limited (MaxMind GeoIP2 not installed)"
    else
        print_info "Performing full geographic analysis with GeoIP..."

        # TODO: Implement full GeoIP lookups when available
        # This would look up each high-risk IP and count by country
    fi

    print_success "Geographic analysis complete"
}
|
||
|
||
################################################################################
|
||
# RECOMMENDATION ENGINE
|
||
################################################################################
|
||
|
||
# Append one recommendation record to $TEMP_DIR/recommendations.txt.
# Record format: REC|<n>|<action_type>|<title>|<priority>|<description>
# Increments the caller's `rec_count` (reachable via bash dynamic scoping).
# Arguments: $1 action type, $2 title, $3 priority, $4 description
_rec_append() {
    rec_count=$((rec_count + 1))
    printf 'REC|%s|%s|%s|%s|%s\n' "$rec_count" "$1" "$2" "$3" "$4" \
        >> "$TEMP_DIR/recommendations.txt"
}

# Print field $2 ('|'-separated) of the first line of file $1; prints nothing
# when the file is missing or unreadable.
_rec_first_field() {
    awk -F'|' -v n="$2" 'NR == 1 { print $n; exit }' "$1" 2>/dev/null || true
}

# Print $1 as a whole-number percentage of $2 (0 when $2 is not positive).
# Values are passed with -v so they are never parsed as awk program text.
_rec_percent() {
    awk -v part="$1" -v whole="$2" \
        'BEGIN { printf "%.0f", (whole > 0) ? (part / whole) * 100 : 0 }'
}

# Build the recommendation list and the attack-scope summary from the
# analysis files in $TEMP_DIR.
#
# Globals: TEMP_DIR (read), CSF_AVAILABLE (read)
# Reads:   threat_scores.txt, all_domains.txt, domain_high_risk_ips.txt,
#          domain_threats_sorted.txt, rapid_fire_ips.txt,
#          attack_vectors_raw.txt, geo_needs_maxmind.txt (all under $TEMP_DIR)
#          and /etc/csf/csf.conf when CSF_AVAILABLE is "true"
# Writes:  $TEMP_DIR/recommendations.txt  one REC|... record per recommendation
#          $TEMP_DIR/attack_scope.txt     scope|target|percentage|affected|total
generate_recommendations() {
    print_info "Generating intelligent recommendations..."

    : > "$TEMP_DIR/recommendations.txt"
    local rec_count=0

    # ---- Inputs ------------------------------------------------------------
    local total_high_risk_ips=0
    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        total_high_risk_ips=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' \
            "$TEMP_DIR/threat_scores.txt" 2>/dev/null) || total_high_risk_ips=0
    fi

    local total_domains=0
    if [ -f "$TEMP_DIR/all_domains.txt" ]; then
        total_domains=$(wc -l < "$TEMP_DIR/all_domains.txt") || total_domains=0
    fi
    local affected_domains=0
    if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then
        affected_domains=$(wc -l < "$TEMP_DIR/domain_high_risk_ips.txt") || affected_domains=0
    fi
    # Normalise to plain integers (some wc implementations pad their output).
    total_high_risk_ips=$((total_high_risk_ips + 0))
    total_domains=$((total_domains + 0))
    affected_domains=$((affected_domains + 0))

    # ---- Attack scope: single domain vs. server-wide -------------------------
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0

    if [ "$affected_domains" -eq 1 ] && [ "$total_domains" -gt 1 ]; then
        attack_scope="single_domain"
        primary_target=$(_rec_first_field "$TEMP_DIR/domain_high_risk_ips.txt" 1)
        if [ "$total_high_risk_ips" -gt 0 ]; then
            # Share of all high-risk IPs that hit this one domain
            local domain_risk_count
            domain_risk_count=$(_rec_first_field "$TEMP_DIR/domain_high_risk_ips.txt" 2)
            primary_target_percentage=$(_rec_percent "$domain_risk_count" "$total_high_risk_ips")
        fi
    elif [ "$affected_domains" -gt 1 ] && [ "$total_domains" -gt 1 ]; then
        # Several domains are hit; it is still a "primary target" attack when
        # the top domain accounts for at least 75% of the high-risk IPs.
        attack_scope="server_wide"
        local top_domain_count
        top_domain_count=$(_rec_first_field "$TEMP_DIR/domain_threats_sorted.txt" 5)
        [[ "$top_domain_count" =~ ^[0-9]+$ ]] || top_domain_count=0
        if [ "$top_domain_count" -gt 0 ] && [ "$total_high_risk_ips" -gt 0 ]; then
            local top_percentage
            top_percentage=$(_rec_percent "$top_domain_count" "$total_high_risk_ips")
            if [ "$top_percentage" -ge 75 ]; then
                attack_scope="primary_target"
                primary_target=$(_rec_first_field "$TEMP_DIR/domain_threats_sorted.txt" 1)
                primary_target_percentage=$top_percentage
            fi
        fi
    elif [ "$total_domains" -eq 1 ]; then
        attack_scope="single_server"
        primary_target=$(head -n 1 "$TEMP_DIR/all_domains.txt" 2>/dev/null) || true
    fi

    # ---- RECOMMENDATION #1: IP blocking strategy ------------------------------
    if [ "$total_high_risk_ips" -gt 0 ]; then
        if [ "$total_high_risk_ips" -le 10 ]; then
            _rec_append "ip_block_temp" \
                "Block $total_high_risk_ips high-risk IPs for 1 hour" "HIGH" \
                "CSF temporary block recommended for ${total_high_risk_ips} IPs with threat score >= 70"
        elif [ "$total_high_risk_ips" -le 50 ]; then
            _rec_append "ip_block_temp" \
                "Block $total_high_risk_ips high-risk IPs for 24 hours" "HIGH" \
                "Large number of threats detected - 24hr block recommended"
        else
            _rec_append "ip_block_perm" \
                "Permanently block $total_high_risk_ips high-risk IPs" "CRITICAL" \
                "Severe bot attack detected - permanent blocking recommended"
        fi
    fi

    # ---- RECOMMENDATION #2: Connection limit (CSF CT_LIMIT) -------------------
    # Only when CSF is installed and CT_LIMIT is enabled (not set to 0).
    if [ "$CSF_AVAILABLE" = true ]; then
        local current_ct_limit
        current_ct_limit=$(awk -F'"' '/^CT_LIMIT[[:space:]]*=/ { print $2; exit }' \
            /etc/csf/csf.conf 2>/dev/null) || true
        [[ "$current_ct_limit" =~ ^[0-9]+$ ]] || current_ct_limit=0

        if [ "$current_ct_limit" -gt 0 ]; then
            # First field of the first rapid_fire_ips.txt line is used as the
            # highest per-IP connection count.
            local max_connections=0
            if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
                max_connections=$(awk 'NR == 1 { print $1; exit }' \
                    "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null) || true
            fi
            [[ "$max_connections" =~ ^[0-9]+$ ]] || max_connections=0

            if [ "$max_connections" -gt 100 ] && [ "$max_connections" -lt "$current_ct_limit" ]; then
                local recommended_limit=$((max_connections - 20))
                _rec_append "csf_ct_limit" \
                    "Reduce CSF CT_LIMIT from $current_ct_limit to $recommended_limit" "MEDIUM" \
                    "High concurrent connections detected ($max_connections from single IP)"
            fi
        fi
    fi

    # ---- RECOMMENDATION #3: Domain-specific .htaccess protection --------------
    if [ "$attack_scope" = "single_domain" ] || [ "$attack_scope" = "primary_target" ]; then
        _rec_append "htaccess_domain" \
            "Add bot blocking to $primary_target .htaccess" "HIGH" \
            "${primary_target_percentage}% of attacks target this domain"
    fi

    # ---- RECOMMENDATION #4: Server-wide Apache protection ---------------------
    if [ "$attack_scope" = "server_wide" ]; then
        _rec_append "apache_global" \
            "Add global bot blocking to Apache pre-virtualhost" "HIGH" \
            "Attack affects $affected_domains of $total_domains domains"
    fi

    # ---- RECOMMENDATION #5: WordPress hardening -------------------------------
    local wp_pattern='wp-admin|wp-login|xmlrpc'
    local wp_attacks=0
    if [ -s "$TEMP_DIR/attack_vectors_raw.txt" ]; then
        # grep -c prints 0 but exits 1 when nothing matches; `|| true` keeps the
        # single "0" instead of appending a second value under pipefail.
        wp_attacks=$(grep -ciE "$wp_pattern" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null) || true
    fi
    [[ "$wp_attacks" =~ ^[0-9]+$ ]] || wp_attacks=0

    if [ "$wp_attacks" -gt 50 ]; then
        # Unique values of field 2 (treated as the domain) among the WP hits
        local wp_domains wp_domain_count wp_target_domain
        wp_domains=$(grep -iE "$wp_pattern" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null \
            | cut -d'|' -f2 | sort -u) || true
        wp_domain_count=$(printf '%s\n' "$wp_domains" | wc -l)
        wp_domain_count=$((wp_domain_count + 0))
        wp_target_domain=${wp_domains%%$'\n'*}

        local wp_title
        if [ "$wp_domain_count" -eq 1 ] || [ "$attack_scope" = "single_domain" ] \
            || [ "$attack_scope" = "single_server" ]; then
            # Single domain being attacked
            wp_title="Harden WordPress on $wp_target_domain"
        elif [ "$attack_scope" = "primary_target" ]; then
            # One primary target but others also affected
            wp_title="Harden WordPress on $primary_target"
        else
            # Multiple domains with WordPress attacks
            wp_title="Harden WordPress across $wp_domain_count domains"
        fi
        _rec_append "wp_hardening" "$wp_title" "HIGH" \
            "$wp_attacks WordPress login/admin attempts detected"
    fi

    # PORTFLOOD protection removed - not appropriate for web servers with many
    # sites: blocking ports 80/443 by connection count breaks legitimate traffic.

    # ---- RECOMMENDATION #7: CSF SYNFLOOD protection ---------------------------
    if [ -s "$TEMP_DIR/rapid_fire_ips.txt" ]; then
        local ddos_count
        ddos_count=$(wc -l < "$TEMP_DIR/rapid_fire_ips.txt") || ddos_count=0
        ddos_count=$((ddos_count + 0))
        if [ "$ddos_count" -gt 10 ]; then
            _rec_append "csf_synflood" "Enable CSF SYNFLOOD protection" "HIGH" \
                "$ddos_count potential DDoS sources detected"
        fi
    fi

    # ---- RECOMMENDATION #8: MaxMind GeoIP for country blocking ----------------
    if [ -s "$TEMP_DIR/geo_needs_maxmind.txt" ]; then
        # Marker record: DISTRIBUTED|<n> networks|<m> IPs|...
        local geo_networks="" geo_ips="" network_count=0 ip_count=0
        IFS='|' read -r _ geo_networks geo_ips _ < "$TEMP_DIR/geo_needs_maxmind.txt" || true
        if [[ "$geo_networks" =~ [0-9]+ ]]; then
            network_count=${BASH_REMATCH[0]}
        fi
        if [[ "$geo_ips" =~ [0-9]+ ]]; then
            ip_count=${BASH_REMATCH[0]}
        fi
        _rec_append "install_maxmind" \
            "Install MaxMind GeoIP2 for country-based blocking" "MEDIUM" \
            "Distributed attack from $network_count networks ($ip_count IPs) - geographic blocking recommended"
    fi

    # Store attack scope for the menu system
    echo "$attack_scope|$primary_target|$primary_target_percentage|$affected_domains|$total_domains" \
        > "$TEMP_DIR/attack_scope.txt"

    print_success "Generated $rec_count recommendations"
}
|
||
|
||
################################################################################
|
||
# POST-ANALYSIS MENU
|
||
################################################################################
|
||
|
||
# Print the threat-analysis summary (attack scope plus the recommendation
# list) and offer the follow-up menu.
#
# Globals: TEMP_DIR (read); colour variables RED, YELLOW, BLUE, BOLD, NC (read);
#          menu_choice (written - holds the option the user typed)
# Reads:   $TEMP_DIR/attack_scope.txt     scope|target|percentage|affected|total
#          $TEMP_DIR/recommendations.txt  REC|n|type|title|priority|description
# Returns: 0 for "Back", for an invalid option and when there is nothing to
#          recommend; otherwise the status of the sub-menu that was opened.
show_post_analysis_menu() {
    local attack_scope="unknown"
    local primary_target=""
    local primary_target_percentage=0
    local affected_domains=0
    local total_domains=0

    # One read splits the whole record; `|| true` tolerates a file that lacks
    # a trailing newline (read then returns non-zero but still fills the vars).
    if [ -s "$TEMP_DIR/attack_scope.txt" ]; then
        IFS='|' read -r attack_scope primary_target primary_target_percentage \
            affected_domains total_domains < "$TEMP_DIR/attack_scope.txt" || true
    fi

    local has_recommendations=false
    local rec_count=0
    if [ -s "$TEMP_DIR/recommendations.txt" ]; then
        has_recommendations=true
        rec_count=$(wc -l < "$TEMP_DIR/recommendations.txt") || rec_count=0
        rec_count=$((rec_count + 0))
    fi

    echo ""
    echo "==============================================================="
    print_header "THREAT ANALYSIS SUMMARY"
    echo ""

    # Display attack scope
    case "$attack_scope" in
        single_domain)
            print_warning "ATTACK SCOPE: Single Domain Target"
            echo " • Primary Target: $primary_target"
            echo " • This domain is receiving 100% of high-risk traffic"
            echo " • Recommendation: Domain-specific protection"
            ;;
        primary_target)
            print_warning "ATTACK SCOPE: Primarily Targeting One Domain"
            echo " • Primary Target: $primary_target ($primary_target_percentage% of attacks)"
            echo " • Other domains also affected: $affected_domains of $total_domains total"
            echo " • Recommendation: Focus protection on primary target"
            ;;
        server_wide)
            print_alert "ATTACK SCOPE: Server-Wide Attack"
            echo " • Multiple domains under attack: $affected_domains of $total_domains"
            echo " • Attack is distributed across the server"
            echo " • Recommendation: Server-wide protection needed"
            ;;
        single_server)
            print_info "ATTACK SCOPE: Single-Domain Server"
            echo " • Target: $primary_target (only domain on server)"
            echo " • Server-level protection will apply to this domain"
            ;;
        *)
            print_info "No significant threats detected"
            ;;
    esac

    echo ""

    if [ "$has_recommendations" != true ]; then
        print_success "No recommendations - your server appears secure"
        echo ""
        echo "Press Enter to return to main menu..."
        # EOF on stdin must not abort the script under `set -e`.
        read -r || true
        return 0
    fi

    echo "==============================================================="
    print_header "RECOMMENDED ACTIONS ($rec_count recommendations)"
    echo ""

    local count=0
    local rec_type rec_num action_type action_title priority description
    local priority_color priority_icon
    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        count=$((count + 1))

        # Colour code by priority
        case "$priority" in
            CRITICAL) priority_color="${RED}";    priority_icon="" ;;
            HIGH)     priority_color="${YELLOW}"; priority_icon="" ;;
            MEDIUM)   priority_color="${BLUE}";   priority_icon="" ;;
            *)        priority_color="${NC}";     priority_icon=" " ;;
        esac

        echo -e " ${BOLD}[$count]${NC} $priority_icon $action_title"
        echo -e " ${priority_color}Priority: $priority${NC} - $description"
        echo ""
    done < "$TEMP_DIR/recommendations.txt"

    echo "==============================================================="
    echo ""
    echo "What would you like to do?"
    echo ""
    echo " 1) Go to Take Action Menu (implement recommended actions)"
    echo " 2) Review Individual Recommendations (detailed view)"
    echo ""
    echo -e " ${RED}0)${NC} Back"
    echo ""
    # On EOF fall through to the "invalid option" arm instead of aborting.
    read -r -p "Select option: " menu_choice || menu_choice=""

    case "$menu_choice" in
        1)
            show_action_menu
            ;;
        2)
            show_detailed_recommendations
            ;;
        0)
            print_info "Returning to main menu..."
            return 0
            ;;
        *)
            print_warning "Invalid option - returning to main menu"
            return 0
            ;;
    esac
}
|
||
|
||
################################################################################
|
||
# DETAILED RECOMMENDATIONS VIEWER
|
||
################################################################################
|
||
|
||
# Print every recommendation in long form, with supporting data for the
# ip_block_*, htaccess_domain and apache_global action types, then go back to
# show_post_analysis_menu.
#
# Globals: TEMP_DIR (read); BOLD, NC (read)
# Reads:   $TEMP_DIR/recommendations.txt       REC|n|type|title|priority|description
#          $TEMP_DIR/threat_scores.txt         score|ip|...
#          $TEMP_DIR/domain_threats_sorted.txt domain|total|bot|bot_pct|high_risk|attacks|...
#          $TEMP_DIR/domain_high_risk_ips.txt  domain|high_risk_count|...
show_detailed_recommendations() {
    clear
    print_banner "Detailed Recommendations"
    echo ""

    if [ ! -s "$TEMP_DIR/recommendations.txt" ]; then
        print_warning "No recommendations available"
        echo ""
        read -r -p "Press Enter to continue..." || true
        show_post_analysis_menu
        return
    fi

    local count=0
    local rec_type rec_num action_type action_title priority description
    local target_domain
    while IFS='|' read -r rec_type rec_num action_type action_title priority description; do
        count=$((count + 1))

        echo "==============================================================="
        echo -e "${BOLD}Recommendation #$count:${NC} $action_title"
        echo "==============================================================="
        echo ""
        echo "Priority: $priority"
        echo "Action Type: $action_type"
        echo "Description: $description"
        echo ""

        # Show specific details based on action type
        case "$action_type" in
            ip_block_temp|ip_block_perm)
                echo "Affected IPs:"
                # The 10-row cap lives inside awk: piping to `head` can SIGPIPE
                # awk on long lists, which fails the pipeline under pipefail.
                awk -F'|' '$1 >= 70 {
                        printf " • %s (score: %s)\n", $2, $1
                        if (++shown >= 10) exit
                    }' "$TEMP_DIR/threat_scores.txt" 2>/dev/null || true
                ;;
            htaccess_domain)
                # The domain is the word following "to " in the title.
                target_domain=""
                if [[ "$action_title" =~ to\ ([^ ]+) ]]; then
                    target_domain=${BASH_REMATCH[1]}
                fi
                echo "Target Domain: $target_domain"
                if [ -s "$TEMP_DIR/domain_threats_sorted.txt" ]; then
                    # Exact string comparison on field 1: the dots of a domain
                    # name must not behave as regex wildcards, and "no such
                    # domain" is simply no output rather than a failure.
                    awk -F'|' -v domain="$target_domain" '$1 == domain {
                            printf " • Total Requests: %s\n", $2
                            printf " • Bot Requests: %s (%s%%)\n", $3, $4
                            printf " • High-Risk IPs: %s\n", $5
                            printf " • Attack Attempts: %s\n", $6
                        }' "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null || true
                fi
                ;;
            apache_global)
                echo "Affected Domains:"
                if [ -s "$TEMP_DIR/domain_high_risk_ips.txt" ]; then
                    awk -F'|' '{
                            printf " • %s (%s high-risk IPs)\n", $1, $2
                            if (++shown >= 10) exit
                        }' "$TEMP_DIR/domain_high_risk_ips.txt" || true
                fi
                ;;
        esac

        echo ""
    done < "$TEMP_DIR/recommendations.txt"

    echo "==============================================================="
    echo ""
    read -r -p "Press Enter to return to action menu..." || true
    show_post_analysis_menu
}
|
||
|
||
################################################################################
|
||
# ACTION MENU (IMPLEMENT RECOMMENDATIONS)
|
||
################################################################################
|
||
|
||
# Show every available remediation action, flagging the ones present in
# $TEMP_DIR/recommendations.txt, then run the action the user selects.
#
# Globals: TEMP_DIR (read); RED, NC (read);
#          action_choice (written - the raw text the user typed)
# Calls:   display_action_option, execute_action, show_post_analysis_menu
show_action_menu() {
    clear
    print_banner "Take Action Menu"
    echo ""

    # Recommended action types and their priorities, keyed by action type
    declare -A recommended_actions=()
    declare -A action_priorities=()

    if [ -s "$TEMP_DIR/recommendations.txt" ]; then
        local rec_action rec_priority
        while IFS='|' read -r _ _ rec_action _ rec_priority _; do
            # An empty key is not a valid associative-array subscript
            [ -n "$rec_action" ] || continue
            recommended_actions["$rec_action"]=1
            action_priorities["$rec_action"]="$rec_priority"
        done < "$TEMP_DIR/recommendations.txt"
    fi

    # Display all available actions (not just recommended ones)
    echo "All Available Actions:"
    echo ""
    echo "Legend: = Recommended by analysis"
    echo ""

    # Menu definition, in display order.
    # Entry format: action_type|recommendation_key|title|description
    # A lone "-" prints a group separator.
    # (PORTFLOOD action removed - not appropriate for web servers.)
    local -a catalog=(
        # 1. IP Blocking Actions
        "ip_block_temp_1hr|ip_block_temp|Block high-risk IPs for 1 hour (CSF temporary)|Temporary firewall block, auto-expires after 1 hour"
        "ip_block_temp_24hr|ip_block_temp|Block high-risk IPs for 24 hours (CSF temporary)|Temporary firewall block, auto-expires after 24 hours"
        "ip_block_perm|ip_block_perm|Block high-risk IPs permanently (CSF permanent)|Permanent firewall block - requires manual removal"
        "-"
        # 2. Domain/Site Protection
        "htaccess_domain|htaccess_domain|Add bot blocking to specific domain .htaccess|Domain-level protection via Apache .htaccess rules"
        "apache_global|apache_global|Add global bot blocking to Apache (all domains)|Server-wide Apache configuration, affects all sites"
        "-"
        # 3. CSF Firewall Configuration
        "csf_ct_limit|csf_ct_limit|Adjust CSF connection tracking limit (CT_LIMIT)|Limit concurrent connections per IP address"
        "csf_synflood|csf_synflood|Enable CSF SYNFLOOD protection|Protect against SYN flood DDoS attacks"
        "-"
        # 4. Geographic & Application Hardening
        "install_maxmind|install_maxmind|Install MaxMind GeoIP2 for country-based blocking|Enable geographic filtering with CSF CC_DENY (requires free MaxMind license)"
        "wp_hardening|wp_hardening|WordPress security hardening|Protect WordPress login and admin areas"
    )

    local count=0
    local -a action_types=()
    local entry type rec_key title desc
    for entry in "${catalog[@]}"; do
        if [ "$entry" = "-" ]; then
            echo ""
            echo "------------------------------------------------------------─"
            echo ""
            continue
        fi
        IFS='|' read -r type rec_key title desc <<< "$entry"
        count=$((count + 1))
        action_types[$count]="$type"
        display_action_option "$count" "$type" "$title" "$desc" \
            "${recommended_actions[$rec_key]:-}" "${action_priorities[$rec_key]:-}"
    done

    echo ""
    echo "============================================================═"
    echo ""
    echo -e " ${RED}0)${NC} Back"
    echo ""
    # EOF is treated as "Back" so a closed stdin can neither abort the script
    # under `set -e` nor loop forever through the invalid-selection path.
    read -r -p "Select action [0-$count]: " action_choice || action_choice=0

    if [ "$action_choice" = "0" ]; then
        show_post_analysis_menu
        return
    fi

    # Accept only a short run of digits. Anything else would previously slip
    # through the numeric tests and be evaluated as an array subscript.
    local choice=0
    if [[ "$action_choice" =~ ^[0-9]{1,3}$ ]]; then
        choice=$((10#$action_choice))   # force base 10 ("08" is not octal)
    fi
    if [ "$choice" -lt 1 ] || [ "$choice" -gt "$count" ]; then
        print_warning "Invalid selection"
        sleep 2
        show_action_menu
        return
    fi

    # Execute selected action
    execute_action "${action_types[$choice]}" "$choice"
}
|
||
|
||
# Print one numbered entry of the action menu: a title line followed by a
# description line. Recommended entries get a coloured number and a
# "RECOMMENDED" tag, which carries the priority for CRITICAL/HIGH/MEDIUM.
#
# Arguments:
#   $1 menu number
#   $2 action type (accepted for interface compatibility; not displayed)
#   $3 title
#   $4 description
#   $5 non-empty when the action is recommended
#   $6 priority of the recommendation (CRITICAL, HIGH, MEDIUM or anything else)
# Globals: RED, YELLOW, BLUE, GREEN, BOLD, NC (read)
display_action_option() {
    local num=$1
    local title=$3
    local desc=$4
    local is_recommended=$5
    local priority=$6

    # %b expands escape sequences in the colour variables only; the title and
    # description go through %s so their text is printed exactly as given.
    if [ -z "$is_recommended" ]; then
        printf ' %s) %s\n' "$num" "$title"
    else
        local color tag
        case "$priority" in
            CRITICAL) color="${RED:-}";    tag=" RECOMMENDED [CRITICAL]" ;;
            HIGH)     color="${YELLOW:-}"; tag=" RECOMMENDED [HIGH]" ;;
            MEDIUM)   color="${BLUE:-}";   tag=" RECOMMENDED [MEDIUM]" ;;
            *)        color="${GREEN:-}";  tag=" RECOMMENDED" ;;
        esac
        printf ' %b%s)%b %b%s%b %b%s%b\n' \
            "$color" "$num" "${NC:-}" "${BOLD:-}" "$title" "${NC:-}" \
            "$color" "$tag" "${NC:-}"
    fi
    printf ' %s\n' "$desc"
}
|
||
|
||
################################################################################
|
||
# ACTION EXECUTION ENGINE
|
||
################################################################################
|
||
|
||
# Run the handler that belongs to an action type chosen in the action menu.
#
# Arguments:
#   $1 action type (for example ip_block_temp_1hr, csf_ct_limit, wp_hardening)
#   $2 menu number of the selection (accepted for compatibility; not used)
# Returns: the handler's exit status. An unknown action type, or one whose
#          handler function is not defined, prints a warning and reopens the
#          action menu instead.
execute_action() {
    local action_type="$1"

    # Handler command (function name plus fixed arguments) per action type
    local -a handler=()
    case "$action_type" in
        ip_block_temp_1hr)  handler=(execute_ip_blocking_specific "1hr") ;;
        ip_block_temp_24hr) handler=(execute_ip_blocking_specific "24hr") ;;
        ip_block_temp)      handler=(execute_ip_blocking "temp") ;;
        ip_block_perm)      handler=(execute_ip_blocking "perm") ;;
        csf_ct_limit)       handler=(execute_csf_ct_limit) ;;
        csf_synflood)       handler=(execute_csf_synflood) ;;
        htaccess_domain)    handler=(execute_htaccess_domain_blocking) ;;
        apache_global)      handler=(execute_apache_global_blocking) ;;
        install_maxmind)    handler=(execute_install_maxmind) ;;
        wp_hardening)       handler=(execute_wp_hardening) ;;
        rate_limiting)      handler=(execute_rate_limiting) ;;
    esac

    # Dispatch only to functions that really exist, so a handler that has not
    # been written yet shows the friendly message below rather than failing
    # with "command not found".
    if [ "${#handler[@]}" -gt 0 ] && declare -F "${handler[0]}" >/dev/null; then
        "${handler[@]}"
        return
    fi

    print_warning "Action type '$action_type' not yet implemented"
    echo ""
    read -r -p "Press Enter to continue..." || true
    show_action_menu
}
|
||
|
||
# Print a blank line, wait for Enter and reopen the action menu.
_ip_block_back_to_menu() {
    echo ""
    read -r -p "Press Enter to continue..." || true
    show_action_menu
}

# Temporarily block every high-risk IP (threat score >= 70) through CSF.
#
# IPs accepted by is_excluded_ip, and IPs that appear as the first field of
# $TEMP_DIR/false_positives.txt, are left alone. After user confirmation each
# remaining IP is blocked with `csf -td`, CSF is restarted, the
# verify_domains_still_working check runs and the action menu is reopened.
#
# Arguments: $1 - "1hr" for a one hour block; any other value means 24 hours
# Globals:   TEMP_DIR, CSF_AVAILABLE, GREEN, RED, NC (read);
#            confirm (written - the answer typed at the prompt)
# Reads:     $TEMP_DIR/threat_scores.txt  records of the form score|ip|requests
execute_ip_blocking_specific() {
    local duration_type="$1"  # "1hr" or "24hr"
    local scores_file="$TEMP_DIR/threat_scores.txt"
    local fp_file="$TEMP_DIR/false_positives.txt"

    clear
    print_banner "IP Blocking - CSF Temporary Block"
    echo ""

    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF (ConfigServer Security & Firewall) is not installed"
        _ip_block_back_to_menu
        return
    fi

    if [ ! -s "$scores_file" ]; then
        print_warning "No threat scores available"
        _ip_block_back_to_menu
        return
    fi

    local high_risk_count
    high_risk_count=$(awk -F'|' '$1 >= 70 { n++ } END { print n + 0 }' "$scores_file" 2>/dev/null) \
        || high_risk_count=0

    if [ "${high_risk_count:-0}" -eq 0 ]; then
        print_info "No high-risk IPs detected (score >= 70)"
        _ip_block_back_to_menu
        return
    fi

    # Set duration based on type
    local duration duration_text
    if [ "$duration_type" = "1hr" ]; then
        duration=3600
        duration_text="1 hour"
    else
        duration=86400
        duration_text="24 hours"
    fi

    echo "This will block $high_risk_count high-risk IPs for $duration_text"
    echo ""
    echo "High-risk IPs (top 10):"
    # The row cap is applied inside awk; `awk | head` can die from SIGPIPE on
    # long lists, which is fatal under `set -o pipefail`.
    awk -F'|' '$1 >= 70 {
            printf " • %s (score: %s, %s requests)\n", $2, $1, $3
            if (++shown >= 10) exit
        }' "$scores_file" || true
    echo ""

    if [ "$high_risk_count" -gt 10 ]; then
        echo " ... and $((high_risk_count - 10)) more"
        echo ""
    fi

    # EOF counts as "no"
    read -r -p "Proceed with blocking for $duration_text? (yes/no): " confirm || confirm=""

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        _ip_block_back_to_menu
        return
    fi

    # Collect the IPs to block together with their scores, so the score never
    # has to be looked up again with a pattern built from the IP.
    local -a ips_to_block=()
    local -a ip_scores=()
    local score ip requests
    while IFS='|' read -r score ip requests; do
        [[ "$score" =~ ^[0-9]+(\.[0-9]+)?$ ]] || continue
        [ "${score%%.*}" -ge 70 ] || continue

        # Skip excluded IPs
        if is_excluded_ip "$ip"; then
            continue
        fi
        # Skip false positives. Field 1 is compared as a literal string: in a
        # regex the dots of an IP would match any character.
        if [ -s "$fp_file" ] && awk -F'|' -v ip="$ip" \
            '$1 == ip { found = 1; exit } END { exit !found }' "$fp_file" 2>/dev/null; then
            continue
        fi

        ips_to_block+=("$ip")
        ip_scores+=("$score")
    done < "$scores_file"

    echo ""
    print_info "Applying CSF blocks for $duration_text..."
    echo ""

    local success_count=0
    local fail_count=0
    local i
    for i in "${!ips_to_block[@]}"; do
        ip=${ips_to_block[$i]}
        score=${ip_scores[$i]}

        if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
            echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e " ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done

    echo ""
    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) for $duration_text"
        echo ""
        echo "These blocks will automatically expire after $duration_text"
        echo "To view temporary blocks: csf -t"
        echo "To remove a block early: csf -tr IP"
    fi

    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    print_info "Restarting CSF to apply changes..."
    if csf -r >/dev/null 2>&1; then
        print_success "CSF restarted successfully"
    else
        print_warning "CSF restart may have failed - check manually with: csf -r"
    fi

    echo ""
    # Verify domains still work after blocking
    verify_domains_still_working

    show_action_menu
}
|
||
|
||
# Start CSF blocking of high-risk IPs in "temp" or "perm" mode.
#
# "temp" goes straight to offer_csf_blocking. Any other mode shows a warning
# about permanent blocks and calls offer_csf_blocking only after the user
# answers "yes"; otherwise the action menu is reopened.
#
# NOTE(review): both modes end in the same offer_csf_blocking call with no
# argument, so nothing visible here selects a permanent block - confirm
# against that function.
#
# Arguments: $1 - block mode, "temp" or "perm"
# Globals:   confirm (written - the answer typed at the prompt)
execute_ip_blocking() {
    local block_mode="$1"  # "temp" or "perm"

    if [ "$block_mode" = "temp" ]; then
        # Call the existing CSF blocking function
        offer_csf_blocking
        return
    fi

    # Permanent blocking
    clear
    print_banner "Permanent IP Blocking"
    echo ""
    print_alert "WARNING: Permanent blocks must be manually removed later"
    echo ""
    echo "This will permanently block all high-risk IPs (score >= 70)"
    echo ""
    # EOF counts as "no" rather than aborting the script under `set -e`.
    read -r -p "Are you sure you want to proceed? (yes/no): " confirm || confirm=""

    if [ "$confirm" = "yes" ]; then
        offer_csf_blocking
    else
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..." || true
        show_action_menu
    fi
}
|
||
|
||
# Interactively change CT_LIMIT in the CSF configuration file and restart CSF.
#
# The suggested value is the number after "to " in the title of the
# csf_ct_limit recommendation (100 when there is none). The configuration
# file is copied to a timestamped backup before it is edited, and success is
# reported only when a CT_LIMIT line existed and `csf -r` succeeded.
#
# Globals: TEMP_DIR, CSF_AVAILABLE (read);
#          CSF_CONF_PATH (read, optional) - configuration file to edit,
#            defaults to /etc/csf/csf.conf;
#          new_limit (written - the value the user entered)
execute_csf_ct_limit() {
    local conf="${CSF_CONF_PATH:-/etc/csf/csf.conf}"

    clear
    print_banner "Update CSF Connection Tracking Limit"
    echo ""

    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF is not installed on this server"
        echo ""
        read -r -p "Press Enter to continue..." || true
        show_action_menu
        return
    fi

    # Recommended limit, taken from the recommendation title
    local recommended_limit
    recommended_limit=$(awk -F'|' '$3 == "csf_ct_limit" && match($4, /to [0-9]+/) {
            print substr($4, RSTART + 3, RLENGTH - 3)
            exit
        }' "$TEMP_DIR/recommendations.txt" 2>/dev/null) || true
    [[ "$recommended_limit" =~ ^[0-9]+$ ]] || recommended_limit=100

    # Current CT_LIMIT: the quoted value on the CT_LIMIT line
    local current_limit
    current_limit=$(awk -F'"' '/^CT_LIMIT[[:space:]]*=/ { print $2; exit }' "$conf" 2>/dev/null) || true
    [[ "$current_limit" =~ ^[0-9]+$ ]] || current_limit="unknown"

    echo "Current CT_LIMIT: $current_limit"
    echo "Recommended CT_LIMIT: $recommended_limit"
    echo ""
    echo "This will modify $conf and restart CSF"
    echo ""
    read -r -p "Enter new CT_LIMIT value [$recommended_limit]: " new_limit || new_limit=""

    # Use recommended if nothing entered
    if [ -z "$new_limit" ]; then
        new_limit=$recommended_limit
    fi

    # Digits only - this value is substituted into the sed expression below
    if ! [[ "$new_limit" =~ ^[0-9]+$ ]]; then
        print_warning "Invalid number"
        echo ""
        read -r -p "Press Enter to continue..." || true
        show_action_menu
        return
    fi

    print_info "Updating CT_LIMIT to $new_limit..."

    if [ ! -f "$conf" ]; then
        print_warning "Could not find $conf"
    elif ! grep -q '^CT_LIMIT[[:space:]]*=' "$conf"; then
        # Without this check sed would change nothing and success would still
        # be reported.
        print_warning "No CT_LIMIT setting found in $conf - nothing changed"
    elif ! cp -p -- "$conf" "$conf.backup.$(date +%Y%m%d_%H%M%S)"; then
        print_warning "Could not back up $conf - nothing changed"
    elif ! sed -i "s/^CT_LIMIT[[:space:]]*=.*/CT_LIMIT = \"$new_limit\"/" "$conf"; then
        print_warning "Could not update $conf"
    else
        print_info "Restarting CSF..."
        if csf -r >/dev/null 2>&1; then
            print_success "CT_LIMIT updated successfully to $new_limit"
        else
            print_warning "CSF restart may have failed - check manually with: csf -r"
        fi
    fi

    echo ""
    # Verify domains still work after CT_LIMIT change
    verify_domains_still_working

    show_action_menu
}
|
||
|
||
#######################################
# Append deny rules for one domain's high-risk IPs to that domain's .htaccess.
#
# The target domain is the word following "to " on the first
# "|htaccess_domain|" line of $TEMP_DIR/recommendations.txt. The document root
# is field 4 of the matching "DOMAIN|<domain>|..." line in $SCRIPT_DIR/.sysref,
# or is typed in by the operator when no such line exists. After a literal
# "yes" confirmation, the existing .htaccess (if any) is backed up and a rule
# block is appended for every IP that requested the domain and scores >= 70.
#
# Globals:   TEMP_DIR, SCRIPT_DIR (read)
# Arguments: none
# Outputs:   prompts/status on stdout; appends to <doc_root>/.htaccess
# Returns:   after handing control back to show_action_menu (every path)
#######################################
execute_htaccess_domain_blocking() {
    clear
    print_banner "Add Bot Blocking to Domain .htaccess"
    echo ""

    # Get target domain from recommendation. One awk process replaces the
    # grep|head|grep chain, so there is no pipeline status to trip pipefail.
    local target_domain
    target_domain=$(awk 'index($0, "|htaccess_domain|") {
            if (match($0, /to [^ ]+/)) print substr($0, RSTART + 3, RLENGTH - 3)
            exit
        }' "$TEMP_DIR/recommendations.txt" 2>/dev/null) || true

    if [ -z "$target_domain" ]; then
        print_warning "Could not determine target domain"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    echo "Target Domain: $target_domain"
    echo ""

    # Find document root for this domain using reference database.
    # Exact field comparison: the dots in a domain must not act as regex
    # wildcards.
    local doc_root=""
    if [ -s "$SCRIPT_DIR/.sysref" ]; then
        doc_root=$(awk -F'|' -v dom="$target_domain" \
            '$1 == "DOMAIN" && $2 == dom { print $4; exit }' \
            "$SCRIPT_DIR/.sysref" 2>/dev/null) || true
    fi

    if [ -z "$doc_root" ]; then
        print_warning "Document root not found in reference database"
        echo "Please enter the document root manually:"
        read -r -p "Document root: " doc_root
    else
        echo "Document root: $doc_root"
    fi

    if [ ! -d "$doc_root" ]; then
        print_warning "Document root does not exist: $doc_root"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    local htaccess_file="$doc_root/.htaccess"

    echo ""
    echo "This will add bot blocking rules to: $htaccess_file"
    echo ""
    local confirm
    read -r -p "Proceed? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Get high-risk IPs for this domain.
    # The previous "grep ... || true | cut | sort | while ..." parsed as
    # "(cat | grep) || (true | cut | ...)": whenever grep matched, the raw log
    # lines themselves became the "IP list". A single awk pass now joins the
    # two files instead.
    # Assumes threat_scores.txt is "score|ip" (the layout the global Apache
    # blocking function reads) and parsed_logs.txt is "ip|domain|..." - confirm
    # against the code that writes them.
    # The character-class test keeps anything that is not IP-shaped out of the
    # generated configuration.
    local -a block_ips=()
    mapfile -t block_ips < <(
        awk -F'|' -v dom="$target_domain" '
            FILENAME == ARGV[1] { if ($1 + 0 >= 70) risky[$2] = 1; next }
            $2 == dom && ($1 in risky) && $1 ~ /^[0-9A-Fa-f:.]+$/ { print $1 }
        ' "$TEMP_DIR/threat_scores.txt" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null \
            | sort -u
    )

    if [ "${#block_ips[@]}" -eq 0 ]; then
        print_info "No high-risk IPs (threat score >= 70) found for $target_domain - nothing to add"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Create backup. The path is remembered so the message printed later names
    # the file that was really written (and is skipped when none was).
    local backup_file=""
    if [ -f "$htaccess_file" ]; then
        backup_file="$htaccess_file.backup.$(date +%Y%m%d_%H%M%S)"
        if cp -p -- "$htaccess_file" "$backup_file"; then
            print_info "Backed up existing .htaccess"
        else
            print_warning "Could not back up $htaccess_file - no changes made"
            echo ""
            read -r -p "Press Enter to continue..."
            show_action_menu
            return
        fi
    fi

    # Generate bot blocking rules
    print_info "Adding bot blocking rules..."

    # Add rules to .htaccess.
    # Apache 2.4 rejects a negated Require that is not inside <RequireAll>
    # together with a positive directive, so the rules are wrapped accordingly.
    # NOTE(review): if the .htaccess already carries its own Require lines,
    # this block is OR-ed with them by Apache - review such files by hand.
    local ip
    {
        echo ""
        echo "# Bot blocking rules added by toolkit on $(date)"
        echo "# High-risk IPs (threat score >= 70)"
        echo "<IfModule mod_authz_core.c>"
        echo "  <RequireAll>"
        echo "    Require all granted"
        for ip in "${block_ips[@]}"; do
            echo "    Require not ip $ip"
        done
        echo "  </RequireAll>"
        echo "</IfModule>"
        echo ""
    } >> "$htaccess_file"

    local block_count=${#block_ips[@]}
    print_success "Added blocking rules for $block_count IPs to $htaccess_file"
    echo ""
    if [ -n "$backup_file" ]; then
        echo "Backup saved to: $backup_file"
        echo ""
    fi

    # Verify domains still work after .htaccess changes
    verify_domains_still_working

    show_action_menu
}
|
||
|
||
#######################################
# Write a server-wide Apache include that denies every IP scoring >= 70.
#
# The include is written to bot_blocking.conf in /etc/apache2/conf.d or
# /etc/httpd/conf.d (first directory that exists). After a literal "yes"
# confirmation, any existing file is backed up, the new file is written, the
# configuration is syntax-checked and - only if the check passes - Apache is
# restarted. A failed check rolls the file back and leaves Apache untouched.
#
# Globals:   TEMP_DIR (read)
# Arguments: none
# Outputs:   prompts/status on stdout; writes <conf.d>/bot_blocking.conf
# Returns:   after handing control back to show_action_menu (every path)
#######################################
execute_apache_global_blocking() {
    clear
    print_banner "Add Global Bot Blocking to Apache"
    echo ""

    print_warning "This feature will add blocking rules to Apache pre-virtualhost configuration"
    echo "This affects ALL domains on the server"
    echo ""

    # Determine Apache config location
    local apache_conf=""
    if [ -d "/etc/apache2/conf.d" ]; then
        apache_conf="/etc/apache2/conf.d/bot_blocking.conf"
    elif [ -d "/etc/httpd/conf.d" ]; then
        apache_conf="/etc/httpd/conf.d/bot_blocking.conf"
    else
        print_warning "Could not determine Apache config directory"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    echo "Configuration will be written to: $apache_conf"
    echo ""
    local confirm
    read -r -p "Proceed? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Collect the IPs first so nothing is written when there is nothing to
    # block. Field 1 is the score and field 2 the IP; values that are not
    # IP-shaped are dropped so log-derived text cannot inject directives.
    local -a block_ips=()
    mapfile -t block_ips < <(
        awk -F'|' '$1 + 0 >= 70 && $2 ~ /^[0-9A-Fa-f:.]+$/ && !seen[$2]++ { print $2 }' \
            "$TEMP_DIR/threat_scores.txt" 2>/dev/null
    )

    if [ "${#block_ips[@]}" -eq 0 ]; then
        print_info "No high-risk IPs (threat score >= 70) found - nothing to block"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Create backup if file exists
    local backup_file=""
    if [ -f "$apache_conf" ]; then
        backup_file="$apache_conf.backup.$(date +%Y%m%d_%H%M%S)"
        if cp -p -- "$apache_conf" "$backup_file"; then
            print_info "Backed up existing configuration"
        else
            print_warning "Could not back up $apache_conf - no changes made"
            echo ""
            read -r -p "Press Enter to continue..."
            show_action_menu
            return
        fi
    fi

    # Generate global blocking rules
    print_info "Generating global bot blocking configuration..."

    # Require is only valid in directory context, so the rules live inside
    # <Location "/">. A negated Require must sit in <RequireAll> next to a
    # positive directive. "AuthMerging And" combines these rules with the
    # authorization of earlier sections instead of replacing it, so existing
    # denials elsewhere in the configuration keep applying.
    local ip
    {
        echo "# Global bot blocking rules"
        echo "# Generated by toolkit on $(date)"
        echo ""
        echo "<IfModule mod_authz_core.c>"
        echo "  <Location \"/\">"
        echo "    # Block high-risk IPs (threat score >= 70)"
        echo "    AuthMerging And"
        echo "    <RequireAll>"
        echo "      Require all granted"
        for ip in "${block_ips[@]}"; do
            echo "      Require not ip $ip"
        done
        echo "    </RequireAll>"
        echo "  </Location>"
        echo "</IfModule>"
        echo ""
    } > "$apache_conf"

    local block_count=${#block_ips[@]}
    print_success "Created global blocking configuration with $block_count IPs"

    # Syntax-check before restarting: restarting on a broken configuration
    # would take every site on the server offline.
    local tester config_state="untested"
    for tester in apachectl apache2ctl httpd; do
        if command -v "$tester" >/dev/null 2>&1; then
            if "$tester" -t >/dev/null 2>&1; then
                config_state="ok"
            else
                config_state="broken"
            fi
            break
        fi
    done

    if [ "$config_state" = "broken" ]; then
        # Roll back to the previous state (old file, or no file at all).
        if [ -n "$backup_file" ]; then
            cp -p -- "$backup_file" "$apache_conf"
        else
            rm -f -- "$apache_conf"
        fi
        print_warning "Apache rejected the new configuration - changes rolled back, Apache not restarted"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    echo ""
    echo "Restarting Apache to apply changes..."

    if systemctl restart httpd 2>/dev/null || systemctl restart apache2 2>/dev/null; then
        print_success "Apache restarted successfully"
    else
        print_warning "Could not restart Apache - please restart manually"
    fi

    echo ""
    # Verify domains still work after Apache global blocking
    verify_domains_still_working

    show_action_menu
}
|
||
|
||
#######################################
# Placeholder screen for WordPress hardening: shows a fixed list of manual
# recommendations, waits for Enter, then reopens the action menu.
# Arguments: none
# Outputs:   static text on stdout
#######################################
execute_wp_hardening() {
    clear
    print_banner "WordPress Hardening"
    printf '\n'
    print_info "WordPress hardening feature coming soon..."

    # Quoted delimiter: the text below is emitted verbatim, nothing expands.
    cat <<'ADVICE'

Recommended manual actions:
 • Install Wordfence or similar security plugin
 • Enable two-factor authentication
 • Limit login attempts
 • Disable XML-RPC if not needed
 • Use strong passwords

ADVICE

    read -p "Press Enter to continue..."
    show_action_menu
}
|
||
|
||
#######################################
# Placeholder screen for rate limiting: points the operator at the menu
# options that are implemented, waits for Enter, then reopens the action menu.
# Arguments: none
# Outputs:   static text on stdout
#######################################
execute_rate_limiting() {
    clear
    print_banner "Enable Rate Limiting"
    printf '\n'
    print_info "Rate limiting modules like mod_evasive/mod_security can help with application-level DoS"

    # Quoted delimiter: the text below is emitted verbatim, nothing expands.
    cat <<'NOTICE'

For better bot protection, consider:
 - IP blocking (options 1-3) - Block specific attacking IPs
 - CSF CT_LIMIT adjustment (option 4) - Limit connections per IP
 - .htaccess rules (option 5) - Domain-specific blocking

This option (rate limiting) is currently a placeholder for future implementation.

NOTICE

    read -p "Press Enter to continue..."
    show_action_menu
}
|
||
|
||
# execute_csf_portflood() was removed: it is not appropriate for web servers hosting 400+ sites.
# Blocking ports 80/443 based on connection count would break legitimate traffic.
|
||
|
||
#######################################
# Enable CSF SYNFLOOD protection (SYNFLOOD=1, rate 100/s, burst 150).
#
# Shows the current SYNFLOOD value and, after a literal "yes" confirmation,
# backs up /etc/csf/csf.conf, rewrites the three SYNFLOOD settings in place,
# checks that the change landed and restarts CSF. Each step is verified
# before success is reported.
#
# Globals:   CSF_AVAILABLE (read)
# Arguments: none
# Outputs:   prompts/status on stdout; edits /etc/csf/csf.conf
# Returns:   after handing control back to show_action_menu (every path)
#######################################
execute_csf_synflood() {
    clear
    print_banner "Enable CSF SYNFLOOD Protection"
    echo ""

    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF is not installed on this server"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    local csf_conf="/etc/csf/csf.conf"

    # Get current SYNFLOOD setting: the text between the first pair of double
    # quotes on the "SYNFLOOD = " line; "0" when the line or value is missing.
    local current_synflood
    current_synflood=$(awk -F'"' '/^SYNFLOOD = / { print $2; exit }' "$csf_conf" 2>/dev/null) || true
    current_synflood=${current_synflood:-0}

    echo "Current SYNFLOOD protection: ${current_synflood}"
    echo ""
    echo "SYNFLOOD protects against SYN flood DDoS attacks by limiting"
    echo "the rate of new TCP connections."
    echo ""
    echo "Recommended settings:"
    echo " SYNFLOOD = \"1\" (enable protection)"
    echo " SYNFLOOD_RATE = \"100/s\" (100 connections per second)"
    echo " SYNFLOOD_BURST = \"150\" (allow burst of 150)"
    echo ""
    local confirm
    read -r -p "Enable SYNFLOOD protection? (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Operation cancelled"
        echo ""
        read -r -p "Press Enter to continue..."
        show_action_menu
        return
    fi

    # Update CSF config
    print_info "Enabling SYNFLOOD protection..."
    if [ ! -f "$csf_conf" ]; then
        print_warning "Could not find /etc/csf/csf.conf"
    else
        local backup_file
        backup_file="$csf_conf.backup.$(date +%Y%m%d_%H%M%S)"
        if ! cp -p -- "$csf_conf" "$backup_file"; then
            print_warning "Could not back up $csf_conf - no changes made"
        elif ! sed -i \
                -e 's/^SYNFLOOD = .*/SYNFLOOD = "1"/' \
                -e 's/^SYNFLOOD_RATE = .*/SYNFLOOD_RATE = "100\/s"/' \
                -e 's/^SYNFLOOD_BURST = .*/SYNFLOOD_BURST = "150"/' \
                "$csf_conf"; then
            print_warning "Failed to update $csf_conf"
        elif ! grep -q '^SYNFLOOD = "1"' "$csf_conf"; then
            # sed only rewrites lines that already exist; a missing setting
            # would otherwise be reported as enabled.
            print_warning "SYNFLOOD setting not found in $csf_conf - nothing was enabled"
        else
            # Restart CSF. The status is tested explicitly: an unchecked
            # failure would abort the whole script under "set -e".
            print_info "Restarting CSF..."
            if csf -r >/dev/null 2>&1; then
                print_success "SYNFLOOD protection enabled"
            else
                print_warning "CSF restart may have failed - check manually with: csf -r"
            fi
        fi
    fi

    echo ""
    read -r -p "Press Enter to continue..."
    show_action_menu
}
|
||
|
||
#######################################
# Show MaxMind GeoIP2 / CSF country-blocking instructions.
#
# Prints a short "next steps" list when the mmdbinspect command is found and
# the full installation walkthrough otherwise. If
# $TEMP_DIR/high_risk_networks.txt is non-empty, its first ten
# "<count> <network>" lines are listed as well. Nothing is installed or
# changed.
#
# Globals:   TEMP_DIR (read)
# Arguments: none
# Outputs:   static text on stdout
#######################################
execute_install_maxmind() {
    clear
    print_banner "Install MaxMind GeoIP2 for Country Blocking"
    printf '\n'

    # Check if already installed
    if ! command -v mmdbinspect >/dev/null 2>&1; then
        print_info "MaxMind GeoIP2 not detected"
        cat <<'HOWTO'

To install MaxMind GeoIP2 for CSF country blocking:

1. Sign up for free MaxMind account:
 https://www.maxmind.com/en/geolite2/signup

2. Get your license key from:
 https://www.maxmind.com/en/accounts/current/license-key

3. Install GeoIP Perl module:
 yum install perl-Geo-IP
 # or
 cpan -i Geo::IP

4. Test CSF GeoIP support:
 /usr/local/csf/bin/csftest.pl -g

5. Configure CC_DENY in /etc/csf/csf.conf:
 CC_DENY = "CN,RU" (example: block China & Russia)

6. Restart CSF:
 csf -r

HOWTO
    else
        print_success "MaxMind GeoIP2 tools already installed"
        cat <<'NEXT'

Next steps:
1. Sign up for free license at: https://www.maxmind.com/en/geolite2/signup
2. Get your license key from account page
3. Install CSF GeoIP module: /usr/local/csf/bin/csftest.pl -g
4. Configure CC_DENY in /etc/csf/csf.conf with country codes

Example: CC_DENY = "CN,RU,KP" (block China, Russia, North Korea)

NEXT
    fi

    # Show geographic analysis if available
    if [ -s "$TEMP_DIR/high_risk_networks.txt" ]; then
        printf '%s\n' \
            "=========================================================══" \
            "High-Risk Networks Detected:" \
            ""
        # Each line is split on whitespace: first word, then the rest.
        head -10 "$TEMP_DIR/high_risk_networks.txt" | while read hits net; do
            printf ' • %s - %s high-risk IPs\n' "$net" "$hits"
        done
        printf '\n'
    fi

    read -p "Press Enter to continue..."
    show_action_menu
}
|
||
|
||
################################################################################
# INTERACTIVE CSF BLOCKING
################################################################################
|
||
|
||
#######################################
# List every IP with a threat score >= 70 and offer to block them via CSF.
#
# Scores are read from $TEMP_DIR/threat_scores.txt (field 1 = score,
# field 2 = IP), highest first. IPs accepted by is_excluded_ip or listed in
# the first field of $TEMP_DIR/false_positives.txt are skipped, as are lines
# whose score is not a plain non-negative integer. The operator then chooses a
# 1 hour, 24 hour or permanent block, or no block at all.
#
# Globals:   CSF_AVAILABLE, TEMP_DIR, RED, YELLOW, BOLD, NC (read)
# Arguments: none
# Outputs:   listing and prompts on stdout
# Returns:   0 when CSF is unavailable or nothing qualifies; otherwise the
#            status of the chosen action
#######################################
offer_csf_blocking() {
    echo ""
    echo "==============================================================="
    print_header "🛡 INTERACTIVE THREAT BLOCKING"

    # Check if CSF is installed
    if [ "$CSF_AVAILABLE" != true ]; then
        print_warning "CSF (ConfigServer Security & Firewall) is not installed"
        echo "Cannot offer automatic blocking without CSF"
        return 0
    fi

    # Get high-risk IPs (score >= 70)
    local high_risk_ips=()
    local ip_scores=()
    local score ip

    if [ -s "$TEMP_DIR/threat_scores.txt" ]; then
        # Fields are split by read itself instead of two echo|cut pipelines
        # per line.
        while IFS='|' read -r score ip _; do
            # A non-numeric score would make the numeric test print an error.
            [[ "$score" =~ ^[0-9]+$ ]] || continue

            # Only include scores >= 70 (HIGH and CRITICAL)
            [ "$score" -ge 70 ] || continue

            # Skip excluded IPs
            if is_excluded_ip "$ip"; then
                continue
            fi

            # Skip false positives (exact match on the first field)
            if [ -s "$TEMP_DIR/false_positives.txt" ] \
                && awk -F'|' -v ip="$ip" '$1 == ip { hit = 1; exit } END { exit !hit }' \
                    "$TEMP_DIR/false_positives.txt" 2>/dev/null; then
                continue
            fi

            high_risk_ips+=("$ip")
            ip_scores+=("$score")
        done < <(awk -F'|' '{print $1 "|" $2}' "$TEMP_DIR/threat_scores.txt" | sort -rn)
    fi

    # If no high-risk IPs, nothing to block
    if [ ${#high_risk_ips[@]} -eq 0 ]; then
        print_info "No high-risk IPs detected (score >= 70)"
        return 0
    fi

    # Show IPs that would be blocked
    echo ""
    echo "Found ${#high_risk_ips[@]} high-risk IP(s) with threat score >= 70:"
    echo ""

    # idx is local: the former un-declared loop index overwrote any global of
    # the same name in the calling script.
    local idx count requests
    for idx in "${!high_risk_ips[@]}"; do
        count=$((idx + 1))
        ip="${high_risk_ips[$idx]}"
        score="${ip_scores[$idx]}"
        # First exact match only, so the value is always a single line.
        requests=$(awk -F'|' -v ip="$ip" '$1 == ip { print $2; exit }' \
            "$TEMP_DIR/bot_ips.txt" 2>/dev/null) || true
        requests=${requests:-0}

        # Color code by severity
        if [ "$score" -ge 90 ]; then
            echo -e " ${RED}[$count] $ip${NC} - Risk: ${RED}$score/100 CRITICAL${NC} ($requests requests)"
        elif [ "$score" -ge 80 ]; then
            echo -e " ${YELLOW}[$count] $ip${NC} - Risk: ${YELLOW}$score/100 HIGH${NC} ($requests requests)"
        else
            echo -e " [$count] $ip - Risk: $score/100 ELEVATED ($requests requests)"
        fi
    done

    echo ""
    echo "==============================================================="
    echo ""

    # Ask user if they want to block
    echo -e "${BOLD}Would you like to temporarily block these IPs using CSF?${NC}"
    echo ""
    echo "Options:"
    echo " 1) Block for 1 hour (temporary - auto-expires)"
    echo " 2) Block for 24 hours (temporary - auto-expires)"
    echo " 3) Block permanently (requires manual unblock)"
    echo " 4) Don't block (manual review)"
    echo ""
    local block_choice
    read -r -p "Select option [1-4]: " block_choice

    case "$block_choice" in
        1)
            # 1 hour in seconds
            apply_csf_blocks 3600 "1 hour" "${high_risk_ips[@]}"
            ;;
        2)
            # 24 hours in seconds
            apply_csf_blocks 86400 "24 hours" "${high_risk_ips[@]}"
            ;;
        3)
            apply_csf_permanent_blocks "${high_risk_ips[@]}"
            ;;
        4)
            print_info "Skipping automatic blocking - manual review recommended"
            echo "You can block IPs manually using: csf -td IP DURATION"
            ;;
        *)
            print_warning "Invalid option - skipping blocking"
            ;;
    esac
}
|
||
|
||
#######################################
# Temporarily deny a list of IPs through CSF ("csf -td").
#
# Globals:   TEMP_DIR, GREEN, RED, NC (read)
# Arguments: $1  - block duration in seconds (passed to csf)
#            $2  - human-readable duration used in messages
#            $3+ - IP addresses to block
# Outputs:   one status line per IP plus a summary on stdout
# Returns:   0 when there was nothing to do; otherwise the status of the last
#            status message printed
#######################################
apply_csf_blocks() {
    local duration=$1
    local duration_text=$2
    shift 2
    local ips=("$@")

    if [ "${#ips[@]}" -eq 0 ]; then
        print_info "No IPs supplied - nothing to block"
        return 0
    fi

    echo ""
    print_info "Applying temporary CSF blocks for $duration_text..."
    echo ""

    local success_count=0
    local fail_count=0
    local ip score

    for ip in "${ips[@]}"; do
        # Get threat score for comment. Exact field match, first hit only: a
        # regex grep could return several lines, which then ended up inside
        # the CSF comment. The IP is accepted in field 2 or in the last field
        # so both observed ways of reading threat_scores.txt keep working.
        score=$(awk -F'|' -v ip="$ip" '$2 == ip || $NF == ip { print $1; exit }' \
            "$TEMP_DIR/threat_scores.txt" 2>/dev/null) || true
        score=${score:-unknown}

        # Use csf -td for temporary deny
        if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
            echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e " ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done

    echo ""
    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) for $duration_text"
        echo ""
        echo "These blocks will automatically expire after $duration_text"
        echo "To view temporary blocks: csf -t"
        echo "To remove a block early: csf -tr IP"
    fi

    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF to apply changes - pointless (and disruptive) when no block
    # was added, so it is skipped in that case.
    if [ "$success_count" -gt 0 ]; then
        print_info "Restarting CSF to apply changes..."
        if csf -r >/dev/null 2>&1; then
            print_success "CSF restarted successfully"
        else
            print_warning "CSF restart may have failed - check manually with: csf -r"
        fi
    fi
}
|
||
|
||
#######################################
# Permanently deny a list of IPs through CSF ("csf -d") after a literal
# "yes" confirmation.
#
# Globals:   TEMP_DIR, GREEN, RED, NC (read)
# Arguments: $@ - IP addresses to block
# Outputs:   prompt, one status line per IP and a summary on stdout
# Returns:   0 when cancelled or when there was nothing to do; otherwise the
#            status of the last status message printed
#######################################
apply_csf_permanent_blocks() {
    local ips=("$@")

    if [ "${#ips[@]}" -eq 0 ]; then
        print_info "No IPs supplied - nothing to block"
        return 0
    fi

    echo ""
    print_warning "Applying PERMANENT CSF blocks..."
    echo "These will require manual removal using: csf -dr IP"
    echo ""
    local confirm
    read -r -p "Are you sure? This is permanent! (yes/no): " confirm

    if [ "$confirm" != "yes" ]; then
        print_info "Cancelled permanent blocking"
        return 0
    fi

    echo ""
    local success_count=0
    local fail_count=0
    local ip score

    for ip in "${ips[@]}"; do
        # Threat score for the comment. Exact field match, first hit only: a
        # regex grep could return several lines, which then ended up inside
        # the CSF comment. The IP is accepted in field 2 or in the last field
        # so both observed ways of reading threat_scores.txt keep working.
        score=$(awk -F'|' -v ip="$ip" '$2 == ip || $NF == ip { print $1; exit }' \
            "$TEMP_DIR/threat_scores.txt" 2>/dev/null) || true
        score=${score:-unknown}

        # Use csf -d for permanent deny
        if csf -d "$ip" "Bot threat score: $score/100 - Permanently blocked by toolkit" >/dev/null 2>&1; then
            echo -e " ${GREEN}${NC} Permanently blocked $ip (score: $score/100)"
            success_count=$((success_count + 1))
        else
            echo -e " ${RED}${NC} Failed to block $ip"
            fail_count=$((fail_count + 1))
        fi
    done

    echo ""
    if [ "$success_count" -gt 0 ]; then
        print_success "Successfully blocked $success_count IP(s) permanently"
        echo ""
        echo "To view blocked IPs: csf -g"
        echo "To remove a block: csf -dr IP"
    fi

    if [ "$fail_count" -gt 0 ]; then
        print_warning "$fail_count IP(s) failed to block - check CSF configuration"
    fi

    # Restart CSF - pointless (and disruptive) when no block was added, so it
    # is skipped in that case.
    if [ "$success_count" -gt 0 ]; then
        print_info "Restarting CSF to apply changes..."
        if csf -r >/dev/null 2>&1; then
            print_success "CSF restarted successfully"
        else
            print_warning "CSF restart may have failed - check manually with: csf -r"
        fi
    fi
}
|
||
|
||
# Run the script
|
||
# "$@" hands every command-line argument to main as its own, unmodified word.
main "$@"
|