Files
cschantz fdce4ccd07 Remove duplicate show_progress function
QA scan found duplicate show_progress function in analyze-historical-attacks.sh
that's already available in lib/common-functions.sh.

Changes:
- Added source for lib/common-functions.sh
- Removed local show_progress() definition
- Added comment noting function is now sourced

This reduces code duplication and ensures consistent progress display
across all toolkit scripts.
2026-01-02 16:24:56 -05:00

441 lines
15 KiB
Bash
Executable File

#!/bin/bash
#
# Historical Attack Log Analyzer
# Scans past Apache/Nginx logs for attack patterns using ET Open signatures
#
# Performance Optimizations:
# - Pre-filters static resources (.css, .js, images) = 30-50% reduction
# - Skips clean requests (no query strings or special chars) = 20-30% reduction
# - Deferred parsing with arrays (vs string concat) = 10-15% faster
# - Progress check after pre-filters (reduced overhead) = 2-5% faster
# - Optimized URL counting (pattern matching vs subprocess) = 10-15% faster
# Expected: 2-10x faster on normal traffic, 10-15% faster on attack-heavy logs
#
# Usage: bash analyze-historical-attacks.sh [options]
#
# Options:
# -d DAYS Analyze logs from last N days (default: 7)
# -l LOGFILE Analyze specific log file
# -o OUTPUT Output report file (default: /tmp/attack-analysis-TIMESTAMP.txt)
# -t THRESHOLD Minimum threat score to report (default: 50)
# -v Verbose mode (show all attacks)
# -h Show help
# Resolve the toolkit root: the PARENT of the directory holding this script.
# Every library sourced below lives under "$SCRIPT_DIR/lib".
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."

# Source required libraries. A missing file and a file that fails while
# loading are reported separately, and both diagnostics go to stderr so they
# cannot be mistaken for normal output.
for required_lib in common-functions.sh attack-signatures.sh http-attack-analyzer.sh; do
  if [ ! -r "$SCRIPT_DIR/lib/$required_lib" ]; then
    echo "ERROR: $required_lib not found" >&2
    exit 1
  fi
  # stderr of the library itself stays silenced while it loads.
  source "$SCRIPT_DIR/lib/$required_lib" 2>/dev/null || {
    echo "ERROR: failed to load $required_lib" >&2
    exit 1
  }
done
unset required_lib

# Try to source IP reputation library (optional; failure is ignored)
source "$SCRIPT_DIR/lib/ip-reputation.sh" 2>/dev/null
# Terminal colours, stored as unexpanded ANSI SGR escape sequences.
# They only turn into real escape bytes when printed with `echo -e`.
RED="\033[0;31m"      # red
YELLOW="\033[1;33m"   # bold yellow
GREEN="\033[0;32m"    # green
BLUE="\033[0;34m"     # blue
CYAN="\033[0;36m"     # cyan
BOLD="\033[1m"        # bold
NC="\033[0m"          # reset ("no colour")
# Default options (each can be overridden on the command line)
DAYS=7
LOG_FILE=""
OUTPUT_FILE="/tmp/attack-analysis-$(date +%Y%m%d_%H%M%S).txt"
THRESHOLD=50
VERBOSE=0

#######################################
# Print the help text to stdout.
#######################################
show_analyzer_help() {
  cat << EOF
Historical Attack Log Analyzer
Scans past Apache/Nginx logs for attack patterns using ET Open signatures
Usage: $0 [options]
Options:
-d DAYS Analyze logs from last N days (default: 7)
-l LOGFILE Analyze specific log file
-o OUTPUT Output report file (default: /tmp/attack-analysis-TIMESTAMP.txt)
-t THRESHOLD Minimum threat score to report (default: 50)
-v Verbose mode (show all attacks)
-h Show this help
Examples:
# Analyze last 7 days
$0
# Analyze last 30 days
$0 -d 30
# Analyze specific log file
$0 -l /var/log/apache2/access.log
# Show all attacks (including low severity)
$0 -t 0 -v
# Save report to custom location
$0 -o /root/attack-report.txt
EOF
}

#######################################
# Parse command line options into the global settings.
# Globals:
#   DAYS, LOG_FILE, OUTPUT_FILE, THRESHOLD, VERBOSE (written)
# Arguments:
#   The script's command line ("$@").
# Returns:
#   Exits 0 after -h; exits 1 on an unknown option, a missing option
#   argument, or a non-numeric -d / -t value.
#######################################
parse_analyzer_args() {
  local opt OPTARG
  local OPTIND=1

  # The leading ':' selects getopts' silent mode. In the default mode OPTARG
  # is unset for an unknown option, so "Invalid option: -$OPTARG" would print
  # a bare "-"; silent mode stores the offending letter in OPTARG and lets us
  # report a missing argument (":") ourselves.
  while getopts ":d:l:o:t:vh" opt; do
    case "$opt" in
      d) DAYS="$OPTARG" ;;
      l) LOG_FILE="$OPTARG" ;;
      o) OUTPUT_FILE="$OPTARG" ;;
      t) THRESHOLD="$OPTARG" ;;
      v) VERBOSE=1 ;;
      h)
        show_analyzer_help
        exit 0
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        exit 1
        ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        exit 1
        ;;
    esac
  done

  # DAYS is passed to `find -mtime` and THRESHOLD to `[ ... -ge ... ]`;
  # both need plain non-negative integers, so reject anything else up front.
  if [[ ! "$DAYS" =~ ^[0-9]+$ ]]; then
    echo "ERROR: -d expects a non-negative integer (got: $DAYS)" >&2
    exit 1
  fi
  if [[ ! "$THRESHOLD" =~ ^[0-9]+$ ]]; then
    echo "ERROR: -t expects a non-negative integer (got: $THRESHOLD)" >&2
    exit 1
  fi
}

# Parse command line arguments
parse_analyzer_args "$@"
# Startup banner: a rule followed by a blank line, the coloured title,
# the ruleset credit, then another rule and blank line.
printf '%s\n\n' "================================================================================"
echo -e "${CYAN}${BOLD}Historical Attack Log Analyzer${NC}"
printf '%s\n' "Powered by Emerging Threats Open Ruleset"
printf '%s\n\n' "================================================================================"
# Find log files to analyze.
# Result: LOG_FILES holds either the single file given with -l, or every
# access log under the well-known directories modified in the last DAYS days.
LOG_FILES=()
if [ -n "$LOG_FILE" ]; then
  # Specific log file provided
  if [ ! -f "$LOG_FILE" ]; then
    echo -e "${RED}ERROR: Log file not found: $LOG_FILE${NC}" >&2
    exit 1
  fi
  LOG_FILES=("$LOG_FILE")
  echo -e "${GREEN}${NC} Analyzing specific file: $LOG_FILE"
else
  # Auto-detect log files
  echo -e "${BLUE}[*]${NC} Searching for Apache/Nginx log files..."

  # Common log locations
  SEARCH_PATHS=(
    "/var/log/apache2"
    "/var/log/httpd"
    "/usr/local/apache/logs"
    "/var/log/nginx"
    "/usr/local/apache/domlogs"
  )

  for path in "${SEARCH_PATHS[@]}"; do
    [ -d "$path" ] || continue
    # Find access logs modified in last N days. Names are passed
    # NUL-delimited so unusual file names (spaces, newlines) stay intact.
    while IFS= read -r -d '' log; do
      LOG_FILES+=("$log")
    done < <(find "$path" -type f \( -name "access*.log*" -o -name "access_log*" -o -name "*.com" -o -name "*.net" -o -name "*.org" \) -mtime -"$DAYS" -print0 2>/dev/null)
  done

  if [ ${#LOG_FILES[@]} -eq 0 ]; then
    echo -e "${RED}ERROR: No log files found in last $DAYS days${NC}" >&2
    exit 1
  fi
  echo -e "${GREEN}${NC} Found ${#LOG_FILES[@]} log files from last $DAYS days"
fi
# ---- Analysis state -------------------------------------------------------
# Scalar counters
TOTAL_LINES=0 TOTAL_ATTACKS=0
CRITICAL_ATTACKS=0 HIGH_ATTACKS=0 MEDIUM_ATTACKS=0

# Associative arrays:
#   ATTACK_TYPES / SIGNATURE_HITS  - occurrence counts keyed by name
#   TOP_ATTACKERS                  - cumulative threat score keyed by IP
#   IP_ATTACK_DETAILS              - detailed attack info per IP
#   IP_ATTACK_COUNT                - number of attacks per IP
#   IP_SAMPLE_URLS                 - sample URLs per IP
declare -A ATTACK_TYPES TOP_ATTACKERS SIGNATURE_HITS
declare -A IP_ATTACK_DETAILS IP_ATTACK_COUNT IP_SAMPLE_URLS

# OPTIMIZATION: indexed arrays collect raw fields for deferred parsing
# (instead of growing a string by concatenation)
declare -a ATTACK_TYPES_RAW SIGNATURE_HITS_RAW

# The progress indicator function is sourced from common-functions.sh

# Announce the start of the analysis
printf '\n'
echo -e "${BLUE}[*]${NC} Starting analysis (Threshold: $THRESHOLD)..."
printf '\n'
{
# Report header: title, generation time and the settings used for this run,
# framed by two rules (each followed by a blank line) and one extra blank line.
printf '%s\n' \
  "================================================================================" \
  "" \
  "HISTORICAL ATTACK ANALYSIS REPORT" \
  "Generated: $(date)" \
  "Period: Last $DAYS days" \
  "Threshold: $THRESHOLD" \
  "================================================================================" \
  "" \
  ""
#######################################
# Scan a single access log and fold every request that scores at or above
# THRESHOLD into the global attack counters.
#
# analyze_http_log_line is expected to print
#   score||attack_types||signatures||ip||uri
# which is the layout the field extraction below relies on.
#
# Globals:
#   THRESHOLD (read)
#   TOTAL_LINES, TOTAL_ATTACKS, CRITICAL_ATTACKS, HIGH_ATTACKS,
#   MEDIUM_ATTACKS (updated)
#   ATTACK_TYPES_RAW, SIGNATURE_HITS_RAW (appended; raw comma-separated)
#   TOP_ATTACKERS, IP_ATTACK_COUNT, IP_ATTACK_DETAILS, IP_SAMPLE_URLS (updated)
#   file_attacks (written): number of attacks found in this file
# Arguments:
#   $1 - log file path; *.gz and *.bz2 files are decompressed on the fly
#######################################
analyze_log_file() {
  local log_path=$1
  local reader line result threat_score rest attack_types signatures ip uri
  local known_types known_urls
  local line_count=0
  file_attacks=0

  # Handle compressed logs
  case "$log_path" in
    *.gz)  reader="zcat" ;;
    *.bz2) reader="bzcat" ;;
    *)     reader="cat" ;;
  esac

  # '|| [ -n "$line" ]' keeps a final line that has no trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    line_count=$((line_count + 1))
    TOTAL_LINES=$((TOTAL_LINES + 1))

    # OPTIMIZATION: skip successful (200/304) GET/HEAD requests for static
    # resources. The path may not contain '?': otherwise any request whose
    # query string merely ends in ".css"/".js"/... would bypass analysis.
    if [[ "$line" =~ (GET|HEAD)[[:space:]]+[^[:space:]?]*\.(css|js|jpg|jpeg|png|gif|ico|woff|woff2|ttf|svg|webp)[[:space:]]HTTP.+\"[[:space:]]+(200|304)[[:space:]] ]]; then
      continue
    fi

    # OPTIMIZATION: skip GET/POST requests (status 200-399) with no
    # suspicious characters.
    # NOTE(review): as written this can never trigger - the first pattern
    # requires a '"' in the line and the second rejects any line containing
    # '"'. Left unchanged on purpose: making it effective would also skip
    # lines whose only indicator is outside the URI (e.g. the user agent).
    if [[ "$line" =~ \"(GET|POST)[[:space:]]+/[^[:space:]]*[[:space:]]HTTP.+\"[[:space:]]+(200|3[0-9]{2})[[:space:]] ]] && [[ ! "$line" =~ [\?\%\'\"\<\>\;\(\)\|\\] ]]; then
      continue
    fi

    # Show progress every 1000 lines (AFTER pre-filters to reduce overhead).
    # In the main script stdout is redirected into the report file while the
    # analysis runs, so progress is sent to stderr to keep it out of the
    # report.
    if [ $((line_count % 1000)) -eq 0 ]; then
      show_progress "$TOTAL_LINES" "unknown" >&2
    fi

    # Analyze line (now only on potentially suspicious requests)
    result=$(analyze_http_log_line "$line" 2>/dev/null)
    threat_score="${result%%||*}"

    # The analyzer may print nothing (or something non-numeric); skip such
    # lines instead of letting '[ ... -ge ... ]' emit an error per line.
    [[ "$threat_score" =~ ^[0-9]+$ ]] || continue
    [ "$threat_score" -ge "$THRESHOLD" ] || continue

    # Extract remaining fields using parameter expansion
    rest="${result#*||}"
    attack_types="${rest%%||*}"
    rest="${rest#*||}"
    signatures="${rest%%||*}"
    rest="${rest#*||}"
    ip="${rest%%||*}"
    uri="${rest#*||}"

    # Count attacks
    TOTAL_ATTACKS=$((TOTAL_ATTACKS + 1))
    file_attacks=$((file_attacks + 1))

    # Categorize by severity
    if [ "$threat_score" -ge 85 ]; then
      CRITICAL_ATTACKS=$((CRITICAL_ATTACKS + 1))
    elif [ "$threat_score" -ge 70 ]; then
      HIGH_ATTACKS=$((HIGH_ATTACKS + 1))
    elif [ "$threat_score" -ge 50 ]; then
      MEDIUM_ATTACKS=$((MEDIUM_ATTACKS + 1))
    fi

    # OPTIMIZATION: defer splitting of types/signatures - just remember the
    # raw comma-separated fields here.
    ATTACK_TYPES_RAW+=("$attack_types")
    SIGNATURE_HITS_RAW+=("$signatures")

    # Track top attackers (cumulative score) - :-0 covers the first encounter
    TOP_ATTACKERS["$ip"]=$((${TOP_ATTACKERS[$ip]:-0} + threat_score))
    IP_ATTACK_COUNT["$ip"]=$((${IP_ATTACK_COUNT[$ip]:-0} + 1))

    # Store attack type details per IP (keep raw comma-separated)
    known_types="${IP_ATTACK_DETAILS[$ip]}"
    if [ -z "$known_types" ]; then
      IP_ATTACK_DETAILS["$ip"]="$attack_types"
    else
      IP_ATTACK_DETAILS["$ip"]="$known_types,$attack_types"
    fi

    # Store sample URLs (keep first 3, each cut to 100 chars, joined by "||").
    # Two "||" separators already present means three URLs are stored.
    known_urls="${IP_SAMPLE_URLS[$ip]}"
    if [ -z "$known_urls" ]; then
      IP_SAMPLE_URLS["$ip"]="${uri:0:100}"
    elif [[ "$known_urls" != *"||"*"||"* ]]; then
      IP_SAMPLE_URLS["$ip"]="$known_urls||${uri:0:100}"
    fi
  done < <("$reader" "$log_path" 2>/dev/null)
}

# Analyze each log file
for log_file in "${LOG_FILES[@]}"; do
  echo "[*] Analyzing: $log_file"
  analyze_log_file "$log_file"
  echo " → Found $file_attacks attacks"
done
# OPTIMIZATION: deferred tallying. The main loop only collected raw
# comma-separated fields; split them here in one pass and count how often
# each attack type and each signature occurred. Empty fields are ignored.
if (( ${#ATTACK_TYPES_RAW[@]} > 0 )); then
  for raw_types in "${ATTACK_TYPES_RAW[@]}"; do
    IFS=',' read -ra type_list <<< "$raw_types"
    for type_name in "${type_list[@]}"; do
      if [[ -n "$type_name" ]]; then
        ATTACK_TYPES["$type_name"]=$((${ATTACK_TYPES[$type_name]:-0} + 1))
      fi
    done
  done
fi

if (( ${#SIGNATURE_HITS_RAW[@]} > 0 )); then
  for raw_sigs in "${SIGNATURE_HITS_RAW[@]}"; do
    IFS=',' read -ra sig_list <<< "$raw_sigs"
    for sig_name in "${sig_list[@]}"; do
      if [[ -n "$sig_name" ]]; then
        SIGNATURE_HITS["$sig_name"]=$((${SIGNATURE_HITS[$sig_name]:-0} + 1))
      fi
    done
  done
fi
#######################################
# Write the "ATTACKING IPs" section: the 50 IPs with the highest cumulative
# threat score, each with attack count, average score, threat level, a
# per-type breakdown, optional reputation data and the stored sample URLs.
# Globals:
#   TOP_ATTACKERS, IP_ATTACK_COUNT, IP_ATTACK_DETAILS, IP_SAMPLE_URLS (read)
# Outputs:
#   The section, written to stdout.
#######################################
report_attacking_ips() {
  local ranked_ips cumulative_score ip attack_count all_attack_types sample_urls
  local attack attack_type attack_summary avg_score level threat_intel url
  local abuse_conf abuse_rpts country isp geo timing whitelisted
  local -a attacks
  local -A type_counts
  local ip_count=0

  printf '%s\n' \
    "" \
    "================================================================================" \
    "" \
    "ATTACKING IPs - DETAILED BREAKDOWN" \
    "================================================================================" \
    "" \
    ""

  # Sort IPs by cumulative threat score (highest first), keep the top 50.
  # Built as a string first so the loop below runs in the current shell.
  ranked_ips=$(for ip in "${!TOP_ATTACKERS[@]}"; do
    echo "${TOP_ATTACKERS[$ip]}:$ip"
  done | sort -t: -k1 -nr | head -50)

  while IFS=: read -r cumulative_score ip; do
    # A here-string always delivers at least one line, so with no attackers
    # the loop would otherwise run once with an empty IP and attempt a
    # division by zero below. Skip anything that is not "score:ip".
    [[ -n "$ip" && "$cumulative_score" =~ ^[0-9]+$ ]] || continue
    attack_count="${IP_ATTACK_COUNT[$ip]:-0}"
    (( attack_count > 0 )) || continue

    ip_count=$((ip_count + 1))
    all_attack_types="${IP_ATTACK_DETAILS[$ip]}"
    sample_urls="${IP_SAMPLE_URLS[$ip]}"

    # Count occurrences of each attack type
    type_counts=()
    IFS=',' read -ra attacks <<< "$all_attack_types"
    for attack in "${attacks[@]}"; do
      [ -n "$attack" ] && type_counts["$attack"]=$((${type_counts[$attack]:-0} + 1))
    done

    # Format attack summary as "type(count), type(count), ..."
    attack_summary=""
    for attack_type in "${!type_counts[@]}"; do
      if [ -z "$attack_summary" ]; then
        attack_summary="$attack_type(${type_counts[$attack_type]})"
      else
        attack_summary="$attack_summary, $attack_type(${type_counts[$attack_type]})"
      fi
    done

    # Determine threat level from the average score per attack
    avg_score=$((cumulative_score / attack_count))
    if [ "$avg_score" -ge 85 ]; then
      level="CRITICAL"
    elif [ "$avg_score" -ge 70 ]; then
      level="HIGH"
    else
      level="MEDIUM"
    fi

    # Print IP summary
    echo "[$ip_count] $ip"
    printf " Attacks: %d | Avg Score: %d | Threat Level: %s\n" "$attack_count" "$avg_score" "$level"
    echo " Attack Types: $attack_summary"

    # Get reputation (only if the optional lookup function is available)
    if type get_threat_intelligence &>/dev/null; then
      threat_intel=$(get_threat_intelligence "$ip" 2>/dev/null)
      if [ -n "$threat_intel" ]; then
        IFS='|' read -r abuse_conf abuse_rpts country isp geo timing whitelisted <<< "$threat_intel"
        # Only compare/print the fields as numbers when they really are numbers.
        if [[ "$abuse_conf" =~ ^[0-9]+$ ]] && [ "$abuse_conf" -gt 0 ]; then
          [[ "$abuse_rpts" =~ ^[0-9]+$ ]] || abuse_rpts=0
          printf " Reputation: AbuseIPDB %d%% confidence (%d reports) | %s\n" "$abuse_conf" "$abuse_rpts" "${country:-Unknown}"
        fi
      fi
    fi

    # Show sample URLs: split on the "||" delimiter with parameter expansion
    # (no dependency on sed understanding "\n" in a replacement).
    if [ -n "$sample_urls" ]; then
      echo " Sample Targets:"
      while read -r url; do
        if [ -n "$url" ]; then
          echo " - $url"
        fi
      done <<< "${sample_urls//||/$'\n'}"
    fi
    echo ""
  done <<< "$ranked_ips"
}

report_attacking_ips
# Summary section: overall totals, the severity buckets and the ten most
# frequent attack types, followed by the end-of-report footer.
printf '%s\n' \
  "================================================================================" \
  "" \
  "SUMMARY STATISTICS" \
  "================================================================================" \
  "" \
  "" \
  "Total lines processed: $TOTAL_LINES" \
  "Total attacks detected: $TOTAL_ATTACKS" \
  "Unique attacking IPs: ${#TOP_ATTACKERS[@]}" \
  "" \
  "Attack Severity:" \
  " - Critical (≥85): $CRITICAL_ATTACKS" \
  " - High (70-84): $HIGH_ATTACKS" \
  " - Medium (50-69): $MEDIUM_ATTACKS" \
  "" \
  "Top Attack Types:"

# Emit "type:count" pairs, order them by count (descending), keep ten and
# format each as an aligned table row.
for type_name in "${!ATTACK_TYPES[@]}"; do
  printf '%s\n' "$type_name:${ATTACK_TYPES[$type_name]}"
done \
  | sort -t: -k2 -nr \
  | head -10 \
  | while IFS=: read -r type_label hit_count; do
      printf " %-20s %5d attacks\n" "$type_label" "$hit_count"
    done

printf '%s\n' \
  "" \
  "================================================================================" \
  "" \
  "END OF REPORT" \
  "================================================================================" \
  ""
} > "$OUTPUT_FILE"
# Wipe the progress line: carriage return, then "erase to end of line"
printf '\r\033[K'

# Short recap on the terminal (the full detail is in the report file)
printf '\n'
echo -e "${GREEN}${NC} Analysis complete!"
printf '%s\n' \
  "" \
  "Summary:" \
  " Lines processed: $TOTAL_LINES" \
  " Attacks detected: $TOTAL_ATTACKS" \
  " - Critical (≥85): $CRITICAL_ATTACKS" \
  " - High (70-84): $HIGH_ATTACKS" \
  " - Medium (50-69): $MEDIUM_ATTACKS" \
  ""
echo -e "${GREEN}${NC} Full report saved to: $OUTPUT_FILE"
printf '\n'

# Offer to open the report in a pager; only a single "y" or "Y" accepts
read -p "View report now? [y/N]: " view_report
case "$view_report" in
  [Yy]) less "$OUTPUT_FILE" ;;
esac