Fix live-attack-monitor auto-blocking and bot-analyzer compression

- live-attack-monitor.sh:
  * Remove snapshot loading (start fresh each session)
  * Fix Apache log monitoring to use tail -n 0 -F (only new entries)
  * Add IP file sync to the main loop so that auto-blocking works
  * Fix IP_DATA consolidation for cross-process communication

- bot-analyzer.sh:
  * Implement gzip compression for large temp files (10-20x space savings)
  * Update all read/write operations to use compressed files
  * Keep temp files manageable on servers with 200+ domains and millions of log entries

- run.sh:
  * Add HISTFILE fallback to prevent crashes when sourced
This commit is contained in:
cschantz
2025-11-17 22:28:38 -05:00
parent 8cbf62b243
commit 80a4703cdf
3 changed files with 391 additions and 132 deletions
+50 -46
View File
@@ -348,20 +348,22 @@ parse_logs() {
if (ip != "" && ip !~ /^[[:space:]]*$/) {
print ip "|" domain "|" request_url "|" status "|" size "|" user_agent "|" http_method "|" timestamp
}
}' "$logfile" >> "$TEMP_DIR/parsed_logs.txt" 2>/dev/null
done
}' "$logfile" 2>/dev/null
done | gzip > "$TEMP_DIR/parsed_logs.txt.gz"
# Clear the progress line
echo -ne "\r\033[K"
if [ ! -s "$TEMP_DIR/parsed_logs.txt" ]; then
if [ ! -s "$TEMP_DIR/parsed_logs.txt.gz" ]; then
print_alert "No log entries were parsed. Check log format or permissions."
return 1
fi
local line_count
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
print_success "Logs parsed successfully ($line_count entries)"
line_count=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | wc -l)
local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt.gz" | cut -f1)
print_success "Logs parsed successfully ($line_count entries, ${file_size_kb}KB compressed)"
return 0
}
@@ -460,16 +462,18 @@ classify_bots() {
if (bot_type != "unknown") {
print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
}
}' "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"
}' < <(zcat "$TEMP_DIR/parsed_logs.txt.gz") | gzip > "$TEMP_DIR/classified_bots.txt.gz"
if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
if [ ! -s "$TEMP_DIR/classified_bots.txt.gz" ]; then
print_alert "Bot classification failed"
return 1
fi
local classified_count
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
print_success "Bot classification complete ($classified_count entries)"
classified_count=$(zcat "$TEMP_DIR/classified_bots.txt.gz" | wc -l)
local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt.gz" | cut -f1)
print_success "Bot classification complete ($classified_count entries, ${file_size_kb}KB compressed)"
return 0
}
@@ -556,7 +560,7 @@ detect_threats() {
# Track response codes for intelligence
print status > "'"$TEMP_DIR"'/response_codes_raw.txt"
}
' "$TEMP_DIR/parsed_logs.txt"
' < <(zcat "$TEMP_DIR/parsed_logs.txt.gz")
# Process attack vectors by type
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
@@ -619,26 +623,26 @@ detect_threats() {
detect_botnets() {
print_info "Analyzing for botnet patterns..."
# Group IPs by similar behavior patterns
# Pattern 1: Multiple IPs hitting same URLs in coordinated manner
awk -F'|' '{print $1"|"$3}' "$TEMP_DIR/parsed_logs.txt" | \
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1"|"$3}' | \
sort | uniq -c | awk '$1 > 10 {print $2}' | \
cut -d'|' -f2 | sort | uniq -c | sort -rn | \
awk '$1 > 5 {print $2}' > "$TEMP_DIR/coordinated_urls.txt"
# Pattern 2: IPs with similar User-Agents hitting multiple domains
awk -F'|' '{print $1"|"$6}' "$TEMP_DIR/parsed_logs.txt" | \
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1"|"$6}' | \
sort | uniq > "$TEMP_DIR/ip_ua_pairs.txt"
# Pattern 3: Detect IP ranges (Class C networks) with suspicious activity
awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | \
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | \
awk -F'.' '{print $1"."$2"."$3".0/24"}' | \
sort | uniq -c | sort -rn | awk '$1 > 20' > "$TEMP_DIR/suspicious_networks.txt"
# Pattern 4: Rapid fire requests (DDoS indicators)
# Extract timestamp and count requests per IP per minute
awk -F'|' '{
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{
ip = $1
timestamp = $8
# Extract date/time components (handles format: DD/MMM/YYYY:HH:MM:SS)
@@ -647,13 +651,13 @@ detect_botnets() {
time_key = ts[3] ts[2] ts[1] "_" ts[4] ts[5]
print ip "|" time_key
}
}' "$TEMP_DIR/parsed_logs.txt" | \
}' | \
sort | uniq -c | \
awk '$1 > 50 {print $1 " " $2}' | \
awk -F'|' '{print $1}' | \
awk '{ip=$2; count=$1; sum[ip]+=count; max[ip]=(count>max[ip]?count:max[ip])} END {for(ip in sum) print sum[ip], ip, max[ip]}' | \
sort -rn > "$TEMP_DIR/rapid_fire_ips.txt"
print_success "Botnet analysis complete"
}
@@ -742,13 +746,13 @@ analyze_time_series() {
print_info "Analyzing time-series patterns..."
# Extract hourly bot traffic
awk -F'|' '$9 != "unknown" {
zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '$9 != "unknown" {
timestamp = $8
if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) {
hour = ts[4]
print hour
}
}' "$TEMP_DIR/classified_bots.txt" | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
}' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
# Extract hourly attack traffic
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
@@ -759,7 +763,7 @@ analyze_time_series() {
hour = ts[4]
print hour
}
}' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
}' "$TEMP_DIR/attack_vectors_raw.txt" <(zcat "$TEMP_DIR/parsed_logs.txt.gz") | sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
fi
print_success "Time-series analysis complete"
@@ -776,7 +780,7 @@ calculate_threat_scores() {
declare -A ip_request_counts
while IFS='|' read -r ip rest; do
((ip_request_counts["$ip"]++))
done < "$TEMP_DIR/parsed_logs.txt"
done < <(zcat "$TEMP_DIR/parsed_logs.txt.gz")
# Build hash tables from threat files for O(1) lookups
declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
@@ -922,7 +926,7 @@ detect_false_positives() {
print_info "Detecting legitimate services (false positives)..."
# Known monitoring service patterns
awk -F'|' '{
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{
ip = $1
domain = $2
url = $3
@@ -948,7 +952,7 @@ detect_false_positives() {
else if (match(ua, /jetpack|vaultpress|updraftplus/)) {
print ip "|Backup Service|" ua "|" domain
}
}' "$TEMP_DIR/parsed_logs.txt" | sort -u > "$TEMP_DIR/false_positives.txt"
}' | sort -u > "$TEMP_DIR/false_positives.txt"
print_success "False positive detection complete"
}
@@ -959,34 +963,34 @@ detect_false_positives() {
generate_statistics() {
print_info "Generating statistics..."
# Top 5 bots by request count
awk -F'|' '$9 != "unknown" {print $10}' "$TEMP_DIR/classified_bots.txt" | \
zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '$9 != "unknown" {print $10}' | \
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt"
# Top 5 most-hit sites
awk -F'|' '{print $2}' "$TEMP_DIR/parsed_logs.txt" | \
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2}' | \
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_sites.txt"
# Top 5 most-hit URLs
awk -F'|' '{print $2"|"$3}' "$TEMP_DIR/parsed_logs.txt" | \
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2"|"$3}' | \
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_urls.txt"
# Top 5 IP addresses by request count
awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | \
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | \
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_ips.txt"
# Traffic breakdown by bot type
awk -F'|' '{print $9}' "$TEMP_DIR/classified_bots.txt" | \
zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '{print $9}' | \
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt"
# Per-domain traffic sources
while read -r domain; do
echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"
grep "|$domain|" "$TEMP_DIR/classified_bots.txt" | \
zcat "$TEMP_DIR/classified_bots.txt.gz" | grep "|$domain|" | \
awk -F'|' '{print $9}' | sort | uniq -c | sort -rn >> "$TEMP_DIR/domain_${domain}_stats.txt"
done < <(awk -F'|' '{print $2}' "$TEMP_DIR/parsed_logs.txt" | sort -u)
done < <(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2}' | sort -u)
print_success "Statistics generated"
}
@@ -1069,19 +1073,19 @@ generate_report() {
# QUICK STATS DASHBOARD
print_header "QUICK STATS DASHBOARD"
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
unique_ips=$(awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
unique_domains=$(awk -F'|' '{print $2}' "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
bot_requests=$(awk -F'|' '$9 != "unknown"' "$TEMP_DIR/classified_bots.txt" | wc -l)
total_requests=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | wc -l)
unique_ips=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | sort -u | wc -l)
unique_domains=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2}' | sort -u | wc -l)
bot_requests=$(zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '$9 != "unknown"' | wc -l)
# Count private/internal IPs (excluded from threat analysis)
private_ips=$(awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' | wc -l)
private_ips=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' | wc -l)
# Count server's own IPs in the logs
server_ip_hits=0
if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
while read -r server_ip; do
if grep -q "^$server_ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null; then
if zcat "$TEMP_DIR/parsed_logs.txt.gz" | grep -q "^$server_ip|" 2>/dev/null; then
server_ip_hits=$((server_ip_hits + 1))
fi
done < "$TEMP_DIR/server_ips.txt"