Fix live-attack-monitor auto-blocking and bot-analyzer compression
- live-attack-monitor.sh:
  * Remove snapshot loading (start fresh each session)
  * Fix Apache log monitoring to use `tail -n 0 -F` (only new entries)
  * Add IP file sync to the main loop so that auto-blocking works
  * Fix IP_DATA consolidation for cross-process communication
- bot-analyzer.sh:
  * Implement gzip compression for large temp files (10-20x space savings)
  * Update all read/write operations to use the compressed files
  * Fix for servers with 200+ domains and millions of log entries
- run.sh:
  * Add HISTFILE fallback to prevent crashes when sourced
This commit is contained in:
@@ -348,20 +348,22 @@ parse_logs() {
|
||||
if (ip != "" && ip !~ /^[[:space:]]*$/) {
|
||||
print ip "|" domain "|" request_url "|" status "|" size "|" user_agent "|" http_method "|" timestamp
|
||||
}
|
||||
}' "$logfile" >> "$TEMP_DIR/parsed_logs.txt" 2>/dev/null
|
||||
done
|
||||
}' "$logfile" 2>/dev/null
|
||||
done | gzip > "$TEMP_DIR/parsed_logs.txt.gz"
|
||||
|
||||
# Clear the progress line
|
||||
echo -ne "\r\033[K"
|
||||
|
||||
if [ ! -s "$TEMP_DIR/parsed_logs.txt" ]; then
|
||||
if [ ! -s "$TEMP_DIR/parsed_logs.txt.gz" ]; then
|
||||
print_alert "No log entries were parsed. Check log format or permissions."
|
||||
return 1
|
||||
fi
|
||||
|
||||
local line_count
|
||||
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
||||
print_success "Logs parsed successfully ($line_count entries)"
|
||||
line_count=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | wc -l)
|
||||
local file_size_kb
|
||||
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt.gz" | cut -f1)
|
||||
print_success "Logs parsed successfully ($line_count entries, ${file_size_kb}KB compressed)"
|
||||
return 0
|
||||
}
|
||||
|
||||
@@ -460,16 +462,18 @@ classify_bots() {
|
||||
if (bot_type != "unknown") {
|
||||
print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
|
||||
}
|
||||
}' "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"
|
||||
}' < <(zcat "$TEMP_DIR/parsed_logs.txt.gz") | gzip > "$TEMP_DIR/classified_bots.txt.gz"
|
||||
|
||||
if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
|
||||
if [ ! -s "$TEMP_DIR/classified_bots.txt.gz" ]; then
|
||||
print_alert "Bot classification failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
local classified_count
|
||||
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
|
||||
print_success "Bot classification complete ($classified_count entries)"
|
||||
classified_count=$(zcat "$TEMP_DIR/classified_bots.txt.gz" | wc -l)
|
||||
local file_size_kb
|
||||
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt.gz" | cut -f1)
|
||||
print_success "Bot classification complete ($classified_count entries, ${file_size_kb}KB compressed)"
|
||||
return 0
|
||||
}
|
||||
|
||||
@@ -556,7 +560,7 @@ detect_threats() {
|
||||
# Track response codes for intelligence
|
||||
print status > "'"$TEMP_DIR"'/response_codes_raw.txt"
|
||||
}
|
||||
' "$TEMP_DIR/parsed_logs.txt"
|
||||
' < <(zcat "$TEMP_DIR/parsed_logs.txt.gz")
|
||||
|
||||
# Process attack vectors by type
|
||||
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
||||
@@ -619,26 +623,26 @@ detect_threats() {
|
||||
|
||||
detect_botnets() {
|
||||
print_info "Analyzing for botnet patterns..."
|
||||
|
||||
|
||||
# Group IPs by similar behavior patterns
|
||||
# Pattern 1: Multiple IPs hitting same URLs in coordinated manner
|
||||
awk -F'|' '{print $1"|"$3}' "$TEMP_DIR/parsed_logs.txt" | \
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1"|"$3}' | \
|
||||
sort | uniq -c | awk '$1 > 10 {print $2}' | \
|
||||
cut -d'|' -f2 | sort | uniq -c | sort -rn | \
|
||||
awk '$1 > 5 {print $2}' > "$TEMP_DIR/coordinated_urls.txt"
|
||||
|
||||
|
||||
# Pattern 2: IPs with similar User-Agents hitting multiple domains
|
||||
awk -F'|' '{print $1"|"$6}' "$TEMP_DIR/parsed_logs.txt" | \
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1"|"$6}' | \
|
||||
sort | uniq > "$TEMP_DIR/ip_ua_pairs.txt"
|
||||
|
||||
|
||||
# Pattern 3: Detect IP ranges (Class C networks) with suspicious activity
|
||||
awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | \
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | \
|
||||
awk -F'.' '{print $1"."$2"."$3".0/24"}' | \
|
||||
sort | uniq -c | sort -rn | awk '$1 > 20' > "$TEMP_DIR/suspicious_networks.txt"
|
||||
|
||||
|
||||
# Pattern 4: Rapid fire requests (DDoS indicators)
|
||||
# Extract timestamp and count requests per IP per minute
|
||||
awk -F'|' '{
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{
|
||||
ip = $1
|
||||
timestamp = $8
|
||||
# Extract date/time components (handles format: DD/MMM/YYYY:HH:MM:SS)
|
||||
@@ -647,13 +651,13 @@ detect_botnets() {
|
||||
time_key = ts[3] ts[2] ts[1] "_" ts[4] ts[5]
|
||||
print ip "|" time_key
|
||||
}
|
||||
}' "$TEMP_DIR/parsed_logs.txt" | \
|
||||
}' | \
|
||||
sort | uniq -c | \
|
||||
awk '$1 > 50 {print $1 " " $2}' | \
|
||||
awk -F'|' '{print $1}' | \
|
||||
awk '{ip=$2; count=$1; sum[ip]+=count; max[ip]=(count>max[ip]?count:max[ip])} END {for(ip in sum) print sum[ip], ip, max[ip]}' | \
|
||||
sort -rn > "$TEMP_DIR/rapid_fire_ips.txt"
|
||||
|
||||
|
||||
print_success "Botnet analysis complete"
|
||||
}
|
||||
|
||||
@@ -742,13 +746,13 @@ analyze_time_series() {
|
||||
print_info "Analyzing time-series patterns..."
|
||||
|
||||
# Extract hourly bot traffic
|
||||
awk -F'|' '$9 != "unknown" {
|
||||
zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '$9 != "unknown" {
|
||||
timestamp = $8
|
||||
if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) {
|
||||
hour = ts[4]
|
||||
print hour
|
||||
}
|
||||
}' "$TEMP_DIR/classified_bots.txt" | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
|
||||
}' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
|
||||
|
||||
# Extract hourly attack traffic
|
||||
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
||||
@@ -759,7 +763,7 @@ analyze_time_series() {
|
||||
hour = ts[4]
|
||||
print hour
|
||||
}
|
||||
}' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
|
||||
}' "$TEMP_DIR/attack_vectors_raw.txt" <(zcat "$TEMP_DIR/parsed_logs.txt.gz") | sort | uniq -c > "$TEMP_DIR/hourly_attack_traffic.txt"
|
||||
fi
|
||||
|
||||
print_success "Time-series analysis complete"
|
||||
@@ -776,7 +780,7 @@ calculate_threat_scores() {
|
||||
declare -A ip_request_counts
|
||||
while IFS='|' read -r ip rest; do
|
||||
((ip_request_counts["$ip"]++))
|
||||
done < "$TEMP_DIR/parsed_logs.txt"
|
||||
done < <(zcat "$TEMP_DIR/parsed_logs.txt.gz")
|
||||
|
||||
# Build hash tables from threat files for O(1) lookups
|
||||
declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
|
||||
@@ -922,7 +926,7 @@ detect_false_positives() {
|
||||
print_info "Detecting legitimate services (false positives)..."
|
||||
|
||||
# Known monitoring service patterns
|
||||
awk -F'|' '{
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{
|
||||
ip = $1
|
||||
domain = $2
|
||||
url = $3
|
||||
@@ -948,7 +952,7 @@ detect_false_positives() {
|
||||
else if (match(ua, /jetpack|vaultpress|updraftplus/)) {
|
||||
print ip "|Backup Service|" ua "|" domain
|
||||
}
|
||||
}' "$TEMP_DIR/parsed_logs.txt" | sort -u > "$TEMP_DIR/false_positives.txt"
|
||||
}' | sort -u > "$TEMP_DIR/false_positives.txt"
|
||||
|
||||
print_success "False positive detection complete"
|
||||
}
|
||||
@@ -959,34 +963,34 @@ detect_false_positives() {
|
||||
|
||||
generate_statistics() {
|
||||
print_info "Generating statistics..."
|
||||
|
||||
|
||||
# Top 5 bots by request count
|
||||
awk -F'|' '$9 != "unknown" {print $10}' "$TEMP_DIR/classified_bots.txt" | \
|
||||
zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '$9 != "unknown" {print $10}' | \
|
||||
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt"
|
||||
|
||||
|
||||
# Top 5 most-hit sites
|
||||
awk -F'|' '{print $2}' "$TEMP_DIR/parsed_logs.txt" | \
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2}' | \
|
||||
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_sites.txt"
|
||||
|
||||
|
||||
# Top 5 most-hit URLs
|
||||
awk -F'|' '{print $2"|"$3}' "$TEMP_DIR/parsed_logs.txt" | \
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2"|"$3}' | \
|
||||
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_urls.txt"
|
||||
|
||||
|
||||
# Top 5 IP addresses by request count
|
||||
awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | \
|
||||
zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | \
|
||||
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_ips.txt"
|
||||
|
||||
|
||||
# Traffic breakdown by bot type
|
||||
awk -F'|' '{print $9}' "$TEMP_DIR/classified_bots.txt" | \
|
||||
zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '{print $9}' | \
|
||||
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt"
|
||||
|
||||
|
||||
# Per-domain traffic sources
|
||||
while read -r domain; do
|
||||
echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"
|
||||
grep "|$domain|" "$TEMP_DIR/classified_bots.txt" | \
|
||||
zcat "$TEMP_DIR/classified_bots.txt.gz" | grep "|$domain|" | \
|
||||
awk -F'|' '{print $9}' | sort | uniq -c | sort -rn >> "$TEMP_DIR/domain_${domain}_stats.txt"
|
||||
done < <(awk -F'|' '{print $2}' "$TEMP_DIR/parsed_logs.txt" | sort -u)
|
||||
|
||||
done < <(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2}' | sort -u)
|
||||
|
||||
print_success "Statistics generated"
|
||||
}
|
||||
|
||||
@@ -1069,19 +1073,19 @@ generate_report() {
|
||||
# QUICK STATS DASHBOARD
|
||||
print_header "QUICK STATS DASHBOARD"
|
||||
|
||||
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
||||
unique_ips=$(awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
||||
unique_domains=$(awk -F'|' '{print $2}' "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
||||
bot_requests=$(awk -F'|' '$9 != "unknown"' "$TEMP_DIR/classified_bots.txt" | wc -l)
|
||||
total_requests=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | wc -l)
|
||||
unique_ips=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | sort -u | wc -l)
|
||||
unique_domains=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $2}' | sort -u | wc -l)
|
||||
bot_requests=$(zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '$9 != "unknown"' | wc -l)
|
||||
|
||||
# Count private/internal IPs (excluded from threat analysis)
|
||||
private_ips=$(awk -F'|' '{print $1}' "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' | wc -l)
|
||||
private_ips=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '{print $1}' | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' | wc -l)
|
||||
|
||||
# Count server's own IPs in the logs
|
||||
server_ip_hits=0
|
||||
if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
|
||||
while read -r server_ip; do
|
||||
if grep -q "^$server_ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null; then
|
||||
if zcat "$TEMP_DIR/parsed_logs.txt.gz" | grep -q "^$server_ip|" 2>/dev/null; then
|
||||
server_ip_hits=$((server_ip_hits + 1))
|
||||
fi
|
||||
done < "$TEMP_DIR/server_ips.txt"
|
||||
|
||||
Reference in New Issue
Block a user