diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index 7116832..eaccc08 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -505,7 +505,7 @@ parse_logs() { fi local line_count - line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt") + line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0") local file_size_kb file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0") @@ -639,7 +639,7 @@ classify_bots() { fi local classified_count - classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt") + classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo "0") local file_size_kb file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0") @@ -1351,33 +1351,35 @@ analyze_domain_targeting_percentage() { printf "%s|%d|%d\n", domain, ip_count, request_count_per_domain[domain] } } - ' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" + ' <([ -f "$TEMP_DIR/attack_vectors_raw.txt" ] && cat "$TEMP_DIR/attack_vectors_raw.txt" || echo "") "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" || true # Also create per-domain attack type breakdown # Format: domain|attack_type|ip|count - awk -F'|' ' - { - ip = $1 - domain = $2 - attack_type = $5 + if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then + awk -F'|' ' + { + ip = $1 + domain = $2 + attack_type = $5 - # Store as domain -> attack_type -> ip -> count - attack_data[domain][attack_type][ip]++ - attack_totals[domain][attack_type]++ - } - END { - for (domain in attack_data) { - domain_file = tmpdir "/domain_attacks_" domain ".txt" - for (attack_type in attack_data[domain]) { - total = attack_totals[domain][attack_type] - for (ip in attack_data[domain][attack_type]) { - count = attack_data[domain][attack_type][ip] - printf "%s|%d|%d\n", attack_type "|" ip, count, total + # Store as domain -> attack_type -> ip -> count + attack_data[domain][attack_type][ip]++ + attack_totals[domain][attack_type]++ + } + END { + for (domain in attack_data) { + domain_file = tmpdir "/domain_attacks_" domain ".txt" + for (attack_type in attack_data[domain]) { + total = attack_totals[domain][attack_type] + for (ip in attack_data[domain][attack_type]) { + count = attack_data[domain][attack_type][ip] + printf "%s|%d|%d\n", attack_type "|" ip, count, total + } } } } - } - ' < "$TEMP_DIR/attack_vectors_raw.txt" + ' -v tmpdir="$TEMP_DIR" < "$TEMP_DIR/attack_vectors_raw.txt" + fi print_success "Domain attack pattern analysis complete" } @@ -1624,13 +1626,13 @@ analyze_time_series() { print_info "Analyzing time-series patterns..." # Extract hourly bot traffic - cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" { + cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" { timestamp = $8 if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) { hour = ts[4] print hour } - }' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" + }' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true # Extract hourly attack traffic if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then @@ -1910,7 +1912,7 @@ detect_false_positives() { print_info "Detecting legitimate services (false positives)..." # Known monitoring service patterns and legitimate CDNs - cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{ + cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' '{ ip = $1 domain = $2 url = $3 @@ -1964,7 +1966,7 @@ generate_statistics() { # OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs # This reads the uncompressed file ONCE instead of 4+ separate reads - cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" ' + cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' -v tmpdir="$TEMP_DIR" ' { # Count by domain (for top sites) domains[$2]++ @@ -2001,17 +2003,17 @@ generate_statistics() { sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt" # Top 5 bots by request count (single decompression) - cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" {print $10}' | \ - sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" + cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" {print $10}' | \ + sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" || true # Traffic breakdown by bot type (single decompression) - cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $9}' | \ - sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" + cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $9}' | \ + sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" || true # Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep) if [ -f "$TEMP_DIR/all_domains.txt" ]; then # Create indexed bot traffic file (decompress once) - cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt" + cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt" || true while read -r domain; do echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"