diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index 1ee80aa..de0bac8 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -1626,13 +1626,13 @@ analyze_time_series() { print_info "Analyzing time-series patterns..." # Extract hourly bot traffic - cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" { + awk -F'|' '$9 != "unknown" { timestamp = $8 if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) { hour = ts[4] print hour } - }' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true + }' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true # Extract hourly attack traffic if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then @@ -1912,7 +1912,7 @@ detect_false_positives() { print_info "Detecting legitimate services (false positives)..." # Known monitoring service patterns and legitimate CDNs - cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' '{ + awk -F'|' '{ ip = $1 domain = $2 url = $3 @@ -1952,7 +1952,7 @@ detect_false_positives() { else if (match(url, /checkout|payment|paypal|stripe|square/) && match(ua, /paypal|stripe|square/)) { print ip "|Payment Processor|" ua "|" domain } - }' | sort -u > "$TEMP_DIR/false_positives.txt" || true + }' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u > "$TEMP_DIR/false_positives.txt" || true print_success "False positive detection complete ($(wc -l < "$TEMP_DIR/false_positives.txt" 2>/dev/null || echo 0) legitimate services identified)" } @@ -1966,7 +1966,7 @@ generate_statistics() { # OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs # This reads the uncompressed file ONCE instead of 4+ separate reads - cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' -v tmpdir="$TEMP_DIR" ' + awk -F'|' -v tmpdir="$TEMP_DIR" ' { # Count by domain (for top sites) domains[$2]++ @@ -1995,7 +1995,7 @@ generate_statistics() { close(tmpdir "/top_sites_raw.txt") close(tmpdir "/top_ips_raw.txt") close(tmpdir "/top_urls_raw.txt") - }' + }' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null # Sort and limit results (files may not exist if no data) [ -f "$TEMP_DIR/top_sites_raw.txt" ] && sort -rn "$TEMP_DIR/top_sites_raw.txt" | head -5 > "$TEMP_DIR/top_sites.txt" || touch "$TEMP_DIR/top_sites.txt" @@ -2003,17 +2003,17 @@ generate_statistics() { [ -f "$TEMP_DIR/top_urls_raw.txt" ] && sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt" || touch "$TEMP_DIR/top_urls.txt" # Top 5 bots by request count (single decompression) - cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" {print $10}' | \ + awk -F'|' '$9 != "unknown" {print $10}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | \ sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" || true # Traffic breakdown by bot type (single decompression) - cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $9}' | \ + awk -F'|' '{print $9}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | \ sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" || true # Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep) if [ -f "$TEMP_DIR/all_domains.txt" ]; then # Create indexed bot traffic file (decompress once) - cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt" || true + awk -F'|' '{print $2"|"$9}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null > "$TEMP_DIR/domain_bot_types.txt" || true while read -r domain; do echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt" @@ -2633,7 +2633,7 @@ generate_report() { # Calculate total bot bandwidth total_bot_bandwidth=0 if [ -f "$TEMP_DIR/classified_bots.txt.gz" ]; then - total_bot_bandwidth=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}') + total_bot_bandwidth=$(awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/classified_bots.txt") fi if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then @@ -2642,7 +2642,7 @@ generate_report() { # Estimate cost at $0.09/GB (typical CDN pricing) estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}") - total_bandwidth=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}') + total_bandwidth=$(awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/parsed_logs.txt") bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}") echo ""