diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index a574852..2a1ee82 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -846,7 +846,7 @@ analyze_headers() { # Only flag if high header suspicion score if (score >= 8) { - print ip "|header_anomaly|" score > "'"$TEMP_DIR"'/header_anomalies.txt" + print ip "|header_anomaly|" score > tmpdir "/header_anomalies.txt" } } }' < "$TEMP_DIR/parsed_logs.txt" @@ -889,15 +889,15 @@ analyze_entry_points() { # Suspicious entry points indicate bot/scanner if (match(url_lower, /wp-admin|phpmyadmin|admin|xmlrpc|shell\.php|\.env|\.git|backdoor|config\.php/)) { - print ip "|admin_entry|" url "|" status > "'"$TEMP_DIR"'/suspicious_entry_points.txt" + print ip "|admin_entry|" url "|" status > tmpdir "/suspicious_entry_points.txt" } # Legitimate entry: homepage or search else if (match(url_lower, /^\/index|^\/$|^\/search|^\/page|^\/category/)) { - print ip "|normal_entry|" url > "'"$TEMP_DIR"'/normal_entry_points.txt" + print ip "|normal_entry|" url > tmpdir "/normal_entry_points.txt" } # Unusual but possible: static files else if (match(url_lower, /\.(css|js|jpg|png|gif|woff|svg)$/)) { - print ip "|static_entry|" url > "'"$TEMP_DIR"'/static_entry_points.txt" + print ip "|static_entry|" url > tmpdir "/static_entry_points.txt" } } }' < "$TEMP_DIR/parsed_logs.txt" @@ -919,7 +919,7 @@ detect_threats() { print_info "Detecting security threats..." # Use a single AWK pass for multiple threat detections (more efficient) - awk -F'|' ' + awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 domain = $2 @@ -937,7 +937,7 @@ detect_threats() { match(url_lower, /information_schema|drop table|insert into|update.*set|delete from/) || match(url_lower, /%27.*(union|select|or |and )|hex\(|unhex\(|load_file\(/) || match(url_lower, /0x[0-9a-f]+.*(union|select|into|from|where|order)/)) { - print ip "|" domain "|" url "|" status "|sqli" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" + print ip "|" domain "|" url "|" status "|sqli" > tmpdir "/attack_vectors_raw.txt" } # XSS patterns @@ -945,7 +945,7 @@ detect_threats() { # This prevents false positives on documentation URLs like /docs/innerhtml-api-guide if (match(url_lower, / "'"$TEMP_DIR"'/attack_vectors_raw.txt" + print ip "|" domain "|" url "|" status "|xss" > tmpdir "/attack_vectors_raw.txt" } # Path Traversal / LFI @@ -953,7 +953,7 @@ detect_threats() { # FIXED: Case-insensitive hex encoding support (%5C and %5c) if (match(url_lower, /\.\.\/|\.\.\\|%2e%2e|%5c|etc\/passwd|etc\/shadow|boot\.ini|win\.ini/) || match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows(%5c|[\/\\])system32/)) { - print ip "|" domain "|" url "|" status "|path_traversal" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" + print ip "|" domain "|" url "|" status "|path_traversal" > tmpdir "/attack_vectors_raw.txt" } # Shell upload / RCE attempts @@ -963,7 +963,7 @@ detect_threats() { match(url_lower, /shell\.php|c99\.php|r57\.php|r00t\.php|backdoor|webshell|cmd\.php|exploit\.php/) || match(url_lower, /base64_decode.*eval|gzinflate.*eval|assert.*\$_/) || (match(url_lower, /\.(php|phtml|php3|php4|php5|phar)\.suspected$/) && method == "POST")) { - print ip "|" domain "|" url "|" status "|rce_upload" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" + print ip "|" domain "|" url "|" status "|rce_upload" > tmpdir "/attack_vectors_raw.txt" } # Info Disclosure attempts @@ -979,18 +979,18 @@ detect_threats() { # Only flag if successful access (200) or redirect (301/302) # Failed attempts (404/403) are just scanning, tracked separately if (status ~ /^(200|301|302)/) { - print ip "|" domain "|" url "|" status "|info_disclosure" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" + print ip "|" domain "|" url "|" status "|info_disclosure" > tmpdir "/attack_vectors_raw.txt" } } # composer.json / package.json - lower severity, only if successful if (match(url_lower, /composer\.json|package\.json|package-lock\.json/) && status == "200") { - print ip "|" domain "|" url "|" status "|config_exposure" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" + print ip "|" domain "|" url "|" status "|config_exposure" > tmpdir "/attack_vectors_raw.txt" } # Login bruteforce if (match(url_lower, /wp-login\.php|xmlrpc\.php/) && method == "POST") { - print ip "|" domain "|" url "|" status "|login_bruteforce" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" + print ip "|" domain "|" url "|" status "|login_bruteforce" > tmpdir "/attack_vectors_raw.txt" } # Admin/sensitive endpoint probing @@ -1000,28 +1000,28 @@ detect_threats() { # Only flag failed access attempts (403 Forbidden, 401 Unauthorized, 404 Not Found) # Successful access (200/302) means legitimate user or already compromised if (status ~ /^(403|401|404)/) { - print ip "|" domain "|" url > "'"$TEMP_DIR"'/admin_probes_raw.txt" + print ip "|" domain "|" url > tmpdir "/admin_probes_raw.txt" } } # 404 scanning (reconnaissance) if (status == "404" || status == "403") { - print ip "|" domain "|" url "|" status > "'"$TEMP_DIR"'/404_scans_raw.txt" + print ip "|" domain "|" url "|" status > tmpdir "/404_scans_raw.txt" } # Large data transfers (potential scraping) if (size > 1000000) { - print ip "|" domain "|" url "|" size > "'"$TEMP_DIR"'/large_transfers_raw.txt" + print ip "|" domain "|" url "|" size > tmpdir "/large_transfers_raw.txt" } # Suspicious user agents if (match(ua_lower, /nikto|nmap|masscan|sqlmap|havij|acunetix|nessus|burp/) || match(ua_lower, /metasploit| "'"$TEMP_DIR"'/suspicious_ua_raw.txt" + print ip "|" ua > tmpdir "/suspicious_ua_raw.txt" } # Track response codes for intelligence - print status > "'"$TEMP_DIR"'/response_codes_raw.txt" + print status > tmpdir "/response_codes_raw.txt" } ' < <(cat "$TEMP_DIR/parsed_logs.txt") @@ -1123,7 +1123,7 @@ analyze_url_entropy() { # If IP hits >20 URLs with lots of numeric params = scanning if (urls_per_ip[ip] > 20 && unique_path_count > 5) { # Likely fuzzing/parameter scanning - print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count > "'"$TEMP_DIR"'/fuzzing_ips.txt" + print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count > tmpdir "/fuzzing_ips.txt" } } }' < "$TEMP_DIR/parsed_logs.txt" @@ -1189,7 +1189,7 @@ analyze_request_timing() { # Very consistent timing = bot (typically 0.5-2 seconds apart) # Real users: highly variable (5-60+ seconds) if (avg_interval < 3 && count > 100) { - print ip "|consistent_bot_timing|" avg_interval "|" count > "'"$TEMP_DIR"'/timing_anomalies.txt" + print ip "|consistent_bot_timing|" avg_interval "|" count > tmpdir "/timing_anomalies.txt" } } } @@ -1965,17 +1965,17 @@ generate_statistics() { END { # Output top sites for (domain in domains) { - print domains[domain], domain > "'"$TEMP_DIR"'/top_sites_raw.txt" + print domains[domain], domain > tmpdir "/top_sites_raw.txt" } # Output top IPs for (ip in ips) { - print ips[ip], ip > "'"$TEMP_DIR"'/top_ips_raw.txt" + print ips[ip], ip > tmpdir "/top_ips_raw.txt" } # Output top URLs for (url in urls) { - print urls[url], url > "'"$TEMP_DIR"'/top_urls_raw.txt" + print urls[url], url > tmpdir "/top_urls_raw.txt" } }' @@ -3190,24 +3190,24 @@ analyze_domain_threats() { # Old approach: O(domains × high_risk_IPs × file_size) = 83 minutes for 500 domains # New approach: O(file_size) = seconds - awk -F'|' ' + awk -F'|' -v tmpdir="$TEMP_DIR" ' BEGIN { # Load high-risk IPs into memory - while ((getline < "'"$TEMP_DIR"'/threat_scores.txt") > 0) { + while ((getline < tmpdir "/threat_scores.txt") > 0) { score = $1 ip = $2 if (score >= 70) { high_risk[ip] = score } } - close("'"$TEMP_DIR"'/threat_scores.txt") + close(tmpdir "/threat_scores.txt") # Load attack vectors - while ((getline < "'"$TEMP_DIR"'/attack_vectors_raw.txt") > 0) { + while ((getline < tmpdir "/attack_vectors_raw.txt") > 0) { domain = $2 attack_counts[domain]++ } - close("'"$TEMP_DIR"'/attack_vectors_raw.txt") + close(tmpdir "/attack_vectors_raw.txt") } # Process parsed logs (single pass) @@ -3226,11 +3226,11 @@ analyze_domain_threats() { } END { # Now process classified bots - while ((getline < "'"$TEMP_DIR"'/classified_bots.txt") > 0) { + while ((getline < tmpdir "/classified_bots.txt") > 0) { domain = $2 bot_counts[domain]++ } - close("'"$TEMP_DIR"'/classified_bots.txt") + close(tmpdir "/classified_bots.txt") # Output results for each domain for (domain in domain_requests) { @@ -3242,13 +3242,15 @@ analyze_domain_threats() { high_risk_detail = domain_high_risk_ips[domain] # domain|total_requests|bot_requests|bot_percentage|high_risk_ip_count|attack_attempts|high_risk_ips_detail - printf "%s|%d|%d|%.1f|%d|%d|%s\n", domain, total_req, bot_req, bot_pct, high_risk_count, attacks, high_risk_detail > "'"$TEMP_DIR"'/domain_threats.txt" + printf "%s|%d|%d|%.1f|%d|%d|%s\n", domain, total_req, bot_req, bot_pct, high_risk_count, attacks, high_risk_detail > tmpdir "/domain_threats.txt" # Track high-risk IPs per domain if (high_risk_count > 0) { - printf "%s|%d|%s\n", domain, high_risk_count, high_risk_detail > "'"$TEMP_DIR"'/domain_high_risk_ips.txt" + printf "%s|%d|%s\n", domain, high_risk_count, high_risk_detail > tmpdir "/domain_high_risk_ips.txt" } } + close(tmpdir "/domain_threats.txt") + close(tmpdir "/domain_high_risk_ips.txt") }' "$TEMP_DIR/parsed_logs.txt" # Sort by high-risk IP count (descending)