diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index 20151cb..c10bc5b 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -622,22 +622,27 @@ detect_threats() { ua_lower = tolower(ua) # SQL Injection patterns (enhanced) + # FIXED: Hex pattern now requires SQL context to avoid false positives on blockchain/product IDs if (match(url_lower, /union.*select|concat\(|benchmark\(|sleep\(|waitfor|cast\(|exec\(/) || match(url_lower, /information_schema|drop table|insert into|update.*set|delete from/) || - match(url_lower, /%27|0x[0-9a-f]+|hex\(|unhex\(|load_file\(/)) { + match(url_lower, /%27.*(union|select|or |and )|hex\(|unhex\(|load_file\(/) || + match(url_lower, /0x[0-9a-f]+.*(union|select|into|from|where|order)/)) { print ip "|" domain "|" url "|" status "|sqli" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" } # XSS patterns + # FIXED: DOM-based patterns (document.cookie, .innerhtml) only flagged in query strings + # This prevents false positives on documentation URLs like /docs/innerhtml-api-guide if (match(url_lower, / "'"$TEMP_DIR"'/attack_vectors_raw.txt" } # Path Traversal / LFI # FIXED: Added URL-encoded variants (%2e%2e, %5c for backslash) + # FIXED: Case-insensitive hex encoding support (%5C and %5c) if (match(url_lower, /\.\.\/|\.\.\\|%2e%2e|%5c|etc\/passwd|etc\/shadow|boot\.ini|win\.ini/) || - match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows[\/\\]system32|windows%5csystem32/)) { + match(url_lower, /proc\/self|proc\/environ|\/etc\/|c:\\|c:%5c|windows(%5c|[\/\\])system32/)) { print ip "|" domain "|" url "|" status "|path_traversal" > "'"$TEMP_DIR"'/attack_vectors_raw.txt" } @@ -655,10 +660,12 @@ detect_threats() { # FIXED: Added status code validation - only flag successful access (200/301/302) # FIXED: readme pattern now only matches actual files (.txt, .html, .md) # FIXED: Added more backup file extensions and URL-encoded variants - if (match(url_lower, /\.git\/|\.env|\.sql$|\.bak$|\.old$|\.backup$|\.orig$|\.swp$|\.sav$|~$|config\.php|phpinfo/) || + # FIXED: phpinfo now only matches .php files (not documentation URLs) + # FIXED: Removed sitemap.xml.gz (intentionally public for SEO) + if (match(url_lower, /\.git\/|\.env|\.sql$|\.bak$|\.old$|\.backup$|\.orig$|\.swp$|\.sav$|~$|config\.php|phpinfo\.php/) || match(url_lower, /readme\.(txt|html|md)$/) || match(url_lower, /web\.config|\.htaccess|\.htpasswd/) || - match(url_lower, /database\.sql|backup\.zip|backup\.tar|dump\.sql|sitemap\.xml\.gz/)) { + match(url_lower, /database\.sql|backup\.zip|backup\.tar|dump\.sql/)) { # Only flag if successful access (200) or redirect (301/302) # Failed attempts (404/403) are just scanning, tracked separately if (status ~ /^(200|301|302)/) {