From e396df5b1a9b6f4df24a3e4d4270d1f4191b061f Mon Sep 17 00:00:00 2001 From: cschantz Date: Mon, 3 Nov 2025 19:05:39 -0500 Subject: [PATCH] Filter out legitimate browsers from bot analyzer - Added intelligent browser detection filter - Excludes Chrome, Firefox, Safari, Edge, Opera, Vivaldi, Samsung Browser - Detects Mozilla/5.0 with AppleWebKit/Gecko as legitimate browsers - Filters mobile browsers (Android, iPhone, iPad) - Only flags actual bots, not regular user traffic - Prevents false positives from browser user agents --- modules/security/bot-analyzer.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index 7e9bde6..c7fa79d 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -496,13 +496,26 @@ classify_bots() { } } } else if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python-|java\/|scan/)) { + # FILTER OUT legitimate browsers that might contain "bot" in version strings + # Common browsers: Chrome, Firefox, Safari, Edge, Opera, Samsung Browser, etc. + if (match(ua_lower, /chrome\/|firefox\/|safari\/|edg\/|edge\/|opr\/|opera\//) || + match(ua_lower, /mozilla\/5\.0/) && match(ua_lower, /applewebkit|gecko/) && !match(ua_lower, /bot|crawler|spider/) || + match(ua_lower, /samsungbrowser|ucbrowser|yabrowser|vivaldi/) || + match(ua_lower, /android.*mobile|iphone|ipad|windows nt|macintosh|linux x86/) && !match(ua_lower, /bot|crawler|spider/)) { + # This is a legitimate browser, skip it + next + } + bot_type = "unidentified_bot" # Extract first word of UA as bot name match(ua, /^[^ ]+/, name) bot_name = substr(name[0], 1, 30) } - print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name + # Only print if bot_type is not "unknown" (i.e., we identified it as something) + if (bot_type != "unknown") { + print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name + } }' "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt" if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then