diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index 7e9bde6..c7fa79d 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -496,13 +496,26 @@ classify_bots() { } } } else if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python-|java\/|scan/)) { + # FILTER OUT legitimate browsers that might contain "bot" in version strings + # Common browsers: Chrome, Firefox, Safari, Edge, Opera, Samsung Browser, etc. + if (match(ua_lower, /chrome\/|firefox\/|safari\/|edg\/|edge\/|opr\/|opera\//) || + match(ua_lower, /mozilla\/5\.0/) && match(ua_lower, /applewebkit|gecko/) && !match(ua_lower, /bot|crawler|spider/) || + match(ua_lower, /samsungbrowser|ucbrowser|yabrowser|vivaldi/) || + match(ua_lower, /android.*mobile|iphone|ipad|windows nt|macintosh|linux x86/) && !match(ua_lower, /bot|crawler|spider/)) { + # This is a legitimate browser, skip it + next + } + bot_type = "unidentified_bot" # Extract first word of UA as bot name match(ua, /^[^ ]+/, name) bot_name = substr(name[0], 1, 30) } - print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name + # Only print if bot_type is not "unknown" (i.e., we identified it as something) + if (bot_type != "unknown") { + print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name + } }' "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt" if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then