Filter out legitimate browsers from bot analyzer
- Added intelligent browser detection filter
- Excludes Chrome, Firefox, Safari, Edge, Opera, Vivaldi, and Samsung Browser
- Treats Mozilla/5.0 user agents containing AppleWebKit/Gecko as legitimate browsers
- Filters out mobile browsers (Android, iPhone, iPad)
- Only flags actual bots, not regular user traffic
- Prevents false positives from browser user agents
This commit is contained in:
@@ -496,13 +496,26 @@ classify_bots() {
|
||||
}
|
||||
}
|
||||
} else if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python-|java\/|scan/)) {
|
||||
# FILTER OUT legitimate browsers whose user agent happens to match the generic bot pattern above
|
||||
# Common browsers: Chrome, Firefox, Safari, Edge, Opera, Samsung Browser, etc.
|
||||
if (match(ua_lower, /chrome\/|firefox\/|safari\/|edg\/|edge\/|opr\/|opera\//) ||
|
||||
match(ua_lower, /mozilla\/5\.0/) && match(ua_lower, /applewebkit|gecko/) && !match(ua_lower, /bot|crawler|spider/) ||
|
||||
match(ua_lower, /samsungbrowser|ucbrowser|yabrowser|vivaldi/) ||
|
||||
match(ua_lower, /android.*mobile|iphone|ipad|windows nt|macintosh|linux x86/) && !match(ua_lower, /bot|crawler|spider/)) {
|
||||
# This is a legitimate browser, skip it
|
||||
next
|
||||
}
|
||||
|
||||
bot_type = "unidentified_bot"
|
||||
# Extract first word of UA as bot name
|
||||
match(ua, /^[^ ]+/, name)
|
||||
bot_name = substr(name[0], 1, 30)
|
||||
}
|
||||
|
||||
print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
|
||||
# Only print if bot_type is not "unknown" (i.e., we identified it as something)
|
||||
if (bot_type != "unknown") {
|
||||
print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
|
||||
}
|
||||
}' "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"
|
||||
|
||||
if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
|
||||
|
||||
Reference in New Issue
Block a user