Filter out legitimate browsers from bot analyzer
- Added intelligent browser detection filter
- Excludes Chrome, Firefox, Safari, Edge, Opera, Vivaldi, Samsung Browser
- Detects Mozilla/5.0 with AppleWebKit/Gecko as legitimate browsers
- Filters mobile browsers (Android, iPhone, iPad)
- Only flags actual bots, not regular user traffic
- Prevents false positives from browser user agents
This commit is contained in:
@@ -496,13 +496,26 @@ classify_bots() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python-|java\/|scan/)) {
|
} else if (match(ua_lower, /bot|crawler|spider|scraper|curl|wget|python-|java\/|scan/)) {
|
||||||
|
# FILTER OUT legitimate browsers that might contain "bot" in version strings
|
||||||
|
# Common browsers: Chrome, Firefox, Safari, Edge, Opera, Samsung Browser, etc.
|
||||||
|
if (match(ua_lower, /chrome\/|firefox\/|safari\/|edg\/|edge\/|opr\/|opera\//) ||
|
||||||
|
match(ua_lower, /mozilla\/5\.0/) && match(ua_lower, /applewebkit|gecko/) && !match(ua_lower, /bot|crawler|spider/) ||
|
||||||
|
match(ua_lower, /samsungbrowser|ucbrowser|yabrowser|vivaldi/) ||
|
||||||
|
match(ua_lower, /android.*mobile|iphone|ipad|windows nt|macintosh|linux x86/) && !match(ua_lower, /bot|crawler|spider/)) {
|
||||||
|
# This is a legitimate browser, skip it
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
bot_type = "unidentified_bot"
|
bot_type = "unidentified_bot"
|
||||||
# Extract first word of UA as bot name
|
# Extract first word of UA as bot name
|
||||||
match(ua, /^[^ ]+/, name)
|
match(ua, /^[^ ]+/, name)
|
||||||
bot_name = substr(name[0], 1, 30)
|
bot_name = substr(name[0], 1, 30)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Only print if bot_type is not "unknown" (i.e., we identified it as something)
|
||||||
|
if (bot_type != "unknown") {
|
||||||
print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
|
print ip "|" domain "|" url "|" status "|" size "|" ua "|" method "|" timestamp "|" bot_type "|" bot_name
|
||||||
|
}
|
||||||
}' "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"
|
}' "$TEMP_DIR/parsed_logs.txt" > "$TEMP_DIR/classified_bots.txt"
|
||||||
|
|
||||||
if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
|
if [ ! -s "$TEMP_DIR/classified_bots.txt" ]; then
|
||||||
|
|||||||
Reference in New Issue
Block a user