#!/bin/bash
################################################################################
# Bot Signature Database Library
################################################################################
# Purpose:  Shared bot classification signatures for bot-analyzer and
#           live-monitor.
# Features: Legitimate bots, AI bots, monitoring bots, suspicious bots.
# Usage:    Source this file, then call classify_bot_type / get_bot_name /
#           is_*_bot with a raw User-Agent string.
# Requires: bash 4.2+ (associative arrays, `declare -g`, `${var,,}`).
# Notes:    * This file is meant to be sourced, so it deliberately does not
#             change shell options (no `set -e`/`set -u`).
#           * Signatures are matched as case-insensitive LITERAL substrings of
#             the User-Agent (never as regular expressions).
################################################################################

# Legitimate bots (search engines)
declare -gA LEGIT_BOTS=(
  ["Googlebot"]="Google Search"
  ["Googlebot-Image"]="Google Images"
  ["Googlebot-Video"]="Google Video"
  ["Googlebot-News"]="Google News"
  ["Google-InspectionTool"]="Google Search Console"
  ["Storebot-Google"]="Google Merchant"
  ["APIs-Google"]="Google APIs"
  ["AdsBot-Google"]="Google Ads"
  ["Mediapartners-Google"]="Google AdSense"
  ["bingbot"]="Bing Search"
  ["msnbot"]="MSN Search"
  ["Slurp"]="Yahoo Search"
  ["DuckDuckBot"]="DuckDuckGo"
  ["Baiduspider"]="Baidu Search"
  ["YandexBot"]="Yandex Search"
)

# AI bots
declare -gA AI_BOTS=(
  ["GPTBot"]="OpenAI"
  ["ChatGPT-User"]="OpenAI ChatGPT"
  ["ClaudeBot"]="Anthropic Claude"
  ["Claude-Web"]="Anthropic Web"
  ["Bytespider"]="ByteDance (TikTok)"
  ["PetalBot"]="Huawei"
  ["CCBot"]="Common Crawl"
  ["anthropic-ai"]="Anthropic"
  ["Applebot"]="Apple Intelligence"
  ["facebookexternalhit"]="Facebook/Meta"
  ["Meta-ExternalAgent"]="Meta AI"
  ["cohere-ai"]="Cohere AI"
  ["PerplexityBot"]="Perplexity AI"
  ["YouBot"]="You.com AI"
  ["Diffbot"]="Diffbot AI"
  ["ImagesiftBot"]="ImageSift AI"
  ["Omgilibot"]="Omgili AI"
)

# Monitoring/SEO bots
declare -gA MONITOR_BOTS=(
  ["AhrefsBot"]="Ahrefs SEO"
  ["SemrushBot"]="SEMrush SEO"
  ["MJ12bot"]="Majestic SEO"
  ["DotBot"]="Moz/OpenSite"
  ["BLEXBot"]="BLEXBot SEO"
  ["PingdomBot"]="Pingdom Monitoring"
  ["UptimeRobot"]="Uptime Monitoring"
  ["StatusCake"]="StatusCake Monitoring"
  ["SiteImprove"]="SiteImprove Analytics"
)

# Suspicious/aggressive bots (malicious crawlers, security scanners, and
# generic HTTP client libraries)
declare -gA SUSPICIOUS_BOTS=(
  ["MauiBot"]="Malicious crawler"
  ["DataForSeoBot"]="Data scraper"
  ["ZoominfoBot"]="Data harvester"
  ["MegaIndex"]="Aggressive crawler"
  ["SeznamBot"]="Aggressive crawler"
  ["Yeti"]="Naver crawler"
  ["serpstatbot"]="SEO crawler"
  ["LinkpadBot"]="Link checker"
  ["Nessus"]="Vulnerability scanner"
  ["Nikto"]="Security scanner"
  ["sqlmap"]="SQL injection tool"
  ["ZmEu"]="Scanner/exploit"
  ["masscan"]="Port scanner"
  ["nmap"]="Port scanner"
  ["wget"]="Command-line tool"
  ["curl"]="Command-line tool"
  ["python-requests"]="Script/automation"
  ["Go-http-client"]="Go automation"
  ["Java/"]="Java client"
  ["http.rb"]="Ruby automation"
  ["python-urllib"]="Python scraper"
  ["libwww-perl"]="Perl automation"
  ["Apache-HttpClient"]="HttpClient automation"
  ["Scrapy"]="Python scraper"
  ["node-fetch"]="Node.js automation"
  ["axios"]="JavaScript automation"
)

#######################################
# Internal: test whether a User-Agent contains any of the given signatures.
# Matching is a case-insensitive literal substring test, so characters such
# as "." in "http.rb" are not treated as regex wildcards.
# Arguments: $1    - raw User-Agent string
#            $2... - signatures to look for
# Returns:   0 if at least one signature occurs in the User-Agent, else 1
#######################################
_bot_ua_matches_any() {
  local ua_lower="${1,,}"
  shift
  local signature
  for signature in "$@"; do
    if [[ "$ua_lower" == *"${signature,,}"* ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Internal: find the most specific signature contained in a User-Agent.
# Associative-array key order is unspecified, so "first hit wins" could
# report "Googlebot" for a "Googlebot-Image" agent. The longest matching
# signature is chosen instead; ties are broken lexicographically so the
# result is deterministic.
# Arguments: $1    - raw User-Agent string
#            $2... - signatures to look for
# Outputs:   the winning signature (original spelling) on stdout
# Returns:   0 if a signature matched, else 1 (nothing is printed)
#######################################
_bot_longest_match() {
  local ua_lower="${1,,}"
  shift
  local signature best=""
  for signature in "$@"; do
    [[ "$ua_lower" == *"${signature,,}"* ]] || continue
    if (( ${#signature} > ${#best} )); then
      best="$signature"
    elif (( ${#signature} == ${#best} )) && [[ "$signature" < "$best" ]]; then
      best="$signature"
    fi
  done
  [[ -n "$best" ]] || return 1
  printf '%s\n' "$best"
}

#######################################
# Check if user-agent is a legitimate (search engine) bot.
# Globals:   LEGIT_BOTS (read)
# Arguments: $1 - raw User-Agent string
# Returns:   0 (true) if legit, 1 (false) if not
#######################################
is_legit_bot() {
  _bot_ua_matches_any "${1-}" "${!LEGIT_BOTS[@]}"
}

#######################################
# Check if user-agent is an AI bot.
# Globals:   AI_BOTS (read)
# Arguments: $1 - raw User-Agent string
# Returns:   0 (true) if it matches an AI bot signature, 1 (false) if not
#######################################
is_ai_bot() {
  _bot_ua_matches_any "${1-}" "${!AI_BOTS[@]}"
}

#######################################
# Check if user-agent is a monitoring/SEO bot.
# Globals:   MONITOR_BOTS (read)
# Arguments: $1 - raw User-Agent string
# Returns:   0 (true) if it matches a monitoring signature, 1 (false) if not
#######################################
is_monitor_bot() {
  _bot_ua_matches_any "${1-}" "${!MONITOR_BOTS[@]}"
}

#######################################
# Check if user-agent is a suspicious bot.
# Globals:   SUSPICIOUS_BOTS (read)
# Arguments: $1 - raw User-Agent string
# Returns:   0 (true) if it matches a suspicious signature, 1 (false) if not
#######################################
is_suspicious_bot() {
  _bot_ua_matches_any "${1-}" "${!SUSPICIOUS_BOTS[@]}"
}

#######################################
# Classify bot type.
# Categories are checked in priority order: legit, ai, monitor, suspicious,
# then generic bot keywords.
# Arguments: $1 - raw User-Agent string
# Outputs:   one of legit|ai|monitor|suspicious|unidentified_bot|human
# Returns:   0
#######################################
classify_bot_type() {
  local ua="${1-}"
  local ua_lower="${ua,,}"
  # Markers of an ordinary browser / end-user platform. Kept in a variable so
  # the alternatives containing spaces are unambiguous to the [[ =~ ]] parser.
  local browser_re='(chrome/|firefox/|safari/|edg/|edge/|opr/|opera/|samsungbrowser|ucbrowser|yabrowser|vivaldi|android.*mobile|iphone|ipad|windows nt|macintosh|linux x86)'

  if is_legit_bot "$ua"; then
    echo "legit"
  elif is_ai_bot "$ua"; then
    echo "ai"
  elif is_monitor_bot "$ua"; then
    echo "monitor"
  elif is_suspicious_bot "$ua"; then
    echo "suspicious"
  elif [[ "$ua_lower" =~ (bot|crawler|spider) ]]; then
    # Any agent advertising itself as bot/crawler/spider is a bot, even when
    # it also carries browser tokens (many crawlers imitate Chrome).
    echo "unidentified_bot"
  elif [[ "$ua_lower" == *scraper* ]]; then
    # "scraper" alone is weaker evidence: an agent that otherwise looks like a
    # regular browser is given the benefit of the doubt.
    if [[ "$ua_lower" =~ $browser_re ]]; then
      echo "human"
    else
      echo "unidentified_bot"
    fi
  else
    echo "human"
  fi
}

#######################################
# Get bot name from user-agent.
# Categories are searched in the order legit, ai, monitor, suspicious; within
# a category the longest matching signature wins.
# Globals:   LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS (read)
# Arguments: $1 - raw User-Agent string
# Outputs:   the display name of the matched signature, or, when nothing
#            matches, the first whitespace-delimited word of the User-Agent
#            truncated to 30 characters
# Returns:   0 when a known signature matched; otherwise awk's exit status
#######################################
get_bot_name() {
  local ua="${1-}"
  local signature

  if signature=$(_bot_longest_match "$ua" "${!LEGIT_BOTS[@]}"); then
    printf '%s\n' "${LEGIT_BOTS[$signature]}"
    return 0
  fi
  if signature=$(_bot_longest_match "$ua" "${!AI_BOTS[@]}"); then
    printf '%s\n' "${AI_BOTS[$signature]}"
    return 0
  fi
  if signature=$(_bot_longest_match "$ua" "${!MONITOR_BOTS[@]}"); then
    printf '%s\n' "${MONITOR_BOTS[$signature]}"
    return 0
  fi
  if signature=$(_bot_longest_match "$ua" "${!SUSPICIOUS_BOTS[@]}"); then
    printf '%s\n' "${SUSPICIOUS_BOTS[$signature]}"
    return 0
  fi

  # Extract first word as bot name if unidentified. printf (not echo) so that
  # agents such as "-n" or "-e" are passed through verbatim.
  printf '%s\n' "$ua" | awk '{print substr($1, 1, 30)}'
}

# The helpers are exported too, so the exported public functions keep working
# in child bash processes.
export -f _bot_ua_matches_any
export -f _bot_longest_match
export -f is_legit_bot
export -f is_ai_bot
export -f is_monitor_bot
export -f is_suspicious_bot
export -f classify_bot_type
export -f get_bot_name