Linux-Server-Management-Too…/lib/bot-signatures.sh

#!/bin/bash

################################################################################
# Bot Signature Database Library
################################################################################
# Purpose: Shared bot classification signatures for bot-analyzer and live-monitor
# Features: Legitimate bots, AI bots, monitoring bots, suspicious bots
################################################################################

# Search-engine crawlers treated as legitimate traffic.
# Key: signature token searched for in the user-agent. Value: display name.
declare -gA LEGIT_BOTS=()

# Google crawler family
LEGIT_BOTS['Googlebot']='Google Search'
LEGIT_BOTS['Googlebot-Image']='Google Images'
LEGIT_BOTS['Googlebot-Video']='Google Video'
LEGIT_BOTS['Googlebot-News']='Google News'
LEGIT_BOTS['Google-InspectionTool']='Google Search Console'
LEGIT_BOTS['Storebot-Google']='Google Merchant'
LEGIT_BOTS['APIs-Google']='Google APIs'
LEGIT_BOTS['AdsBot-Google']='Google Ads'
LEGIT_BOTS['Mediapartners-Google']='Google AdSense'

# Other search engines
LEGIT_BOTS['bingbot']='Bing Search'
LEGIT_BOTS['msnbot']='MSN Search'
LEGIT_BOTS['Slurp']='Yahoo Search'
LEGIT_BOTS['DuckDuckBot']='DuckDuckGo'
LEGIT_BOTS['Baiduspider']='Baidu Search'
LEGIT_BOTS['YandexBot']='Yandex Search'

# Crawlers and fetchers classified as AI bots.
# Key: signature token searched for in the user-agent. Value: display name.
declare -gA AI_BOTS=()

AI_BOTS['GPTBot']='OpenAI'
AI_BOTS['ChatGPT-User']='OpenAI ChatGPT'
AI_BOTS['ClaudeBot']='Anthropic Claude'
AI_BOTS['Claude-Web']='Anthropic Web'
AI_BOTS['Bytespider']='ByteDance (TikTok)'
AI_BOTS['PetalBot']='Huawei'
AI_BOTS['CCBot']='Common Crawl'
AI_BOTS['anthropic-ai']='Anthropic'
AI_BOTS['Applebot']='Apple Intelligence'
AI_BOTS['facebookexternalhit']='Facebook/Meta'
AI_BOTS['Meta-ExternalAgent']='Meta AI'
AI_BOTS['cohere-ai']='Cohere AI'
AI_BOTS['PerplexityBot']='Perplexity AI'
AI_BOTS['YouBot']='You.com AI'
AI_BOTS['Diffbot']='Diffbot AI'
AI_BOTS['ImagesiftBot']='ImageSift AI'
AI_BOTS['Omgilibot']='Omgili AI'

# SEO crawlers and uptime/analytics monitors.
# Key: signature token searched for in the user-agent. Value: display name.
declare -gA MONITOR_BOTS=()

# SEO crawlers
MONITOR_BOTS['AhrefsBot']='Ahrefs SEO'
MONITOR_BOTS['SemrushBot']='SEMrush SEO'
MONITOR_BOTS['MJ12bot']='Majestic SEO'
MONITOR_BOTS['DotBot']='Moz/OpenSite'
MONITOR_BOTS['BLEXBot']='BLEXBot SEO'

# Monitoring and analytics services
MONITOR_BOTS['PingdomBot']='Pingdom Monitoring'
MONITOR_BOTS['UptimeRobot']='Uptime Monitoring'
MONITOR_BOTS['StatusCake']='StatusCake Monitoring'
MONITOR_BOTS['SiteImprove']='SiteImprove Analytics'

# Suspicious or aggressive clients: hostile crawlers, security scanners and
# generic HTTP libraries / command-line tools.
# Key: signature token searched for in the user-agent. Value: display name.
declare -gA SUSPICIOUS_BOTS=()

# Crawlers, scrapers and harvesters
SUSPICIOUS_BOTS['MauiBot']='Malicious crawler'
SUSPICIOUS_BOTS['DataForSeoBot']='Data scraper'
SUSPICIOUS_BOTS['ZoominfoBot']='Data harvester'
SUSPICIOUS_BOTS['MegaIndex']='Aggressive crawler'
SUSPICIOUS_BOTS['SeznamBot']='Aggressive crawler'
SUSPICIOUS_BOTS['Yeti']='Naver crawler'
SUSPICIOUS_BOTS['serpstatbot']='SEO crawler'
SUSPICIOUS_BOTS['LinkpadBot']='Link checker'

# Security and port scanners
SUSPICIOUS_BOTS['Nessus']='Vulnerability scanner'
SUSPICIOUS_BOTS['Nikto']='Security scanner'
SUSPICIOUS_BOTS['sqlmap']='SQL injection tool'
SUSPICIOUS_BOTS['ZmEu']='Scanner/exploit'
SUSPICIOUS_BOTS['masscan']='Port scanner'
SUSPICIOUS_BOTS['nmap']='Port scanner'

# Command-line tools and HTTP client libraries
SUSPICIOUS_BOTS['wget']='Command-line tool'
SUSPICIOUS_BOTS['curl']='Command-line tool'
SUSPICIOUS_BOTS['python-requests']='Script/automation'
SUSPICIOUS_BOTS['Go-http-client']='Go automation'
SUSPICIOUS_BOTS['Java/']='Java client'
SUSPICIOUS_BOTS['http.rb']='Ruby automation'
SUSPICIOUS_BOTS['python-urllib']='Python scraper'
SUSPICIOUS_BOTS['libwww-perl']='Perl automation'
SUSPICIOUS_BOTS['Apache-HttpClient']='HttpClient automation'
SUSPICIOUS_BOTS['Scrapy']='Python scraper'
SUSPICIOUS_BOTS['node-fetch']='Node.js automation'
SUSPICIOUS_BOTS['axios']='JavaScript automation'

# Check whether a user-agent contains a known search-engine signature.
# Globals:   LEGIT_BOTS (read) - its keys are the signatures to look for
# Arguments: $1 - raw user-agent string
# Returns:   0 (true) if any signature occurs in the user-agent, 1 (false) if not
is_legit_bot() {
    local ua="$1"
    # Lower-case with parameter expansion: no subshell, and values such as
    # "-n" are not swallowed the way `echo "$ua"` would swallow them.
    local ua_lower="${ua,,}"
    local sig  # kept local so a caller's own loop variable is never clobbered

    for sig in "${!LEGIT_BOTS[@]}"; do
        # The quoted part of the pattern is literal, so this is a plain
        # case-insensitive substring test; signature characters such as "."
        # are never treated as regex operators.
        if [[ "$ua_lower" == *"${sig,,}"* ]]; then
            return 0
        fi
    done

    return 1
}

# Check whether a user-agent contains a known AI-bot signature.
# Globals:   AI_BOTS (read) - its keys are the signatures to look for
# Arguments: $1 - raw user-agent string
# Returns:   0 (true) if any signature occurs in the user-agent, 1 (false) if not
is_ai_bot() {
    local ua="$1"
    # Lower-case with parameter expansion: no subshell, and values such as
    # "-n" are not swallowed the way `echo "$ua"` would swallow them.
    local ua_lower="${ua,,}"
    local sig  # kept local so a caller's own loop variable is never clobbered

    for sig in "${!AI_BOTS[@]}"; do
        # The quoted part of the pattern is literal, so this is a plain
        # case-insensitive substring test; signature characters such as "."
        # are never treated as regex operators.
        if [[ "$ua_lower" == *"${sig,,}"* ]]; then
            return 0
        fi
    done

    return 1
}

# Check whether a user-agent contains a known monitoring/SEO bot signature.
# Globals:   MONITOR_BOTS (read) - its keys are the signatures to look for
# Arguments: $1 - raw user-agent string
# Returns:   0 (true) if any signature occurs in the user-agent, 1 (false) if not
is_monitor_bot() {
    local ua="$1"
    # Lower-case with parameter expansion: no subshell, and values such as
    # "-n" are not swallowed the way `echo "$ua"` would swallow them.
    local ua_lower="${ua,,}"
    local sig  # kept local so a caller's own loop variable is never clobbered

    for sig in "${!MONITOR_BOTS[@]}"; do
        # The quoted part of the pattern is literal, so this is a plain
        # case-insensitive substring test; signature characters such as "."
        # are never treated as regex operators.
        if [[ "$ua_lower" == *"${sig,,}"* ]]; then
            return 0
        fi
    done

    return 1
}

# Check whether a user-agent contains a known suspicious-client signature.
# Globals:   SUSPICIOUS_BOTS (read) - its keys are the signatures to look for
# Arguments: $1 - raw user-agent string
# Returns:   0 (true) if any signature occurs in the user-agent, 1 (false) if not
is_suspicious_bot() {
    local ua="$1"
    # Lower-case with parameter expansion: no subshell, and values such as
    # "-n" are not swallowed the way `echo "$ua"` would swallow them.
    local ua_lower="${ua,,}"
    local sig  # kept local so a caller's own loop variable is never clobbered

    for sig in "${!SUSPICIOUS_BOTS[@]}"; do
        # The quoted part of the pattern is literal, so this is a plain
        # case-insensitive substring test. This matters for signatures such
        # as "http.rb", whose "." must not match an arbitrary character.
        if [[ "$ua_lower" == *"${sig,,}"* ]]; then
            return 0
        fi
    done

    return 1
}

# Classify a user-agent into a single traffic category.
# Arguments: $1 - raw user-agent string
# Outputs:   exactly one of legit|ai|monitor|suspicious|unidentified_bot|human
#            on stdout
# Calls:     is_legit_bot, is_ai_bot, is_monitor_bot, is_suspicious_bot
classify_bot_type() {
    local ua="$1"
    local ua_lower="${ua,,}"

    # Generic keywords that mark a user-agent as "probably automated".
    local generic_re='bot|crawler|spider|scraper'
    # Subset of the keywords that always overrides the browser exemption.
    local strong_re='bot|crawler|spider'
    # Tokens that indicate a regular browser / desktop or mobile platform.
    local browser_re='chrome/|firefox/|safari/|edg/|edge/|opr/|opera/'
    browser_re+='|samsungbrowser|ucbrowser|yabrowser|vivaldi'
    browser_re+='|android.*mobile|iphone|ipad|windows nt|macintosh|linux x86'

    # Known signatures win, checked in priority order.
    if is_legit_bot "$ua"; then
        echo "legit"
    elif is_ai_bot "$ua"; then
        echo "ai"
    elif is_monitor_bot "$ua"; then
        echo "monitor"
    elif is_suspicious_bot "$ua"; then
        echo "suspicious"
    elif [[ ! "$ua_lower" =~ $generic_re ]]; then
        # No bot-like keyword at all.
        echo "human"
    elif [[ "$ua_lower" =~ $browser_re && ! "$ua_lower" =~ $strong_re ]]; then
        # Browser exemption: (any browser token) AND (no strong keyword).
        # NOTE(review): a user-agent only reaches this test after matching
        # bot|crawler|spider|scraper, so the exemption can fire only when
        # "scraper" is the sole keyword. A browser UA that merely contains
        # "bot" (e.g. in a device name) is still reported as
        # unidentified_bot - confirm that this is the intended behaviour.
        echo "human"
    else
        echo "unidentified_bot"
    fi
}

# Resolve a user-agent to a human-readable bot name.
# Globals:   LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS (read)
# Arguments: $1 - raw user-agent string
# Outputs:   the display name of the matching signature on stdout; when no
#            signature matches, the first whitespace-separated token of the
#            user-agent, truncated to 30 characters
# Returns:   0 when a signature matched; otherwise the status of the fallback
#            pipeline
#
# Tables are consulted in the order listed above. Within a table the LONGEST
# matching signature wins, so "Googlebot-Image/1.0" resolves to the
# Googlebot-Image entry instead of whichever of Googlebot / Googlebot-Image
# the associative array happens to iterate first.
get_bot_name() {
    local ua="$1"
    local ua_lower="${ua,,}"
    local sig best  # local so callers' variables are never clobbered

    best=""
    for sig in "${!LEGIT_BOTS[@]}"; do
        # Quoted pattern part => literal, case-insensitive substring match.
        if [[ "$ua_lower" == *"${sig,,}"* ]] && (( ${#sig} > ${#best} )); then
            best="$sig"
        fi
    done
    if [[ -n "$best" ]]; then
        printf '%s\n' "${LEGIT_BOTS[$best]}"
        return 0
    fi

    best=""
    for sig in "${!AI_BOTS[@]}"; do
        if [[ "$ua_lower" == *"${sig,,}"* ]] && (( ${#sig} > ${#best} )); then
            best="$sig"
        fi
    done
    if [[ -n "$best" ]]; then
        printf '%s\n' "${AI_BOTS[$best]}"
        return 0
    fi

    best=""
    for sig in "${!MONITOR_BOTS[@]}"; do
        if [[ "$ua_lower" == *"${sig,,}"* ]] && (( ${#sig} > ${#best} )); then
            best="$sig"
        fi
    done
    if [[ -n "$best" ]]; then
        printf '%s\n' "${MONITOR_BOTS[$best]}"
        return 0
    fi

    best=""
    for sig in "${!SUSPICIOUS_BOTS[@]}"; do
        if [[ "$ua_lower" == *"${sig,,}"* ]] && (( ${#sig} > ${#best} )); then
            best="$sig"
        fi
    done
    if [[ -n "$best" ]]; then
        printf '%s\n' "${SUSPICIOUS_BOTS[$best]}"
        return 0
    fi

    # Unidentified: fall back to the first token of the user-agent.
    # printf (not echo) so a user-agent like "-n" is passed through verbatim.
    printf '%s\n' "$ua" | awk '{print substr($1, 1, 30)}'
}

# Make the classification helpers visible to child bash processes.
# NOTE(review): bash cannot export array variables, so a child shell that
# inherits these functions presumably has to source this file again to get
# the signature tables - verify against the callers.
export -f \
    is_legit_bot \
    is_ai_bot \
    is_monitor_bot \
    is_suspicious_bot \
    classify_bot_type \
    get_bot_name