Integrate shared libraries into bot-analyzer

- Remove duplicate bot signatures (77 lines), now use lib/bot-signatures.sh
- Add threat intelligence integration with AbuseIPDB and GeoIP
- Enhance threat scoring with external reputation data
- Add bonuses: +15/+8/+3 for AbuseIPDB confidence >=75/>=50/>=25, +5 for high-risk countries
- Bot analyzer now shares intelligence with live-attack-monitor
This commit is contained in:
cschantz
2025-11-14 20:42:18 -05:00
parent 851dfdb30c
commit 45ec5413ac
+25 -74
View File
@@ -28,6 +28,9 @@ source "$SCRIPT_DIR/lib/common-functions.sh"
source "$SCRIPT_DIR/lib/system-detect.sh"
source "$SCRIPT_DIR/lib/user-manager.sh"
source "$SCRIPT_DIR/lib/ip-reputation.sh"
source "$SCRIPT_DIR/lib/bot-signatures.sh"
source "$SCRIPT_DIR/lib/attack-patterns.sh"
source "$SCRIPT_DIR/lib/threat-intelligence.sh"
# Default configuration (auto-detected from system)
# LOG_DIR takes the value of SYS_LOG_DIR when that variable is set and
# non-empty; otherwise it falls back to the hard-coded Apache domlogs path.
# NOTE(review): SYS_LOG_DIR is presumably set by lib/system-detect.sh
# (sourced above) - confirm.
LOG_DIR="${SYS_LOG_DIR:-/var/log/apache2/domlogs}"
@@ -230,80 +233,8 @@ trap "rm -rf $TEMP_DIR" EXIT
#############################################################################
# Bot Signature Database
#############################################################################
# Legitimate bots (search engines)
# Associative array: bot token (key) -> human-readable service label (value).
# The array is reset to empty first so any earlier definition of the same
# name is fully replaced, then populated one entry at a time.
declare -A LEGIT_BOTS=()
# Google crawlers
LEGIT_BOTS['Googlebot']='Google Search'
LEGIT_BOTS['Googlebot-Image']='Google Images'
LEGIT_BOTS['Googlebot-Video']='Google Video'
LEGIT_BOTS['Googlebot-News']='Google News'
LEGIT_BOTS['Google-InspectionTool']='Google Search Console'
LEGIT_BOTS['Storebot-Google']='Google Merchant'
LEGIT_BOTS['APIs-Google']='Google APIs'
LEGIT_BOTS['AdsBot-Google']='Google Ads'
LEGIT_BOTS['Mediapartners-Google']='Google AdSense'
# Other search engines
LEGIT_BOTS['bingbot']='Bing Search'
LEGIT_BOTS['msnbot']='MSN Search'
LEGIT_BOTS['Slurp']='Yahoo Search'
LEGIT_BOTS['DuckDuckBot']='DuckDuckGo'
LEGIT_BOTS['Baiduspider']='Baidu Search'
LEGIT_BOTS['YandexBot']='Yandex Search'
# AI Bots
# Associative array: bot token (key) -> operator / product label (value).
# Reset to empty first so any earlier definition of the same name is fully
# replaced, then populated one entry at a time.
declare -A AI_BOTS=()
AI_BOTS['GPTBot']='OpenAI'
AI_BOTS['ChatGPT-User']='OpenAI ChatGPT'
AI_BOTS['ClaudeBot']='Anthropic Claude'
AI_BOTS['Claude-Web']='Anthropic Web'
AI_BOTS['Bytespider']='ByteDance (TikTok)'
AI_BOTS['PetalBot']='Huawei'
AI_BOTS['CCBot']='Common Crawl'
AI_BOTS['anthropic-ai']='Anthropic'
AI_BOTS['Applebot']='Apple Intelligence'
AI_BOTS['facebookexternalhit']='Facebook/Meta'
AI_BOTS['Meta-ExternalAgent']='Meta AI'
AI_BOTS['cohere-ai']='Cohere AI'
AI_BOTS['PerplexityBot']='Perplexity AI'
AI_BOTS['YouBot']='You.com AI'
AI_BOTS['Diffbot']='Diffbot AI'
AI_BOTS['ImagesiftBot']='ImageSift AI'
AI_BOTS['Omgilibot']='Omgili AI'
# Monitoring/SEO bots
# Associative array: bot token (key) -> tool / vendor label (value).
# Reset to empty first so any earlier definition of the same name is fully
# replaced, then populated one entry at a time.
declare -A MONITOR_BOTS=()
# SEO crawlers
MONITOR_BOTS['AhrefsBot']='Ahrefs SEO'
MONITOR_BOTS['SemrushBot']='SEMrush SEO'
MONITOR_BOTS['MJ12bot']='Majestic SEO'
MONITOR_BOTS['DotBot']='Moz/OpenSite'
MONITOR_BOTS['BLEXBot']='BLEXBot SEO'
# Uptime / analytics monitors
MONITOR_BOTS['PingdomBot']='Pingdom Monitoring'
MONITOR_BOTS['UptimeRobot']='Uptime Monitoring'
MONITOR_BOTS['StatusCake']='StatusCake Monitoring'
MONITOR_BOTS['SiteImprove']='SiteImprove Analytics'
# Suspicious/Aggressive bots (malicious or security scanners)
# Associative array: client token (key) -> short classification label (value).
# Reset to empty first so any earlier definition of the same name is fully
# replaced, then populated one entry at a time.
declare -A SUSPICIOUS_BOTS=()
# Crawlers / scrapers
SUSPICIOUS_BOTS['MauiBot']='Malicious crawler'
SUSPICIOUS_BOTS['DataForSeoBot']='Data scraper'
SUSPICIOUS_BOTS['ZoominfoBot']='Data harvester'
SUSPICIOUS_BOTS['MegaIndex']='Aggressive crawler'
SUSPICIOUS_BOTS['SeznamBot']='Aggressive crawler'
SUSPICIOUS_BOTS['Yeti']='Naver crawler'
SUSPICIOUS_BOTS['serpstatbot']='SEO crawler'
SUSPICIOUS_BOTS['LinkpadBot']='Link checker'
# Scanners / exploit tools
SUSPICIOUS_BOTS['Nessus']='Vulnerability scanner'
SUSPICIOUS_BOTS['Nikto']='Security scanner'
SUSPICIOUS_BOTS['sqlmap']='SQL injection tool'
SUSPICIOUS_BOTS['ZmEu']='Scanner/exploit'
SUSPICIOUS_BOTS['masscan']='Port scanner'
SUSPICIOUS_BOTS['nmap']='Port scanner'
# Generic command-line / scripted clients
SUSPICIOUS_BOTS['wget']='Command-line tool'
SUSPICIOUS_BOTS['curl']='Command-line tool'
SUSPICIOUS_BOTS['python-requests']='Script/automation'
# NOTE: Bot signatures now loaded from lib/bot-signatures.sh
# Arrays available: LEGIT_BOTS, AI_BOTS, MONITOR_BOTS, SUSPICIOUS_BOTS
#############################################################################
# Helper Functions
@@ -933,6 +864,26 @@ calculate_threat_scores() {
scan_404=${threat_404_count[$ip]:-0}
[ "$scan_404" -gt 50 ] 2>/dev/null && score=$((score + 3))
# Threat Intelligence Enrichment (from external sources)
# Check AbuseIPDB reputation. The reply is parsed as four '|'-separated
# fields: confidence|reports|country|isp. The declaration is kept separate
# from the assignment so `local` does not mask the lookup's exit status, and
# a failed lookup replaces the value wholesale with neutral defaults.
local abuse_data
abuse_data=$(check_abuseipdb "$ip" 2>/dev/null) || abuse_data="0|0|Unknown|Unknown"
IFS='|' read -r abuse_confidence abuse_reports abuse_country abuse_isp <<< "$abuse_data"
# The lookup can exit 0 yet print nothing or a non-numeric first field
# (e.g. an error string). Normalise to 0 so the numeric tests below never
# fail with "integer expression expected"; the probe uses the same `[`
# parser as those tests, so every value they accept is preserved.
[ "$abuse_confidence" -ge 0 ] 2>/dev/null || abuse_confidence=0
# Add bonus for known malicious IPs
if [ "$abuse_confidence" -ge 75 ]; then
  score=$((score + 15)) # High confidence malicious
elif [ "$abuse_confidence" -ge 50 ]; then
  score=$((score + 8)) # Moderate confidence
elif [ "$abuse_confidence" -ge 25 ]; then
  score=$((score + 3)) # Low confidence
fi
# Geographic risk assessment
# "XX" stands in for an unknown country, both when the lookup fails and when
# it succeeds but prints nothing.
local geo_country
geo_country=$(get_country_code "$ip" 2>/dev/null) || geo_country="XX"
if is_high_risk_country "${geo_country:-XX}" 2>/dev/null; then
  score=$((score + 5)) # High-risk country bonus
fi
# Cap at 100
[ $score -gt 100 ] && score=100