Optimize bot-analyzer to use cached domain status from reference database

Changes to modules/security/bot-analyzer.sh:

Problem:
- baseline_health_check() was re-checking HTTP/HTTPS status for all domains
- verify_domains_still_working() was re-testing domains again
- These duplicate checks were wasteful because the data is already cached in the reference database

Solution:
- baseline_health_check() now uses get_all_domain_statuses() from reference DB
- verify_domains_still_working() now uses get_domain_status() from reference DB
- Eliminated all curl HTTP status checks for local domains
- Significantly faster execution (no network requests needed)

Benefits:
- Instant baseline loading (uses pre-cached data from launcher startup)
- No redundant HTTP/HTTPS requests
- Consistent with toolkit architecture (centralized status collection)
- Same output format and comparison logic, better performance (note: verification now reflects cached status rather than a live re-test)

Technical Details:
- Uses get_all_domain_statuses() to load all domain status data
- Uses get_domain_status() to check individual domain status
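- get_all_domain_statuses() returns one line per domain in the same format the baseline file already used: domain|http_code|https_code|status_summary
- get_domain_status() returns the status for a single domain as: http_code|https_code|status_summary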
- Added a stale-cache note to the verify function's broken-domain warning (cached status may be up to 1 hour old and may not reflect changes just made)
- Maintains all existing baseline/verification logic

Note: Acronis scripts unchanged - they check external cloud URLs, not local domains

Performance Impact:
- Before: ~3-5 seconds per domain check (HTTP + HTTPS curl requests)
- After: Instant (reads from .sysref cache file)
- For 50 domains: ~5 minutes saved per execution
This commit is contained in:
cschantz
2025-12-11 15:54:22 -05:00
parent aadda82f7e
commit 5f6a141114
+46 -71
View File
@@ -1635,75 +1635,43 @@ generate_report() {
################################################################################ ################################################################################
baseline_health_check() { baseline_health_check() {
print_info "Performing baseline health check on all domains..." print_info "Loading baseline health status from cached data..."
echo "" echo ""
# Create baseline health file # Create baseline health file
> "$TEMP_DIR/baseline_health.txt" > "$TEMP_DIR/baseline_health.txt"
> "$TEMP_DIR/domain_list.txt"
# Get all domains from logs (we'll test these) # Use get_all_domain_statuses() from reference database instead of re-checking
find "$LOG_DIR" -type f -name "*.com" -o -name "*.net" -o -name "*.org" 2>/dev/null | \ # Returns: domain|http_code|https_code|status_summary
xargs -r basename -a 2>/dev/null | \ if ! command -v get_all_domain_statuses &>/dev/null; then
sort -u > "$TEMP_DIR/domain_list.txt" print_warning "Reference database functions not available - skipping health check"
# If no domains found from log files, try reference database
if [ ! -s "$TEMP_DIR/domain_list.txt" ]; then
if [ -s "$SCRIPT_DIR/.sysref" ]; then
grep "^DOMAIN|" "$SCRIPT_DIR/.sysref" 2>/dev/null | \
cut -d'|' -f2 | sort -u > "$TEMP_DIR/domain_list.txt"
fi
fi
local domain_count=$(wc -l < "$TEMP_DIR/domain_list.txt" 2>/dev/null || echo "0")
if [ "$domain_count" -eq 0 ]; then
print_warning "No domains found for health check"
return 0 return 0
fi fi
print_info "Testing $domain_count domain(s)..."
echo ""
local tested=0 local tested=0
local working=0 local working=0
local broken=0 local broken=0
while read -r domain; do # Get all domain statuses from cached reference database
while IFS='|' read -r domain http_status https_status result; do
[ -z "$domain" ] && continue [ -z "$domain" ] && continue
tested=$((tested + 1)) tested=$((tested + 1))
# Test HTTP and HTTPS # Display status based on cached result
local http_status="" if [ "$result" = "200_OK" ]; then
local https_status=""
local result=""
# Try HTTP first (timeout 5 seconds)
http_status=$(curl -s -o /dev/null -w "%{http_code}" -m 5 "http://$domain" 2>/dev/null || echo "timeout")
# Try HTTPS (timeout 5 seconds)
https_status=$(curl -s -o /dev/null -w "%{http_code}" -m 5 -k "https://$domain" 2>/dev/null || echo "timeout")
# Determine overall status
if [ "$http_status" = "200" ] || [ "$https_status" = "200" ]; then
result="200_OK"
working=$((working + 1)) working=$((working + 1))
echo -e " ${GREEN}${NC} $domain - HTTP:$http_status HTTPS:$https_status" echo -e " ${GREEN}${NC} $domain - HTTP:$http_status HTTPS:$https_status"
elif [ "$http_status" = "301" ] || [ "$http_status" = "302" ] || [ "$https_status" = "301" ] || [ "$https_status" = "302" ]; then elif [ "$result" = "REDIRECT" ]; then
result="REDIRECT"
working=$((working + 1)) working=$((working + 1))
echo -e " ${YELLOW}${NC} $domain - Redirect (HTTP:$http_status HTTPS:$https_status)" echo -e " ${YELLOW}${NC} $domain - Redirect (HTTP:$http_status HTTPS:$https_status)"
elif [ "$http_status" = "403" ] || [ "$https_status" = "403" ]; then elif [ "$result" = "403_FORBIDDEN" ]; then
result="403_FORBIDDEN"
broken=$((broken + 1)) broken=$((broken + 1))
echo -e " ${RED}${NC} $domain - Forbidden (HTTP:$http_status HTTPS:$https_status)" echo -e " ${RED}${NC} $domain - Forbidden (HTTP:$http_status HTTPS:$https_status)"
elif [ "$http_status" = "timeout" ] && [ "$https_status" = "timeout" ]; then elif [ "$result" = "TIMEOUT" ] || [ "$result" = "UNREACHABLE" ]; then
result="TIMEOUT"
broken=$((broken + 1)) broken=$((broken + 1))
echo -e " ${RED}${NC} $domain - Timeout (unreachable)" echo -e " ${RED}${NC} $domain - Timeout (unreachable)"
else else
result="ERROR_${http_status}_${https_status}"
broken=$((broken + 1)) broken=$((broken + 1))
echo -e " ${YELLOW}?${NC} $domain - HTTP:$http_status HTTPS:$https_status" echo -e " ${YELLOW}?${NC} $domain - HTTP:$http_status HTTPS:$https_status"
fi fi
@@ -1711,15 +1679,20 @@ baseline_health_check() {
# Store baseline: domain|http_status|https_status|result # Store baseline: domain|http_status|https_status|result
echo "$domain|$http_status|$https_status|$result" >> "$TEMP_DIR/baseline_health.txt" echo "$domain|$http_status|$https_status|$result" >> "$TEMP_DIR/baseline_health.txt"
done < "$TEMP_DIR/domain_list.txt" done < <(get_all_domain_statuses)
if [ "$tested" -eq 0 ]; then
print_warning "No domain status data available in reference database"
return 0
fi
echo "" echo ""
print_success "Baseline health check complete: $working working, $broken with issues" print_success "Baseline loaded from cache: $working working, $broken with issues"
echo "" echo ""
} }
verify_domains_still_working() { verify_domains_still_working() {
print_info "Verifying domains still work after changes..." print_info "Checking current domain status from cached data..."
echo "" echo ""
if [ ! -s "$TEMP_DIR/baseline_health.txt" ]; then if [ ! -s "$TEMP_DIR/baseline_health.txt" ]; then
@@ -1727,46 +1700,44 @@ verify_domains_still_working() {
return 0 return 0
fi fi
if ! command -v get_domain_status &>/dev/null; then
print_warning "Reference database functions not available - skipping verification"
return 0
fi
local changes_detected=0 local changes_detected=0
local now_broken=0 local now_broken=0
while IFS='|' read -r domain baseline_http baseline_https baseline_result; do while IFS='|' read -r domain baseline_http baseline_https baseline_result; do
[ -z "$domain" ] && continue [ -z "$domain" ] && continue
# Re-test domain # Get current status from cached reference database
local http_status=$(curl -s -o /dev/null -w "%{http_code}" -m 5 "http://$domain" 2>/dev/null || echo "timeout") local current_status=$(get_domain_status "$domain")
local https_status=$(curl -s -o /dev/null -w "%{http_code}" -m 5 -k "https://$domain" 2>/dev/null || echo "timeout")
# Determine new status if [ -z "$current_status" ]; then
local new_result="" # Domain not in cache - skip
if [ "$http_status" = "200" ] || [ "$https_status" = "200" ]; then continue
new_result="200_OK"
elif [ "$http_status" = "301" ] || [ "$http_status" = "302" ] || [ "$https_status" = "301" ] || [ "$https_status" = "302" ]; then
new_result="REDIRECT"
elif [ "$http_status" = "403" ] || [ "$https_status" = "403" ]; then
new_result="403_FORBIDDEN"
elif [ "$http_status" = "timeout" ] && [ "$https_status" = "timeout" ]; then
new_result="TIMEOUT"
else
new_result="ERROR"
fi fi
# Parse current status: http_code|https_code|status_summary
IFS='|' read -r http_status https_status new_result <<< "$current_status"
# Compare to baseline # Compare to baseline
if [ "$baseline_result" != "$new_result" ]; then if [ "$baseline_result" != "$new_result" ]; then
changes_detected=$((changes_detected + 1)) changes_detected=$((changes_detected + 1))
# Check if it got worse # Check if it got worse
if [ "$baseline_result" = "200_OK" ] || [ "$baseline_result" = "REDIRECT" ]; then if [ "$baseline_result" = "200_OK" ] || [ "$baseline_result" = "REDIRECT" ]; then
if [ "$new_result" = "403_FORBIDDEN" ] || [ "$new_result" = "TIMEOUT" ] || [ "$new_result" = "ERROR" ]; then if [ "$new_result" = "403_FORBIDDEN" ] || [ "$new_result" = "TIMEOUT" ] || [ "$new_result" = "UNREACHABLE" ]; then
now_broken=$((now_broken + 1)) now_broken=$((now_broken + 1))
echo -e " ${RED}BROKEN:${NC} $domain" echo -e " ${RED}BROKEN:${NC} $domain"
echo -e " Before: $baseline_result (HTTP:$baseline_http HTTPS:$baseline_https)" echo -e " Before: $baseline_result (HTTP:$baseline_http HTTPS:$baseline_https)"
echo -e " After: $new_result (HTTP:$http_status HTTPS:$https_status)" echo -e " After: $new_result (HTTP:$http_status HTTPS:$https_status)"
echo -e " ${RED}WARNING: This domain stopped working after your changes!${NC}" echo -e " ${RED}WARNING: This domain stopped working after your changes!${NC}"
echo "" echo ""
fi fi
# Check if it got better # Check if it got better
elif [ "$baseline_result" = "403_FORBIDDEN" ] || [ "$baseline_result" = "TIMEOUT" ]; then elif [ "$baseline_result" = "403_FORBIDDEN" ] || [ "$baseline_result" = "TIMEOUT" ] || [ "$baseline_result" = "UNREACHABLE" ]; then
if [ "$new_result" = "200_OK" ] || [ "$new_result" = "REDIRECT" ]; then if [ "$new_result" = "200_OK" ] || [ "$new_result" = "REDIRECT" ]; then
echo -e " ${GREEN}✅ FIXED:${NC} $domain" echo -e " ${GREEN}✅ FIXED:${NC} $domain"
echo -e " Before: $baseline_result" echo -e " Before: $baseline_result"
@@ -1779,18 +1750,22 @@ verify_domains_still_working() {
if [ "${now_broken:-0}" -gt 0 ]; then if [ "${now_broken:-0}" -gt 0 ]; then
echo "" echo ""
print_alert "WARNING: $now_broken domain(s) stopped working after your changes!" print_alert "WARNING: $now_broken domain(s) may have stopped working!"
echo ""
echo "NOTE: Status is from cached data (max 1 hour old)."
echo "If you just made changes, the cache may not reflect real-time status."
echo "" echo ""
echo "Recommended actions:" echo "Recommended actions:"
echo " 1. Review the firewall rules you just applied" echo " 1. Review the firewall rules you just applied"
echo " 2. Check CSF temporary blocks: csf -t" echo " 2. Check CSF temporary blocks: csf -t"
echo " 3. Check CSF deny list: csf -g" echo " 3. Check CSF deny list: csf -g"
echo " 4. Consider reverting changes if issues persist" echo " 4. Manually verify domain: curl -I http://domain.com"
echo " 5. Consider reverting changes if issues persist"
echo "" echo ""
elif [ "${changes_detected:-0}" -eq 0 ]; then elif [ "${changes_detected:-0}" -eq 0 ]; then
print_success "All domains still working normally" print_success "All domains show same status as baseline (cache-based check)"
else else
print_success "Some status changes detected but no domains broken" print_success "Some status changes detected but no domains broken (cache-based check)"
fi fi
echo "" echo ""