Fix critical bugs in bot-analyzer: gzipped file access, performance, and scoping issues
CRITICAL FIXES:
- Fix gzipped file access bug causing script to hang at "Calculating threat scores"
- Changed all parsed_logs.txt references to use zcat on .gz files
- Fixed lines 1203, 1315, 1324, 1800, 1807, 1810, 1823-1824, 2781
- Fix user_domains scoping bug preventing user filtering (-u flag)
- Export user_domains from main() before parse_logs() call
- Fix TOOLKIT_BASE_DIR undefined variable
- Changed to SCRIPT_DIR in lines 1551, 2732

CODE QUALITY:
- Add missing BOLD color code definition
- Add is_valid_ip() function for IPv4/IPv6 validation
- Integrate IP validation into is_excluded_ip() to prevent malformed data

PERFORMANCE OPTIMIZATION:
- Major optimization in analyze_domain_threats()
- Create indexed lookup files (one-time decompression)
- Eliminates nested zcat calls (was 4x per IP per domain)
- Expected 10-100x speedup for servers with 200+ domains

SYSTEM DETECTION:
- Add firewall detection exports to system-detect.sh
This commit is contained in:
@@ -25,6 +25,9 @@ export SYS_LOG_DIR=""
|
|||||||
export SYS_USER_HOME_BASE=""
|
export SYS_USER_HOME_BASE=""
|
||||||
export SYS_PHP_VERSIONS=()
|
export SYS_PHP_VERSIONS=()
|
||||||
export SYS_CLOUDFLARE_ACTIVE=""
|
export SYS_CLOUDFLARE_ACTIVE=""
|
||||||
|
export SYS_FIREWALL=""
|
||||||
|
export SYS_FIREWALL_VERSION=""
|
||||||
|
export SYS_FIREWALL_ACTIVE=""
|
||||||
|
|
||||||
#############################################################################
|
#############################################################################
|
||||||
# CONTROL PANEL DETECTION
|
# CONTROL PANEL DETECTION
|
||||||
|
|||||||
@@ -190,6 +190,7 @@ YELLOW='\033[1;33m'
|
|||||||
GREEN='\033[0;32m'
|
GREEN='\033[0;32m'
|
||||||
BLUE='\033[0;34m'
|
BLUE='\033[0;34m'
|
||||||
CYAN='\033[0;36m'
|
CYAN='\033[0;36m'
|
||||||
|
BOLD='\033[1m'
|
||||||
NC='\033[0m' # No Color
|
NC='\033[0m' # No Color
|
||||||
|
|
||||||
# Check for required commands
|
# Check for required commands
|
||||||
@@ -724,10 +725,39 @@ detect_server_ips() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Helper function to validate IP address format
#
# Arguments:
#   $1 - candidate address string
# Returns:
#   0 if $1 is a dotted-quad IPv4 address whose four octets are all <= 255,
#     or an IPv6 address written as hex groups separated by ':' (either all
#     eight groups, or a shorter form containing exactly one "::").
#   1 for anything else (including the empty string).
# Notes:
#   - The IPv6 shape check allows at most seven ':' characters, so the rare
#     eight-colon spellings (e.g. "1:2:3:4:5:6:7::") and IPv4-mapped forms
#     with a dotted suffix ("::ffff:1.2.3.4") are rejected.
#   - All working variables are local; IFS is only changed for the single
#     'read' that splits the IPv4 octets.
is_valid_ip() {
    local ip="$1"
    # Patterns are kept in variables so the regex is never subject to
    # shell quoting rules inside [[ =~ ]].
    local ipv4_shape='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'
    local ipv6_shape='^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$'
    local octet colons tail
    local -a octets

    # IPv4 validation
    if [[ "$ip" =~ $ipv4_shape ]]; then
        IFS='.' read -r -a octets <<< "$ip"
        for octet in "${octets[@]}"; do
            # Force base 10 so octets with leading zeros ("08") are not
            # parsed as invalid octal numbers.
            if (( 10#$octet > 255 )); then
                return 1  # Invalid
            fi
        done
        return 0  # Valid IPv4
    fi

    # IPv6 validation: coarse shape first, then structural checks so that
    # strings like "12:34:56" (a timestamp) or ":::" are not accepted.
    if [[ "$ip" =~ $ipv6_shape ]]; then
        # A single leading or trailing ':' is only legal as part of "::".
        if [[ "$ip" == :[^:]* || "$ip" == *[^:]: ]]; then
            return 1
        fi

        if [[ "$ip" == *::* ]]; then
            # Text after the first "::" must not start with ':' (":::")
            # and must not contain another "::".
            tail="${ip#*::}"
            if [[ "$tail" == :* || "$tail" == *::* ]]; then
                return 1
            fi
            return 0  # Valid compressed IPv6
        fi

        # Without "::" the address must spell out all eight groups,
        # i.e. contain exactly seven ':' separators.
        colons="${ip//[^:]/}"
        if [ "${#colons}" -eq 7 ]; then
            return 0  # Valid full-form IPv6
        fi
        return 1
    fi

    return 1  # Invalid
}
|
||||||
|
|
||||||
# Helper function to check if an IP should be excluded
|
# Helper function to check if an IP should be excluded
|
||||||
is_excluded_ip() {
|
is_excluded_ip() {
|
||||||
local ip="$1"
|
local ip="$1"
|
||||||
|
|
||||||
|
# First validate IP format
|
||||||
|
if ! is_valid_ip "$ip"; then
|
||||||
|
return 0 # Exclude invalid IPs
|
||||||
|
fi
|
||||||
|
|
||||||
# Check if private/internal IP
|
# Check if private/internal IP
|
||||||
if [[ "$ip" =~ ^127\. ]] || \
|
if [[ "$ip" =~ ^127\. ]] || \
|
||||||
[[ "$ip" =~ ^10\. ]] || \
|
[[ "$ip" =~ ^10\. ]] || \
|
||||||
@@ -1199,7 +1229,7 @@ generate_report() {
|
|||||||
ip=$(echo "$line" | cut -d'|' -f1)
|
ip=$(echo "$line" | cut -d'|' -f1)
|
||||||
service=$(echo "$line" | cut -d'|' -f2)
|
service=$(echo "$line" | cut -d'|' -f2)
|
||||||
domain=$(echo "$line" | cut -d'|' -f4)
|
domain=$(echo "$line" | cut -d'|' -f4)
|
||||||
req_count=$(grep -c "^$ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo 0)
|
req_count=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" 2>/dev/null | grep -c "^$ip|" || echo 0)
|
||||||
echo " $ip - $req_count requests - Identified as: $service"
|
echo " $ip - $req_count requests - Identified as: $service"
|
||||||
echo " → Domain: $domain"
|
echo " → Domain: $domain"
|
||||||
echo " → Action: VERIFY OWNERSHIP then whitelist"
|
echo " → Action: VERIFY OWNERSHIP then whitelist"
|
||||||
@@ -1310,8 +1340,8 @@ generate_report() {
|
|||||||
if [ -s "$TEMP_DIR/large_transfers.txt" ]; then
|
if [ -s "$TEMP_DIR/large_transfers.txt" ]; then
|
||||||
# Calculate total bot bandwidth
|
# Calculate total bot bandwidth
|
||||||
total_bot_bandwidth=0
|
total_bot_bandwidth=0
|
||||||
if [ -f "$TEMP_DIR/classified_bots.txt" ]; then
|
if [ -f "$TEMP_DIR/classified_bots.txt.gz" ]; then
|
||||||
total_bot_bandwidth=$(awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/classified_bots.txt")
|
total_bot_bandwidth=$(zcat "$TEMP_DIR/classified_bots.txt.gz" | awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then
|
if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then
|
||||||
@@ -1320,7 +1350,7 @@ generate_report() {
|
|||||||
# Estimate cost at $0.09/GB (typical CDN pricing)
|
# Estimate cost at $0.09/GB (typical CDN pricing)
|
||||||
estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}")
|
estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}")
|
||||||
|
|
||||||
total_bandwidth=$(awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/parsed_logs.txt")
|
total_bandwidth=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" | awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
|
||||||
bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}")
|
bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}")
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
@@ -1547,8 +1577,8 @@ baseline_health_check() {
|
|||||||
|
|
||||||
# If no domains found from log files, try reference database
|
# If no domains found from log files, try reference database
|
||||||
if [ ! -s "$TEMP_DIR/domain_list.txt" ]; then
|
if [ ! -s "$TEMP_DIR/domain_list.txt" ]; then
|
||||||
if [ -s "$TOOLKIT_BASE_DIR/.sysref" ]; then
|
if [ -s "$SCRIPT_DIR/.sysref" ]; then
|
||||||
grep "^DOMAIN|" "$TOOLKIT_BASE_DIR/.sysref" 2>/dev/null | \
|
grep "^DOMAIN|" "$SCRIPT_DIR/.sysref" 2>/dev/null | \
|
||||||
cut -d'|' -f2 | sort -u > "$TEMP_DIR/domain_list.txt"
|
cut -d'|' -f2 | sort -u > "$TEMP_DIR/domain_list.txt"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
@@ -1722,12 +1752,14 @@ main() {
|
|||||||
# User filtering
|
# User filtering
|
||||||
if [ -n "$FILTER_USER" ]; then
|
if [ -n "$FILTER_USER" ]; then
|
||||||
print_info "Filtering logs for user: $FILTER_USER"
|
print_info "Filtering logs for user: $FILTER_USER"
|
||||||
user_domains=$(get_user_domains "$FILTER_USER")
|
export user_domains=$(get_user_domains "$FILTER_USER")
|
||||||
if [ -z "$user_domains" ]; then
|
if [ -z "$user_domains" ]; then
|
||||||
print_error "No domains found for user: $FILTER_USER"
|
print_error "No domains found for user: $FILTER_USER"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
print_info "User has $(echo "$user_domains" | wc -l) domain(s)"
|
print_info "User has $(echo "$user_domains" | wc -l) domain(s)"
|
||||||
|
else
|
||||||
|
export user_domains=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log_count=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | wc -l)
|
log_count=$(find "$LOG_DIR" -type f ! -name "*-bytes_log" ! -name "*.offset" ! -name "*error_log" "${find_opts[@]}" 2>/dev/null | wc -l)
|
||||||
@@ -1796,17 +1828,21 @@ analyze_domain_threats() {
|
|||||||
> "$TEMP_DIR/domain_high_risk_ips.txt"
|
> "$TEMP_DIR/domain_high_risk_ips.txt"
|
||||||
|
|
||||||
# Get all unique domains from parsed logs
|
# Get all unique domains from parsed logs
|
||||||
awk -F'|' '{print $2}' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u > "$TEMP_DIR/all_domains.txt"
|
zcat "$TEMP_DIR/parsed_logs.txt.gz" 2>/dev/null | awk -F'|' '{print $2}' | sort -u > "$TEMP_DIR/all_domains.txt"
|
||||||
|
|
||||||
|
# Pre-process: Create indexed lookup files for performance (one-time decompression)
|
||||||
|
zcat "$TEMP_DIR/parsed_logs.txt.gz" 2>/dev/null | awk -F'|' '{print $2"|"$1}' | sort > "$TEMP_DIR/domain_ip_lookup.txt"
|
||||||
|
zcat "$TEMP_DIR/classified_bots.txt.gz" 2>/dev/null | awk -F'|' '{print $2}' | sort > "$TEMP_DIR/bot_domains_lookup.txt"
|
||||||
|
|
||||||
# For each domain, calculate threat metrics
|
# For each domain, calculate threat metrics
|
||||||
while read -r domain; do
|
while read -r domain; do
|
||||||
[ -z "$domain" ] && continue
|
[ -z "$domain" ] && continue
|
||||||
|
|
||||||
# Total requests to this domain
|
# Total requests to this domain (from indexed file)
|
||||||
local total_requests=$(grep -c "^[^|]*|$domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
|
local total_requests=$(grep -c "^$domain|" "$TEMP_DIR/domain_ip_lookup.txt" 2>/dev/null || echo "0")
|
||||||
|
|
||||||
# Bot requests to this domain
|
# Bot requests to this domain (from indexed file)
|
||||||
local bot_requests=$(grep "|$domain|" "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo "0")
|
local bot_requests=$(grep -c "^$domain$" "$TEMP_DIR/bot_domains_lookup.txt" 2>/dev/null || echo "0")
|
||||||
|
|
||||||
# High-risk IPs hitting this domain (score >= 70)
|
# High-risk IPs hitting this domain (score >= 70)
|
||||||
local high_risk_count=0
|
local high_risk_count=0
|
||||||
@@ -1818,9 +1854,9 @@ analyze_domain_threats() {
|
|||||||
local ip=$(echo "$score_line" | cut -d'|' -f2)
|
local ip=$(echo "$score_line" | cut -d'|' -f2)
|
||||||
|
|
||||||
if [ "$score" -ge 70 ]; then
|
if [ "$score" -ge 70 ]; then
|
||||||
# Check if this IP hit this domain
|
# Check if this IP hit this domain (from indexed file)
|
||||||
if grep -q "^$ip|$domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null; then
|
local ip_requests=$(grep -c "^$domain|$ip$" "$TEMP_DIR/domain_ip_lookup.txt" 2>/dev/null || echo "0")
|
||||||
local ip_requests=$(grep -c "^$ip|$domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
|
if [ "$ip_requests" -gt 0 ]; then
|
||||||
high_risk_count=$((high_risk_count + 1))
|
high_risk_count=$((high_risk_count + 1))
|
||||||
high_risk_ips="${high_risk_ips}${ip}:${score}:${ip_requests} "
|
high_risk_ips="${high_risk_ips}${ip}:${score}:${ip_requests} "
|
||||||
fi
|
fi
|
||||||
@@ -2728,8 +2764,8 @@ execute_htaccess_domain_blocking() {
|
|||||||
|
|
||||||
# Find document root for this domain using reference database
|
# Find document root for this domain using reference database
|
||||||
local doc_root=""
|
local doc_root=""
|
||||||
if [ -s "$TOOLKIT_BASE_DIR/.sysref" ]; then
|
if [ -s "$SCRIPT_DIR/.sysref" ]; then
|
||||||
doc_root=$(grep "^DOMAIN|$target_domain|" "$TOOLKIT_BASE_DIR/.sysref" 2>/dev/null | head -1 | cut -d'|' -f4)
|
doc_root=$(grep "^DOMAIN|$target_domain|" "$SCRIPT_DIR/.sysref" 2>/dev/null | head -1 | cut -d'|' -f4)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$doc_root" ]; then
|
if [ -z "$doc_root" ]; then
|
||||||
@@ -2773,7 +2809,7 @@ execute_htaccess_domain_blocking() {
|
|||||||
print_info "Adding bot blocking rules..."
|
print_info "Adding bot blocking rules..."
|
||||||
|
|
||||||
# Get high-risk IPs for this domain
|
# Get high-risk IPs for this domain
|
||||||
local block_ips=$(grep "^[^|]*|$target_domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -d'|' -f1 | sort -u | while read ip; do
|
local block_ips=$(zcat "$TEMP_DIR/parsed_logs.txt.gz" 2>/dev/null | grep "^[^|]*|$target_domain|" | cut -d'|' -f1 | sort -u | while read ip; do
|
||||||
# Check if this IP has high threat score
|
# Check if this IP has high threat score
|
||||||
if grep -q "|$ip$" "$TEMP_DIR/threat_scores.txt" 2>/dev/null; then
|
if grep -q "|$ip$" "$TEMP_DIR/threat_scores.txt" 2>/dev/null; then
|
||||||
local score=$(grep "|$ip$" "$TEMP_DIR/threat_scores.txt" | cut -d'|' -f1)
|
local score=$(grep "|$ip$" "$TEMP_DIR/threat_scores.txt" | cut -d'|' -f1)
|
||||||
|
|||||||
Reference in New Issue
Block a user