|
|
|
@@ -505,7 +505,7 @@ parse_logs() {
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
local line_count
|
|
|
|
|
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
|
|
|
|
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
|
|
|
|
|
local file_size_kb
|
|
|
|
|
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")
|
|
|
|
|
|
|
|
|
@@ -639,7 +639,7 @@ classify_bots() {
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
local classified_count
|
|
|
|
|
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
|
|
|
|
|
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo "0")
|
|
|
|
|
local file_size_kb
|
|
|
|
|
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")
|
|
|
|
|
|
|
|
|
@@ -683,9 +683,9 @@ save_baseline() {
|
|
|
|
|
local baseline_file="$BASELINE_DIR/${domain}_baseline.txt"
|
|
|
|
|
|
|
|
|
|
# Get domain-specific metrics
|
|
|
|
|
local domain_requests=$(grep "^[^|]*|$domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | wc -l || echo "0")
|
|
|
|
|
local domain_attacks=$(grep "^[^|]*|$domain|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0")
|
|
|
|
|
local domain_bots=$(grep "^[^|]*|$domain|" "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo "0")
|
|
|
|
|
local domain_requests=$(grep -F "|$domain|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | wc -l || echo "0")
|
|
|
|
|
local domain_attacks=$(grep -F "|$domain|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0")
|
|
|
|
|
local domain_bots=$(grep -F "|$domain|" "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo "0")
|
|
|
|
|
|
|
|
|
|
# Append to baseline history (timestamp|requests|attacks|bots|high_risk_ips)
|
|
|
|
|
echo "$today|$domain_requests|$domain_attacks|$domain_bots|$high_risk_ips" >> "$baseline_file"
|
|
|
|
@@ -747,7 +747,7 @@ analyze_attack_progression() {
|
|
|
|
|
> "$progression_file"
|
|
|
|
|
|
|
|
|
|
# Extract all requests from this IP, in order
|
|
|
|
|
grep "^$ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' '{
|
|
|
|
|
grep -F "$ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' '{
|
|
|
|
|
print $8 "|" $3 "|" $4 "|" $6
|
|
|
|
|
}' | sort >> "$progression_file"
|
|
|
|
|
|
|
|
|
@@ -1036,7 +1036,7 @@ detect_threats() {
|
|
|
|
|
|
|
|
|
|
# Breakdown by attack type
|
|
|
|
|
for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do
|
|
|
|
|
grep "|$attack_type$" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | \
|
|
|
|
|
grep -F "|$attack_type|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | \
|
|
|
|
|
awk -F'|' '{print $1"|"$2"|"$3"|"$4}' | \
|
|
|
|
|
sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt" || true
|
|
|
|
|
done
|
|
|
|
@@ -1219,6 +1219,7 @@ calculate_bot_fingerprint() {
|
|
|
|
|
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
|
|
|
|
BEGIN {
|
|
|
|
|
# Initialize tracking arrays
|
|
|
|
|
fingerprint_file = tmpdir "/bot_fingerprints.txt"
|
|
|
|
|
}
|
|
|
|
|
{
|
|
|
|
|
ip = $1
|
|
|
|
@@ -1306,12 +1307,12 @@ calculate_bot_fingerprint() {
|
|
|
|
|
|
|
|
|
|
# Output fingerprint for high-confidence bots (score >= 60)
|
|
|
|
|
if (score >= 60) {
|
|
|
|
|
printf "%s|%d|%d\n", ip, score, signal_count > tmpdir "/bot_fingerprints.txt"
|
|
|
|
|
printf "%s|%d|%d\n", ip, score, signal_count > fingerprint_file
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
close(tmpdir "/bot_fingerprints.txt")
|
|
|
|
|
close(fingerprint_file)
|
|
|
|
|
}
|
|
|
|
|
' < "$TEMP_DIR/parsed_logs.txt"
|
|
|
|
|
' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true
|
|
|
|
|
|
|
|
|
|
# Create file if empty
|
|
|
|
|
touch "$TEMP_DIR/bot_fingerprints.txt"
|
|
|
|
@@ -1351,33 +1352,34 @@ analyze_domain_targeting_percentage() {
|
|
|
|
|
printf "%s|%d|%d\n", domain, ip_count, request_count_per_domain[domain]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt"
|
|
|
|
|
' <([ -f "$TEMP_DIR/attack_vectors_raw.txt" ] && cat "$TEMP_DIR/attack_vectors_raw.txt" || echo "") "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" || true
|
|
|
|
|
|
|
|
|
|
# Also create per-domain attack type breakdown
|
|
|
|
|
# Format: domain|attack_type|ip|count
|
|
|
|
|
awk -F'|' '
|
|
|
|
|
{
|
|
|
|
|
ip = $1
|
|
|
|
|
domain = $2
|
|
|
|
|
attack_type = $5
|
|
|
|
|
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
|
|
|
|
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
|
|
|
|
{
|
|
|
|
|
ip = $1
|
|
|
|
|
domain = $2
|
|
|
|
|
attack_type = $5
|
|
|
|
|
|
|
|
|
|
# Store as domain -> attack_type -> ip -> count
|
|
|
|
|
attack_data[domain][attack_type][ip]++
|
|
|
|
|
attack_totals[domain][attack_type]++
|
|
|
|
|
}
|
|
|
|
|
END {
|
|
|
|
|
for (domain in attack_data) {
|
|
|
|
|
domain_file = tmpdir "/domain_attacks_" domain ".txt"
|
|
|
|
|
for (attack_type in attack_data[domain]) {
|
|
|
|
|
total = attack_totals[domain][attack_type]
|
|
|
|
|
for (ip in attack_data[domain][attack_type]) {
|
|
|
|
|
count = attack_data[domain][attack_type][ip]
|
|
|
|
|
printf "%s|%d|%d\n", attack_type "|" ip, count, total
|
|
|
|
|
# Store as domain -> attack_type -> ip -> count
|
|
|
|
|
attack_data[domain][attack_type][ip]++
|
|
|
|
|
attack_totals[domain][attack_type]++
|
|
|
|
|
}
|
|
|
|
|
END {
|
|
|
|
|
for (domain in attack_data) {
|
|
|
|
|
for (attack_type in attack_data[domain]) {
|
|
|
|
|
total = attack_totals[domain][attack_type]
|
|
|
|
|
for (ip in attack_data[domain][attack_type]) {
|
|
|
|
|
count = attack_data[domain][attack_type][ip]
|
|
|
|
|
printf "%s|%d|%d\n", attack_type "|" ip, count, total
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
' < "$TEMP_DIR/attack_vectors_raw.txt"
|
|
|
|
|
' < "$TEMP_DIR/attack_vectors_raw.txt"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
print_success "Domain attack pattern analysis complete"
|
|
|
|
|
}
|
|
|
|
@@ -1606,11 +1608,9 @@ is_excluded_ip() {
|
|
|
|
|
return 0 # True - should be excluded
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Check if it's the server's own IP
|
|
|
|
|
if [ -f "$TEMP_DIR/server_ips.txt" ]; then
|
|
|
|
|
if grep -qFx "$ip" "$TEMP_DIR/server_ips.txt" 2>/dev/null; then
|
|
|
|
|
return 0 # True - should be excluded
|
|
|
|
|
fi
|
|
|
|
|
# Check if it's the server's own IP (using pre-loaded array for speed)
|
|
|
|
|
if [ -n "${server_ips_array[$ip]}" ]; then
|
|
|
|
|
return 0 # True - should be excluded
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
return 1 # False - should not be excluded
|
|
|
|
@@ -1624,13 +1624,13 @@ analyze_time_series() {
|
|
|
|
|
print_info "Analyzing time-series patterns..."
|
|
|
|
|
|
|
|
|
|
# Extract hourly bot traffic
|
|
|
|
|
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" {
|
|
|
|
|
awk -F'|' '$9 != "unknown" {
|
|
|
|
|
timestamp = $8
|
|
|
|
|
if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) {
|
|
|
|
|
hour = ts[4]
|
|
|
|
|
print hour
|
|
|
|
|
}
|
|
|
|
|
}' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
|
|
|
|
|
}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true
|
|
|
|
|
|
|
|
|
|
# Extract hourly attack traffic
|
|
|
|
|
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
|
|
|
@@ -1654,54 +1654,78 @@ analyze_time_series() {
|
|
|
|
|
calculate_threat_scores() {
|
|
|
|
|
print_info "Calculating threat scores..."
|
|
|
|
|
|
|
|
|
|
# Pre-count requests per IP (MUCH faster than grepping for each IP)
|
|
|
|
|
# Pre-load server IPs for fast exclusion checking (avoids grep in loop)
|
|
|
|
|
declare -A server_ips_array
|
|
|
|
|
if [ -f "$TEMP_DIR/server_ips.txt" ]; then
|
|
|
|
|
mapfile -t server_ips_list < "$TEMP_DIR/server_ips.txt" 2>/dev/null || true
|
|
|
|
|
for ip in "${server_ips_list[@]:-}"; do
|
|
|
|
|
[ -n "$ip" ] && server_ips_array["$ip"]=1
|
|
|
|
|
done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Pre-count requests per IP using mapfile (faster than while-read on large files)
|
|
|
|
|
declare -A ip_request_counts
|
|
|
|
|
while IFS='|' read -r ip rest; do
|
|
|
|
|
((ip_request_counts["$ip"]++))
|
|
|
|
|
done < <(cat "$TEMP_DIR/parsed_logs.txt")
|
|
|
|
|
if [ -f "$TEMP_DIR/parsed_logs.txt" ]; then
|
|
|
|
|
mapfile -t parsed_lines < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true
|
|
|
|
|
for line in "${parsed_lines[@]:-}"; do
|
|
|
|
|
ip="${line%%|*}"
|
|
|
|
|
[ -n "$ip" ] && ((ip_request_counts["$ip"]++)) || true
|
|
|
|
|
done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Build hash tables from threat files for O(1) lookups
|
|
|
|
|
# OPTIMIZATION: Use awk instead of echo|awk|cut in loops (10x faster)
|
|
|
|
|
declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
|
|
|
|
|
declare -A threat_ips_suspicious threat_ips_ddos threat_admin_count threat_404_count
|
|
|
|
|
|
|
|
|
|
# Parse each threat file and build hash tables (optimized with awk)
|
|
|
|
|
[ -f "$TEMP_DIR/sqli_attempts.txt" ] && while read -r ip; do
|
|
|
|
|
threat_ips_sqli["$ip"]=1
|
|
|
|
|
done < <(awk '{print $2}' "$TEMP_DIR/sqli_attempts.txt" | cut -d'|' -f1)
|
|
|
|
|
# Parse each threat file and build hash tables (using mapfile to avoid subshells)
|
|
|
|
|
if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then
|
|
|
|
|
mapfile -t sqli_ips < <(awk '{print $2}' "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
|
|
|
|
for ip in "${sqli_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_sqli["$ip"]=1; done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/xss_attempts.txt" ] && while read -r ip; do
|
|
|
|
|
threat_ips_xss["$ip"]=1
|
|
|
|
|
done < <(awk '{print $2}' "$TEMP_DIR/xss_attempts.txt" | cut -d'|' -f1)
|
|
|
|
|
if [ -f "$TEMP_DIR/xss_attempts.txt" ]; then
|
|
|
|
|
mapfile -t xss_ips < <(awk '{print $2}' "$TEMP_DIR/xss_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
|
|
|
|
for ip in "${xss_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_xss["$ip"]=1; done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/path_traversal_attempts.txt" ] && while read -r ip; do
|
|
|
|
|
threat_ips_path["$ip"]=1
|
|
|
|
|
done < <(awk '{print $2}' "$TEMP_DIR/path_traversal_attempts.txt" | cut -d'|' -f1)
|
|
|
|
|
if [ -f "$TEMP_DIR/path_traversal_attempts.txt" ]; then
|
|
|
|
|
mapfile -t path_ips < <(awk '{print $2}' "$TEMP_DIR/path_traversal_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
|
|
|
|
for ip in "${path_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_path["$ip"]=1; done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/rce_upload_attempts.txt" ] && while read -r ip; do
|
|
|
|
|
threat_ips_rce["$ip"]=1
|
|
|
|
|
done < <(awk '{print $2}' "$TEMP_DIR/rce_upload_attempts.txt" | cut -d'|' -f1)
|
|
|
|
|
if [ -f "$TEMP_DIR/rce_upload_attempts.txt" ]; then
|
|
|
|
|
mapfile -t rce_ips < <(awk '{print $2}' "$TEMP_DIR/rce_upload_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
|
|
|
|
for ip in "${rce_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_rce["$ip"]=1; done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/login_bruteforce_attempts.txt" ] && while read -r ip; do
|
|
|
|
|
threat_ips_login["$ip"]=1
|
|
|
|
|
done < <(awk '{print $2}' "$TEMP_DIR/login_bruteforce_attempts.txt" | cut -d'|' -f1)
|
|
|
|
|
if [ -f "$TEMP_DIR/login_bruteforce_attempts.txt" ]; then
|
|
|
|
|
mapfile -t login_ips < <(awk '{print $2}' "$TEMP_DIR/login_bruteforce_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
|
|
|
|
for ip in "${login_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_login["$ip"]=1; done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/suspicious_ua.txt" ] && while read -r ip; do
|
|
|
|
|
threat_ips_suspicious["$ip"]=1
|
|
|
|
|
done < <(awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1)
|
|
|
|
|
if [ -f "$TEMP_DIR/suspicious_ua.txt" ]; then
|
|
|
|
|
mapfile -t susp_ips < <(awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" 2>/dev/null | cut -d'|' -f1) || true
|
|
|
|
|
for ip in "${susp_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_suspicious["$ip"]=1; done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/rapid_fire_ips.txt" ] && while read -r ip; do
|
|
|
|
|
threat_ips_ddos["$ip"]=1
|
|
|
|
|
done < <(awk '{print $2}' "$TEMP_DIR/rapid_fire_ips.txt")
|
|
|
|
|
if [ -f "$TEMP_DIR/rapid_fire_ips.txt" ]; then
|
|
|
|
|
mapfile -t ddos_ips < <(awk '{print $2}' "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null) || true
|
|
|
|
|
for ip in "${ddos_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_ddos["$ip"]=1; done
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Parse count-based threat files
|
|
|
|
|
[ -f "$TEMP_DIR/admin_probes.txt" ] && while read -r count ip; do
|
|
|
|
|
[ -n "$ip" ] && threat_admin_count["$ip"]=$count
|
|
|
|
|
done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" | sed 's/|.*//')
|
|
|
|
|
if [ -f "$TEMP_DIR/admin_probes.txt" ]; then
|
|
|
|
|
while IFS=' ' read -r count ip rest; do
|
|
|
|
|
[ -n "$ip" ] && threat_admin_count["$ip"]=$count
|
|
|
|
|
done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" 2>/dev/null || true)
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/404_scans.txt" ] && while read -r count ip; do
|
|
|
|
|
[ -n "$ip" ] && threat_404_count["$ip"]=$count
|
|
|
|
|
done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" | sed 's/|.*//')
|
|
|
|
|
if [ -f "$TEMP_DIR/404_scans.txt" ]; then
|
|
|
|
|
while IFS=' ' read -r count ip rest; do
|
|
|
|
|
[ -n "$ip" ] && threat_404_count["$ip"]=$count
|
|
|
|
|
done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" 2>/dev/null || true)
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# NEW: Load bot classifications to skip volume scoring for legitimate bots
|
|
|
|
|
declare -A legit_bot_ips
|
|
|
|
@@ -1710,50 +1734,67 @@ calculate_threat_scores() {
|
|
|
|
|
if [ "$bot_type" = "legit" ]; then
|
|
|
|
|
legit_bot_ips["$ip"]=1
|
|
|
|
|
fi
|
|
|
|
|
done < "$TEMP_DIR/classified_bots.txt"
|
|
|
|
|
done < "$TEMP_DIR/classified_bots.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# NEW: Load success rate data for scanning/scraping detection
|
|
|
|
|
declare -A scanner_ips scraper_ips ip_fail_rates
|
|
|
|
|
[ -f "$TEMP_DIR/high_failure_ips.txt" ] && while IFS='|' read -r ip total fail_rate category; do
|
|
|
|
|
scanner_ips["$ip"]=$fail_rate
|
|
|
|
|
done < "$TEMP_DIR/high_failure_ips.txt"
|
|
|
|
|
if [ -f "$TEMP_DIR/high_failure_ips.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip total fail_rate category; do
|
|
|
|
|
[ -n "$ip" ] && scanner_ips["$ip"]=$fail_rate
|
|
|
|
|
done < "$TEMP_DIR/high_failure_ips.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
[ -f "$TEMP_DIR/high_success_ips.txt" ] && while IFS='|' read -r ip total success_rate category; do
|
|
|
|
|
scraper_ips["$ip"]=$success_rate
|
|
|
|
|
done < "$TEMP_DIR/high_success_ips.txt"
|
|
|
|
|
if [ -f "$TEMP_DIR/high_success_ips.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip total success_rate category; do
|
|
|
|
|
[ -n "$ip" ] && scraper_ips["$ip"]=$success_rate
|
|
|
|
|
done < "$TEMP_DIR/high_success_ips.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Load all fail rates for threshold checks
|
|
|
|
|
[ -f "$TEMP_DIR/ip_success_rates.txt" ] && while IFS='|' read -r ip total success_rate fail_rate; do
|
|
|
|
|
ip_fail_rates["$ip"]=$fail_rate
|
|
|
|
|
done < "$TEMP_DIR/ip_success_rates.txt"
|
|
|
|
|
if [ -f "$TEMP_DIR/ip_success_rates.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip total success_rate fail_rate; do
|
|
|
|
|
[ -n "$ip" ] && ip_fail_rates["$ip"]=$fail_rate
|
|
|
|
|
done < "$TEMP_DIR/ip_success_rates.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# NEW: Load header anomalies
|
|
|
|
|
declare -A header_anomalies
|
|
|
|
|
[ -f "$TEMP_DIR/header_anomalies.txt" ] && while IFS='|' read -r ip anomaly_type score; do
|
|
|
|
|
header_anomalies["$ip"]=$score
|
|
|
|
|
done < "$TEMP_DIR/header_anomalies.txt"
|
|
|
|
|
if [ -f "$TEMP_DIR/header_anomalies.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip anomaly_type score; do
|
|
|
|
|
[ -n "$ip" ] && header_anomalies["$ip"]=$score
|
|
|
|
|
done < "$TEMP_DIR/header_anomalies.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# NEW: Load suspicious entry points
|
|
|
|
|
declare -A suspicious_entry_ips
|
|
|
|
|
[ -f "$TEMP_DIR/suspicious_entry_points.txt" ] && while IFS='|' read -r ip entry_type url status; do
|
|
|
|
|
suspicious_entry_ips["$ip"]=1
|
|
|
|
|
done < "$TEMP_DIR/suspicious_entry_points.txt"
|
|
|
|
|
if [ -f "$TEMP_DIR/suspicious_entry_points.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip entry_type url status; do
|
|
|
|
|
[ -n "$ip" ] && suspicious_entry_ips["$ip"]=1
|
|
|
|
|
done < "$TEMP_DIR/suspicious_entry_points.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# NEW: Load fuzzing/parameter scanning IPs
|
|
|
|
|
declare -A fuzzing_ips
|
|
|
|
|
[ -f "$TEMP_DIR/fuzzing_ips.txt" ] && while IFS='|' read -r ip fuzz_type total_urls unique_paths; do
|
|
|
|
|
fuzzing_ips["$ip"]=$total_urls
|
|
|
|
|
done < "$TEMP_DIR/fuzzing_ips.txt"
|
|
|
|
|
if [ -f "$TEMP_DIR/fuzzing_ips.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip fuzz_type total_urls unique_paths; do
|
|
|
|
|
[ -n "$ip" ] && fuzzing_ips["$ip"]=$total_urls
|
|
|
|
|
done < "$TEMP_DIR/fuzzing_ips.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# NEW: Load timing anomalies (consistent bot timing)
|
|
|
|
|
declare -A timing_anomalies
|
|
|
|
|
[ -f "$TEMP_DIR/timing_anomalies.txt" ] && while IFS='|' read -r ip timing_type avg_interval total_reqs; do
|
|
|
|
|
timing_anomalies["$ip"]=$avg_interval
|
|
|
|
|
done < "$TEMP_DIR/timing_anomalies.txt"
|
|
|
|
|
if [ -f "$TEMP_DIR/timing_anomalies.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip timing_type avg_interval total_reqs; do
|
|
|
|
|
[ -n "$ip" ] && timing_anomalies["$ip"]=$avg_interval
|
|
|
|
|
done < "$TEMP_DIR/timing_anomalies.txt" || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Now calculate scores for each IP (using pre-counted requests)
|
|
|
|
|
local ip_count=0
|
|
|
|
|
for ip in "${!ip_request_counts[@]}"; do
|
|
|
|
|
((ip_count++)) || true
|
|
|
|
|
|
|
|
|
|
# Skip excluded IPs
|
|
|
|
|
if is_excluded_ip "$ip"; then
|
|
|
|
|
continue
|
|
|
|
@@ -1807,10 +1848,10 @@ calculate_threat_scores() {
|
|
|
|
|
|
|
|
|
|
# NEW: Header anomalies (strong indicator of bots)
|
|
|
|
|
if [ -n "${header_anomalies[$ip]}" ]; then
|
|
|
|
|
header_score=${header_anomalies[$ip]}
|
|
|
|
|
if [ "$header_score" -ge 12 ]; then
|
|
|
|
|
header_score=${header_anomalies[$ip]:-0}
|
|
|
|
|
if [ "${header_score:-0}" -ge 12 ]; then
|
|
|
|
|
score=$((score + 8)) # Multiple header suspicions
|
|
|
|
|
elif [ "$header_score" -ge 8 ]; then
|
|
|
|
|
elif [ "${header_score:-0}" -ge 8 ]; then
|
|
|
|
|
score=$((score + 5)) # Moderate header anomalies
|
|
|
|
|
fi
|
|
|
|
|
fi
|
|
|
|
@@ -1822,10 +1863,10 @@ calculate_threat_scores() {
|
|
|
|
|
|
|
|
|
|
# NEW: Fuzzing/parameter scanning behavior
|
|
|
|
|
if [ -n "${fuzzing_ips[$ip]}" ]; then
|
|
|
|
|
fuzz_requests=${fuzzing_ips[$ip]}
|
|
|
|
|
if [ "$fuzz_requests" -gt 100 ]; then
|
|
|
|
|
fuzz_requests=${fuzzing_ips[$ip]:-0}
|
|
|
|
|
if [ "${fuzz_requests:-0}" -gt 100 ]; then
|
|
|
|
|
score=$((score + 7)) # Aggressive fuzzing
|
|
|
|
|
elif [ "$fuzz_requests" -gt 50 ]; then
|
|
|
|
|
elif [ "${fuzz_requests:-0}" -gt 50 ]; then
|
|
|
|
|
score=$((score + 4)) # Moderate fuzzing
|
|
|
|
|
fi
|
|
|
|
|
fi
|
|
|
|
@@ -1837,15 +1878,15 @@ calculate_threat_scores() {
|
|
|
|
|
|
|
|
|
|
# Admin probing - IMPROVED: Raised threshold to 50 (only failed attempts counted)
|
|
|
|
|
admin_count=${threat_admin_count[$ip]:-0}
|
|
|
|
|
if [ "$admin_count" -gt 100 ] 2>/dev/null; then
|
|
|
|
|
if [ "${admin_count:-0}" -gt 100 ]; then
|
|
|
|
|
score=$((score + 10)) # Excessive probing
|
|
|
|
|
elif [ "$admin_count" -gt 50 ] 2>/dev/null; then
|
|
|
|
|
elif [ "${admin_count:-0}" -gt 50 ]; then
|
|
|
|
|
score=$((score + 5)) # Moderate probing
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# 404 scanning
|
|
|
|
|
scan_404=${threat_404_count[$ip]:-0}
|
|
|
|
|
[ "$scan_404" -gt 50 ] 2>/dev/null && score=$((score + 3))
|
|
|
|
|
[ "${scan_404:-0}" -gt 50 ] && score=$((score + 3))
|
|
|
|
|
|
|
|
|
|
# OPTIMIZATION: Skip external API calls for performance
|
|
|
|
|
# Threat Intelligence Enrichment can be done post-analysis for high-risk IPs only
|
|
|
|
@@ -1894,10 +1935,14 @@ calculate_threat_scores() {
|
|
|
|
|
[ -n "${threat_ips_suspicious[$ip]}" ] && flag_ip_attack "$ip" "SCANNER" 0 "Bot analyzer: Suspicious user-agent" >/dev/null 2>&1
|
|
|
|
|
) &
|
|
|
|
|
fi
|
|
|
|
|
done | sort -t'|' -k1 -rn > "$TEMP_DIR/threat_scores.txt"
|
|
|
|
|
done > "$TEMP_DIR/threat_scores_unsorted.txt"
|
|
|
|
|
|
|
|
|
|
# Wait for background IP reputation updates to complete
|
|
|
|
|
wait
|
|
|
|
|
# Wait for background IP reputation updates to complete (don't fail if background jobs error)
|
|
|
|
|
wait || true
|
|
|
|
|
|
|
|
|
|
# Sort the threat scores after all background jobs are done
|
|
|
|
|
sort -t'|' -k1 -rn "$TEMP_DIR/threat_scores_unsorted.txt" > "$TEMP_DIR/threat_scores.txt" || true
|
|
|
|
|
rm -f "$TEMP_DIR/threat_scores_unsorted.txt"
|
|
|
|
|
|
|
|
|
|
print_success "Threat scores calculated and IP reputation updated"
|
|
|
|
|
}
|
|
|
|
@@ -1910,7 +1955,7 @@ detect_false_positives() {
|
|
|
|
|
print_info "Detecting legitimate services (false positives)..."
|
|
|
|
|
|
|
|
|
|
# Known monitoring service patterns and legitimate CDNs
|
|
|
|
|
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{
|
|
|
|
|
awk -F'|' '{
|
|
|
|
|
ip = $1
|
|
|
|
|
domain = $2
|
|
|
|
|
url = $3
|
|
|
|
@@ -1950,9 +1995,9 @@ detect_false_positives() {
|
|
|
|
|
else if (match(url, /checkout|payment|paypal|stripe|square/) && match(ua, /paypal|stripe|square/)) {
|
|
|
|
|
print ip "|Payment Processor|" ua "|" domain
|
|
|
|
|
}
|
|
|
|
|
}' | sort -u > "$TEMP_DIR/false_positives.txt"
|
|
|
|
|
}' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u > "$TEMP_DIR/false_positives.txt" || true
|
|
|
|
|
|
|
|
|
|
print_success "False positive detection complete ($(wc -l < "$TEMP_DIR/false_positives.txt") legitimate services identified)"
|
|
|
|
|
print_success "False positive detection complete ($(wc -l < "$TEMP_DIR/false_positives.txt" 2>/dev/null || echo 0) legitimate services identified)"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#############################################################################
|
|
|
|
@@ -1964,7 +2009,7 @@ generate_statistics() {
|
|
|
|
|
|
|
|
|
|
# OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs
|
|
|
|
|
# This reads the uncompressed file ONCE instead of 4+ separate reads
|
|
|
|
|
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" '
|
|
|
|
|
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
|
|
|
|
{
|
|
|
|
|
# Count by domain (for top sites)
|
|
|
|
|
domains[$2]++
|
|
|
|
@@ -1993,29 +2038,29 @@ generate_statistics() {
|
|
|
|
|
close(tmpdir "/top_sites_raw.txt")
|
|
|
|
|
close(tmpdir "/top_ips_raw.txt")
|
|
|
|
|
close(tmpdir "/top_urls_raw.txt")
|
|
|
|
|
}'
|
|
|
|
|
}' "$TEMP_DIR/parsed_logs.txt" 2>/dev/null
|
|
|
|
|
|
|
|
|
|
# Sort and limit results
|
|
|
|
|
sort -rn "$TEMP_DIR/top_sites_raw.txt" | head -5 > "$TEMP_DIR/top_sites.txt"
|
|
|
|
|
sort -rn "$TEMP_DIR/top_ips_raw.txt" | head -5 > "$TEMP_DIR/top_ips.txt"
|
|
|
|
|
sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt"
|
|
|
|
|
# Sort and limit results (files may not exist if no data)
|
|
|
|
|
[ -f "$TEMP_DIR/top_sites_raw.txt" ] && sort -rn "$TEMP_DIR/top_sites_raw.txt" | head -5 > "$TEMP_DIR/top_sites.txt" || touch "$TEMP_DIR/top_sites.txt"
|
|
|
|
|
[ -f "$TEMP_DIR/top_ips_raw.txt" ] && sort -rn "$TEMP_DIR/top_ips_raw.txt" | head -5 > "$TEMP_DIR/top_ips.txt" || touch "$TEMP_DIR/top_ips.txt"
|
|
|
|
|
[ -f "$TEMP_DIR/top_urls_raw.txt" ] && sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt" || touch "$TEMP_DIR/top_urls.txt"
|
|
|
|
|
|
|
|
|
|
# Top 5 bots by request count (single decompression)
|
|
|
|
|
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" {print $10}' | \
|
|
|
|
|
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt"
|
|
|
|
|
awk -F'|' '$9 != "unknown" {print $10}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | \
|
|
|
|
|
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" || true
|
|
|
|
|
|
|
|
|
|
# Traffic breakdown by bot type (single decompression)
|
|
|
|
|
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $9}' | \
|
|
|
|
|
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt"
|
|
|
|
|
awk -F'|' '{print $9}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null | \
|
|
|
|
|
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" || true
|
|
|
|
|
|
|
|
|
|
# Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep)
|
|
|
|
|
if [ -f "$TEMP_DIR/all_domains.txt" ]; then
|
|
|
|
|
# Create indexed bot traffic file (decompress once)
|
|
|
|
|
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt"
|
|
|
|
|
awk -F'|' '{print $2"|"$9}' "$TEMP_DIR/classified_bots.txt" 2>/dev/null > "$TEMP_DIR/domain_bot_types.txt" || true
|
|
|
|
|
|
|
|
|
|
while read -r domain; do
|
|
|
|
|
echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"
|
|
|
|
|
grep "^$domain|" "$TEMP_DIR/domain_bot_types.txt" 2>/dev/null | cut -d'|' -f2 | \
|
|
|
|
|
grep -F "$domain|" "$TEMP_DIR/domain_bot_types.txt" 2>/dev/null | cut -d'|' -f2 | \
|
|
|
|
|
sort | uniq -c | sort -rn >> "$TEMP_DIR/domain_${domain}_stats.txt" || true
|
|
|
|
|
done < "$TEMP_DIR/all_domains.txt"
|
|
|
|
|
fi
|
|
|
|
@@ -2058,17 +2103,17 @@ generate_comparison_report() {
|
|
|
|
|
echo ""
|
|
|
|
|
print_header "BASELINE COMPARISON (Is this activity normal?)"
|
|
|
|
|
|
|
|
|
|
local total_requests=$(grep "^Total_Requests:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
|
|
|
|
local total_requests=$(grep "^Total_Requests:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0")
|
|
|
|
|
local baseline_requests=$(calculate_baseline_average "server" "requests" 7)
|
|
|
|
|
|
|
|
|
|
if [ "$baseline_requests" -gt 0 ]; then
|
|
|
|
|
local request_pct=$((total_requests * 100 / baseline_requests))
|
|
|
|
|
if [ "${baseline_requests:-0}" -gt 0 ]; then
|
|
|
|
|
local request_pct=$((${total_requests:-0} * 100 / baseline_requests))
|
|
|
|
|
if [ "$request_pct" -gt 200 ]; then
|
|
|
|
|
echo -e "${RED}🔴 ABNORMAL: Requests are $(($request_pct - 100))% above 7-day average${NC}"
|
|
|
|
|
echo " Baseline (7-day avg): $baseline_requests requests"
|
|
|
|
|
echo " Today: $total_requests requests"
|
|
|
|
|
elif [ "$request_pct" -lt 50 ]; then
|
|
|
|
|
echo "🟢 LOW: Requests are $(($((100 - $request_pct))))% below baseline"
|
|
|
|
|
echo "🟢 LOW: Requests are $((100 - $request_pct))% below baseline"
|
|
|
|
|
else
|
|
|
|
|
echo "🟡 NORMAL: Requests within expected range"
|
|
|
|
|
fi
|
|
|
|
@@ -2076,11 +2121,11 @@ generate_comparison_report() {
|
|
|
|
|
echo "📊 (No historical baseline yet - first analysis)"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
local high_risk=$(grep "^High_Risk_IPs:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
|
|
|
|
local high_risk=$(grep "^High_Risk_IPs:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0")
|
|
|
|
|
local baseline_attacks=$(calculate_baseline_average "server" "high_risk" 7)
|
|
|
|
|
|
|
|
|
|
if [ "$baseline_attacks" -gt 0 ]; then
|
|
|
|
|
local attack_ratio=$((high_risk / baseline_attacks))
|
|
|
|
|
if [ "${baseline_attacks:-0}" -gt 0 ]; then
|
|
|
|
|
local attack_ratio=$((${high_risk:-0} / baseline_attacks))
|
|
|
|
|
if [ "$attack_ratio" -gt 3 ]; then
|
|
|
|
|
echo -e "${RED}🔴 ABNORMAL: High-risk IPs are ${attack_ratio}x above baseline${NC}"
|
|
|
|
|
echo " Baseline (7-day avg): $baseline_attacks high-risk IPs"
|
|
|
|
@@ -2101,27 +2146,27 @@ generate_comparison_report() {
|
|
|
|
|
print_header "DAY-OVER-DAY TRENDS"
|
|
|
|
|
|
|
|
|
|
# Extract metrics and calculate differences
|
|
|
|
|
local curr_high_risk=$(grep "^High_Risk_IPs:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
|
|
|
|
local prev_high_risk=$(grep "^High_Risk_IPs:" "$previous_report" | cut -d: -f2 | tr -d ' ')
|
|
|
|
|
local risk_diff=$((curr_high_risk - prev_high_risk))
|
|
|
|
|
local curr_high_risk=$(grep "^High_Risk_IPs:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0")
|
|
|
|
|
local prev_high_risk=$(grep "^High_Risk_IPs:" "$previous_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0")
|
|
|
|
|
local risk_diff=$((${curr_high_risk:-0} - ${prev_high_risk:-0}))
|
|
|
|
|
local risk_pct=0
|
|
|
|
|
|
|
|
|
|
if [ "$prev_high_risk" -gt 0 ]; then
|
|
|
|
|
if [ "${prev_high_risk:-0}" -gt 0 ]; then
|
|
|
|
|
risk_pct=$((risk_diff * 100 / prev_high_risk))
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Display trend
|
|
|
|
|
if [ "$risk_diff" -gt 0 ]; then
|
|
|
|
|
echo "⚠️ High-Risk IPs: $curr_high_risk (↑ $risk_diff IPs, +${risk_pct}%)"
|
|
|
|
|
echo "⚠️ High-Risk IPs: ${curr_high_risk:-0} (↑ $risk_diff IPs, +${risk_pct}%)"
|
|
|
|
|
elif [ "$risk_diff" -lt 0 ]; then
|
|
|
|
|
echo "✓ High-Risk IPs: $curr_high_risk (↓ $((risk_diff * -1)) IPs, ${risk_pct}%)"
|
|
|
|
|
echo "✓ High-Risk IPs: ${curr_high_risk:-0} (↓ $((risk_diff * -1)) IPs, ${risk_pct}%)"
|
|
|
|
|
else
|
|
|
|
|
echo "→ High-Risk IPs: $curr_high_risk (no change)"
|
|
|
|
|
echo "→ High-Risk IPs: ${curr_high_risk:-0} (no change)"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Repeat for other metrics
|
|
|
|
|
local curr_sql=$(grep "^SQL_Injection:" "$latest_report" | cut -d: -f2 | tr -d ' ')
|
|
|
|
|
local prev_sql=$(grep "^SQL_Injection:" "$previous_report" | cut -d: -f2 | tr -d ' ')
|
|
|
|
|
local curr_sql=$(grep "^SQL_Injection:" "$latest_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0")
|
|
|
|
|
local prev_sql=$(grep "^SQL_Injection:" "$previous_report" 2>/dev/null | cut -d: -f2 | tr -d ' ' || echo "0")
|
|
|
|
|
local sql_diff=$((curr_sql - prev_sql))
|
|
|
|
|
|
|
|
|
|
if [ "$sql_diff" -gt 0 ]; then
|
|
|
|
@@ -2285,19 +2330,27 @@ generate_report() {
|
|
|
|
|
# QUICK STATS DASHBOARD
|
|
|
|
|
print_header "QUICK STATS DASHBOARD"
|
|
|
|
|
|
|
|
|
|
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
|
|
|
|
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
|
|
|
|
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
|
|
|
|
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" | wc -l)
|
|
|
|
|
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo 0)
|
|
|
|
|
total_requests=${total_requests:-0}
|
|
|
|
|
|
|
|
|
|
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo 0)
|
|
|
|
|
unique_ips=${unique_ips:-0}
|
|
|
|
|
|
|
|
|
|
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo 0)
|
|
|
|
|
unique_domains=${unique_domains:-0}
|
|
|
|
|
|
|
|
|
|
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo 0)
|
|
|
|
|
bot_requests=${bot_requests:-0}
|
|
|
|
|
|
|
|
|
|
# Count private/internal IPs (excluded from threat analysis)
|
|
|
|
|
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)
|
|
|
|
|
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' 2>/dev/null | wc -l || echo 0)
|
|
|
|
|
private_ips=${private_ips:-0}
|
|
|
|
|
|
|
|
|
|
# Count server's own IPs in the logs
|
|
|
|
|
server_ip_hits=0
|
|
|
|
|
if [ -f "$TEMP_DIR/server_ips.txt" ] && [ -s "$TEMP_DIR/server_ips.txt" ]; then
|
|
|
|
|
while read -r server_ip; do
|
|
|
|
|
if cat "$TEMP_DIR/parsed_logs.txt" | grep -q "^$server_ip|" 2>/dev/null; then
|
|
|
|
|
if grep -q "^$server_ip|" "$TEMP_DIR/parsed_logs.txt" 2>/dev/null; then
|
|
|
|
|
server_ip_hits=$((server_ip_hits + 1))
|
|
|
|
|
fi
|
|
|
|
|
done < "$TEMP_DIR/server_ips.txt"
|
|
|
|
@@ -2331,9 +2384,9 @@ generate_report() {
|
|
|
|
|
# Traffic breakdown
|
|
|
|
|
echo "Traffic Breakdown:"
|
|
|
|
|
while read -r line; do
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}')
|
|
|
|
|
type=$(echo "$line" | awk '{print $2}')
|
|
|
|
|
pct=$(awk "BEGIN {printf \"%.1f\", ($count/$total_requests)*100}")
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
|
|
|
|
type=$(echo "$line" | awk '{print $2}' || echo "unknown")
|
|
|
|
|
pct=$(awk "BEGIN {printf \"%.1f\", (${count:-0}/${total_requests:-1})*100}" 2>/dev/null || echo "0.0")
|
|
|
|
|
|
|
|
|
|
case $type in
|
|
|
|
|
legit) echo " Legitimate Bots: $(printf "%'7d" $count) ($pct%)" ;;
|
|
|
|
@@ -2350,6 +2403,7 @@ generate_report() {
|
|
|
|
|
echo ""
|
|
|
|
|
echo "Bot Traffic Timeline (hourly):"
|
|
|
|
|
max_bot_traffic=$(awk '{print $1}' "$TEMP_DIR/hourly_bot_traffic.txt" | sort -rn | head -1)
|
|
|
|
|
max_bot_traffic=${max_bot_traffic:-1} # Prevent division by zero
|
|
|
|
|
while read -r line; do
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}')
|
|
|
|
|
hour=$(echo "$line" | awk '{print $2}')
|
|
|
|
@@ -2376,9 +2430,9 @@ generate_report() {
|
|
|
|
|
echo ""
|
|
|
|
|
echo "Response Code Analysis:"
|
|
|
|
|
while read -r line; do
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}')
|
|
|
|
|
code=$(echo "$line" | awk '{print $2}')
|
|
|
|
|
pct=$(awk "BEGIN {printf \"%.1f\", ($count/$total_requests)*100}")
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
|
|
|
|
code=$(echo "$line" | awk '{print $2}' || echo "000")
|
|
|
|
|
pct=$(awk "BEGIN {printf \"%.1f\", (${count:-0}/${total_requests:-1})*100}" 2>/dev/null || echo "0.0")
|
|
|
|
|
|
|
|
|
|
case $code in
|
|
|
|
|
200) echo " 200 (Success): $(printf "%'7d" $count) ($pct%) Bots are getting data" ;;
|
|
|
|
@@ -2396,11 +2450,19 @@ generate_report() {
|
|
|
|
|
if [ -s "$TEMP_DIR/false_positives.txt" ]; then
|
|
|
|
|
echo ""
|
|
|
|
|
echo "Whitelist Recommendations (Legitimate Services):"
|
|
|
|
|
# Pre-build IP count cache to avoid repeated grep on large file
|
|
|
|
|
declare -A ip_counts_cache
|
|
|
|
|
if [ -f "$TEMP_DIR/parsed_logs.txt" ]; then
|
|
|
|
|
while IFS='|' read -r ip rest; do
|
|
|
|
|
[ -n "$ip" ] && ((ip_counts_cache["$ip"]++)) || true
|
|
|
|
|
done < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
while read -r line; do
|
|
|
|
|
ip=$(echo "$line" | cut -d'|' -f1)
|
|
|
|
|
service=$(echo "$line" | cut -d'|' -f2)
|
|
|
|
|
domain=$(echo "$line" | cut -d'|' -f4)
|
|
|
|
|
req_count=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -c "^$ip|" || echo 0)
|
|
|
|
|
req_count=${ip_counts_cache["$ip"]:-0}
|
|
|
|
|
echo " $ip - $req_count requests - Identified as: $service"
|
|
|
|
|
echo " → Domain: $domain"
|
|
|
|
|
echo " → Action: VERIFY OWNERSHIP then whitelist"
|
|
|
|
@@ -2409,30 +2471,32 @@ generate_report() {
|
|
|
|
|
|
|
|
|
|
# NEW: HIGH-CONFIDENCE BOT FINGERPRINTS
|
|
|
|
|
if [ -s "$TEMP_DIR/bot_fingerprints.txt" ]; then
|
|
|
|
|
echo ""
|
|
|
|
|
print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)"
|
|
|
|
|
echo "These IPs show MULTIPLE bot indicators combined (not just single signal):"
|
|
|
|
|
echo ""
|
|
|
|
|
(
|
|
|
|
|
echo ""
|
|
|
|
|
print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)"
|
|
|
|
|
echo "These IPs show MULTIPLE bot indicators combined (not just single signal):"
|
|
|
|
|
echo ""
|
|
|
|
|
|
|
|
|
|
awk -F'|' '
|
|
|
|
|
NR <= 15 {
|
|
|
|
|
ip = $1
|
|
|
|
|
score = $2
|
|
|
|
|
signals = $3
|
|
|
|
|
awk -F'|' '
|
|
|
|
|
NR <= 15 {
|
|
|
|
|
ip = $1
|
|
|
|
|
score = $2
|
|
|
|
|
signals = $3
|
|
|
|
|
|
|
|
|
|
# Risk level based on score
|
|
|
|
|
if (score >= 80) risk = "CRITICAL"
|
|
|
|
|
else if (score >= 70) risk = "HIGH"
|
|
|
|
|
else if (score >= 60) risk = "MEDIUM"
|
|
|
|
|
else risk = "LOW"
|
|
|
|
|
# Risk level based on score
|
|
|
|
|
if (score >= 80) risk = "CRITICAL"
|
|
|
|
|
else if (score >= 70) risk = "HIGH"
|
|
|
|
|
else if (score >= 60) risk = "MEDIUM"
|
|
|
|
|
else risk = "LOW"
|
|
|
|
|
|
|
|
|
|
printf " %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals
|
|
|
|
|
}' "$TEMP_DIR/bot_fingerprints.txt"
|
|
|
|
|
printf " %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals
|
|
|
|
|
}' "$TEMP_DIR/bot_fingerprints.txt" || true
|
|
|
|
|
|
|
|
|
|
total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
|
|
|
|
|
echo ""
|
|
|
|
|
echo " Total high-confidence bots detected: $total IPs"
|
|
|
|
|
echo ""
|
|
|
|
|
total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
|
|
|
|
|
echo ""
|
|
|
|
|
echo " Total high-confidence bots detected: $total IPs"
|
|
|
|
|
echo ""
|
|
|
|
|
) || true
|
|
|
|
|
else
|
|
|
|
|
echo ""
|
|
|
|
|
echo " No high-confidence bot fingerprints detected (requires multiple signals)"
|
|
|
|
@@ -2450,44 +2514,24 @@ generate_report() {
|
|
|
|
|
echo ""
|
|
|
|
|
|
|
|
|
|
# Show top attacked domains with attack details
|
|
|
|
|
awk -F'|' 'NR <= 10 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
|
|
|
|
domain_attack_count=$(grep "^[^|]*|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0")
|
|
|
|
|
# Limit to top 5 domains for performance with large datasets
|
|
|
|
|
awk -F'|' 'NR <= 5 {print $1}' "$TEMP_DIR/domain_targeting.txt" 2>/dev/null | {
|
|
|
|
|
while read -r domain; do
|
|
|
|
|
[ -z "$domain" ] && continue
|
|
|
|
|
|
|
|
|
|
if [ "$domain_attack_count" -gt 0 ]; then
|
|
|
|
|
echo " Domain: $domain ($domain_attack_count attack attempts)"
|
|
|
|
|
# Use grep with strict error handling for large file searches
|
|
|
|
|
domain_attack_count=0
|
|
|
|
|
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
|
|
|
|
domain_attack_count=$(grep -F "|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l) || domain_attack_count=0
|
|
|
|
|
fi
|
|
|
|
|
domain_attack_count=${domain_attack_count:-0}
|
|
|
|
|
|
|
|
|
|
# Get all attacks on this domain, group by type
|
|
|
|
|
awk -F'|' -v dom="$domain" '
|
|
|
|
|
$2 == dom {
|
|
|
|
|
ip = $1
|
|
|
|
|
attack_type = $5
|
|
|
|
|
|
|
|
|
|
# Validate IP format
|
|
|
|
|
if (match(ip, /^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/)) {
|
|
|
|
|
attack_data[attack_type][ip]++
|
|
|
|
|
attack_totals[attack_type]++
|
|
|
|
|
subnet_hits[attack_type][substr(ip, 1, index(ip, ".", index(ip, ".")+1)-1)]++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
END {
|
|
|
|
|
for (attack_type in attack_totals) {
|
|
|
|
|
printf " └─ %s: %d attempts\n", attack_type, attack_totals[attack_type]
|
|
|
|
|
|
|
|
|
|
# Show top 3 IPs for this attack type
|
|
|
|
|
attack_count = 0
|
|
|
|
|
for (ip in attack_data[attack_type]) {
|
|
|
|
|
if (attack_count >= 3) break
|
|
|
|
|
count = attack_data[attack_type][ip]
|
|
|
|
|
split(ip, parts, ".")
|
|
|
|
|
subnet = parts[1] "." parts[2] "." parts[3] ".0/24"
|
|
|
|
|
printf " ├─ %s (%d reqs) [subnet: %s]\n", ip, count, subnet
|
|
|
|
|
attack_count++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}' "$TEMP_DIR/attack_vectors_raw.txt"
|
|
|
|
|
echo ""
|
|
|
|
|
fi
|
|
|
|
|
done
|
|
|
|
|
if [ "$domain_attack_count" -gt 0 ] 2>/dev/null; then
|
|
|
|
|
echo " Domain: $domain ($domain_attack_count attack attempts)"
|
|
|
|
|
echo ""
|
|
|
|
|
fi
|
|
|
|
|
done
|
|
|
|
|
} || true
|
|
|
|
|
else
|
|
|
|
|
echo ""
|
|
|
|
|
echo " No domain attack data available (all domains may be healthy)"
|
|
|
|
@@ -2495,34 +2539,11 @@ generate_report() {
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# NEW: TOP URLs BEING ATTACKED
|
|
|
|
|
if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then
|
|
|
|
|
if [ -s "$TEMP_DIR/domain_targeting.txt" ]; then
|
|
|
|
|
echo ""
|
|
|
|
|
print_header "TOP TARGETED URLs (What files/endpoints are bots hitting?)"
|
|
|
|
|
echo ""
|
|
|
|
|
|
|
|
|
|
# Show top URLs for top 3 most-attacked domains
|
|
|
|
|
urls_shown=0
|
|
|
|
|
awk -F'|' 'NR <= 3 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
|
|
|
|
local domain_file="$TEMP_DIR/domain_urls_${domain}.txt"
|
|
|
|
|
if [ -f "$domain_file" ] && [ -s "$domain_file" ]; then
|
|
|
|
|
echo " Domain: $domain"
|
|
|
|
|
awk -F'|' '{
|
|
|
|
|
url = $1
|
|
|
|
|
count = $2
|
|
|
|
|
printf " %3d requests → %s\n", count, url
|
|
|
|
|
}' "$domain_file" # Show all URLs, not just top 5
|
|
|
|
|
echo ""
|
|
|
|
|
fi
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
# Check if no URL data was shown
|
|
|
|
|
if [ "$urls_shown" -eq 0 ]; then
|
|
|
|
|
echo " No URL targeting data available"
|
|
|
|
|
echo ""
|
|
|
|
|
fi
|
|
|
|
|
else
|
|
|
|
|
echo ""
|
|
|
|
|
echo " No domain targeting data available"
|
|
|
|
|
echo " (Targeted URL data not available in summary - see log files for details)"
|
|
|
|
|
echo ""
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
@@ -2582,19 +2603,23 @@ generate_report() {
|
|
|
|
|
echo ""
|
|
|
|
|
|
|
|
|
|
echo "2. Top Aggressive Bots:"
|
|
|
|
|
counter=1
|
|
|
|
|
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
|
|
|
|
count=$(echo "$line" | awk 'BEGIN {count=0} {print $1}')
|
|
|
|
|
bot=$(echo "$line" | awk 'BEGIN {f=""} {$1=""; print $0}' | xargs)
|
|
|
|
|
if [ -s "$TEMP_DIR/top_bots.txt" ]; then
|
|
|
|
|
counter=1
|
|
|
|
|
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}' 2>/dev/null || echo "0")
|
|
|
|
|
bot=$(echo "$line" | awk '{$1=""; print $0}' 2>/dev/null | xargs || echo "$line")
|
|
|
|
|
|
|
|
|
|
action="Allow"
|
|
|
|
|
if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex"; then
|
|
|
|
|
action="Consider blocking (aggressive)"
|
|
|
|
|
fi
|
|
|
|
|
action="Allow"
|
|
|
|
|
if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex" 2>/dev/null; then
|
|
|
|
|
action="Consider blocking (aggressive)"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
echo " [$counter] $bot - $count requests - Action: $action"
|
|
|
|
|
counter=$((counter + 1))
|
|
|
|
|
done < "$TEMP_DIR/top_bots.txt"
|
|
|
|
|
echo " [$counter] $bot - $count requests - Action: $action"
|
|
|
|
|
counter=$((counter + 1))
|
|
|
|
|
done < "$TEMP_DIR/top_bots.txt"
|
|
|
|
|
else
|
|
|
|
|
echo " No bot data available"
|
|
|
|
|
fi
|
|
|
|
|
echo ""
|
|
|
|
|
|
|
|
|
|
echo "3. Admin Endpoint Probing:"
|
|
|
|
@@ -2631,7 +2656,7 @@ generate_report() {
|
|
|
|
|
# Calculate total bot bandwidth
|
|
|
|
|
total_bot_bandwidth=0
|
|
|
|
|
if [ -f "$TEMP_DIR/classified_bots.txt.gz" ]; then
|
|
|
|
|
total_bot_bandwidth=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
|
|
|
|
|
total_bot_bandwidth=$(awk -F'|' '$9 != "unknown" && $5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/classified_bots.txt")
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
if [ -n "$total_bot_bandwidth" ] && [ "$total_bot_bandwidth" -gt 0 ]; then
|
|
|
|
@@ -2640,7 +2665,7 @@ generate_report() {
|
|
|
|
|
# Estimate cost at $0.09/GB (typical CDN pricing)
|
|
|
|
|
estimated_cost=$(awk "BEGIN {printf \"%.2f\", ($total_bot_bandwidth/1073741824) * 0.09}")
|
|
|
|
|
|
|
|
|
|
total_bandwidth=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}')
|
|
|
|
|
total_bandwidth=$(awk -F'|' '$5 ~ /^[0-9]+$/ {sum += $5} END {print sum}' "$TEMP_DIR/parsed_logs.txt")
|
|
|
|
|
bot_pct=$(awk "BEGIN {printf \"%.1f\", ($total_bot_bandwidth/$total_bandwidth)*100}")
|
|
|
|
|
|
|
|
|
|
echo ""
|
|
|
|
@@ -2652,13 +2677,13 @@ generate_report() {
|
|
|
|
|
echo " Top bandwidth consumers:"
|
|
|
|
|
|
|
|
|
|
head -3 "$TEMP_DIR/large_transfers.txt" | while read -r line; do
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}')
|
|
|
|
|
ip=$(echo "$line" | awk '{print $2}' | cut -d'|' -f1)
|
|
|
|
|
domain=$(echo "$line" | awk '{print $2}' | cut -d'|' -f2)
|
|
|
|
|
url=$(echo "$line" | awk '{print $2}' | cut -d'|' -f3)
|
|
|
|
|
size=$(echo "$line" | awk '{print $2}' | cut -d'|' -f4)
|
|
|
|
|
size_mb=$(awk "BEGIN {printf \"%.1f\", $size/1048576}")
|
|
|
|
|
total_ip_mb=$(awk "BEGIN {printf \"%.0f\", $size * $count / 1048576}")
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
|
|
|
|
ip=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
|
|
|
|
domain=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f2 || echo "unknown")
|
|
|
|
|
url=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f3 || echo "unknown")
|
|
|
|
|
size=$(echo "$line" | awk '{print $2}' 2>/dev/null | cut -d'|' -f4 || echo "0")
|
|
|
|
|
size_mb=$(awk "BEGIN {printf \"%.1f\", ${size:-0}/1048576}" 2>/dev/null || echo "0.0")
|
|
|
|
|
total_ip_mb=$(awk "BEGIN {printf \"%.0f\", ${size:-0} * ${count:-0} / 1048576}" 2>/dev/null || echo "0")
|
|
|
|
|
printf " %s transfers from %s - %.1f MB avg (%s MB total) - %s%s\n" "$count" "$ip" "$size_mb" "$total_ip_mb" "$domain" "$url"
|
|
|
|
|
done
|
|
|
|
|
echo " Action: Verify if scraping, consider serving WebP/optimized images"
|
|
|
|
@@ -2671,17 +2696,17 @@ generate_report() {
|
|
|
|
|
|
|
|
|
|
counter=1
|
|
|
|
|
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}')
|
|
|
|
|
domain=$(echo "$line" | awk '{print $2}')
|
|
|
|
|
count=$(echo "$line" | awk '{print $1}' || echo "0")
|
|
|
|
|
domain=$(echo "$line" | awk '{print $2}' || echo "unknown")
|
|
|
|
|
|
|
|
|
|
echo "[$counter] $domain - $count requests"
|
|
|
|
|
|
|
|
|
|
# Show traffic breakdown for this domain
|
|
|
|
|
if [ -f "$TEMP_DIR/domain_${domain}_stats.txt" ]; then
|
|
|
|
|
tail -n +2 "$TEMP_DIR/domain_${domain}_stats.txt" | while read -r stat_line; do
|
|
|
|
|
stat_count=$(echo "$stat_line" | awk '{print $1}')
|
|
|
|
|
stat_type=$(echo "$stat_line" | awk '{print $2}')
|
|
|
|
|
pct=$(awk "BEGIN {printf \"%.1f\", ($stat_count/$count)*100}")
|
|
|
|
|
stat_count=$(echo "$stat_line" | awk '{print $1}' || echo "0")
|
|
|
|
|
stat_type=$(echo "$stat_line" | awk '{print $2}' || echo "unknown")
|
|
|
|
|
pct=$(awk "BEGIN {printf \"%.1f\", (${stat_count:-0}/${count:-1})*100}" 2>/dev/null || echo "0.0")
|
|
|
|
|
|
|
|
|
|
case $stat_type in
|
|
|
|
|
suspicious) echo -e " ${YELLOW}Suspicious: $stat_count ($pct%)${NC}" ;;
|
|
|
|
@@ -3362,15 +3387,15 @@ generate_recommendations() {
|
|
|
|
|
attack_scope="single_domain"
|
|
|
|
|
primary_target=$(head -1 "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null | cut -d'|' -f1)
|
|
|
|
|
# Calculate what % of high-risk IPs are targeting this domain
|
|
|
|
|
local domain_risk_count=$(head -1 "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null | cut -d'|' -f2)
|
|
|
|
|
if [ "${total_high_risk_ips:-0}" -gt 0 ]; then
|
|
|
|
|
primary_target_percentage=$(awk "BEGIN {printf \"%.0f\", ($domain_risk_count / $total_high_risk_ips) * 100}")
|
|
|
|
|
local domain_risk_count=$(head -1 "$TEMP_DIR/domain_high_risk_ips.txt" 2>/dev/null | cut -d'|' -f2 || echo "0")
|
|
|
|
|
if [ "${total_high_risk_ips:-0}" -gt 0 ] && [ "${domain_risk_count:-0}" -gt 0 ]; then
|
|
|
|
|
primary_target_percentage=$(awk "BEGIN {printf \"%.0f\", (${domain_risk_count:-0} / ${total_high_risk_ips:-0}) * 100}")
|
|
|
|
|
fi
|
|
|
|
|
elif [ "${affected_domains:-0}" -gt 1 ] && [ "${total_domains:-0}" -gt 1 ]; then
|
|
|
|
|
# Check if one domain is getting most of the traffic
|
|
|
|
|
local top_domain_count=$(head -1 "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | cut -d'|' -f5)
|
|
|
|
|
local top_domain_count=$(head -1 "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | cut -d'|' -f5 || echo "0")
|
|
|
|
|
if [ "${top_domain_count:-0}" -gt 0 ] && [ "${total_high_risk_ips:-0}" -gt 0 ]; then
|
|
|
|
|
local top_percentage=$(awk "BEGIN {printf \"%.0f\", ($top_domain_count / $total_high_risk_ips) * 100}")
|
|
|
|
|
local top_percentage=$(awk "BEGIN {printf \"%.0f\", (${top_domain_count:-0} / ${total_high_risk_ips:-0}) * 100}")
|
|
|
|
|
if [ "$top_percentage" -ge 75 ]; then
|
|
|
|
|
attack_scope="primary_target"
|
|
|
|
|
primary_target=$(head -1 "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | cut -d'|' -f1)
|
|
|
|
@@ -3673,7 +3698,7 @@ show_detailed_recommendations() {
|
|
|
|
|
local target_domain=$(echo "$action_title" | grep -oP 'to \K[^ ]+' 2>/dev/null || echo "")
|
|
|
|
|
echo "Target Domain: $target_domain"
|
|
|
|
|
if [ -s "$TEMP_DIR/domain_threats_sorted.txt" ]; then
|
|
|
|
|
grep "^$target_domain|" "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | while IFS='|' read -r domain total_req bot_req bot_pct high_risk attacks ips; do
|
|
|
|
|
grep -F "$target_domain|" "$TEMP_DIR/domain_threats_sorted.txt" 2>/dev/null | while IFS='|' read -r domain total_req bot_req bot_pct high_risk attacks ips; do
|
|
|
|
|
echo " • Total Requests: $total_req"
|
|
|
|
|
echo " • Bot Requests: $bot_req ($bot_pct%)"
|
|
|
|
|
echo " • High-Risk IPs: $high_risk"
|
|
|
|
@@ -4001,7 +4026,7 @@ execute_ip_blocking_specific() {
|
|
|
|
|
local fail_count=0
|
|
|
|
|
|
|
|
|
|
for ip in "${ips_to_block[@]}"; do
|
|
|
|
|
local score=$(grep "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
|
|
|
|
local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
|
|
|
|
|
|
|
|
|
if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
|
|
|
|
|
echo -e " ${GREEN}${NC} Blocked $ip for $duration_text (score: $score/100)"
|
|
|
|
@@ -4151,7 +4176,7 @@ execute_htaccess_domain_blocking() {
|
|
|
|
|
# Find document root for this domain using reference database
|
|
|
|
|
local doc_root=""
|
|
|
|
|
if [ -s "$SCRIPT_DIR/.sysref" ]; then
|
|
|
|
|
doc_root=$(grep "^DOMAIN|$target_domain|" "$SCRIPT_DIR/.sysref" 2>/dev/null | head -1 | cut -d'|' -f4 || echo "")
|
|
|
|
|
doc_root=$(grep -F "DOMAIN|$target_domain|" "$SCRIPT_DIR/.sysref" 2>/dev/null | head -1 | cut -d'|' -f4 || echo "")
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
if [ -z "$doc_root" ]; then
|
|
|
|
@@ -4195,10 +4220,10 @@ execute_htaccess_domain_blocking() {
|
|
|
|
|
print_info "Adding bot blocking rules..."
|
|
|
|
|
|
|
|
|
|
# Get high-risk IPs for this domain
|
|
|
|
|
local block_ips=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep "^[^|]*|$target_domain|" 2>/dev/null || true | cut -d'|' -f1 | sort -u | while read ip; do
|
|
|
|
|
local block_ips=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -F "|$target_domain|" 2>/dev/null || true | cut -d'|' -f1 | sort -u | while read ip; do
|
|
|
|
|
# Check if this IP has high threat score
|
|
|
|
|
if grep -q "|$ip$" "$TEMP_DIR/threat_scores.txt" 2>/dev/null; then
|
|
|
|
|
local score=$(grep "|$ip$" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "0")
|
|
|
|
|
if grep -q -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null; then
|
|
|
|
|
local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "0")
|
|
|
|
|
if [ "${score:-0}" -ge 70 ]; then
|
|
|
|
|
echo "$ip"
|
|
|
|
|
fi
|
|
|
|
@@ -4518,7 +4543,7 @@ offer_csf_blocking() {
|
|
|
|
|
count=$((count + 1))
|
|
|
|
|
local ip="${high_risk_ips[$i]}"
|
|
|
|
|
local score="${ip_scores[$i]}"
|
|
|
|
|
local requests=$(grep "^$ip|" "$TEMP_DIR/bot_ips.txt" 2>/dev/null | cut -d'|' -f2 || echo "0")
|
|
|
|
|
local requests=$(grep -F "$ip|" "$TEMP_DIR/bot_ips.txt" 2>/dev/null | cut -d'|' -f2 || echo "0")
|
|
|
|
|
|
|
|
|
|
# Color code by severity
|
|
|
|
|
if [ "$score" -ge 90 ]; then
|
|
|
|
@@ -4584,7 +4609,7 @@ apply_csf_blocks() {
|
|
|
|
|
|
|
|
|
|
for ip in "${ips[@]}"; do
|
|
|
|
|
# Get threat score for comment
|
|
|
|
|
local score=$(grep "|$ip$" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
|
|
|
|
local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
|
|
|
|
|
|
|
|
|
# Use csf -td for temporary deny
|
|
|
|
|
if csf -td "$ip" "$duration" "Bot threat score: $score/100 - Auto-blocked by toolkit" >/dev/null 2>&1; then
|
|
|
|
@@ -4637,7 +4662,7 @@ apply_csf_permanent_blocks() {
|
|
|
|
|
local fail_count=0
|
|
|
|
|
|
|
|
|
|
for ip in "${ips[@]}"; do
|
|
|
|
|
local score=$(grep "|$ip$" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
|
|
|
|
local score=$(grep -F "|$ip|" "$TEMP_DIR/threat_scores.txt" 2>/dev/null | cut -d'|' -f1 || echo "unknown")
|
|
|
|
|
|
|
|
|
|
# Use csf -d for permanent deny
|
|
|
|
|
if csf -d "$ip" "Bot threat score: $score/100 - Permanently blocked by toolkit" >/dev/null 2>&1; then
|
|
|
|
|