Compare commits
9 Commits
cf362c2adf
..
dev
| Author | SHA1 | Date | |
|---|---|---|---|
| 08e8e8b5f0 | |||
| 6181da7b42 | |||
| 6a586ef721 | |||
| 43a94884e4 | |||
| da02dcfd61 | |||
| baf058d1dc | |||
| 1c3f12744b | |||
| 55dc21f6e5 | |||
| b0873bbf13 |
+188
-166
@@ -1036,7 +1036,7 @@ detect_threats() {
|
|||||||
|
|
||||||
# Breakdown by attack type
|
# Breakdown by attack type
|
||||||
for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do
|
for attack_type in sqli xss path_traversal rce_upload info_disclosure login_bruteforce; do
|
||||||
grep -F "|$attack_type" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | grep -F "|$attack_type$" | \
|
grep -F "|$attack_type|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | \
|
||||||
awk -F'|' '{print $1"|"$2"|"$3"|"$4}' | \
|
awk -F'|' '{print $1"|"$2"|"$3"|"$4}' | \
|
||||||
sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt" || true
|
sort | uniq -c | sort -rn > "$TEMP_DIR/${attack_type}_attempts.txt" || true
|
||||||
done
|
done
|
||||||
@@ -1219,6 +1219,7 @@ calculate_bot_fingerprint() {
|
|||||||
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
||||||
BEGIN {
|
BEGIN {
|
||||||
# Initialize tracking arrays
|
# Initialize tracking arrays
|
||||||
|
fingerprint_file = tmpdir "/bot_fingerprints.txt"
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
ip = $1
|
ip = $1
|
||||||
@@ -1306,10 +1307,10 @@ calculate_bot_fingerprint() {
|
|||||||
|
|
||||||
# Output fingerprint for high-confidence bots (score >= 60)
|
# Output fingerprint for high-confidence bots (score >= 60)
|
||||||
if (score >= 60) {
|
if (score >= 60) {
|
||||||
printf "%s|%d|%d\n", ip, score, signal_count > tmpdir "/bot_fingerprints.txt"
|
printf "%s|%d|%d\n", ip, score, signal_count > fingerprint_file
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close(tmpdir "/bot_fingerprints.txt")
|
close(fingerprint_file)
|
||||||
}
|
}
|
||||||
' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true
|
' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true
|
||||||
|
|
||||||
@@ -1356,7 +1357,7 @@ analyze_domain_targeting_percentage() {
|
|||||||
# Also create per-domain attack type breakdown
|
# Also create per-domain attack type breakdown
|
||||||
# Format: domain|attack_type|ip|count
|
# Format: domain|attack_type|ip|count
|
||||||
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
||||||
awk -F'|' '
|
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
||||||
{
|
{
|
||||||
ip = $1
|
ip = $1
|
||||||
domain = $2
|
domain = $2
|
||||||
@@ -1368,7 +1369,6 @@ analyze_domain_targeting_percentage() {
|
|||||||
}
|
}
|
||||||
END {
|
END {
|
||||||
for (domain in attack_data) {
|
for (domain in attack_data) {
|
||||||
domain_file = tmpdir "/domain_attacks_" domain ".txt"
|
|
||||||
for (attack_type in attack_data[domain]) {
|
for (attack_type in attack_data[domain]) {
|
||||||
total = attack_totals[domain][attack_type]
|
total = attack_totals[domain][attack_type]
|
||||||
for (ip in attack_data[domain][attack_type]) {
|
for (ip in attack_data[domain][attack_type]) {
|
||||||
@@ -1378,7 +1378,7 @@ analyze_domain_targeting_percentage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
' -v tmpdir="$TEMP_DIR" < "$TEMP_DIR/attack_vectors_raw.txt"
|
' < "$TEMP_DIR/attack_vectors_raw.txt"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
print_success "Domain attack pattern analysis complete"
|
print_success "Domain attack pattern analysis complete"
|
||||||
@@ -1608,11 +1608,9 @@ is_excluded_ip() {
|
|||||||
return 0 # True - should be excluded
|
return 0 # True - should be excluded
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if it's the server's own IP
|
# Check if it's the server's own IP (using pre-loaded array for speed)
|
||||||
if [ -f "$TEMP_DIR/server_ips.txt" ]; then
|
if [ -n "${server_ips_array[$ip]}" ]; then
|
||||||
if grep -qFx "$ip" "$TEMP_DIR/server_ips.txt" 2>/dev/null; then
|
return 0 # True - should be excluded
|
||||||
return 0 # True - should be excluded
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
return 1 # False - should not be excluded
|
return 1 # False - should not be excluded
|
||||||
@@ -1656,54 +1654,78 @@ analyze_time_series() {
|
|||||||
calculate_threat_scores() {
|
calculate_threat_scores() {
|
||||||
print_info "Calculating threat scores..."
|
print_info "Calculating threat scores..."
|
||||||
|
|
||||||
# Pre-count requests per IP (MUCH faster than grepping for each IP)
|
# Pre-load server IPs for fast exclusion checking (avoids grep in loop)
|
||||||
|
declare -A server_ips_array
|
||||||
|
if [ -f "$TEMP_DIR/server_ips.txt" ]; then
|
||||||
|
mapfile -t server_ips_list < "$TEMP_DIR/server_ips.txt" 2>/dev/null || true
|
||||||
|
for ip in "${server_ips_list[@]:-}"; do
|
||||||
|
[ -n "$ip" ] && server_ips_array["$ip"]=1
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Pre-count requests per IP using mapfile (faster than while-read on large files)
|
||||||
declare -A ip_request_counts
|
declare -A ip_request_counts
|
||||||
while IFS='|' read -r ip rest; do
|
if [ -f "$TEMP_DIR/parsed_logs.txt" ]; then
|
||||||
((ip_request_counts["$ip"]++))
|
mapfile -t parsed_lines < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true
|
||||||
done < "$TEMP_DIR/parsed_logs.txt"
|
for line in "${parsed_lines[@]:-}"; do
|
||||||
|
ip="${line%%|*}"
|
||||||
|
[ -n "$ip" ] && ((ip_request_counts["$ip"]++)) || true
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
# Build hash tables from threat files for O(1) lookups
|
# Build hash tables from threat files for O(1) lookups
|
||||||
# OPTIMIZATION: Use awk instead of echo|awk|cut in loops (10x faster)
|
# OPTIMIZATION: Use awk instead of echo|awk|cut in loops (10x faster)
|
||||||
declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
|
declare -A threat_ips_sqli threat_ips_xss threat_ips_path threat_ips_rce threat_ips_login
|
||||||
declare -A threat_ips_suspicious threat_ips_ddos threat_admin_count threat_404_count
|
declare -A threat_ips_suspicious threat_ips_ddos threat_admin_count threat_404_count
|
||||||
|
|
||||||
# Parse each threat file and build hash tables (optimized with awk)
|
# Parse each threat file and build hash tables (using mapfile to avoid subshells)
|
||||||
[ -f "$TEMP_DIR/sqli_attempts.txt" ] && while read -r ip; do
|
if [ -f "$TEMP_DIR/sqli_attempts.txt" ]; then
|
||||||
threat_ips_sqli["$ip"]=1
|
mapfile -t sqli_ips < <(awk '{print $2}' "$TEMP_DIR/sqli_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
||||||
done < <(awk '{print $2}' "$TEMP_DIR/sqli_attempts.txt" | cut -d'|' -f1)
|
for ip in "${sqli_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_sqli["$ip"]=1; done
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/xss_attempts.txt" ] && while read -r ip; do
|
if [ -f "$TEMP_DIR/xss_attempts.txt" ]; then
|
||||||
threat_ips_xss["$ip"]=1
|
mapfile -t xss_ips < <(awk '{print $2}' "$TEMP_DIR/xss_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
||||||
done < <(awk '{print $2}' "$TEMP_DIR/xss_attempts.txt" | cut -d'|' -f1)
|
for ip in "${xss_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_xss["$ip"]=1; done
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/path_traversal_attempts.txt" ] && while read -r ip; do
|
if [ -f "$TEMP_DIR/path_traversal_attempts.txt" ]; then
|
||||||
threat_ips_path["$ip"]=1
|
mapfile -t path_ips < <(awk '{print $2}' "$TEMP_DIR/path_traversal_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
||||||
done < <(awk '{print $2}' "$TEMP_DIR/path_traversal_attempts.txt" | cut -d'|' -f1)
|
for ip in "${path_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_path["$ip"]=1; done
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/rce_upload_attempts.txt" ] && while read -r ip; do
|
if [ -f "$TEMP_DIR/rce_upload_attempts.txt" ]; then
|
||||||
threat_ips_rce["$ip"]=1
|
mapfile -t rce_ips < <(awk '{print $2}' "$TEMP_DIR/rce_upload_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
||||||
done < <(awk '{print $2}' "$TEMP_DIR/rce_upload_attempts.txt" | cut -d'|' -f1)
|
for ip in "${rce_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_rce["$ip"]=1; done
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/login_bruteforce_attempts.txt" ] && while read -r ip; do
|
if [ -f "$TEMP_DIR/login_bruteforce_attempts.txt" ]; then
|
||||||
threat_ips_login["$ip"]=1
|
mapfile -t login_ips < <(awk '{print $2}' "$TEMP_DIR/login_bruteforce_attempts.txt" 2>/dev/null | cut -d'|' -f1) || true
|
||||||
done < <(awk '{print $2}' "$TEMP_DIR/login_bruteforce_attempts.txt" | cut -d'|' -f1)
|
for ip in "${login_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_login["$ip"]=1; done
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/suspicious_ua.txt" ] && while read -r ip; do
|
if [ -f "$TEMP_DIR/suspicious_ua.txt" ]; then
|
||||||
threat_ips_suspicious["$ip"]=1
|
mapfile -t susp_ips < <(awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" 2>/dev/null | cut -d'|' -f1) || true
|
||||||
done < <(awk '{print $2}' "$TEMP_DIR/suspicious_ua.txt" | cut -d'|' -f1)
|
for ip in "${susp_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_suspicious["$ip"]=1; done
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/rapid_fire_ips.txt" ] && while read -r ip; do
|
if [ -f "$TEMP_DIR/rapid_fire_ips.txt" ]; then
|
||||||
threat_ips_ddos["$ip"]=1
|
mapfile -t ddos_ips < <(awk '{print $2}' "$TEMP_DIR/rapid_fire_ips.txt" 2>/dev/null) || true
|
||||||
done < <(awk '{print $2}' "$TEMP_DIR/rapid_fire_ips.txt")
|
for ip in "${ddos_ips[@]:-}"; do [ -n "$ip" ] && threat_ips_ddos["$ip"]=1; done
|
||||||
|
fi
|
||||||
|
|
||||||
# Parse count-based threat files
|
# Parse count-based threat files
|
||||||
[ -f "$TEMP_DIR/admin_probes.txt" ] && while read -r count ip; do
|
if [ -f "$TEMP_DIR/admin_probes.txt" ]; then
|
||||||
[ -n "$ip" ] && threat_admin_count["$ip"]=$count
|
while IFS=' ' read -r count ip rest; do
|
||||||
done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" | sed 's/|.*//')
|
[ -n "$ip" ] && threat_admin_count["$ip"]=$count
|
||||||
|
done < <(awk '{print $1, $2}' "$TEMP_DIR/admin_probes.txt" 2>/dev/null || true)
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/404_scans.txt" ] && while read -r count ip; do
|
if [ -f "$TEMP_DIR/404_scans.txt" ]; then
|
||||||
[ -n "$ip" ] && threat_404_count["$ip"]=$count
|
while IFS=' ' read -r count ip rest; do
|
||||||
done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" | sed 's/|.*//')
|
[ -n "$ip" ] && threat_404_count["$ip"]=$count
|
||||||
|
done < <(awk '{print $1, $2}' "$TEMP_DIR/404_scans.txt" 2>/dev/null || true)
|
||||||
|
fi
|
||||||
|
|
||||||
# NEW: Load bot classifications to skip volume scoring for legitimate bots
|
# NEW: Load bot classifications to skip volume scoring for legitimate bots
|
||||||
declare -A legit_bot_ips
|
declare -A legit_bot_ips
|
||||||
@@ -1712,50 +1734,67 @@ calculate_threat_scores() {
|
|||||||
if [ "$bot_type" = "legit" ]; then
|
if [ "$bot_type" = "legit" ]; then
|
||||||
legit_bot_ips["$ip"]=1
|
legit_bot_ips["$ip"]=1
|
||||||
fi
|
fi
|
||||||
done < "$TEMP_DIR/classified_bots.txt"
|
done < "$TEMP_DIR/classified_bots.txt" || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# NEW: Load success rate data for scanning/scraping detection
|
# NEW: Load success rate data for scanning/scraping detection
|
||||||
declare -A scanner_ips scraper_ips ip_fail_rates
|
declare -A scanner_ips scraper_ips ip_fail_rates
|
||||||
[ -f "$TEMP_DIR/high_failure_ips.txt" ] && while IFS='|' read -r ip total fail_rate category; do
|
if [ -f "$TEMP_DIR/high_failure_ips.txt" ]; then
|
||||||
scanner_ips["$ip"]=$fail_rate
|
while IFS='|' read -r ip total fail_rate category; do
|
||||||
done < "$TEMP_DIR/high_failure_ips.txt"
|
[ -n "$ip" ] && scanner_ips["$ip"]=$fail_rate
|
||||||
|
done < "$TEMP_DIR/high_failure_ips.txt" || true
|
||||||
|
fi
|
||||||
|
|
||||||
[ -f "$TEMP_DIR/high_success_ips.txt" ] && while IFS='|' read -r ip total success_rate category; do
|
if [ -f "$TEMP_DIR/high_success_ips.txt" ]; then
|
||||||
scraper_ips["$ip"]=$success_rate
|
while IFS='|' read -r ip total success_rate category; do
|
||||||
done < "$TEMP_DIR/high_success_ips.txt"
|
[ -n "$ip" ] && scraper_ips["$ip"]=$success_rate
|
||||||
|
done < "$TEMP_DIR/high_success_ips.txt" || true
|
||||||
|
fi
|
||||||
|
|
||||||
# Load all fail rates for threshold checks
|
# Load all fail rates for threshold checks
|
||||||
[ -f "$TEMP_DIR/ip_success_rates.txt" ] && while IFS='|' read -r ip total success_rate fail_rate; do
|
if [ -f "$TEMP_DIR/ip_success_rates.txt" ]; then
|
||||||
ip_fail_rates["$ip"]=$fail_rate
|
while IFS='|' read -r ip total success_rate fail_rate; do
|
||||||
done < "$TEMP_DIR/ip_success_rates.txt"
|
[ -n "$ip" ] && ip_fail_rates["$ip"]=$fail_rate
|
||||||
|
done < "$TEMP_DIR/ip_success_rates.txt" || true
|
||||||
|
fi
|
||||||
|
|
||||||
# NEW: Load header anomalies
|
# NEW: Load header anomalies
|
||||||
declare -A header_anomalies
|
declare -A header_anomalies
|
||||||
[ -f "$TEMP_DIR/header_anomalies.txt" ] && while IFS='|' read -r ip anomaly_type score; do
|
if [ -f "$TEMP_DIR/header_anomalies.txt" ]; then
|
||||||
header_anomalies["$ip"]=$score
|
while IFS='|' read -r ip anomaly_type score; do
|
||||||
done < "$TEMP_DIR/header_anomalies.txt"
|
[ -n "$ip" ] && header_anomalies["$ip"]=$score
|
||||||
|
done < "$TEMP_DIR/header_anomalies.txt" || true
|
||||||
|
fi
|
||||||
|
|
||||||
# NEW: Load suspicious entry points
|
# NEW: Load suspicious entry points
|
||||||
declare -A suspicious_entry_ips
|
declare -A suspicious_entry_ips
|
||||||
[ -f "$TEMP_DIR/suspicious_entry_points.txt" ] && while IFS='|' read -r ip entry_type url status; do
|
if [ -f "$TEMP_DIR/suspicious_entry_points.txt" ]; then
|
||||||
suspicious_entry_ips["$ip"]=1
|
while IFS='|' read -r ip entry_type url status; do
|
||||||
done < "$TEMP_DIR/suspicious_entry_points.txt"
|
[ -n "$ip" ] && suspicious_entry_ips["$ip"]=1
|
||||||
|
done < "$TEMP_DIR/suspicious_entry_points.txt" || true
|
||||||
|
fi
|
||||||
|
|
||||||
# NEW: Load fuzzing/parameter scanning IPs
|
# NEW: Load fuzzing/parameter scanning IPs
|
||||||
declare -A fuzzing_ips
|
declare -A fuzzing_ips
|
||||||
[ -f "$TEMP_DIR/fuzzing_ips.txt" ] && while IFS='|' read -r ip fuzz_type total_urls unique_paths; do
|
if [ -f "$TEMP_DIR/fuzzing_ips.txt" ]; then
|
||||||
fuzzing_ips["$ip"]=$total_urls
|
while IFS='|' read -r ip fuzz_type total_urls unique_paths; do
|
||||||
done < "$TEMP_DIR/fuzzing_ips.txt"
|
[ -n "$ip" ] && fuzzing_ips["$ip"]=$total_urls
|
||||||
|
done < "$TEMP_DIR/fuzzing_ips.txt" || true
|
||||||
|
fi
|
||||||
|
|
||||||
# NEW: Load timing anomalies (consistent bot timing)
|
# NEW: Load timing anomalies (consistent bot timing)
|
||||||
declare -A timing_anomalies
|
declare -A timing_anomalies
|
||||||
[ -f "$TEMP_DIR/timing_anomalies.txt" ] && while IFS='|' read -r ip timing_type avg_interval total_reqs; do
|
if [ -f "$TEMP_DIR/timing_anomalies.txt" ]; then
|
||||||
timing_anomalies["$ip"]=$avg_interval
|
while IFS='|' read -r ip timing_type avg_interval total_reqs; do
|
||||||
done < "$TEMP_DIR/timing_anomalies.txt"
|
[ -n "$ip" ] && timing_anomalies["$ip"]=$avg_interval
|
||||||
|
done < "$TEMP_DIR/timing_anomalies.txt" || true
|
||||||
|
fi
|
||||||
|
|
||||||
# Now calculate scores for each IP (using pre-counted requests)
|
# Now calculate scores for each IP (using pre-counted requests)
|
||||||
|
local ip_count=0
|
||||||
for ip in "${!ip_request_counts[@]}"; do
|
for ip in "${!ip_request_counts[@]}"; do
|
||||||
|
((ip_count++)) || true
|
||||||
|
|
||||||
# Skip excluded IPs
|
# Skip excluded IPs
|
||||||
if is_excluded_ip "$ip"; then
|
if is_excluded_ip "$ip"; then
|
||||||
continue
|
continue
|
||||||
@@ -1896,11 +1935,15 @@ calculate_threat_scores() {
|
|||||||
[ -n "${threat_ips_suspicious[$ip]}" ] && flag_ip_attack "$ip" "SCANNER" 0 "Bot analyzer: Suspicious user-agent" >/dev/null 2>&1
|
[ -n "${threat_ips_suspicious[$ip]}" ] && flag_ip_attack "$ip" "SCANNER" 0 "Bot analyzer: Suspicious user-agent" >/dev/null 2>&1
|
||||||
) &
|
) &
|
||||||
fi
|
fi
|
||||||
done | sort -t'|' -k1 -rn > "$TEMP_DIR/threat_scores.txt"
|
done > "$TEMP_DIR/threat_scores_unsorted.txt"
|
||||||
|
|
||||||
# Wait for background IP reputation updates to complete (don't fail if background jobs error)
|
# Wait for background IP reputation updates to complete (don't fail if background jobs error)
|
||||||
wait || true
|
wait || true
|
||||||
|
|
||||||
|
# Sort the threat scores after all background jobs are done
|
||||||
|
sort -t'|' -k1 -rn "$TEMP_DIR/threat_scores_unsorted.txt" > "$TEMP_DIR/threat_scores.txt" || true
|
||||||
|
rm -f "$TEMP_DIR/threat_scores_unsorted.txt"
|
||||||
|
|
||||||
print_success "Threat scores calculated and IP reputation updated"
|
print_success "Threat scores calculated and IP reputation updated"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2287,13 +2330,21 @@ generate_report() {
|
|||||||
# QUICK STATS DASHBOARD
|
# QUICK STATS DASHBOARD
|
||||||
print_header "QUICK STATS DASHBOARD"
|
print_header "QUICK STATS DASHBOARD"
|
||||||
|
|
||||||
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
|
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo 0)
|
||||||
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo "0")
|
total_requests=${total_requests:-0}
|
||||||
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo "0")
|
|
||||||
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo "0")
|
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo 0)
|
||||||
|
unique_ips=${unique_ips:-0}
|
||||||
|
|
||||||
|
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | wc -l || echo 0)
|
||||||
|
unique_domains=${unique_domains:-0}
|
||||||
|
|
||||||
|
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" 2>/dev/null | wc -l || echo 0)
|
||||||
|
bot_requests=${bot_requests:-0}
|
||||||
|
|
||||||
# Count private/internal IPs (excluded from threat analysis)
|
# Count private/internal IPs (excluded from threat analysis)
|
||||||
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' 2>/dev/null | wc -l || echo "0")
|
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' 2>/dev/null | wc -l || echo 0)
|
||||||
|
private_ips=${private_ips:-0}
|
||||||
|
|
||||||
# Count server's own IPs in the logs
|
# Count server's own IPs in the logs
|
||||||
server_ip_hits=0
|
server_ip_hits=0
|
||||||
@@ -2399,11 +2450,19 @@ generate_report() {
|
|||||||
if [ -s "$TEMP_DIR/false_positives.txt" ]; then
|
if [ -s "$TEMP_DIR/false_positives.txt" ]; then
|
||||||
echo ""
|
echo ""
|
||||||
echo "Whitelist Recommendations (Legitimate Services):"
|
echo "Whitelist Recommendations (Legitimate Services):"
|
||||||
|
# Pre-build IP count cache to avoid repeated grep on large file
|
||||||
|
declare -A ip_counts_cache
|
||||||
|
if [ -f "$TEMP_DIR/parsed_logs.txt" ]; then
|
||||||
|
while IFS='|' read -r ip rest; do
|
||||||
|
[ -n "$ip" ] && ((ip_counts_cache["$ip"]++)) || true
|
||||||
|
done < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
while read -r line; do
|
while read -r line; do
|
||||||
ip=$(echo "$line" | cut -d'|' -f1)
|
ip=$(echo "$line" | cut -d'|' -f1)
|
||||||
service=$(echo "$line" | cut -d'|' -f2)
|
service=$(echo "$line" | cut -d'|' -f2)
|
||||||
domain=$(echo "$line" | cut -d'|' -f4)
|
domain=$(echo "$line" | cut -d'|' -f4)
|
||||||
req_count=$(cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | grep -c "^$ip|" || echo 0)
|
req_count=${ip_counts_cache["$ip"]:-0}
|
||||||
echo " $ip - $req_count requests - Identified as: $service"
|
echo " $ip - $req_count requests - Identified as: $service"
|
||||||
echo " → Domain: $domain"
|
echo " → Domain: $domain"
|
||||||
echo " → Action: VERIFY OWNERSHIP then whitelist"
|
echo " → Action: VERIFY OWNERSHIP then whitelist"
|
||||||
@@ -2412,30 +2471,32 @@ generate_report() {
|
|||||||
|
|
||||||
# NEW: HIGH-CONFIDENCE BOT FINGERPRINTS
|
# NEW: HIGH-CONFIDENCE BOT FINGERPRINTS
|
||||||
if [ -s "$TEMP_DIR/bot_fingerprints.txt" ]; then
|
if [ -s "$TEMP_DIR/bot_fingerprints.txt" ]; then
|
||||||
echo ""
|
(
|
||||||
print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)"
|
echo ""
|
||||||
echo "These IPs show MULTIPLE bot indicators combined (not just single signal):"
|
print_header "HIGH-CONFIDENCE BOT FINGERPRINTS (Multi-signal analysis - reduced false positives)"
|
||||||
echo ""
|
echo "These IPs show MULTIPLE bot indicators combined (not just single signal):"
|
||||||
|
echo ""
|
||||||
|
|
||||||
awk -F'|' '
|
awk -F'|' '
|
||||||
NR <= 15 {
|
NR <= 15 {
|
||||||
ip = $1
|
ip = $1
|
||||||
score = $2
|
score = $2
|
||||||
signals = $3
|
signals = $3
|
||||||
|
|
||||||
# Risk level based on score
|
# Risk level based on score
|
||||||
if (score >= 80) risk = "CRITICAL"
|
if (score >= 80) risk = "CRITICAL"
|
||||||
else if (score >= 70) risk = "HIGH"
|
else if (score >= 70) risk = "HIGH"
|
||||||
else if (score >= 60) risk = "MEDIUM"
|
else if (score >= 60) risk = "MEDIUM"
|
||||||
else risk = "LOW"
|
else risk = "LOW"
|
||||||
|
|
||||||
printf " %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals
|
printf " %s - Score: %2d/100 - Risk: %s - Signals: %d\n", ip, score, risk, signals
|
||||||
}' "$TEMP_DIR/bot_fingerprints.txt"
|
}' "$TEMP_DIR/bot_fingerprints.txt" || true
|
||||||
|
|
||||||
total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
|
total=$(wc -l < "$TEMP_DIR/bot_fingerprints.txt" 2>/dev/null || echo "0")
|
||||||
echo ""
|
echo ""
|
||||||
echo " Total high-confidence bots detected: $total IPs"
|
echo " Total high-confidence bots detected: $total IPs"
|
||||||
echo ""
|
echo ""
|
||||||
|
) || true
|
||||||
else
|
else
|
||||||
echo ""
|
echo ""
|
||||||
echo " No high-confidence bot fingerprints detected (requires multiple signals)"
|
echo " No high-confidence bot fingerprints detected (requires multiple signals)"
|
||||||
@@ -2453,44 +2514,24 @@ generate_report() {
|
|||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Show top attacked domains with attack details
|
# Show top attacked domains with attack details
|
||||||
awk -F'|' 'NR <= 10 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
# Limit to top 5 domains for performance with large datasets
|
||||||
domain_attack_count=$(grep -F "|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l || echo "0")
|
awk -F'|' 'NR <= 5 {print $1}' "$TEMP_DIR/domain_targeting.txt" 2>/dev/null | {
|
||||||
|
while read -r domain; do
|
||||||
|
[ -z "$domain" ] && continue
|
||||||
|
|
||||||
if [ "$domain_attack_count" -gt 0 ]; then
|
# Use grep with strict error handling for large file searches
|
||||||
echo " Domain: $domain ($domain_attack_count attack attempts)"
|
domain_attack_count=0
|
||||||
|
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
||||||
|
domain_attack_count=$(grep -F "|${domain}|" "$TEMP_DIR/attack_vectors_raw.txt" 2>/dev/null | wc -l) || domain_attack_count=0
|
||||||
|
fi
|
||||||
|
domain_attack_count=${domain_attack_count:-0}
|
||||||
|
|
||||||
# Get all attacks on this domain, group by type
|
if [ "$domain_attack_count" -gt 0 ] 2>/dev/null; then
|
||||||
awk -F'|' -v dom="$domain" '
|
echo " Domain: $domain ($domain_attack_count attack attempts)"
|
||||||
$2 == dom {
|
echo ""
|
||||||
ip = $1
|
fi
|
||||||
attack_type = $5
|
done
|
||||||
|
} || true
|
||||||
# Validate IP format
|
|
||||||
if (match(ip, /^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$/)) {
|
|
||||||
attack_data[attack_type][ip]++
|
|
||||||
attack_totals[attack_type]++
|
|
||||||
subnet_hits[attack_type][substr(ip, 1, index(ip, ".", index(ip, ".")+1)-1)]++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
END {
|
|
||||||
for (attack_type in attack_totals) {
|
|
||||||
printf " └─ %s: %d attempts\n", attack_type, attack_totals[attack_type]
|
|
||||||
|
|
||||||
# Show top 3 IPs for this attack type
|
|
||||||
attack_count = 0
|
|
||||||
for (ip in attack_data[attack_type]) {
|
|
||||||
if (attack_count >= 3) break
|
|
||||||
count = attack_data[attack_type][ip]
|
|
||||||
split(ip, parts, ".")
|
|
||||||
subnet = parts[1] "." parts[2] "." parts[3] ".0/24"
|
|
||||||
printf " ├─ %s (%d reqs) [subnet: %s]\n", ip, count, subnet
|
|
||||||
attack_count++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}' "$TEMP_DIR/attack_vectors_raw.txt"
|
|
||||||
echo ""
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
else
|
else
|
||||||
echo ""
|
echo ""
|
||||||
echo " No domain attack data available (all domains may be healthy)"
|
echo " No domain attack data available (all domains may be healthy)"
|
||||||
@@ -2498,34 +2539,11 @@ generate_report() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# NEW: TOP URLs BEING ATTACKED
|
# NEW: TOP URLs BEING ATTACKED
|
||||||
if [ -f "$TEMP_DIR/domain_targeting.txt" ]; then
|
if [ -s "$TEMP_DIR/domain_targeting.txt" ]; then
|
||||||
echo ""
|
echo ""
|
||||||
print_header "TOP TARGETED URLs (What files/endpoints are bots hitting?)"
|
print_header "TOP TARGETED URLs (What files/endpoints are bots hitting?)"
|
||||||
echo ""
|
echo ""
|
||||||
|
echo " (Targeted URL data not available in summary - see log files for details)"
|
||||||
# Show top URLs for top 3 most-attacked domains
|
|
||||||
urls_shown=0
|
|
||||||
awk -F'|' 'NR <= 3 {print $1}' "$TEMP_DIR/domain_targeting.txt" | while read -r domain; do
|
|
||||||
local domain_file="$TEMP_DIR/domain_urls_${domain}.txt"
|
|
||||||
if [ -f "$domain_file" ] && [ -s "$domain_file" ]; then
|
|
||||||
echo " Domain: $domain"
|
|
||||||
awk -F'|' '{
|
|
||||||
url = $1
|
|
||||||
count = $2
|
|
||||||
printf " %3d requests → %s\n", count, url
|
|
||||||
}' "$domain_file" # Show all URLs, not just top 5
|
|
||||||
echo ""
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Check if no URL data was shown
|
|
||||||
if [ "$urls_shown" -eq 0 ]; then
|
|
||||||
echo " No URL targeting data available"
|
|
||||||
echo ""
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo ""
|
|
||||||
echo " No domain targeting data available"
|
|
||||||
echo ""
|
echo ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -2585,19 +2603,23 @@ generate_report() {
|
|||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
echo "2. Top Aggressive Bots:"
|
echo "2. Top Aggressive Bots:"
|
||||||
counter=1
|
if [ -s "$TEMP_DIR/top_bots.txt" ]; then
|
||||||
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
counter=1
|
||||||
count=$(echo "$line" | awk 'BEGIN {count=0} {print $1}')
|
while read -r line && [ "${counter:-0}" -le 5 ]; do
|
||||||
bot=$(echo "$line" | awk 'BEGIN {f=""} {$1=""; print $0}' | xargs)
|
count=$(echo "$line" | awk '{print $1}' 2>/dev/null || echo "0")
|
||||||
|
bot=$(echo "$line" | awk '{$1=""; print $0}' 2>/dev/null | xargs || echo "$line")
|
||||||
action="Allow"
|
|
||||||
if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex"; then
|
action="Allow"
|
||||||
action="Consider blocking (aggressive)"
|
if echo "$bot" | grep -qiE "ahrefs|semrush|dotbot|blex|megaindex" 2>/dev/null; then
|
||||||
fi
|
action="Consider blocking (aggressive)"
|
||||||
|
fi
|
||||||
echo " [$counter] $bot - $count requests - Action: $action"
|
|
||||||
counter=$((counter + 1))
|
echo " [$counter] $bot - $count requests - Action: $action"
|
||||||
done < "$TEMP_DIR/top_bots.txt"
|
counter=$((counter + 1))
|
||||||
|
done < "$TEMP_DIR/top_bots.txt"
|
||||||
|
else
|
||||||
|
echo " No bot data available"
|
||||||
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
echo "3. Admin Endpoint Probing:"
|
echo "3. Admin Endpoint Probing:"
|
||||||
|
|||||||
Reference in New Issue
Block a user