CRITICAL: Fix quote escaping in calculate_bot_fingerprint + du error handling + UUOC patterns

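QUOTE ESCAPING BUGS (same issue as before: $TEMP_DIR was spliced into the awk program with '"$TEMP_DIR"' quoting instead of being passed via -v):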
- Line 1213: calculate_bot_fingerprint() awk - Added -v tmpdir variable
- Line 1303: Fixed file redirection from broken quote syntax to tmpdir concatenation
- Line 1306: Added close() statement for bot_fingerprints.txt
- Line 1325: analyze_domain_targeting_percentage() - Added -v tmpdir variable
- Line 1364: Fixed domain_file path from broken quote syntax to tmpdir concatenation

FILE OPERATION SAFETY:
- Lines 510, 644: du | cut commands now have error handling (|| echo 0)
  - Under set -eo pipefail, a failing du would fail the whole pipeline and abort the script
  - Added 2>/dev/null and fallback value

EFFICIENCY IMPROVEMENTS (UUOC - Useless Use of cat):
- Lines 2272-2278: Replaced cat | awk/wc patterns with direct input
  - cat file | wc -l → wc -l < file
  - cat file | awk → awk < file (eliminates the extra cat process)

IMPACT:
- New fingerprinting and domain targeting analysis sections will now execute
- The du size lookups in parse_logs() and classify_bots() no longer abort the script under pipefail
- More efficient command pipelines
This commit is contained in:
Developer
2026-04-23 18:32:38 -04:00
parent 50a996bce3
commit ae1503b928
+12 -11
View File
@@ -507,7 +507,7 @@ parse_logs() {
local line_count local line_count
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt") line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
local file_size_kb local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" | cut -f1) file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")
# Compress for storage (gzip saves ~90% space on text) # Compress for storage (gzip saves ~90% space on text)
# But we keep uncompressed version for fast analysis # But we keep uncompressed version for fast analysis
@@ -641,7 +641,7 @@ classify_bots() {
local classified_count local classified_count
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt") classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
local file_size_kb local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" | cut -f1) file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")
# Compress for storage in background # Compress for storage in background
gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" & gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" &
@@ -1210,7 +1210,7 @@ calculate_bot_fingerprint() {
# Each signal contributes to confidence that an IP is a bot # Each signal contributes to confidence that an IP is a bot
# Real traffic rarely has ALL signals, bots typically have multiple # Real traffic rarely has ALL signals, bots typically have multiple
awk -F'|' ' awk -F'|' -v tmpdir="$TEMP_DIR" '
BEGIN { BEGIN {
# Initialize tracking arrays # Initialize tracking arrays
} }
@@ -1300,9 +1300,10 @@ calculate_bot_fingerprint() {
# Output fingerprint for high-confidence bots (score >= 60) # Output fingerprint for high-confidence bots (score >= 60)
if (score >= 60) { if (score >= 60) {
printf "%s|%d|%d\n", ip, score, signal_count > "'"$TEMP_DIR"'/bot_fingerprints.txt" printf "%s|%d|%d\n", ip, score, signal_count > tmpdir "/bot_fingerprints.txt"
} }
} }
close(tmpdir "/bot_fingerprints.txt")
} }
' < "$TEMP_DIR/parsed_logs.txt" ' < "$TEMP_DIR/parsed_logs.txt"
@@ -1321,7 +1322,7 @@ analyze_domain_targeting_percentage() {
# Build per-domain attack data # Build per-domain attack data
# Format: domain|attack_type|ip|count # Format: domain|attack_type|ip|count
awk -F'|' ' awk -F'|' -v tmpdir="$TEMP_DIR" '
NR == FNR { NR == FNR {
# Skip attack vectors file - using parsed_logs for all data # Skip attack vectors file - using parsed_logs for all data
next next
@@ -1360,7 +1361,7 @@ analyze_domain_targeting_percentage() {
} }
END { END {
for (domain in attack_data) { for (domain in attack_data) {
domain_file = "'"$TEMP_DIR"'/domain_attacks_" domain ".txt" domain_file = tmpdir "/domain_attacks_" domain ".txt"
for (attack_type in attack_data[domain]) { for (attack_type in attack_data[domain]) {
total = attack_totals[domain][attack_type] total = attack_totals[domain][attack_type]
for (ip in attack_data[domain][attack_type]) { for (ip in attack_data[domain][attack_type]) {
@@ -2268,13 +2269,13 @@ generate_report() {
# QUICK STATS DASHBOARD # QUICK STATS DASHBOARD
print_header "QUICK STATS DASHBOARD" print_header "QUICK STATS DASHBOARD"
total_requests=$(cat "$TEMP_DIR/parsed_logs.txt" | wc -l) total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
unique_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | wc -l) unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
unique_domains=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $2}' | sort -u | wc -l) unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
bot_requests=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown"' | wc -l) bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" | wc -l)
# Count private/internal IPs (excluded from threat analysis) # Count private/internal IPs (excluded from threat analysis)
private_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l) private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)
# Count server's own IPs in the logs # Count server's own IPs in the logs
server_ip_hits=0 server_ip_hits=0