CRITICAL: Fix quote escaping in calculate_bot_fingerprint + du error handling + UUOC patterns

QUOTE ESCAPING BUGS (Same issue as before):
- Line 1213: calculate_bot_fingerprint() awk - Added -v tmpdir variable
- Line 1303: Fixed file redirection from broken quote syntax to tmpdir concatenation
- Line 1306: Added close() statement for bot_fingerprints.txt
- Line 1325: analyze_domain_targeting_percentage() - Added -v tmpdir variable
- Line 1364: Fixed domain_file path from broken quote syntax to tmpdir concatenation

FILE OPERATION SAFETY:
- Lines 510, 644: du | cut pipelines now fall back to 0 on failure (|| echo "0")
  - Under set -eo pipefail, a failing du would otherwise abort the script
  - Added 2>/dev/null to silence du's error output, plus the fallback value

EFFICIENCY IMPROVEMENTS (UUOC):
- Lines 2272-2278: Replaced cat | awk/wc patterns with input redirection
  - cat file | wc -l → wc -l < file
  - cat file | awk ... → awk ... < file (one fewer process per pipeline)

IMPACT:
- New fingerprinting and domain targeting analysis sections will now execute
- The du-based file size lookups are now safe from pipefail crashes
- More efficient command pipelines
This commit is contained in:
Developer
2026-04-23 18:32:38 -04:00
parent 50a996bce3
commit ae1503b928
+12 -11
View File
@@ -507,7 +507,7 @@ parse_logs() {
local line_count
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" | cut -f1)
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")
# Compress for storage (gzip saves ~90% space on text)
# But we keep uncompressed version for fast analysis
@@ -641,7 +641,7 @@ classify_bots() {
local classified_count
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" | cut -f1)
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")
# Compress for storage in background
gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" &
@@ -1210,7 +1210,7 @@ calculate_bot_fingerprint() {
# Each signal contributes to confidence that an IP is a bot
# Real traffic rarely has ALL signals, bots typically have multiple
awk -F'|' '
awk -F'|' -v tmpdir="$TEMP_DIR" '
BEGIN {
# Initialize tracking arrays
}
@@ -1300,9 +1300,10 @@ calculate_bot_fingerprint() {
# Output fingerprint for high-confidence bots (score >= 60)
if (score >= 60) {
printf "%s|%d|%d\n", ip, score, signal_count > "'"$TEMP_DIR"'/bot_fingerprints.txt"
printf "%s|%d|%d\n", ip, score, signal_count > tmpdir "/bot_fingerprints.txt"
}
}
close(tmpdir "/bot_fingerprints.txt")
}
' < "$TEMP_DIR/parsed_logs.txt"
@@ -1321,7 +1322,7 @@ analyze_domain_targeting_percentage() {
# Build per-domain attack data
# Format: domain|attack_type|ip|count
awk -F'|' '
awk -F'|' -v tmpdir="$TEMP_DIR" '
NR == FNR {
# Skip attack vectors file - using parsed_logs for all data
next
@@ -1360,7 +1361,7 @@ analyze_domain_targeting_percentage() {
}
END {
for (domain in attack_data) {
domain_file = "'"$TEMP_DIR"'/domain_attacks_" domain ".txt"
domain_file = tmpdir "/domain_attacks_" domain ".txt"
for (attack_type in attack_data[domain]) {
total = attack_totals[domain][attack_type]
for (ip in attack_data[domain][attack_type]) {
@@ -2268,13 +2269,13 @@ generate_report() {
# QUICK STATS DASHBOARD
print_header "QUICK STATS DASHBOARD"
total_requests=$(cat "$TEMP_DIR/parsed_logs.txt" | wc -l)
unique_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | wc -l)
unique_domains=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $2}' | sort -u | wc -l)
bot_requests=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown"' | wc -l)
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" | wc -l)
# Count private/internal IPs (excluded from threat analysis)
private_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)
# Count server's own IPs in the logs
server_ip_hits=0