CRITICAL: Fix quote escaping in calculate_bot_fingerprint + du error handling + UUOC patterns
QUOTE ESCAPING BUGS (same issue as before):
- Line 1213: calculate_bot_fingerprint() awk - added -v tmpdir variable
- Line 1303: Fixed file redirection from broken quote syntax to tmpdir concatenation
- Line 1306: Added close() statement for bot_fingerprints.txt
- Line 1325: analyze_domain_targeting_percentage() - added -v tmpdir variable
- Line 1364: Fixed domain_file path from broken quote syntax to tmpdir concatenation

FILE OPERATION SAFETY:
- Lines 510, 644: du | cut commands now have error handling (|| echo 0)
- These commands could fail with set -eo pipefail if du fails
- Added 2>/dev/null and fallback value

EFFICIENCY IMPROVEMENTS (UUOC):
- Lines 2272-2278: Replaced cat | awk/wc patterns with direct input
- cat file | wc -l → wc -l < file
- cat file | awk → awk < file (eliminates unnecessary processes)

IMPACT:
- New fingerprinting and domain targeting analysis sections will now execute
- All file operations are safe from pipefail crashes
- More efficient command pipelines
This commit is contained in:
@@ -507,7 +507,7 @@ parse_logs() {
|
||||
local line_count
|
||||
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
||||
local file_size_kb
|
||||
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" | cut -f1)
|
||||
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")
|
||||
|
||||
# Compress for storage (gzip saves ~90% space on text)
|
||||
# But we keep uncompressed version for fast analysis
|
||||
@@ -641,7 +641,7 @@ classify_bots() {
|
||||
local classified_count
|
||||
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
|
||||
local file_size_kb
|
||||
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" | cut -f1)
|
||||
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")
|
||||
|
||||
# Compress for storage in background
|
||||
gzip -c "$TEMP_DIR/classified_bots.txt" > "$TEMP_DIR/classified_bots.txt.gz" &
|
||||
@@ -1210,7 +1210,7 @@ calculate_bot_fingerprint() {
|
||||
|
||||
# Each signal contributes to confidence that an IP is a bot
|
||||
# Real traffic rarely has ALL signals, bots typically have multiple
|
||||
awk -F'|' '
|
||||
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
||||
BEGIN {
|
||||
# Initialize tracking arrays
|
||||
}
|
||||
@@ -1300,9 +1300,10 @@ calculate_bot_fingerprint() {
|
||||
|
||||
# Output fingerprint for high-confidence bots (score >= 60)
|
||||
if (score >= 60) {
|
||||
printf "%s|%d|%d\n", ip, score, signal_count > "'"$TEMP_DIR"'/bot_fingerprints.txt"
|
||||
printf "%s|%d|%d\n", ip, score, signal_count > tmpdir "/bot_fingerprints.txt"
|
||||
}
|
||||
}
|
||||
close(tmpdir "/bot_fingerprints.txt")
|
||||
}
|
||||
' < "$TEMP_DIR/parsed_logs.txt"
|
||||
|
||||
@@ -1321,7 +1322,7 @@ analyze_domain_targeting_percentage() {
|
||||
|
||||
# Build per-domain attack data
|
||||
# Format: domain|attack_type|ip|count
|
||||
awk -F'|' '
|
||||
awk -F'|' -v tmpdir="$TEMP_DIR" '
|
||||
NR == FNR {
|
||||
# Skip attack vectors file - using parsed_logs for all data
|
||||
next
|
||||
@@ -1360,7 +1361,7 @@ analyze_domain_targeting_percentage() {
|
||||
}
|
||||
END {
|
||||
for (domain in attack_data) {
|
||||
domain_file = "'"$TEMP_DIR"'/domain_attacks_" domain ".txt"
|
||||
domain_file = tmpdir "/domain_attacks_" domain ".txt"
|
||||
for (attack_type in attack_data[domain]) {
|
||||
total = attack_totals[domain][attack_type]
|
||||
for (ip in attack_data[domain][attack_type]) {
|
||||
@@ -2268,13 +2269,13 @@ generate_report() {
|
||||
# QUICK STATS DASHBOARD
|
||||
print_header "QUICK STATS DASHBOARD"
|
||||
|
||||
total_requests=$(cat "$TEMP_DIR/parsed_logs.txt" | wc -l)
|
||||
unique_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | wc -l)
|
||||
unique_domains=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $2}' | sort -u | wc -l)
|
||||
bot_requests=$(cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown"' | wc -l)
|
||||
total_requests=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
||||
unique_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
||||
unique_domains=$(awk -F'|' '{print $2}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | wc -l)
|
||||
bot_requests=$(awk -F'|' '$9 != "unknown"' < "$TEMP_DIR/classified_bots.txt" | wc -l)
|
||||
|
||||
# Count private/internal IPs (excluded from threat analysis)
|
||||
private_ips=$(cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{print $1}' | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)
|
||||
private_ips=$(awk -F'|' '{print $1}' < "$TEMP_DIR/parsed_logs.txt" | sort -u | grep -E '^(127\.|10\.|192\.168\.|172\.(1[6-9]|2[0-9]|3[01])\.|169\.254\.)' || true | wc -l)
|
||||
|
||||
# Count server's own IPs in the logs
|
||||
server_ip_hits=0
|
||||
|
||||
Reference in New Issue
Block a user