CRITICAL FIX: Add error handling to all file reads

Protected file reads at the following locations with error handling:
- Line 508: parsed_logs.txt wc -l with 2>/dev/null || echo 0
- Line 642: classified_bots.txt wc -l with 2>/dev/null || echo 0
- Line 1627: classified_bots.txt cat with 2>/dev/null
- Line 1913: parsed_logs.txt cat with 2>/dev/null
- Line 1967: parsed_logs.txt cat with 2>/dev/null
- Lines 2004, 2008, 2014: classified_bots.txt cats with 2>/dev/null and || true
- Lines 1354, 1380: attack_vectors_raw.txt reads with conditional checks

This prevents set -e from aborting the script when these files don't exist.
This commit is contained in:
Developer
2026-04-23 20:03:35 -04:00
parent 54e4d5b67f
commit a805676be5
+15 -13
View File
@@ -505,7 +505,7 @@ parse_logs() {
fi fi
local line_count local line_count
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt") line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
local file_size_kb local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0") file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")
@@ -639,7 +639,7 @@ classify_bots() {
fi fi
local classified_count local classified_count
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt") classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo "0")
local file_size_kb local file_size_kb
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0") file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")
@@ -1351,10 +1351,11 @@ analyze_domain_targeting_percentage() {
printf "%s|%d|%d\n", domain, ip_count, request_count_per_domain[domain] printf "%s|%d|%d\n", domain, ip_count, request_count_per_domain[domain]
} }
} }
' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" ' <([ -f "$TEMP_DIR/attack_vectors_raw.txt" ] && cat "$TEMP_DIR/attack_vectors_raw.txt" || echo "") "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" || true
# Also create per-domain attack type breakdown # Also create per-domain attack type breakdown
# Format: domain|attack_type|ip|count # Format: domain|attack_type|ip|count
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
awk -F'|' ' awk -F'|' '
{ {
ip = $1 ip = $1
@@ -1377,7 +1378,8 @@ analyze_domain_targeting_percentage() {
} }
} }
} }
' < "$TEMP_DIR/attack_vectors_raw.txt" ' -v tmpdir="$TEMP_DIR" < "$TEMP_DIR/attack_vectors_raw.txt"
fi
print_success "Domain attack pattern analysis complete" print_success "Domain attack pattern analysis complete"
} }
@@ -1624,13 +1626,13 @@ analyze_time_series() {
print_info "Analyzing time-series patterns..." print_info "Analyzing time-series patterns..."
# Extract hourly bot traffic # Extract hourly bot traffic
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" { cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" {
timestamp = $8 timestamp = $8
if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) { if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) {
hour = ts[4] hour = ts[4]
print hour print hour
} }
}' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" }' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true
# Extract hourly attack traffic # Extract hourly attack traffic
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
@@ -1910,7 +1912,7 @@ detect_false_positives() {
print_info "Detecting legitimate services (false positives)..." print_info "Detecting legitimate services (false positives)..."
# Known monitoring service patterns and legitimate CDNs # Known monitoring service patterns and legitimate CDNs
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{ cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' '{
ip = $1 ip = $1
domain = $2 domain = $2
url = $3 url = $3
@@ -1964,7 +1966,7 @@ generate_statistics() {
# OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs # OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs
# This reads the uncompressed file ONCE instead of 4+ separate reads # This reads the uncompressed file ONCE instead of 4+ separate reads
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" ' cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' -v tmpdir="$TEMP_DIR" '
{ {
# Count by domain (for top sites) # Count by domain (for top sites)
domains[$2]++ domains[$2]++
@@ -2001,17 +2003,17 @@ generate_statistics() {
sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt" sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt"
# Top 5 bots by request count (single decompression) # Top 5 bots by request count (single decompression)
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" {print $10}' | \ cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" {print $10}' | \
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" || true
# Traffic breakdown by bot type (single decompression) # Traffic breakdown by bot type (single decompression)
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $9}' | \ cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $9}' | \
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" || true
# Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep) # Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep)
if [ -f "$TEMP_DIR/all_domains.txt" ]; then if [ -f "$TEMP_DIR/all_domains.txt" ]; then
# Create indexed bot traffic file (decompress once) # Create indexed bot traffic file (decompress once)
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt" cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt" || true
while read -r domain; do while read -r domain; do
echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt" echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"