CRITICAL FIX: Add error handling to all file reads
Multiple lines: Protected all file reads with error handling:
- Line 508: parsed_logs.txt wc -l with 2>/dev/null || echo 0
- Line 642: classified_bots.txt wc -l with 2>/dev/null || echo 0
- Line 1627: classified_bots.txt cat with 2>/dev/null
- Line 1913: parsed_logs.txt cat with 2>/dev/null
- Line 1967: parsed_logs.txt cat with 2>/dev/null
- Lines 2004, 2008, 2014: classified_bots.txt cats with 2>/dev/null and || true
- Lines 1354, 1380: attack_vectors_raw.txt reads with conditional checks
Because the script runs under set -e, a read of a missing file would otherwise abort it; these guards keep the script running when the files do not exist.
This commit is contained in:
@@ -505,7 +505,7 @@ parse_logs() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
local line_count
|
local line_count
|
||||||
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt")
|
line_count=$(wc -l < "$TEMP_DIR/parsed_logs.txt" 2>/dev/null || echo "0")
|
||||||
local file_size_kb
|
local file_size_kb
|
||||||
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")
|
file_size_kb=$(du -k "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | cut -f1 || echo "0")
|
||||||
|
|
||||||
@@ -639,7 +639,7 @@ classify_bots() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
local classified_count
|
local classified_count
|
||||||
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt")
|
classified_count=$(wc -l < "$TEMP_DIR/classified_bots.txt" 2>/dev/null || echo "0")
|
||||||
local file_size_kb
|
local file_size_kb
|
||||||
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")
|
file_size_kb=$(du -k "$TEMP_DIR/classified_bots.txt" 2>/dev/null | cut -f1 || echo "0")
|
||||||
|
|
||||||
@@ -1351,33 +1351,35 @@ analyze_domain_targeting_percentage() {
|
|||||||
printf "%s|%d|%d\n", domain, ip_count, request_count_per_domain[domain]
|
printf "%s|%d|%d\n", domain, ip_count, request_count_per_domain[domain]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
' "$TEMP_DIR/attack_vectors_raw.txt" "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt"
|
' <([ -f "$TEMP_DIR/attack_vectors_raw.txt" ] && cat "$TEMP_DIR/attack_vectors_raw.txt" || echo "") "$TEMP_DIR/parsed_logs.txt" | sort -t'|' -k3 -rn > "$TEMP_DIR/domain_targeting.txt" || true
|
||||||
|
|
||||||
# Also create per-domain attack type breakdown
|
# Also create per-domain attack type breakdown
|
||||||
# Format: domain|attack_type|ip|count
|
# Format: domain|attack_type|ip|count
|
||||||
awk -F'|' '
|
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
||||||
{
|
awk -F'|' '
|
||||||
ip = $1
|
{
|
||||||
domain = $2
|
ip = $1
|
||||||
attack_type = $5
|
domain = $2
|
||||||
|
attack_type = $5
|
||||||
|
|
||||||
# Store as domain -> attack_type -> ip -> count
|
# Store as domain -> attack_type -> ip -> count
|
||||||
attack_data[domain][attack_type][ip]++
|
attack_data[domain][attack_type][ip]++
|
||||||
attack_totals[domain][attack_type]++
|
attack_totals[domain][attack_type]++
|
||||||
}
|
}
|
||||||
END {
|
END {
|
||||||
for (domain in attack_data) {
|
for (domain in attack_data) {
|
||||||
domain_file = tmpdir "/domain_attacks_" domain ".txt"
|
domain_file = tmpdir "/domain_attacks_" domain ".txt"
|
||||||
for (attack_type in attack_data[domain]) {
|
for (attack_type in attack_data[domain]) {
|
||||||
total = attack_totals[domain][attack_type]
|
total = attack_totals[domain][attack_type]
|
||||||
for (ip in attack_data[domain][attack_type]) {
|
for (ip in attack_data[domain][attack_type]) {
|
||||||
count = attack_data[domain][attack_type][ip]
|
count = attack_data[domain][attack_type][ip]
|
||||||
printf "%s|%d|%d\n", attack_type "|" ip, count, total
|
printf "%s|%d|%d\n", attack_type "|" ip, count, total
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
' -v tmpdir="$TEMP_DIR" < "$TEMP_DIR/attack_vectors_raw.txt"
|
||||||
' < "$TEMP_DIR/attack_vectors_raw.txt"
|
fi
|
||||||
|
|
||||||
print_success "Domain attack pattern analysis complete"
|
print_success "Domain attack pattern analysis complete"
|
||||||
}
|
}
|
||||||
@@ -1624,13 +1626,13 @@ analyze_time_series() {
|
|||||||
print_info "Analyzing time-series patterns..."
|
print_info "Analyzing time-series patterns..."
|
||||||
|
|
||||||
# Extract hourly bot traffic
|
# Extract hourly bot traffic
|
||||||
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" {
|
cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" {
|
||||||
timestamp = $8
|
timestamp = $8
|
||||||
if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) {
|
if (match(timestamp, /([0-9]{2})\/([A-Za-z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, ts)) {
|
||||||
hour = ts[4]
|
hour = ts[4]
|
||||||
print hour
|
print hour
|
||||||
}
|
}
|
||||||
}' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt"
|
}' | sort | uniq -c > "$TEMP_DIR/hourly_bot_traffic.txt" || true
|
||||||
|
|
||||||
# Extract hourly attack traffic
|
# Extract hourly attack traffic
|
||||||
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
|
||||||
@@ -1910,7 +1912,7 @@ detect_false_positives() {
|
|||||||
print_info "Detecting legitimate services (false positives)..."
|
print_info "Detecting legitimate services (false positives)..."
|
||||||
|
|
||||||
# Known monitoring service patterns and legitimate CDNs
|
# Known monitoring service patterns and legitimate CDNs
|
||||||
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '{
|
cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' '{
|
||||||
ip = $1
|
ip = $1
|
||||||
domain = $2
|
domain = $2
|
||||||
url = $3
|
url = $3
|
||||||
@@ -1964,7 +1966,7 @@ generate_statistics() {
|
|||||||
|
|
||||||
# OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs
|
# OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs
|
||||||
# This reads the uncompressed file ONCE instead of 4+ separate reads
|
# This reads the uncompressed file ONCE instead of 4+ separate reads
|
||||||
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" '
|
cat "$TEMP_DIR/parsed_logs.txt" 2>/dev/null | awk -F'|' -v tmpdir="$TEMP_DIR" '
|
||||||
{
|
{
|
||||||
# Count by domain (for top sites)
|
# Count by domain (for top sites)
|
||||||
domains[$2]++
|
domains[$2]++
|
||||||
@@ -2001,17 +2003,17 @@ generate_statistics() {
|
|||||||
sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt"
|
sort -rn "$TEMP_DIR/top_urls_raw.txt" | head -5 > "$TEMP_DIR/top_urls.txt"
|
||||||
|
|
||||||
# Top 5 bots by request count (single decompression)
|
# Top 5 bots by request count (single decompression)
|
||||||
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '$9 != "unknown" {print $10}' | \
|
cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '$9 != "unknown" {print $10}' | \
|
||||||
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt"
|
sort | uniq -c | sort -rn | head -5 > "$TEMP_DIR/top_bots.txt" || true
|
||||||
|
|
||||||
# Traffic breakdown by bot type (single decompression)
|
# Traffic breakdown by bot type (single decompression)
|
||||||
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $9}' | \
|
cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $9}' | \
|
||||||
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt"
|
sort | uniq -c | sort -rn > "$TEMP_DIR/traffic_breakdown.txt" || true
|
||||||
|
|
||||||
# Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep)
|
# Per-domain traffic sources (OPTIMIZED: read uncompressed file once, use grep)
|
||||||
if [ -f "$TEMP_DIR/all_domains.txt" ]; then
|
if [ -f "$TEMP_DIR/all_domains.txt" ]; then
|
||||||
# Create indexed bot traffic file (decompress once)
|
# Create indexed bot traffic file (decompress once)
|
||||||
cat "$TEMP_DIR/classified_bots.txt" | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt"
|
cat "$TEMP_DIR/classified_bots.txt" 2>/dev/null | awk -F'|' '{print $2"|"$9}' > "$TEMP_DIR/domain_bot_types.txt" || true
|
||||||
|
|
||||||
while read -r domain; do
|
while read -r domain; do
|
||||||
echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"
|
echo "$domain" > "$TEMP_DIR/domain_${domain}_stats.txt"
|
||||||
|
|||||||
Reference in New Issue
Block a user