CRITICAL: Fix missing tmpdir variables + process substitution + missing close() statements

ISSUE 1: Missing -v tmpdir="$TEMP_DIR" assignment in 5 awk blocks:
- analyze_headers() (line 773)
- analyze_entry_points() (line 868)
- analyze_url_entropy() (line 1095)
- analyze_request_timing() (line 1149)
- generate_statistics() top sites analysis (line 1960)

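These awk blocks referenced the tmpdir variable without it being passed in via
-v. awk treats an unset variable as the empty string, so an output path such as
tmpdir "/header_anomalies.txt" resolved to "/header_anomalies.txt". The files
were therefore targeted at the filesystem root instead of the temp directory,
and the results were silently missing from the place later steps read them.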

ISSUE 2: Process substitution inefficiency in detect_threats():
- Line 1026: Changed from '< <(cat file)' to '< file'
- The process substitution spawned an extra cat process and a pipe just to feed
  a regular file to awk; a plain input redirection does the same job directly

ISSUE 3: Missing close() statements for file handles in awk:
- analyze_headers(): Added close() for header_anomalies.txt
- analyze_entry_points(): Added close() for 3 output files
  (suspicious_entry_points.txt, normal_entry_points.txt, static_entry_points.txt)
- analyze_url_entropy(): Added close() for fuzzing_ips.txt
- analyze_request_timing(): Added close() for timing_anomalies.txt
- generate_statistics(): Added close() for 3 output files
  (top_sites_raw.txt, top_ips_raw.txt, top_urls_raw.txt)

FILE OUTPUT IMPACT:
All these functions now properly:
- Have tmpdir variable available
- Create files in correct temp directory
- Close file handles properly for buffer flushing
- Avoid unnecessary process substitutions

VERIFIED:
- Syntax check: PASSED
- All tmpdir references now have corresponding -v definitions
- All file-writing awk blocks have explicit close() calls
This commit is contained in:
Developer
2026-04-23 18:37:18 -04:00
parent 8477c8d7e1
commit adbe5c14d5
+15 -6
View File
@@ -770,7 +770,7 @@ analyze_headers() {
print_info "Analyzing request headers for bot patterns..."
# Analyze header patterns to improve bot detection accuracy
awk -F'|' '
awk -F'|' -v tmpdir="$TEMP_DIR" '
{
ip = $1
domain = $2
@@ -849,6 +849,7 @@ analyze_headers() {
print ip "|header_anomaly|" score > tmpdir "/header_anomalies.txt"
}
}
close(tmpdir "/header_anomalies.txt")
}' < "$TEMP_DIR/parsed_logs.txt"
# Create file if it doesn't exist
@@ -864,7 +865,7 @@ analyze_entry_points() {
print_info "Analyzing first request patterns (bot vs. user entry points)..."
# Get first request from each IP
awk -F'|' '
awk -F'|' -v tmpdir="$TEMP_DIR" '
BEGIN {
ip_first_request[ip] = url
ip_first_status[ip] = status
@@ -900,6 +901,9 @@ analyze_entry_points() {
print ip "|static_entry|" url > tmpdir "/static_entry_points.txt"
}
}
close(tmpdir "/suspicious_entry_points.txt")
close(tmpdir "/normal_entry_points.txt")
close(tmpdir "/static_entry_points.txt")
}' < "$TEMP_DIR/parsed_logs.txt"
# Count suspicious entry points
@@ -1023,7 +1027,7 @@ detect_threats() {
# Track response codes for intelligence
print status > tmpdir "/response_codes_raw.txt"
}
' < <(cat "$TEMP_DIR/parsed_logs.txt")
' < "$TEMP_DIR/parsed_logs.txt"
# Process attack vectors by type
if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
@@ -1088,7 +1092,7 @@ analyze_url_entropy() {
print_info "Analyzing URL parameter entropy (fuzzing detection)..."
# Detect IPs that generate random parameters (scanning/fuzzing behavior)
awk -F'|' '
awk -F'|' -v tmpdir="$TEMP_DIR" '
{
ip = $1
url = $3
@@ -1126,6 +1130,7 @@ analyze_url_entropy() {
print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count > tmpdir "/fuzzing_ips.txt"
}
}
close(tmpdir "/fuzzing_ips.txt")
}' < "$TEMP_DIR/parsed_logs.txt"
# Create file if it doesn't exist
@@ -1141,7 +1146,7 @@ analyze_request_timing() {
print_info "Analyzing request timing patterns (DDoS detection)..."
# Analyze timing consistency to detect bots/DDoS
awk -F'|' '
awk -F'|' -v tmpdir="$TEMP_DIR" '
{
ip = $1
timestamp = $8
@@ -1194,6 +1199,7 @@ analyze_request_timing() {
}
}
}
close(tmpdir "/timing_anomalies.txt")
}' < "$TEMP_DIR/parsed_logs.txt"
# Create file if it doesn't exist
@@ -1951,7 +1957,7 @@ generate_statistics() {
# OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs
# This reads the uncompressed file ONCE instead of 4+ separate reads
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '
cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" '
{
# Count by domain (for top sites)
domains[$2]++
@@ -1977,6 +1983,9 @@ generate_statistics() {
for (url in urls) {
print urls[url], url > tmpdir "/top_urls_raw.txt"
}
close(tmpdir "/top_sites_raw.txt")
close(tmpdir "/top_ips_raw.txt")
close(tmpdir "/top_urls_raw.txt")
}'
# Sort and limit results