CRITICAL: Fix missing tmpdir variables + process substitution + missing close() statements

ISSUE 1: Missing -v tmpdir variable in 5 awk blocks:
- analyze_headers() (line 773)
- analyze_entry_points() (line 868)
- analyze_url_entropy() (line 1095)
- analyze_request_timing() (line 1149)
- generate_statistics() top sites analysis (line 1960)

These awk blocks used the tmpdir variable without it being passed in, so awk treated it as an uninitialized variable, i.e. an empty string. An output path such as tmpdir "/header_anomalies.txt" therefore resolved to "/header_anomalies.txt": files were targeted at the root directory instead of the temp directory, silently failing.

ISSUE 2: Process substitution inefficiency in detect_threats():
- Line 1026: Changed from '< <(cat file)' to '< file'
- Process substitution creates an unnecessary pipe and subshell

ISSUE 3: Missing close() statements for file handles in awk:
- analyze_headers(): Added close() for header_anomalies.txt
- analyze_entry_points(): Added close() for 3 output files
- analyze_url_entropy(): Added close() for fuzzing_ips.txt
- analyze_request_timing(): Added close() for timing_anomalies.txt
- generate_statistics(): Added close() for 3 output files

FILE OUTPUT IMPACT: All these functions now properly:
- Have the tmpdir variable available
- Create files in the correct temp directory
- Close file handles properly for buffer flushing
- Avoid unnecessary process substitutions

VERIFIED:
- Syntax check: PASSED
- All tmpdir references now have corresponding -v definitions
- All file-writing awk blocks have explicit close() calls
2026-04-23 18:37:18 -04:00
parent 8477c8d7e1
commit adbe5c14d5
1 changed files with 15 additions and 6 deletions
@@ -770,7 +770,7 @@ analyze_headers() {
    print_info "Analyzing request headers for bot patterns..."

    # Analyze header patterns to improve bot detection accuracy
-    awk -F'|' '
+    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1
        domain = $2
@@ -849,6 +849,7 @@ analyze_headers() {
                print ip "|header_anomaly|" score > tmpdir "/header_anomalies.txt"
            }
        }
+        close(tmpdir "/header_anomalies.txt")
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
@@ -864,7 +865,7 @@ analyze_entry_points() {
    print_info "Analyzing first request patterns (bot vs. user entry points)..."

    # Get first request from each IP
-    awk -F'|' '
+    awk -F'|' -v tmpdir="$TEMP_DIR" '
    BEGIN {
        ip_first_request[ip] = url
        ip_first_status[ip] = status
@@ -900,6 +901,9 @@ analyze_entry_points() {
                print ip "|static_entry|" url > tmpdir "/static_entry_points.txt"
            }
        }
+        close(tmpdir "/suspicious_entry_points.txt")
+        close(tmpdir "/normal_entry_points.txt")
+        close(tmpdir "/static_entry_points.txt")
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Count suspicious entry points
@@ -1023,7 +1027,7 @@ detect_threats() {
        # Track response codes for intelligence
        print status > tmpdir "/response_codes_raw.txt"
    }
-    ' < <(cat "$TEMP_DIR/parsed_logs.txt")
+    ' < "$TEMP_DIR/parsed_logs.txt"

    # Process attack vectors by type
    if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then
@@ -1088,7 +1092,7 @@ analyze_url_entropy() {
    print_info "Analyzing URL parameter entropy (fuzzing detection)..."

    # Detect IPs that generate random parameters (scanning/fuzzing behavior)
-    awk -F'|' '
+    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1
        url = $3
@@ -1126,6 +1130,7 @@ analyze_url_entropy() {
                print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count > tmpdir "/fuzzing_ips.txt"
            }
        }
+        close(tmpdir "/fuzzing_ips.txt")
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
@@ -1141,7 +1146,7 @@ analyze_request_timing() {
    print_info "Analyzing request timing patterns (DDoS detection)..."

    # Analyze timing consistency to detect bots/DDoS
-    awk -F'|' '
+    awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        ip = $1
        timestamp = $8
@@ -1194,6 +1199,7 @@ analyze_request_timing() {
                }
            }
        }
+        close(tmpdir "/timing_anomalies.txt")
    }' < "$TEMP_DIR/parsed_logs.txt"

    # Create file if it doesn't exist
@@ -1951,7 +1957,7 @@ generate_statistics() {

    # OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs
    # This reads the uncompressed file ONCE instead of 4+ separate reads
-    cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' '
+    cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" '
    {
        # Count by domain (for top sites)
        domains[$2]++
@@ -1977,6 +1983,9 @@ generate_statistics() {
        for (url in urls) {
            print urls[url], url > tmpdir "/top_urls_raw.txt"
        }
+        close(tmpdir "/top_sites_raw.txt")
+        close(tmpdir "/top_ips_raw.txt")
+        close(tmpdir "/top_urls_raw.txt")
    }'

    # Sort and limit results