diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index 2a1ee82..07eec79 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -770,7 +770,7 @@ analyze_headers() { print_info "Analyzing request headers for bot patterns..." # Analyze header patterns to improve bot detection accuracy - awk -F'|' ' + awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 domain = $2 @@ -849,6 +849,7 @@ analyze_headers() { print ip "|header_anomaly|" score > tmpdir "/header_anomalies.txt" } } + close(tmpdir "/header_anomalies.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Create file if it doesn't exist @@ -864,7 +865,7 @@ analyze_entry_points() { print_info "Analyzing first request patterns (bot vs. user entry points)..." # Get first request from each IP - awk -F'|' ' + awk -F'|' -v tmpdir="$TEMP_DIR" ' BEGIN { ip_first_request[ip] = url ip_first_status[ip] = status @@ -900,6 +901,9 @@ analyze_entry_points() { print ip "|static_entry|" url > tmpdir "/static_entry_points.txt" } } + close(tmpdir "/suspicious_entry_points.txt") + close(tmpdir "/normal_entry_points.txt") + close(tmpdir "/static_entry_points.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Count suspicious entry points @@ -1023,7 +1027,7 @@ detect_threats() { # Track response codes for intelligence print status > tmpdir "/response_codes_raw.txt" } - ' < <(cat "$TEMP_DIR/parsed_logs.txt") + ' < "$TEMP_DIR/parsed_logs.txt" # Process attack vectors by type if [ -f "$TEMP_DIR/attack_vectors_raw.txt" ]; then @@ -1088,7 +1092,7 @@ analyze_url_entropy() { print_info "Analyzing URL parameter entropy (fuzzing detection)..." # Detect IPs that generate random parameters (scanning/fuzzing behavior) - awk -F'|' ' + awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 url = $3 @@ -1126,6 +1130,7 @@ analyze_url_entropy() { print ip "|parameter_fuzzing|" urls_per_ip[ip] "|" unique_path_count > tmpdir "/fuzzing_ips.txt" } } + close(tmpdir "/fuzzing_ips.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Create file if it doesn't exist @@ -1141,7 +1146,7 @@ analyze_request_timing() { print_info "Analyzing request timing patterns (DDoS detection)..." # Analyze timing consistency to detect bots/DDoS - awk -F'|' ' + awk -F'|' -v tmpdir="$TEMP_DIR" ' { ip = $1 timestamp = $8 @@ -1194,6 +1199,7 @@ analyze_request_timing() { } } } + close(tmpdir "/timing_anomalies.txt") }' < "$TEMP_DIR/parsed_logs.txt" # Create file if it doesn't exist @@ -1951,7 +1957,7 @@ generate_statistics() { # OPTIMIZATION: Use single-pass AWK to generate multiple stats from parsed logs # This reads the uncompressed file ONCE instead of 4+ separate reads - cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' ' + cat "$TEMP_DIR/parsed_logs.txt" | awk -F'|' -v tmpdir="$TEMP_DIR" ' { # Count by domain (for top sites) domains[$2]++ @@ -1977,6 +1983,9 @@ generate_statistics() { for (url in urls) { print urls[url], url > tmpdir "/top_urls_raw.txt" } + close(tmpdir "/top_sites_raw.txt") + close(tmpdir "/top_ips_raw.txt") + close(tmpdir "/top_urls_raw.txt") }' # Sort and limit results