diff --git a/modules/security/bot-analyzer.sh b/modules/security/bot-analyzer.sh index fdbe241..3958ecf 100755 --- a/modules/security/bot-analyzer.sh +++ b/modules/security/bot-analyzer.sh @@ -357,7 +357,19 @@ parse_logs() { # Parse Apache Combined Log Format with error handling # Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT" - awk -v domain="$domain" ' + awk -v domain="$domain" -v hours_filter="$HOURS_BACK" -v days_filter="$DAYS_BACK" ' + BEGIN { + # Calculate cutoff timestamp (hours takes precedence) + if (hours_filter != "") { + cmd = "date -d \"" hours_filter " hours ago\" +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null || date -v-" hours_filter "H +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null" + cmd | getline cutoff_ts + close(cmd) + } else if (days_filter != "") { + cmd = "date -d \"" days_filter " days ago\" +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null || date -v-" days_filter "d +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null" + cmd | getline cutoff_ts + close(cmd) + } + } { # Skip empty lines and malformed entries if (NF < 10 || length($0) < 50) next @@ -372,6 +384,18 @@ parse_logs() { timestamp = "unknown" } + # Filter by timestamp if time filter is set + if ((hours_filter != "" || days_filter != "") && timestamp != "unknown" && cutoff_ts != "") { + # Extract just the date/time part (before timezone) + split(timestamp, ts_parts, " ") + log_ts = ts_parts[1] + + # Simple string comparison works for this format (dd/mmm/yyyy:HH:MM:SS) + if (log_ts < cutoff_ts) { + next # Skip this entry, it's too old + } + } + # Extract HTTP method, URL, and status if (match($0, /"([A-Z]+) ([^ ]+) [^"]*" ([0-9]+) ([0-9-]+)/, req)) { http_method = req[1]