Fix bot analyzer to filter log entries by timestamp, not just files
Previously, the script filtered log FILES by modification time but read ALL entries from those files, causing "Last 1 hour" to show entries from weeks/months ago if they were in recently-modified files. Now filters individual log entries by parsing their timestamps and comparing to the selected time range (1 hour, 6 hours, 24 hours, etc.). Changes: - Added cutoff timestamp calculation in awk BEGIN block - Extract timestamp from each Apache log entry - Skip entries older than cutoff with timestamp comparison - Works with both GNU date and BSD date for portability 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -357,7 +357,19 @@ parse_logs() {
|
||||
|
||||
# Parse Apache Combined Log Format with error handling
|
||||
# Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT"
|
||||
awk -v domain="$domain" '
|
||||
awk -v domain="$domain" -v hours_filter="$HOURS_BACK" -v days_filter="$DAYS_BACK" '
|
||||
BEGIN {
|
||||
# Calculate cutoff timestamp (hours takes precedence)
|
||||
if (hours_filter != "") {
|
||||
cmd = "date -d \"" hours_filter " hours ago\" +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null || date -v-" hours_filter "H +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null"
|
||||
cmd | getline cutoff_ts
|
||||
close(cmd)
|
||||
} else if (days_filter != "") {
|
||||
cmd = "date -d \"" days_filter " days ago\" +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null || date -v-" days_filter "d +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null"
|
||||
cmd | getline cutoff_ts
|
||||
close(cmd)
|
||||
}
|
||||
}
|
||||
{
|
||||
# Skip empty lines and malformed entries
|
||||
if (NF < 10 || length($0) < 50) next
|
||||
@@ -372,6 +384,18 @@ parse_logs() {
|
||||
timestamp = "unknown"
|
||||
}
|
||||
|
||||
# Filter by timestamp if time filter is set
|
||||
if ((hours_filter != "" || days_filter != "") && timestamp != "unknown" && cutoff_ts != "") {
|
||||
# Extract just the date/time part (before timezone)
|
||||
split(timestamp, ts_parts, " ")
|
||||
log_ts = ts_parts[1]
|
||||
|
||||
# Simple string comparison works for this format (dd/mmm/yyyy:HH:MM:SS)
|
||||
if (log_ts < cutoff_ts) {
|
||||
next # Skip this entry, it's too old
|
||||
}
|
||||
}
|
||||
|
||||
# Extract HTTP method, URL, and status
|
||||
if (match($0, /"([A-Z]+) ([^ ]+) [^"]*" ([0-9]+) ([0-9-]+)/, req)) {
|
||||
http_method = req[1]
|
||||
|
||||
Reference in New Issue
Block a user