Fix bot analyzer to filter log entries by timestamp, not just files

Previously, the script filtered log FILES by modification time but read
ALL entries from those files, causing "Last 1 hour" to show entries from
weeks/months ago if they were in recently-modified files.

Now filters individual log entries by parsing their timestamps and
comparing to the selected time range (1 hour, 6 hours, 24 hours, etc.).

Changes:
- Add cutoff timestamp calculation in the awk BEGIN block
- Extract the timestamp from each Apache log entry
- Skip entries whose timestamp is older than the cutoff
- Support both GNU date and BSD date for portability

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
cschantz
2025-12-31 22:15:00 -05:00
parent 74e3999486
commit ea26efaf0a
+25 -1
View File
@@ -357,7 +357,19 @@ parse_logs() {
# Parse Apache Combined Log Format with error handling
# Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT"
awk -v domain="$domain" '
awk -v domain="$domain" -v hours_filter="$HOURS_BACK" -v days_filter="$DAYS_BACK" '
BEGIN {
    # Compute cutoff_ts, the oldest timestamp still inside the requested
    # window, formatted like the date/time part of an Apache log timestamp
    # (dd/Mon/yyyy:HH:MM:SS). Inputs are the awk -v variables hours_filter
    # and days_filter; hours_filter wins when both are set. When neither
    # is set, or when both date invocations fail, cutoff_ts stays empty.
    if (hours_filter != "") {
        cutoff_amount = hours_filter
        cutoff_gnu_unit = "hours"
        cutoff_bsd_flag = "H"
    } else if (days_filter != "") {
        cutoff_amount = days_filter
        cutoff_gnu_unit = "days"
        cutoff_bsd_flag = "d"
    } else {
        cutoff_amount = ""
    }

    if (cutoff_amount != "") {
        cutoff_fmt = "+\"%d/%b/%Y:%H:%M:%S\""
        cutoff_quiet = " 2>/dev/null"
        # Try the GNU date syntax first (-d "N unit ago"); if that fails,
        # fall back to the BSD relative-adjustment syntax (-v-N<flag>).
        cmd = "date -d \"" cutoff_amount " " cutoff_gnu_unit " ago\" " cutoff_fmt cutoff_quiet \
              " || date -v-" cutoff_amount cutoff_bsd_flag " " cutoff_fmt cutoff_quiet
        # Only the first output line is read; close() releases the pipe.
        cmd | getline cutoff_ts
        close(cmd)
    }
}
{
# Skip empty lines and malformed entries
if (NF < 10 || length($0) < 50) next
@@ -372,6 +384,18 @@ parse_logs() {
timestamp = "unknown"
}
# Filter by timestamp if a time filter is set.
# Reads:  hours_filter, days_filter (awk -v inputs), timestamp (parsed
#         earlier for this record), cutoff_ts (computed in BEGIN; empty
#         when the date command failed, which disables filtering).
# Effect: records older than the cutoff are skipped with next.
# NOTE: this awk program lives inside a single-quoted shell string, so no
# apostrophe may appear anywhere in it, comments included.
if ((hours_filter != "" || days_filter != "") && timestamp != "unknown" && cutoff_ts != "") {
    # dd/Mon/yyyy:HH:MM:SS does NOT sort chronologically as a plain string
    # (day comes first and month names sort alphabetically), so both
    # timestamps are converted to a yyyymmddHHMMSS key before comparing.
    # Month N of this table starts at offset 3*(N-1)+1.
    ts_month_names = "JanFebMarAprMayJunJulAugSepOctNovDec"

    # The cutoff never changes, so build its key only on the first record.
    if (!cutoff_key_done) {
        cutoff_key_done = 1
        cutoff_key = ""
        if (split(cutoff_ts, cutoff_fields, "[/:]") == 6) {
            cutoff_mon_pos = index(ts_month_names, cutoff_fields[2])
            if (length(cutoff_fields[2]) == 3 && cutoff_mon_pos > 0 && (cutoff_mon_pos - 1) % 3 == 0) {
                cutoff_key = sprintf("%04d%02d%02d%02d%02d%02d", \
                    cutoff_fields[3], (cutoff_mon_pos + 2) / 3, cutoff_fields[1], \
                    cutoff_fields[4], cutoff_fields[5], cutoff_fields[6])
            }
        }
    }

    # Extract just the date/time part (before timezone)
    split(timestamp, ts_parts, " ")
    log_ts = ts_parts[1]
    # Tolerate a leading bracket without altering log_ts itself.
    log_ts_clean = log_ts
    sub(/^\[/, "", log_ts_clean)

    log_key = ""
    if (split(log_ts_clean, log_fields, "[/:]") == 6) {
        log_mon_pos = index(ts_month_names, log_fields[2])
        if (length(log_fields[2]) == 3 && log_mon_pos > 0 && (log_mon_pos - 1) % 3 == 0) {
            log_key = sprintf("%04d%02d%02d%02d%02d%02d", \
                log_fields[3], (log_mon_pos + 2) / 3, log_fields[1], \
                log_fields[4], log_fields[5], log_fields[6])
        }
    }

    # Both keys are fixed-width digit strings, so string order equals
    # chronological order. If either side could not be parsed (for example
    # a non-English month name from the date command), keep the record
    # rather than guess.
    # NOTE(review): the timezone offset in the log entry is ignored; this
    # assumes log timestamps and the date command share a timezone.
    if (log_key != "" && cutoff_key != "" && log_key < cutoff_key) {
        next # Skip this entry: it is older than the cutoff
    }
}
# Extract HTTP method, URL, and status
if (match($0, /"([A-Z]+) ([^ ]+) [^"]*" ([0-9]+) ([0-9-]+)/, req)) {
http_method = req[1]