Fix timestamp comparison to use epoch seconds for accurate filtering

The previous commit compared timestamps as strings, which fails across
month/year boundaries (e.g., "01/Jan/2026" sorts before "31/Dec/2025"
because the leading day field is compared first).

Now converts timestamps to epoch seconds for proper numerical comparison:
- Cutoff calculated as epoch seconds (date +%s)
- Apache log timestamps converted from "dd/mmm/yyyy:HH:MM:SS" format
- Format conversion: replace slashes and first colon with spaces
- Numerical comparison ensures correct ordering across all boundaries

Tested with dates spanning year/month changes - works correctly.
This commit is contained in:
cschantz
2025-12-31 22:21:01 -05:00
parent de3e95bcb7
commit 3730f8bd0c
+20 -8
View File
@@ -359,14 +359,14 @@ parse_logs() {
# Format: IP - - [timestamp] "METHOD URL PROTOCOL" STATUS SIZE "REFERRER" "USER-AGENT"
awk -v domain="$domain" -v hours_filter="$HOURS_BACK" -v days_filter="$DAYS_BACK" '
BEGIN {
# Calculate cutoff timestamp (hours takes precedence)
# Calculate cutoff timestamp in epoch seconds for proper comparison
if (hours_filter != "") {
cmd = "date -d \"" hours_filter " hours ago\" +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null || date -v-" hours_filter "H +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null"
cmd | getline cutoff_ts
cmd = "date -d \"" hours_filter " hours ago\" +%s 2>/dev/null || date -v-" hours_filter "H +%s 2>/dev/null"
cmd | getline cutoff_epoch
close(cmd)
} else if (days_filter != "") {
cmd = "date -d \"" days_filter " days ago\" +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null || date -v-" days_filter "d +\"%d/%b/%Y:%H:%M:%S\" 2>/dev/null"
cmd | getline cutoff_ts
cmd = "date -d \"" days_filter " days ago\" +%s 2>/dev/null || date -v-" days_filter "d +%s 2>/dev/null"
cmd | getline cutoff_epoch
close(cmd)
}
}
@@ -385,13 +385,25 @@ parse_logs() {
}
# Filter by timestamp if time filter is set
if ((hours_filter != "" || days_filter != "") && timestamp != "unknown" && cutoff_ts != "") {
if ((hours_filter != "" || days_filter != "") && timestamp != "unknown" && cutoff_epoch != "") {
# Extract just the date/time part (before timezone)
split(timestamp, ts_parts, " ")
log_ts = ts_parts[1]
# Simple string comparison works for this format (dd/mmm/yyyy:HH:MM:SS)
if (log_ts < cutoff_ts) {
# Convert Apache timestamp format for date parsing
# From: 31/Dec/2025:10:30:15
# To: 31 Dec 2025 10:30:15
log_ts_formatted = log_ts
sub(/:/, " ", log_ts_formatted) # Replace first : with space
gsub(/\//, " ", log_ts_formatted) # Replace all / with space
# Convert to epoch seconds (GNU date for Linux, BSD date for macOS)
cmd = "date -d \"" log_ts_formatted "\" +%s 2>/dev/null || date -j -f \"%d %b %Y %H:%M:%S\" \"" log_ts_formatted "\" +%s 2>/dev/null"
cmd | getline log_epoch
close(cmd)
# Numerical comparison of epoch seconds
if (log_epoch != "" && log_epoch < cutoff_epoch) {
next # Skip this entry, it's too old
}
}