How would I calculate time-based metrics (hourly average) based on log file data?
To make this clearer, consider a log file that contains entries as follows:
Extending Ed Morton's solution:
# parse_time(date, time)
#   Builds the space-separated timestamp string that the END rule later
#   hands to mktime().
#     date  - dash-separated date field, e.g. "2013-04-03"
#     time  - colon-separated time field; a comma and anything after it
#             (e.g. ",989") is discarded
#   Returns date and time joined by one space, with every '-' in date and
#   every ':' in time replaced by a space.
#   newtime is a local scratch variable, not an argument callers pass.
function parse_time(date, time,    newtime) {
    # Cut the comma suffix first, then space out the remaining colons.
    time = gensub(/:/, " ", "g", gensub(/,.*/, "", 1, time))
    newtime = gensub(/-/, " ", "g", date) " " time
    return newtime
}
# extract_uid(record)
#   Returns the UId text embedded in a log record, or "" when the record
#   has none. The value is the run of characters that follows the last
#   tag-closing delimiter which is itself followed by ordinary text; the
#   run stops at the next '&' or '<'.
#   The delimiter is accepted both as a literal '>' (as in
#   "<UId>904c-...</UId><") and as the entity remnant ";gt;". A pattern
#   that only knows ";gt;" never matches literal tags, in which case
#   gensub() hands back the whole record and no two records share a key.
function extract_uid(record) {
    if (record !~ /(>|;gt;)[^&<]/)
        return ""
    # Explicit 1 for "how": replace the first (and only possible) match.
    return gensub(/.*(>|;gt;)([^&<]+).*/, "\\2", 1, record)
}

# Per-record rule: $1 is the date field and $2 the time field.
# The first record seen for a UId sets its start time; every later record
# for the same UId (re)sets its end time, so the last occurrence wins.
{
    uid = extract_uid($0)          # computed once per record
    if (uid == "")
        next                       # no UId on this line: nothing to track

    if (uid in starttime) {
        etime = parse_time($1, $2)
        endtime[uid] = etime
        next
    }

    stime = parse_time($1, $2)
    starttime[uid] = stime
}
# END rule: report the elapsed time for every UId that has both a start
# and an end timestamp.
#   starttime[uid] / endtime[uid] hold "YYYY MM DD HH MM SS" strings, the
#   form mktime() accepts. Each pair is printed as "<uid> <H>h:<M>m:<S>s"
#   and then removed from both arrays. UIds with no end time are left in
#   starttime and produce no output.
END {
    for (x in starttime) {
        # Direct membership test instead of scanning all of endtime for
        # an equal key.
        if (!(x in endtime))
            continue

        diff = mktime(endtime[x]) - mktime(starttime[x])
        # %d truncates, so the divisions yield whole hours and minutes.
        diff = sprintf("%dh:%dm:%ds", diff/(60*60), diff%(60*60)/60, diff%60)
        print x, diff

        delete starttime[x]
        delete endtime[x]
    }
}
$ cat log.file
2013-04-03 08:54:19,989 INFO [LOGGER] <UId>904c-be-4e-bbda-3e62</UId><
2013-04-03 08:54:34,979 INFO [LOGGER] <UId>edfc-fr-5e-bced-3443</UId><
2013-04-03 08:54:39,389 INFO [LOGGER] <UId>904c-be-4e-bbda-3e62</UId><
2013-04-03 08:55:19,569 INFO [LOGGER] <UId>edfc-fr-5e-bced-3443</UId><
$ awk -f script.awk log.file
904c-be-4e-bbda-3e62 0h:0m:20s
edfc-fr-5e-bced-3443 0h:0m:45s