I am completely new to Python and I have a problem which I cannot solve.
I have a few log files with identical structure:
[timestamp] [level]
All of the other answers here read in all the logs before the first line is printed, which can be incredibly slow, and even break things if the logs are too big.
This solution uses a regex and a strptime format, like the above solutions, but it "merges" the logs as it goes.
That means you can pipe the output of this script to "head" or "less" and expect it to be snappy.
import re
import time
import typing
from dataclasses import dataclass
# strptime format of the timestamps expected at the start of each log line
t_fmt = "%Y%m%d.%H%M%S.%f"
# first run of non-space characters in a line; assumed to be the timestamp
t_pat = re.compile(r"([^ ]+)")
def get_time(line, prev_t):
    """Parse the timestamp from *line*; fall back to *prev_t* if none is found.

    Returns a ``time.struct_time`` on success, otherwise *prev_t* unchanged
    (which may be ``None`` for the very first lines of a file).
    """
    match = t_pat.search(line)
    if match is None:
        return prev_t
    try:
        return time.strptime(match.group(1), t_fmt)
    except ValueError:
        # first token was not a timestamp in the expected format
        return prev_t
def print_sorted(files):
    """Interleave the lines of several log files in timestamp order, lazily.

    Only one pending entry per file is buffered, so output begins
    immediately and the function can be piped to ``head``/``less``.
    Missing files are silently skipped. Lines with no parsable timestamp
    inherit the previous entry's time (see ``get_time``).

    :param files: iterable of log file paths.
    """
    @dataclass
    class FInfo:
        path: str
        fh: typing.TextIO
        # Annotated with defaults so they are real dataclass fields
        # (un-annotated assignments would be shared class attributes).
        cur_l: str = ""            # pending (possibly multi-line) log entry
        cur_t: typing.Any = None   # struct_time of the pending entry

        def __read(self):
            line = self.fh.readline()
            if not line:
                # EOF. Detect it from readline() itself, NOT from cur_l:
                # checking cur_l looped forever when a file's leading lines
                # had no timestamp (cur_l nonempty, cur_t stuck at None).
                # A far-future time makes this file sort after live entries.
                # NOTE(review): assumes no log timestamp is >24h ahead of
                # the wall clock — confirm.
                self.cur_t = time.localtime(time.time() + 86400)
            else:
                self.cur_l += line
                self.cur_t = get_time(self.cur_l, self.cur_t)

        def read(self):
            # Drop the entry just printed, then buffer the next one.
            self.cur_l = ""
            self.__read()
            # Keep appending lines until a timestamp is known. After the
            # first entry cur_t is never None (get_time inherits prev_t),
            # so this loop only runs for a file's leading lines.
            while self.cur_t is None:
                self.__read()

    finfos = []
    try:
        for f in files:
            try:
                fh = open(f, "r")
            except FileNotFoundError:
                continue  # best effort: skip logs that do not exist
            fi = FInfo(f, fh)
            fi.read()
            finfos.append(fi)
        if not finfos:
            return  # nothing opened; avoid min() over an empty sequence
        while True:
            # File whose pending entry has the earliest timestamp.
            # min() is O(k) per line vs O(k log k) for sorted()[0].
            fi = min(finfos, key=lambda x: x.cur_t)
            if not fi.cur_l:
                break  # earliest candidate is an exhausted file => all done
            print(fi.cur_l, end="")
            fi.read()
    finally:
        # The original leaked every handle; close them even on error.
        for fi in finfos:
            fi.fh.close()