I'm trying to parse an Apache access log with a regular expression in Python and assign each field to a separate variable.
ACCESS_LOG_PATTERN = \'^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[
import re

# Building blocks for one Apache access-log entry. Each captured field is a
# named group so parser() can retrieve it by name instead of by position.
HOST = r'^(?P<host>.*?)'
SPACE = r'\s'
IDENTITY = r'\S+'  # matched but not captured
USER = r'\S+'  # matched but not captured
TIME = r'(?P<time>\[.*?\])'  # keeps the surrounding square brackets
REQUEST = r'"(?P<request>.*?)"'  # the quoted request, without the quotes
STATUS = r'(?P<status>\d{3})'
SIZE = r'(?P<size>\S+)'

# Note: the pattern ends with SPACE, so the size field must be followed by a
# whitespace character; whatever follows that whitespace is ignored.
REGEX = (
    HOST + SPACE
    + IDENTITY + SPACE
    + USER + SPACE
    + TIME + SPACE
    + REQUEST + SPACE
    + STATUS + SPACE
    + SIZE + SPACE
)

# Compiled once at import time so repeated parser() calls reuse the pattern.
_LOG_LINE_RE = re.compile(REGEX)


def parser(log_line: str) -> tuple:
    """Extract the captured fields from a single access-log line.

    Args:
        log_line: One line of the log, e.g.
            ``1.2.3.4 - - [24/Mar/2017:19:37:57 +0000] "GET / HTTP/1.1" 200 512 ...``

    Returns:
        A 5-tuple of strings ``(host, time, request, status, size)``.
        ``time`` includes its square brackets; ``request`` excludes its quotes.

    Raises:
        ValueError: If ``log_line`` does not match ``REGEX``.
    """
    match = _LOG_LINE_RE.search(log_line)
    if match is None:
        # Fail with the offending line rather than an AttributeError on None.
        raise ValueError(f"log line does not match the expected format: {log_line!r}")
    return match.group('host', 'time', 'request', 'status', 'size')


logLine = (
    '180.76.15.30 - - [24/Mar/2017:19:37:57 +0000] '
    '"GET /shop/page/32/?count=15&orderby=title&add_to_wishlist=4846 HTTP/1.1" '
    '404 10202 "-" '
    '"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)'
)
result = parser(logLine)
print(result)
RESULT
('180.76.15.30', '[24/Mar/2017:19:37:57 +0000]', 'GET /shop/page/32/?count=15&orderby=title&add_to_wishlist=4846 HTTP/1.1', '404', '10202')