小项目一---Python日志分析
日志分析 概述 分析的前提 半结构化数据 文本分析 提取数据(信息提取)

一、空格分隔

with open('xxx.log') as f:
    for line in f:
        for field in line.split():
            print(field)

# 注意这里拼接的一些技巧
logs = '''138.60.212.153 - - [19/Feb/2013:10:23:29 +0800] "GET /020/media.html?menu\
=3 HTTP/1.1" 200 16691 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou\
.com/search/spider.html)"'''

fields = []
flag = False
tmp = ''

# 注意拼接 "GET /020/media.html?menu=3 HTTP/1.1"这种字符串需借助标记变量!
for field in logs.split():
    if not flag and (field.startswith('[') or field.startswith('"')):
        if field.endswith(']') or field.endswith('"'):  # 处理首尾均有