前言
分析日志是工作中经常要做的事情,比如要将日志中某个特定类型的数据全部提取出来进行分析。我之前都是用 Excel 来做这个提取工作(将日志粘贴到 Excel,按空格切割数据,提取指定的列,并筛选数据),效率不高。自从学了 Python,我发现用 Python 实现这种自动化操作的脚本非常方便,下面就是我实现的一个提取指定关键字后面数值的脚本。
源码
# -*- coding: UTF-8 -*-
''' Extract data from file by the special keyword '''
import argparse
from os import path
from re import escape, search
class DataExtractor(object):
    """Extract the numeric values that follow a keyword in a text file."""

    def __init__(self, infile, keyword, outfile):
        """
        Store the extraction settings.

        infile: name of the input file to scan
        keyword: keyword that immediately precedes the target value
        outfile: name of the output file
        """
        self.infile = infile
        self.keyword = keyword
        self.outfile = outfile

    def data_after_keyword(self):
        """
        Extract the number that follows the keyword on each input line.

        A line matches when it contains the keyword, a single space and
        then an unsigned integer or decimal number. Only the first match
        on a line is used. The matched text is written to ``outfile``,
        one value per line.

        Returns the matched values as a list of floats in file order
        (empty when nothing matched), or False when either file cannot
        be opened or an I/O error occurs while processing.
        """
        # The keyword is escaped so that it is matched literally even when
        # it contains regex metacharacters such as "(" or ".". The fraction
        # part accepts any number of digits so "12.345" is not cut to "12.3".
        patt = r'%s (\d+(?:\.\d+)?)' % escape(str(self.keyword))
        data = []
        try:
            with open(self.infile, 'r') as fi:
                with open(self.outfile, 'w') as fo:
                    for each_line in fi:
                        s = search(patt, each_line)
                        if s is not None:
                            fo.write(s.group(1) + '\n')
                            data.append(float(s.group(1)))
            return data
        except IOError:
            print(
                "Open file [%s] or [%s] failed!" % (self.infile, self.outfile))
            return False
def main():
    """
    Parse the command line, run the extraction and report the outcome.

    Positional arguments are the input file and the keyword. The optional
    ``-o/--outfile`` names the output file; its default is the part of this
    script's file name before the first dot, followed by ``.out``.
    """
    parser = argparse.ArgumentParser(description='Extract data from file.')
    parser.add_argument('infile', help='input file which contains data')
    parser.add_argument('keyword', help='keywords for find data')
    parser.add_argument('-o', '--outfile',
                        default=path.basename(__file__).split('.')[0] + '.out',
                        help='output file for save data')
    args = parser.parse_args()  # parse the command-line arguments
    infile = args.infile
    outfile = args.outfile
    keyword = args.keyword
    extractor = DataExtractor(infile, keyword, outfile)
    ret = extractor.data_after_keyword()
    # data_after_keyword() returns False only when file I/O failed. An empty
    # list means the file was processed but nothing matched, which is not a
    # failure, so test for the False sentinel instead of truthiness.
    if ret is not False:
        print("Export data from file[%s] after keyword[%s] successed!" % (
            infile, keyword))
    else:
        print("Export data from file[%s] after keyword[%s] failed!" % (
            infile, keyword))
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
结语
提取数据只是第一步,用 Python 做数据可视化也非常方便,比如结合 matplotlib 可以直接将提取出的数据绘制成曲线,进一步提高日志分析的效率。