下面是一个简单的脚本，用于从指定路径下的 txt 文件中提取所需的内容：
#!/usr/bin/python
#coding:utf-8
import re
import os
file_path = "/home/gbase/Documents"  # directory that holds the .txt files
re_str = r'source="(.+)" target='  # captures the text between source=" and " target=


def _extract_from_lines(lines, pattern):
    """Return one extracted string for every line inside a GBase8a section.

    A section opens on a line containing ``target="GBase8a"`` and closes on
    a line containing ``</dtm>``. The opening line is itself extracted; the
    closing line is not. A line inside the section that has no match still
    contributes an empty string, which keeps the original output format.

    ``lines`` is any iterable of text lines and ``pattern`` is a compiled
    regular expression with a single capture group.
    """
    pieces = []
    in_section = False  # reset on every call so state never leaks between files
    for line in lines:
        if 'target="GBase8a"' in line:
            in_section = True
        # Tested after the opening marker on purpose: a line carrying both
        # markers leaves the section closed.
        if '</dtm>' in line:
            in_section = False
        if in_section:
            pieces.append(' '.join(pattern.findall(line)))
    return pieces


def extract_sources(dir_path, pattern=re_str):
    """Collect the ``source="..."`` values from every .txt file in ``dir_path``.

    Only regular files whose extension is exactly ``.txt`` are read. Files
    are visited in sorted name order so the output is deterministic.

    Returns a string in which each extracted line is preceded by a newline,
    or an empty string when nothing was extracted.

    Raises OSError if ``dir_path`` cannot be listed or a file cannot be read.
    """
    compiled = re.compile(pattern)  # compile once instead of once per line
    pieces = []
    for name in sorted(os.listdir(dir_path)):
        if os.path.splitext(name)[1] != '.txt':
            continue
        path = os.path.join(dir_path, name)
        if not os.path.isfile(path):
            # A directory that happens to be named "*.txt" cannot be opened.
            continue
        # The context manager closes the handle even if reading fails.
        with open(path) as handle:
            pieces.extend(_extract_from_lines(handle, compiled))
    return ''.join('\n' + piece for piece in pieces)


# NOTE(review): the original paste had lost its indentation; this assumes the
# result was printed once, after all files had been processed.
if __name__ == "__main__":
    result = extract_sources(file_path)
    print("结果为:\n" + result)
~
~
~
按行遍历一个 log 文件，只要某一行中出现单词 Initialize，便将该行日志追加写入另一个文件 result.log 中：
# Copy every line of log.txt that contains 'Initialize' to the end of
# result.log. The output is opened in append mode, so existing content stays.
with open('log.txt', 'r') as log_file, open('result.log', 'a') as out_file:
    out_file.writelines(entry for entry in log_file if 'Initialize' in entry)