用 Python 3.6 实现:查找某个路径下的文件中是否包含指定的字符串
#coding=utf8
import os
#import tkFileDialog
def readFilename(file_dir):
    """Return the immediate listing of ``file_dir``.

    Only the first entry produced by ``os.walk`` is consumed, so the result
    describes ``file_dir`` itself and not its sub-directories.

    Args:
        file_dir: Path of the directory to list.

    Returns:
        A ``(files, dirs, root)`` tuple: the file names and sub-directory
        names directly inside ``file_dir``, and the directory path reported
        by ``os.walk``. When ``os.walk`` yields nothing (for example the path
        does not exist or cannot be listed) ``([], [], file_dir)`` is
        returned, so callers can always unpack three values.
    """
    # os.walk swallows listing errors by default and then yields nothing;
    # the fallback keeps the return shape stable in that case.
    root, dirs, files = next(os.walk(file_dir), (file_dir, [], []))
    return files, dirs, root
def findstring(pathfile, keyword="trimQuotation"):
    """Tell whether the text file at ``pathfile`` contains ``keyword``.

    The file is read in one go and decoded as UTF-8.

    Args:
        pathfile: Path of the file to inspect.
        keyword: Substring to look for. Defaults to ``"trimQuotation"``.

    Returns:
        ``True`` when ``keyword`` occurs in the file, ``False`` otherwise.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # The context manager closes the handle even when decoding fails.
    with open(pathfile, "r", encoding='UTF-8') as fp:
        content = fp.read()
    if keyword in content:
        print('here?')
        return True
    return False
def startfind(files, dirs, root):
    """Recursively print the names of files for which ``findstring`` is true.

    Args:
        files: Names of the files directly inside ``root``.
        dirs: Names of the sub-directories directly inside ``root``.
        root: Directory that ``files`` and ``dirs`` belong to.
    """
    for filename in files:
        try:
            # os.path.join uses the platform separator instead of a
            # hard-coded backslash, so the path is valid outside Windows too.
            if findstring(os.path.join(root, filename)):
                print(filename)
        except Exception as err:
            # Best-effort scan: report the problem (unreadable file, wrong
            # encoding, ...) and move on to the next file.
            print(err)
    for dirname in dirs:
        listing = readFilename(os.path.join(root, dirname))
        if listing is None:
            # Nothing could be listed for this sub-directory; skip it
            # rather than failing on tuple unpacking.
            continue
        sub_files, sub_dirs, sub_root = listing
        startfind(sub_files, sub_dirs, sub_root)
if __name__ == '__main__':
    # Directory the scan starts from; edit this path to search elsewhere.
    default_dir = u"E:\\RZRKCode"
    file_path = default_dir
    # List the top level once; startfind then descends into sub-directories.
    top_files, top_dirs, top_root = readFilename(file_path)
    startfind(top_files, top_dirs, top_root)
上面的代码还有一些问题:当文件编码是 GBK 时,按 UTF-8 读取通常会抛出异常并被 except 捕获,该文件因此被跳过。
下面给出一个查找所有文件中 IP 地址的版本:
#coding=utf8
import os
import re
# Matches dotted-quad candidates: four groups of 1-3 digits separated by dots.
# The groups are not range-checked, so text such as "999.999.999.999" or a
# four-part version number also matches.
pattern = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
def readFilename(file_dir):
    """Return the immediate listing of ``file_dir``.

    Only the first entry produced by ``os.walk`` is consumed, so the result
    describes ``file_dir`` itself and not its sub-directories.

    Args:
        file_dir: Path of the directory to list.

    Returns:
        A ``(files, dirs, root)`` tuple: the file names and sub-directory
        names directly inside ``file_dir``, and the directory path reported
        by ``os.walk``. When ``os.walk`` yields nothing (for example the path
        does not exist or cannot be listed) ``([], [], file_dir)`` is
        returned, so callers can always unpack three values.
    """
    # os.walk swallows listing errors by default and then yields nothing;
    # the fallback keeps the return shape stable in that case.
    root, dirs, files = next(os.walk(file_dir), (file_dir, [], []))
    return files, dirs, root
def findstring(pathfile, edcode):
    """Print every match of the module-level ``pattern`` found in a file.

    Args:
        pathfile: Path of the file to inspect.
        edcode: Text encoding used to decode the file, e.g. ``'utf-8'``.

    Returns:
        ``True`` when at least one match was found (the list of matches is
        printed first), ``False`` otherwise.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid in ``edcode``.
    """
    # The context manager closes the handle even when decoding fails.
    with open(pathfile, "r", encoding=edcode) as fp:
        content = fp.read()
    matches = pattern.findall(content)
    if not matches:
        return False
    print(matches)
    return True
def startfind(files, dirs, root):
    """Recursively print the location of files for which ``findstring`` is true.

    Each file is first read as UTF-8; if that attempt raises, it is retried
    as GBK. A file that fails with both encodings is skipped silently.

    Args:
        files: Names of the files directly inside ``root``.
        dirs: Names of the sub-directories directly inside ``root``.
        root: Directory that ``files`` and ``dirs`` belong to.
    """
    for filename in files:
        # os.path.join uses the platform separator instead of a hard-coded
        # backslash, so the path is valid outside Windows too.
        path = os.path.join(root, filename)
        for encoding in ('utf-8', 'gbk'):
            try:
                if findstring(path, encoding):
                    print(root, filename)
            except Exception:
                # Deliberate best-effort: fall through to the next encoding,
                # or give up on this file once both have been tried.
                continue
            else:
                break
    for dirname in dirs:
        listing = readFilename(os.path.join(root, dirname))
        if listing is None:
            # Nothing could be listed for this sub-directory; skip it
            # rather than failing on tuple unpacking.
            continue
        sub_files, sub_dirs, sub_root = listing
        startfind(sub_files, sub_dirs, sub_root)
if __name__ == '__main__':
    # Directory the scan starts from; edit this path to search elsewhere.
    default_dir = u"D:\\zd_gdzq"
    file_path = default_dir
    # List the top level once; startfind then descends into sub-directories.
    top_files, top_dirs, top_root = readFilename(file_path)
    startfind(top_files, top_dirs, top_root)
good luck!