# coding: utf-8
"""Extract TF-IDF keywords from scanned JSON records and save them to an .xls sheet.

The script walks a directory tree for ``.json`` files, collects the ``content``
field of every record whose ``page_action_type`` is ``'07'``, turns each piece
of content into an ``' OR '``-joined keyword query with jieba, and writes the
queries to an Excel workbook, one per row.
"""
import io
import json
import os
import sys

# Directory that holds the JSON files to scan.
SOURCE_DIR = r"E:\siteVerify\BD\data\scansbak"
# Workbook the keyword queries are written to.
OUTPUT_PATH = r"E:\siteVerify\BD\data\kwss.xls"


def listFile(fileDir):
    """Collect the non-empty ``content`` of type ``'07'`` records under *fileDir*.

    Every ``.json`` file found by walking *fileDir* recursively is loaded and
    iterated; for each record, ``content`` is kept when ``page_action_type``
    equals ``'07'`` and the content has non-zero length.

    A file that cannot be read, decoded or iterated as expected is skipped
    with a warning on stderr. Records collected from that file before the
    failure are kept; the remainder of the file is ignored.

    Args:
        fileDir: Root directory to walk.

    Returns:
        A list of content values, in the order the files were discovered.
    """
    json_paths = []
    for root, _dirs, names in os.walk(fileDir):
        for name in names:
            if os.path.splitext(name)[1] == '.json':
                json_paths.append(os.path.join(root, name))

    contents = []
    for path in json_paths:
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # platform's default encoding.
            with io.open(path, 'r', encoding='utf-8') as handle:
                records = json.load(handle)
            for record in records:
                action_type = record['page_action_type']
                content = record['content']
                if action_type == '07' and len(content) > 0:
                    contents.append(content)
        except (IOError, OSError, ValueError, KeyError, TypeError) as err:
            # Best-effort scan: report the problem and move on to the next file.
            sys.stderr.write("skipping %s: %s\n" % (path, err))
            continue
    return contents


def resolveKeyword(content):
    """Build an ``' OR '``-joined keyword query for *content*.

    Keywords come from ``jieba.analyse.tfidf`` and are echoed to stdout.

    Args:
        content: Text to extract keywords from.

    Returns:
        The keywords joined with ``' OR '`` when more than one keyword is
        found, otherwise an empty string.
    """
    # Imported here so that importing this module (e.g. to reuse listFile)
    # does not require jieba to be installed.
    from jieba import analyse

    keywords = analyse.tfidf(content)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(keywords)
    if len(keywords) > 1:
        return ' OR '.join(keywords)
    return ''


def main():
    """Scan SOURCE_DIR, derive keyword queries and save them to OUTPUT_PATH."""
    # Imported here so that only the export step depends on xlwt.
    from xlwt import Workbook

    workbook = Workbook(encoding='utf-8')
    sheet = workbook.add_sheet(u"kwss")

    # Drop empty entries; the result must be kept, filtering is not in-place.
    contents = [item for item in listFile(SOURCE_DIR) if item]

    queries = []
    for item in contents:
        query = resolveKeyword(item)
        # resolveKeyword returns '' (never None) when there is nothing usable,
        # so test for emptiness to avoid writing blank rows.
        if query:
            queries.append(query)

    # Column 0 holds the row index, column 1 the keyword query.
    for row, query in enumerate(queries):
        print(row)
        sheet.write(row, 0, row)
        sheet.write(row, 1, query)
    workbook.save(OUTPUT_PATH)


if __name__ == "__main__":
    main()
Python 遍历文件,组成列表格式
最新推荐文章于 2022-10-31 14:49:42 发布