# The path passed in is a directory; the files of interest inside it are .txt.
def eachFile(filepath, encoding=None):
    """Extract the top keywords of every non-empty ``.txt`` file in a directory.

    Only the direct children of ``filepath`` are examined; sub-directories
    are not descended into. Files that are empty are skipped entirely.

    Args:
        filepath: Path of the directory to scan.
        encoding: Text encoding used to read each file. ``None`` (the
            default) keeps the platform default used by ``open``; pass
            e.g. ``"utf-8"`` to make reading independent of the locale.

    Returns:
        A ``pandas.DataFrame`` with one row per processed file and the
        columns ``title`` (file name without its ``.txt`` extension) and
        ``key1`` .. ``key10`` (keywords returned by
        ``jieba.analyse.extract_tags`` with ``topK=10`` and
        ``allowPOS=('n', 'v')``). Missing keywords are filled with ``''``.
    """
    num_keys = 10
    key_columns = ['key%d' % idx for idx in range(1, num_keys + 1)]

    rows = []
    for name in os.listdir(filepath):
        full_path = os.path.join(filepath, name)
        # splitext only strips the trailing extension, so a name such as
        # "a.txt.bak.txt" keeps its inner ".txt" in the title.
        stem, ext = os.path.splitext(name)
        if ext != '.txt' or not os.path.isfile(full_path):
            continue

        with open(full_path, 'r', encoding=encoding) as f:
            content = f.read()
        if not content:
            continue

        tags = list(jieba.analyse.extract_tags(content, topK=num_keys,
                                               allowPOS=('n', 'v')))[:num_keys]
        # Short documents may yield fewer than num_keys tags; pad explicitly
        # so every row has the same width.
        tags.extend([''] * (num_keys - len(tags)))
        rows.append([stem] + tags)

    return pd.DataFrame(rows, columns=['title'] + key_columns)