import os
import jieba
# 数据集如下 (the dataset is as follows):
# Folder that is listed below; judging by its name it holds the comment
# text files -- TODO confirm against the data source.
folder_path = r"C:\Users\Machine Learning\comments"
# Return the entry names in that folder. As a bare expression the result is
# only displayed in an interactive/notebook session; it is not stored.
os.listdir(path=folder_path)
['201603枕.txt',
'201603锅.txt',
'201604枕.txt',
'201604锅.txt',
'201605枕.txt',
'201605锅.txt',
'201606枕.txt',
'201606锅.txt',
'201607枕.txt',
'201607锅.txt',
'201608枕.txt',
'201608锅.txt',
'201609枕.txt',
'201609锅.txt',
'201610枕.txt',
'201610锅.txt',
'201611枕.txt',
'201611锅.txt',
'201612枕.txt',
'201612锅.txt',
'201701枕.txt',
'201701锅.txt',
'201702枕.txt',
'201702锅.txt',
'201703枕.txt',
'201703锅.txt',
'201704枕.txt',
'201704锅.txt',
'201705枕.txt',
'201705锅.txt',
'201706枕.txt',
'201706锅.txt',
'201707枕.txt',
'201707锅.txt',
'201708枕.txt',
'201708锅.txt',
'201709枕.txt',
'201709锅.txt',
'201710枕.txt',
'201710锅.txt',
'201711枕.txt',
'201711锅.txt',
'201712枕.txt',
'201712锅.txt',
'201801枕.txt',
'201801锅.txt',
'201802枕.txt',
'201802锅.txt',
'201803锅.txt',
'201804枕.txt',
'201804锅.txt',
'201805枕.txt',
'201805锅.txt',
'201806枕.txt',
'201806锅.txt',
'201807枕.txt',
'201807锅.txt',
'201808枕.txt',
'201808锅.txt',
'201809枕.txt',
'201809锅.txt',
'201810枕.txt',
'201810锅.txt',
'201811枕.txt',
'201811锅.txt',
'201812枕.txt',
'201812锅.txt',
'201901枕.txt',
'201901锅.txt',
'201902枕.tx