import jieba as jb

# Quick sanity check of jieba's "full mode" (cut_all=True) on a short sentence.
seg_list = jb.cut('你是谁', cut_all=True)
print("Full Mode:", "/ ".join(seg_list))  # full mode
# Output recorded from an earlier run:
#   Full Mode: 你/ 是/ 谁

# Segment the novel line by line and write one segmented line per input line.
# `with` guarantees both files are closed even if segmentation raises.
# NOTE(review): the encoding is pinned to UTF-8 so the result does not depend
# on the platform locale; adjust it if the source text is actually GBK/GB18030.
with open('/Users/momo/Desktop/小论文/模拟/红楼梦/红楼梦.txt', encoding='utf-8') as data, \
        open('/Users/momo/Desktop/小论文/模拟/红楼梦/output.txt', 'w', encoding='utf-8') as fout:
    for eachLine in data:
        # Drop the line terminator before segmenting; otherwise jieba returns
        # '\n' as a token of its own and a stray "/" ends up on an extra line.
        wordList = list(jb.cut(eachLine.rstrip('\n')))
        # Every token is followed by "/ "; join avoids repeated string +=.
        outStr = ''.join(word + '/ ' for word in wordList)
        # strip() removes the trailing space left after the last "/".
        fout.write(outStr.strip() + '\n')  # write the segmented line to the output file
# TODO (待学习): study the word-segmentation pipeline and how to apply segmentation dictionaries (分词流程处理,分词字典应用).