import pkuseg
import os
import os
import time

# Sample sentence that every locally installed pkuseg model is asked to segment.
s = "无时无刻巴赫"

st = time.time()
path = "C:\\Users\\1\\.pkuseg\\"  # all downloaded models are stored in this directory

# Visit the entries of the model directory in sorted (name) order.
files = sorted(os.listdir(path))
for file_ in files:
    model_dir = os.path.join(path, file_)
    # Only sub-directories are loaded as models; the "postag" directory is
    # skipped explicitly.
    if not os.path.isdir(model_dir) or file_ == "postag":
        continue
    seg = pkuseg.pkuseg(model_name=model_dir, postag=True)
    text = seg.cut(s)
    print(file_, text)

# NOTE(review): the original line structure was lost; the elapsed time is
# assumed to be printed once, after all models have run -- confirm.
print(time.time() - st)
import jieba

# Sentences used to compare pkuseg and jieba word segmentation.
s = ["李扬来到了移动杭研大厦", "北京理工", "翅展万丈", '无时无刻巴赫']

# Load the pkuseg model once: the model path is the same for every sentence,
# so re-creating the segmenter inside the loop only repeats the load.
seg = pkuseg.pkuseg(model_name='C:\\Users\\1\\.pkuseg\\msra')

for i in s:
    text = seg.cut(i)
    # NOTE(review): the label says "medicine" but the model loaded above is
    # the "msra" directory -- confirm which one is intended.
    print("pkuseg-medicine:", text)

    # NOTE(review): the output calls this the "default mode", but
    # cut_all=True selects jieba's full mode -- confirm the label.
    seg_list = jieba.cut(i, cut_all=True)
    print("jieba默认模式: " + "/ ".join(seg_list))

    # Accurate mode (cut_all=False).
    seg_list = jieba.cut(i, cut_all=False)
    print("jieba精确模式: " + "/ ".join(seg_list))

    # Blank separator between sentences.
    print('\n')
# `import jieba` on its own does not load the `posseg` submodule, so it is
# imported explicitly before `jieba.posseg.cut` is used below.
import jieba.posseg

# Load the POS-tagging pkuseg model once instead of once per sentence.
seg = pkuseg.pkuseg(model_name='C:\\Users\\1\\.pkuseg\\msra', postag=True)

for i in s:
    text = seg.cut(i)
    # NOTE(review): the label says "medicine" but the model loaded above is
    # the "msra" directory -- confirm which one is intended.
    print("pkuseg-medicine:", text)

    # Render jieba's (word, flag) pairs as "word flag<TAB>" entries; the
    # trailing tab after the last entry is kept to match the previous output.
    seglist = "".join(
        k.word + " " + k.flag + '\t' for k in jieba.posseg.cut(i)
    )
    print("jieba", seglist, '\n')
# thulac
import thulac


def get_thulac(text_content):
    """Segment *text_content* with THULAC and return the result as text.

    A new ``thulac.thulac`` instance is created on every call with
    ``seg_only=False``, and the input is passed to its ``cut`` method with
    ``text=True``.

    Args:
        text_content: The text to segment.

    Returns:
        Whatever ``cut(text_content, text=True)`` returns.
        NOTE(review): presumably a single string of tagged words -- confirm
        against the THULAC documentation.
    """
    thu1 = thulac.thulac(seg_only=False)
    text = thu1.cut(text_content, text=True)
    return text