# Basic jieba usage. word.txt holds user-defined fixed phrases (e.g. "神龟虽寿"); once loaded, such a phrase is kept together as a single token when segmenting.
#encoding:utf-8
import jieba #不能把文件名命名为‘jieba’,不然出错
from jieba import posseg
# Register the custom dictionary so the user-defined phrases listed in
# ./word.txt (e.g. '神龟虽寿') are kept as single tokens instead of being split.
jieba.load_userdict('./word.txt')

# Sample sentence to segment. Bound to `text` rather than `str` so the builtin
# `str` type is not shadowed for the rest of the module.
text = '神龟虽寿,严重急性呼吸道综合征,俗称传染性非典型肺炎,也称非典型肺炎。'

# The same sentence segmented four different ways. Only cut1 is consumed by
# the loop below; cut2-cut4 are used by the commented-out examples that follow.
cut1 = jieba.cut(text)  # accurate mode (the default)
cut2 = jieba.cut(text, cut_all=True)  # full mode
cut3 = jieba.cut(text, HMM=False)  # accurate mode with the HMM switched off
cut4 = posseg.cut(text)  # segmentation with part-of-speech tagging

# Print every token of the accurate-mode result on its own line.
for token in cut1:
    print(token)
# print('/'.join(cut2))
# print('/'.join(cut3))
# a=[]
# for w in cut4:
# print(w.word,end='')
# print(w.flag)
#