jieba分词及词性标注
想着先分词,再给分过的词标注词性
很简单但是弄了蛮久
代码也不够简洁
要学习啊
鸡汤:脚踏实地,眼看前方
import jieba
import jieba.posseg as pseg
# Load the custom user dictionary 'userdict1.txt' into jieba. This runs at
# module level, and the path is relative, so the file is looked up from the
# current working directory.
jieba.load_userdict('userdict1.txt')
# Build the stop-word list
def stopwordslist(filepath):
    """Read a stop-word file and return its lines as a list.

    Args:
        filepath: Path to a UTF-8 encoded text file; each line is treated
            as one entry.

    Returns:
        A list with one string per line of the file, in file order, with
        leading and trailing whitespace removed. Blank lines yield empty
        strings.
    """
    # The context manager closes the file even if reading or decoding fails;
    # the original left the handle open.
    with open(filepath, 'r', encoding='utf-8') as stopword_file:
        return [line.strip() for line in stopword_file]
# 对句子进行分词
def seg_sentence(sentence):
sentence_seged = jieba.posseg.cut(sentence.strip())
stopwords = stopwordslist('stop_words.txt') ## 这里加载停用词的路径
outstr = ''
for pairs in sentence_seged:
# print(pairs)
# print('.'*60)
for word in pairs:
# print(word)
# print('='*50)
if word not in stopwords:
if word != '\t':
# print(word)
# print('>'*50)
outstr += word