def cal_clear_word(test, stoplist=None, tokenizer=None):
    """Tokenize the ``text`` column of *test* and drop stop tokens.

    Every value in ``test['text']`` is replaced by the list of tokens the
    tokenizer yields for it, minus any token that appears in the stop list.
    The frame is modified in place and also returned.

    Args:
        test: DataFrame-like object with a ``text`` column (presumably of
            strings, since each value is handed to the tokenizer).
        stoplist: Optional iterable of hashable tokens to discard. When
            omitted, a single space, a newline and an ASCII comma are used.
        tokenizer: Optional callable that takes one ``text`` value and
            returns an iterable of tokens. When omitted, ``jieba.cut`` is
            used.

    Returns:
        The same object that was passed in as *test*, with its ``text``
        column overwritten by lists of tokens.
    """
    # Build the stop set once: membership tests are O(1) and nothing is
    # reconstructed per token.
    stop_tokens = frozenset(
        [' ', '\n', ','] if stoplist is None else stoplist
    )
    # jieba is only looked up when no custom tokenizer is supplied.
    cut = jieba.cut if tokenizer is None else tokenizer

    def tokenize_text(a):
        return [w for w in cut(a) if w not in stop_tokens]

    # Only the 'text' column is read, so map over that column directly
    # instead of applying row-wise over the whole frame; this also keeps the
    # result a Series when the frame has no rows.
    test['text'] = test['text'].apply(tokenize_text)
    return test
# Segment the text with jieba and remove stop words.
return_df = cal_clear_word(test)
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\ADMINI~1\AppData\Local\Temp\jieba.cache
Loading model cost 1.240 seconds.
Prefix dict has been built succesfully.
# Imported here because the file's top-level import block is not part of this
# section; move these to the top of the file if one exists.
from collections import Counter
from itertools import chain

# Flatten the per-row token lists into one list holding every token.
a = list(chain.from_iterable(return_df['text']))

# Build the frequency dictionary: each distinct word maps to the number of
# times it occurs in `a`, in order of first appearance. It is converted back
# to a plain dict so that looking up a word that never occurred still raises
# KeyError, as it would with a hand-built dictionary.
words_dict = dict(Counter(a))