import re
from collections import Counter

import jieba
import pandas as pd
# Characters removed from every token before counting: digits, letters,
# whitespace and a fixed list of ASCII / CJK punctuation marks.
# NOTE: the range ``A-z`` is wider than ``A-Za-z``; it also covers the ASCII
# characters between ``Z`` and ``a`` ([ \ ] ^ _ `), so those are stripped too.
# NOTE(review): ``()`` appears twice in the class; one pair was presumably
# full-width parentheses before the text was copied -- TODO confirm.
_NOISE_PATTERN = re.compile(r"[0-9A-z\s+—!,;:。?、~@#¥%…&*()()\-【】 /.]")


def count_words(tokens, top_n=100):
    """Return the ``top_n`` most frequent cleaned tokens.

    Each token has every character matched by ``_NOISE_PATTERN`` removed;
    tokens that end up empty are ignored.

    Args:
        tokens: Iterable of strings (e.g. the output of ``jieba.cut``).
        top_n: Maximum number of entries to return.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count. Words
        with equal counts keep the order in which they were first seen.
    """
    cleaned = (_NOISE_PATTERN.sub("", token) for token in tokens)
    counts = Counter(word for word in cleaned if word)
    return counts.most_common(top_n)


def main():
    """Read the product titles, segment them with jieba and print the top words."""
    names = pd.read_excel(r'C:\Users\41809\Desktop\拉拉裤.xlsx', sheet_name="最终")["商品名称"]
    # Empty cells are read as NaN (a float) which jieba cannot segment, so
    # drop them and coerce any remaining non-string cell to text.
    titles = names.dropna().astype(str)
    tokens = (token for title in titles for token in jieba.cut(title))
    result = count_words(tokens)
    print(result)


if __name__ == "__main__":
    main()
# [jieba segmentation + symbol stripping] Word-frequency statistics for JD.com product titles
# Latest recommended article published 2022-05-12 17:07:21