# d_train is a DataFrame; preview its first rows.
print(d_train.head())

# Glue every title together with single spaces, then split the result on
# whitespace to obtain a flat list of tokens.
joined_titles = " ".join(d_train.title)
document = joined_titles.split()
print(document)

# The 100 most frequent tokens as (token, count) pairs, most frequent first.
token_counter = Counter(document)
ss = token_counter.most_common(100)
print(ss)
# 对分词统计后的数据存储 (Store the word-frequency statistics produced by tokenization)
from collections import Counter
import pandas as pd
import numpy as np
# Count how often each whitespace-separated word occurs in the "product_name"
# column of the workbook, then save the (word, count) table to a new Excel file.
bill_path = r'switch category.xlsx'

# read_excel already returns a DataFrame, so no extra DataFrame() wrapper is needed.
df = pd.read_excel(bill_path)

# Drop every row that has a missing value in any column.
# NOTE(review): this also discards rows whose product_name is present but some
# other column is empty -- confirm whether dropna(subset=["product_name"]) was
# intended.
df = df.dropna()

# str() guards against non-string cells (e.g. purely numeric product names).
product_names = " ".join(str(name) for name in df["product_name"].values)
words = product_names.split()

# most_common() without an argument returns all (word, count) pairs,
# ordered from most to least frequent.
fenci = Counter(words).most_common()

# Build the frame directly from the list of tuples. Going through np.array
# would coerce the counts to strings and raise ValueError when no words were
# found (an empty 1-D array cannot be matched to two columns).
df = pd.DataFrame(fenci, columns=["word", "count"])

# Keeps the column integer-typed even when the frame is empty (object dtype).
df["count"] = df["count"].astype("int")

df.to_excel("fenci_1115_3.xlsx", index=False)