目录
Excel 内容如下:
库的导入
import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib
# matplotlib.use('TkAgg') OS报错macOS 12 (1207) or later required, have instead 12 (1206) !
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
import collections
停用词表如下:
数据清洗
对 Excel 表的内容进行读取/去重/分词/统计
def filter_same():
df = pd.read_csv(execl_path) # 读取爬取的数据
data = pd.DataFrame(df)
# 删除重复行:
datatmsp = data.drop_duplicates()
# print(datatmsp)
data = datatmsp[['标题', '价格']]
# print(data)
data.head()