绘制词云
准备相关库
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from PIL import Image # 图片处理
import numpy as np
import pymysql
from sqlalchemy import create_engine
准备词云所需的文字
# Load every product title from the `lips` table and merge them into one
# string for segmentation.
# NOTE(review): the database credentials are hard-coded in the URL below.
# NOTE(review): `encoding=` on create_engine and `Engine.execute()` were
# deprecated in SQLAlchemy 1.4 and removed in 2.0 - confirm the installed
# version before upgrading.
con = create_engine('mysql+pymysql://root:1356130369@localhost:3306/dt', encoding='utf8')
sql = "select 标题 from lips "
result = con.execute(sql)
result = result.fetchall()
# Each row is a one-column tuple; join once instead of growing the string
# piece by piece inside a loop.
text = "".join(item[0] for item in result)
开始分词
# Segment the merged text with jieba and join the tokens with single spaces.
string = " ".join(jieba.cut(text))
准备图片,开始画词云
# Load the mask image; the context manager closes the file handle once the
# pixels have been copied into an array.
with Image.open(r'C:\Users\zzz\Desktop\数据可视化部分前端开发素材\前端素材\tree.jpg') as img:
    img_array = np.array(img)  # convert the image to an array
# Build the word cloud on a white background, using the image array as mask.
wc = WordCloud(
    background_color='white',
    mask=img_array,
    font_path="simsun.ttc",  # font location: C:\Windows\Fonts
)
wc.generate_from_text(string)
输出词云图片到文件
# Draw the generated word cloud with matplotlib and save the figure to a file.
plt.imshow(wc) # display the word cloud as an image
plt.axis('off') # hide the axes
plt.savefig(r'C:\Users\zzz\Desktop\tree.jpg',dpi=500)
关于词的分析
先利用python处理文本数据
import jieba #分词
import numpy as np
import pandas as pd
import pymysql
from sqlalchemy import create_engine
# Read the product spreadsheet and take the title column as a plain list.
data = pd.read_excel('./口红.xlsx')
text = list(data['标题'].values)
# Segment every title into its own token list (one inner list per title).
alldata = [jieba.lcut(title) for title in text]
# Drop stop words from every title's token list.
stop_word = [' ', '/', '【', '】', '色']
clean_alldata = [
    [token for token in tokens if token not in stop_word]
    for tokens in alldata
]
# De-duplicate within each title: for accuracy, every word is kept at most
# once per title. dict.fromkeys keeps the first occurrence and its order.
alldata_clean = [list(dict.fromkeys(tokens)) for tokens in clean_alldata]
# Flatten the per-title token lists into one list.
dfdata = [token for tokens in alldata_clean for token in tokens]
# Put the flattened tokens into a one-column DataFrame named 'allword'.
df = pd.DataFrame(dfdata).set_axis(['allword'], axis=1)
# Tally each distinct word, then expose the result as two columns:
# the word and its count.
count_df = df.value_counts().reset_index().set_axis(['word', 'count'], axis=1)
# Total the sales figure (column '付款人数') of the titles that contain each
# of the first 15 words in count_df.
top_words = count_df['word'][0:15].values
sum_number = []
for item in top_words:
    # alldata_clean holds one token list per title, in the same order as the
    # rows of `data`, so pair them positionally. A hand-maintained counter is
    # easy to get wrong (`i = +1` assigns 1, it does not increment).
    sale_list = [
        sales
        for words, sales in zip(alldata_clean, data['付款人数'])
        if item in words
    ]
    sum_number.append(sum(sale_list))
# Result table: total sales ('xiaoliang') next to its word ('gaopinci').
df_sum = pd.DataFrame({'xiaoliang': sum_number, 'gaopinci': top_words})
# Create the engine; in the URL, substitute your own user name, password and
# database name.
conn = create_engine('mysql+pymysql://root:1356130369@localhost:3306/dt', encoding='utf8')
# Write df_sum to table "gpcxlqk"; 'replace' means a table of the same name
# is replaced if it already exists.
df_sum.to_sql("gpcxlqk", conn, if_exists='replace')
# Quick check: read the table back and print what was stored.
con = create_engine('mysql+pymysql://root:1356130369@localhost:3306/dt', encoding='utf8')
sql = "select xiaoliang,gaopinci from gpcxlqk "
result = con.execute(sql)
result = result.fetchall()
xiaoliang = [item[0] for item in result]
gaopinci = [item[1] for item in result]
renliang = [i * 5 for i in range(1, 16)]
# Build [score, word, sales] rows. zip stops at the shortest input, so a
# table with fewer than 15 rows does not raise IndexError, and no variable
# shadows the builtin `list`.
alldata = [list(row) for row in zip(renliang, gaopinci, xiaoliang)]
print(alldata)
print(gaopinci)
print(xiaoliang)
搭建Flask框架,使得图形能在网页显示
@app.route('/')
def gaopinci():
    """Serve the high-frequency-word sales chart page.

    Reads the ``xiaoliang`` and ``gaopinci`` columns of table ``gpcxlqk`` and
    passes them, together with a fixed descending score list
    (75, 70, ..., 5), to the ``高频词销量情况.html`` template.
    """
    engine = create_engine('mysql+pymysql://root:1356130369@localhost:3306/dt', encoding='utf8')
    rows = engine.execute("select xiaoliang,gaopinci from gpcxlqk ").fetchall()
    sales = [row[0] for row in rows]
    words = [row[1] for row in rows]
    scores = [step * 5 for step in range(15, 0, -1)]
    return render_template('高频词销量情况.html', renliang=scores, xiaoliang=sales, gaopinci=words)
编写名为“高频词销量情况.html”的模板文件
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>ECharts</title>
    <!-- Load echarts.js -->
    <script src="../static/echarts.min.js"></script>
</head>
<body>
    <!-- Container element with an explicit size (width/height) for ECharts -->
    <div id="main" style="width: 80%;height:600px;"></div>
    <script type="text/javascript">
        // Initialise the echarts instance on the prepared DOM element
        var myChart = echarts.init(document.getElementById('main'));
        // Chart options and data
        var option = {
            dataset: {
                source:[
                    ['score', 'amount', 'product'],
                    {# Emit one row per available record instead of a fixed 15, and
                       serialise the word with tojson so that quotes or backslashes
                       in the data cannot break the script. #}
                    {% for i in range([renliang|length, xiaoliang|length, gaopinci|length]|min) %}
                    [{{ renliang[i] }},{{ xiaoliang[i] }},{{ gaopinci[i]|tojson }}],
                    {% endfor %}
                ]
            },
            grid: {containLabel: true},
            xAxis: {name: 'amount'},
            yAxis: {type: 'category'},
            visualMap: {
                orient: 'horizontal',
                left: 'center',
                min: 10,
                max: 100,
                text: ['High Score', 'Low Score'],
                // Map the score column to color
                dimension: 0,
                inRange: {
                    color: ['#d7da8b', '#e15457']
                }
            },
            series: [
                {
                    type: 'bar',
                    encode: {
                        // Map the "amount" column to X axis.
                        x: 'amount',
                        // Map the "product" column to Y axis
                        y: 'product'
                    }
                }
            ]
        };
        // Render the chart with the options and data defined above.
        myChart.setOption(option);
    </script>
</body>
</html>