import tkinter as tk
import jieba
import re
import pandas as pd
import PIL.Image as image
import PIL
import numpy as np
from wordcloud import WordCloud
from GUI.Viewdata import Viewdata
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.faker import Faker
from pyecharts.charts import Bar
from pyecharts.charts import Pie
from itertools import groupby
from pyecharts.charts import Line
'''
小结
2020年疫情期间赶作业写出来的:使用pyecharts生成地图,并对数据做了适当清洗。
pyecharts生成的是html文件,画出来的图较美观,画图过程也较简单;没有用到matplotlib,只简单地使用了pandas。
'''
'''
这是从数据库读取数据!
def save_title():
text = ''
sql = 'SELECT * FROM 零食 '
with pyodbc.connect(DRIVER='{SQL Server}', SERVER='212.64.70.92', DATABASE='weimingzhong', UID="SA",
PWD="Guat1234") as conn:
with conn.cursor() as cursor: # 数据存放到元组中
cursor.execute(sql)
result = cursor.fetchall()
for item in result:
t = item[0]
text += str(item[0])
#print(chardet.detect(str.encode(text)))
#清洗标题,只保留中文
chinese = "[A-Za-z0-9\!\%\[\]\,\。]"
clean_text= re.sub(chinese, "", text)
f=open("clean_title.txt",'w')
f.write(clean_text)
f.close()
'''
def save_title():
    """Extract the title column from the snack CSV and save a cleaned copy.

    Steps:
    1. Read column 0 of ``D:/Awei/零食.csv`` with pandas (no header row).
    2. Concatenate every cell of that column into one string.
    3. Remove ASCII letters, digits and the punctuation ``! % [ ] , 。``.
    4. Write the result to ``D:/Awei/clean_title.txt`` -- the same path that
       ``read_title`` opens. (The file used to be written to
       ``D:/clean_title.txt``, which ``read_title`` never looks at.)
    """
    df = pd.read_csv("D:/Awei/零食.csv", encoding='utf-8', header=None, usecols=[0])
    # df.values is a 2-D array with a single column; row[0] is the title cell.
    raw_text = "".join(str(row[0]) for row in df.values)
    # Raw string: the backslashes are meant for the regex engine, and escapes
    # such as "\!" are invalid in an ordinary Python string literal.
    noise = r"[A-Za-z0-9\!\%\[\]\,\。]"
    clean_text = re.sub(noise, "", raw_text)
    # No explicit encoding on purpose: read_title opens this file with the
    # platform default as well, so both sides must stay in agreement.
    with open("D:/Awei/clean_title.txt", 'w') as f:
        f.write(clean_text)
def read_title():
    """Load the cleaned title text from disk.

    :return: the full contents of ``D:/Awei/clean_title.txt`` as one string
    """
    with open("D:/Awei/clean_title.txt", 'r') as source:
        return source.read()
def trans_CN(title_data):
    """Segment ``title_data`` with jieba and join the tokens with spaces.

    :param title_data: the text to segment
    :return: one string in which the tokens produced by ``jieba.cut`` are
        separated by single spaces
    """
    return " ".join(jieba.cut(title_data))
def title_frequency():
    """Print the most frequent tokens of the cleaned title text.

    The text returned by ``read_title`` is segmented with ``jieba.lcut``
    (precise mode), every token is counted (single-character tokens
    included), and at most the 50 most frequent tokens are printed, one per
    line: the token left-aligned in 10 columns followed by its count
    right-aligned in 5 columns.
    """
    title_data = read_title()
    words = jieba.lcut(title_data)

    # token -> number of occurrences
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so tokens with equal counts keep first-seen order.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

    # Slice instead of indexing 0..49: a text with fewer than 50 distinct
    # tokens used to raise IndexError here.
    for word, count in ranked[:50]:
        print(u"{0:<10}{1:>5}".format(word, count))
class Platform():
    """Main window of the snack data analysis tool.

    Instantiating the class builds a fixed-size 800x500 Tk window with one
    button per analysis action and then enters the Tk main loop, so the
    constructor does not return until the main loop ends.
    """

    def __init__(self):
        # Keep the root window on the instance.
        root = tk.Tk()
        self.windows = root
        root.title("零食数据分析平台")
        root.geometry("800x500")
        root.resizable(False, False)
        root.iconbitmap('D:/Awei/ico.ico')
        root.configure(bg="RoyalBlue")

        # (caption, handler) pairs, top to bottom; buttons are 50 px apart.
        # NOTE(review): shop_click is defined in this file but no button is
        # bound to it -- confirm whether that is intentional.
        actions = (
            ("查看数据", viewdata_click),
            ("标题词云", wordcloud_click),
            ("商家地图", map_click),
            ("销量排名", deal_click),
            ("销量分布", distribution_click),
        )
        for row, (caption, handler) in enumerate(actions):
            button = tk.Button(root, text=caption, font="宋体", command=handler)
            button.place(x=350, y=50 + 50 * row)

        root.mainloop()
def distribution_click():
    """Render a line chart of how many rows fall into each bracket of column 1.

    Column 1 of ``D:/Awei/零食.csv`` (plotted under the series name "销量")
    is cut into seven brackets between 0 and 80000, the rows per bracket are
    counted, and the counts are plotted in ascending order of count. The
    chart is written to ``D:/Awei/line_base.html``.
    """
    df = pd.read_csv("D:/Awei/零食.csv", encoding='utf-8', header=None, usecols=[1])

    bin_edges = [0, 1000, 2000, 5000, 10000, 30000, 50000, 80000]
    # One label per bracket, i.e. one fewer than the number of edges.
    bin_labels = ['0_1000', '1000_2000', '2000_5000', '5000_10000',
                  '10000_30000', '30000_50000', '50000_80000']

    # include_lowest=True makes the first bracket include the value 0.
    brackets = pd.cut(df[1], bins=bin_edges, labels=bin_labels, include_lowest=True)

    # Series.value_counts replaces the deprecated top-level pd.value_counts;
    # the result is then ordered by count, smallest first.
    per_bracket = brackets.value_counts().sort_values()

    (
        Line(init_opts=opts.InitOpts(
            width='1200px',
            height='800px',
            page_title='问就自杀'
        ))
        .add_xaxis(per_bracket.index.tolist())
        .add_yaxis("销量", per_bracket.values.tolist())
        .set_global_opts(title_opts=opts.TitleOpts(title="Line"))
        .render("D:/Awei/line_base.html")
    )
def shop_click():
    """Render a pie chart of the seven most frequent values in column 2.

    Column 2 of ``D:/Awei/零食.csv`` is read (the chart is titled "店家统计",
    so it presumably holds the shop name -- confirm against the data), the
    occurrences of each value are counted, and the seven most frequent ones
    are drawn as ``name: count`` slices. The chart is written to
    ``D:/Awei/pie.html``.
    """
    df = pd.read_csv("D:/Awei/零食.csv", encoding='utf-8', header=None, usecols=[2])

    # Series.value_counts sorts by count, largest first; it replaces the
    # deprecated pd.value_counts that was applied through DataFrame.apply.
    top_shops = df[2].value_counts().head(7)

    # pyecharts expects (name, value) pairs.
    data_pairs = list(zip(top_shops.index.tolist(), top_shops.values.tolist()))

    (
        Pie(init_opts=opts.InitOpts(
            width='1200px',
            height='800px',
            page_title='问就自杀'
        ))
        .add("", data_pair=data_pairs)
        .set_colors(["blue", "green", "yellow", "red", "pink", "orange", "purple"])
        .set_global_opts(title_opts=opts.TitleOpts(title="店家统计"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
        .render("D:/Awei/pie.html")
    )
def deal_click():
    """Render a bar chart of the ten rows with the largest value in column 1.

    Columns 0 and 1 of ``D:/Awei/零食.csv`` are read, the rows are sorted by
    column 1 in descending order, and the first ten rows are plotted with
    column 0 on the x axis and column 1 as the "销量" series. The chart is
    written to ``D:/Awei/deal.html``.
    """
    df = pd.read_csv("D:/Awei/零食.csv", encoding='utf-8', header=None, usecols=[0,1])

    # ascending=False puts the largest values first; the sorted frame is a
    # new object rather than an in-place reorder of df.
    ranked = df.sort_values(by=[1], ascending=False)
    top_rows = ranked.values.tolist()[:10]

    labels = [row[0] for row in top_rows]
    amounts = [row[1] for row in top_rows]

    chart = Bar(init_opts=opts.InitOpts(
        width='1200px',
        height='800px',
        page_title='问就自杀'
    ))
    chart.add_xaxis(labels)
    chart.add_yaxis("销量", amounts)
    chart.render("D:/Awei/deal.html")
def _province_counts(locations):
    """Count how often each province occurs in ``locations``.

    The province is taken from the first two characters of each location
    string. The two-character prefixes "黑龙" and "内蒙" are expanded to
    "黑龙江" and "内蒙古", because the bare prefix is not a province name.
    Values that are not strings (e.g. NaN for an empty CSV cell) are skipped.

    :param locations: iterable of location values
    :return: list of ``(province, count)`` tuples, largest count first
    """
    three_char_provinces = {"黑龙": "黑龙江", "内蒙": "内蒙古"}
    counts = {}
    for location in locations:
        if not isinstance(location, str):
            # Slicing a non-string cell would raise TypeError.
            continue
        prefix = location[0:2]
        province = three_char_provinces.get(prefix, prefix)
        counts[province] = counts.get(province, 0) + 1
    # sorted() is stable, so provinces with equal counts keep first-seen order.
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)


def map_click():
    """Render a map of how many rows belong to each province.

    Column 3 of ``D:/Awei/零食.csv`` is read and reduced to per-province
    counts shaped like ``[('湖北', 9074), ('浙江', 661), ('黑龙江', 95), ...]``,
    which are drawn as the "商家数" series of a pyecharts ``Map``. The chart
    is written to ``D:/Awei/map.html``.

    NOTE(review): the short province names used here assume the map asset
    labels its regions the same way -- confirm against the installed
    pyecharts version.
    """
    df = pd.read_csv("D:/Awei/零食.csv", encoding='utf-8', header=None, usecols=[3])
    # df.values is a 2-D array with a single column; row[0] is the location.
    data_pairs = _province_counts(row[0] for row in df.values)

    (
        Map(init_opts=opts.InitOpts(
            width='1200px',
            height='800px',
            page_title='问就自杀',
        ))
        .add("商家数", data_pair=data_pairs)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="零食商家分布"),
            visualmap_opts=opts.VisualMapOpts(max_=300, is_piecewise=True)
        )
        .render("D:/Awei/map.html",)
    )
def viewdata_click():
    """Button handler: construct a ``Viewdata`` object from ``GUI.Viewdata``."""
    # The instance is only held for the duration of this call.
    data_view = Viewdata()
def wordcloud_click():
    """Button handler: build a word cloud from the cleaned titles and show it.

    The text from ``read_title`` is segmented by ``trans_CN``, the image
    ``D:/Awei/login.jpg`` is loaded as the mask array, and the generated
    cloud is converted to an image and displayed with its ``show()`` method.
    """
    segmented_text = trans_CN(read_title())
    mask_pixels = np.array(image.open("D:/Awei/login.jpg"))

    settings = {
        "mask": mask_pixels,
        "font_path": "msyh.ttc",
        "collocations": False,
        "max_font_size": 200,
        "scale": 4,
    }
    cloud = WordCloud(**settings).generate(segmented_text)
    cloud.to_image().show()
# pyecharts生成地图
# 最新推荐文章于 2024-01-14 16:00:36 发布