import json
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from tkinter import *
# Use the SimHei font for sans-serif text so the Chinese titles used below
# can be rendered by matplotlib.
plt.rcParams['font.sans-serif'] = ['simhei']
def click1():
    """Show the overview figure built from four JSON exports.

    Each JSON file is expected to hold a list of records; the first item of
    a record is used as the label and the second as the value.  Three bar
    charts and one pie chart are drawn on a 3x2 subplot grid and the figure
    is displayed with ``plt.show()``.
    """

    def load_columns(path):
        """Load *path* and return (labels, values) taken from each record."""
        with open(path, 'r', encoding='utf8') as fp:
            records = json.load(fp)
        # Console dump of the raw payload, useful when a chart looks wrong.
        print(records)
        print(type(records))
        return [rec[0] for rec in records], [rec[1] for rec in records]

    # Raw strings keep every backslash of the Windows paths literal.
    x1, y1 = load_columns(r'D:\ECommerce\源代码\static\countryCustomer.json')
    plt.figure(figsize=(15, 12))
    # A figure-level title; plt.title() here would create an extra implicit
    # axes because no subplot exists yet.
    plt.suptitle('概览')

    plt.subplot(321)
    plt.title('客户数最多的10个国家')
    plt.bar(x1, y1)
    # Rotate after plotting so the rotation applies to this subplot's ticks.
    plt.xticks(rotation=70)

    x2, y2 = load_columns(r'D:\ECommerce\源代码\static\countryQuantity.json')
    plt.subplot(322)
    plt.title('销量最高的10个国家')
    plt.bar(x2, y2)
    plt.xticks(rotation=70)

    labels, nums = load_columns(r'D:\ECommerce\源代码\static\countrySumOfPrice.json')
    # Wedge offsets for the pie chart.  pie() requires one offset per wedge,
    # so the preset is cut or padded (with its last value) to the data size.
    preset = (0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 0.5, 0.9, 1.0)
    explode = [preset[min(i, len(preset) - 1)] for i in range(len(nums))]
    plt.subplot(325)
    plt.title('总销售额最高的10个国家分布情况')
    plt.pie(x=nums, labels=labels, pctdistance=0.5, explode=explode)

    x4, y4 = load_columns(r'D:\ECommerce\源代码\static\stockQuantity.json')
    plt.subplot(326)
    plt.title('销量最高的10个商品')
    plt.bar(x4, y4)
    # Must come after subplot(326); otherwise the pie axes gets rotated.
    plt.xticks(rotation=70)

    plt.show()
def click2():
    """Show a word cloud built from the word-count JSON export.

    The JSON file is expected to hold a list of records; the first item of
    a record is used as the word and the second as its frequency.  At most
    100 words are drawn on a white background.
    """
    # Raw string keeps every backslash of the Windows path literal.
    with open(r'D:\ECommerce\源代码\static\wordCount.json', 'r', encoding='utf8') as fp:
        records = json.load(fp)
    # Console dump of the raw payload, useful when the cloud looks wrong.
    print(records)
    print(type(records))

    frequencies = {rec[0]: rec[1] for rec in records}
    cloud = WordCloud(background_color='white', max_words=100)
    cloud.generate_from_frequencies(frequencies)

    plt.figure(figsize=(8, 5))
    plt.title('热门关键词')
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
def click3():
    """Show one bar chart and two line charts built from three JSON exports.

    Each JSON file is expected to hold a list of records; the first item of
    a record is used as the x value and the second as the y value.  The
    charts are placed in cells 1, 3 and 5 of a 2x3 subplot grid and the
    figure is displayed with ``plt.show()``.
    """

    def load_columns(path):
        """Load *path* and return (x values, y values) from each record."""
        with open(path, 'r', encoding='utf8') as fp:
            records = json.load(fp)
        # Console dump of the raw payload, useful when a chart looks wrong.
        print(records)
        print(type(records))
        return [rec[0] for rec in records], [rec[1] for rec in records]

    # Raw strings keep every backslash of the Windows paths literal.
    x6, y6 = load_columns(r'D:\ECommerce\源代码\static\countryReturnInvoice.json')
    plt.figure(figsize=(15, 12))
    # A figure-level title; plt.title() here would create an extra implicit
    # axes because no subplot exists yet.
    plt.suptitle('关系')

    plt.subplot(231)
    plt.title('退货订单数最多的10个国家')
    plt.bar(x6, y6)
    # Rotate after plotting so the rotation applies to this subplot's ticks.
    plt.xticks(rotation=70)

    x7, y7 = load_columns(r'D:\ECommerce\源代码\static\tradePrice.json')
    plt.subplot(233)
    plt.title('月销售额随时间的变化趋势')
    plt.plot(x7, y7)
    plt.xticks(rotation=70)

    x8, y8 = load_columns(r'D:\ECommerce\源代码\static\saleQuantity.json')
    plt.subplot(235)
    plt.title('日销量随时间的变化趋势')
    plt.plot(x8, y8)
    # Thin the x axis to one tick every 30 positions and rotate the labels.
    # NOTE(review): 365 is hard-coded; presumably the series covers about a
    # year of daily points - confirm against the exported data.
    plt.xticks(range(0, 365, 30), rotation=70)

    plt.show()
def click4():
    """Show two scatter plots built from two JSON exports.

    Each JSON file is expected to hold a list of records; the second item
    of a record is used as the x value and the third as the y value.

    Left plot: all records of ``buyReturn.json`` except the one with the
    largest x value.  Right plot: the records of ``unitPriceSales.json``
    whose x value is at most 50.
    """
    # Raw string is required here: in a normal literal "\b" is a backspace
    # character, which would corrupt the file name.
    with open(r'D:\ECommerce\源代码\static\buyReturn.json', 'r', encoding='utf8') as fp:
        records = json.load(fp)
    # Console dump of the raw payload, useful when a chart looks wrong.
    print(records)
    print(type(records))

    points = [(rec[1], rec[2]) for rec in records]
    if points:
        # Drop the dominant record as a whole so the remaining (x, y) pairs
        # stay aligned; deleting max(x) and max(y) separately would shift
        # the pairing whenever they belong to different records.
        largest = max(range(len(points)), key=lambda i: points[i][0])
        del points[largest]
    x9 = [px for px, _ in points]
    y9 = [py for _, py in points]

    plt.figure(figsize=(15, 12))
    plt.subplot(121)
    plt.title('各国的购买订单量和退货订单量的关系')
    plt.scatter(x9, y9, marker='*')

    with open(r'D:\ECommerce\源代码\static\unitPriceSales.json', 'r', encoding='utf8') as fp:
        records = json.load(fp)
    print(records)
    print(type(records))

    # Keep each y with its own x; looking y up through list.index(x) would
    # return the first match and mis-pair records that share an x value.
    kept = [(rec[1], rec[2]) for rec in records if rec[1] <= 50]
    newx10 = [px for px, _ in kept]
    newy10 = [py for _, py in kept]

    plt.subplot(122)
    plt.title('商品的平均单价与销量的关系')
    plt.scatter(newx10, newy10, marker='*')
    plt.show()
def click5():
    """Print normality tests and the Pearson correlation for returned items.

    Reads the cleaned CSV, keeps the rows whose ``Quantity`` is negative and
    greater than -100, and pairs the negated quantity (``value1``) with the
    ``UnitPrice`` of the same row (``value2``).  For both columns a
    Kolmogorov-Smirnov test against a normal distribution with the sample
    mean and standard deviation is printed, followed by the Pearson
    correlation coefficient computed from its definition.
    """
    plt.rcParams['axes.unicode_minus'] = False
    # Raw string keeps every backslash of the Windows path literal.
    filepath = r"D:\ECommerce\数据集\E_Commerce_Data_Clean.csv"
    # NOTE(review): decimal=',' is kept from the original; UnitPrice is
    # converted with float() below in case it is not parsed as a number.
    dataset = pd.read_csv(filepath, decimal=',')

    quantity = dataset["Quantity"]
    selected = dataset.loc[(quantity < 0) & (quantity > -100)]

    # Both columns come from the same rows and keep their row order: the
    # correlation is only meaningful when each quantity stays paired with
    # its own price, so the columns must not be sorted independently.
    data = pd.DataFrame({'value1': (-selected["Quantity"]).to_numpy(),
                         'value2': selected["UnitPrice"].astype(float).to_numpy()})
    print(data['value1'])
    print(data)
    print('------')

    if data.empty:
        # Nothing to test or correlate; the statistics below need data.
        print('没有符合条件的退货记录')
        return

    # Normality test.
    u1, u2 = data['value1'].mean(), data['value2'].mean()  # sample means
    std1, std2 = data['value1'].std(), data['value2'].std()  # sample standard deviations
    print('value1正态性检验:\n', stats.kstest(data['value1'], 'norm', (u1, std1)))
    print('value2正态性检验:\n', stats.kstest(data['value2'], 'norm', (u2, std2)))
    print('------')

    # Working table for the Pearson correlation coefficient.
    data['(x-u1)*(y-u2)'] = (data['value1'] - u1) * (data['value2'] - u2)
    data['(x-u1)**2'] = (data['value1'] - u1) ** 2
    data['(y-u2)**2'] = (data['value2'] - u2) ** 2
    print(data.head())
    print('------')

    # r = sum((x-u1)(y-u2)) / sqrt(sum((x-u1)^2) * sum((y-u2)^2))
    # |r| > 0.8 -> strong linear correlation
    r = data['(x-u1)*(y-u2)'].sum() / (np.sqrt(data['(x-u1)**2'].sum() * data['(y-u2)**2'].sum()))
    print('Pearson相关系数为:%.4f' % r)
# Build the main window: a label and a text entry on the first row and
# four buttons below, each opening one of the chart figures.
window = Tk()
window.minsize(85, 60)
window.title('数据可视化分析')

lbl = Label(window, text='销量分析')
lbl.grid(column=0, row=0)
txt = Entry(window, width=20)
txt.grid(column=2, row=0)

btn1 = Button(window, text='概览', bg='yellow', fg='red', command=click1)
btn1.grid(column=0, row=1)
btn2 = Button(window, text='热门关键词', bg='orange', fg='white', command=click2)
btn2.grid(column=0, row=2)
btn3 = Button(window, text='关系1', command=click3)
btn3.grid(column=1, row=1)
btn4 = Button(window, text='关系2', command=click4)
btn4.grid(column=1, row=2)
# NOTE(review): click5 is defined but not attached to any button here.

# Enter the Tk event loop once; it returns when the window is closed.
window.mainloop()