# -*- encoding: utf-8 -*-
"""
@File : homework_6_电商数据可视化分析.py
@Time : 2019/9/24 10:31
@Author : chen
"""
# 可以打开下面的网页,看各种图形的源码
# https://plot.ly/python/
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# pip install colorlover
import colorlover as cl
# Palette of 19 hex colours used to tint the bar traces further down.
# Entries are looked up by position; note that '#F1948A' appears twice
# (positions 0 and 4).
colors = [
    '#F1948A', '#AED6F1', '#F9E79F', '#E5E8E8',
    '#F1948A', '#D0ECE7', '#F6DDCC', '#D2B4DE',
    '#117A65', '#FAE5D3', '#34495E', '#DC7633',
    '#D35400', '#0E6251', '#FCF3CF', '#E8F8F5',
    '#D4E6F1', '#FAD8D8', '#E59866',
]
# Load the sales records from BlackFriday.csv and print a quick overview.
data = pd.read_csv('BlackFriday.csv')

# First ten rows, then the (rows, columns) shape of the table.
print(data.head(10))
print(data.shape)

# Distinct values of the Occupation column, followed by how many there are.
print(data["Occupation"].unique())
print(len(data["Occupation"].unique()))

# Summary statistics as produced by DataFrame.describe().
print(data.describe())

# Number of missing values in every column.
print(data.isnull().sum())
# Total spend per shopper: pivot Purchase on (User_ID, Gender) with a sum
# aggregation, then turn the two index levels back into ordinary columns.
gender_purchase = data.pivot_table(
    values='Purchase',
    index=["User_ID", "Gender"],
    aggfunc="sum",
).reset_index()

# Preview the first 20 rows. The call is wrapped in print() because a bare
# expression statement displays nothing when the file is run as a script.
print(gender_purchase.head(20))
print(gender_purchase.count())

# Rows per gender (column "人数", i.e. head count) and each gender's share of
# the total (column "占比", i.e. proportion).
gender_count = gender_purchase.groupby(by="Gender").size().reset_index(name="人数")
gender_count["占比"] = gender_count["人数"] / gender_count["人数"].sum()
print(gender_count)
# Donut chart of spend by gender.
# Every row of gender_purchase supplies its Gender as a label and its Purchase
# as the matching value; hole=0.5 cuts out the centre of the pie.
trace = go.Pie(
    hole=0.5,
    values=gender_purchase["Purchase"].tolist(),
    labels=gender_purchase["Gender"].tolist(),
)
fig = go.Figure(data=[trace])
fig.show()
# Spend distribution for each gender, drawn as two box plots.
# The Purchase column is selected separately for the "F" and "M" rows.
x_female = gender_purchase.loc[gender_purchase["Gender"] == "F", "Purchase"]
y_male = gender_purchase.loc[gender_purchase["Gender"] == "M", "Purchase"]

# Passing the series as x= instead of y= would lay the boxes out horizontally.
trance1 = go.Box(name="男性购物", boxmean=True, y=y_male)
trance2 = go.Box(name="女性购物", boxmean=True, y=x_female)

fig = go.Figure(data=[trance1, trance2])
fig.show()
# Ten best-selling products: count the Purchase entries per Product_ID, order
# the counts from highest to lowest and keep the first ten rows.
top10_sellers = (
    data.pivot_table(values=['Purchase'], index=['Product_ID'], aggfunc='count')
    .reset_index()
    .sort_values(by='Purchase', ascending=False)
    .head(10)
)
print("top10_sellers:", top10_sellers)
# Who buys the best sellers: keep only the rows of the full data set whose
# Product_ID is one of the ten products found above.
top_sellers_buyers = data.loc[data["Product_ID"].isin(top10_sellers["Product_ID"])]
print(top_sellers_buyers.head(10))
# Best sellers versus gender: number of Purchase entries for every
# (Product_ID, Gender) pair among the best-seller rows.
top_sellers_gender = top_sellers_buyers.pivot_table(
    values="Purchase",
    index=["Product_ID", "Gender"],
    aggfunc="count",
).reset_index()
print(top_sellers_gender)

# Build one horizontal bar trace per gender value.
traces = []
for i, g in enumerate(top_sellers_gender.Gender.unique()):
    # Filter once per gender instead of once per axis.
    rows = top_sellers_gender[top_sellers_gender.Gender == g]
    trace = go.Bar(
        x=rows.Purchase,        # bar length: purchase count
        y=rows.Product_ID,      # one bar per product
        name=g,                 # trace name (the gender value)
        # Wrap around the palette so a long category list cannot run past
        # the end of `colors`.
        marker=dict(color=colors[i % len(colors)]),
        orientation="h",        # horizontal bars
    )
    traces.append(trace)

fig = go.Figure(data=traces)
fig.show()
# Best sellers versus city: number of Purchase entries for every
# (Product_ID, City_Category) pair among the best-seller rows.
# The method is DataFrame.pivot_table; the previous spelling `prvot_table`
# raised AttributeError.
top_sellers_city = top_sellers_buyers.pivot_table(
    values="Purchase",
    index=['Product_ID', "City_Category"],
    aggfunc="count",
).reset_index()
print(top_sellers_city.head())

# Build one horizontal bar trace per city category.
traces = []
for i, c in enumerate(top_sellers_city.City_Category.unique()):
    # Filter once per city category instead of once per axis.
    rows = top_sellers_city[top_sellers_city.City_Category == c]
    trace = go.Bar(
        x=rows.Purchase,        # bar length: purchase count
        y=rows.Product_ID,      # one bar per product
        name=c,                 # trace name (the city category)
        # Wrap around the palette so a long category list cannot run past
        # the end of `colors`.
        marker=dict(color=colors[i % len(colors)]),
        orientation="h",        # horizontal bars
    )
    traces.append(trace)

go.Figure(data=traces).show()
# E-commerce data visualization analysis
# (web-page residue) Latest recommended article published on 2024-07-30 14:57:31