数据处理---电商数据整合

63 篇文章 1 订阅
9 篇文章 1 订阅

由于阿里的生意参谋数据做了反爬数据,利用python采集已经不是最佳方案,由于提供下载的数据表,这期将整合这些数据:


##处理店内数据,可做数据留存和可视化报表
# by carson 2019-06-04
import sys
import pandas as pd
from pandas import read_csv
from pandas import to_datetime

#要处理数据的目录,当然可以和上期的数据用一种方法,这个的灵活性更大
file_1 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2018-07-04-2019-06-04.xls"
file_2 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2018-08-04-2019-06-04.xls"
file_3 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2018-09-04-2019-06-04.xls"
file_4 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2018-10-04-2019-06-04.xls"
file_5 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2018-11-04-2019-06-04.xls"
file_6 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2018-12-04-2019-06-04.xls"
file_7 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2019-01-04-2019-06-04.xls"
file_8 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2019-02-04-2019-06-04.xls"
file_9 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2019-03-04-2019-06-04.xls"
file_10 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2019-04-04-2019-06-04.xls"
file_11 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2019-05-04-2019-06-04.xls"
file_12 = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\2019-06-04-2019-06-04.xls"


#数据处理函数
def convert_shop(path):
    df1 = pd.read_excel(path)
    df1.columns = df1.iloc[6]
    df2 = df1.iloc[7:,:].copy()
    #PC端字段
    columns_1 = ["统计日期","PC端访客数","PC端浏览量","PC端商品访客数","PC端商品浏览量","PC端平均停留时长",
             "PC端跳失率","PC端商品收藏买家数","PC端商品收藏次数","PC端加购人数","PC端支付金额","PC端支付买家数","PC端支付子订单数","PC端支付件数","PC端下单金额","PC端下单买家数","PC端下单件数","PC端人均浏览量","PC端下单转化率","PC端支付转化率","PC端客单价","PC端UV价值","PC端店铺收藏买家数"]
    #无线端字段
    columns_2 = ["统计日期","无线端访客数","无线端浏览量","无线端商品访客数","无线端商品浏览量","无线端平均停留时长","无线端跳失率","无线端商品收藏买家数","无线端商品收藏次数","无线端加购人数","无线端支付金额","无线端支付买家数","无线端支付子订单数","无线端支付件数","无线端下单金额","无线端下单买家数","无线端下单件数","无线端人均浏览量","无线端下单转化率","无线端支付转化率","无线端客单价","无线端UV价值"]
    #所有终端字段
    columns_3 = ["统计日期","访客数","浏览量","商品访客数","商品浏览量","平均停留时长","跳失率","商品收藏买家数","商品收藏次数","加购人数","支付金额","支付买家数","支付子订单数","支付件数","下单金额","下单买家数","下单件数","人均浏览量","下单转化率","支付转化率","客单价","UV价值","店铺收藏买家数"]
    temp1 = df2[columns_1]
    temp2 = df2[columns_2]
    temp3 = df2[columns_3]
    #添加字段获取终端类型
    temp1.insert(1,"终端类型", "PC端")
    temp2.insert(1,"终端类型", "WX端")
    temp3.insert(1,"终端类型", "all端")
    temp2.insert(23,"WX端店铺收藏买家数", "0")
    demo1 = temp1.copy()
    demo2 = temp2.copy()
    demo3 = temp3.copy()
    columns_demo = ["统计日期","终端类型","访客数","浏览量","商品访客数","商品浏览量","平均停留时长","跳失率","商品收藏买家数","商品收藏次数","加购人数","支付金额","支付买家数","支付子订单数","支付件数","下单金额","下单买家数","下单件数","人均浏览量","下单转化率","支付转化率","客单价","UV价值","店铺收藏买家数"]
    demo1.columns = columns_demo
    demo2.columns = columns_demo
    demo3.columns = columns_demo
    result = pd.concat([demo1, demo2,demo3], axis=0)
    return  result
	
#合并数据
list = [result_date_1,result_date_2,result_date_3,result_date_4,result_date_5,result_date_6,result_date_7,result_date_8,result_date_9,result_date_10,result_date_11,result_date_12]
result_total = pd.concat(list, axis=0)

#查看局部数据
result_total.head()

#查看数据的类型
result_total.info()

#转化率类型
result_total["统计日期"]=to_datetime(result_total["统计日期"],format="%Y/%m/%d")
result_total["跳失率"] = result_total["跳失率"].apply(lambda x: x.replace("%","")).astype("float")/100
result_total["下单转化率"] = result_total["下单转化率"].apply(lambda x: x.replace("%","")).astype("float")/100
result_total["支付转化率"] = result_total["支付转化率"].apply(lambda x: x.replace("%","")).astype("float")/100

#浮点类型
result_total["平均停留时长"] = result_total["平均停留时长"].apply(lambda x: x.replace(",","")).astype("float")
result_total["支付金额"] = result_total["支付金额"].apply(lambda x: x.replace(",","")).astype("float")
result_total["下单金额"] = result_total["下单金额"].apply(lambda x: x.replace(",","")).astype("float")
result_total["人均浏览量"] = result_total["人均浏览量"].apply(lambda x: x.replace(",","")).astype("float")
result_total["客单价"] = result_total["客单价"].apply(lambda x: x.replace(",","")).astype("float")
result_total["UV价值"] = result_total["UV价值"].apply(lambda x: x.replace(",","")).astype("float")


#整数类型
result_total["访客数"] = result_total["访客数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["浏览量"] = result_total["浏览量"].apply(lambda x: x.replace(",","")).astype("int")
result_total["商品访客数"] = result_total["商品访客数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["商品浏览量"] = result_total["商品浏览量"].apply(lambda x: x.replace(",","")).astype("int")
result_total["商品收藏买家数"] = result_total["商品收藏买家数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["商品收藏次数"] = result_total["商品收藏次数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["加购人数"] = result_total["加购人数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["支付买家数"] = result_total["支付买家数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["支付子订单数"] = result_total["支付子订单数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["支付件数"] = result_total["支付件数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["下单买家数"] = result_total["下单买家数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["下单件数"] = result_total["下单件数"].apply(lambda x: x.replace(",","")).astype("int")
result_total["店铺收藏买家数"] = result_total["店铺收藏买家数"].apply(lambda x: x.replace(",","")).astype("int")

#保存数据
open_file = r"C:\Users\lihwa11\Desktop\所有留存数据\店铺\店铺整体\demo.xlsx"
result_total.to_excel(open_file,index=False)

#查看部分数据
result_total.iloc[1:5,0:10]

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 6
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值