import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Load and clean the online-retail transaction table. The steps below add
# `amount`, `date`, `time`, `year`, `month` and `day` columns, drop the
# `Description` and `InvoiceDate` columns, and remove duplicate rows.

# SimHei is a CJK font; presumably chosen so Chinese plot labels render.
matplotlib.rcParams['font.family'] = 'SimHei'

data = pd.read_csv('OnlineRetail.csv', encoding='ISO-8859-1')
data.info()

# Fraction of missing values per column (the mean of a boolean mask is the
# ratio of True entries, i.e. null count / row count).
missing_ratio = data.isnull().mean()
print(missing_ratio)

data = data.drop(columns=['Description'])
print(data)

# Fill missing values: rows without a customer id get the placeholder 'U'.
data['CustomerID'] = data['CustomerID'].fillna('U')
data['amount'] = data['Quantity'] * data['UnitPrice']

# InvoiceDate is split on the first space into a date part and a time part.
# Split once and reuse the result for both columns.
stamp_parts = data['InvoiceDate'].str.split(' ')
data['date'] = stamp_parts.str[0]
data['time'] = stamp_parts.str[1]
data = data.drop(columns=['InvoiceDate'])

# The date text is read as month/day/year; the three pieces are kept as
# strings, while `date` itself is converted to a datetime afterwards.
date_parts = data['date'].str.split('/')
data['year'] = date_parts.str[2]
data['month'] = date_parts.str[0]
data['day'] = date_parts.str[1]
data['date'] = pd.to_datetime(data['date'])

data = data.drop_duplicates()
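# 2. 电商平台零售数据分析 (E-commerce platform retail data analysis)
# 最新推荐文章于 2022-10-18 11:53:02 发布 (latest recommended article published 2022-10-18 11:53:02)
# ![](https://img-home.csdnimg.cn/images/20240711042549.png)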