# 1.1 Load the hotel bookings data set.
# pandas is imported here because this script uses `pd` without importing it
# anywhere in view; a repeated import is harmless if it already exists above.
import pandas as pd

# `adr` is parsed as float at load time.
hotel_booking = pd.read_csv('./hotels.csv', dtype={'adr': float})
# 1.2 Fill missing values.
# Results are assigned back to the frame instead of calling
# `fillna(..., inplace=True)` on a column selection: that chained form operates
# on a temporary object under pandas Copy-on-Write and leaves the frame as is.

# Missing `children` values take the most frequent value (the first mode),
# after which the column can be stored as integers.
most_common_children = hotel_booking['children'].mode().iloc[0]
hotel_booking['children'] = hotel_booking['children'].fillna(most_common_children).astype(int)

# Missing `country` values take the next valid value below them (backward
# fill); `.bfill()` replaces the deprecated `fillna(method='bfill')`.
hotel_booking['country'] = hotel_booking['country'].bfill()
# 1.3 Replace English month names with month numbers (1-12).
month_names = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
look_up = {name: number for number, name in enumerate(month_names, start=1)}
# Mapping through `look_up.__getitem__` (not the dict itself) keeps the strict
# lookup: a value missing from the table raises KeyError instead of becoming NaN.
hotel_booking['arrival_date_month'] = hotel_booking['arrival_date_month'].map(look_up.__getitem__)
# 1.4 Derived columns: total nights stayed and a real arrival date.
hotel_booking['stays_in_nights'] = hotel_booking['stays_in_weekend_nights'].add(
    hotel_booking['stays_in_week_nights']
)

# Join year, month and day into a 'Y-M-D' string, then parse it as a datetime.
year_text, month_text, day_text = (
    hotel_booking[part].map(str)
    for part in ('arrival_date_year', 'arrival_date_month', 'arrival_date_day_of_month')
)
hotel_booking['arrival_date'] = pd.to_datetime(
    year_text.str.cat([month_text, day_text], sep="-"),
    format='%Y-%m-%d',
)
# 1.5 Adults arriving per day between 2015-07-01 and 2017-07-31 (both inclusive).
# Date filter.
in_window = hotel_booking['arrival_date'].between('2015-07-01', '2017-07-31')
df = hotel_booking[in_window]
sj = df[['arrival_date', 'stays_in_nights', 'adults']]
# Only bookings with at least one night are counted.
zs = sj[sj['stays_in_nights'] > 0]
f = zs.groupby(by=['arrival_date'])['adults'].sum().reset_index()
# `rename` returns a new frame, so the result is bound to `demand`.
demand = f.rename(columns={'arrival_date': 'Date', 'adults': 'guests'})
# 1.6 Monthly order counts per hotel type.
# Keep bookings that were not cancelled and lasted at least one night. The
# copy makes the column overwrite below act on an independent frame rather
# than on a slice of hotel_booking.
ah = hotel_booking[(hotel_booking['is_canceled'] == 0) & (hotel_booking['stays_in_nights'] > 0)].copy()
# Keep only the first seven characters of the date's string form
# (the 'YYYY-MM' prefix when arrival_date holds datetimes).
ah['arrival_date'] = ah['arrival_date'].map(str).str[0:7]
# Order counts per year-month and hotel type.
n = ah.groupby(['arrival_date'])['hotel'].value_counts()


def _monthly_sales(bookings, hotel_name):
    """Return one row per month for *hotel_name* with columns `date` and `sales`.

    `bookings` needs a `hotel` column and an `arrival_date` column of
    'YYYY-MM' strings. Rows come back sorted by `date` with a fresh 0..n-1
    index.
    """
    counts = bookings.loc[bookings['hotel'] == hotel_name, 'arrival_date'].value_counts()
    # The column names are set explicitly because the names value_counts()
    # gives its result differ between pandas versions, which made a
    # rename of 'index'/'arrival_date' unreliable.
    monthly = counts.rename_axis('date').reset_index(name='sales')
    # 'YYYY-MM' strings sort lexicographically in chronological order, so no
    # datetime round-trip is needed to order the months.
    return monthly.sort_values('date').reset_index(drop=True)


city = _monthly_sales(ah, 'City Hotel')
resort = _monthly_sales(ah, 'Resort Hotel')
# Plotting
import pyecharts.options as opts
from pyecharts.charts import Line, Bar
from pyecharts.faker import Faker
from pyecharts.globals import ThemeType
# Pull the plotted columns out of the monthly frames.
a1 = city['date']
a2 = city['sales'].map(int)
a3 = resort['sales'].map(int)
# Convert each Series to a plain list of Python values for the chart.
b1 = a1.tolist()
b2 = a2.tolist()
b3 = a3.tolist()
# Smoothed area chart of monthly orders; only the first 26 points of each
# list are plotted.
# NOTE(review): the resort series is drawn against the city x-axis, which
# assumes both frames cover the same months in the same order - confirm.
c = (
    Line()
    .add_xaxis(xaxis_data=b1[0:26])
    .add_yaxis("City Hotel", y_axis=b2[0:26], is_smooth=True)
    .add_yaxis("Resort Hotel", y_axis=b3[0:26], is_smooth=True)
    .set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
        # Mark the maximum of each series.
        markpoint_opts=opts.MarkPointOpts(
            data=[opts.MarkPointItem(type_='max', name='Max')]
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Line-面积图(紧贴 Y 轴)"),
        xaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
            is_scale=False,
            boundary_gap=False,
        ),
    )
)
c.render_notebook()
# 1.7 Per-day parking figure for 2016, shaped for the calendar heat map.
# Keep bookings that were not cancelled and lasted at least one night, and
# group THAT frame, so the filter actually applies to the result.
rh = hotel_booking[(hotel_booking['is_canceled'] == 0) & (hotel_booking['stays_in_nights'] > 0)]
# NOTE(review): count() gives the number of bookings per day, not the number
# of requested parking spaces; the chart title speaks of a sum - confirm
# whether .sum() was intended.
rh = rh.groupby(['arrival_date'])['required_car_parking_spaces'].count()
rh = rh.reset_index()
rh = rh.rename(columns={'arrival_date': 'Date', 'required_car_parking_spaces': 'park'})
# Keep the days of 2016; reset_index returns a new frame, so the column
# assignment below does not write into a slice.
rh = rh[(rh['Date'] > '2015-12-31') & (rh['Date'] < '2017-1-1')].reset_index(drop=True)
rh['Date'] = rh['Date'].map(str)
# One [date text (first 10 characters), value] pair per day.
# NOTE(review): the value is divided by 10 and truncated; presumably to fit
# the 0-50 colour scale of the chart - confirm.
r1 = [[str(date_text[0:10]), int(park / 10)]
      for date_text, park in zip(rh['Date'], rh['park'])]
import pyecharts.options as opts
from pyecharts.charts import Calendar
# Calendar heat map of the per-day values in r1, laid out over the year 2016.
calendar_layout = opts.CalendarOpts(
    pos_top="120",
    pos_left="30",
    pos_right="30",
    range_="2016",
    yearlabel_opts=opts.CalendarYearLabelOpts(is_show=False),
)
heatmap_title = opts.TitleOpts(pos_top="30", pos_left="center", title="2016年停车位数量和")
# Piecewise horizontal colour legend covering values 0 to 50.
heatmap_scale = opts.VisualMapOpts(
    max_=50, min_=0, orient="horizontal", is_piecewise=True
)

tu = Calendar(init_opts=opts.InitOpts(width="1600px", height="1000px"))
tu.add(series_name="", yaxis_data=r1, calendar_opts=calendar_layout)
tu.set_global_opts(title_opts=heatmap_title, visualmap_opts=heatmap_scale)
tu.render_notebook()
# 2.1 Weekday on which each booking was placed, weekends removed.
# Keep bookings that were not cancelled and lasted at least one night.
rh = hotel_booking.loc[
    (hotel_booking['is_canceled'] == 0) & (hotel_booking['stays_in_nights'] > 0),
    ['arrival_date', 'lead_time'],
].reset_index(drop=True)
# Arrival date minus the lead time in days.
booked_on = pd.to_datetime(rh['arrival_date']) - pd.to_timedelta(rh['lead_time'], unit='D')
# The column keeps the name 'arrival_date' although it now holds the
# arrival date minus the lead time.
df = booked_on.to_frame(name='arrival_date')
# Day of week as 1 (Monday) .. 7 (Sunday); .dt.weekday is 0-based.
df['xq'] = df['arrival_date'].dt.weekday + 1
# Drop Saturday (6) and Sunday (7).
df = df[~df['xq'].isin([6, 7])]
# 2.2 Meal breakdown for Resort Hotel bookings with arrival month 8 and year 2016.
is_resort = hotel_booking['hotel'] == 'Resort Hotel'
in_month_8 = hotel_booking['arrival_date_month'] == 8
in_2016 = hotel_booking['arrival_date_year'] == 2016
hm = hotel_booking.loc[
    is_resort & in_month_8 & in_2016,
    ['hotel', 'meal', 'arrival_date_month', 'arrival_date_year'],
]
# Row counts per meal; the result is not stored.
hm.groupby(by='meal').value_counts()
# 2.3 Share of 'Check-Out' bookings whose assigned room type differs from the
# reserved room type, formatted as a string with two decimals.
arr = hotel_booking.loc[:, ['assigned_room_type', 'reserved_room_type', 'reservation_status']]
ar = arr.loc[arr['reservation_status'].eq('Check-Out')]
qc = ar.loc[ar['assigned_room_type'].ne(ar['reserved_room_type'])]
# a = all checked-out bookings, b = those with a different room type.
a, b = len(ar), len(qc)
task2_3 = f"{b / a:.2f}"
# 2.4 Per-month figures on adr * nights for bookings that were not cancelled
# and lasted at least one night.
rh = hotel_booking[(hotel_booking['is_canceled'] == 0) & (hotel_booking['stays_in_nights'] > 0)]
# Copy so the column writes below act on an independent frame, not a slice.
sa = rh[['arrival_date', 'stays_in_nights', 'adr']].copy()
# Booking value: average daily rate times number of nights.
sa['sum_adr'] = sa['adr'] * sa['stays_in_nights']
# `n` is the same object as `sa`; the date column is cut to its first seven
# characters (the 'YYYY-MM' prefix when arrival_date holds datetimes).
n = sa
n['arrival_date'] = n['arrival_date'].map(str).str[0:7]
# Group the frame that exists (`n`); the name `ny` is never defined.
# NOTE(review): count() yields the number of bookings per month and ignores
# the values in sum_adr - confirm whether .sum() was intended.
n.groupby('arrival_date')['sum_adr'].count()
# Recorded answer string for this task.
t = '(2017,2016-10)'
# 2.5 Cancellation rate per distribution channel ('Undefined' excluded).
known = hotel_booking[hotel_booking['distribution_channel'] != 'Undefined']
cancelled = known.loc[known['is_canceled'] == 1, 'distribution_channel'].value_counts()
kept = known.loc[known['is_canceled'] == 0, 'distribution_channel'].value_counts()

# Column names are set explicitly so they do not depend on how the installed
# pandas names a value_counts() result: 'index' holds the channel and
# 'distribution_channel' holds the booking count.
qx = cancelled.rename_axis('index').reset_index(name='distribution_channel')
wq = kept.rename_axis('index').reset_index(name='distribution_channel')

# Look the cancelled count up BY CHANNEL NAME. The two value_counts() results
# are each sorted by their own counts, so joining them by row position can
# pair different channels. A channel with no cancellations gets 0.
wq['qxld'] = wq['index'].map(cancelled).fillna(0).astype(int)
# Total bookings per channel: not cancelled + cancelled.
wq['hj'] = wq['distribution_channel'] + wq['qxld']
# Cancellation rate: cancelled bookings divided by all bookings.
wq['qxl'] = wq['qxld'] / wq['hj']
wq
# 2.6 Per-country figures for the world map.
# Non-cancelled bookings from Portugal; the two-column view is evaluated but
# not stored.
m = hotel_booking.loc[(hotel_booking['country'] == "PRT") & (hotel_booking['is_canceled'] == 0)]
m[['country', 'is_canceled']]
# Hard-coded [country, value] pairs passed to the map.
# NOTE(review): these figures are not computed anywhere in this script -
# confirm where they come from.
a = [
    ['China', 540],
    ['France', 8468],
    ['Great Britain', 9666],
    ['Russia', 391],
    ['United States', 1592],
    ['Portugal', 20976],
]
# The same values without the labels, then one name per country.
b = [value for _label, value in a]
chn, fra, gbr, rus, usa, prt = b
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.faker import Faker
# World map coloured by the per-country values in `a`; region labels hidden.
map_title = opts.TitleOpts(title="Map-世界地图")
map_scale = opts.VisualMapOpts(max_=20000)

c = Map()
c.add("商家A", a, "world")
c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
c.set_global_opts(title_opts=map_title, visualmap_opts=map_scale)
c.render_notebook()
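# --- Web-page residue captured together with the code; not part of the script ---
# 09-07
# 2733
# 12-01
# 150k+
# Was "Related recommendations" helpful to you?
# - Very unhelpful
# - Unhelpful
# - Neutral
# - Helpful
# - Very helpful
# Submit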