# Python data analysis

#1.1
# TODO:

# pandas is used throughout the script as `pd`; import it before first use.
import pandas as pd

# Load the bookings table, forcing the 'adr' column to float.
hotel_booking = pd.read_csv('./hotels.csv', dtype={'adr': float})

#1.2
# Fill missing 'children' values with the column's mode, then store as int.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column: that chained form does not update the frame when pandas
# copy-on-write is active.
children_mode = hotel_booking['children'].mode().iloc[0]
hotel_booking['children'] = hotel_booking['children'].fillna(children_mode).astype(int)
# Backfill: a missing 'country' takes the next non-missing value after it.
# Series.bfill() replaces the deprecated fillna(method='bfill').
hotel_booking['country'] = hotel_booking['country'].bfill()

#1.3
# Replace English month names in 'arrival_date_month' with month numbers 1-12.
month_names = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
look_up = {name: number for number, name in enumerate(month_names, start=1)}
# Plain dict indexing is kept so an unexpected value still raises KeyError.
hotel_booking['arrival_date_month'] = hotel_booking['arrival_date_month'].map(
    lambda month: look_up[month]
)


#1.4
# Total nights per booking = weekend nights + week nights.
weekend_nights = hotel_booking['stays_in_weekend_nights']
week_nights = hotel_booking['stays_in_week_nights']
hotel_booking['stays_in_nights'] = weekend_nights + week_nights

# Join year, month and day into a "year-month-day" string, then parse it
# into a datetime column.
year_text = hotel_booking['arrival_date_year'].map(str)
month_text = hotel_booking['arrival_date_month'].map(str)
day_text = hotel_booking['arrival_date_day_of_month'].map(str)
arrival_text = year_text + "-" + month_text + "-" + day_text
hotel_booking['arrival_date'] = pd.to_datetime(arrival_text, format='%Y-%m-%d')

#1.5
# Daily guest demand: sum of 'adults' per arrival date, over bookings with at
# least one night that arrive between 2015-07-01 and 2017-07-31 (inclusive).
# Date filter
df = hotel_booking[(hotel_booking['arrival_date'] >= '2015-07-01') & (hotel_booking['arrival_date'] <= '2017-07-31')]
# The original called rename() once and discarded the result before repeating
# it; the pipeline below renames exactly once.
demand = (
    df.loc[df['stays_in_nights'] > 0, ['arrival_date', 'adults']]
    .groupby(by=['arrival_date'])['adults']
    .sum()
    .reset_index()
    .rename(columns={'arrival_date': 'Date', 'adults': 'guests'})
)

#1.6
# Bookings that were not cancelled and lasted at least one night.
# .copy() gives an independent frame, so overwriting its column below neither
# touches hotel_booking nor triggers SettingWithCopyWarning.
ah = hotel_booking[(hotel_booking['is_canceled'] == 0) & (hotel_booking['stays_in_nights'] > 0)].copy()
# Keep only the first seven characters of the stringified arrival date
# ("YYYY-MM" when the column holds datetimes).
ah['arrival_date'] = ah['arrival_date'].map(str).str[0:7]
# Number of bookings per year-month and hotel type
n = ah.groupby(['arrival_date'])['hotel'].value_counts()


def _monthly_sales(bookings, hotel_name):
    """Return per-month booking counts for one hotel type.

    The result has two columns, 'date' (the year-month string) and 'sales'
    (number of bookings), sorted by 'date' with a fresh 0..n-1 index.
    Naming the columns explicitly avoids relying on the labels produced by
    value_counts().reset_index(), which differ between pandas versions.
    Sorting the year-month strings directly replaces the original round trip
    through to_datetime, whose format '%Y/%m' did not match the '-' separator.
    """
    counts = bookings.loc[bookings['hotel'] == hotel_name, 'arrival_date'].value_counts()
    monthly = counts.rename_axis('date').reset_index(name='sales')
    return monthly.sort_values('date').reset_index(drop=True)


city = _monthly_sales(ah, 'City Hotel')
resort = _monthly_sales(ah, 'Resort Hotel')
#绘图
import pyecharts.options as opts
from pyecharts.charts import Line, Bar
from pyecharts.faker import Faker
from pyecharts.globals import ThemeType

# Extract the plot data as plain Python lists (tolist() yields native
# str/int values, replacing the manual index-and-append loops).
b1 = city['date'].tolist()
b2 = city['sales'].astype(int).tolist()
b3 = resort['sales'].astype(int).tolist()

# Smoothed area-line chart of monthly bookings for the two hotel types.
# NOTE(review): both series share the x axis taken from `city` and are cut to
# the first 26 entries; this presumes `city` and `resort` list the same
# months in the same order - confirm against the data.
c = (
    Line()
        .add_xaxis(xaxis_data=b1[0:26])
        .add_yaxis("City Hotel", y_axis=b2[0:26], is_smooth=True)
        .add_yaxis("Resort Hotel", y_axis=b3[0:26], is_smooth=True)
        .set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
        markpoint_opts=opts.MarkPointOpts(
            data=[opts.MarkPointItem(type_='max', name='Max')])
    )
        .set_global_opts(
        title_opts=opts.TitleOpts(title="Line-面积图(紧贴 Y 轴)"),
        xaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_align_with_label=True),
            is_scale=False,
            boundary_gap=False,
        ),
    )
)
c.render_notebook()

#1.7
# Per arrival date, count the non-null 'required_car_parking_spaces' entries.
# NOTE(review): the original first built a "not cancelled, at least one night"
# subset and then overwrote it without using it, so these counts cover ALL
# bookings; that dead assignment (and a no-op column selection) is removed
# here without changing the result. Confirm whether the filter, and sum()
# rather than count(), were actually intended.
rh = hotel_booking.groupby(['arrival_date'])['required_car_parking_spaces'].count()
rh = rh.reset_index()
rh = rh.rename(columns={'arrival_date': 'Date', 'required_car_parking_spaces': 'park'})
# Keep dates strictly after 2015-12-31 and strictly before 2017-01-01.
rh = rh[(rh['Date'] > '2015-12-31') & (rh['Date'] < '2017-1-1')].reset_index(drop=True)
rh['Date'] = rh['Date'].map(str)
# Calendar input: [first 10 characters of the date string, count / 10 truncated to int].
r1 = [[day[0:10], int(parked / 10)] for day, parked in zip(rh['Date'], rh['park'])]

import pyecharts.options as opts
from pyecharts.charts import Calendar

# Calendar heat map of the r1 values over the year 2016.
calendar_layout = opts.CalendarOpts(
    pos_top="120",
    pos_left="30",
    pos_right="30",
    range_="2016",
    yearlabel_opts=opts.CalendarYearLabelOpts(is_show=False),
)
chart_title = opts.TitleOpts(pos_top="30", pos_left="center", title="2016年停车位数量和")
value_legend = opts.VisualMapOpts(
    max_=50, min_=0, orient="horizontal", is_piecewise=True
)

tu = Calendar(init_opts=opts.InitOpts(width="1600px", height="1000px"))
tu = tu.add(series_name="", yaxis_data=r1, calendar_opts=calendar_layout)
tu = tu.set_global_opts(title_opts=chart_title, visualmap_opts=value_legend)
tu.render_notebook()

#2.1
# Bookings that were not cancelled and lasted at least one night.
rh = hotel_booking[(hotel_booking['is_canceled'] == 0) & (hotel_booking['stays_in_nights'] > 0)]
rh = rh[['arrival_date', 'lead_time']]
rh.reset_index(drop=True, inplace=True)

# Arrival date minus lead_time days. The column keeps the original name
# 'arrival_date' even though it now holds the shifted date.
shifted_date = pd.to_datetime(rh['arrival_date']) - pd.to_timedelta(rh['lead_time'], unit='D')
df = pd.DataFrame({'arrival_date': shifted_date})

# Day of week as 1..7 (Monday = 1); .dt.weekday replaces the per-row loop.
df['xq'] = shifted_date.dt.weekday + 1
# Drop Saturdays and Sundays
df = df[~df['xq'].isin([6, 7])]


#2.2
# Row counts grouped by meal for Resort Hotel bookings arriving in month 8 of 2016.
hm = hotel_booking[['hotel', 'meal', 'arrival_date_month', 'arrival_date_year']]
is_resort = hm['hotel'] == 'Resort Hotel'
in_month_8 = hm['arrival_date_month'] == 8
in_2016 = hm['arrival_date_year'] == 2016
hm = hm[is_resort & in_month_8 & in_2016]
hm.groupby(by='meal').value_counts()

#2.3
# Fraction of 'Check-Out' bookings whose assigned room type differs from the
# reserved room type, formatted to two decimals.
room_columns = ['assigned_room_type', 'reserved_room_type', 'reservation_status']
checked_out = hotel_booking.loc[hotel_booking['reservation_status'] == 'Check-Out', room_columns]
room_changed = checked_out['assigned_room_type'] != checked_out['reserved_room_type']
a = len(checked_out)
b = len(checked_out[room_changed])
task2_3 = format(float(b) / float(a), '.2f')

#2.4
# Bookings that were not cancelled and lasted at least one night.
rh = hotel_booking[(hotel_booking['is_canceled'] == 0) & (hotel_booking['stays_in_nights'] > 0)]
# .copy() so the column assignments below act on an independent frame.
sa = rh[['arrival_date', 'stays_in_nights', 'adr']].copy()
# Per-booking total: adr multiplied by the number of nights.
sa['sum_adr'] = sa['adr'] * sa['stays_in_nights']
n = sa  # alias: n and sa are the same object
# Keep only the first seven characters of the stringified arrival date.
n['arrival_date'] = n['arrival_date'].map(str).str[0:7]
# Fixed: the original referenced an undefined name `ny` here (NameError).
# NOTE(review): count() returns the number of rows per month, not the summed
# 'sum_adr'; confirm whether sum() was intended.
n.groupby('arrival_date')['sum_adr'].count()
t = '(2017,2016-10)'

#2.5
# Cancelled vs. not-cancelled booking counts per distribution channel,
# ignoring the 'Undefined' channel.
known_channel = hotel_booking['distribution_channel'] != 'Undefined'
canceled_counts = hotel_booking.loc[
    known_channel & (hotel_booking['is_canceled'] == 1), 'distribution_channel'
].value_counts()
kept_counts = hotel_booking.loc[
    known_channel & (hotel_booking['is_canceled'] == 0), 'distribution_channel'
].value_counts()

# Column names are set explicitly ('index' = channel, 'distribution_channel'
# = count) because the labels produced by value_counts().reset_index() differ
# between pandas versions.
qx = canceled_counts.rename_axis('index').reset_index(name='distribution_channel')
wq = kept_counts.rename_axis('index').reset_index(name='distribution_channel')

# Attach each channel's cancelled count by channel label. The original paired
# rows by position, but the two tables are each sorted by their own counts,
# so different channels could be combined. Channels with no cancellations get 0.
wq['qxld'] = wq['index'].map(canceled_counts).fillna(0).astype(int)
wq['hj'] = wq['distribution_channel'] + wq['qxld']
# NOTE(review): this is not-cancelled / total, i.e. the share that was NOT
# cancelled; confirm whether cancelled / total was intended for 'qxl'.
wq['qxl'] = wq['distribution_channel'] / wq['hj']
wq

#2.6
# Not-cancelled bookings whose country is "PRT", followed by a two-column view
# (the bare expression only shows output when it is the last line of a
# notebook cell).
is_prt = hotel_booking['country'] == "PRT"
not_canceled = hotel_booking['is_canceled'] == 0
m = hotel_booking[is_prt & not_canceled]
m[['country', 'is_canceled']]

# Hard-coded per-country values used by the world map.
chn, fra, gbr, rus, usa, prt = 540, 8468, 9666, 391, 1592, 20976
a = [
    ['China', chn],
    ['France', fra],
    ['Great Britain', gbr],
    ['Russia', rus],
    ['United States', usa],
    ['Portugal', prt],
]
b = [chn, fra, gbr, rus, usa, prt]

from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.faker import Faker

# World map coloured by the values in `a`, with labels hidden.
map_title = opts.TitleOpts(title="Map-世界地图")
map_legend = opts.VisualMapOpts(max_=20000)

c = Map()
c = c.add("商家A", a, "world")
c = c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
c = c.set_global_opts(title_opts=map_title, visualmap_opts=map_legend)
c.render_notebook()
















  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值