Airbnb data analysis

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns
from pprint import pprint

Read Data

airbnb = pd.read_csv('./new-york-city-airbnb-open-data/AB_NYC_2019.csv')
pprint(airbnb.head())
     id                                              name  host_id  \
0  2539                Clean & quiet apt home by the park     2787   
1  2595                             Skylit Midtown Castle     2845   
2  3647               THE VILLAGE OF HARLEM....NEW YORK !     4632   
3  3831                   Cozy Entire Floor of Brownstone     4869   
4  5022  Entire Apt: Spacious Studio/Loft by central park     7192   

     host_name neighbourhood_group neighbourhood  latitude  longitude  \
0         John            Brooklyn    Kensington  40.64749  -73.97237   
1     Jennifer           Manhattan       Midtown  40.75362  -73.98377   
2    Elisabeth           Manhattan        Harlem  40.80902  -73.94190   
3  LisaRoxanne            Brooklyn  Clinton Hill  40.68514  -73.95976   
4        Laura           Manhattan   East Harlem  40.79851  -73.94399   

         room_type  price  minimum_nights  number_of_reviews last_review  \
0     Private room    149               1                  9  2018-10-19   
1  Entire home/apt    225               1                 45  2019-05-21   
2     Private room    150               3                  0         NaN   
3  Entire home/apt     89               1                270  2019-07-05   
4  Entire home/apt     80              10                  9  2018-11-19   

   reviews_per_month  calculated_host_listings_count  availability_365  
0               0.21                               6               365  
1               0.38                               2               355  
2                NaN                               1               365  
3               4.64                               1               194  
4               0.10                               1                 0  
airbnb.describe()
id host_id latitude longitude price minimum_nights number_of_reviews reviews_per_month calculated_host_listings_count availability_365
count 4.889500e+04 4.889500e+04 48895.000000 48895.000000 48895.000000 48895.000000 48895.000000 38843.000000 48895.000000 48895.000000
mean 1.901714e+07 6.762001e+07 40.728949 -73.952170 152.720687 7.029962 23.274466 1.373221 7.143982 112.781327
std 1.098311e+07 7.861097e+07 0.054530 0.046157 240.154170 20.510550 44.550582 1.680442 32.952519 131.622289
min 2.539000e+03 2.438000e+03 40.499790 -74.244420 0.000000 1.000000 0.000000 0.010000 1.000000 0.000000
25% 9.471945e+06 7.822033e+06 40.690100 -73.983070 69.000000 1.000000 1.000000 0.190000 1.000000 0.000000
50% 1.967728e+07 3.079382e+07 40.723070 -73.955680 106.000000 3.000000 5.000000 0.720000 1.000000 45.000000
75% 2.915218e+07 1.074344e+08 40.763115 -73.936275 175.000000 5.000000 24.000000 2.020000 2.000000 227.000000
max 3.648724e+07 2.743213e+08 40.913060 -73.712990 10000.000000 1250.000000 629.000000 58.500000 327.000000 365.000000
airbnb.shape
(48895, 16)
airbnb.columns
Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365'],
      dtype='object')
column = airbnb.columns.tolist()
column
['id',
 'name',
 'host_id',
 'host_name',
 'neighbourhood_group',
 'neighbourhood',
 'latitude',
 'longitude',
 'room_type',
 'price',
 'minimum_nights',
 'number_of_reviews',
 'last_review',
 'reviews_per_month',
 'calculated_host_listings_count',
 'availability_365']
airbnb.dtypes
id                                  int64
name                               object
host_id                             int64
host_name                          object
neighbourhood_group                object
neighbourhood                      object
latitude                          float64
longitude                         float64
room_type                          object
price                               int64
minimum_nights                      int64
number_of_reviews                   int64
last_review                        object
reviews_per_month                 float64
calculated_host_listings_count      int64
availability_365                    int64
dtype: object

clean data

airbnb.isnull().sum()
id                                    0
name                                 16
host_id                               0
host_name                            21
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
last_review                       10052
reviews_per_month                 10052
calculated_host_listings_count        0
availability_365                      0
dtype: int64
# 删除缺失值
airbnb.drop(['id','host_name','last_review'],axis = 1,inplace= True)
airbnb.head(5)
name host_id neighbourhood_group neighbourhood latitude longitude room_type price minimum_nights number_of_reviews reviews_per_month calculated_host_listings_count availability_365
0 Clean & quiet apt home by the park 2787 Brooklyn Kensington 40.64749 -73.97237 Private room 149 1 9 0.21 6 365
1 Skylit Midtown Castle 2845 Manhattan Midtown 40.75362 -73.98377 Entire home/apt 225 1 45 0.38 2 355
2 THE VILLAGE OF HARLEM....NEW YORK ! 4632 Manhattan Harlem 40.80902 -73.94190 Private room 150 3 0 NaN 1 365
3 Cozy Entire Floor of Brownstone 4869 Brooklyn Clinton Hill 40.68514 -73.95976 Entire home/apt 89 1 270 4.64 1 194
4 Entire Apt: Spacious Studio/Loft by central park 7192 Manhattan East Harlem 40.79851 -73.94399 Entire home/apt 80 10 9 0.10 1 0
# 填充缺失值
airbnb.fillna({
   'reviews_per_month':0},inplace = True)
airbnb
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值