关注CSDN:程志伟的博客
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Store seaborn's "deep" color palette in `colors`.
colors=sns.color_palette("deep")
H:\Anaconda3\lib\site-packages\statsmodels\tools\_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
import pandas.util.testing as tm
# Read the airline flight data from a local CSV file into a DataFrame.
data=pd.read_csv('H:/0date/airport-ontime.csv')
# Preview the first rows to sanity-check the load.
data.head()
Out[2]:
FL_DATE UNIQUE_CARRIER ... DISTANCE_GROUP Unnamed: 16
0 2014-06-01 AA ... 10 NaN
1 2014-06-01 AA ... 10 NaN
2 2014-06-01 AA ... 10 NaN
3 2014-06-01 AA ... 10 NaN
4 2014-06-01 AA ... 11 NaN
[5 rows x 17 columns]
# Check the dimensions of the data as (rows, columns).
data.shape
Out[3]: (502617, 17)
# List the column names of the data.
data.columns
Out[4]:
Index(['FL_DATE', 'UNIQUE_CARRIER', 'ORIGIN_AIRPORT_ID',
'ORIGIN_AIRPORT_SEQ_ID', 'ORIGIN_CITY_MARKET_ID', 'ORIGIN_STATE_ABR',
'DEST_AIRPORT_ID', 'DEST_AIRPORT_SEQ_ID', 'DEST_CITY_MARKET_ID',
'DEST_STATE_ABR', 'DEP_DELAY_NEW', 'DEP_DEL15', 'ARR_DELAY_NEW',
'ARR_DEL15', 'DISTANCE', 'DISTANCE_GROUP', 'Unnamed: 16'],
dtype='object')
# Print a per-column summary (non-null counts and dtypes) to spot missing values.
data.info()
RangeIndex: 502617 entries, 0 to 502616
Data columns (total 17 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 FL_DATE 502617 non-null object
1 UNIQUE_CARRIER 502617 non-null object
2 ORIGIN_AIRPORT_ID 502617 non-null int64
3 ORIGIN_AIRPORT_SEQ_ID 502617 non-null int64
4 ORIGIN_CITY_MARKET_ID 502617 non-null int64
5 ORIGIN_STATE_ABR 502617 non-null object
6 DEST_AIRPORT_ID 502617 non-null int64
7 DEST_AIRPORT_SEQ_ID 502617 non-null int64
8 DEST_CITY_MARKET_ID 502617 non-null int64
9 DEST_STATE_ABR 502617 non-null object
10 DEP_DELAY_NEW 492974 non-null float64
11 DEP_DEL15 492974 non-null float64
12 ARR_DELAY_NEW 490716 non-null float64
13 ARR_DEL15 490716 non-null float64
14 DISTANCE 502617 non-null float64
15 DISTANCE_GROUP 502617 non-null int64
16 Unnamed: 16 0 non-null float64
dtypes: float64(6), int64(7), object(4)
memory usage: 65.2+ MB
# Drop columns in which every value is missing: axis=1 targets columns and
# how='all' only removes a column when all of its entries are NaN.
# inplace=True mutates `data` directly instead of returning a new DataFrame.
data.dropna(how='all',inplace=True,axis=1)
# Re-inspect the per-column summary to confirm the result of the drop.
data.info()
RangeIndex: 502617 entries, 0 to 502616
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 FL_DATE 502617 non-null object
1 UNIQUE_CARRIER 502617 non-null object
2 ORIGIN_AIRPORT_ID 502617 non-null int64
3 ORIGIN_AIRPORT_SEQ_ID 502617 non-null int64
4 ORIGIN_CITY_MARKET_ID 502617 non-null int64
5 ORIGIN_STATE_ABR 502617 n