2-1

Loading [MathJax]/extensions/Safe.js
 
lect02_eg01 Last Checkpoint: 11/06/2017 (autosaved)
 
 
 
 
 

Pandas进阶及技巧

 

1. 创建Pandas数据结构(Series 与 DataFrame)

 
 
 
 
 
 
import pandas as pd
# Every country record carries the same four fields, in this order.
_FIELDS = ('Name', 'Language', 'Area', 'Happiness Rank')


def _country(*values):
    """Return a Series mapping each field name in _FIELDS to the given value."""
    return pd.Series(dict(zip(_FIELDS, values)))


country1 = _country('中国', 'Chinese', '9.597M km2', 79)
country2 = _country('美国', 'English (US)', '9.834M km2', 14)
country3 = _country('澳大利亚', 'English (AU)', '7.692M km2', 9)

# One row per country, labelled with a short country code.
df = pd.DataFrame([country1, country2, country3], index=['CH', 'US', 'AU'])
 
 
 
 
 
 
 
 
# Note the difference in Jupyter between using print and not using it:
# print(df) writes the plain-text rendering, while a bare trailing expression
# is displayed by the notebook as a formatted table.
print(df)
df
 
 
 
          Area  Happiness Rank      Language  Name
CH  9.597M km2              79       Chinese    中国
US  9.834M km2              14  English (US)    美国
AU  7.692M km2               9  English (AU)  澳大利亚
 
 AreaHappiness RankLanguageName
CH9.597M km279Chinese中国
US9.834M km214English (US)美国
AU7.692M km29English (AU)澳大利亚
 
 
 
 
 
 
# Add data (new columns).
# A single scalar value is automatically "broadcast" to every row.
# A list whose length does not match the number of rows (longer or shorter)
# raises an error.
# A list whose length exactly matches the number of rows fills the rows in order.
df['Location'] = '地球'
print(df)
df['Region'] = ['亚洲', '北美洲', '大洋洲']
print(df)
df
 
 
 
          Area  Happiness Rank      Language  Name Location
CH  9.597M km2              79       Chinese    中国       地球
US  9.834M km2              14  English (US)    美国       地球
AU  7.692M km2               9  English (AU)  澳大利亚       地球
          Area  Happiness Rank      Language  Name Location Region
CH  9.597M km2              79       Chinese    中国       地球     亚洲
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲
 
 AreaHappiness RankLanguageNameLocationRegion
CH9.597M km279Chinese中国地球亚洲
US9.834M km214English (US)美国地球北美洲
AU7.692M km29English (AU)澳大利亚地球大洋洲
 

2. Pandas索引

 
 
 
 
 
 
# Row indexing: loc selects a row by its label, iloc by its integer position.
row_by_label = df.loc['CH']
print('loc:', row_by_label, type(row_by_label), sep='\n')
row_by_position = df.iloc[1]
print('iloc:', row_by_position, sep='\n')
 
 
 
loc:
Area              9.597M km2
Happiness Rank            79
Language             Chinese
Name                      中国
Location                  地球
Region                    亚洲
Name: CH, dtype: object
<class 'pandas.core.series.Series'>
iloc:
Area                9.834M km2
Happiness Rank              14
Language          English (US)
Name                        美国
Location                    地球
Region                     北美洲
Name: US, dtype: object
 
 
 
 
 
 
# Column indexing: selecting a single column by name; the second print shows
# the type of the selected object.
print(df['Area'])
print(type(df['Area']))
 
 
 
CH    9.597M km2
US    9.834M km2
AU    7.692M km2
Name: Area, dtype: object
<class 'pandas.core.series.Series'>
 
 
 
 
 
 
# Fetch several, not necessarily adjacent, columns by passing a list of names
print(df[['Name', 'Area']])
 
 
 
    Name        Area
CH    中国  9.597M km2
US    美国  9.834M km2
AU  澳大利亚  7.692M km2
 
 
 
 
 
 
# Mixed indexing
# Note the differences in how each form is written: all five expressions
# address the same cell (row 'CH', column 'Area').
print('先取出列,再取行:')
print(df['Area']['CH'])
print(df['Area'].loc['CH'])
print(df['Area'].iloc[0])
print('先取出行,再取列:')
print(df.loc['CH']['Area'])
print(df.iloc[0]['Area'])
 
 
 
先取出列,再取行:
9.597M km2
9.597M km2
9.597M km2
先取出行,再取列:
9.597M km2
9.597M km2
 
 
 
 
 
 
# Transpose: swap rows and columns
print(df.T)
 
 
 
                        CH            US            AU
Area            9.597M km2    9.834M km2    7.692M km2
Happiness Rank          79            14             9
Language           Chinese  English (US)  English (AU)
Name                    中国            美国          澳大利亚
Location                地球            地球            地球
Region                  亚洲           北美洲           大洋洲
 

3. 删除数据

 
 
 
 
 
 
# Drop the row labelled 'CH' and print the result.
print(df.drop(['CH']))
# Note: drop() only returns a modified copy; the original data is not changed
print(df)
 
 
 
          Area  Happiness Rank      Language  Name Location Region
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲
          Area  Happiness Rank      Language  Name Location Region
CH  9.597M km2              79       Chinese    中国       地球     亚洲
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲
 
 
 
 
 
 
# The print shows the return value of an in-place drop, which is None.
print(df.drop(['CH'], inplace=True))
# With inplace=True the original data is modified and no copy is returned
print(df)
 
 
 
None
          Area  Happiness Rank      Language  Name Location Region
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲
 
 
 
 
 
 
# To drop a column, axis=1 must be specified.
# As with rows, a modified copy is returned and df itself keeps the column.
print(df.drop(['Area'], axis=1))
print(df)
 
 
 
    Happiness Rank      Language  Name Location Region
CH              79       Chinese    中国       地球     亚洲
US              14  English (US)    美国       地球    北美洲
AU               9  English (AU)  澳大利亚       地球    大洋洲
          Area  Happiness Rank      Language  Name Location Region
CH  9.597M km2              79       Chinese    中国       地球     亚洲
US  9.834M km2              14  English (US)    美国       地球    北美洲
AU  7.692M km2               9  English (AU)  澳大利亚       地球    大洋洲
 
 
 
 
 
 
# The del keyword can also be used directly; it removes the column from df itself
del df['Name']
print(df)
 
 
 
          Area  Happiness Rank      Language Location Region
US  9.834M km2              14  English (US)       地球    北美洲
AU  7.692M km2               9  English (AU)       地球    大洋洲
 

4. DataFrame的操作与加载

 
 
 
 
 
 
# Note: operating in place on data taken out of a DataFrame can affect the
# original data — in the recorded run the "+=" below also changed df.
# NOTE(review): this depends on the pandas version/mode; with Copy-on-Write
# enabled the original frame would be left unchanged — confirm against the
# pandas version in use.
ranks = df['Happiness Rank']
ranks += 2
print(ranks)
print(df)
 
 
 
US    18
AU    13
Name: Happiness Rank, dtype: int64
          Area  Happiness Rank      Language Location Region
US  9.834M km2              18  English (US)       地球    北美洲
AU  7.692M km2              13  English (AU)       地球    大洋洲
 
 
 
 
 
 
# Note: operating on data taken out of a DataFrame can affect the original data.
# The safe approach is to work on a copy(): the "+=" below changes only ranks.
ranks = df['Happiness Rank'].copy()
ranks += 2
print(ranks)
print(df)
 
 
 
US    20
AU    15
Name: Happiness Rank, dtype: int64
          Area  Happiness Rank      Language Location Region
US  9.834M km2              18  English (US)       地球    北美洲
AU  7.692M km2              13  English (AU)       地球    大洋洲
 
 
 
 
 
 
# Load data from a CSV file
reprot_2015_df = pd.read_csv('./2015.csv')
print('2015年数据预览:')
#print(reprot_2015_df.head())
# The bare expression lets the notebook display the first rows as a table.
reprot_2015_df.head()
 
 
 
2015年数据预览:
 
 CountryRegionHappiness RankHappiness ScoreStandard ErrorEconomy (GDP per Capita)FamilyHealth (Life Expectancy)FreedomTrust (Government Corruption)GenerosityDystopia Residual
0SwitzerlandWestern Europe17.5870.034111.396511.349510.941430.665570.419780.296782.51738
1IcelandWestern Europe27.5610.048841.302321.402230.947840.628770.141450.436302.70201
2DenmarkWestern Europe37.5270.033281.325481.360580.874640.649380.483570.341392.49204
3NorwayWestern Europe47.5220.038801.459001.330950.885210.669730.365030.346992.46531
4CanadaNorth America57.4270.035531.326291.322610.905630.632970.329570.458112.45176
 
 
 
 
 
 
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
reprot_2015_df.info()
 
 
 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 12 columns):
Country                          158 non-null object
Region                           158 non-null object
Happiness Rank                   158 non-null int64
Happiness Score                  158 non-null float64
Standard Error                   158 non-null float64
Economy (GDP per Capita)         158 non-null float64
Family                           158 non-null float64
Health (Life Expectancy)         158 non-null float64
Freedom                          158 non-null float64
Trust (Government Corruption)    158 non-null float64
Generosity                       158 non-null float64
Dystopia Residual                158 non-null float64
dtypes: float64(9), int64(1), object(2)
memory usage: 14.9+ KB
None
 
 
 
 
 
 
# usecols restricts which columns are read from the file;
# index_col picks the column that becomes the row index.
columns_2016 = ['Country', 'Happiness Rank', 'Happiness Score', 'Region']
reprot_2016_df = pd.read_csv('./2016.csv', usecols=columns_2016, index_col='Country')
# Preview the data
reprot_2016_df.head()
 
 
 
 RegionHappiness RankHappiness Score
Country   
DenmarkWestern Europe17.526
SwitzerlandWestern Europe27.509
IcelandWestern Europe37.501
NorwayWestern Europe47.498
FinlandWestern Europe57.413
 
 
 
 
 
 
# Print the column labels and the row labels (index) of the 2016 frame.
print('列名(column):', reprot_2016_df.columns)
print('行名(index):', reprot_2016_df.index)
 
 
 
列名(column): Index(['Region', 'Happiness Rank', 'Happiness Score'], dtype='object')
行名(index): Index(['Denmark', 'Switzerland', 'Iceland', 'Norway', 'Finland', 'Canada',
       'Netherlands', 'New Zealand', 'Australia', 'Sweden',
       ...
       'Madagascar', 'Tanzania', 'Liberia', 'Guinea', 'Rwanda', 'Benin',
       'Afghanistan', 'Togo', 'Syria', 'Burundi'],
      dtype='object', name='Country', length=157)
 
 
 
 
 
 
# Note: an Index is immutable — this item assignment raises TypeError
# (kept deliberately as a demonstration).
reprot_2016_df.index[0] = '丹麦'
 
 
 
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-104-9214cb35e379> in <module>()
----> 1 reprot_2016_df.index[0] = '丹麦' C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in __setitem__(self, key, value)  1618  1619 def __setitem__(self, key, value): -> 1620 raise TypeError("Index does not support mutable operations")  1621  1622 def __getitem__(self, key): TypeError: Index does not support mutable operations 
 
 
 
 
 
 
# Reset the index
# Note the difference between using inplace and not using it: here inplace is
# not passed, and only the result of reset_index() is previewed.
reprot_2016_df.reset_index().head()
 
 
 
 Country地区排名幸福指数
0DenmarkWestern Europe17.526
1SwitzerlandWestern Europe27.509
2IcelandWestern Europe37.501
3NorwayWestern Europe47.498
4FinlandWestern Europe57.413
 
 
 
 
 
 
# Rename the columns. Without inplace=True, rename() returns a new DataFrame
# and the result is discarded here, so reprot_2016_df itself is not renamed.
# The keys must match the existing column labels exactly: labels that match no
# column are silently ignored by rename(), which is why the spelling matters.
reprot_2016_df.rename(columns={'Region': '地区', 'Happiness Rank': '排名', 'Happiness Score': '幸福指数'})
reprot_2016_df.head()
 
 
 
 地区排名幸福指数
Country   
DenmarkWestern Europe17.526
SwitzerlandWestern Europe27.509
IcelandWestern Europe37.501
NorwayWestern Europe47.498
FinlandWestern Europe57.413
 
 
 
 
 
 
# Rename the columns; note the use of inplace: with inplace=True the rename is
# applied to reprot_2016_df itself.
reprot_2016_df.rename(columns={'Region': '地区', 'Happiness Rank': '排名', 'Happiness Score': '幸福指数'},
                     inplace=True)
reprot_2016_df.head()
 
 
 
 地区排名幸福指数
Country   
DenmarkWestern Europe17.526
SwitzerlandWestern Europe27.509
IcelandWestern Europe37.501
NorwayWestern Europe47.498
FinlandWestern Europe57.413
 

5. Boolean Mask

 
 
 
 
 
 
# Filter for countries in the Western Europe region: the comparison yields a
# boolean Series (one True/False per country), not the filtered rows.
only_western_europe = reprot_2016_df['地区'] == 'Western Europe'
only_western_europe
 
 
 
Country
Denmark                  True
Switzerland              True
Iceland                  True
Norway                   True
Finland                  True
Canada                  False
Netherlands              True
New Zealand             False
Australia               False
Sweden                   True
Israel                  False
Austria                  True
United States           False
Costa Rica              False
Puerto Rico             False
Germany                  True
Brazil                  False
Belgium                  True
Ireland                  True
Luxembourg               True
Mexico                  False
Singapore               False
United Kingdom           True
Chile                   False
Panama                  False
Argentina               False
Czech Republic          False
United Arab Emirates    False
Uruguay                 False
Malta                    True
                        ...  
Senegal                 False
Bulgaria                False
Mauritania              False
Zimbabwe                False
Malawi                  False
Sudan                   False
Gabon                   False
Mali                    False
Haiti                   False
Botswana                False
Comoros                 False
Ivory Coast             False
Cambodia                False
Angola                  False
Niger                   False
South Sudan             False
Chad                    False
Burkina Faso            False
Uganda                  False
Yemen                   False
Madagascar              False
Tanzania                False
Liberia                 False
Guinea                  False
Rwanda                  False
Benin                   False
Afghanistan             False
Togo                    False
Syria                   False
Burundi                 False
Name: 地区, Length: 157, dtype: bool
 
 
 
 
 
 
# Build one boolean mask per condition, then AND them element-wise:
# countries in Western Europe whose rank is outside the top 10.
in_western_europe = reprot_2016_df['地区'] == 'Western Europe'
ranked_outside_top_10 = reprot_2016_df['排名'] > 10
only_western_europe_10 = in_western_europe & ranked_outside_top_10
only_western_europe_10
 
 
 
Country
Denmark                 False
Switzerland             False
Iceland                 False
Norway                  False
Finland                 False
Canada                  False
Netherlands             False
New Zealand             False
Australia               False
Sweden                  False
Israel                  False
Austria                  True
United States           False
Costa Rica              False
Puerto Rico             False
Germany                  True
Brazil                  False
Belgium                  True
Ireland                  True
Luxembourg               True
Mexico                  False
Singapore               False
United Kingdom           True
Chile                   False
Panama                  False
Argentina               False
Czech Republic          False
United Arab Emirates    False
Uruguay                 False
Malta                    True
                        ...  
Senegal                 False
Bulgaria                False
Mauritania              False
Zimbabwe                False
Malawi                  False
Sudan                   False
Gabon                   False
Mali                    False
Haiti                   False
Botswana                False
Comoros                 False
Ivory Coast             False
Cambodia                False
Angola                  False
Niger                   False
South Sudan             False
Chad                    False
Burkina Faso            False
Uganda                  False
Yemen                   False
Madagascar              False
Tanzania                False
Liberia                 False
Guinea                  False
Rwanda                  False
Benin                   False
Afghanistan             False
Togo                    False
Syria                   False
Burundi                 False
Length: 157, dtype: bool
 
 
 
 
 
 
# Apply the combined boolean mask to obtain the final result
# (only the rows where the mask is True are kept).
reprot_2016_df[only_western_europe_10]
 
 
 
 地区排名幸福指数
Country   
AustriaWestern Europe127.119
GermanyWestern Europe166.994
BelgiumWestern Europe186.929
IrelandWestern Europe196.907
LuxembourgWestern Europe206.871
United KingdomWestern Europe236.725
MaltaWestern Europe306.488
FranceWestern Europe326.478
SpainWestern Europe376.361
ItalyWestern Europe505.977
North CyprusWestern Europe625.771
CyprusWestern Europe695.546
PortugalWestern Europe945.123
GreeceWestern Europe995.033
 
 
 
 
 
 
# Once familiar with it, the whole filter can be written on one line
reprot_2016_df[(reprot_2016_df['地区'] == 'Western Europe') & (reprot_2016_df['排名'] > 10)]
 
 
 
 地区排名幸福指数
Country   
AustriaWestern Europe127.119
GermanyWestern Europe166.994
BelgiumWestern Europe186.929
IrelandWestern Europe196.907
LuxembourgWestern Europe206.871
United KingdomWestern Europe236.725
MaltaWestern Europe306.488
FranceWestern Europe326.478
SpainWestern Europe376.361
ItalyWestern Europe505.977
North CyprusWestern Europe625.771
CyprusWestern Europe695.546
PortugalWestern Europe945.123
GreeceWestern Europe995.033
 

6. 层级索引

 
 
 
 
 
 
# Preview the first rows of the 2015 report.
reprot_2015_df.head()
 
 
 
 CountryRegionHappiness RankHappiness ScoreStandard ErrorEconomy (GDP per Capita)FamilyHealth (Life Expectancy)FreedomTrust (Government Corruption)GenerosityDystopia Residual
0SwitzerlandWestern Europe17.5870.034111.396511.349510.941430.665570.419780.296782.51738
1IcelandWestern Europe27.5610.048841.302321.402230.947840.628770.141450.436302.70201
2DenmarkWestern Europe37.5270.033281.325481.360580.874640.649380.483570.341392.49204
3NorwayWestern Europe47.5220.038801.459001.330950.885210.669730.365030.346992.46531
4CanadaNorth America57.4270.035531.326291.322610.905630.632970.329570.458112.45176
 
 
 
 
 
 
# Set a hierarchical (multi-level) index: Region is the outer level,
# Country the inner level. The result is stored in a new frame.
report_2015_df2 = reprot_2015_df.set_index(['Region', 'Country'])
report_2015_df2.head(20)
 
 
 
  Happiness RankHappiness ScoreStandard ErrorEconomy (GDP per Capita)FamilyHealth (Life Expectancy)FreedomTrust (Government Corruption)GenerosityDystopia Residual
RegionCountry          
Western EuropeSwitzerland17.5870.034111.396511.349510.941430.665570.419780.296782.51738
Iceland27.5610.048841.302321.402230.947840.628770.141450.436302.70201
Denmark37.5270.033281.325481.360580.874640.649380.483570.341392.49204
Norway47.5220.038801.459001.330950.885210.669730.365030.346992.46531
North AmericaCanada57.4270.035531.326291.322610.905630.632970.329570.458112.45176
Western EuropeFinland67.4060.031401.290251.318260.889110.641690.413720.233512.61955
Netherlands77.3780.027991.329441.280170.892840.615760.318140.476102.46570
Sweden87.3640.031571.331711.289070.910870.659800.438440.362622.37119
Australia and New ZealandNew Zealand97.2860.033711.250181.319670.908370.639380.429220.475012.26425
Australia107.2840.040831.333581.309230.931560.651240.356370.435622.26646
Middle East and Northern AfricaIsrael117.2780.034701.228571.223930.913870.413190.077850.331723.08854
Latin America and CaribbeanCosta Rica127.2260.044540.955781.237880.860270.633760.105830.254973.17728
Western EuropeAustria137.2000.037511.337231.297040.890420.624330.186760.330882.53320
Latin America and CaribbeanMexico147.1870.041761.020540.914510.814440.481810.213120.140743.60214
North AmericaUnited States157.1190.038391.394511.247110.861790.546040.158900.401052.51011
Latin America and CaribbeanBrazil166.9830.040760.981241.232870.697020.490490.175210.145743.26001
Western EuropeLuxembourg176.9460.034991.563911.219630.918940.615830.377980.280341.96961
Ireland186.9400.036761.335961.369480.895330.617770.287030.459011.97570
Belgium196.9370.035951.307821.285660.896670.584500.225400.222502.41484
Middle East and Northern AfricaUnited Arab Emirates206.9010.037291.427271.125750.809250.641570.385830.264282.24743
 
 
 
 
 
 
# Index on level 0 (Region): selects all rows of that region
report_2015_df2.loc['Western Europe']
 
 
 
 Happiness RankHappiness ScoreStandard ErrorEconomy (GDP per Capita)FamilyHealth (Life Expectancy)FreedomTrust (Government Corruption)GenerosityDystopia Residual
Country          
Switzerland17.5870.034111.396511.349510.941430.665570.419780.296782.51738
Iceland27.5610.048841.302321.402230.947840.628770.141450.436302.70201
Denmark37.5270.033281.325481.360580.874640.649380.483570.341392.49204
Norway47.5220.038801.459001.330950.885210.669730.365030.346992.46531
Finland67.4060.031401.290251.318260.889110.641690.413720.233512.61955
Netherlands77.3780.027991.329441.280170.892840.615760.318140.476102.46570
Sweden87.3640.031571.331711.289070.910870.659800.438440.362622.37119
Austria137.2000.037511.337231.297040.890420.624330.186760.330882.53320
Luxembourg176.9460.034991.563911.219630.918940.615830.377980.280341.96961
Ireland186.9400.036761.335961.369480.895330.617770.287030.459011.97570
Belgium196.9370.035951.307821.285660.896670.584500.225400.222502.41484
United Kingdom216.8670.018661.266371.285480.909430.596250.320670.519121.96994
Germany266.7500.018481.327921.299370.891860.614770.218430.282142.11569
France296.5750.035121.277781.260380.945790.550110.206460.123322.21126
Spain366.3290.034681.230111.313790.955620.459510.063980.182272.12367
Malta376.3020.042061.207401.302030.887210.603650.135860.517521.64880
Italy505.9480.039141.251141.197770.954460.262360.029010.228232.02518
North Cyprus665.6950.056351.208061.070080.923560.490270.142800.261691.59888
Cyprus675.6890.055801.208130.893180.923560.406720.061460.306381.88931
Portugal885.1020.048021.159911.139350.875190.514690.010780.137191.26462
Greece1024.8570.050621.154060.929330.882130.076990.013970.000001.80101
 
 
 
 
 
 
# Index on both levels (Region, then Country): selects a single row
report_2015_df2.loc['Western Europe', 'Switzerland']
 
 
 
Happiness Rank                   1.00000
Happiness Score                  7.58700
Standard Error                   0.03411
Economy (GDP per Capita)         1.39651
Family                           1.34951
Health (Life Expectancy)         0.94143
Freedom                          0.66557
Trust (Government Corruption)    0.41978
Generosity                       0.29678
Dystopia Residual                2.51738
Name: (Western Europe, Switzerland), dtype: float64
 
 
 
 
 
 
# Swap the order of the index levels (Country becomes the outer level)
report_2015_df2.swaplevel()
 
 
 
  Happiness RankHappiness ScoreStandard ErrorEconomy (GDP per Capita)FamilyHealth (Life Expectancy)FreedomTrust (Government Corruption)GenerosityDystopia Residual
CountryRegion          
SwitzerlandWestern Europe17.5870.034111.396511.349510.941430.665570.419780.296782.51738
IcelandWestern Europe27.5610.048841.302321.402230.947840.628770.141450.436302.70201
DenmarkWestern Europe37.5270.033281.325481.360580.874640.649380.483570.341392.49204
NorwayWestern Europe47.5220.038801.459001.330950.885210.669730.365030.346992.46531
CanadaNorth America57.4270.035531.326291.322610.905630.632970.329570.458112.45176
FinlandWestern Europe67.4060.031401.290251.318260.889110.641690.413720.233512.61955
NetherlandsWestern Europe77.3780.027991.329441.280170.892840.615760.318140.476102.46570
SwedenWestern Europe87.3640.031571.331711.289070.910870.659800.438440.362622.37119
New ZealandAustralia and New Zealand97.2860.033711.250181.319670.908370.639380.429220.475012.26425
AustraliaAustralia and New Zealand107.2840.040831.333581.309230.931560.651240.356370.435622.26646
IsraelMiddle East and Northern Africa117.2780.034701.228571.223930.913870.413190.077850.331723.08854
Costa RicaLatin America and Caribbean127.2260.044540.955781.237880.860270.633760.105830.254973.17728
AustriaWestern Europe137.2000.037511.337231.297040.890420.624330.186760.330882.53320
MexicoLatin America and Caribbean147.1870.041761.020540.914510.814440.481810.213120.140743.60214
United StatesNorth America157.1190.038391.394511.247110.861790.546040.158900.401052.51011
BrazilLatin America and Caribbean166.9830.040760.981241.232870.697020.490490.175210.145743.26001
LuxembourgWestern Europe176.9460.034991.563911.219630.918940.615830.377980.280341.96961
IrelandWestern Europe186.9400.036761.335961.369480.895330.617770.287030.459011.97570
BelgiumWestern Europe196.9370.035951.307821.285660.896670.584500.225400.222502.41484
United Arab EmiratesMiddle East and Northern Africa206.9010.037291.427271.125750.809250.641570.385830.264282.24743
United KingdomWestern Europe216.8670.018661.266371.285480.909430.596250.320670.519121.96994
OmanMiddle East and Northern Africa226.8530.053351.360111.081820.762760.632740.325240.215422.47489
VenezuelaLatin America and Caribbean236.8100.064761.044241.255960.720520.429080.110690.058413.19131
SingaporeSoutheastern Asia246.7980.037801.521861.020001.025250.542520.492100.311051.88501
PanamaLatin America and Caribbean256.7860.049101.063531.198500.796610.542100.092700.244342.84848
GermanyWestern Europe266.7500.018481.327921.299370.891860.614770.218430.282142.11569
ChileLatin America and Caribbean276.6700.058001.107151.124470.858570.441320.128690.333632.67585
QatarMiddle East and Northern Africa286.6110.062571.690421.078600.797330.640400.522080.325731.55674
FranceWestern Europe296.5750.035121.277781.260380.945790.550110.206460.123322.21126
ArgentinaLatin America and Caribbean306.5740.046121.053511.248230.787230.449740.084840.114512.83600
....................................
MyanmarSoutheastern Asia1294.3070.043510.271080.709050.482460.440170.190340.795881.41805
GeorgiaCentral and Eastern Europe1304.2970.042210.741900.385620.729260.405770.383310.055471.59541
MalawiSub-Saharan Africa1314.2920.061300.016040.411340.225620.430540.069770.331282.80791
Sri LankaSouthern Asia1324.2710.037510.835241.019050.708060.537260.091790.408280.67108
CameroonSub-Saharan Africa1334.2520.046780.422500.887670.234020.493090.057860.206181.95071
BulgariaCentral and Eastern Europe1344.2180.048281.012161.106140.766490.305870.008720.119210.89991
EgyptMiddle East and Northern Africa1354.1940.032600.881800.747000.617120.172880.063240.112911.59927
YemenMiddle East and Northern Africa1364.0770.043670.546490.680930.400640.355710.078540.091311.92313
AngolaSub-Saharan Africa1374.0330.047580.757780.860400.166830.103840.071220.123441.94939
MaliSub-Saharan Africa1383.9950.056020.260741.035260.205830.388570.123520.187981.79293
Congo (Brazzaville)Sub-Saharan Africa1393.9890.066820.678660.662900.310510.414660.116860.123881.68135
ComorosSub-Saharan Africa1403.9560.047970.239060.792730.363150.229170.199000.174411.95812
UgandaSub-Saharan Africa1413.9310.043170.211021.132990.338610.457270.072670.290661.42766
SenegalSub-Saharan Africa1423.9040.036080.364980.976190.435400.367720.107130.208431.44395
GabonSub-Saharan Africa1433.8960.045471.060240.905280.433720.319140.110910.068220.99895
NigerSub-Saharan Africa1443.8450.036020.069400.772650.297070.476920.156390.193871.87877
CambodiaSoutheastern Asia1453.8190.050690.460380.627360.611140.662460.072470.403590.98195
TanzaniaSub-Saharan Africa1463.7810.050610.285201.002680.382150.328780.057470.343771.38079
MadagascarSub-Saharan Africa1473.6810.036330.208240.668010.467210.191840.081240.213331.85100
Central African RepublicSub-Saharan Africa1483.6780.061120.078500.000000.066990.488790.082890.238352.72230
ChadSub-Saharan Africa1493.6670.038300.341930.760620.150100.235010.052690.183861.94296
GuineaSub-Saharan Africa1503.6560.035900.174170.464750.240090.377250.121390.286571.99172
Ivory CoastSub-Saharan Africa1513.6550.051410.465340.771150.151850.468660.179220.201651.41723
Burkina FasoSub-Saharan Africa1523.5870.043240.258120.851880.271250.394930.128320.217471.46494
AfghanistanSouthern Asia1533.5750.030840.319820.302850.303350.234140.097190.365101.95210
RwandaSub-Saharan Africa1543.4650.034640.222080.773700.428640.592010.551910.226280.67042
BeninSub-Saharan Africa1553.3400.036560.286650.353860.319100.484500.080100.182601.63328
SyriaMiddle East and Northern Africa1563.0060.050150.663200.474890.721930.156840.189060.471790.32858
BurundiSub-Saharan Africa1572.9050.086580.015300.415870.223960.118500.100620.197271.83302
TogoSub-Saharan Africa1582.8390.067270.208680.139950.284430.364530.107310.166811.56726

158 rows × 10 columns

 
 
 
 
 
 
# Sort the rows by the hierarchical index, starting from level 0 (Region)
report_2015_df2.sort_index(level=0)
 
 
 
  Happiness RankHappiness ScoreStandard ErrorEconomy (GDP per Capita)FamilyHealth (Life Expectancy)FreedomTrust (Government Corruption)GenerosityDystopia Residual
RegionCountry          
Australia and New ZealandAustralia107.2840.040831.333581.309230.931560.651240.356370.435622.26646
New Zealand97.2860.033711.250181.319670.908370.639380.429220.475012.26425
Central and Eastern EuropeAlbania954.9590.050130.878670.804340.813250.357330.064130.142721.89894
Armenia1274.3500.047630.768210.777110.729900.198470.039000.078551.75873
Azerbaijan805.2120.033631.023890.937930.640450.370300.160650.077992.00073
Belarus595.8130.039381.031921.232890.736080.379380.190900.110462.13090
Bosnia and Herzegovina964.9490.069130.832230.919160.790810.092450.002270.248082.06367
Bulgaria1344.2180.048281.012161.106140.766490.305870.008720.119210.89991
Croatia625.7590.043941.082540.796240.788050.258830.024300.054442.75414
Czech Republic316.5050.041681.178981.206430.844830.463640.026520.106862.67782
Estonia735.4290.040131.151741.227910.773610.448880.151840.086801.58782
Georgia1304.2970.042210.741900.385620.729260.405770.383310.055471.59541
Hungary1044.8000.061071.120941.202150.759050.321120.027580.128001.24074
Kazakhstan545.8550.041141.122541.122410.643680.516490.084540.118272.24729
Kosovo695.5890.050180.801480.811980.631320.247490.047410.283102.76579
Kyrgyzstan775.2860.038230.474281.151150.650880.434770.042320.300302.23270
Latvia895.0980.046401.113121.095620.724370.296710.063320.182261.62215
Lithuania565.8330.038431.147231.257450.731280.213420.010310.026412.44649
Macedonia935.0070.053760.918511.002320.735450.334570.053270.223591.73933
Moldova525.8890.037990.594481.015280.618260.328180.016150.209513.10712
Montenegro825.1920.052350.974380.905570.725210.182600.142960.161402.10017
Poland605.7910.042631.125551.279480.779030.531220.042120.167591.86565
Romania865.1240.066071.043450.885880.768900.350680.006490.137481.93129
Russia645.7160.031351.137641.236170.669260.366790.030050.001992.27394
Serbia875.1230.048640.920531.009640.748360.201070.026170.192312.02500
Slovakia455.9950.042671.168911.269990.789020.317510.034310.168932.24639
Slovenia555.8480.042511.184981.273850.873370.608550.037870.253281.61583
Tajikistan1064.7860.031980.390470.855630.573790.472160.150720.229742.11399
Turkmenistan705.5480.041750.958471.226680.538860.476100.308440.169791.86984
Ukraine1114.6810.044120.799071.202780.673900.251230.029610.152751.57140
....................................
Sub-Saharan AfricaSomaliland region915.0570.061610.188470.951520.438730.465820.399280.503182.11032
South Africa1134.6420.045850.920491.184680.276880.332070.088840.119731.71956
Sudan1184.5500.067400.521071.014040.368780.100810.146600.190622.20857
Swaziland1014.8670.087420.712061.072840.075660.306580.030600.182592.48676
Tanzania1463.7810.050610.285201.002680.382150.328780.057470.343771.38079
Togo1582.8390.067270.208680.139950.284430.364530.107310.166811.56726
Uganda1413.9310.043170.211021.132990.338610.457270.072670.290661.42766
Zambia855.1290.069880.470380.916120.299240.488270.124680.195912.63430
Zimbabwe1154.6100.042900.271001.032760.334750.258610.080790.189872.44191
Western EuropeAustria137.2000.037511.337231.297040.890420.624330.186760.330882.53320
Belgium196.9370.035951.307821.285660.896670.584500.225400.222502.41484
Cyprus675.6890.055801.208130.893180.923560.406720.061460.306381.88931
Denmark37.5270.033281.325481.360580.874640.649380.483570.341392.49204
Finland67.4060.031401.290251.318260.889110.641690.413720.233512.61955
France296.5750.035121.277781.260380.945790.550110.206460.123322.21126
Germany266.7500.018481.327921.299370.891860.614770.218430.282142.11569
Greece1024.8570.050621.154060.929330.882130.076990.013970.000001.80101
Iceland27.5610.048841.302321.402230.947840.628770.141450.436302.70201
Ireland186.9400.036761.335961.369480.895330.617770.287030.459011.97570
Italy505.9480.039141.251141.197770.954460.262360.029010.228232.02518
Luxembourg176.9460.034991.563911.219630.918940.615830.377980.280341.96961
Malta376.3020.042061.207401.302030.887210.603650.135860.517521.64880
Netherlands77.3780.027991.329441.280170.892840.615760.318140.476102.46570
North Cyprus665.6950.056351.208061.070080.923560.490270.142800.261691.59888
Norway47.5220.038801.459001.330950.885210.669730.365030.346992.46531
Portugal885.1020.048021.159911.139350.875190.514690.010780.137191.26462
Spain366.3290.034681.230111.313790.955620.459510.063980.182272.12367
Sweden87.3640.031571.331711.289070.910870.659800.438440.362622.37119
Switzerland17.5870.034111.396511.349510.941430.665570.419780.296782.51738
United Kingdom216.8670.018661.266371.285480.909430.596250.320670.519121.96994

158 rows × 10 columns

 

7. 数据清洗

 
 
 
 
 
 
# Load the playback log from log.csv and display it.
log_data = pd.read_csv('log.csv')
log_data
 
 
 
 timeuservideoplayback positionpausedvolume
01469974424cherylintro.html5False10.0
11469974454cherylintro.html6NaNNaN
21469974544cherylintro.html9NaNNaN
31469974574cherylintro.html10NaNNaN
41469977514bobintro.html1NaNNaN
51469977544bobintro.html1NaNNaN
61469977574bobintro.html1NaNNaN
71469977604bobintro.html1NaNNaN
81469974604cherylintro.html11NaNNaN
91469974694cherylintro.html14NaNNaN
101469974724cherylintro.html15NaNNaN
111469974454sueadvanced.html24NaNNaN
121469974524sueadvanced.html25NaNNaN
131469974424sueadvanced.html23False10.0
141469974554sueadvanced.html26NaNNaN
151469974624sueadvanced.html27NaNNaN
161469974654sueadvanced.html28NaN5.0
171469974724sueadvanced.html29NaNNaN
181469974484cherylintro.html7NaNNaN
191469974514cherylintro.html8NaNNaN
201469974754sueadvanced.html30NaNNaN
211469974824sueadvanced.html31NaNNaN
221469974854sueadvanced.html32NaNNaN
231469974924sueadvanced.html33NaNNaN
241469977424bobintro.html1True10.0
251469977454bobintro.html1NaNNaN
261469977484bobintro.html1NaNNaN
271469977634bobintro.html1NaNNaN
281469977664bobintro.html1NaNNaN
291469974634cherylintro.html12NaNNaN
301469974664cherylintro.html13NaNNaN
311469977694bobintro.html1NaNNaN
321469977724bobintro.html1NaNNaN
 
 
 
 
 
 
log_data.set_index(['time', 'user'], inplace=True)
log_data.sort_index(inplace=True)
log_data
 
 
 
                    video  playback position  paused  volume
time        user
1469974424cherylintro.html5False10.0
sueadvanced.html23False10.0
1469974454cherylintro.html6NaNNaN
sueadvanced.html24NaNNaN
1469974484cherylintro.html7NaNNaN
1469974514cherylintro.html8NaNNaN
1469974524sueadvanced.html25NaNNaN
1469974544cherylintro.html9NaNNaN
1469974554sueadvanced.html26NaNNaN
1469974574cherylintro.html10NaNNaN
1469974604cherylintro.html11NaNNaN
1469974624sueadvanced.html27NaNNaN
1469974634cherylintro.html12NaNNaN
1469974654sueadvanced.html28NaN5.0
1469974664cherylintro.html13NaNNaN
1469974694cherylintro.html14NaNNaN
1469974724cherylintro.html15NaNNaN
sueadvanced.html29NaNNaN
1469974754sueadvanced.html30NaNNaN
1469974824sueadvanced.html31NaNNaN
1469974854sueadvanced.html32NaNNaN
1469974924sueadvanced.html33NaNNaN
1469977424bobintro.html1True10.0
1469977454bobintro.html1NaNNaN
1469977484bobintro.html1NaNNaN
1469977514bobintro.html1NaNNaN
1469977544bobintro.html1NaNNaN
1469977574bobintro.html1NaNNaN
1469977604bobintro.html1NaNNaN
1469977634bobintro.html1NaNNaN
1469977664bobintro.html1NaNNaN
1469977694bobintro.html1NaNNaN
1469977724bobintro.html1NaNNaN
 
 
 
 
 
 
log_data.fillna(0)
 
 
 
                    video  playback position  paused  volume
time        user
1469974424cherylintro.html5False10.0
sueadvanced.html23False10.0
1469974454cherylintro.html600.0
sueadvanced.html2400.0
1469974484cherylintro.html700.0
1469974514cherylintro.html800.0
1469974524sueadvanced.html2500.0
1469974544cherylintro.html900.0
1469974554sueadvanced.html2600.0
1469974574cherylintro.html1000.0
1469974604cherylintro.html1100.0
1469974624sueadvanced.html2700.0
1469974634cherylintro.html1200.0
1469974654sueadvanced.html2805.0
1469974664cherylintro.html1300.0
1469974694cherylintro.html1400.0
1469974724cherylintro.html1500.0
sueadvanced.html2900.0
1469974754sueadvanced.html3000.0
1469974824sueadvanced.html3100.0
1469974854sueadvanced.html3200.0
1469974924sueadvanced.html3300.0
1469977424bobintro.html1True10.0
1469977454bobintro.html100.0
1469977484bobintro.html100.0
1469977514bobintro.html100.0
1469977544bobintro.html100.0
1469977574bobintro.html100.0
1469977604bobintro.html100.0
1469977634bobintro.html100.0
1469977664bobintro.html100.0
1469977694bobintro.html100.0
1469977724bobintro.html100.0
 
 
 
 
 
 
log_data.dropna()
 
 
 
                            video  playback position  paused  volume
time        user
1469974424  cheryl     intro.html                  5   False    10.0
            sue     advanced.html                 23   False    10.0
1469977424  bob        intro.html                  1    True    10.0
 
 
 
 
 
 
log_data.ffill()
 
 
 
                    video  playback position  paused  volume
time        user
1469974424cherylintro.html5False10.0
sueadvanced.html23False10.0
1469974454cherylintro.html6False10.0
sueadvanced.html24False10.0
1469974484cherylintro.html7False10.0
1469974514cherylintro.html8False10.0
1469974524sueadvanced.html25False10.0
1469974544cherylintro.html9False10.0
1469974554sueadvanced.html26False10.0
1469974574cherylintro.html10False10.0
1469974604cherylintro.html11False10.0
1469974624sueadvanced.html27False10.0
1469974634cherylintro.html12False10.0
1469974654sueadvanced.html28False5.0
1469974664cherylintro.html13False5.0
1469974694cherylintro.html14False5.0
1469974724cherylintro.html15False5.0
sueadvanced.html29False5.0
1469974754sueadvanced.html30False5.0
1469974824sueadvanced.html31False5.0
1469974854sueadvanced.html32False5.0
1469974924sueadvanced.html33False5.0
1469977424bobintro.html1True10.0
1469977454bobintro.html1True10.0
1469977484bobintro.html1True10.0
1469977514bobintro.html1True10.0
1469977544bobintro.html1True10.0
1469977574bobintro.html1True10.0
1469977604bobintro.html1True10.0
1469977634bobintro.html1True10.0
1469977664bobintro.html1True10.0
1469977694bobintro.html1True10.0
1469977724bobintro.html1True10.0
 
 
 
 
 
 
log_data.bfill()
 
 
 
                    video  playback position  paused  volume
time        user
1469974424cherylintro.html5False10.0
sueadvanced.html23False10.0
1469974454cherylintro.html6True5.0
sueadvanced.html24True5.0
1469974484cherylintro.html7True5.0
1469974514cherylintro.html8True5.0
1469974524sueadvanced.html25True5.0
1469974544cherylintro.html9True5.0
1469974554sueadvanced.html26True5.0
1469974574cherylintro.html10True5.0
1469974604cherylintro.html11True5.0
1469974624sueadvanced.html27True5.0
1469974634cherylintro.html12True5.0
1469974654sueadvanced.html28True5.0
1469974664cherylintro.html13True10.0
1469974694cherylintro.html14True10.0
1469974724cherylintro.html15True10.0
sueadvanced.html29True10.0
1469974754sueadvanced.html30True10.0
1469974824sueadvanced.html31True10.0
1469974854sueadvanced.html32True10.0
1469974924sueadvanced.html33True10.0
1469977424bobintro.html1True10.0
1469977454bobintro.html1NaNNaN
1469977484bobintro.html1NaNNaN
1469977514bobintro.html1NaNNaN
1469977544bobintro.html1NaNNaN
1469977574bobintro.html1NaNNaN
1469977604bobintro.html1NaNNaN
1469977634bobintro.html1NaNNaN
1469977664bobintro.html1NaNNaN
1469977694bobintro.html1NaNNaN
1469977724bobintro.html1NaNNaN
 
 
 
 
 
 
 
 
 
 

转载于:https://www.cnblogs.com/crawer-1/p/7837209.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值