第二章:数据重构

import numpy as np
import pandas as pd
# Read the four CSV fragments of the training table. The file names label
# each piece by position (left/right, up/down).
data_1, data_2, df_1, df_2 = (
    pd.read_csv(csv_name)
    for csv_name in (
        'train-left-down.csv',
        'train-left-up.csv',
        'train-right-down.csv',
        'train-right-up.csv',
    )
)
# Notebook-style previews: bare expressions whose value is only shown in an
# interactive session.
data_1.head()
data_2.head()
df_1.head(3)
df_2.head(3)
# Approach 1: attach the right-hand fragment to the left-hand one with join()
# (aligned on the row index), then stack the upper rows above the lower rows.
result_up = data_2.join(df_2)
result_up.head(3)
result_down = data_1.join(df_1)
# DataFrame.append() was removed in pandas 2.0; pd.concat() performs the same
# row-wise concatenation and keeps each piece's original index labels.
result = pd.concat([result_up, result_down])
result.head(3)

# Approach 2: same reconstruction, but pairing the left and right fragments
# with merge() keyed on the row index of both sides.
result_down = data_1.merge(df_1, right_index=True, left_index=True)
result_down.head(3)
result_up = data_2.merge(df_2, right_index=True, left_index=True)
# DataFrame.append() was removed in pandas 2.0; pd.concat() performs the same
# row-wise concatenation and keeps each piece's original index labels.
result = pd.concat([result_up, result_down])
result.head(3)
s1 = df_1.stack()  # stack() converts the DataFrame into a Series
s1

# groupby demo on a two-level (Animal, Type) MultiIndex
arrays = [
    ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
    ['Captive', 'Wild', 'Captive', 'Wild'],
]
index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
df = pd.DataFrame(
    [390.0, 350.0, 30.0, 20.0],
    index=index,
    columns=['Max Speed'],
)
df

# Sum 'Max Speed' within each value of the 'Type' index level.
df.groupby(level='Type').sum()

# Average fare paid by male and female Titanic passengers
result = pd.read_csv('result.csv')
result.head()

sex_fare = result.groupby('Sex')['Fare'].mean()
sex_fare

# Number of survivors among men and women
sex_survived = result.groupby('Sex')['Survived'].sum()
sex_survived
# Number of survivors in each cabin class
result.groupby('Pclass')['Survived'].sum()

# Mean fare per age, restricted to first-class passengers
result[result['Pclass'] == 1].groupby('Age')['Fare'].mean()

# Put the two per-sex series side by side, matched on their shared 'Sex' index.
df = pd.merge(
    sex_fare,
    sex_survived,
    left_index=True,
    right_index=True,
)
df

# Number of survivors at each age
age_sur = result.groupby(['Age']).Survived.sum()
age_sur.head(3)
# Age(s) with the highest survivor count; ties are all kept.
age_sured = age_sur[age_sur == age_sur.max()]
age_sured
# NOTE(review): the original divided by `age_sum`, which was never defined and
# raised NameError. It is assumed here to be the total number of survivors, so
# the ratio is that age's share of all survivors - confirm the intended
# denominator (e.g. total passenger count instead).
age_sum = result['Survived'].sum()
sur_rate = age_sured / age_sum  # survival rate
sur_rate

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值