1.导入pandas,读取csv
import pandas as pd
# Load the Titanic passenger table from the local data directory.
df = pd.read_csv(filepath_or_buffer='./data/titanic.csv')
2.选取年龄(Age)和票价(Fare)列
# Single-column selection yields a Series; show its first five entries.
df['Age'].head(5)
# A list of column names yields a DataFrame; show its first five rows.
df[['Age', 'Fare']].head(5)
3.定位
- loc:按标签(label)定位
- iloc:按位置(position)定位
# Row at position 0, returned as a Series.
df.iloc[0, :]
# Rows at positions 0 through 4 (the end of an iloc slice is exclusive).
df.iloc[:5]
# Same rows, restricted to the columns at positions 1 and 2.
df.iloc[:5, 1:3]
4.设置索引(label),用loc按标签定位
# Use the passenger name as the row label so rows can be looked up by name.
df = df.set_index(keys='Name')
# Full row for one passenger.
df.loc['Heikkinen, Miss. Laina', :]
# Single cell: that passenger's fare.
df.loc['Heikkinen, Miss. Laina', 'Fare']
# Label slices include BOTH endpoints, unlike positional slices.
df.loc[slice('Heikkinen, Miss. Laina', 'Allen, Mr. William Henry'), :]
# loc also supports assignment: overwrite one cell in place.
df.loc['Heikkinen, Miss. Laina', 'Fare'] = 1000
# Inspect the first five rows to see the updated fare.
df.head(5)
5.布尔(bool)类型索引
# Element-wise comparison produces a boolean Series (the mask).
df['Fare'].gt(40)
# Keep only the rows where the mask is True; show the first five.
df.loc[df['Fare'].gt(40)].head(5)
# Same idea with an equality test on a string column.
df.loc[df['Sex'].eq('male')].head(5)
# Mask the rows and pick one column in a single loc call: mean age of males.
df.loc[df['Sex'].eq('male'), 'Age'].mean()
# Summing a boolean Series counts its True values: passengers older than 70.
df['Age'].gt(70).sum()