import pandas as pd
1、Series结构的增删改查,dataframe类似
data = [10,11,12]
index = ['a','b','c']
s = pd.Series(data = data,index = index)
s
>a 10
b 11
c 12
dtype: int64
s[0]
>10
s[0:2]
>a 10
b 11
dtype: int64
mask = [True,False,True]
s[mask]
>a 10
c 12
dtype: int64
s.loc['b']
>11
s.iloc[1]
>11
del s1['A'] #删除A列
s1.drop(['b','d'],inplace = True) #删除A列
2、merge合并
import pandas as pd
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']})
res = pd.merge(left, right)
res = pd.merge(left, right, on = 'key') #建议加上'on = '
对于可能有缺失值的数据,也可采用:
left = pd.DataFrame({'key1': ['K0', 'K1', 'K2', 'K3'],
'key2': ['K0', 'K1', 'K2', 'K3'],
'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key1': ['K0', 'K1', 'K2', 'K3'],
'key2': ['K0', 'K1', 'K2', 'K4'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']})
res = pd.merge(left, right, on = ['key1', 'key2']) #共有的key1,key2,会漏数据
> A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
res = pd.merge(left, right, on = ['key1', 'key2'], how = 'outer')
> A Bkey1 key2 C D #缺失的赋值NAN,不会漏数据
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
3 A3 B3 K3 K3 NaN NaN
4 NaN NaN K3 K4 C3 D3
res = pd.merge(left, right, on = ['key1', 'key2'],
how = 'outer', indicator = True) #显示数据来源(left ro right)
res = pd.merge(left, right, how = 'left') #只显示左边的
> A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K1 K1 C1 D1
2 A2 B2 K2 K2 C2 D2
3 A3 B3 K3 K3 NaN NaN
res = pd.merge(left, right, how = 'right')
3、对于显示设置:
pd.get_option('display.max_rows')
>60
pd.set_option('display.max_rows',6)
pd.get_option('display.max_columns')
pd.set_option('display.max_columns',30)
pd.get_option('display.max_colwidth')
pd.get_option('display.max_colwidth')
pd.get_option('display.precision')
pd.set_option('display.precision',5) #小数点位数
4、pivot数据透视表:
example = pd.DataFrame({'Month': ["January", "January", "January", "January",
"February", "February", "February", "February",
"March", "March", "March", "March"],
'Category': ["Transportation", "Grocery", "Household", "Entertainment",
"Transportation", "Grocery", "Household", "Entertainment",
"Transportation", "Grocery", "Household", "Entertainment"],
'Amount': [74., 235., 175., 100., 115., 240., 225., 125., 90., 260., 200., 120.]})
example_pivot = example.pivot(index = 'Category',columns= 'Month',values = 'Amount')
example_pivot
> Month February January March
Category
Entertainment 125.0 100.0 120.0
Grocery 240.0 235.0 260.0
Household 225.0 175.0 200.0
Transportation 115.0 74.0 90.0
df.pivot_table(index = 'Sex',columns='Pclass',values='Fare',aggfunc='max')
#aggfunc处理函数
df.pivot_table(index = 'Sex',columns='Pclass',values='Fare',aggfunc='count')
df['Underaged'] = df['Age'] <= 18
df.pivot_table(index = 'Underaged',columns='Sex',values='Survived',aggfunc='mean')
Sex female male
Underaged
False 0.760163 0.167984
True 0.676471 0.338028