操作文件
HM6Ly9ibG9nLmNzZG4ubmV0L3FxXzUzNzM5NTQ4,size_16,color_FFFFFF,t_70)
concat参数ignore_index
import numpy as np
import pandas as pd
# concat: effect of ignore_index
# Build three 3x4 DataFrames (not Series) filled with 0, 1 and 2 respectively;
# all of them share the column labels a-d.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list('abcd'))
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=list('abcd'))
print(df1, df2, df3, sep='\n')

# Stack the frames vertically (axis=0); ignore_index renumbers the rows 0..8.
df4 = pd.concat((df1, df2, df3), axis=0, ignore_index=True)
# Place the frames side by side (axis=1); ignore_index renumbers the columns 0..11.
df5 = pd.concat((df1, df2, df3), axis=1, ignore_index=True)
print(df4, df5, sep='\n')
concat 的参数 join:['inner', 'outer']
# Two frames whose row labels (1-3 vs 2-4) and column labels (a-d vs b-e)
# only partly overlap.
df1 = pd.DataFrame(np.zeros((3, 4)), index=[1, 2, 3], columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=list('bcde'))
print(df1, df2, sep='\n')

# join defaults to 'outer': the union of the columns is kept and cells that
# have no source value are filled with NaN. df3 and df4 are therefore equal.
df3 = pd.concat((df1, df2))
df4 = pd.concat((df1, df2), join='outer')
print(df3, df4, sep='\n')

# join='inner' keeps only the columns present in both frames (b, c, d);
# ignore_index renumbers the rows 0..5.
df5 = pd.concat((df1, df2), join='inner', ignore_index=True)
print(df5)
merge
# Merge on two key columns at once.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left, right, sep='\n')

# how='inner' (the default) keeps only (key1, key2) pairs found in both frames.
res = left.merge(right, how='inner', on=['key1', 'key2'])
print(res)

# how accepts 'left', 'right', 'outer' or 'inner':
#   'outer' / 'inner' -> union / intersection of the keys of both frames
#   'left' / 'right'  -> keep every key of that one frame
# how='left' keeps every row of `left`; columns coming from `right` are NaN
# where no matching key pair exists.
res1 = left.merge(right, how='left', on=['key1', 'key2'])
print(res1)
# indicator: add a column telling whether each merged row came from the left
# frame only, the right frame only, or both. The column is named `_merge`
# when indicator=True.
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
print(df1, df2, sep='\n')

res = df1.merge(df2, how='outer', on='col1')
res1 = df1.merge(df2, how='outer', on='col1', indicator=True)
print(res, res1, sep='\n')

# Passing a string instead of True gives the indicator column a custom name.
res2 = df1.merge(df2, how='outer', on='col1', indicator='indicator_column')
print(res2)
merge by index:通过 index 进行合并
# Two frames that share only part of their row labels (K0 and K2).
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
print(left, right, sep='\n')

# left_index / right_index: merge does not match on the index unless asked to;
# with both flags set the row labels are used as the join key. `how` still
# accepts 'left', 'right', 'outer' and 'inner'.
# 'outer' keeps every label from either frame and fills the gaps with NaN.
res = left.merge(right, how='outer', left_index=True, right_index=True)
# 'inner' keeps only the labels present in both frames.
res1 = left.merge(right, how='inner', left_index=True, right_index=True)
print(res, res1, sep='\n')
# Overlapping column names: both frames carry an `age` column besides the key,
# so the suffixes are appended to tell the two apart after the merge.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
res = boys.merge(girls, how='inner', on='k', suffixes=('_boy', '_girl'))
print(res)
pandas和matplotlib组合绘图
# Series: 1000 random values indexed 0..999, replaced by their running sum.
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
data = data.cumsum()
# Plotting the Series is left disabled here: data.plot()
# DataFrame: a 1000x4 table of random values with columns A-D, again replaced
# by the per-column running sum (this rebinds `data`, discarding the Series).
data = pd.DataFrame(np.random.randn(1000, 4), index=np.arange(1000), columns=list("ABCD"))
data = data.cumsum()
data.plot()
# NOTE(review): `plt` is not defined anywhere in the visible part of this file;
# presumably `import matplotlib.pyplot as plt` is needed before this line -- confirm.
plt.show()
# Other plot methods listed by the author:
# 'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'
# Disabled example: two scatter plots drawn on the same axes via `ax=`.
# ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label="Class 1")
# data.plot.scatter(x='A', y='C', color='LightGreen', label='Class 2', ax=ax)
#
# plt.show()