环境:
python3.7
双剑客:
pip install numpy -i https://pypi.douban.com/simple
pip install pandas -i https://pypi.douban.com/simple
一、numpy
1.1
numpy连接:np.concatenate()、np.vstack()、np.hstack()
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: joining two score tables with NumPy.
# The NumPy functions below are handed DataFrames and print plain ndarrays,
# so the row/column labels do not appear in the printed result.
subjects = ['语文', '数学', '英语', '理综']

# Two 4x4 tables of random integer scores drawn from [0, 150).
df1 = pd.DataFrame(
    np.random.randint(0, 150, size=(4, 4)),
    index=['张三', '李四', '王五', '赵柳'],
    columns=subjects,
)
df2 = pd.DataFrame(
    np.random.randint(0, 150, size=(4, 4)),
    index=['天气', '赵柳', '小明', '小红'],
    columns=subjects,
)

# np.concatenate with its default axis: df2's rows are placed below df1's
# (vertical concatenation).
stacked_rows = np.concatenate((df1, df2))
print(stacked_rows)

# axis=1: df2's columns are placed to the right of df1's
# (horizontal concatenation).
stacked_cols = np.concatenate((df1, df2), axis=1)
print(stacked_cols)

# np.vstack: vertical concatenation.
print(np.vstack((df1, df2)))

# np.hstack: horizontal concatenation.
print(np.hstack((df1, df2)))
[[117 25 75 90]
[ 65 107 74 46]
[133 126 54 94]
[ 57 73 40 99]
[ 63 80 63 112]
[133 104 26 120]
[136 100 91 64]
[139 50 74 28]]
[[117 25 75 90 63 80 63 112]
[ 65 107 74 46 133 104 26 120]
[133 126 54 94 136 100 91 64]
[ 57 73 40 99 139 50 74 28]]
[[117 25 75 90]
[ 65 107 74 46]
[133 126 54 94]
[ 57 73 40 99]
[ 63 80 63 112]
[133 104 26 120]
[136 100 91 64]
[139 50 74 28]]
[[117 25 75 90 63 80 63 112]
[ 65 107 74 46 133 104 26 120]
[133 126 54 94 136 100 91 64]
[ 57 73 40 99 139 50 74 28]]
Process finished with exit code 0
二、append
2.1
append连接:append()(注意:DataFrame.append() 在 pandas 1.4 中已弃用,并在 pandas 2.0 中移除;新版本请使用 pd.concat() 实现同样的效果)
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: appending the rows of one score table to another.
# Two 4x4 tables of random integer scores in [0, 150) sharing the same columns.
data = np.random.randint(0, 150, size=(4, 4))
index = ['张三', '李四', '王五', '赵柳']
columns = ['语文', '数学', '英语', '理综']
df1 = DataFrame(data=data, index=index, columns=columns)
data = np.random.randint(0, 150, size=(4, 4))
index = ['天气', '赵柳', '小明', '小红']
columns = ['语文', '数学', '英语', '理综']
df2 = DataFrame(data=data, index=index, columns=columns)

# Vertical concatenation (row append).
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the equivalent pd.concat() call is used instead; it accepts the same
# ignore_index / sort flags and works on both old and new pandas versions.
#   ignore_index=True -> discard the name labels and renumber rows 0..n-1
#   sort=True         -> sort the column labels of the result
combined = pd.concat([df1, df2], ignore_index=True, sort=True)
print(combined)
数学 理综 英语 语文
0 79 29 88 86
1 131 84 32 12
2 10 78 85 141
3 1 15 57 11
4 135 43 145 60
5 147 118 88 82
6 91 27 62 127
7 35 120 104 50
Process finished with exit code 0
三、pandas
3.1
pandas连接:pd.concat()
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: concatenating two score tables with pd.concat.
subjects = ['语文', '数学', '英语', '理综']

# Two 4x4 tables of random integer scores drawn from [0, 150).
df1 = pd.DataFrame(
    np.random.randint(0, 150, size=(4, 4)),
    index=['张三', '李四', '王五', '赵柳'],
    columns=subjects,
)
df2 = pd.DataFrame(
    np.random.randint(0, 150, size=(4, 4)),
    index=['天气', '赵柳', '小明', '小红'],
    columns=subjects,
)
frames = [df1, df2]

# Vertical concatenation.
# ignore_index=True drops the original row labels and renumbers the rows.
renumbered = pd.concat(frames, ignore_index=True)
print(renumbered)
print()

# Still vertical: axis=0 stacks rows, it does not place the frames side by side.
# keys=['x', 'y'] adds an outer index level that tags which input frame
# each row came from.
tagged = pd.concat(frames, axis=0, keys=['x', 'y'])
print(tagged)
语文 数学 英语 理综
0 90 112 49 72
1 50 130 58 54
2 46 22 70 32
3 1 86 20 94
4 70 77 37 18
5 64 30 108 1
6 148 108 7 80
7 119 107 94 0
语文 数学 英语 理综
x 张三 90 112 49 72
李四 50 130 58 54
王五 46 22 70 32
赵柳 1 86 20 94
y 天气 70 77 37 18
赵柳 64 30 108 1
小明 148 108 7 80
小红 119 107 94 0
Process finished with exit code 0
3.2
pandas连接:pd.merge()
一对一合并
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: one-to-one merge.
# The only column the two frames share is 'sex', and its values are unique
# within each frame, so every matching key pairs exactly one row with one row.
df3 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b', 'b1', 'g', 'b2'],
    },
    index=list('1234'),
)
df4 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'sex': ['b', 'b3', 'g', 'b4'],
    },
    index=list('4567'),
)

# With no `on` argument the merge joins on the shared column ('sex').
one_to_one = df3.merge(df4)
print(one_to_one)
name age sex job
0 张三 23 b 文员
1 王五 24 g 前台
Process finished with exit code 0
3.3
一对多
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: one-to-many merge.
# 'b1' occurs once in df5['sex'] but twice in df6['sex'], so the single
# df5 row is repeated once for each matching df6 row.
df5 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b1', 'b2', 'g', 'b3'],
    },
    index=list('1234'),
)
df6 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'sex': ['b1', 'b1', 'g1', 'b4'],
    },
    index=list('4567'),
)

# Joins on the shared column ('sex').
one_to_many = df5.merge(df6)
print(one_to_many)
name age sex job
0 张三 23 b1 文员
1 张三 23 b1 技术
Process finished with exit code 0
3.4
多对多
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: many-to-many merge.
# 'b1' occurs twice in both frames' 'sex' columns, so every 'b1' row of df7
# is paired with every 'b1' row of df8 (2 x 2 = 4 result rows).
df7 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b1', 'b1', 'g1', 'g2'],
    },
    index=list('1234'),
)
df8 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'sex': ['b1', 'b1', 'g3', 'g4'],
    },
    index=list('4567'),
)

# Joins on the shared column ('sex').
many_to_many = df7.merge(df8)
print(many_to_many)
name age sex job
0 张三 23 b1 文员
1 张三 23 b1 技术
2 李四 22 b1 文员
3 李四 22 b1 技术
Process finished with exit code 0
3.5
指定key:on=''
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: choosing the join key explicitly with on=.
# Both frames have 'sex' AND 'age' columns; on='sex' joins on 'sex' only.
df9 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b1', 'b1', 'g1', 'g2'],
    },
    index=list('1234'),
)
df10 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'sex': ['b1', 'b1', 'g3', 'g4'],
        'age': [23, 22, 24, 25],
    },
    index=list('4567'),
)

# The other shared column ('age') is kept from both sides and shows up
# in the result as 'age_x' (from df9) and 'age_y' (from df10).
joined_on_sex = pd.merge(df9, df10, on='sex')
print(joined_on_sex)
name age_x sex job age_y
0 张三 23 b1 文员 23
1 张三 23 b1 技术 22
2 李四 22 b1 文员 23
3 李四 22 b1 技术 22
Process finished with exit code 0
3.6
当两个数据框没有共同的key,但有共同的值时,可以使用 left_on=''、right_on='' 分别指定左右两侧用于连接的列
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: joining on differently named columns with left_on= / right_on=.
# The frames share no column name, but df11['age'] and df12['number']
# hold comparable values.
df11 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b1', 'b1', 'g1', 'g2'],
    },
    index=list('1234'),
)
df12 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'number': [23, 22, 26, 27],
    },
    index=list('4567'),
)

# left_on names the key column of the left frame, right_on that of the right.
matched = pd.merge(df11, df12, left_on='age', right_on='number')
print(matched)
name age sex job number
0 张三 23 b1 文员 23
1 李四 22 b1 技术 22
Process finished with exit code 0
3.7
当没有共同的key时可以指定索引来合并:left_index=True、right_index=True
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: joining on the row index with left_index=True / right_index=True.
# The frames share no column name; their indexes ('1'..'4' and '4'..'7')
# overlap only at '4', so the merge yields a single row.
df11 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b1', 'b1', 'g1', 'g2'],
    },
    index=list('1234'),
)
df12 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'number': [23, 22, 26, 27],
    },
    index=list('4567'),
)

# Use each frame's index as its join key.
joined_by_index = pd.merge(df11, df12, left_index=True, right_index=True)
print(joined_by_index)
name age sex job number
4 赵六 25 g2 文员 23
Process finished with exit code 0
3.8
内合并:how='inner'
左合并:how='left'
右合并:how='right'
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: inner / left / right joins selected with how=.
df11 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b1', 'b1', 'g1', 'g2'],
    },
    index=list('1234'),
)
df12 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'number': [23, 22, 26, 27],
    },
    index=list('4567'),
)

# Printed in this order, separated by a blank line:
#   'inner' (the default) - only rows whose keys match on both sides
#   'left'                - every df11 row; unmatched df12 columns become NaN
#   'right'               - every df12 row; unmatched df11 columns become NaN
for position, join_kind in enumerate(('inner', 'left', 'right')):
    if position:
        print()
    print(pd.merge(df11, df12, left_on='age', right_on='number', how=join_kind))
name age sex job number
0 张三 23 b1 文员 23
1 李四 22 b1 技术 22
name age sex job number
0 张三 23 b1 文员 23.0
1 李四 22 b1 技术 22.0
2 王五 24 g1 NaN NaN
3 赵六 25 g2 NaN NaN
name age sex job number
0 张三 23.0 b1 文员 23
1 李四 22.0 b1 技术 22
2 NaN NaN NaN 前台 26
3 NaN NaN NaN 后台 27
Process finished with exit code 0
3.9
解决冲突
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Demo: resolving column-name clashes with suffixes=.
# Both frames have 'age' and 'sex'; the join is on 'age' only, so the two
# 'sex' columns collide in the result and must be told apart.
df13 = pd.DataFrame(
    {
        'name': ['张三', '李四', '王五', '赵六'],
        'age': [23, 22, 24, 25],
        'sex': ['b1', 'b1', 'g1', 'g2'],
    },
    index=list('1234'),
)
df14 = pd.DataFrame(
    {
        'job': ['文员', '技术', '前台', '后台'],
        'age': [23, 22, 26, 27],
        'sex': ['b2', 'b1', 'g1', 'g2'],
    },
    index=list('4567'),
)

# The first suffix is appended to the clashing column from df13,
# the second to the one from df14.
disambiguated = pd.merge(df13, df14, on='age', suffixes=('_df13', '_df14'))
print(disambiguated)
name age sex_df13 job sex_df14
0 张三 23 b1 文员 b2
1 李四 22 b1 技术 b1
Process finished with exit code 0