数据分析之pandas-numpy-append三种方式的连接

numpy、append、pandas


环境: python3.7
双剑客:

pip install numpy -i https://pypi.douban.com/simple
pip install pandas -i https://pypi.douban.com/simple

在这里插入图片描述

一、numpy

1.1
numpy连接:np.concatenate()、np.vstack()、np.hstack()

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Build two 4x4 score tables filled with random integers drawn from [0, 150).
# Both tables share the same subject columns; only the row labels differ.
columns = ['语文', '数学', '英语', '理综']

index = ['张三', '李四', '王五', '赵柳']
data = np.random.randint(0, 150, size=(4, 4))
df1 = pd.DataFrame(data, index=index, columns=columns)

index = ['天气', '赵柳', '小明', '小红']
data = np.random.randint(0, 150, size=(4, 4))
df2 = pd.DataFrame(data, index=index, columns=columns)

# The numpy routines below receive the DataFrames directly and print plain
# arrays, so the row/column labels do not appear in the output.
frames = (df1, df2)

# np.concatenate along axis 0: rows of df2 are placed below rows of df1.
rows_joined = np.concatenate(frames, axis=0)
print(rows_joined)

# np.concatenate along axis 1: columns of df2 are placed to the right of df1.
cols_joined = np.concatenate(frames, axis=1)
print(cols_joined)

# np.vstack: vertical (row-wise) stacking.
rows_stacked = np.vstack(frames)
print(rows_stacked)

# np.hstack: horizontal (column-wise) stacking.
cols_stacked = np.hstack(frames)
print(cols_stacked)
[[117  25  75  90]
 [ 65 107  74  46]
 [133 126  54  94]
 [ 57  73  40  99]
 [ 63  80  63 112]
 [133 104  26 120]
 [136 100  91  64]
 [139  50  74  28]]

[[117  25  75  90  63  80  63 112]
 [ 65 107  74  46 133 104  26 120]
 [133 126  54  94 136 100  91  64]
 [ 57  73  40  99 139  50  74  28]]

[[117  25  75  90]
 [ 65 107  74  46]
 [133 126  54  94]
 [ 57  73  40  99]
 [ 63  80  63 112]
 [133 104  26 120]
 [136 100  91  64]
 [139  50  74  28]]

[[117  25  75  90  63  80  63 112]
 [ 65 107  74  46 133 104  26 120]
 [133 126  54  94 136 100  91  64]
 [ 57  73  40  99 139  50  74  28]]

Process finished with exit code 0

二、append

2.1
append连接:append()(注意:DataFrame.append() 在 pandas 1.4 中已被弃用,并在 pandas 2.0 中移除;新版本请改用 pd.concat())

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Build two 4x4 score tables filled with random integers drawn from [0, 150).
data = np.random.randint(0, 150, size=(4, 4))
index = ['张三', '李四', '王五', '赵柳']
columns = ['语文', '数学', '英语', '理综']
df1 = DataFrame(data=data, index=index, columns=columns)

data = np.random.randint(0, 150, size=(4, 4))
index = ['天气', '赵柳', '小明', '小红']
columns = ['语文', '数学', '英语', '理综']
df2 = DataFrame(data=data, index=index, columns=columns)

# Vertical (row-wise) concatenation, formerly written as
# df1.append(df2, ignore_index=True, sort=True).
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the call would raise AttributeError on current pandas. pd.concat() is the
# documented replacement and accepts the same options:
#   ignore_index=True -> drop the original row labels and renumber 0..n-1
#   sort=True         -> sort the column labels of the result
stacked = pd.concat([df1, df2], ignore_index=True, sort=True)
print(stacked)
    数学   理综   英语   语文
0   79   29   88   86
1  131   84   32   12
2   10   78   85  141
3    1   15   57   11
4  135   43  145   60
5  147  118   88   82
6   91   27   62  127
7   35  120  104   50

Process finished with exit code 0

三、pandas

3.1
pandas连接:pd.concat()

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Build two 4x4 score tables filled with random integers drawn from [0, 150).
# Both tables share the same subject columns; only the row labels differ.
columns = ['语文', '数学', '英语', '理综']

index = ['张三', '李四', '王五', '赵柳']
data = np.random.randint(0, 150, size=(4, 4))
df1 = pd.DataFrame(data, index=index, columns=columns)

index = ['天气', '赵柳', '小明', '小红']
data = np.random.randint(0, 150, size=(4, 4))
df2 = pd.DataFrame(data, index=index, columns=columns)

frames = [df1, df2]

# Vertical concatenation with pd.concat.
# ignore_index=True discards the original row labels and renumbers the rows.
print(pd.concat(frames, ignore_index=True))
print()

# Also a vertical concatenation: axis=0 selects the row axis.
# keys adds an outer index level ('x' for df1, 'y' for df2) above the
# original row labels, so each row shows which frame it came from.
print(pd.concat(frames, keys=['x', 'y'], axis=0))
    语文   数学   英语  理综
0   90  112   49  72
1   50  130   58  54
2   46   22   70  32
3    1   86   20  94
4   70   77   37  18
5   64   30  108   1
6  148  108    7  80
7  119  107   94   0

       语文   数学   英语  理综
x 张三   90  112   49  72
  李四   50  130   58  54
  王五   46   22   70  32
  赵柳    1   86   20  94
y 天气   70   77   37  18
  赵柳   64   30  108   1
  小明  148  108    7  80
  小红  119  107   94   0

Process finished with exit code 0

3.2
pandas连接:pd.merge()
一对一合并

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b', 'b1', 'g', 'b2'],
}
df3 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'sex': ['b', 'b3', 'g', 'b4'],
}
df4 = pd.DataFrame(data, index=index)

# One-to-one merge. No key is given, so the join uses the column the two
# tables have in common ('sex'); each matching value ('b', 'g') occurs once
# on each side, giving one output row per match.
print(df3.merge(df4))
  name  age sex job
0   张三   23   b  文员
1   王五   24   g  前台

Process finished with exit code 0

3.3
一对多

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'; every 'sex' value is unique.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b1', 'b2', 'g', 'b3'],
}
df5 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'; the value 'b1' appears twice.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'sex': ['b1', 'b1', 'g1', 'b4'],
}
df6 = pd.DataFrame(data, index=index)

# One-to-many merge on the shared 'sex' column: the single 'b1' row on the
# left is paired with both 'b1' rows on the right.
print(df5.merge(df6))
  name  age sex job
0   张三   23  b1  文员
1   张三   23  b1  技术

Process finished with exit code 0

3.4
多对多

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'; the value 'b1' appears twice.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b1', 'b1', 'g1', 'g2'],
}
df7 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'; the value 'b1' also appears twice.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'sex': ['b1', 'b1', 'g3', 'g4'],
}
df8 = pd.DataFrame(data, index=index)

# Many-to-many merge on the shared 'sex' column: each of the two 'b1' rows on
# the left is paired with each of the two 'b1' rows on the right (2 x 2 rows).
print(df7.merge(df8))
  name  age sex job
0   张三   23  b1  文员
1   张三   23  b1  技术
2   李四   22  b1  文员
3   李四   22  b1  技术

Process finished with exit code 0

3.5
指定key:on=''

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b1', 'b1', 'g1', 'g2'],
}
df9 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'. It shares two columns with the left
# table: 'sex' and 'age'.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'sex': ['b1', 'b1', 'g3', 'g4'],
    'age': [23, 22, 24, 25],
}
df10 = pd.DataFrame(data, index=index)

# When several columns are shared, on= picks the one to join on. The other
# shared column ('age') is kept from both sides as 'age_x' and 'age_y'.
print(pd.merge(df9, df10, on='sex'))

  name  age_x sex job  age_y
0   张三     23  b1  文员     23
1   张三     23  b1  技术     22
2   李四     22  b1  文员     23
3   李四     22  b1  技术     22

Process finished with exit code 0

3.6
当两个数据框没有共同的key,但有共同的值时,可以使用 left_on='' 和 right_on='' 来指定

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b1', 'b1', 'g1', 'g2'],
}
df11 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'. It has no column name in common with
# the left table, but 'number' holds values comparable to 'age'.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'number': [23, 22, 26, 27],
}
df12 = pd.DataFrame(data, index=index)

# With no shared column name, left_on / right_on name the key column on each
# side; rows are matched where df11['age'] equals df12['number'].
print(pd.merge(df11, df12, left_on='age', right_on='number'))
  name  age sex job  number
0   张三   23  b1  文员      23
1   李四   22  b1  技术      22

Process finished with exit code 0

3.7
当没有共同的key时可以指定索引来合并:left_index=True 和 right_index=True

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b1', 'b1', 'g1', 'g2'],
}
df11 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'. It has no column name in common with
# the left table.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'number': [23, 22, 26, 27],
}
df12 = pd.DataFrame(data, index=index)

# With no shared column, join on the row labels of both tables instead.
# Only the label '4' exists on both sides, so a single row is produced.
print(pd.merge(df11, df12, left_index=True, right_index=True))
  name  age sex job  number
4   赵六   25  g2  文员      23

Process finished with exit code 0

3.8
内合并:how='inner'
左合并:how='left'
右合并:how='right'

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b1', 'b1', 'g1', 'g2'],
}
df11 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'number': [23, 22, 26, 27],
}
df12 = pd.DataFrame(data, index=index)

# All three joins match df11['age'] against df12['number'].
join_keys = {'left_on': 'age', 'right_on': 'number'}

# Inner join (how='inner' is the default): only rows whose key is present on
# both sides are kept.
print(pd.merge(df11, df12, how='inner', **join_keys))
print()

# Left join: every row of df11 is kept; rows with no match in df12 get NaN in
# the right-hand columns.
print(pd.merge(df11, df12, how='left', **join_keys))
print()

# Right join: every row of df12 is kept; rows with no match in df11 get NaN in
# the left-hand columns.
print(pd.merge(df11, df12, how='right', **join_keys))
  name  age sex job  number
0   张三   23  b1  文员      23
1   李四   22  b1  技术      22

  name  age sex  job  number
0   张三   23  b1   文员    23.0
1   李四   22  b1   技术    22.0
2   王五   24  g1  NaN     NaN
3   赵六   25  g2  NaN     NaN

  name   age  sex job  number
0   张三  23.0   b1  文员      23
1   李四  22.0   b1  技术      22
2  NaN   NaN  NaN  前台      26
3  NaN   NaN  NaN  后台      27

Process finished with exit code 0

3.9
解决冲突

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Left table: people, labelled '1'..'4'.
index = ['1', '2', '3', '4']
data = {
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [23, 22, 24, 25],
    'sex': ['b1', 'b1', 'g1', 'g2'],
}
df13 = pd.DataFrame(data, index=index)

# Right table: jobs, labelled '4'..'7'. It shares two columns with the left
# table: 'age' and 'sex'.
index = ['4', '5', '6', '7']
data = {
    'job': ['文员', '技术', '前台', '后台'],
    'age': [23, 22, 26, 27],
    'sex': ['b2', 'b1', 'g1', 'g2'],
}
df14 = pd.DataFrame(data, index=index)

# Join on 'age' only. The other shared column ('sex') would clash, so
# suffixes= gives the left and right copies distinct names:
# 'sex_df13' and 'sex_df14'.
print(pd.merge(df13, df14, on='age', suffixes=('_df13', '_df14')))
  name  age sex_df13 job sex_df14
0   张三   23       b1  文员       b2
1   李四   22       b1  技术       b1

Process finished with exit code 0
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值