十套练习使用pandas数据分析.5
import pandas as pd
import numpy as np
raw_data1 = {"subject_id":["1","2","3","4","5"],"first_name":['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
"last_name": ["Anderson","Ackerman", "Ali", "Aoni", "Atiches"]}
raw_data_2 = {
'subject_id': ['4', '5', '6', '7', '8'],
'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}
raw_data_3 = {
'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}
data1 = pd.DataFrame(raw_data1,columns=["subject_id","first_name","last_name"])
data2 = pd.DataFrame(raw_data_2,columns=["subject_id","first_name",'last_name'])
data3 = pd.DataFrame(raw_data_3,columns=['subject_id','test_id'])
data1
| subject_id | first_name | last_name |
---|
0 | 1 | Alex | Anderson |
---|
1 | 2 | Amy | Ackerman |
---|
2 | 3 | Allen | Ali |
---|
3 | 4 | Alice | Aoni |
---|
4 | 5 | Ayoung | Atiches |
---|
data2
| subject_id | first_name | last_name |
---|
0 | 4 | Billy | Bonder |
---|
1 | 5 | Brian | Black |
---|
2 | 6 | Bran | Balwner |
---|
3 | 7 | Bryce | Brice |
---|
4 | 8 | Betty | Btisan |
---|
all_data = pd.concat([data1,data2])
all_data
| subject_id | first_name | last_name |
---|
0 | 1 | Alex | Anderson |
---|
1 | 2 | Amy | Ackerman |
---|
2 | 3 | Allen | Ali |
---|
3 | 4 | Alice | Aoni |
---|
4 | 5 | Ayoung | Atiches |
---|
0 | 4 | Billy | Bonder |
---|
1 | 5 | Brian | Black |
---|
2 | 6 | Bran | Balwner |
---|
3 | 7 | Bryce | Brice |
---|
4 | 8 | Betty | Btisan |
---|
all_data_col = pd.concat([data1,data2],axis=1)
all_data_col
| subject_id | first_name | last_name | subject_id | first_name | last_name |
---|
0 | 1 | Alex | Anderson | 4 | Billy | Bonder |
---|
1 | 2 | Amy | Ackerman | 5 | Brian | Black |
---|
2 | 3 | Allen | Ali | 6 | Bran | Balwner |
---|
3 | 4 | Alice | Aoni | 7 | Bryce | Brice |
---|
4 | 5 | Ayoung | Atiches | 8 | Betty | Btisan |
---|
data3
| subject_id | test_id |
---|
0 | 1 | 51 |
---|
1 | 2 | 15 |
---|
2 | 3 | 15 |
---|
3 | 4 | 61 |
---|
4 | 5 | 16 |
---|
5 | 7 | 14 |
---|
6 | 8 | 15 |
---|
7 | 9 | 1 |
---|
8 | 10 | 61 |
---|
9 | 11 | 16 |
---|
data4 = pd.merge(all_data,data3,on="subject_id")
data4
| subject_id | first_name | last_name | test_id |
---|
0 | 1 | Alex | Anderson | 51 |
---|
1 | 2 | Amy | Ackerman | 15 |
---|
2 | 3 | Allen | Ali | 15 |
---|
3 | 4 | Alice | Aoni | 61 |
---|
4 | 4 | Billy | Bonder | 61 |
---|
5 | 5 | Ayoung | Atiches | 16 |
---|
6 | 5 | Brian | Black | 16 |
---|
7 | 7 | Bryce | Brice | 14 |
---|
8 | 8 | Betty | Btisan | 15 |
---|
pd.merge(data1,data2,on="subject_id",how='inner')
| subject_id | first_name_x | last_name_x | first_name_y | last_name_y |
---|
0 | 4 | Alice | Aoni | Billy | Bonder |
---|
1 | 5 | Ayoung | Atiches | Brian | Black |
---|
pd.merge(data1,data2,on='subject_id',how='outer')
| subject_id | first_name_x | last_name_x | first_name_y | last_name_y |
---|
0 | 1 | Alex | Anderson | NaN | NaN |
---|
1 | 2 | Amy | Ackerman | NaN | NaN |
---|
2 | 3 | Allen | Ali | NaN | NaN |
---|
3 | 4 | Alice | Aoni | Billy | Bonder |
---|
4 | 5 | Ayoung | Atiches | Brian | Black |
---|
5 | 6 | NaN | NaN | Bran | Balwner |
---|
6 | 7 | NaN | NaN | Bryce | Brice |
---|
7 | 8 | NaN | NaN | Betty | Btisan |
---|