题目如下
1)从studentsInfo.xlsx的“Group3”页读取数据,将序号、性别、年龄项保存到data1对象;
2)从studentsInfo.xlsx的“Group3”页读取数据,将序号、身高、体重、成绩项保存到data2对象;
3)将data2合并到data1中,连接方式为内连接。
前提条件
我的spyder的python版本是 3.8.10我的jupyter的python版本是3.6.0
具体情况如下
我在Spyder里写出了这样一串代码并且成功运行了
import pandas as pd
col_data1 = ['序号','性别','年龄']
data1 = pd.read_excel('data/studentsInfo.xlsx',
'Group3',
usecols = col_data1, #pandas读取excel文件时不能使用columns=
index_col = 0) #而应该是usecols =
print(data1)
print()
col_data2 = ['序号','身高','体重','成绩']
data2 = pd.read_excel('data/studentsInfo.xlsx',
'Group3',
usecols = col_data2,
index_col = 0)
print(data2)
print()
data_total = pd.merge(data1, data2,how = 'inner',on = '序号')
print(data_total)
结果如下
性别 年龄
序号
21 female 21
22 female 19
23 male 21
24 female 21
25 female 21
26 male 21
27 female 21
28 female 22
29 female 20
30 female 20
身高 体重 成绩
序号
21 165 45 93
22 167 42 89
23 169 80 93
24 160 49 59
25 162 54 68
26 181 77 62
27 162 49 65
28 160 52 73
29 161 51 80
30 168 52 98
性别 年龄 身高 体重 成绩
序号
21 female 21 165 45 93
22 female 19 167 42 89
23 male 21 169 80 93
24 female 21 160 49 59
25 female 21 162 54 68
26 male 21 181 77 62
27 female 21 162 49 65
28 female 22 160 52 73
29 female 20 161 51 80
30 female 20 168 52 98
然后,我就想到了之前因为版本不同,我在Spyder里对Series用.append()失败但在Jupyter里却可行的经验,我将代码复制进jupyter然后测试运行,就不能运行
具体情况是提示我
性别 年龄
序号
21 female 21
22 female 19
23 male 21
24 female 21
25 female 21
26 male 21
27 female 21
28 female 22
29 female 20
30 female 20
身高 体重 成绩
序号
21 165 45 93
22 167 42 89
23 169 80 93
24 160 49 59
25 162 54 68
26 181 77 62
27 162 49 65
28 160 52 73
29 161 51 80
30 168 52 98
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
D:\ANACONDA3\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
2133 try:
-> 2134 return self._engine.get_loc(key)
2135 except KeyError:
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4433)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4279)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13742)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13696)()
KeyError: '序号'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-28-ba38e2457767> in <module>()
17 print()
18
---> 19 data_total = pd.merge(data1, data2,how = 'inner',on = '序号')
20 print(data_total)
D:\ANACONDA3\lib\site-packages\pandas\tools\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
59 right_on=right_on, left_index=left_index,
60 right_index=right_index, sort=sort, suffixes=suffixes,
---> 61 copy=copy, indicator=indicator)
62 return op.get_result()
63 if __debug__:
D:\ANACONDA3\lib\site-packages\pandas\tools\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator)
541 (self.left_join_keys,
542 self.right_join_keys,
--> 543 self.join_names) = self._get_merge_keys()
544
545 def get_result(self):
D:\ANACONDA3\lib\site-packages\pandas\tools\merge.py in _get_merge_keys(self)
808 if not is_rkey(rk):
809 if rk is not None:
--> 810 right_keys.append(right[rk]._values)
811 else:
812 # work-around for merge_asof(right_index=True)
D:\ANACONDA3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
D:\ANACONDA3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
D:\ANACONDA3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
D:\ANACONDA3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
D:\ANACONDA3\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4433)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4279)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13742)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13696)()
KeyError: '序号'
然后我就很纳闷
换了一种写法
#1、数据合并
#1)
import pandas as pd
stu = pd.read_excel('data\studentsInfo.xlsx','Group3')
data1 = stu[['序号','性别','年龄']]
print(data1)
#2)
data2 = stu[['序号','身高','体重','成绩']]
print(data2)
#3)
print(pd.merge(data1,data2)) #注:how = ’inner‘可以省略,默认为内连接
在两边都可以正常运行
然后发现,是index_col = 0的问题,index_col=0后序号标在最前,无法识别了
这是我的理解,但不确定
希望各位佬可以讲解一下