Python pandas中DataFrame添加列、获取行列、获取元素值

一、DataFrame添加列

直接通过赋值为空,添加一列。

>>> import numpy as np
>>> import pandas as pd

>>> df = pd.DataFrame(np.arange(12).reshape(3, 4), index = ['row1', 'row2', 'row3'], columns=['col1', 'col2', 'col3', 'col4'])

>>> df
      col1  col2  col3  col4
row1     0     1     2     3
row2     4     5     6     7
row3     8     9    10    11
>>> 
>>> df['col5']=''
>>> 
>>> df
      col1  col2  col3  col4 col5
row1     0     1     2     3     
row2     4     5     6     7     
row3     8     9    10    11 

通过一个list给新加的列赋值,添加一列。注意,list里的元素个数要跟dataframe的行数一致,否则会报长度对不齐的错误。

>>> df['col6']=[1,1,1]
>>> 
>>> df
      col1  col2  col3  col4 col5  col6
row1     0     1     2     3          1
row2     4     5     6     7          1
row3     8     9    10    11          1

>>> df['col7']=[]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3119, in __setitem__
    self._set_item(key, value)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3194, in _set_item
    value = self._sanitize_column(key, value)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3391, in _sanitize_column
    value = _sanitize_index(value, self.index, copy=False)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 4001, in _sanitize_index
    raise ValueError('Length of values does not match length of ' 'index')
ValueError: Length of values does not match length of index
>>> 
>>> df['col7']=[1]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3119, in __setitem__
    self._set_item(key, value)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3194, in _set_item
    value = self._sanitize_column(key, value)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3391, in _sanitize_column
    value = _sanitize_index(value, self.index, copy=False)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 4001, in _sanitize_index
    raise ValueError('Length of values does not match length of ' 'index')
ValueError: Length of values does not match length of index

如果需要在指定位置处添加一列,用insert方法实现。DataFrame.insert(loc, column, value, allow_duplicates=False),loc-插入位置(从0开始的列序号),column-列名,value-元素值,allow_duplicates-是否允许出现重复的列名。

>>> df
      col1  col2  col3  col4 col5  col6
row1     0     1     2     3          1
row2     4     5     6     7          1
row3     8     9    10    11          1
>>> 
>>> df.insert(1,'col7','')
>>> 
>>> df
      col1 col7  col2  col3  col4 col5  col6
row1     0          1     2     3          1
row2     4          5     6     7          1
row3     8          9    10    11          1
>>> 
>>> df.insert(4,'col8',[2,2,2])
>>> 
>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1

二、DataFrame取行列

1、按索引或者行列名选取DataFrame的具体行和列

>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1
>>>  
>>> col1=df['col1']
>>> 
>>> col1
row1    0
row2    4
row3    8
Name: col1, dtype: int64
>>> 
>>> type(col1)
<class 'pandas.core.series.Series'>

注意,df[...]中直接写单个标签时是按列名取列,因此不能用行名取行,也不能用单个数字索引或[行,列]的形式取行列,以下方式都会报错。

>>> row1=df['row1']
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3078, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'row1'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
    return self._getitem_column(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
    return self._get_item_cache(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2489, in _get_item_cache
    values = self._data.get(item)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/internals.py", line 4115, in get
    loc = self.items.get_loc(item)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'row1'
>>> 
>>> row1=df[0]
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3078, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
    return self._getitem_column(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
    return self._get_item_cache(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2489, in _get_item_cache
    values = self._data.get(item)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/internals.py", line 4115, in get
    loc = self.items.get_loc(item)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "pandas/_libs/index.pyx", line 140, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 0
>>> 
>>> row1=df[0,:]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
    return self._getitem_column(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
    return self._get_item_cache(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2487, in _get_item_cache
    res = cache.get(item)
TypeError: unhashable type: 'slice'
>>> 
>>> col1=df[:,0]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2688, in __getitem__
    return self._getitem_column(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2695, in _getitem_column
    return self._get_item_cache(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 2487, in _get_item_cache
    res = cache.get(item)
TypeError: unhashable type: 'slice'

取某些行可以用行号切片的方式(按位置切片,左闭右开),例如df[0:1]取第1行。

>>> row1=df[0:1]
>>> 
>>> row1
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
>>> 
>>> row2=df[1:2]
>>> 
>>> row2
      col1 col7  col2  col3  col8  col4 col5  col6
row2     4          5     6     2     7          1
>>> 
>>> row23=df[1:3]
>>> 
>>> row23
      col1 col7  col2  col3  col8  col4 col5  col6
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1

2、使用iloc按行号和列号取相应的行列

>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1

# 取第一行
>>> row1=df.iloc[0]
>>> 
>>> row1
col1    0
col7     
col2    1
col3    2
col8    2
col4    3
col5     
col6    1
Name: row1, dtype: object

# 取第1,2,3行
>>> row123=df.iloc[[0,1,2]]
>>> 
>>> row123
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1
>>> 
>>> row12=df.iloc[[0,1]]
>>> 
>>> row12
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1

# 连续取多行
>>> row12=df.iloc[0:2]
>>> 
>>> row12
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
>>> 
>>> row12=df.iloc[0:2,:]
>>> 
>>> row12
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1

# 取行时可以不指定列,但取列时必须先用:指定所有行
>>> col1=df.iloc[:,0]
>>> 
>>> col1
row1    0
row2    4
row3    8
Name: col1, dtype: int64

# 用list[0,2]指定取第1,3列
>>> col13=df.iloc[:,[0,2]]
>>> 
>>> col13
      col1  col2
row1     0     1
row2     4     5
row3     8     9
>>> 
>>> col123=df.iloc[:,[0,1,2]]
>>> 
>>> col123
      col1 col7  col2
row1     0          1
row2     4          5
row3     8          9

# 用0:2指定连续的多列
>>> col12=df.iloc[:,0:2]
>>> 
>>> col12
      col1 col7
row1     0     
row2     4     
row3     8   

3、使用loc按行名和列名取相应的行列

>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1
>>> 
>>> row1=df.loc['row1']
>>> 
>>> row1
col1    0
col7     
col2    1
col3    2
col8    2
col4    3
col5     
col6    1
Name: row1, dtype: object
>>> 
>>> row2=df.loc['row2',:]
>>> 
>>> row2
col1    4
col7     
col2    5
col3    6
col8    2
col4    7
col5     
col6    1
Name: row2, dtype: object
>>> 
>>> row12=df.loc[['row1','row2']]
>>> 
>>> row12
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
>>> 
>>> row123=df.loc['row1':'row3',:]
>>> 
>>> row123
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1
>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1
>>> 
>>> col1=df.loc[:,'col1']
>>> 
>>> col1
row1    0
row2    4
row3    8
Name: col1, dtype: int64

>>> col128=df.loc[:,['col1','col2','col8']]
>>> 
>>> col128
      col1  col2  col8
row1     0     1     2
row2     4     5     2
row3     8     9     2
>>> 
>>> col1723=df.loc[:,'col1':'col3']
>>> 
>>> col1723
      col1 col7  col2  col3
row1     0          1     2
row2     4          5     6
row3     8          9    10

4、使用条件取相应的行列

>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1

# 取出col1列中值为4的所有行
>>> df.loc[df['col1']==4]
      col1 col7  col2  col3  col8  col4 col5  col6
row2     4          5     6     2     7          1
>>> 
>>> df.loc[df['col8']==2]
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1

>>> df.loc[~(df['col1']!=4)]
      col1 col7  col2  col3  col8  col4 col5  col6
row2     4          5     6     2     7          1
>>> 
>>> df.loc[~(df['col1']==4)]
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row3     8          9    10     2    11          1

5、使用条件替换行列的值

>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2     4          5     6     2     7          1
row3     8          9    10     2    11          1

# 把第一列中值为4的行选取出来,再选择第'col1'列的元素赋值成新的44
>>> df.loc[df['col1']==4,'col1']=44
>>> 
>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2    44          5     6     2     7          1
row3     8          9    10     2    11          1

# 把第一列中值为44的行选取出来,再选择第'col4'列的元素赋值成新的44
>>> df.loc[df['col1']==44,'col4']=44
>>> 
>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2    44          5     6     2    44          1
row3     8          9    10     2    11          1
 
# 选择'row3'行,其中有值为2的元素赋值成新的44
>>> df.loc['row3',df.loc['row3',:]==2]=44
>>> 
>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2    44          5     6     2    44          1
row3     8          9    10    44    11          1

# 改变某行某列的元素值(此处以最初只有4列的df为例单独演示,与上文修改后的df无关)
>>> df
      col1  col2  col3  col4
row1     0     1     2     3
row2     4     5     6     7
row3     8     9    10    11
>>> 
>>> df.at['row1','col1']=100
>>> 
>>> df
      col1  col2  col3  col4
row1   100     1     2     3
row2     4     5     6     7
row3     8     9    10    11

6、使用条件替换整个矩阵的部分元素的值

>>> df
      col1 col7  col2  col3  col8  col4 col5  col6
row1     0          1     2     2     3          1
row2    44          5     6     2    44          1
row3     8          9    10    44    11          1

# 将所有值大于40的元素赋值为新的40,注意DataFrame中包含混合类型的列(如这里值为空字符串的col7、col5)且所赋的值不是np.nan时会报错,需要先删除这些列或将其转换为数值类型
>>> df[df>40]=40
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3114, in __setitem__
    self._setitem_frame(key, value)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 3161, in _setitem_frame
    self._check_inplace_setting(value)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 4503, in _check_inplace_setting
    raise TypeError('Cannot do inplace boolean setting on '
TypeError: Cannot do inplace boolean setting on mixed-types with a non np.nan value
>>> 
>>> 
>>> df>40
       col1  col7   col2   col3   col8   col4  col5   col6
row1  False  True  False  False  False  False  True  False
row2   True  True  False  False  False   True  True  False
row3  False  True  False  False   True  False  True  False

>>> df=df.drop(['col7'], axis=1)
>>> df=df.drop(['col5'], axis=1)
>>> 
>>> df
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row2    44     5     6     2    44     1
row3     8     9    10    44    11     1
>>> 
>>> df[df>40]=40
>>> 
>>> df
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row2    40     5     6     2    40     1
row3     8     9    10    40    11     1

三、DataFrame取元素

1、选择列然后遍历获取元素的值

>>> df
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row2    40     5     6     2    40     1
row3     8     9    10    40    11     1

>>> for index in df['col1'].index:
...     idx=df['col1'].get(index)
...     print(idx)
... 
0
40
8

2、使用pandas.DataFrame.at的行索引和列名获取元素的值

>>> df
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row2    40     5     6     2    40     1
row3     8     9    10    40    11     1
>>> 
>>> df.at[4,'col1']
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2141, in __getitem__
    key = self._convert_key(key)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2227, in _convert_key
    raise ValueError("At based indexing on an non-integer "
ValueError: At based indexing on an non-integer index can only have non-integer indexers
>>> 
>>> df.at['row4','col1']
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2142, in __getitem__
    return self.obj._get_value(*key, takeable=self._takeable)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 2539, in _get_value
    return engine.get_value(series._values, index)
  File "pandas/_libs/index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value
  File "pandas/_libs/index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value
  File "pandas/_libs/index.pyx", line 162, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1492, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1500, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'row4'
>>> 
>>> df.at['row2','col1']
40

>>> df.iloc[1].at['col1']
40

3、使用pandas.DataFrame.iat的行索引和列索引获取元素的值

>>> df
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row2    40     5     6     2    40     1
row3     8     9    10    40    11     1
>>> 
>>> df.iat[1,2]
6
>>> 
>>> df.iloc[3].iat[4]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 1478, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2102, in _getitem_axis
    self._validate_integer(key, axis)
  File "/root/miniconda3/lib/python3.6/site-packages/pandas/core/indexing.py", line 2009, in _validate_integer
    raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
>>> 
>>> df.iloc[2].iat[4]
11

4、使用pandas.DataFrame.loc获取元素的值或者行

>>> df
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row2    40     5     6     2    40     1
row3     8     9    10    40    11     1
>>> 
>>> 
>>> df.loc['row1','col1']
0
>>> 
>>> df.loc['row1']
col1    0
col2    1
col3    2
col8    2
col4    3
col6    1
Name: row1, dtype: int64
>>> 
>>> df.loc[['row1','row3']]
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row3     8     9    10    40    11     1

5、使用pandas.DataFrame.iloc获取元素的值或者行

>>> df
      col1  col2  col3  col8  col4  col6
row1     0     1     2     2     3     1
row2    40     5     6     2    40     1
row3     8     9    10    40    11     1
>>> 
>>> df.iloc[0,2]
2
>>> 
>>> df.iloc[2]
col1     8
col2     9
col3    10
col8    40
col4    11
col6     1
Name: row3, dtype: int64

引用

【1】https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.insert.html

  • 7
    点赞
  • 31
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值