dataframe的增删改查

#dataframe的增删改查之增
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(3,4),index=list("abc"),columns=list("ABCD"))
df
ABCD
a-1.191323-0.7743960.234996-0.565204
b0.042025-0.003400-1.517832-0.729930
c0.4597240.341114-0.0707760.515991
#增加列
df["E"] = df.A + df.B  
df
ABCDE
a1.2416300.384139-1.225968-1.1600281.625769
b-0.736530-0.148695-1.622824-0.145651-0.885224
c0.370135-1.1302620.032585-0.416405-0.760128
df.insert(1,"insert",6) #insert(loc, column, value, allow_duplicates=False)
df
AinsertBCDE
a1.24163060.384139-1.225968-1.1600281.625769
b-0.7365306-0.148695-1.622824-0.145651-0.885224
c0.3701356-1.1302620.032585-0.416405-0.760128
df.assign(F = df["A"] / df["B"]) # DataFrame has an assign() method that allows you to easily create new columns that are potentially derived from existing columns.
#注意:assign()返回的是一个新的DataFrame,此时F并未真正添加到df中(想保留需要写成 df = df.assign(...))
AinsertBCDEF
a1.24163060.384139-1.225968-1.1600281.6257693.232243
b-0.7365306-0.148695-1.622824-0.145651-0.8852244.953305
c0.3701356-1.1302620.032585-0.416405-0.760128-0.327477
#增加行
#merge / concatenate / join 等函数实现
#其他章节研究一下
#删除列
del df["A"]    #我本来想删除E列,报错了,E列实际上可能没保存在df的内存地址中
df
BCDE
a0.224567-1.0495190.157046-1.692286
b1.3598461.6050340.2134921.141026
c0.206875-0.037353-0.092492-0.673894
df.pop("B")  #看来删除实际上改动了原来的dataframe
df
CDE
a-1.0495190.157046-1.692286
b1.6050340.2134921.141026
c-0.037353-0.092492-0.673894
#删除行
del df.a
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-38-5c6c11f8ca66> in <module>
      1 #删除行
----> 2 del df.a


AttributeError: a
#删除行(del 也不能作用于 df.loc[...],见下方报错;正确做法是 df = df.drop("c"))
del df.loc["c",:]
df
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-39-87bdf8cd1515> in <module>
      1 #删除行
----> 2 del df.loc["c",:]
      3 df


AttributeError: __delitem__
#改动列
df.C = "bar"
df
CDE
abar0.157046-1.692286
bbar0.2134921.141026
cbar-0.092492-0.673894
#改动行
df.c = "barr"  #可以发现重新赋值没用
df
CDE
abar0.157046-1.692286
bbar0.2134921.141026
cbar-0.092492-0.673894
df.c  #看来df.c = "barr"只是给df对象新增了一个普通属性c,并没有改动行c
'barr'
#改动行
df.loc["a",:] = "barr"  #改动应该都是先选择后赋值
df                      #所以我应该先学查找的啊
CDE
abarrbarrbarr
bbar0.2134921.14103
cbar-0.0924917-0.673894
#重头戏之查找
df
AinsertBCDE
a1.24163060.384139-1.225968-1.1600281.625769
b-0.7365306-0.148695-1.622824-0.145651-0.885224
c0.3701356-1.1302620.032585-0.416405-0.760128
df["A"]  #该方式选择的是列,返回series
a    1.241630
b   -0.736530
c    0.370135
Name: A, dtype: float64
df.loc["a"]  #该方式以行的label选择行,返回的是series
A         1.241630
insert    6.000000
B         0.384139
C        -1.225968
D        -1.160028
E         1.625769
Name: a, dtype: float64
df.iloc[0]   #该方式以行的数字索引选择行,返回的是series
A         1.241630
insert    6.000000
B         0.384139
C        -1.225968
D        -1.160028
E         1.625769
Name: a, dtype: float64
df[0:2]   #以切片的方式选择行,返回dataframe
AinsertBCDE
a1.2416360.384139-1.225968-1.1600281.625769
b-0.736536-0.148695-1.622824-0.145651-0.885224
df[0:-1]
AinsertBCDE
a1.2416360.384139-1.225968-1.1600281.625769
b-0.736536-0.148695-1.622824-0.145651-0.885224
#选择具体的单个元素
df.loc["a","A"]
1.2416298344644054
df.iloc[0,0]
1.2416298344644054
df.loc[["a","c"],["A","C"]]
AC
a1.241630-1.225968
c0.3701350.032585
df.iloc[[0,2],[0,3]]
AC
a1.241630-1.225968
c0.3701350.032585
df.iloc[[0,2]] #选择不连续的多行的方法
AinsertBCDE
a1.24163060.384139-1.225968-1.1600281.625769
c0.3701356-1.1302620.032585-0.416405-0.760128
df.loc[["a","c"]]
AinsertBCDE
a1.24163060.384139-1.225968-1.1600281.625769
c0.3701356-1.1302620.032585-0.416405-0.760128
df[[0,2],["A","C"]]
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-81-970a7eaa38bd> in <module>
----> 1 df[[0,2],["A","C"]]


~/miniconda3/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2798             if self.columns.nlevels > 1:
   2799                 return self._getitem_multilevel(key)
-> 2800             indexer = self.columns.get_loc(key)
   2801             if is_integer(indexer):
   2802                 indexer = [indexer]


~/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2644                 )
   2645             try:
-> 2646                 return self._engine.get_loc(key)
   2647             except KeyError:
   2648                 return self._engine.get_loc(self._maybe_cast_indexer(key))


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


TypeError: '([0, 2], ['A', 'C'])' is an invalid key
#中间开了小差,继续
df
ABCD
a-1.191323-0.7743960.234996-0.565204
b0.042025-0.003400-1.517832-0.729930
c0.4597240.341114-0.0707760.515991
df[df["A"] > 0]   #if then语句 ,根据A列数值选择行
ABCD
b0.042025-0.003400-1.517832-0.729930
c0.4597240.341114-0.0707760.515991
df[(df.A > 0) & (df.D > 0)]  # 多个条件用 &(与)、|(或)组合,每个条件都要加括号
ABCD
c0.4597240.341114-0.0707760.515991
df[df > 0]
ABCD
aNaNNaN0.234996NaN
b0.042025NaNNaNNaN
c0.4597240.341114NaN0.515991
df["E"] = ["AA","BB","CC"]
df
ABCDE
a-1.191323-0.7743960.234996-0.565204AA
b0.042025-0.003400-1.517832-0.729930BB
c0.4597240.341114-0.0707760.515991CC
df[df.E.isin(["AA","CC"])]   #isin函数生成bool值,然后用它进行选择
ABCDE
a-1.191323-0.7743960.234996-0.565204AA
c0.4597240.341114-0.0707760.515991CC
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值