pandas基本使用的练习笔记

1. 数据索引

import pandas as pd
left = pd.DataFrame( {'key':['k1', 'k2', 'k3', 'k4'],
                      'name':['n1', 'n2', 'n3', 'n4'],
                      'id':['id1', 'id2', 'id3', 'id4']
                     }
                   ) # 数组构成的字典

right = pd.DataFrame( {'key':['k221', 'k222', 'k223', 'k224'],
                      'name':['n221', 'n222', 'n223', 'n224'],
                      'id':['id221', 'id222', 'id223', 'id224']
                     }
                   )

print(left)
print(right)
    id key name
0  id1  k1   n1
1  id2  k2   n2
2  id3  k3   n3
3  id4  k4   n4
      id   key  name
0  id221  k221  n221
1  id222  k222  n222
2  id223  k223  n223
3  id224  k224  n224

A. 选取行

# 1. 用数值取
left.loc[2]
id      id3
key      k3
name     n3
Name: 2, dtype: object
# 2. 用列表取
left.loc[ [1,2,3]] # 注意外层 【】
    id key name
1  id2  k2   n2
2  id3  k3   n3
3  id4  k4   n4
#3. 用切片
left.loc[ 1:2 ]
    id key name
1  id2  k2   n2
2  id3  k3   n3
#4. 用bool做索引,先要得到bool series
mask = left['name'] > 'n1'
mask
0    False
1     True
2     True
3     True
Name: name, dtype: bool
#4.1 使用 bool series
left.loc[ mask]
    id key name
1  id2  k2   n2
2  id3  k3   n3
3  id4  k4   n4
# 5. iloc
left.iloc[3]
id      id4
key      k4
name     n4
Name: 3, dtype: object
# 5.1 iloc
left.iloc[ [0,1]]
    id key name
0  id1  k1   n1
1  id2  k2   n2
# 5.2 iloc && 切片
left.iloc[ 0:2]
    id key name
0  id1  k1   n1
1  id2  k2   n2
left.loc[ 0:2] # 注意区别:loc 的切片是左闭右闭(包含末端标签 2),而 iloc 的切片是左闭右开
    id key name
0  id1  k1   n1
1  id2  k2   n2
2  id3  k3   n3
left.set_index('key')
      id name
key
k1   id1   n1
k2   id2   n2
k3   id3   n3
k4   id4   n4
left.set_index('name')
       id key
name
n1    id1  k1
n2    id2  k2
n3    id3  k3
n4    id4  k4
left.loc[ 0:2]
    id key name
0  id1  k1   n1
1  id2  k2   n2
2  id3  k3   n3
left_reset_index = left.set_index("key")
left_reset_index
      id name
key
k1   id1   n1
k2   id2   n2
k3   id3   n3
k4   id4   n4
left_reset_index.loc[ ['k1']]
      id name
key
k1   id1   n1
left_reset_index.iloc[ [0,2]]
      id name
key
k1   id1   n1
k3   id3   n3

B. 选取列

# 1. 列名
left[ 'name']
0    n1
1    n2
2    n3
3    n4
Name: name, dtype: object
# 2. 列表
left[ ['name', 'id']]
  name   id
0   n1  id1
1   n2  id2
2   n3  id3
3   n4  id4
# 3. 切片
left[ 0:2]  # 注意:对 DataFrame 直接使用切片时,选取的是行而不是列
    id key name
0  id1  k1   n1
1  id2  k2   n2
# 4. bool
left[ left['name'] > 'n2']
    id key name
2  id3  k3   n3
3  id4  k4   n4

C. 选取行和列

#1. 位置
left
left.loc[0, "key"]
'k1'
# 2. 列表
left.loc[ [1,2], ['name', 'id']]
  name   id
1   n2  id2
2   n3  id3
# 3. 切片
left.loc[ 0:2, :]
    id key name
0  id1  k1   n1
1  id2  k2   n2
2  id3  k3   n3
# 4.iloc
left.iloc[ [0,1],[0,2]]
    id name
0  id1   n1
1  id2   n2
left.iloc[ [0,1],[0,1]]
    id key
0  id1  k1
1  id2  k2

基本操作

# 得到列名们
left.columns 
Index(['id', 'key', 'name'], dtype='object')
#修改列名
left.columns = ["xuhao", 'guanjianzhi', 'mingzi'] 
left
  xuhao guanjianzhi mingzi
0   id1          k1     n1
1   id2          k2     n2
2   id3          k3     n3
3   id4          k4     n4
# 获取某一列的简单方式:点(.)语法,即 DataFrame.列名
left.xuhao
0    id1
1    id2
2    id3
3    id4
Name: xuhao, dtype: object
# 重命名列
left.rename(columns={"xuhao":"ID", "guanjianzhi":"KEY", "mingzi":"NAME"})

    ID KEY NAME
0  id1  k1   n1
1  id2  k2   n2
2  id3  k3   n3
3  id4  k4   n4
left
  xuhao guanjianzhi mingzi
0   id1          k1     n1
1   id2          k2     n2
2   id3          k3     n3
3   id4          k4     n4
# 就地修改 inplace 参数
left.rename( columns={"xuhao":"ID", "guanjianzhi":"KEY", "mingzi":"NAME"}, inplace=True)
left
    ID KEY NAME
0  id1  k1   n1
1  id2  k2   n2
2  id3  k3   n3
3  id4  k4   n4
# 修改元素值 字典的字典,外层字典用来指定改哪一列
left.replace( {"ID":{'id1':"ID1", 'id2':'ID2'}}, inplace=True);
left
    ID KEY NAME
0  ID1  k1   n1
1  ID2  k2   n2
2  id3  k3   n3
3  id4  k4   n4

基本的排序和增删

# 有多少种取值可能
left.ID.unique()
array(['ID1', 'ID2', 'id3', 'id4'], dtype=object)
# 每一种取值的数量
left.ID.value_counts()
ID1    1
id3    1
ID2    1
id4    1
Name: ID, dtype: int64
# 排序
left.sort_values('ID')
    ID KEY NAME
0  ID1  k1   n1
1  ID2  k2   n2
2  id3  k3   n3
3  id4  k4   n4
# 多个key 排序
left.sort_values(['ID', 'NAME'], ascending=[True, False])
    ID KEY NAME
0  ID1  k1   n1
1  ID2  k2   n2
2  id3  k3   n3
3  id4  k4   n4
left.sort_values( ['ID', "NAME"], ascending=[False, False])
    ID KEY NAME
3  id4  k4   n4
2  id3  k3   n3
1  ID2  k2   n2
0  ID1  k1   n1
left.replace( {"ID":{'id3':'ID3', 'ID2':'id2'}}, inplace=True)
left
    ID KEY NAME
0  ID1  k1   n1
1  id2  k2   n2
2  ID3  k3   n3
3  id4  k4   n4
left.sort_values(['ID'], ascending=[True])
    ID KEY NAME
0  ID1  k1   n1
2  ID3  k3   n3
1  id2  k2   n2
3  id4  k4   n4
left['mailema'] = 1
left
    ID KEY NAME  mailema
0  ID1  k1   n1        1
1  id2  k2   n2        1
2  ID3  k3   n3        1
3  id4  k4   n4        1
left['rongyuceshilie'] = 1
left
    ID KEY NAME  mailema  rongyuceshilie
0  ID1  k1   n1        1               1
1  id2  k2   n2        1               1
2  ID3  k3   n3        1               1
3  id4  k4   n4        1               1
# 删除列
del left['rongyuceshilie']
left
    ID KEY NAME  mailema
0  ID1  k1   n1        1
1  id2  k2   n2        1
2  ID3  k3   n3        1
3  id4  k4   n4        1
# 删除行
ret = left.drop(labels=2)
print(left)
print(ret)
    ID KEY NAME  mailema
0  ID1  k1   n1        1
1  id2  k2   n2        1
2  ID3  k3   n3        1
3  id4  k4   n4        1
    ID KEY NAME  mailema
0  ID1  k1   n1        1
1  id2  k2   n2        1
3  id4  k4   n4        1
# 修改一列的数值 map 结合字典
left.NAME
ret = left.NAME.map({'n1':'NNN1','n2':"N2", 'n3':'NN3', 'n4':"NN4"})
print(ret)
print(left.NAME)
0    NNN1
1      N2
2     NN3
3     NN4
Name: NAME, dtype: object
0    n1
1    n2
2    n3
3    n4
Name: NAME, dtype: object
# map 修改一列 map结合函数
import numpy as np
left.NAME.map("hello my name is {}".format)
test_series = left.NAME
yy = pd.Series(['name_ext', np.nan])
print(yy)
# 注意:Series.append 在 pandas 1.4 中被弃用,并在 pandas 2.0 中移除;新版本请改用 pd.concat([test_series, yy])
test_series = test_series.append(yy)
test_series = test_series.map("my name is {}".format, na_action='ignore')
print(test_series)
0    name_ext
1         NaN
dtype: object
0          my name is n1
1          my name is n2
2          my name is n3
3          my name is n4
0    my name is name_ext
1                    NaN
dtype: object
from collections import Counter
counter = Counter()
counter['bar'] += 1
counter
Counter({'bar': 1})
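# 注意:下面 min/max/sum/cumsum 的输出中仍包含 rongyuceshilie 列,
# 推测这些单元格是在执行 del left['rongyuceshilie'] 之前(或重新添加该列之后)运行的
left.min()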
ID                ID1
KEY                k1
NAME               n1
mailema             1
rongyuceshilie      1
dtype: object
left.max()
ID                id4
KEY                k4
NAME               n4
mailema             1
rongyuceshilie      1
dtype: object
left.sum()
ID                ID1id2ID3id4
KEY                   k1k2k3k4
NAME                  n1n2n3n4
mailema                      4
rongyuceshilie               4
dtype: object
left.cumsum()
             ID       KEY      NAME  mailema  rongyuceshilie
0           ID1        k1        n1        1               1
1        ID1id2      k1k2      n1n2        2               2
2     ID1id2ID3    k1k2k3    n1n2n3        3               3
3  ID1id2ID3id4  k1k2k3k4  n1n2n3n4        4               4

pandas 常用操作:矩阵运算

# 最值的位置
df = pd.DataFrame(np.random.random( (5, 10)), columns = list('abcdefghij')) # 简写列名
df
          a         b         c         d         e         f         g         h         i         j
0  0.920187  0.468342  0.251023  0.012371  0.410920  0.823567  0.644462  0.971567  0.363926  0.125066
1  0.388891  0.017500  0.400027  0.907302  0.747884  0.850026  0.871119  0.361581  0.313365  0.517795
2  0.858049  0.818444  0.429782  0.953707  0.362399  0.597594  0.583865  0.363218  0.671535  0.887737
3  0.981348  0.884835  0.333632  0.930583  0.990436  0.264681  0.349709  0.938117  0.729409  0.371021
4  0.409097  0.270208  0.093192  0.985581  0.389709  0.891675  0.551112  0.897221  0.573714  0.854950
df.idxmax()
a    3
b    3
c    2
d    4
e    3
f    4
g    1
h    0
i    3
j    2
dtype: int64
df*10
          a         b         c         d         e         f         g         h         i         j
0  9.201869  4.683421  2.510227  0.123709  4.109205  8.235670  6.444622  9.715667  3.639265  1.250659
1  3.888908  0.174995  4.000269  9.073024  7.478841  8.500256  8.711186  3.615811  3.133648  5.177951
2  8.580488  8.184435  4.297818  9.537071  3.623993  5.975941  5.838652  3.632183  6.715348  8.877372
3  9.813483  8.848354  3.336317  9.305834  9.904355  2.646808  3.497094  9.381173  7.294089  3.710212
4  4.090968  2.702080  0.931915  9.855806  3.897086  8.916751  5.511120  8.972206  5.737144  8.549502
df*df
          a         b         c         d         e         f         g         h         i         j
0  0.846744  0.219344  0.063012  0.000153  0.168856  0.678263  0.415332  0.943942  0.132442  0.015641
1  0.151236  0.000306  0.160022  0.823198  0.559331  0.722544  0.758848  0.130741  0.098197  0.268112
2  0.736248  0.669850  0.184712  0.909557  0.131333  0.357119  0.340899  0.131928  0.450959  0.788077
3  0.963045  0.782934  0.111310  0.865985  0.980962  0.070056  0.122297  0.880064  0.532037  0.137657
4  0.167360  0.073012  0.008685  0.971369  0.151873  0.795084  0.303724  0.805005  0.329148  0.730940
df.dot(df.T)
          0         1         2         3         4
0  3.483729  2.576565  3.018221  3.486335  2.975670
1  2.576565  3.672533  3.474288  3.405148  3.571698
2  3.018221  3.474288  4.700682  4.478348  4.018175
3  3.486335  3.405148  4.478348  5.446347  3.980907
4  2.975670  3.571698  4.018175  3.980907  4.336201
df.T.dot(df)
          a         b         c         d         e         f         g         h         i         j
a  2.864633  2.118905  1.120862  2.498977  2.111316  2.225693  2.001426  2.633968  1.983464  1.792030
b  2.118905  1.745446  0.796707  1.891951  1.483817  1.364820  1.253284  1.831142  1.525968  1.353505
c  1.120862  0.796707  0.527741  1.178257  0.924835  0.975005  0.929213  0.941231  0.802141  0.823518
d  2.498977  1.891951  1.178257  3.570262  2.335035  2.476474  2.223776  2.443766  2.173484  2.505874
e  2.111316  1.483817  0.924835  2.335035  1.992355  1.800352  1.689049  2.080087  1.573283  1.461012
f  2.225693  1.364820  0.975005  2.476474  1.800352  2.623065  2.204120  2.372892  1.672019  1.934186
g  2.001426  1.253284  0.929213  2.223776  1.689049  2.204120  1.941099  1.975726  1.470863  1.650903
h  2.633968  1.831142  0.941231  2.443766  2.080087  2.372892  1.975726  2.891679  1.909819  1.746318
i  1.983464  1.525968  0.802141  2.173484  1.573283  1.672019  1.470863  1.909819  1.542785  1.565043
j  1.792030  1.353505  0.823518  2.505874  1.461012  1.934186  1.650903  1.746318  1.565043  1.940427
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值