pandas2

import numpy as np
import pandas as pd
df = pd.read_csv(r"C:\SAIC\DataHunter\GitHub\joyful-pandas\data\table.csv",index_col='ID')
df.head()
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1101S_1C_1Mstreet_11736334.0A+
1102S_1C_1Fstreet_21927332.5B+
1103S_1C_1Mstreet_21868287.2B+
1104S_1C_1Fstreet_21678180.4B-
1105S_1C_1Fstreet_41596484.8B+
df.loc[1103]
School          S_1
Class           C_1
Gender            M
Address    street_2
Height          186
Weight           82
Math           87.2
Physics          B+
Name: 1103, dtype: object
df.loc[[1102,2304]]
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1102S_1C_1Fstreet_21927332.5B+
2304S_2C_3Fstreet_61648195.5A-
df.loc[2402::-1].head()
SchoolClassGenderAddressHeightWeightMathPhysics
ID
2402S_2C_4Mstreet_71668248.7B
2401S_2C_4Fstreet_21926245.3A
2305S_2C_3Mstreet_41877348.9B
2304S_2C_3Fstreet_61648195.5A-
2303S_2C_3Fstreet_71909965.9C
df.loc[:,'Height'].head()
ID
1101    173
1102    192
1103    186
1104    167
1105    159
Name: Height, dtype: int64
df.loc[:,'Height':'Math'].head()
HeightWeightMath
ID
11011736334.0
11021927332.5
11031868287.2
11041678180.4
11051596484.8
df.loc[1102:2401:3,'Height':'Math'].head()
HeightWeightMath
ID
11021927332.5
11051596484.8
12031605358.8
13011616831.5
13041957085.2
df.loc[lambda x:x['Gender']=='M'].head()
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1101S_1C_1Mstreet_11736334.0A+
1103S_1C_1Mstreet_21868287.2B+
1201S_1C_2Mstreet_51886897.0A-
1203S_1C_2Mstreet_61605358.8A+
1301S_1C_3Mstreet_41616831.5B+
def f(x):
    return [1101,1103]
df.loc[f]
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1101S_1C_1Mstreet_11736334.0A+
1103S_1C_1Mstreet_21868287.2B+
df.loc[df['Address'].isin(['street_7','street_4'])].head()
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1105S_1C_1Fstreet_41596484.8B+
1202S_1C_2Fstreet_41769463.5B-
1301S_1C_3Mstreet_41616831.5B+
1303S_1C_3Mstreet_71888249.7B
2101S_2C_1Mstreet_71748483.3C
df.loc[[True if i[-1]=='4' or i[-1]=='7' else False for i in df['Address'].values]].head()
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1105S_1C_1Fstreet_41596484.8B+
1202S_1C_2Fstreet_41769463.5B-
1301S_1C_3Mstreet_41616831.5B+
1303S_1C_3Mstreet_71888249.7B
2101S_2C_1Mstreet_71748483.3C
df.iloc[3]
School          S_1
Class           C_1
Gender            F
Address    street_2
Height          167
Weight           81
Math           80.4
Physics          B-
Name: 1104, dtype: object
df.iloc[3:5]
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1104S_1C_1Fstreet_21678180.4B-
1105S_1C_1Fstreet_41596484.8B+
df.iloc[:,3].head()
ID
1101    street_1
1102    street_2
1103    street_2
1104    street_2
1105    street_4
Name: Address, dtype: object
df.iloc[:,7::-2].head()
PhysicsWeightAddressClass
ID
1101A+63street_1C_1
1102B+73street_2C_1
1103B+82street_2C_1
1104B-81street_2C_1
1105B+64street_4C_1
df.iloc[3::4,7::-2].head()
PhysicsWeightAddressClass
ID
1104B-81street_2C_1
1203A+53street_6C_2
1302A-57street_1C_3
2101C84street_7C_1
2105A81street_4C_1
df.iloc[lambda x:[3]].head()
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1104S_1C_1Fstreet_21678180.4B-
s = pd.Series(df['Math'],index=df.index)
s[1101]
34.0
s[lambda x: x.index[16::-6]]
ID
2102    50.6
1301    31.5
1105    84.8
Name: Math, dtype: float64
s[lambda x: [16::-6]]
  File "<ipython-input-21-ada17acead14>", line 1
    s[lambda x: [16::-6]]
                   ^
SyntaxError: invalid syntax
s[0:4]
ID
1101    34.0
1102    32.5
1103    87.2
1104    80.4
Name: Math, dtype: float64
s[s>80]
ID
1103    87.2
1104    80.4
1105    84.8
1201    97.0
1302    87.7
1304    85.2
2101    83.3
2205    85.4
2304    95.5
Name: Math, dtype: float64
df.loc[df['Math']>60,(df[:8]['Address']=='street_6').values].head()#??
#如果不加values就会索引对齐发生错误,Pandas中的索引对齐是一个重要特征,很多时候非常实用
#但是若不加以留意,就会埋下隐患
Physics
ID
1103B+
1104B-
1105B+
1201A-
1202B-
df[:8]
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1101S_1C_1Mstreet_11736334.0A+
1102S_1C_1Fstreet_21927332.5B+
1103S_1C_1Mstreet_21868287.2B+
1104S_1C_1Fstreet_21678180.4B-
1105S_1C_1Fstreet_41596484.8B+
1201S_1C_2Mstreet_51886897.0A-
1202S_1C_2Fstreet_41769463.5B-
1203S_1C_2Mstreet_61605358.8A+
df[df['Address'].isin(['street_1','street_4'])&df['Physics'].isin(['A','A+'])]
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1101S_1C_1Mstreet_11736334.0A+
2105S_2C_1Mstreet_41708134.2A
2203S_2C_2Mstreet_41559173.8A+
df[df[['Address','Physics']].isin({'Address':['street_1','street_4'],'Physics':['A','A+']}).all(1)]
SchoolClassGenderAddressHeightWeightMathPhysics
ID
1101S_1C_1Mstreet_11736334.0A+
2105S_2C_1Mstreet_41708134.2A
2203S_2C_2Mstreet_41559173.8A+
df.at[1101,'School']
'S_1'
%timeit df.at[1101,'School']最快
%timeit df.loc[1101,'School']
%timeit df.iat[0,0]
%timeit df.iloc[0,0]
3.74 µs ± 120 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.99 µs ± 993 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4 µs ± 28 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
6.7 µs ± 618 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
pd.interval_range(start=0,end=5)
#closed参数可选'left''right''both''neither',默认左开右闭
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
              closed='right',
              dtype='interval[int64]')
math_interval = pd.cut(df['Math'],bins=[0,40,60,80,100])
math_interval.head()
ID
1101      (0, 40]
1102      (0, 40]
1103    (80, 100]
1104    (80, 100]
1105    (80, 100]
Name: Math, dtype: category
Categories (4, interval[int64]): [(0, 40] < (40, 60] < (60, 80] < (80, 100]]
df_i = df.join(math_interval,rsuffix='_interval')[['Math','Math_interval']]\
            .reset_index().set_index('Math_interval')
df_i.head()
IDMath
Math_interval
(0, 40]110134.0
(0, 40]110232.5
(80, 100]110387.2
(80, 100]110480.4
(80, 100]110584.8
df_i.loc[65].head()
IDMath
Math_interval
(60, 80]120263.5
(60, 80]120568.4
(60, 80]130561.7
(60, 80]210472.2
(60, 80]220268.5
df_i.loc[[65,90]].head()
IDMath
Math_interval
(60, 80]120263.5
(60, 80]120568.4
(60, 80]130561.7
(60, 80]210472.2
(60, 80]220268.5
df_i.index.astype('interval')
IntervalIndex([(0, 40], (0, 40], (80, 100], (80, 100], (80, 100] ... (40, 60], (40, 60], (40, 60], (60, 80], (40, 60]],
              closed='right',
              dtype='interval[int64]')
df_i[df_i.index.astype('interval').overlaps(pd.Interval(70, 85))].head()
IDMath
Math_interval
(80, 100]110387.2
(80, 100]110480.4
(80, 100]110584.8
(80, 100]120197.0
(60, 80]120263.5
tuples = [('A','a'),('A','b'),('B','a'),('B','b')]
mul_index = pd.MultiIndex.from_tuples(tuples, names=('Upper', 'Lower'))
mul_index
MultiIndex([('A', 'a'),
            ('A', 'b'),
            ('B', 'a'),
            ('B', 'b')],
           names=['Upper', 'Lower'])
pd.DataFrame({'Score':['perfect','good','fair','bad']},index=mul_index)
Score
UpperLower
Aaperfect
bgood
Bafair
bbad
L1 = list('AABB')
L2 = list('abab')
tuples = list(zip(L1,L2))
mul_index = pd.MultiIndex.from_tuples(tuples, names=('Upper', 'Lower'))
pd.DataFrame({'Score':['perfect','good','fair','bad']},index=mul_index)
Score
UpperLower
Aaperfect
bgood
Bafair
bbad
arrays = [['A','a'],['A','b'],['B','a'],['B','b']]
mul_index = pd.MultiIndex.from_tuples(arrays, names=('Upper', 'Lower'))
pd.DataFrame({'Score':['perfect','good','fair','bad']},index=mul_index)
Score
UpperLower
Aaperfect
bgood
Bafair
bbad
mul_index
MultiIndex([('A', 'a'),
            ('A', 'b'),
            ('B', 'a'),
            ('B', 'b')],
           names=['Upper', 'Lower'])
L1 = ['A','B']
L2 = ['a','b']
pd.MultiIndex.from_product([L1,L2],names=('Upper', 'Lower'))
MultiIndex([('A', 'a'),
            ('A', 'b'),
            ('B', 'a'),
            ('B', 'b')],
           names=['Upper', 'Lower'])
df_using_mul = df.set_index(['Class','Address'])
df_using_mul.head()
SchoolGenderHeightWeightMathPhysics
ClassAddress
C_1street_1S_1M1736334.0A+
street_2S_1F1927332.5B+
street_2S_1M1868287.2B+
street_2S_1F1678180.4B-
street_4S_1F1596484.8B+
#df_using_mul.loc['C_2','street_5']
#当索引不排序时,单个索引会报出性能警告
#df_using_mul.index.is_lexsorted()
#该函数检查是否排序
df_using_mul.sort_index().loc['C_2','street_5']
#df_using_mul.sort_index().index.is_lexsorted()
SchoolGenderHeightWeightMathPhysics
ClassAddress
C_2street_5S_1M1886897.0A-
street_5S_1F1626333.8B
street_5S_2M19310039.1B
#df_using_mul.loc[('C_2','street_5'):] 报错
#当不排序时,不能使用多层切片
df_using_mul.sort_index().loc[('C_2','street_6'):('C_3','street_4')]
#注意此处由于使用了loc,因此仍然包含右端点
SchoolGenderHeightWeightMathPhysics
ClassAddress
C_2street_6S_1M1605358.8A+
street_6S_1F1676368.4B-
street_7S_2F1947768.5B+
street_7S_2F1837685.4B
C_3street_1S_1F1755787.7A-
street_2S_1M1957085.2A
street_4S_1M1616831.5B+
street_4S_2F1577872.3B+
street_4S_2M1877348.9B
df_using_mul.sort_index().loc[('C_2','street_7'):'C_3'].head()
#非元组也是合法的,表示选中该层所有元素
SchoolGenderHeightWeightMathPhysics
ClassAddress
C_2street_7S_2F1947768.5B+
street_7S_2F1837685.4B
C_3street_1S_1F1755787.7A-
street_2S_1M1957085.2A
street_4S_1M1616831.5B+
df_using_mul.sort_index().loc[[('C_2','street_7'),('C_3','street_2')]]
SchoolGenderHeightWeightMathPhysics
ClassAddress
C_2street_7S_2F1947768.5B+
street_7S_2F1837685.4B
C_3street_2S_1M1957085.2A
L1,L2 = ['A','B','C'],['a','b','c']
mul_index1 = pd.MultiIndex.from_product([L1,L2],names=('Upper', 'Lower'))
L3,L4 = ['D','E','F'],['d','e','f']
mul_index2 = pd.MultiIndex.from_product([L3,L4],names=('Big', 'Small'))
df_s = pd.DataFrame(np.random.rand(9,9),index=mul_index1,columns=mul_index2)
df_s
BigDEF
Smalldefdefdef
UpperLower
Aa0.8681460.7799730.0400350.3129310.7427380.4009270.7198050.0987840.808752
b0.6404750.8712460.3000740.5596560.0328050.8150440.9093670.6175270.071200
c0.5648270.7201810.1684130.6050860.8154050.9198830.7671630.0990410.510799
Ba0.6454980.2128470.2391870.6648300.2435260.9072480.7609470.8117200.898771
b0.5984140.0958900.1248900.3793780.7232090.8423170.8172380.2574040.982154
c0.3328650.4726300.0175620.7415580.2547170.0232020.1311580.9888820.026893
Ca0.7154180.6535540.7380600.1612440.8515200.9024960.8435730.7614240.366131
b0.9941210.3625010.2923380.0163100.5329780.8904720.6210690.7094720.949864
c0.3781750.3580230.9689790.0210410.1983450.9013270.1654560.9305770.554062
idx=pd.IndexSlice
df_s.loc[idx['B':,df_s['D']['d']>0.3],idx[df_s.sum()>4]]
#df_s.sum()默认为对列求和,因此返回一个长度为9的数值列表
BigDEF
Smalldeefdef
UpperLower
Ba0.6454980.2128470.2435260.9072480.7609470.8117200.898771
b0.5984140.0958900.7232090.8423170.8172380.2574040.982154
c0.3328650.4726300.2547170.0232020.1311580.9888820.026893
Ca0.7154180.6535540.8515200.9024960.8435730.7614240.366131
b0.9941210.3625010.5329780.8904720.6210690.7094720.949864
c0.3781750.3580230.1983450.9013270.1654560.9305770.554062
df_using_mul.swaplevel(i=1,j=0,axis=0).sort_index().head()
SchoolGenderHeightWeightMathPhysics
AddressClass
street_1C_1S_1M1736334.0A+
C_2S_2M1757447.2B-
C_3S_1F1755787.7A-
street_2C_1S_1F1927332.5B+
C_1S_1M1868287.2B+
df_using_mul.swaplevel(i=1,j=0,axis=0).sort_index().head()

好多方法仅仅是看了一遍,周六再仔细看看

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值