Pandas实战中的进阶技巧(1)

1.对字典的值进行排序

# Sort the entries of a dict-like object by value, largest first.
import operator 
importance = model.get_fscore() # dict-like mapping: it is consumed through .items() below
importance = sorted(importance.items(),key = operator.itemgetter(1),reverse = True)

# The result is a list of (key, value) tuples that looks like this
# [('iBlade1TempBattBox_1sec', 164), ('iCableTwistTotal', 163), ('iKWhOverall_h', 159), ('iPitchAngle3', 15), ('iUL2_690V', 14), ('iIL1_690V', 14), ('iTempCntr_1sec', 12), ('iIL2_690V', 7), ('iVibrationY', 3), ('iReactivePower', 1), ('iActivePoweiSetPointValue', 1), ('iIL3_690V', 1)]


2.pickle存储和读取

import pickle

# Save: serialize the object bound to `abc` into abc.pickle
with open('abc.pickle', 'wb') as f:
    pickle.dump(abc, f)

# Load: read the object back from abc.pickle
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open('abc.pickle', 'rb') as f:
    abc = pickle.load(f)

3.查看属性的值分别是多少以及出现了多少次
# List the distinct values present in the column
df['Label'].unique()
# Count how many times each value occurs
df['Label'].value_counts()

4.pandas读取大文件

# Stream the CSV in pieces of 100000 rows instead of parsing it in one go
reader = pd.read_csv('ANN/20003-2017-03/20003001#2017-03.csv', chunksize=100000)
chunks = list(reader)  # iterating stops once the reader is exhausted
print('Iteration is stoped')
df = pd.concat(chunks, ignore_index=True)  # stitch every chunk back together

5.去除值全部相同的属性

# Keep only the columns in which at least one value differs from the first row.
# (.ix was removed in pandas 1.0; .loc / .iloc are its replacements.)
df = df.loc[:, (df != df.iloc[0]).any()]

6.Pandas使用Dataframe的时候减少内存的方法

参考:

https://www.kaggle.com/arjanso/reducing-dataframe-memory-size-by-65

def _smallest_int_dtype(mn, mx):
    """Return the narrowest NumPy integer type whose range contains [mn, mx].

    Unsigned types are tried when ``mn`` is non-negative, signed types
    otherwise. Returns ``None`` when no candidate fits.
    """
    if mn >= 0:
        candidates = (np.uint8, np.uint16, np.uint32, np.uint64)
    else:
        candidates = (np.int8, np.int16, np.int32, np.int64)
    for candidate in candidates:
        limits = np.iinfo(candidate)
        if limits.min <= mn and mx <= limits.max:
            return candidate
    return None


def reduce_mem_usage(props):
    """Downcast the numeric columns of a DataFrame to smaller dtypes.

    Columns whose dtype kind is bool, signed/unsigned integer or float are
    processed; every other column (object, string, datetime, categorical,
    ...) is left untouched. A column holding only whole numbers is cast to
    the narrowest integer type that contains its value range; any other
    processed column is cast to ``float32``.

    Integer types cannot hold NaN, so a column with missing values has them
    replaced by ``min - 1`` (a sentinel below every real value) before the
    type is chosen. This replacement is applied to float columns as well.

    Args:
        props: the DataFrame to shrink. It is modified in place.

    Returns:
        A ``(props, NAlist)`` tuple: the same DataFrame object, and the list
        of column names that contained non-finite values (NaN or +/-inf).
        Only NaN is replaced; columns containing inf stay floating point.
    """
    start_mem_usg = props.memory_usage().sum() / 1024**2
    print("Memory usage of properties dataframe is :",start_mem_usg," MB")
    NAlist = []  # Keeps track of columns that have missing values filled in.
    for col in props.columns:
        # b/i/u/f = bool, signed int, unsigned int, float. Anything else
        # (strings, datetimes, categoricals, ...) cannot be downcast here.
        if props[col].dtype.kind not in "biuf":
            continue

        # Print current column type
        print("******************************")
        print("Column: ",col)
        print("dtype before: ",props[col].dtype)

        mx = props[col].max()
        mn = props[col].min()

        # Integer does not support NA, therefore, NA needs to be filled
        if not np.isfinite(props[col]).all():
            NAlist.append(col)
            fill_value = mn - 1
            # Assign back instead of fillna(inplace=True) on a selection,
            # which is not guaranteed to write through to the DataFrame.
            props[col] = props[col].fillna(fill_value)
            # The sentinel is now the column minimum; the dtype choice below
            # must see it, otherwise e.g. -1 would be cast to an unsigned type.
            mn = fill_value

        values = props[col]
        if values.dtype.kind in "biu":
            # Already integral, no value test needed.
            IsInt = True
        else:
            # Compare element-wise: summing the differences would let
            # fractional parts of opposite sign cancel out (0.5 and -0.5).
            # The finiteness check comes first because inf cannot be cast
            # to an integer.
            IsInt = bool(np.isfinite(values).all()) and bool(
                (values == values.astype(np.int64)).all()
            )

        if IsInt:
            # Make Integer/unsigned Integer datatypes
            target = _smallest_int_dtype(mn, mx)
            if target is not None:
                props[col] = values.astype(target)
        else:
            # Make float datatypes 32 bit
            props[col] = values.astype(np.float32)

        # Print new column type
        print("dtype after: ",props[col].dtype)
        print("******************************")

    # Print final result
    print("___MEMORY USAGE AFTER COMPLETION:___")
    mem_usg = props.memory_usage().sum() / 1024**2
    print("Memory usage is: ",mem_usg," MB")
    print("This is ",100*mem_usg/start_mem_usg,"% of the initial size")
    return props, NAlist

#-------------------------------------------------------------------------
# reduce_mem_usage returns a (dataframe, NA-filled column names) tuple; unpack
# it so that `train` / `test` stay DataFrames instead of becoming tuples.
train = pd.read_csv('../input/train_V2.csv')
train, train_na_cols = reduce_mem_usage(train)
test = pd.read_csv('../input/test_V2.csv')
test, test_na_cols = reduce_mem_usage(test)

7.Pandas的nunique()

# 表示A,B属性中不相同的元素的数量
>>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 1, 1]})
>>> df.nunique()
A    3
B    1

# 表示每行中不相同的元素的数量
>>> df.nunique(axis=1)
0    1
1    2
2    2

8.可以直接调用Series的画图函数plot

# One figure holding a single row of two axes
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

# value_counts() yields a Series, so its own plot() method can draw it directly
match_types = train.groupby('matchId')['matchType'].first()
match_types.value_counts().plot(kind='bar', ax=ax[0])

9.Pandas中的query的用法

# Boolean-mask selection: keep the rows where total_bill is greater than 20
df[df['total_bill'] > 20]

# Equivalent to
df.query('total_bill > 20')

10.Pandas的层次化索引
https://blog.csdn.net/Darkman_EX/article/details/80723802
https://cloud.tencent.com/developer/ask/48201
https://pandas.pydata.org/pandas-docs/stable/advanced.html

# Summary statistics of the two columns of interest, per matchType
cols = ['numGroups', 'maxPlace']
desc1 = train.groupby('matchType')[cols].describe()
print(desc1)

# The columns form a two-level index (column name, statistic): take every
# entry on the first level but only these statistics on the second level.
wanted_stats = ['min', 'mean', 'max']
every_column = slice(None)
print(desc1.loc[:, (every_column, wanted_stats)])

           numGroups                                                      \
               count       mean        std  min   25%   50%   75%    max   
matchType                                                                  
duo        1322628.0  45.812482   3.164604  1.0  45.0  46.0  48.0   52.0   
solo        720713.0  91.115157  11.437120  1.0  91.0  93.0  95.0  100.0   
squad      2403625.0  27.039389   2.348066  2.0  26.0  27.0  28.0   37.0   

            maxPlace                                                      
               count       mean        std  min   25%   50%   75%    max  
matchType                                                                 
duo        1322628.0  47.608919   2.911739  3.0  47.0  48.0  49.0   52.0  
solo        720713.0  93.908771  10.135402  1.0  94.0  96.0  97.0  100.0  
squad      2403625.0  27.982982   2.205999  2.0  27.0  28.0  29.0   37.0  



           numGroups               maxPlace            
                mean  min    max       mean  min    max
matchType                                              
duo        45.812482  1.0   52.0  47.608919  3.0   52.0
solo       91.115157  1.0  100.0  93.908771  1.0  100.0
squad      27.039389  2.0   37.0  27.982982  2.0   37.0

11.Pandas的isin函数

>>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
>>> df.isin([1, 3, 12, 'a'])
       A      B
0   True   True
1  False  False
2   True  False

12.Pandas中的Groupby
https://www.cnblogs.com/lemonbit/p/6810972.html

13.Pandas中的count()和size()的不同

# Both are used for counting, but the return types differ: count() returns a DataFrame, size() returns a Series
train.groupby(['matchType','matchId']).size()

matchType  matchId       
duo        0003b92987589e    100
           0006eb8c17708d     93
           00086c74bb4efc     98
           001125344b660c     96
           001360264d4b5f     91
           0014d9d1b0aff6     94
           0016fe3ee17ce7     97
           00177a6ce4dfb5     92
           0019bc34b3c58e     97
           0019d729577e9c     94
           0027a504cd3b0c     99
           002aeef57764f8     99
           002b3b8e0e3870     95
           003228f5dc4934     97
           0043d584520de4     97
           00489048e21690     93
           004cddec9da619     93
           005166f6ee4ab6     97
           0052116de68e46     99
           005712df62d721     96
           005908fb69efc4     93
           005e236012c55f     97
           005e3f527c4a6a     97
           0063217a97a803     93
           00656cf8a4dbe6     98
           006755987f2217     93
           006d25449eb444     96
           007124bcf16c97     92
           007510af0aaa9e     92
           007edf38197cec     84
                            ... 
squad      ffc364ae0cdae2     92
           ffc66d8b3e21f7     84
           ffc6fd9da2b020     98
           ffc89e1f6347df     84
           ffc9f30599cc27     97
           ffca00bb37147a     95
           ffca3124b378ad     93
           ffcb58c4fe9192     93
           ffcbc1b5255d3a     98
           ffcdc0aa9e4b09     97
           ffd07bb11c36c3     97
           ffd19b3585e2ba     93
           ffd334bce1b8ca     98
           ffd8796ff5f1d6     94
           ffd8c5746d9be8     97
           ffdf8e9dc18596     98
           ffead5be0b50e8     98
           ffeaf5f7073d41     89
           ffeb2131284e45     94
           ffed6b75ebe4ce     91
           ffed79cf43d7d7     97
           ffef306edf6b36     98
           fff0a2994a6f3a     87
           fff2c218352941     98
           fff497dff0186d     90
           fff4d3cbb6c317    100
           fffa170cf8ed83     93
           fffd74b5150cb4     97
           fffe562611d981     94
           fffe92232706aa     93
Length: 47965, dtype: int64
# count() on the same grouping returns a DataFrame with one column of counts per remaining column
train.groupby(['matchType','matchId']).count()


                           Id  groupId  assists  boosts  damageDealt  DBNOs  \
matchType matchId                                                             
duo       0003b92987589e  100      100      100     100          100    100   
          0006eb8c17708d   93       93       93      93           93     93   
          00086c74bb4efc   98       98       98      98           98     98   
          001125344b660c   96       96       96      96           96     96   
          001360264d4b5f   91       91       91      91           91     91   
          0014d9d1b0aff6   94       94       94      94           94     94   
          0016fe3ee17ce7   97       97       97      97           97     97   
          00177a6ce4dfb5   92       92       92      92           92     92   
          0019bc34b3c58e   97       97       97      97           97     97   
          0019d729577e9c   94       94       94      94           94     94   
          0027a504cd3b0c   99       99       99      99           99     99   
          002aeef57764f8   99       99       99      99           99     99   
          002b3b8e0e3870   95       95       95      95           95     95   
          003228f5dc4934   97       97       97      97           97     97   
          0043d584520de4   97       97       97      97           97     97   
          00489048e21690   93       93       93      93           93     93   
          004cddec9da619   93       93       93      93           93     93   
          005166f6ee4ab6   97       97       97      97           97     97   
          0052116de68e46   99       99       99      99           99     99   
          005712df62d721   96       96       96      96           96     96   
          005908fb69efc4   93       93       93      93           93     93   
          005e236012c55f   97       97       97      97           97     97   
          005e3f527c4a6a   97       97       97      97           97     97   
          0063217a97a803   93       93       93      93           93     93   
          00656cf8a4dbe6   98       98       98      98           98     98   
          006755987f2217   93       93       93      93           93     93   
          006d25449eb444   96       96       96      96           96     96   
          007124bcf16c97   92       92       92      92           92     92   
          007510af0aaa9e   92       92       92      92           92     92   
          007edf38197cec   84       84       84      84           84     84   
...                       ...      ...      ...     ...          ...    ...   
squad     ffc364ae0cdae2   92       92       92      92           92     92   
          ffc66d8b3e21f7   84       84       84      84           84     84   
          ffc6fd9da2b020   98       98       98      98           98     98   
          ffc89e1f6347df   84       84       84      84           84     84   
          ffc9f30599cc27   97       97       97      97           97     97   
          ffca00bb37147a   95       95       95      95           95     95   
          ffca3124b378ad   93       93       93      93           93     93   
          ffcb58c4fe9192   93       93       93      93           93     93   
          ffcbc1b5255d3a   98       98       98      98           98     98   
          ffcdc0aa9e4b09   97       97       97      97           97     97   
          ffd07bb11c36c3   97       97       97      97           97     97   
          ffd19b3585e2ba   93       93       93      93           93     93   
          ffd334bce1b8ca   98       98       98      98           98     98   
          ffd8796ff5f1d6   94       94       94      94           94     94   
          ffd8c5746d9be8   97       97       97      97           97     97   
          ffdf8e9dc18596   98       98       98      98           98     98   
          ffead5be0b50e8   98       98       98      98           98     98   
          ffeaf5f7073d41   89       89       89      89           89     89   
          ffeb2131284e45   94       94       94      94           94     94   
          ffed6b75ebe4ce   91       91       91      91           91     91   
          ffed79cf43d7d7   97       97       97      97           97     97   
          ffef306edf6b36   98       98       98      98           98     98   
          fff0a2994a6f3a   87       87       87      87           87     87   
          fff2c218352941   98       98       98      98           98     98   
          fff497dff0186d   90       90       90      90           90     90   
          fff4d3cbb6c317  100      100      100     100          100    100   
          fffa170cf8ed83   93       93       93      93           93     93   
          fffd74b5150cb4   97       97       97      97           97     97   
          fffe562611d981   94       94       94      94           94     94   
          fffe92232706aa   93       93       93      93           93     93   

                          headshotKills  heals  killPlace  killPoints  \
matchType matchId                                                       
duo       0003b92987589e            100    100        100         100   
          0006eb8c17708d             93     93         93          93   
          00086c74bb4efc             98     98         98          98   
          001125344b660c             96     96         96          96   
          001360264d4b5f             91     91         91          91   
          0014d9d1b0aff6             94     94         94          94   
          0016fe3ee17ce7             97     97         97          97   
          00177a6ce4dfb5             92     92         92          92   
          0019bc34b3c58e             97     97         97          97   
          0019d729577e9c             94     94         94          94   
          0027a504cd3b0c             99     99         99          99   
          002aeef57764f8             99     99         99          99   
          002b3b8e0e3870             95     95         95          95   
          003228f5dc4934             97     97         97          97   
          0043d584520de4             97     97         97          97   
          00489048e21690             93     93         93          93   
          004cddec9da619             93     93         93          93   
          005166f6ee4ab6             97     97         97          97   
          0052116de68e46             99     99         99          99   
          005712df62d721             96     96         96          96   
          005908fb69efc4             93     93         93          93   
          005e236012c55f             97     97         97          97   
          005e3f527c4a6a             97     97         97          97   
          0063217a97a803             93     93         93          93   
          00656cf8a4dbe6             98     98         98          98   
          006755987f2217             93     93         93          93   
          006d25449eb444             96     96         96          96   
          007124bcf16c97             92     92         92          92   
          007510af0aaa9e             92     92         92          92   
          007edf38197cec             84     84         84          84   
...                                 ...    ...        ...         ...   
squad     ffc364ae0cdae2             92     92         92          92   
          ffc66d8b3e21f7             84     84         84          84   
          ffc6fd9da2b020             98     98         98          98   
          ffc89e1f6347df             84     84         84          84   
          ffc9f30599cc27             97     97         97          97   
          ffca00bb37147a             95     95         95          95   
          ffca3124b378ad             93     93         93          93   
          ffcb58c4fe9192             93     93         93          93   
          ffcbc1b5255d3a             98     98         98          98   
          ffcdc0aa9e4b09             97     97         97          97   
          ffd07bb11c36c3             97     97         97          97   
          ffd19b3585e2ba             93     93         93          93   
          ffd334bce1b8ca             98     98         98          98   
          ffd8796ff5f1d6             94     94         94          94   
          ffd8c5746d9be8             97     97         97          97   
          ffdf8e9dc18596             98     98         98          98   
          ffead5be0b50e8             98     98         98          98   
          ffeaf5f7073d41             89     89         89          89   
          ffeb2131284e45             94     94         94          94   
          ffed6b75ebe4ce             91     91         91          91   
          ffed79cf43d7d7             97     97         97          97   
          ffef306edf6b36             98     98         98          98   
          fff0a2994a6f3a             87     87         87          87   
          fff2c218352941             98     98         98          98   
          fff497dff0186d             90     90         90          90   
          fff4d3cbb6c317            100    100        100         100   
          fffa170cf8ed83             93     93         93          93   
          fffd74b5150cb4             97     97         97          97   
          fffe562611d981             94     94         94          94   
          fffe92232706aa             93     93         93          93   

                              ...       revives  rideDistance  roadKills  \
matchType matchId             ...                                          
duo       0003b92987589e      ...           100           100        100   
          0006eb8c17708d      ...            93            93         93   
          00086c74bb4efc      ...            98            98         98   
          001125344b660c      ...            96            96         96   
          001360264d4b5f      ...            91            91         91   
          0014d9d1b0aff6      ...            94            94         94   
          0016fe3ee17ce7      ...            97            97         97   
          00177a6ce4dfb5      ...            92            92         92   
          0019bc34b3c58e      ...            97            97         97   
          0019d729577e9c      ...            94            94         94   
          0027a504cd3b0c      ...            99            99         99   
          002aeef57764f8      ...            99            99         99   
          002b3b8e0e3870      ...            95            95         95   
          003228f5dc4934      ...            97            97         97   
          0043d584520de4      ...            97            97         97   
          00489048e21690      ...            93            93         93   
          004cddec9da619      ...            93            93         93   
          005166f6ee4ab6      ...            97            97         97   
          0052116de68e46      ...            99            99         99   
          005712df62d721      ...            96            96         96   
          005908fb69efc4      ...            93            93         93   
          005e236012c55f      ...            97            97         97   
          005e3f527c4a6a      ...            97            97         97   
          0063217a97a803      ...            93            93         93   
          00656cf8a4dbe6      ...            98            98         98   
          006755987f2217      ...            93            93         93   
          006d25449eb444      ...            96            96         96   
          007124bcf16c97      ...            92            92         92   
          007510af0aaa9e      ...            92            92         92   
          007edf38197cec      ...            84            84         84   
...                           ...           ...           ...        ...   
squad     ffc364ae0cdae2      ...            92            92         92   
          ffc66d8b3e21f7      ...            84            84         84   
          ffc6fd9da2b020      ...            98            98         98   
          ffc89e1f6347df      ...            84            84         84   
          ffc9f30599cc27      ...            97            97         97   
          ffca00bb37147a      ...            95            95         95   
          ffca3124b378ad      ...            93            93         93   
          ffcb58c4fe9192      ...            93            93         93   
          ffcbc1b5255d3a      ...            98            98         98   
          ffcdc0aa9e4b09      ...            97            97         97   
          ffd07bb11c36c3      ...            97            97         97   
          ffd19b3585e2ba      ...            93            93         93   
          ffd334bce1b8ca      ...            98            98         98   
          ffd8796ff5f1d6      ...            94            94         94   
          ffd8c5746d9be8      ...            97            97         97   
          ffdf8e9dc18596      ...            98            98         98   
          ffead5be0b50e8      ...            98            98         98   
          ffeaf5f7073d41      ...            89            89         89   
          ffeb2131284e45      ...            94            94         94   
          ffed6b75ebe4ce      ...            91            91         91   
          ffed79cf43d7d7      ...            97            97         97   
          ffef306edf6b36      ...            98            98         98   
          fff0a2994a6f3a      ...            87            87         87   
          fff2c218352941      ...            98            98         98   
          fff497dff0186d      ...            90            90         90   
          fff4d3cbb6c317      ...           100           100        100   
          fffa170cf8ed83      ...            93            93         93   
          fffd74b5150cb4      ...            97            97         97   
          fffe562611d981      ...            94            94         94   
          fffe92232706aa      ...            93            93         93   

                          swimDistance  teamKills  vehicleDestroys  \
matchType matchId                                                    
duo       0003b92987589e           100        100              100   
          0006eb8c17708d            93         93               93   
          00086c74bb4efc            98         98               98   
          001125344b660c            96         96               96   
          001360264d4b5f            91         91               91   
          0014d9d1b0aff6            94         94               94   
          0016fe3ee17ce7            97         97               97   
          00177a6ce4dfb5            92         92               92   
          0019bc34b3c58e            97         97               97   
          0019d729577e9c            94         94               94   
          0027a504cd3b0c            99         99               99   
          002aeef57764f8            99         99               99   
          002b3b8e0e3870            95         95               95   
          003228f5dc4934            97         97               97   
          0043d584520de4            97         97               97   
          00489048e21690            93         93               93   
          004cddec9da619            93         93               93   
          005166f6ee4ab6            97         97               97   
          0052116de68e46            99         99               99   
          005712df62d721            96         96               96   
          005908fb69efc4            93         93               93   
          005e236012c55f            97         97               97   
          005e3f527c4a6a            97         97               97   
          0063217a97a803            93         93               93   
          00656cf8a4dbe6            98         98               98   
          006755987f2217            93         93               93   
          006d25449eb444            96         96               96   
          007124bcf16c97            92         92               92   
          007510af0aaa9e            92         92               92   
          007edf38197cec            84         84               84   
...                                ...        ...              ...   
squad     ffc364ae0cdae2            92         92               92   
          ffc66d8b3e21f7            84         84               84   
          ffc6fd9da2b020            98         98               98   
          ffc89e1f6347df            84         84               84   
          ffc9f30599cc27            97         97               97   
          ffca00bb37147a            95         95               95   
          ffca3124b378ad            93         93               93   
          ffcb58c4fe9192            93         93               93   
          ffcbc1b5255d3a            98         98               98   
          ffcdc0aa9e4b09            97         97               97   
          ffd07bb11c36c3            97         97               97   
          ffd19b3585e2ba            93         93               93   
          ffd334bce1b8ca            98         98               98   
          ffd8796ff5f1d6            94         94               94   
          ffd8c5746d9be8            97         97               97   
          ffdf8e9dc18596            98         98               98   
          ffead5be0b50e8            98         98               98   
          ffeaf5f7073d41            89         89               89   
          ffeb2131284e45            94         94               94   
          ffed6b75ebe4ce            91         91               91   
          ffed79cf43d7d7            97         97               97   
          ffef306edf6b36            98         98               98   
          fff0a2994a6f3a            87         87               87   
          fff2c218352941            98         98               98   
          fff497dff0186d            90         90               90   
          fff4d3cbb6c317           100        100              100   
          fffa170cf8ed83            93         93               93   
          fffd74b5150cb4            97         97               97   
          fffe562611d981            94         94               94   
          fffe92232706aa            93         93               93   

                          walkDistance  weaponsAcquired  winPoints  \
matchType matchId                                                    
duo       0003b92987589e           100              100        100   
          0006eb8c17708d            93               93         93   
          00086c74bb4efc            98               98         98   
          001125344b660c            96               96         96   
          001360264d4b5f            91               91         91   
          0014d9d1b0aff6            94               94         94   
          0016fe3ee17ce7            97               97         97   
          00177a6ce4dfb5            92               92         92   
          0019bc34b3c58e            97               97         97   
          0019d729577e9c            94               94         94   
          0027a504cd3b0c            99               99         99   
          002aeef57764f8            99               99         99   
          002b3b8e0e3870            95               95         95   
          003228f5dc4934            97               97         97   
          0043d584520de4            97               97         97   
          00489048e21690            93               93         93   
          004cddec9da619            93               93         93   
          005166f6ee4ab6            97               97         97   
          0052116de68e46            99               99         99   
          005712df62d721            96               96         96   
          005908fb69efc4            93               93         93   
          005e236012c55f            97               97         97   
          005e3f527c4a6a            97               97         97   
          0063217a97a803            93               93         93   
          00656cf8a4dbe6            98               98         98   
          006755987f2217            93               93         93   
          006d25449eb444            96               96         96   
          007124bcf16c97            92               92         92   
          007510af0aaa9e            92               92         92   
          007edf38197cec            84               84         84   
...                                ...              ...        ...   
squad     ffc364ae0cdae2            92               92         92   
          ffc66d8b3e21f7            84               84         84   
          ffc6fd9da2b020            98               98         98   
          ffc89e1f6347df            84               84         84   
          ffc9f30599cc27            97               97         97   
          ffca00bb37147a            95               95         95   
          ffca3124b378ad            93               93         93   
          ffcb58c4fe9192            93               93         93   
          ffcbc1b5255d3a            98               98         98   
          ffcdc0aa9e4b09            97               97         97   
          ffd07bb11c36c3            97               97         97   
          ffd19b3585e2ba            93               93         93   
          ffd334bce1b8ca            98               98         98   
          ffd8796ff5f1d6            94               94         94   
          ffd8c5746d9be8            97               97         97   
          ffdf8e9dc18596            98               98         98   
          ffead5be0b50e8            98               98         98   
          ffeaf5f7073d41            89               89         89   
          ffeb2131284e45            94               94         94   
          ffed6b75ebe4ce            91               91         91   
          ffed79cf43d7d7            97               97         97   
          ffef306edf6b36            98               98         98   
          fff0a2994a6f3a            87               87         87   
          fff2c218352941            98               98         98   
          fff497dff0186d            90               90         90   
          fff4d3cbb6c317           100              100        100   
          fffa170cf8ed83            93               93         93   
          fffd74b5150cb4            97               97         97   
          fffe562611d981            94               94         94   
          fffe92232706aa            93               93         93   

                          winPlacePerc  
matchType matchId                       
duo       0003b92987589e           100  
          0006eb8c17708d            93  
          00086c74bb4efc            98  
          001125344b660c            96  
          001360264d4b5f            91  
          0014d9d1b0aff6            94  
          0016fe3ee17ce7            97  
          00177a6ce4dfb5            92  
          0019bc34b3c58e            97  
          0019d729577e9c            94  
          0027a504cd3b0c            99  
          002aeef57764f8            99  
          002b3b8e0e3870            95  
          003228f5dc4934            97  
          0043d584520de4            97  
          00489048e21690            93  
          004cddec9da619            93  
          005166f6ee4ab6            97  
          0052116de68e46            99  
          005712df62d721            96  
          005908fb69efc4            93  
          005e236012c55f            97  
          005e3f527c4a6a            97  
          0063217a97a803            93  
          00656cf8a4dbe6            98  
          006755987f2217            93  
          006d25449eb444            96  
          007124bcf16c97            92  
          007510af0aaa9e            92  
          007edf38197cec            84  
...                                ...  
squad     ffc364ae0cdae2            92  
          ffc66d8b3e21f7            84  
          ffc6fd9da2b020            98  
          ffc89e1f6347df            84  
          ffc9f30599cc27            97  
          ffca00bb37147a            95  
          ffca3124b378ad            93  
          ffcb58c4fe9192            93  
          ffcbc1b5255d3a            98  
          ffcdc0aa9e4b09            97  
          ffd07bb11c36c3            97  
          ffd19b3585e2ba            93  
          ffd334bce1b8ca            98  
          ffd8796ff5f1d6            94  
          ffd8c5746d9be8            97  
          ffdf8e9dc18596            98  
          ffead5be0b50e8            98  
          ffeaf5f7073d41            89  
          ffeb2131284e45            94  
          ffed6b75ebe4ce            91  
          ffed79cf43d7d7            97  
          ffef306edf6b36            98  
          fff0a2994a6f3a            87  
          fff2c218352941            98  
          fff497dff0186d            90  
          fff4d3cbb6c317           100  
          fffa170cf8ed83            93  
          fffd74b5150cb4            97  
          fffe562611d981            94  
          fffe92232706aa            93  

[47965 rows x 27 columns]

14.Pandas中的first
https://codeday.me/bug/20180109/115070.html

# 含义是获取每个group的第一条记录(准确地说,first()返回的是每个group中各列的第一个非空值;如果要严格取每个group的第一行,可以使用nth(0)或head(1))

15.groupby后的计数统计

# 下面这两条语句的作用相同,都是统计每场比赛(matchId)中group的数量
train.groupby(['matchType','matchId','groupId']).count().groupby(['matchType','matchId']).size()
train.groupby(['matchType','matchId'])['groupId'].nunique()

16.Pandas中的to_frame()

# 由于nunique()(size()同理)会返回一个Series,结果没有列名;使用to_frame()可以将Series转化成为DataFrame,并通过参数指定列名
train.groupby(['matchType','matchId'])['groupId'].nunique()
train.groupby(['matchType','matchId'])['groupId'].nunique().to_frame('group in match')

matchType  matchId       
duo        0003b92987589e    47
           0006eb8c17708d    44
           00086c74bb4efc    48
           001125344b660c    47
           001360264d4b5f    44
           0014d9d1b0aff6    47
           0016fe3ee17ce7    47
           00177a6ce4dfb5    47
           0019bc34b3c58e    47
           0019d729577e9c    47
           0027a504cd3b0c    50
           002aeef57764f8    49
           002b3b8e0e3870    46
           003228f5dc4934    47
           0043d584520de4    48
           00489048e21690    45
           004cddec9da619    46
           005166f6ee4ab6    47
           0052116de68e46    50
           005712df62d721    47
           005908fb69efc4    47
           005e236012c55f    45
           005e3f527c4a6a    48
           0063217a97a803    47
           00656cf8a4dbe6    46
           006755987f2217    45
           006d25449eb444    45
           007124bcf16c97    45
           007510af0aaa9e    47
           007edf38197cec    41
                             ..
squad      ffc364ae0cdae2    26
           ffc66d8b3e21f7    25
           ffc6fd9da2b020    29
           ffc89e1f6347df    27
           ffc9f30599cc27    24
           ffca00bb37147a    26
           ffca3124b378ad    25
           ffcb58c4fe9192    26
           ffcbc1b5255d3a    29
           ffcdc0aa9e4b09    26
           ffd07bb11c36c3    29
           ffd19b3585e2ba    27
           ffd334bce1b8ca    29
           ffd8796ff5f1d6    27
           ffd8c5746d9be8    29
           ffdf8e9dc18596    30
           ffead5be0b50e8    28
           ffeaf5f7073d41    25
           ffeb2131284e45    30
           ffed6b75ebe4ce    26
           ffed79cf43d7d7    25
           ffef306edf6b36    26
           fff0a2994a6f3a    26
           fff2c218352941    28
           fff497dff0186d    29
           fff4d3cbb6c317    29
           fffa170cf8ed83    28
           fffd74b5150cb4    27
           fffe562611d981    23
           fffe92232706aa    29
Name: groupId, Length: 47965, dtype: int64

                          group in match
matchType matchId                       
duo       0003b92987589e              47
          0006eb8c17708d              44
          00086c74bb4efc              48
          001125344b660c              47
          001360264d4b5f              44
          0014d9d1b0aff6              47
          0016fe3ee17ce7              47
          00177a6ce4dfb5              47
          0019bc34b3c58e              47
          0019d729577e9c              47
          0027a504cd3b0c              50
          002aeef57764f8              49
          002b3b8e0e3870              46
          003228f5dc4934              47
          0043d584520de4              48
          00489048e21690              45
          004cddec9da619              46
          005166f6ee4ab6              47
          0052116de68e46              50
          005712df62d721              47
          005908fb69efc4              47
          005e236012c55f              45
          005e3f527c4a6a              48
          0063217a97a803              47
          00656cf8a4dbe6              46
          006755987f2217              45
          006d25449eb444              45
          007124bcf16c97              45
          007510af0aaa9e              47
          007edf38197cec              41
...                                  ...
squad     ffc364ae0cdae2              26
          ffc66d8b3e21f7              25
          ffc6fd9da2b020              29
          ffc89e1f6347df              27
          ffc9f30599cc27              24
          ffca00bb37147a              26
          ffca3124b378ad              25
          ffcb58c4fe9192              26
          ffcbc1b5255d3a              29
          ffcdc0aa9e4b09              26
          ffd07bb11c36c3              29
          ffd19b3585e2ba              27
          ffd334bce1b8ca              29
          ffd8796ff5f1d6              27
          ffd8c5746d9be8              29
          ffdf8e9dc18596              30
          ffead5be0b50e8              28
          ffeaf5f7073d41              25
          ffeb2131284e45              30
          ffed6b75ebe4ce              26
          ffed79cf43d7d7              25
          ffef306edf6b36              26
          fff0a2994a6f3a              26
          fff2c218352941              28
          fff497dff0186d              29
          fff4d3cbb6c317              29
          fffa170cf8ed83              28
          fffd74b5150cb4              27
          fffe562611d981              23
          fffe92232706aa              29

[47965 rows x 1 columns]

16.Pandas中查看属性最大的几个元素

print(group['players in group'].nlargest(5))

matchType  matchId         groupId       
squad      b30f3d87189aa6  14d6b54cdec6bc    74
duo        3e029737889ce9  b8275198faa03b    72
solo       41a634f62f86b7  128b07271aa012    64
duo        7e93ce71ac6f61  7385e5fe214021    49
squad      3c2531adf5b942  e52a2e6ca30474    36
Name: players in group, dtype: int64

17.画图subplots

fix,ax = plt.subplots(1,2,figsize = (12,4))
train['matchDuration'].plot(kind = 'hist',ax = ax[0])
train.query('matchDuration >= 1400 & matchDuration <= 1800')['matchDuration'].plot(kind = 'hist',ax = ax[1])

18.画图subplot

# 调整图片尺寸
plt.figure(figsize = (12,4))

# 创建axes
plt.subplot(1,2,1)
plt.hist(x = train['matchDuration'],bins = 50)
plt.subplot(1,2,2)
plt.hist(x =train.query('matchDuration >= 1400 & matchDuration <= 1800')['matchDuration'],bins = 50 )

19.利用any判断
any() 函数用于判断给定的可迭代参数 iterable 中的元素是否全部为 False:如果全部为 False,则返回 False;只要有一个元素为 True,则返回 True。
元素除了 0、空(空字符串、空容器、None)、False 外都算 True。

# 示例
any([0, '', None])   # False,所有元素都为假
any([0, 1, ''])      # True,至少有一个元素为真

20.Pandas中的cut
https://medium.com/@morris_tai/pandas的cut-qcut函數-93c244e34cfc
https://blog.csdn.net/cc_jjj/article/details/78878878

21.判断属性是否包含以及取反

train['matchType'].str.contains('solo')

0          False
1          False
2          False
3          False
4           True
5          False
6          False
7           True
8          False
9          False
10         False
11         False
12         False
13          True
14         False
15         False
16         False
17          True
18         False
19         False
20         False
21         False
22         False
23         False
24         False
25         False
26         False
27         False
28         False
29         False
           ...  
4446936    False
4446937    False
4446938    False
4446939    False
4446940    False
4446941    False
4446942    False
4446943    False
4446944    False
4446945    False
4446946     True
4446947    False
4446948    False
4446949    False
4446950     True
4446951    False
4446952    False
4446953    False
4446954     True
4446955    False
4446956    False
4446957    False
4446958    False
4446959     True
4446960    False
4446961    False
4446962     True
4446963    False
4446964    False
4446965     True
Name: matchType, Length: 4446966, dtype: bool
~train['matchType'].str.contains('solo')

0           True
1           True
2           True
3           True
4          False
5           True
6           True
7          False
8           True
9           True
10          True
11          True
12          True
13         False
14          True
15          True
16          True
17         False
18          True
19          True
20          True
21          True
22          True
23          True
24          True
25          True
26          True
27          True
28          True
29          True
           ...  
4446936     True
4446937     True
4446938     True
4446939     True
4446940     True
4446941     True
4446942     True
4446943     True
4446944     True
4446945     True
4446946    False
4446947     True
4446948     True
4446949     True
4446950    False
4446951     True
4446952     True
4446953     True
4446954    False
4446955     True
4446956     True
4446957     True
4446958     True
4446959    False
4446960     True
4446961     True
4446962    False
4446963     True
4446964     True
4446965    False
Name: matchType, Length: 4446966, dtype: bool
# 还可以进行选择,选择train中所有包含‘solo’的行
train.loc[train['matchType'].str.contains('solo')]

22.对属性值进行排序
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.sort_values.html

>>> df = pd.DataFrame({
...     'col1' : ['A', 'A', 'B', np.nan, 'D', 'C'],
...     'col2' : [2, 1, 9, 8, 7, 4],
...     'col3': [0, 1, 9, 4, 2, 3],
... })
>>> df
    col1 col2 col3
0   A    2    0
1   A    1    1
2   B    9    9
3   NaN  8    4
4   D    7    2
5   C    4    3

>>> df.sort_values(by=['col1'])
    col1 col2 col3
0   A    2    0
1   A    1    1
2   B    9    9
5   C    4    3
4   D    7    2
3   NaN  8    4

23.更改具体值

# 把返回为True的行中的值进行更改
sub.loc[sub[col] >= 5, col] = '5+'

24.Pandas中的交叉表和透视表
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.pivot_table.html
https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.crosstab.html
https://zhuanlan.zhihu.com/p/31952948
https://blog.csdn.net/hustqb/article/details/78086394

# 简单地说,data用来传入DataFrame,index指定用作行索引(分组依据)的列,values指定要聚合查看的列,columns指定用作列索引的列(用来增加列的层次),aggfunc用来选择聚合方法,默认是求均值(np.mean)


df = pd.DataFrame({"A": [1, 1, 1, 1, 1,
                         2, 2, 2, 2],
                    "B": [1, 1, 1, 1, 2,
                          1, 1, 2, 2],
                    "C": [1, 1, 1, 2,
                          2, 1, 2, 2,
                          1],
                    "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})


	A	B	C	D
0	1	1	1	1
1	1	1	1	2
2	1	1	1	2
3	1	1	2	3
4	1	2	2	3
5	2	1	1	4
6	2	1	2	5
7	2	2	2	6
8	2	2	1	7
pd.pivot_table(data = df, index = 'A')

     B    C    D
A               
1  1.2  1.4  2.2
2  1.5  1.5  5.5
pd.pivot_table(data = df, index = 'A',values = ['B','C'])

     B    C
A          
1  1.2  1.4
2  1.5  1.5
print(pd.pivot_table(data = df, index = 'A',values = ['B','C'],columns = 'D'))

     B                                  C                              
D    1    2    3    4    5    6    7    1    2    3    4    5    6    7
A                                                                      
1  1.0  1.0  1.5  NaN  NaN  NaN  NaN  1.0  1.0  2.0  NaN  NaN  NaN  NaN
2  NaN  NaN  NaN  1.0  1.0  2.0  2.0  NaN  NaN  NaN  1.0  2.0  2.0  1.0
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值