import pandas as pd
import numpy as np
# Load the sample table from disk and preview its first rows.
df = pd.read_csv("data/table.csv")
df.head()
  School Class    ID Gender   Address  Height  Weight  Math Physics
0    S_1   C_1  1101      M  street_1     173      63  34.0      A+
1    S_1   C_1  1102      F  street_2     192      73  32.5      B+
2    S_1   C_1  1103      M  street_2     186      82  87.2      B+
3    S_1   C_1  1104      F  street_2     167      81  80.4      B-
4    S_1   C_1  1105      F  street_4     159      64  84.8      B+
# Requesting dtype="category" makes the Series categorical at construction.
pd.Series(list("abca"), dtype="category")
0 a
1 b
2 c
3 a
dtype: category
Categories (3, object): [a, b, c]
# Build the Categorical explicitly, spelling out the allowed categories,
# then wrap it in a Series.
cat = pd.Categorical(
    list("abca"),
    categories=list("abc"),
)
pd.Series(cat)
0 a
1 b
2 c
3 a
dtype: category
Categories (3, object): [a, b, c]
# Bin five random integers drawn from [0, 60) into three intervals.
# The intervals are closed on the right (see the printed categories),
# so a drawn 0 belongs to no bin.
pd.cut(
    np.random.randint(low=0, high=60, size=5),
    bins=[0, 10, 30, 60],
)
[(30, 60], (30, 60], (30, 60], (10, 30], (10, 30]]
Categories (3, interval[int64]): [(0, 10] < (10, 30] < (30, 60]]
# Same binning, but with left-closed intervals and readable labels
# in place of the interval objects.
pd.cut(
    np.random.randint(low=0, high=60, size=5),
    bins=[0, 10, 30, 60],
    right=False,
    labels=["0-10", "10-30", "30-60"],
)
[30-60, 10-30, 30-60, 0-10, 0-10]
Categories (3, object): [0-10 < 10-30 < 30-60]
# Five observations (one of them missing) over four declared categories;
# category 'd' is declared but never observed.
s = pd.Series(
    pd.Categorical(
        ["a", "b", "c", "a", np.nan],
        categories=["a", "b", "c", "d"],
    )
)
# Summary statistics: count, unique, top and freq.
s.describe()
count 4
unique 3
top a
freq 2
dtype: object
# Index of every declared category, including ones with no observations.
s.cat.categories
Index(['a', 'b', 'c', 'd'], dtype='object')
# Whether the categories carry an order.
s.cat.ordered
False
s = pd.Series(
    pd.Categorical(
        ["a", "b", "c", "a", np.nan],
        categories=list("abcd"),
    )
)
# set_categories swaps in a whole new category list; values whose label
# is not in the new list show up as NaN in the result.
s.cat.set_categories(["new_a", "c"])
0 NaN
1 NaN
2 c
3 NaN
4 NaN
dtype: category
Categories (2, object): [new_a, c]
s = pd.Series(
    pd.Categorical(
        ["a", "b", "c", "a", np.nan],
        categories=list("abcd"),
    )
)
# Rename every category by position: each label gets a "new_" prefix.
s.cat.rename_categories([f"new_{label}" for label in s.cat.categories])
0 new_a
1 new_b
2 new_c
3 new_a
4 NaN
dtype: category
Categories (4, object): [new_a, new_b, new_c, new_d]
# A mapping renames only the listed categories; the others keep their labels.
s.cat.rename_categories({old: f"new_{old}" for old in ("a", "b")})
0 new_a
1 new_b
2 c
3 new_a
4 NaN
dtype: category
Categories (4, object): [new_a, new_b, c, d]
s = pd.Series(
    pd.Categorical(
        ["a", "b", "c", "a", np.nan],
        categories=list("abcd"),
    )
)
# Append a new, so far unobserved, category to the end of the list.
s.cat.add_categories(["e"])
0 a
1 b
2 c
3 a
4 NaN
dtype: category
Categories (5, object): [a, b, c, d, e]
s = pd.Series(
    pd.Categorical(
        ["a", "b", "c", "a", np.nan],
        categories=list("abcd"),
    )
)
# Drop the named category from the category list.
s.cat.remove_categories(["d"])
0 a
1 b
2 c
3 a
4 NaN
dtype: category
Categories (3, object): [a, b, c]
s = pd.Series(
    pd.Categorical(
        ["a", "b", "c", "a", np.nan],
        categories=list("abcd"),
    )
)
# Drop every category that has no observations (here: 'd').
s.cat.remove_unused_categories()
0 a
1 b
2 c
3 a
4 NaN
dtype: category
Categories (3, object): [a, b, c]
# Convert to categorical, then mark the categories as ordered.
s = (
    pd.Series(["a", "d", "c", "a"])
    .astype("category")
    .cat.as_ordered()
)
s
0 a
1 d
2 c
3 a
dtype: category
Categories (3, object): [a < c < d]
# Drop the ordering again; the category labels themselves are kept.
s.cat.as_unordered()
0 a
1 d
2 c
3 a
dtype: category
Categories (3, object): [a, c, d]
# set_categories can attach an order while setting the category list.
(
    pd.Series(["a", "d", "c", "a"])
    .astype("category")
    .cat.set_categories(["a", "c", "d"], ordered=True)
)
0 a
1 d
2 c
3 a
dtype: category
Categories (3, object): [a < c < d]
s = pd.Series(list("adca")).astype("category")
# reorder_categories keeps the same labels and fixes their order.
s.cat.reorder_categories(["a", "c", "d"], ordered=True)
0 a
1 d
2 c
3 a
dtype: category
Categories (3, object): [a < c < d]
# Draw 50 random ratings and store them as a categorical Series.
s = pd.Series(
    np.random.choice(["perfect", "good", "fair", "bad", "awful"], 50)
).astype("category")
# Impose the quality order awful < bad < fair < good < perfect.
# The result has to be assigned back: set_categories returns a new Series,
# and without the assignment `s` stays unordered, so later sorts would only
# follow the alphabetical order of the labels.
s = s.cat.set_categories(
    ["awful", "bad", "fair", "good", "perfect"],
    ordered=True,
)
s.head()
0 awful
1 awful
2 awful
3 perfect
4 awful
dtype: category
Categories (5, object): [awful < bad < fair < good < perfect]
# Sort descending by category and preview the top rows.
s.sort_values(ascending=False).head()
49 perfect
33 perfect
3 perfect
10 perfect
16 perfect
dtype: category
Categories (5, object): [awful, bad, fair, good, perfect]
# Pair each rating with a random value and use the ratings as the index.
df_sort = pd.DataFrame(
    {
        "cat": s.values,
        "value": np.random.randn(50),
    }
).set_index("cat")
df_sort.head()
            value
cat
awful   -0.397948
awful   -0.188164
awful   -0.543448
perfect  0.410447
awful   -1.689240
# Sort the rows by their categorical index and preview the result.
df_sort.sort_index().head()
          value
cat
awful -0.397948
awful -0.826390
awful -1.087261
awful -0.012140
awful  0.813705
s = pd.Series(list("adca")).astype("category")
# Comparing against a scalar yields an element-wise boolean Series.
s == "a"
0 True
1 False
2 False
3 True
dtype: bool
# Element-wise comparison against a list of the same length.
s == ["a", "b", "c", "d"]
0 True
1 False
2 True
3 False
dtype: bool
s = pd.Series(list("adca")).astype("category")
# Equality between two categorical Series that share the same categories.
s == s
0 True
1 True
2 True
3 True
dtype: bool
# Inequality is the element-wise negation of the equality check.
s != s
0 False
1 False
2 False
3 False
dtype: bool
# Re-code the same values against a different category list ('a', 'd', 'e').
s_new = s.cat.set_categories(["a", "d", "e"])
# Start from the plain categorical Series, then give the categories an
# explicit order so that an order comparison such as >= can be evaluated.
s = pd.Series(list("adca")).astype("category")
s = s.cat.reorder_categories(["a", "c", "d"], ordered=True)
s >= s
0 True
1 True
2 True
3 True
dtype: bool