#encoding:utf8
import pandas as pd
import numpy as np
# Small integer Series whose labels deliberately skip some letters.
s = pd.Series([1, 2, 3, 4, 5], index=["a", "c", "e", "f", "h"])
print(s)
'''
a 1
c 2
e 3
f 4
h 5
'''
print(s.index)
'''
Index(['a', 'c', 'e', 'f', 'h'], dtype='object')
'''
# Target labels for the reindex calls below; 'b', 'd' and 'g' are not in s.
_full_labels = list("abcdefgh")
# Reindex and give the labels that are missing from s the value 0
# instead of NaN.
print(s.reindex(_full_labels, fill_value=0))
'''
a 1
b 0
c 2
d 0
e 3
f 4
g 0
h 5
'''
# Plain reindex: the labels missing from s come back as NaN and, per the
# transcript below, the result is printed as float64.
print(s.reindex(_full_labels))
# method='ffill' copies the previous non-NaN value into each gap (comparable
# to carrying a suspended stock's last price forward while trading is halted).
# method='bfill' would instead use the next non-NaN value.
print(s.reindex(_full_labels, method="ffill"))
'''
a 1.0
b NaN
c 2.0
d NaN
e 3.0
f 4.0
g NaN
h 5.0
dtype: float64
a 1
b 1
c 2
d 2
e 3
f 4
g 4
h 5
'''
# 4x6 frame of random normal samples; the row labels deliberately skip letters.
# The transcripts below come from separate runs, so their numbers differ.
_columns = ['one', 'two', 'three', 'four', 'five', 'six']
df = pd.DataFrame(np.random.randn(4, 6), index=list('ADFH'), columns=_columns)
print(df)
'''
one two three four five six
A 0.352770 0.302011 0.375550 1.804725 -0.494243 -0.467798
D -0.246352 -1.346173 -0.194345 -0.050121 -1.695538 -0.666932
F -1.134675 0.889683 0.603448 2.041425 -0.537469 -0.551439
H 1.916636 0.433567 1.072732 -1.391239 0.732202 -0.829673
'''
# Reindex the rows of the 2-D data; rows that did not exist before are
# filled with 0 instead of NaN. The transcript shows a run without
# fill_value (NaN rows) followed by a run with fill_value=0.
df2 = df.reindex(index=list('ABCDEFGH'), fill_value=0)
print(df2)
'''
one two three four five six
A 0.617191 0.687148 1.274273 -0.839415 0.792152 -0.536064
B NaN NaN NaN NaN NaN NaN
C NaN NaN NaN NaN NaN NaN
D -0.730075 -0.286531 -1.884375 1.139414 -0.169306 0.217407
E NaN NaN NaN NaN NaN NaN
F 1.132639 0.130489 0.894960 0.700022 0.825214 -1.424234
G NaN NaN NaN NaN NaN NaN
H -0.197997 1.464797 -0.733199 -0.366465 -0.709581 0.780381
one two three four five six
A -0.741244 2.237643 0.596041 -1.825212 1.535922 -1.279042
B 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
C 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
D 0.799521 0.453463 0.935007 0.469048 -1.783111 -0.145021
E 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
F 0.355039 -0.500475 -0.444605 -0.559341 0.031650 1.377536
G 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
H -0.363621 0.510240 0.088605 -1.108609 -0.799488 0.681844
'''
# Reindex the columns of the 2-D data; the new column 'seven' is filled
# with 0. The closing note inside the transcript says the `method=` fill
# only takes effect for rows, not columns -- this script does not
# demonstrate that, so treat it as an unverified remark.
print(df.reindex(columns=_columns + ['seven'], fill_value=0))
'''
one two three four five six seven
A 0.886400 -0.423722 -0.236410 -2.955891 1.138746 0.617567 NaN
D 0.604896 0.496586 -0.209181 -1.913454 0.022793 -2.085502 NaN
F 1.120339 -0.510216 -2.438642 -0.648351 -0.047299 -0.569957 NaN
H 1.390851 -0.539437 -0.378924 -0.976334 2.274232 0.002144 NaN
one two three four five six seven
A -1.548185 -0.310676 -0.441914 0.576015 0.969689 -0.450120 0
D 0.247333 -0.559566 -0.352404 0.235390 -0.078221 0.990842 0
F -0.582162 0.672071 0.582770 0.761390 -0.039544 -0.411953 0
H 1.799309 0.494148 0.847326 -0.958537 -2.313566 -0.286750 0
另外method方法只对行有效,列无效的
'''
# Print copies with a row or some columns left out; the original data is
# not modified, which the final print(df) confirms.
print(df.drop(index='A'))
# Dropping works on rows by default (axis=0); columns are axis=1, spelled
# here with the equivalent columns= keyword.
print(df.drop(columns=['one', 'two']))
print(df)
'''
one two three four five six
D 0.595548 -1.324211 -1.654202 -0.661661 0.461671 1.273477
F 0.045223 0.951209 0.654337 -0.530489 1.707179 0.973863
H 0.808623 0.627833 1.630329 0.287034 0.143080 -0.406583
three four five six
A 2.154951 0.848024 1.028920 0.753677
D -1.654202 -0.661661 0.461671 1.273477
F 0.654337 -0.530489 1.707179 0.973863
H 1.630329 0.287034 0.143080 -0.406583
one two three four five six
A 1.413738 0.819763 2.154951 0.848024 1.028920 0.753677
D 0.595548 -1.324211 -1.654202 -0.661661 0.461671 1.273477
F 0.045223 0.951209 0.654337 -0.530489 1.707179 0.973863
H 0.808623 0.627833 1.630329 0.287034 0.143080 -0.406583
'''
# Deterministic 4x3 frame holding 0..11, used for the apply examples.
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=['one', 'two', 'three', 'four'],
    columns=['A', 'B', 'C'],
)
print(df)
'''
A B C
one 0 1 2
two 3 4 5
three 6 7 8
four 9 10 11
'''
# Column-wise computation: with the default axis the function is called
# once per column.
print(df.apply(lambda col: col.max() - col.min()))
'''
A 9
B 9
C 9
'''
# Row-wise computation: axis 0 (the default) works on columns, axis 1
# (alias 'columns', see the help excerpt below) works on rows.
print(df.apply(lambda row: row.max() - row.min(), axis='columns'))
'''
one 2
two 2
three 2
four 2
'''
# Show the built-in help for apply.
help(df.apply)
'''
axis : {0 or 'index', 1 or 'columns'}, default 0
* 0 or 'index': apply function to each column
* 1 or 'columns': apply function to each row
'''
# apply also accepts a full named function instead of an anonymous lambda.
def min_max(x):
    """Return the smallest and largest value of ``x`` as a labelled Series.

    Args:
        x: Object exposing ``min()`` and ``max()``. When used with
            ``DataFrame.apply`` this is one column, or one row with
            ``axis=1``.

    Returns:
        A ``pd.Series`` with index ``['min', 'max']`` holding ``x.min()``
        and ``x.max()``.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])
# One min/max pair per column: the result has rows 'min' and 'max' and
# keeps the original column labels.
_per_column = df.apply(min_max)
print(_per_column)
'''
A B C
min 0 1 2
max 9 10 11
'''
# One min/max pair per row: the result keeps the row labels and gets the
# columns 'min' and 'max'.
_per_row = df.apply(min_max, axis='columns')
print(_per_row)
'''
min max
one 0 2
two 3 5
three 6 8
four 9 11
'''
# Render every float of a DataFrame with a fixed number of decimals.
# The two transcripts below come from different runs of random data.
df = pd.DataFrame(
    np.random.randn(4, 3),
    index=['one', 'two', 'three', 'four'],
    columns=list('ABC'),
)
print(df)
'''
A B C
one -0.163500 1.513105 0.620532
two -0.372754 1.180852 -0.013991
three -1.065681 0.286195 -1.399696
four 1.042050 -0.251143 -1.671825
'''


def formater(x):
    """Return ``x`` formatted as text with three digits after the point."""
    return '%.03f' % x


# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map. Use map when this pandas version provides it and fall
# back to applymap on older versions, so the script runs on both.
_elementwise = df.map if hasattr(df, 'map') else df.applymap
print(_elementwise(formater))
'''
A B C
one 0.030 -0.223 -0.038
two -0.358 -0.020 0.557
three 0.820 -0.646 0.296
four 0.273 0.765 0.625
'''
# Sorting: order the rows by column 'one', largest value first.
# The transcript holds an ascending run followed by a descending run;
# the data is random, so the two tables show different numbers.
df = pd.DataFrame(
    np.random.randint(1, 10, size=(4, 3)),
    index=list('ABCD'),
    columns=['one', 'two', 'three'],
)
print(df.sort_values('one', ascending=False))
'''
one two three
C 1 4 1
A 2 7 1
D 6 7 1
B 7 5 9
one two three
B 8 4 5
C 8 1 8
D 3 4 6
A 2 2 2
'''
# Ranking of the elements.
# The transcript first shows the default method ('average', where the tied
# 6s share rank 4.5) and then method='first', where ties are broken by
# order of appearance: the earlier 6 gets rank 4, the later one rank 5.
s = pd.Series([3, 6, 2, 6, 4])
print(s.rank(method='first'))
'''
0 2.0
1 4.5
2 1.0
3 4.5
4 3.0
0 2.0
1 4.0
2 1.0
3 5.0
4 3.0
先出现的排名较高,默认是method=average
'''
# The same ranking applied to a DataFrame; per the transcript each column
# is ranked on its own.
print(df)
print(df.rank(method='first'))
'''
one two three
A 7 1 4
B 5 2 8
C 4 3 9
D 9 6 5
one two three
A 3.0 1.0 1.0
B 2.0 2.0 3.0
C 1.0 3.0 4.0
D 4.0 4.0 2.0
'''
# Series of single characters with many repeats.
s = pd.Series(list('aaaabbbdbdbdbdjdjkfk'))
# How often each distinct value occurs, most frequent first.
_counts = s.value_counts()
print(_counts)
'''
b 6
d 5
a 4
k 2
j 2
f 1
'''
# The distinct elements, without repeats.
_distinct = s.unique()
print(_distinct)
'''
['a' 'b' 'd' 'j' 'k' 'f']
获取不重复的元素列表
'''
# Element-wise test of whether each value is one of the given candidates.
print(s.isin(['a', 'c', 'k']))
'''
0 True
1 True
2 True
3 True
4 False
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
15 False
16 False
17 True
18 False
19 True
'''
# Every element is among the Series' own distinct values, so all are True.
print(s.isin(_distinct))
'''
0 True
1 True
2 True
3 True
4 True
5 True
6 True
7 True
8 True
9 True
10 True
11 True
12 True
13 True
14 True
15 True
16 True
17 True
18 True
19 True
'''