linux 计算标准差,5-pandas基础运算

最新推荐文章于 2024-01-22 13:56:59 发布

伊斯特艾格

最新推荐文章于 2024-01-22 13:56:59 发布

阅读量224

点赞数

文章标签： linux 计算标准差

#encoding:utf8

import pandas as pd

import numpy as np

# Build a small integer Series whose labels skip some letters on purpose.
labels = ['a', 'c', 'e', 'f', 'h']
s = pd.Series([1, 2, 3, 4, 5], index=labels)
print(s)

'''

a 1

c 2

e 3

f 4

h 5

'''

# The labels live in the Series' Index object.
print(s.index)

'''

Index(['a', 'c', 'e', 'f', 'h'], dtype='object')

'''

# Reindex onto the full a..h range; labels that did not exist before are
# filled with 0 instead of NaN.
full_labels = list('abcdefgh')
zero_filled = s.reindex(full_labels, fill_value=0)
print(zero_filled)

'''

a 1

b 0

c 2

d 0

e 3

f 4

g 0

h 5

'''

# Without fill_value or method, labels missing from the original Series come back as NaN (and the result dtype becomes float64).

print(s.reindex(list('abcdefgh')))

# method='ffill' fills each missing label with the last preceding non-NaN value (like carrying a stock's last price through a trading halt); method='bfill' would use the next following value instead.

print(s.reindex(list('abcdefgh'),method='ffill'))

'''

a 1.0

b NaN

c 2.0

d NaN

e 3.0

f 4.0

g NaN

h 5.0

dtype: float64

a 1

b 1

c 2

d 2

e 3

f 4

g 4

h 5

'''

# 4x6 frame of standard-normal samples with a sparse row index.
row_labels = list('ADFH')
col_labels = ['one', 'two', 'three', 'four', 'five', 'six']
df = pd.DataFrame(np.random.randn(4, 6), index=row_labels, columns=col_labels)
print(df)

'''

one two three four five six

A 0.352770 0.302011 0.375550 1.804725 -0.494243 -0.467798

D -0.246352 -1.346173 -0.194345 -0.050121 -1.695538 -0.666932

F -1.134675 0.889683 0.603448 2.041425 -0.537469 -0.551439

H 1.916636 0.433567 1.072732 -1.391239 0.732202 -0.829673

'''

# Reindex the rows of the 2-D frame onto A..H; rows that did not exist are
# filled with 0 rather than NaN.
all_rows = list('ABCDEFGH')
df2 = df.reindex(index=all_rows, fill_value=0)
print(df2)

'''

one two three four five six

A 0.617191 0.687148 1.274273 -0.839415 0.792152 -0.536064

B NaN NaN NaN NaN NaN NaN

C NaN NaN NaN NaN NaN NaN

D -0.730075 -0.286531 -1.884375 1.139414 -0.169306 0.217407

E NaN NaN NaN NaN NaN NaN

F 1.132639 0.130489 0.894960 0.700022 0.825214 -1.424234

G NaN NaN NaN NaN NaN NaN

H -0.197997 1.464797 -0.733199 -0.366465 -0.709581 0.780381

one two three four five six

A -0.741244 2.237643 0.596041 -1.825212 1.535922 -1.279042

B 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

C 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

D 0.799521 0.453463 0.935007 0.469048 -1.783111 -0.145021

E 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

F 0.355039 -0.500475 -0.444605 -0.559341 0.031650 1.377536

G 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

H -0.363621 0.510240 0.088605 -1.108609 -0.799488 0.681844

'''

# Column reindexing on a 2-D frame: ask for an extra 'seven' column that the
# frame does not have and fill it with 0.
wanted_columns = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
print(df.reindex(columns=wanted_columns, fill_value=0))

'''

one two three four five six seven

A 0.886400 -0.423722 -0.236410 -2.955891 1.138746 0.617567 NaN

D 0.604896 0.496586 -0.209181 -1.913454 0.022793 -2.085502 NaN

F 1.120339 -0.510216 -2.438642 -0.648351 -0.047299 -0.569957 NaN

H 1.390851 -0.539437 -0.378924 -0.976334 2.274232 0.002144 NaN

one two three four five six seven

A -1.548185 -0.310676 -0.441914 0.576015 0.969689 -0.450120 0

D 0.247333 -0.559566 -0.352404 0.235390 -0.078221 0.990842 0

F -0.582162 0.672071 0.582770 0.761390 -0.039544 -0.411953 0

H 1.799309 0.494148 0.847326 -0.958537 -2.313566 -0.286750 0

另外method方法只对行有效，列无效的

'''

# drop() returns a new frame without the given row/column labels;
# the original frame is not modified.
without_row_a = df.drop('A')
print(without_row_a)

# axis=0 (the default) drops row labels, axis=1 drops column labels.
without_one_two = df.drop(['one', 'two'], axis=1)
print(without_one_two)

# Printing df again shows it still has every row and column.
print(df)

'''

one two three four five six

D 0.595548 -1.324211 -1.654202 -0.661661 0.461671 1.273477

F 0.045223 0.951209 0.654337 -0.530489 1.707179 0.973863

H 0.808623 0.627833 1.630329 0.287034 0.143080 -0.406583

three four five six

A 2.154951 0.848024 1.028920 0.753677

D -1.654202 -0.661661 0.461671 1.273477

F 0.654337 -0.530489 1.707179 0.973863

H 1.630329 0.287034 0.143080 -0.406583

one two three four five six

A 1.413738 0.819763 2.154951 0.848024 1.028920 0.753677

D 0.595548 -1.324211 -1.654202 -0.661661 0.461671 1.273477

F 0.045223 0.951209 0.654337 -0.530489 1.707179 0.973863

H 0.808623 0.627833 1.630329 0.287034 0.143080 -0.406583

'''

# 4x3 frame holding the integers 0..11, row by row.
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=['one', 'two', 'three', 'four'],
    columns=list('ABC'),
)
print(df)

'''

A B C

one 0 1 2

two 3 4 5

three 6 7 8

four 9 10 11

'''


def _spread(values):
    """Return the difference between the largest and smallest entry."""
    return values.max() - values.min()


# Column-wise computation: with the default axis=0 the function receives
# one column at a time.
print(df.apply(_spread))

'''

A 9

B 9

C 9

'''

# Row-wise computation: axis=1 hands the function one row at a time
# (axis=0, the default, works on columns).
print(df.apply(_spread, axis=1))

'''

one 2

two 2

three 2

four 2

'''

# Show the built-in help for df.apply (it documents, among other things, the meaning of the axis parameter).

help(df.apply)

'''

axis : {0 or 'index', 1 or 'columns'}, default 0

* 0 or 'index': apply function to each column

* 1 or 'columns': apply function to each row

'''

# apply() also accepts an ordinary named function, which can be more
# elaborate than an anonymous lambda.
def min_max(x):
    """Summarise a Series by its extremes.

    Parameters:
        x: a pandas Series (apply() passes one column or one row at a time).

    Returns:
        A two-element Series labelled 'min' and 'max' holding the smallest
        and largest value of ``x``.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])

# Column-wise: every column of df yields one column of the result, with
# rows labelled 'min' and 'max'.
column_extremes = df.apply(min_max)
print(column_extremes)

'''

A B C

min 0 1 2

max 9 10 11

'''

# Row-wise: every row of df yields one row of the result, with columns
# labelled 'min' and 'max'.
row_extremes = df.apply(min_max, axis=1)
print(row_extremes)

'''

min max

one 0 2

two 3 5

three 6 8

four 9 11

'''

# Format every float in a DataFrame to a fixed number of decimal places.
df = pd.DataFrame(np.random.randn(4,3),index=['one','two','three','four'],columns=list('ABC'))

print(df)

'''

A B C

one -0.163500 1.513105 0.620532

two -0.372754 1.180852 -0.013991

three -1.065681 0.286195 -1.399696

four 1.042050 -0.251143 -1.671825

'''


def formater(x):
    """Render a number as a string with exactly three digits after the decimal point."""
    return '%.03f' % x


# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map (same element-wise semantics); use map when it exists and
# fall back to applymap on older pandas releases.
_elementwise = df.map if hasattr(df, 'map') else df.applymap
print(_elementwise(formater))

'''

A B C

one 0.030 -0.223 -0.038

two -0.358 -0.020 0.557

three 0.820 -0.646 0.296

four 0.273 0.765 0.625

'''

# Sorting: a 4x3 frame of random integers drawn from 1..9.
df = pd.DataFrame(
    np.random.randint(1, 10, (4, 3)),
    columns=['one', 'two', 'three'],
    index=list('ABCD'),
)

# Order the rows by column 'one', largest value first.
sorted_desc = df.sort_values(by='one', ascending=False)
print(sorted_desc)

'''

one two three

C 1 4 1

A 2 7 1

D 6 7 1

B 7 5 9

one two three

B 8 4 5

C 8 1 8

D 3 4 6

A 2 2 2

'''

# Ranking the elements of a Series.
s = pd.Series([3, 6, 2, 6, 4])

# method='first' breaks ties by order of appearance, so the two 6s get
# ranks 4 and 5 rather than sharing an averaged rank.
first_ranks = s.rank(method='first')
print(first_ranks)

'''

0 2.0

1 4.5

2 1.0

3 4.5

4 3.0

0 2.0

1 4.0

2 1.0

3 5.0

4 3.0

先出现的排名较高，默认是method=average

'''

# Show the frame, then its ranks: rank() on a DataFrame ranks the values
# within each column, with ties broken by order of appearance.
print(df)

column_ranks = df.rank(method='first')
print(column_ranks)

'''

one two three

A 7 1 4

B 5 2 8

C 4 3 9

D 9 6 5

one two three

A 3.0 1.0 1.0

B 2.0 2.0 3.0

C 1.0 3.0 4.0

D 4.0 4.0 2.0

'''

# A Series of single characters with many repeats.
letters = list('aaaabbbdbdbdbdjdjkfk')
s = pd.Series(letters)

# How often each distinct value occurs, most frequent first.
frequencies = s.value_counts()
print(frequencies)

'''

b 6

d 5

a 4

k 2

j 2

f 1

'''

# The distinct values, in order of first appearance.
distinct = s.unique()
print(distinct)

'''

['a' 'b' 'd' 'j' 'k' 'f']

获取不重复的元素列表

'''

# Element-wise membership test against a list of candidate values.
candidates = ['a', 'c', 'k']
print(s.isin(candidates))

'''

0 True

1 True

2 True

3 True

4 False

5 False

6 False

7 False

8 False

9 False

10 False

11 False

12 False

13 False

14 False

15 False

16 False

17 True

18 False

19 True

'''

# Every element belongs to the Series' own set of distinct values, so this
# membership test is True everywhere.
own_values = s.unique()
print(s.isin(own_values))

'''

0 True

1 True

2 True

3 True

4 True

5 True

6 True

7 True

8 True

9 True

10 True

11 True

12 True

13 True

14 True

15 True

16 True

17 True

18 True

19 True

'''

伊斯特艾格

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
linux 计算标准差,5-pandas基础运算

#encoding:utf8 import pandas as pd import numpy as np s = pd.Series([1,2,3,4,5],index=list('acefh')) print(s) ''' a 1 c 2 e 3 f 4 h 5 ''' print(s.index) ''' Index(['a', 'c', 'e', 'f', 'h'], dtype='ob...
复制链接

扫一扫

linux 计算标准差,5-pandas基础运算

“相关推荐”对你有帮助么？