import pandas as pd
import numpy as np
import matplotlib. pyplot as plt
# Build a demo frame of random standard-normal values: one row per calendar
# day from 2020-07-01 through 2020-07-20, one column per name in `col`.
# The generator is not seeded, so the values differ on every run.
date1 = pd.date_range(start='2020-7-1', end='2020-7-20')
col = [
    'open', 'high', 'close', 'low',
    'volume', 'price_change', 'p_change', 'turnover',
]
data = pd.DataFrame(
    np.random.randn(len(date1), len(col)),
    index=date1,
    columns=col,
)
# Bare expression: echoes the frame when run in an interactive session.
data
                open      high     close       low    volume  price_change  p_change  turnover
2020-07-01 -0.322513  0.267763 -1.093117  0.027041  0.509584      0.984490 -0.190861  0.517801
2020-07-02  2.089734 -0.983815 -1.046229 -0.036239 -0.256708      0.356689 -0.252771 -0.497880
2020-07-03 -2.133904  0.679809  1.807282 -0.566753 -1.837010      0.040434 -1.059297  0.555953
2020-07-04 -0.386285  0.956894  2.315036 -1.346728 -0.896410     -0.009206  0.474799 -0.496008
2020-07-05  0.751076 -1.149339  0.197884 -0.831805 -0.496330     -0.952271 -1.532102  0.425145
2020-07-06  1.756448 -0.564414 -0.750158 -1.222868  0.757163     -0.606473  0.166400 -0.793353
2020-07-07 -1.470281  0.624951 -1.186256 -0.688941 -0.746321     -1.896055 -1.641859  1.192342
2020-07-08  0.128879 -0.416039 -0.657877 -1.313704  0.945771     -2.131931  0.356988 -2.770012
2020-07-09 -1.380173  0.650889  0.033093  1.108484  2.143283     -1.000914  1.117865  0.440897
2020-07-10  0.273836 -0.454474 -0.287727  0.992828 -0.411735     -1.074596 -1.815277 -0.248398
2020-07-11  0.841820 -0.119436  0.446986 -0.290631  1.690281     -0.706691  1.628919  0.941540
2020-07-12  0.281178  1.508803 -2.302384  0.501423 -0.071525      0.060684  0.842450  1.369522
2020-07-13 -0.155110 -0.459593 -1.896780  0.860048 -0.023542     -0.524829 -0.672771  0.486148
2020-07-14 -0.687383  0.611564  0.170420 -2.491876 -0.971182     -0.705079 -1.247554  1.782681
2020-07-15  0.684804  0.765028  1.264321  1.250718 -0.833911      0.332943  1.901829 -0.623410
2020-07-16 -0.758880 -1.139305 -0.064042 -0.591721 -1.318660     -1.263623  1.448318 -0.238409
2020-07-17 -0.628691  0.481645 -0.221320  0.719116  0.459349     -0.842811  0.739307 -1.204500
2020-07-18 -1.424206  0.536644  0.513619  0.211149 -0.232468      0.230522  0.873776 -2.385903
2020-07-19  0.313115  2.312191  0.211485  0.320764  0.772857     -0.019631 -2.276235  0.401747
2020-07-20  0.372503  0.236578  0.916577 -1.189288 -0.210505     -0.370926  1.478682 -0.399276
算术运算
# Series.add(10) adds 10 to every value of the 'open' column (the method form
# of `+ 10`); head() previews the first five rows of the result.
data['open'].add(10).head()
2020-07-01 9.677487
2020-07-02 12.089734
2020-07-03 7.866096
2020-07-04 9.613715
2020-07-05 10.751076
Freq: D, Name: open, dtype: float64
# Series.sub(10) subtracts 10 from every value of the 'open' column (the
# method form of `- 10`); head() previews the first five rows of the result.
data['open'].sub(10).head()
2020-07-01 -10.322513
2020-07-02 -7.910266
2020-07-03 -12.133904
2020-07-04 -10.386285
2020-07-05 -9.248924
Freq: D, Name: open, dtype: float64
逻辑运算
逻辑运算符号:>、<、|(或)、&(与)
# Element-wise comparison: yields a boolean Series with the same index as
# `data`, True wherever the 'open' value is greater than zero.
data["open"] > 0
2020-07-01 False
2020-07-02 True
2020-07-03 False
2020-07-04 False
2020-07-05 True
2020-07-06 True
2020-07-07 False
2020-07-08 True
2020-07-09 False
2020-07-10 True
2020-07-11 True
2020-07-12 True
2020-07-13 False
2020-07-14 False
2020-07-15 True
2020-07-16 False
2020-07-17 False
2020-07-18 False
2020-07-19 True
2020-07-20 True
Freq: D, Name: open, dtype: bool
# Boolean-mask indexing: keep only the rows whose 'open' value is greater
# than zero, then preview the first five of them.
data[data["open"] > 0].head()
                open      high     close       low    volume  price_change  p_change  turnover
2020-07-02  2.089734 -0.983815 -1.046229 -0.036239 -0.256708      0.356689 -0.252771 -0.497880
2020-07-05  0.751076 -1.149339  0.197884 -0.831805 -0.496330     -0.952271 -1.532102  0.425145
2020-07-06  1.756448 -0.564414 -0.750158 -1.222868  0.757163     -0.606473  0.166400 -0.793353
2020-07-08  0.128879 -0.416039 -0.657877 -1.313704  0.945771     -2.131931  0.356988 -2.770012
2020-07-10  0.273836 -0.454474 -0.287727  0.992828 -0.411735     -1.074596 -1.815277 -0.248398
# Combine two element-wise conditions with `&` to keep rows where
# 0 < high < 1. Each comparison must be parenthesised because `&` binds more
# tightly than `>` and `<` in Python.
data[(data["high"] > 0) & (data["high"] < 1)].head()
            open      high     close       low    volume  price_change  p_change  turnover
2020-07-01     1  0.267763 -1.093117  0.027041  0.509584      0.984490 -0.190861  0.517801
2020-07-03     1  0.679809  1.807282 -0.566753 -1.837010      0.040434 -1.059297  0.555953
2020-07-04     1  0.956894  2.315036 -1.346728 -0.896410     -0.009206  0.474799 -0.496008
2020-07-07     1  0.624951 -1.186256 -0.688941 -0.746321     -1.896055 -1.641859  1.192342
2020-07-09     1  0.650889  0.033093  1.108484  2.143283     -1.000914  1.117865  0.440897
(注:此处 open 列全为 1,说明该输出是在下文把 open 列整体赋值为 1 之后重新运行得到的。)
逻辑运算函数
# Same filter expressed as a query string: column names are referenced
# directly inside the expression. Preview the first five matching rows.
data.query('high>0 & high<1').head()
            open      high     close       low    volume  price_change  p_change  turnover
2020-07-01     1  0.267763 -1.093117  0.027041  0.509584      0.984490 -0.190861  0.517801
2020-07-03     1  0.679809  1.807282 -0.566753 -1.837010      0.040434 -1.059297  0.555953
2020-07-04     1  0.956894  2.315036 -1.346728 -0.896410     -0.009206  0.474799 -0.496008
2020-07-07     1  0.624951 -1.186256 -0.688941 -0.746321     -1.896055 -1.641859  1.192342
2020-07-09     1  0.650889  0.033093  1.108484  2.143283     -1.000914  1.117865  0.440897
(注:此处 open 列全为 1,说明该输出是在下文把 open 列整体赋值为 1 之后重新运行得到的。)
# Overwrite every value of the 'open' column with the constant 1 so that the
# isin() demonstration below has rows to match.
# Bracket assignment is used instead of attribute assignment: attribute
# assignment only updates a column that already exists and otherwise silently
# creates a plain Python attribute on the frame, so pandas recommends the
# bracket form for writes.
# NOTE: this mutates `data` in place, so every later result computed from
# 'open' reflects the constant column.
data['open'] = 1
# isin() marks the rows whose 'open' value equals any of the listed values;
# the mask is used to select those rows and the first five are previewed.
data[data['open'].isin([1, 0, 5])].head()
            open      high     close       low    volume  price_change  p_change  turnover
2020-07-01     1  0.267763 -1.093117  0.027041  0.509584      0.984490 -0.190861  0.517801
2020-07-02     1 -0.983815 -1.046229 -0.036239 -0.256708      0.356689 -0.252771 -0.497880
2020-07-03     1  0.679809  1.807282 -0.566753 -1.837010      0.040434 -1.059297  0.555953
2020-07-04     1  0.956894  2.315036 -1.346728 -0.896410     -0.009206  0.474799 -0.496008
2020-07-05     1 -1.149339  0.197884 -0.831805 -0.496330     -0.952271 -1.532102  0.425145
统计运算
describe
综合分析: 能够直接得出很多统计结果,count, mean, std, min, max 等
# Summary statistics for every column in one call: count, mean, std, min,
# the 25% / 50% / 75% quantiles, and max.
data.describe()
         open       high      close        low     volume  price_change   p_change   turnover
count  20.0  20.000000  20.000000  20.000000  20.000000     20.000000  20.000000  20.000000
mean    1.0   0.217317  -0.081459  -0.228949  -0.051401     -0.504964   0.017030  -0.077169
std     0.0   0.888158   1.152481   0.999190   0.990270      0.776446   1.276293   1.151633
min     1.0  -1.149339  -2.302384  -2.491876  -1.837010     -2.131931  -2.276235  -2.770012
25%     1.0  -0.455754  -0.824176  -0.921176  -0.768218     -0.964431  -1.106361  -0.529262
50%     1.0   0.374704  -0.015474  -0.163435  -0.221487     -0.565651   0.261694   0.081669
75%     1.0   0.658119   0.463645   0.555847   0.571479      0.045497   0.934798   0.527339
max     1.0   2.312191   2.315036   1.250718   2.143283      0.984490   1.901829   1.782681
统计函数
count sum mean median(中位数) min max mode(众数) prod(积)
abs std(标准差) var(方差) idxmax(最大值位置) idxmin(最小值位置)
使用单个统计函数时,默认 axis=0,即对每一列分别计算(每列得到一个结果);
如果要对每一行分别计算,需要指定 axis=1。
max() min()
# Column-wise maximum and minimum (default axis=0: one result per column).
# NOTE: neither result is stored, and an interactive cell echoes only its
# last expression, so the output recorded after this block is data.min().
data.max()
data.min()
open 1.000000
high -1.149339
close -2.302384
low -2.491876
volume -1.837010
price_change -2.131931
p_change -2.276235
turnover -2.770012
dtype: float64
# Standard deviation of each column (default axis=0: one result per column).
data.std()
open 0.000000
high 0.888158
close 1.152481
low 0.999190
volume 0.990270
price_change 0.776446
p_change 1.276293
turnover 1.151633
dtype: float64
# For each column, the index label (here a date) of the row holding its
# maximum value.
data.idxmax()
open 2020-07-01
high 2020-07-19
close 2020-07-04
low 2020-07-15
volume 2020-07-09
price_change 2020-07-01
p_change 2020-07-15
turnover 2020-07-14
dtype: datetime64[ns]
# For each column, the index label (here a date) of the row holding its
# minimum value.
data.idxmin()
open 2020-07-01
high 2020-07-05
close 2020-07-12
low 2020-07-14
volume 2020-07-03
price_change 2020-07-08
p_change 2020-07-19
turnover 2020-07-08
dtype: datetime64[ns]
累计统计函数
cumsum:计算前 1/2/3/…/n 个数的和
cummax:计算前 1/2/3/…/n 个数的最大值
cummin:计算前 1/2/3/…/n 个数的最小值
cumprod:计算前 1/2/3/…/n 个数的积
# Plot the running total of the 'low' column.
# Sort by the date index first so the cumulative sum accumulates in
# chronological order.
data = data.sort_index()
data2 = data['low']
# cumsum() returns a Series of partial sums; plot() draws it against the index.
low_running_total = data2.cumsum()
low_running_total.plot()
plt.show()
自定义运算
apply(func, axis=0)
func:自定义函数
axis=0:默认值,对每一列应用 func;axis=1 表示对每一行应用 func
# Custom column-wise computation: with axis=0 the lambda receives each
# selected column as a Series and returns its range (max minus min).
# The double brackets select a one-column DataFrame, so the result is a
# Series keyed by the column name 'close'.
data[['close']].apply(lambda column: column.max() - column.min(), axis=0)
close 4.61742
dtype: float64