import pandas as pd
import random
import numpy as np
# Build the small demo frame used by every example below: n_rows consecutive
# daily timestamps starting 2000-01-01 as the index, and columns 'A' and 'B'.
n_rows = 5
n_cols = 2
df = pd.DataFrame(
    np.random.randn(n_rows, n_cols),
    index=pd.date_range('1/1/2000', periods=n_rows),
    columns=['A', 'B'],
)
# Scale the random draws by 10 and truncate toward zero to get small integers.
# This is the vectorised form of the per-element int(xx*10) loop: casting
# floats with astype truncates toward zero exactly like int() does.
df = df.mul(10).astype('int64')
df
| A | B |
---|
2000-01-01 | -18 | 3 |
---|
2000-01-02 | 5 | -4 |
---|
2000-01-03 | -2 | 8 |
---|
2000-01-04 | 0 | 1 |
---|
2000-01-05 | -18 | 3 |
---|
pct_change
## pct_change() computes the fractional change between each row and the row
## `periods` steps earlier (the values are ratios, not multiplied by 100).
## With periods=1 the first row has no predecessor, so it is NaN.
## A predecessor equal to 0 makes the ratio infinite (see column A on
## 2000-01-05 in the output below, where the previous value is 0).
df.pct_change(periods=1) # b{t}=(a{t}-a{t-1})/a{t-1}
| A | B |
---|
2000-01-01 | NaN | NaN |
---|
2000-01-02 | -1.277778 | -2.333333 |
---|
2000-01-03 | -1.400000 | -3.000000 |
---|
2000-01-04 | -1.000000 | -0.875000 |
---|
2000-01-05 | -inf | 2.000000 |
---|
## With periods=2 each row is compared with the row two steps earlier,
## so the first two rows are NaN.
df.pct_change(periods=2) # b{t}=(a{t}-a{t-2})/a{t-2}
| A | B |
---|
2000-01-01 | NaN | NaN |
---|
2000-01-02 | NaN | NaN |
---|
2000-01-03 | -0.888889 | 1.666667 |
---|
2000-01-04 | -1.000000 | -1.250000 |
---|
2000-01-05 | 8.000000 | -0.625000 |
---|
Covariance
## Pairwise covariance between the columns, returned as a column-by-column
## matrix (the diagonal holds each column's variance).
df.cov()
| A | B |
---|
A | 114.80 | -17.85 |
---|
B | -17.85 | 18.70 |
---|
## Covariance of a single pair of columns; the scalar equals the A/B entry of
## df.cov() up to floating-point rounding.
df.A.cov(df.B)
-17.849999999999998
Correlation
## Pairwise correlation between the columns (covariance divided by the product
## of the two standard deviations); every column correlates 1.0 with itself.
df.corr()
| A | B |
---|
A | 1.000000 | -0.385253 |
---|
B | -0.385253 | 1.000000 |
---|
Data ranking
## Rank the values within each column (1 = smallest). Tied values share the
## average of their ranks, e.g. the two -18 entries in A both get 1.5.
df.rank()
| A | B |
---|
2000-01-01 | 1.5 | 3.5 |
---|
2000-01-02 | 5.0 | 1.0 |
---|
2000-01-03 | 3.0 | 5.0 |
---|
2000-01-04 | 4.0 | 2.0 |
---|
2000-01-05 | 1.5 | 3.5 |
---|
## axis=1 ranks across the columns within each row instead of down each column.
df.rank(axis=1)
| A | B |
---|
2000-01-01 | 1.0 | 2.0 |
---|
2000-01-02 | 2.0 | 1.0 |
---|
2000-01-03 | 1.0 | 2.0 |
---|
2000-01-04 | 1.0 | 2.0 |
---|
2000-01-05 | 1.0 | 2.0 |
---|
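The `method` parameter of rank() controls how tied values are ranked:
average : average rank of the tied group (the behaviour shown in the outputs above)
min : lowest rank in the group
max : highest rank in the group
first : ranks assigned in the order the values appear in the array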
Window Functions
cumsum
df
| A | B |
---|
2000-01-01 | -18 | 3 |
---|
2000-01-02 | 5 | -4 |
---|
2000-01-03 | -2 | 8 |
---|
2000-01-04 | 0 | 1 |
---|
2000-01-05 | -18 | 3 |
---|
## Running total down each column: row t holds the sum of rows 0..t.
df.cumsum()
| A | B |
---|
2000-01-01 | -18 | 3 |
---|
2000-01-02 | -13 | -1 |
---|
2000-01-03 | -15 | 7 |
---|
2000-01-04 | -15 | 8 |
---|
2000-01-05 | -33 | 11 |
---|
rolling
df
| A | B |
---|
2000-01-01 | -18 | 3 |
---|
2000-01-02 | 5 | -4 |
---|
2000-01-03 | -2 | 8 |
---|
2000-01-04 | 0 | 1 |
---|
2000-01-05 | -18 | 3 |
---|
## Rolling window over 2 consecutive rows, reused by the cells below.
## The first row has no complete window, so mean() yields NaN there.
r=df.rolling(window=2)
r.mean()
| A | B |
---|
2000-01-01 | NaN | NaN |
---|
2000-01-02 | -6.5 | -0.5 |
---|
2000-01-03 | 1.5 | 2.0 |
---|
2000-01-04 | -1.0 | 4.5 |
---|
2000-01-05 | -9.0 | 2.0 |
---|
## Number of non-null observations in each 2-row window.
## NOTE(review): the output below shows 1.0 for the first (incomplete) window;
## newer pandas releases appear to apply min_periods to count() as well and
## would give NaN there - confirm against the installed pandas version.
r.count()
| A | B |
---|
2000-01-01 | 1.0 | 1.0 |
---|
2000-01-02 | 2.0 | 2.0 |
---|
2000-01-03 | 2.0 | 2.0 |
---|
2000-01-04 | 2.0 | 2.0 |
---|
2000-01-05 | 2.0 | 2.0 |
---|
## Largest value in each 2-row window (NaN for the incomplete first window).
r.max()
| A | B |
---|
2000-01-01 | NaN | NaN |
---|
2000-01-02 | 5.0 | 3.0 |
---|
2000-01-03 | 5.0 | 8.0 |
---|
2000-01-04 | 0.0 | 8.0 |
---|
2000-01-05 | 0.0 | 3.0 |
---|
count() | Number of non-null observations |
sum() | Sum of values |
mean() | Mean of values |
median() | Arithmetic median of values |
min() | Minimum |
max() | Maximum |
std() | Bessel-corrected sample standard deviation |
var() | Unbiased variance |
skew() | Sample skewness (3rd moment) |
kurt() | Sample kurtosis (4th moment) |
quantile() | Sample quantile (value at %) |
apply() | Generic apply |
cov() | Unbiased covariance (binary) |
corr() | Correlation (binary) |
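The `win_type` argument of rolling() selects a weighting function for the window (for example 'triang' or 'gaussian') instead of weighting every observation equally.
The `on` argument makes rolling() compute the window over a datetime-like column of the DataFrame rather than over its index (the default).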
df
| A | B |
---|
2000-01-01 | -18 | 3 |
---|
2000-01-02 | 5 | -4 |
---|
2000-01-03 | -2 | 8 |
---|
2000-01-04 | 0 | 1 |
---|
2000-01-05 | -18 | 3 |
---|
## Time-based window: sum over the most recent three days of the datetime
## index. min_periods=3 requires three observations in the window, so the
## first two rows are NaN.
df.rolling(window='3d',min_periods=3).sum() ## last three days
| A | B |
---|
2000-01-01 | NaN | NaN |
---|
2000-01-02 | NaN | NaN |
---|
2000-01-03 | -15.0 | 7.0 |
---|
2000-01-04 | 3.0 | 5.0 |
---|
2000-01-05 | -20.0 | 12.0 |
---|
expanding
df
| A | B |
---|
2000-01-01 | -18 | 3 |
---|
2000-01-02 | 5 | -4 |
---|
2000-01-03 | -2 | 8 |
---|
2000-01-04 | 0 | 1 |
---|
2000-01-05 | -18 | 3 |
---|
## Expanding window: row t aggregates every row from the start up to and
## including t, so the last row equals the mean of the whole column.
df.expanding().mean() ## statistic computed over all data up to each point in time
| A | B |
---|
2000-01-01 | -18.00 | 3.000000 |
---|
2000-01-02 | -6.50 | -0.500000 |
---|
2000-01-03 | -5.00 | 2.333333 |
---|
2000-01-04 | -3.75 | 2.000000 |
---|
2000-01-05 | -6.60 | 2.200000 |
---|
Exponentially Weighted Windows(ewm)
df
| A | B |
---|
2000-01-01 | -18 | 3 |
---|
2000-01-02 | 5 | -4 |
---|
2000-01-03 | -2 | 8 |
---|
2000-01-04 | 0 | 1 |
---|
2000-01-05 | -18 | 3 |
---|
## Exponentially weighted mean with smoothing factor alpha=0.9: an observation
## i steps in the past gets weight (1-alpha)**i, and the weighted sum is
## divided by the sum of the weights. E.g. A on 2000-01-02 is
## (5 + 0.1*(-18)) / (1 + 0.1) = 2.909091.
df.ewm(alpha=0.9).mean()
| A | B |
---|
2000-01-01 | -18.000000 | 3.000000 |
---|
2000-01-02 | 2.909091 | -3.363636 |
---|
2000-01-03 | -1.513514 | 6.873874 |
---|
2000-01-04 | -0.151215 | 1.586859 |
---|
2000-01-05 | -16.215282 | 2.858699 |
---|