import numpy as np
import pandas as pd
from numba import jit
@jit(nopython=True)
def func(df):
    """Return the mean of column 1 minus the mean of column 0.

    Only array-style indexing and methods are used here; pandas built-ins
    such as ``iloc`` or ``corr`` must be avoided inside this function.

    Args:
        df: Window indexed as ``df[:, k]``; presumably a 2-D NumPy array,
            since the rolling call in this file passes ``raw=True``.

    Returns:
        ``df[:, 1].mean() - df[:, 0].mean()``.
    """
    return df[:, 1].mean() - df[:, 0].mean()
# Small two-column frame used to exercise the rolling "table" mode.
demo = pd.DataFrame(
    data=[[1, 2], [4, 2], [3, 7]],
    columns=['a', 'b'],
)
# method='table' hands both columns of each window to func at once;
# raw=True passes the window as an array rather than a DataFrame.
demo[['a', 'b']].rolling(window=2, method='table').apply(
    func,
    raw=True,
    engine='numba',
)
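# If you need a regression or a correlation coefficient between two columns,
# write it with NumPy syntax wherever possible; the demos below show how.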
def func(df):
    """Return the R² of a least-squares fit of column 1 (y) on column 0 (x).

    The fit is ``y ≈ alpha + beta * x``, computed with plain array
    operations only so the function stays usable as a rolling-window
    callback.

    Args:
        df: Window indexed as ``df[:, k]``; column 0 is the regressor x and
            column 1 is the response y.

    Returns:
        The coefficient of determination ``1 - SS_res / SS_tot``, or NaN
        when x or y has zero variance in the window (for example a
        single-row window or a constant column), where R² is undefined.
    """
    x = df[:, 0]
    y = df[:, 1]
    x_mean = x.mean()
    y_mean = y.mean()

    x_centered = x - x_mean
    sxx = (x_centered ** 2).sum()
    ss_tot = ((y - y_mean) ** 2).sum()

    # Both quantities are used as denominators below; bail out explicitly
    # instead of dividing by zero.
    if sxx == 0.0 or ss_tot == 0.0:
        return np.nan

    # Slope and intercept of the ordinary least-squares line.
    beta = (y * x_centered).sum() / sxx
    alpha = y_mean - beta * x_mean

    y_pred = alpha + beta * x
    ss_res = ((y - y_pred) ** 2).sum()
    return 1.0 - ss_res / ss_tot
def func(df):
    """Return the correlation coefficient between column 0 and column 1.

    Args:
        df: Window indexed as ``df[:, k]``; the two columns are the series
            to correlate.

    Returns:
        The off-diagonal entry ``[0, 1]`` of ``np.corrcoef`` applied to the
        two columns.
    """
    first_col = df[:, 0]
    second_col = df[:, 1]
    corr_matrix = np.corrcoef(first_col, second_col)
    return corr_matrix[0, 1]