Python 的 pandas 包中,groupby / rolling 难以直接做滚动回归。
原因是 rolling(...).apply 会对每一列分别调用函数,每次传入的只是该列在一个窗口内的数据(一个 Series),函数无法同时拿到多列。
代码如下:
import pandas as pd
# Demo frame: column "a" holds 0..99 and column "b" holds the squares of those values.
df = pd.DataFrame({"a": list(range(100)), "b": [i ** 2 for i in range(100)]})
def tmp(x):
    """Debug callback for ``rolling(...).apply``: print the window, return a dummy value.

    Args:
        x: Whatever object ``apply`` hands over for one window. It is printed
            unchanged so its type and contents can be inspected.

    Returns:
        The constant ``1``. The value carries no meaning; it is only a
        placeholder scalar result for the window.
    """
    print(x)
    return 1
# Run tmp over every rolling window of 3 rows. The printed output shows what
# each call actually receives: per the pandas docs, apply is invoked per
# column, so tmp sees a single-column window rather than both columns at once.
s = df.rolling(3).apply(tmp)
解决方法:使用 numpy 的 as_strided 函数(numpy.lib.stride_tricks.as_strided)构造滑动窗口视图,使每个窗口同时包含所有列。
直接上代码
import pandas as pd
import numpy as np
from numpy.lib.stride_tricks import as_strided as stride
def test(df):
model = np.polyfit(df["a"], df["b"], deg=1)
return model[0], model[1]
class GetRolling(object):
def __init__(self, df):
self.df = df
self.nrows, self.ncols = df.shape[0], df.shape[1]
self.strides = df.values.strides
def getGroup(self, rolling_nums):
my_stride = stride(df.values, (self.nrows - rolling_nums + 1, rolling_nums, self.ncols),
(self.strides[0], self.strides[0], self.stri