使用pandas对算法进行了改进,速度比以前快了30倍。
%%time
# 导入函数库
import jqdata
import numpy as np
import pandas as pd
import math
from statsmodels import regression
import statsmodels.api as sm
import matplotlib.pyplot as plt
def winsorize(factor, std=3, have_negative=True):
    """Clip outliers of a factor to mean +/- ``std`` standard deviations.

    Args:
        factor: Series of factor values (the original note describes it as
            indexed by stock code). Missing values are dropped first.
        std: Number of standard deviations that defines the clipping band.
        have_negative: When false, negative values are removed before the
            band is computed, so they neither appear in the result nor
            influence the mean and standard deviation.

    Returns:
        A new Series with NaNs (and optionally negatives) removed and the
        remaining values limited to ``[mean - std*sd, mean + std*sd]``.
        The input Series is never modified.
    """
    r = factor.dropna().copy()
    if not have_negative:
        r = r[r >= 0]

    # Compute the statistics once; both bounds derive from the same values.
    center = r.mean()
    spread = std * r.std()
    edge_up = center + spread
    edge_low = center - spread

    # With fewer than two observations (or non-finite data) the band is
    # undefined (NaN); there is nothing meaningful to clip against.
    if math.isnan(edge_up) or math.isnan(edge_low):
        return r

    # clip() returns a new Series and upcasts when needed, so integer-valued
    # factors work too; assigning a float bound in place into an int Series
    # is an incompatible-dtype setitem that newer pandas rejects.
    return r.clip(lower=edge_low, upper=edge_up)
#标准化函数:
def standardize(s,ty=