import os

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
# The JQData SDK is star-imported; this script uses auth(), get_price() and
# get_query_count() from it.
from jqdatasdk import *  # noqa: F401,F403

# Log in to JQData.  Credentials are read from the environment so that no
# account name or password is stored in the source file.
# NOTE(review): any credentials that were previously committed in this file
# should be treated as leaked and rotated.
_jq_username = os.environ.get("JQDATA_USERNAME")
_jq_password = os.environ.get("JQDATA_PASSWORD")
if not _jq_username or not _jq_password:
    raise SystemExit(
        "Set the JQDATA_USERNAME and JQDATA_PASSWORD environment variables "
        "before running this script."
    )
auth(_jq_username, _jq_password)

# ---------------------------------------------------------------------------
# Example 1 (disabled): simple single-variable linear regression.
# Model: Y = 1 + 10 * X
# NOTE(review): the model below is built with `x` (no constant column) rather
# than `X`, so no intercept is fitted -- confirm whether `X` was intended.
# ---------------------------------------------------------------------------
# nsample=100
# x=np.linspace(0,10,nsample)
# X=sm.add_constant(x)
# beta=np.array([1,10])
# e=np.random.normal(size=nsample)
# y=np.dot(X,beta)+e
# model=sm.OLS(y,x)
# results=model.fit()
# print(results.params)
# print(results.summary())
#
# y_fitted = results.fittedvalues
# fig,ax=plt.subplots(figsize=(8,6))
# ax.plot(x,y,'o',label='data')
# ax.plot(x,y_fitted,'r--',label='OLS')
# ax.legend(loc='best')
# plt.show()
# plt.savefig('1.png')

# ---------------------------------------------------------------------------
# Example 2 (disabled): multiple regression -- a higher-order (polynomial)
# model rewritten as a linear regression.
# Model: Y = 1 + 0.1 * X + 10 * X^2
# ---------------------------------------------------------------------------
# nsample=100
# x1=np.linspace(0,10,nsample)
# x2=x1*x1
# X=np.column_stack((x1,x2))
# X=sm.add_constant(X)
# beta=np.array([1.,0.1,10.])
# e=np.random.normal(size=nsample)
# y=np.dot(X,beta)+e
# model=sm.OLS(y,X)
# results=model.fit()
# print(results.params)
# print(results.summary())
#
# y_fitted = results.fittedvalues
# fig,ax=plt.subplots(figsize=(8,6))
# ax.plot(x1,y,'o',label='data')
# ax.plot(x1,y_fitted,'r--',label='OLS')
# ax.legend(loc='best')
# plt.show()
# plt.savefig('1.png')

# ---------------------------------------------------------------------------
# Example 3 (disabled): dummy variables.
# Assume a response variable Y that depends on a continuous regressor X and a
# categorical variable Z.  The constant term is 10 and the coefficient of X is
# 1; Z has three categories {a, b, c}, where category a has coefficient 1,
# category b has coefficient 3 and category c has coefficient 8.  In other
# words, converting Z into dummy variables (Z1, Z2, Z3), each Zi taking a
# value in {0, 1}, gives the linear formula
# Y = 10 + X + Z1 + 3 * Z2 + 8 * Z3.
# ---------------------------------------------------------------------------
# Dummy-variable example (disabled reference code).
# NOTE(review): sm.categorical is, as far as I know, deprecated/removed in
# recent statsmodels releases -- confirm before re-enabling this example.
# nsample=50
# groups=np.zeros(nsample,int)
# groups[20:40]=1
# groups[40:]=2
# dummy=sm.categorical(groups,drop=True)
# x=np.linspace(0,20,nsample)
# X=np.column_stack((x,dummy))
# X=sm.add_constant(X)
# beta=[10,1,1,3,8]
# e=np.random.normal(size=nsample)
# y=np.dot(X,beta)+e
# results=sm.OLS(y,X).fit()
# print(results.params)
# print(results.summary())
#
# y_fitted = results.fittedvalues
# fig,ax=plt.subplots(figsize=(8,6))
# ax.plot(x,y,'o',label='data')
# ax.plot(x,y_fitted,'r--',label='OLS')
# ax.legend(loc='best')
# plt.show()
# plt.savefig('1.png')
#


def simple_returns(prices):
    """Return period-over-period simple returns ``p[t] / p[t-1] - 1``.

    Args:
        prices: One-dimensional sequence of prices in chronological order.

    Returns:
        A float ``numpy.ndarray`` with ``len(prices) - 1`` elements; an empty
        array when fewer than two prices are supplied.
    """
    series = np.asarray(prices, dtype=float)
    if series.size < 2:
        return np.empty(0, dtype=float)
    return series[1:] / series[:-1] - 1.0


def main():
    """Regress the 2021 daily returns of 399001.XSHE on those of 000001.XSHG.

    Fetches daily closing prices for both codes through ``get_price``, fits
    ``y = const + beta * x`` with statsmodels OLS, prints the return series,
    the regression summary and the fitted parameters, writes the scatter plot
    with the fitted line to ``1.png``, shows it, and finally prints the result
    of ``get_query_count()``.

    Raises:
        ValueError: If either return series is empty or the two series have
            different lengths, since they could not be regressed pairwise.
    """
    # Both downloads share every argument except the security code.
    # NOTE(review): presumably 000001.XSHG / 399001.XSHE are the Shanghai
    # Composite and Shenzhen Component indices -- confirm.
    price_kwargs = dict(
        start_date='2021-01-01',
        end_date='2021-12-31',
        frequency='daily',
        fields=['close'],
        skip_paused=False,
        fq='pre',
        panel=False,
    )
    data1 = get_price('000001.XSHG', **price_kwargs)
    data2 = get_price('399001.XSHE', **price_kwargs)

    x = simple_returns(data1['close'].values)
    y = simple_returns(data2['close'].values)
    if x.size == 0 or x.shape != y.shape:
        raise ValueError(
            "Cannot regress returns: got %d x observations and %d y "
            "observations." % (x.size, y.size)
        )
    X = sm.add_constant(x)
    print(x)
    print(y)

    results = sm.OLS(y, X).fit()
    print(results.summary())
    print(results.params)

    y_fitted = results.fittedvalues
    # Draw the fitted line in ascending-x order; plotting it in time order
    # makes the dashed line retrace itself back and forth.
    order = np.argsort(x)
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(x, y, 'o', label='data')
    ax.plot(x[order], y_fitted[order], 'r--', label='OLS')
    ax.legend(loc='best')
    # Save before show(): once the shown window is closed the figure is gone,
    # and a later plt.savefig() would write a blank image.
    fig.savefig('1.png')
    plt.show()

    # Query the number of API calls still available today.
    print(get_query_count())


if __name__ == "__main__":
    main()
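# Source article title: "OLS regression with the Statsmodels statistics package" (Statsmodels 统计包之 OLS 回归)
# Page footer: latest recommended article published 2024-05-26 20:52:15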