Python multiple linear regression: how do I use the OLS code with my own data?

I am using the ols.py code downloaded from the scipy Cookbook (the download link is in the first paragraph, in bold OLS), but I need to understand how to run the multiple linear regression on my own data instead of the random data the ols example uses.

I have one specific dependent variable y and three explanatory variables. Whenever I try to substitute my variables for the random ones, it raises:

TypeError: this constructor takes no arguments

Can anyone help? Is this possible?
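From what I can tell, "TypeError: this constructor takes no arguments" is what Python 2 raises when an old-style class is called with arguments but has no __init__ method, which suggests my paste broke the indentation so that __init__ ended up outside the class body. A minimal sketch of that failure mode (the names here are made up, not from ols.py):

class ols:
    """the class body ends with this docstring"""

def __init__(self, y, x):  # accidentally dedented: a plain function, not a method
    self.y = y

m = ols([1, 2, 3], [4, 5, 6])  # raises: TypeError: this constructor takes no arguments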

Here is a copy of the ols code I am trying to use, along with the variables I am trying to feed in:

from __future__ import division
from scipy import c_, ones, dot, stats, diff
from scipy.linalg import inv, solve, det
from numpy import log, pi, sqrt, square, diagonal, array
from numpy.random import randn, seed
import time

# my data, defined at module level (not inside the class body)

# dependent variable
y = array([29.4, 29.9, 31.4, 32.8, 33.6, 34.6, 35.5, 36.3, 37.2, 37.8, 38.5, 38.8,
           38.6, 38.8, 39, 39.7, 40.6, 41.3, 42.5, 43.9, 44.9, 45.3, 45.8, 46.5,
           77.1, 48.2, 48.8, 50.5, 51, 51.3, 50.7, 50.7, 50.6, 50.7, 50.6, 50.7])

# tuition
x1 = array([376, 407, 438, 432, 433, 479, 512, 543, 583, 635, 714, 798, 891,
            971, 1045, 1106, 1218, 1285, 1356, 1454, 1624, 1782, 1942, 2057, 2179,
            2271, 2360, 2506, 2562, 2700, 2903, 3319, 3629, 3874, 4102, 4291])

# research and development
x2 = array([28740.00, 30952.00, 33359.00, 35671.00, 39435.00, 43338.00, 48719.00, 55379.00, 63224.00,
            72292.00, 80748.00, 89950.00, 102244.00, 114671.00, 120249.00, 126360.00, 133881.00, 141891.00,
            151993.00, 160876.00, 165350.00, 165730.00, 169207.00, 183625.00, 197346.00, 212152.00, 226402.00,
            267298.00, 277366.00, 276022.00, 288324.00, 299201.00, 322104.00, 347048.00, 372535.00,
            397629.00])

# one/none parents
x3 = array([11610, 12143, 12486, 13015, 13028, 13327, 14074, 14094, 14458, 14878, 15610, 15649,
            15584, 16326, 16379, 16923, 17237, 17088, 17634, 18435, 19327, 19712, 21424, 21978,
            22684, 22597, 22735, 22217, 22214, 22655, 23098, 23602, 24013, 24003, 21593, 22319])

class ols:
    """
    Author: Vincent Nijs (+ ?)
    Email: v-nijs at kellogg.northwestern.edu
    Last Modified: Mon Jan 15 17:56:17 CST 2007
    Dependencies: See import statement at the top of this file
    Doc: Class for multi-variate regression using OLS

    Input:
        y = dependent variable
        y_varnm = string with the variable label for y
        x = independent variables, note that a constant is added by default
        x_varnm = string or list of variable labels for the independent variables

    Output:
        There are no values returned by the class. Summary provides printed output.
        All other measures can be accessed as follows:

        Step 1: Create an OLS instance by passing data to the class
            m = ols(y, x, y_varnm='y', x_varnm=['x1', 'x2', 'x3', 'x4'])

        Step 2: Get specific metrics
            To print the coefficients:          >>> print m.b
            To print the coefficients p-values: >>> print m.p
    """

    def __init__(self, y, x, y_varnm='y', x_varnm=''):
        """
        Initializing the ols class.
        """
        self.y = y
        # prepend a column of ones so the model includes an intercept
        self.x = c_[ones(x.shape[0]), x]
        self.y_varnm = y_varnm
        if not isinstance(x_varnm, list):
            self.x_varnm = ['const'] + list(x_varnm)
        else:
            self.x_varnm = ['const'] + x_varnm
        # Estimate model using OLS
        self.estimate()

    def estimate(self):
        # estimating coefficients, and basic stats
        self.inv_xx = inv(dot(self.x.T, self.x))
        xy = dot(self.x.T, self.y)
        self.b = dot(self.inv_xx, xy)                           # estimate coefficients
        self.nobs = self.y.shape[0]                             # number of observations
        self.ncoef = self.x.shape[1]                            # number of coef.
        self.df_e = self.nobs - self.ncoef                      # degrees of freedom, error
        self.df_r = self.ncoef - 1                              # degrees of freedom, regression
        self.e = self.y - dot(self.x, self.b)                   # residuals
        self.sse = dot(self.e, self.e) / self.df_e              # SSE
        self.se = sqrt(diagonal(self.sse * self.inv_xx))        # coef. standard errors
        self.t = self.b / self.se                               # coef. t-statistics
        self.p = (1 - stats.t.cdf(abs(self.t), self.df_e)) * 2  # coef. p-values
        self.R2 = 1 - self.e.var() / self.y.var()               # model R-squared
        self.R2adj = 1 - (1 - self.R2) * ((self.nobs - 1) / (self.nobs - self.ncoef))  # adjusted R-squared
        self.F = (self.R2 / self.df_r) / ((1 - self.R2) / self.df_e)  # model F-statistic
        self.Fpv = 1 - stats.f.cdf(self.F, self.df_r, self.df_e)      # F-statistic p-value

    def dw(self):
        """
        Calculates the Durbin-Watson statistic
        """
        de = diff(self.e, 1)
        dw = dot(de, de) / dot(self.e, self.e)
        return dw

    def omni(self):
        """
        Omnibus test for normality
        """
        return stats.normaltest(self.e)

    def JB(self):
        """
        Calculate residual skewness, kurtosis, and do the JB test for normality
        """
        # Calculate residual skewness and kurtosis
        skew = stats.skew(self.e)
        kurtosis = 3 + stats.kurtosis(self.e)
        # Calculate the Jarque-Bera test for normality
        JB = (self.nobs / 6) * (square(skew) + (1 / 4) * square(kurtosis - 3))
        JBpv = 1 - stats.chi2.cdf(JB, 2)
        return JB, JBpv, skew, kurtosis

    def ll(self):
        """
        Calculate model log-likelihood and two information criteria
        """
        # Model log-likelihood, AIC, and BIC criterion values
        ll = -(self.nobs * 1/2) * (1 + log(2 * pi)) - (self.nobs / 2) * log(dot(self.e, self.e) / self.nobs)
        aic = -2 * ll / self.nobs + (2 * self.ncoef / self.nobs)
        bic = -2 * ll / self.nobs + (self.ncoef * log(self.nobs)) / self.nobs
        return ll, aic, bic

    def summary(self):
        """
        Printing model output to screen
        """
        # local time & date
        t = time.localtime()
        # extra stats
        ll, aic, bic = self.ll()
        JB, JBpv, skew, kurtosis = self.JB()
        omni, omnipv = self.omni()
        # printing output to screen
        print '\n=============================================================================='
        print "Dependent Variable: " + self.y_varnm
        print "Method: Least Squares"
        print "Date: ", time.strftime("%a, %d %b %Y", t)
        print "Time: ", time.strftime("%H:%M:%S", t)
        print '# obs:       %5.0f' % self.nobs
        print '# variables: %5.0f' % self.ncoef
        print '=============================================================================='
        print 'variable     coefficient     std. Error      t-statistic     prob.'
        print '=============================================================================='
        for i in range(len(self.x_varnm)):
            print '''% -5s          % -5.6f     % -5.6f     % -5.6f     % -5.6f''' % tuple([self.x_varnm[i], self.b[i], self.se[i], self.t[i], self.p[i]])
        print '=============================================================================='
        print 'Models stats                         Residual stats'
        print '=============================================================================='
        print 'R-squared            % -5.6f         Durbin-Watson stat  % -5.6f' % tuple([self.R2, self.dw()])
        print 'Adjusted R-squared   % -5.6f         Omnibus stat        % -5.6f' % tuple([self.R2adj, omni])
        print 'F-statistic          % -5.6f         Prob(Omnibus stat)  % -5.6f' % tuple([self.F, omnipv])
        print 'Prob (F-statistic)   % -5.6f         JB stat             % -5.6f' % tuple([self.Fpv, JB])
        print 'Log likelihood       % -5.6f         Prob(JB)            % -5.6f' % tuple([ll, JBpv])
        print 'AIC criterion        % -5.6f         Skew                % -5.6f' % tuple([aic, skew])
        print 'BIC criterion        % -5.6f         Kurtosis            % -5.6f' % tuple([bic, kurtosis])
        print '=============================================================================='

if __name__ == '__main__':

    ##########################
    ### testing the ols class
    ##########################

    # stack the three explanatory variables into one n-by-3 matrix;
    # the intercept is added by the class, by default
    x = c_[x1, x2, x3]
    m = ols(y, x, y_varnm='y', x_varnm=['x1', 'x2', 'x3'])
    m.summary()
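To sanity-check the coefficients the class prints, the same fit can be reproduced with plain numpy. A minimal sketch, assuming y, x1, x2, x3 are the arrays defined above:

import numpy as np

X = np.column_stack([np.ones(len(x1)), x1, x2, x3])  # intercept plus the three regressors
beta = np.linalg.lstsq(X, y)[0]                      # least-squares fit (newer numpy also wants rcond=None)
print beta                                           # should match m.b: [const, x1, x2, x3]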
