statsmodels模块计量经济学

最新推荐文章于 2024-10-07 15:38:54 发布

Muwenlong#

最新推荐文章于 2024-10-07 15:38:54 发布

阅读量1.7k

点赞数

文章标签： python

本文链接：https://blog.csdn.net/qq_44717437/article/details/105604013

版权

1、将参数处理为一维或者多维数组后

#x为一维数组（只有一个参数），多维数组（多个参数）。处理方式参见下文
x=sm.add_constant(x)#添加一个常数
model=sm.OLS(salary,x).fit()#拟合
print(model.summary())#输出关系

import statsmodels.api as sm
import numpy as np
import math
import pandas as pd
class ceosal():
    def __init__(self):
        self.data=pd.read_stata('CEOSAL1.DTA')
        pd.set_option('max_columns',None)
        pd.set_option('max_rows',None)
    def log(self,num):
        return [math.log(i)for i in num]#取对数
    def t(self,num):
        return np.transpose(num)#两种方式转置二维数组
        #return list(map(list, zip(*x)))
    def ols(self):
        salary=list(self.data['salary'])
        salary=self.log(salary)
        sales=list(self.data['sales'])
        sales=self.log(sales)
        roe=list(self.data['roe'])
        ros=list(self.data['ros'])
        x=[sales,roe,ros]
        x=self.t(x)
        x=sm.add_constant(x)#添加一个常数
        model=sm.OLS(salary,x).fit()#拟合
        print(model.summary())#输出关系
        #输出内容均是与计量经济学专业术语相关

if __name__=='__main__':
    d=ceosal()
    d.ols()

# 提取回归系数
model.params

# 提取回归系数标准差
model.bse

# 提取回归系数p值
model.pvalues

# 提取回归系数t值
model.tvalues

# 提取回归系数置信区间 默认5%，括号中可填具体数字 比如0.05, 0.1
model.conf_int()  

# 提取模型预测值
model.fittedvalues

# 提取残差
model.resid

# 模型自由度（系数自由度）
model.df_model

# 残差自由度（样本自由度）
model.df_resid

# 模型样本数量
model.nobs

# 提取R方
model.rsquared

# 提取调整R方
model.rsquared_adj

# 提取AIC
model.aic

# 提取BIC
model.bic

# 提取F-statistic
model.fvalue

# 提取F-statistic 的pvalue
model.f_pvalue

# 模型mse
model.mse_model

# 残差mse
model.mse_resid

# 总体mse
model.mse_total

# 协方差矩阵比例因子
model.scale

#  White异方差稳健标准误
model.HC0_se

# MacKinnon和White（1985）的异方差稳健标准误
model.HC1_se

#  White异方差矩阵
model.cov_HC0

# MacKinnon和White（1985）的异方差矩阵
model.cov_HC1

标准化处理

from scipy.stats.mstats import zscore
arg = [dist, dist2,intst, intst2, area, land, rooms, baths, age]
arg = self.t(arg)
price = zscore(price)
arg = zscore(arg)
# arg=sm.add_constant(arg)#数据标准化之后不需要截距项
model = sm.OLS(price, arg).fit()
print(model.summary('price', ['dist', 'dist2','intst', 'intst2', 'area', 'land', 'rooms', 'baths', 'age'],
                            'question3'))  # 分别设置因变量名称，自变量名称（包括常数），回归的标题
#zscore可以处理一维或者多维数组，但是要做添加截距项之前，而且一般情况下，标准化之后就不需要截距项。

2、ols回归的另一种方式

import statsmodels.formula.api as smf
price=self.data
fit = smf.ols('price~sqrft+bdrms', data=price).fit()
# 模型概览的反馈
print(fit.summary())

3、异方差检验的方式

import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import statsmodels.api as sm#引入statasmodel模型
import statsmodels.formula.api as smf
from matplotlib.font_manager import FontProperties#中文处理
from mpl_toolkits.mplot3d import Axes3D as ax
from  numpy import *
import numpy as np
#异方差性检验
class Testheter():
    def __init__(self):
        mpl.rcParams['font.sans-serif'] = ['SimHei']  # 配置显示中文
        plt.rcParams['axes.unicode_minus'] = False
        self.data = pd.read_stata('WAGE1.DTA')
        self.data.dropna(axis=0, how='any', inplace=True)  # 删除缺失值
        pd.set_option('max_columns', None)  # 显示所以行
        pd.set_option('max_rows', None)  # 显示所有列

    def graph(self):
        fit=smf.ols('wage~educ+exper',data=self.data).fit()
        a=np.linalg.cond(fit.model.exog)
        print(fit.summary())
        print("检验多重共线性 ",a)
        print("#"*100)

        plt.scatter(fit.predict(), (fit.resid-fit.resid.mean())/fit.resid.std())
        plt.xlabel('预测值')
        plt.ylabel('标准化残差')
        # 添加水平参考线
        plt.axhline(y=0, color='r', linewidth=2)
        plt.show()


    def white(self):
        fit = smf.ols('wage~educ+exper', data=self.data).fit()
        a=sm.stats.diagnostic.het_white(fit.resid, exog=fit.model.exog)
        name = ['Lagrange multiplier statistic', 'p-value','f-value', 'f p-value']
        print("white test")
        for n,a in zip(name,a):
            print(n,a)
        print("#"*100)
        #print("拉格朗日乘子的统计(lagrange multiplier statistic(lm : float)) ",a[0])
        #print("拉格朗日乘子检验的p值(p-value of lagrange multiplier test(lm_pvalue :float)) ", a[1])
        #print("误差方差不依赖于x的假设的f统计量(f-statistic of the hypothesis that the error variance does not depend on x(fvalue : float)) ", a[2])
        #print("f统计量的p值(p-value for the f-statistic(f_pvalue : float))", a[3])
    def BreushPagan(self):
        fit = smf.ols('wage~educ+exper', data=self.data).fit()
        a = sm.stats.diagnostic.het_breuschpagan(fit.resid, exog_het=fit.model.exog)
        name = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
        print("Breush-Pagan test")
        for n, a in zip(name, a):
            print(n, a)
        print("#" * 100)

    def GoldfeldQuandt(self):
        fit = smf.ols('wage~educ+exper', data=self.data).fit()
        a = sm.stats.diagnostic.het_breuschpagan(fit.resid, exog_het=fit.model.exog)
        name = ['F statistic', 'p-value']
        print("GoldfeldQuandt test")
        for n, a in zip(name, a):
            print(n, a)
        print("#" * 100)
    def arch(self):
        fit = smf.ols('wage~educ+exper', data=self.data).fit()
        a = sm.stats.diagnostic.het_arch(fit.resid)
        name = ['Lagrange multiplier statistic', 'p-value', 'f p-value','ResultsStore']
        print("arch test")
        for n, a in zip(name, a):
            print(n, a)
        print("#" * 100)

if __name__=='__main__':
    d=Testheter()
    d.graph()
    d.white()
    d.BreushPagan()
    d.GoldfeldQuandt()
    d.arch()

4、异方差的修正

import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import statsmodels.api as sm#引入statasmodel模型
import statsmodels.formula.api as smf
from matplotlib.font_manager import FontProperties#中文处理
from mpl_toolkits.mplot3d import Axes3D as ax
from  numpy import *
import math
import numpy as np
#异方差性检验
class Testheter():
    def __init__(self):
        mpl.rcParams['font.sans-serif'] = ['SimHei']  # 配置显示中文
        plt.rcParams['axes.unicode_minus'] = False
        self.data = pd.read_stata('WAGE1.DTA')
        self.data.dropna(axis=0, how='any', inplace=True)  # 删除缺失值
        pd.set_option('max_columns', None)  # 显示所以行
        pd.set_option('max_rows', None)  # 显示所有列

    def log(self, num):  # 取对数
        return [math.log(i) for i in num]

    def t(self, num):  # 将二维数组转置
        return np.transpose(num)
    def logs(self):
        wage =self.log(list(self.data['wage']))  # 将dataframe转化为列表
        educ = list(self.data['educ'])
        exper = self.log(list(self.data['exper']))
        print("取对数法")
        print("_"*1000)
        arg = [educ, exper]  # 构造多维数组
        arg = self.t(arg)  # 转置
        arg = sm.add_constant(arg)  # 添加一个常数
        model = sm.OLS(wage, arg).fit()  # 利用statsmodels进行拟合
        print(model.summary('log(wage)', ['const', 'educ', 'log(exper)'],'取对数'))  # 分别设置因变量名称，自变量名称（包括常数）
        a = sm.stats.diagnostic.het_white(model.resid, exog=model.model.exog)
        name = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
        print("white test")
        for n, a in zip(name, a):
            print(n, a)
        print("#" * 100)
    def olss(self):
        print('\n\n\n\n\n\n')
        print("加权最小二乘法")
        print("_" * 1000)
        fit = smf.ols('wage~educ+exper', data=self.data).fit()
        w1 = 1 / np.abs(fit.resid)
        fit2 = sm.formula.wls('wage~educ+exper', data=self.data, weights=w1).fit()
        print(fit2.summary())
        a = sm.stats.diagnostic.het_white(fit2.resid, exog=fit2.model.exog)
        name = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
        print("white test")
        for n, a in zip(name, a):
            print(n, a)
        print("#" * 100)

if __name__=='__main__':
    d=Testheter()
    d.logs()
    d.olss()