Python AIC 准则:使用 AIC 进行变量选择并评估多元回归中的标准

# Multiple Regression Variable Selection
def mr(selection=False):
    """Fit an OLS multiple regression of ``resale_price`` and print the results.

    The data are read from ``Dataset.csv`` (first column used as the index)
    after changing into a hard-coded working directory.

    Args:
        selection: When true, run a backward elimination first: the model is
            refitted repeatedly, each time dropping the term with the largest
            Type-III ANOVA ``PR(>F)``, until a single regressor is left. The
            formula with the smallest overall F-test p-value seen along the
            way is the one used for the final fit. When false, the full
            formula is fitted once.

    Returns:
        None. The ANOVA table, the regression summary and the p-values of the
        last three coefficients are printed.

    Raises:
        ValueError: If the ANOVA row label chosen for removal is not one of
            the formula terms (the formula could then never shrink).
    """
    import os

    # Hard-coded working directory; 'Dataset.csv' below is read relative to it.
    os.chdir(r'C:\Users\Path')
    import pandas as pd

    h = pd.read_csv('Dataset.csv', index_col=0)
    # print(h.head(0))  # dataset's variable names
    yvar = 'resale_price'
    xvars = [
        # Insert new 'x variable' into a row, ending with ','
        'storey_range_lower', 'storey_range_lower_rt', 'storey_range_lower_sq',
        'storey_range_upper', 'storey_range_upper_rt', 'storey_range_upper_sq',
        'floor_area_sqm', 'floor_area_sqm_rt', 'floor_area_sqm_sq',
        'lease_commence_year', 'lease_commence_year_rt', 'lease_commence_year_sq',
        'transaction_month', 'transaction_month_rt', 'transaction_month_sq',
        'town',
        'flat_model',
        'flat_type',
        'no_of_rooms',
        'block_number', 'block_number_rt', 'block_number_sq',
        'postal_code', 'postal_code_rt', 'postal_code_sq',
        'postal_code_2digit', 'postal_code_2digit_rt', 'postal_code_2digit_sq',
    ]

    import statsmodels.api as sm
    from statsmodels.formula.api import ols

    def build_formula(terms):
        # 'y ~ x1 + x2 + ...' -- same text the original string concatenation produced.
        return yvar + ' ~ ' + ' + '.join(terms)

    modeleq = build_formula(xvars)
    bmodeleq = modeleq
    if selection:
        print('Variable Selection using p-value & PR(>F):')
        minfpv = 1.0
        prf = None  # stays None if there was never more than one regressor
        maxp = None
        while True:
            # Specify C() for Categorical, else could be interpreted as numeric:
            # hout=ols('resale_price ~ floor_area_sqm + C(flat_type)', data=h).fit()
            hout = ols(modeleq, data=h).fit()
            if len(xvars) <= 1:  # 1 xvar left
                break
            # print(dir(hout)) gives all the attributes of .fit(), e.g. .fvalue & .f_pvalue
            fpv = hout.f_pvalue
            # NOTE(review): this comparison was truncated in the source text
            # ("if fpv"); reconstructed from how minfpv/bmodeleq are reported
            # below -- confirm the operator against the original script.
            if fpv < minfpv:
                minfpv = fpv
                bmodeleq = modeleq
            # NOTE(review): original indentation was lost; this print is assumed
            # to run on every iteration (it mirrors the print after the loop).
            print('\nF-statistic =', hout.fvalue, ' PR(>F) =', fpv)
            prf = sm.stats.anova_lm(hout, typ=3)['PR(>F)']
            # Row 0 is skipped exactly as the original prf[1:] did; idxmax also
            # ignores NaN rows and cannot return a label outside that slice.
            xdrop = prf.iloc[1:].idxmax()
            maxp = prf[xdrop]
            # xdrop removed from model equation. An explicit term list is used
            # instead of str.replace so that a term that is a prefix of another
            # (e.g. 'postal_code' / 'postal_code_rt') can never be damaged.
            if xdrop not in xvars:
                raise ValueError(
                    'ANOVA term %r is not a term of the model equation %r'
                    % (xdrop, modeleq)
                )
            xvars.remove(xdrop)
            modeleq = build_formula(xvars)
            # print('Model equation:',modeleq,'\n')
            print('Variable to drop:', xdrop, ' p-value =', prf[xdrop])
        print('\nF-statistic =', hout.fvalue, ' PR(>F) =', hout.f_pvalue)
        if prf is not None:
            # p-values come from the fit *before* the last drop: the dropped
            # term and the final row are filtered out of that table.
            print('Variable left:\n' + str(prf[maxp != prf].iloc[:-1]), '\n')
        print('Best model equation:', bmodeleq)
        print('Minimum PR(>F) =', minfpv, '\n')

    hout = ols(bmodeleq, data=h).fit()
    print(sm.stats.anova_lm(hout, typ=1))
    hsum = hout.summary()
    print('\n', hsum)

    last = 3  # number of bottom p-values to display with more precision
    # p-values are not in general the same as PR(>F) from ANOVA
    print("\nLast", last, "x-coefficients' p-values:")
    # tail() is positional and simply yields fewer rows when the model has
    # fewer than `last` coefficients.
    for coef_name, coef_pvalue in hout.pvalues.tail(last).items():
        print(' ', coef_name, ' ', coef_pvalue)
    # Output Coefficient table:
    # from IPython.core.display import HTML
    # HTML(hout.summary().tables[1].as_html())  # .tables[] from 0 to 3


mr(True)  # do Variable Selection
# mr()  # do multiple regression once

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值