Python t检验筛选变量_如何对多个变量和多个模型执行(修改)t检验

"""Compare one metric of two models with a 2-sample Kolmogorov-Smirnov test.

Written: April 4, 2019

The script reads ``ExperimentResultsCondensed.csv`` (first column used as the
row index), keeps the rows of two models (``ROI_1`` and ``ROI_2``) and the
columns whose name contains the metric ``COI``, runs ``ks_2samp`` on the first
matching column of each model, prints the outcome and shows one density plot
per model.
"""

import os  # retained from the original script; temporary CSV files are no longer written

import pandas  # for reading and slicing the results table
from scipy.stats import ks_2samp  # for 2-sample Kolmogorov-Smirnov test

# Specified metric and models for comparison
COI = "Area_under_PRC"
ROI_1 = "weka.classifiers.meta.AdaBoostM1[DecisionTable]"
ROI_2 = "weka.classifiers.meta.AdaBoostM1[DecisionStump]"

# Significance threshold; corresponding confidence level: 95%
alpha = 0.05


def removeColumns(DataFrame, typeArray, stringOfInterest):
    """Drop, in place, every listed column whose name lacks ``stringOfInterest``.

    Args:
        DataFrame: the pandas DataFrame to modify in place.
        typeArray: sequence of column names to examine; each one must exist
            in ``DataFrame``.
        stringOfInterest: substring that marks a column as relevant.

    Returns:
        None. ``DataFrame`` itself is modified.

    Raises:
        KeyError: if a name selected for removal is not a column of
            ``DataFrame``.
    """
    unwanted = [name for name in typeArray if stringOfInterest not in name]
    if unwanted:
        # One drop call instead of one call per column.
        DataFrame.drop(unwanted, axis=1, inplace=True)


def selectMetricRows(frame, rowLabel, metric):
    """Return a new DataFrame holding the ``metric`` columns of ``rowLabel`` rows.

    Args:
        frame: the full results table; it is left untouched.
        rowLabel: index label of the rows to keep.
        metric: substring a column name must contain to be kept.

    Returns:
        An independent DataFrame with the selected rows and columns.

    Raises:
        KeyError: if ``rowLabel`` is not present in the index.
        ValueError: if no column name contains ``metric``.
    """
    # The list indexer always yields a DataFrame, even when a single row
    # matches; the copy keeps the in-place drop away from the caller's table.
    subset = frame.loc[[rowLabel]].copy()
    removeColumns(subset, list(subset.columns), metric)
    if subset.shape[1] == 0:
        raise ValueError("no column name contains " + repr(metric))
    return subset


def describeOutcome(pvalue, threshold=alpha):
    """Return the verdict line for ``pvalue`` compared against ``threshold``."""
    if pvalue > threshold:
        return '\t>>Samples are likely drawn from the same distributions (fail to reject H0 - NOT SIGNIFICANT)'
    return '\t>>Samples are likely drawn from different distributions (reject H0 - SIGNIFICANT)'


def plotDensity(frame, modelName):
    """Show a density plot of ``frame`` titled with the metric and ``modelName``."""
    # Imported here so that the statistical part of the module can be used
    # without loading a plotting backend.
    from matplotlib import pyplot  # for visualizations

    frame.plot.density()
    pyplot.xlabel(str(COI + " Values"))
    pyplot.ylabel(str("Density"))
    pyplot.title(str(COI + " Density Distribution of " + modelName))
    pyplot.show()


def main():
    """Run the comparison of ``ROI_1`` and ``ROI_2`` on the metric ``COI``."""
    # Get the whole DataFrame
    df = pandas.read_csv("ExperimentResultsCondensed.csv", index_col=0)

    # Remove irrelevant rows and columns
    df1 = selectMetricRows(df, ROI_1, COI)
    df2 = selectMetricRows(df, ROI_2, COI)

    # Kolmogorov-Smirnov test on the first matching column of each model.
    # The values are handed over directly: the former CSV round-trip read the
    # header row back as a data value, and storing both samples in one
    # DataFrame forced them to have the same length.
    value, pvalue = ks_2samp(df1.iloc[:, 0].to_numpy(), df2.iloc[:, 0].to_numpy())

    # Output the results
    print('\n')
    print('\033[1m' + '>>>TEST STATISTIC: ')
    print(value)
    print(">>>P-VALUE: ")
    print(pvalue)
    print(describeOutcome(pvalue, alpha))

    # Plot the distributions
    plotDensity(df1, ROI_1)
    plotDensity(df2, ROI_2)


if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值