# Written: April 4, 2019
"""Compare one metric between two models with a 2-sample Kolmogorov-Smirnov test.

The script reads ``ExperimentResultsCondensed.csv`` (first column used as the
index), keeps the rows labelled ROI_1 and ROI_2 and the columns whose header
contains COI, runs a two-sample KS test on the two resulting samples, prints
the statistic, the p-value and a verdict, and finally shows one density plot
per model.
"""
import os  # for deleting CSV files

import pandas  # for loading and slicing the results table
from matplotlib import pyplot  # for visualizations
from scipy.stats import ks_2samp  # for 2-sample Kolmogorov-Smirnov test

# Specified metrics and models for comparison
COI = "Area_under_PRC"
ROI_1 = "weka.classifiers.meta.AdaBoostM1[DecisionTable]"
ROI_2 = "weka.classifiers.meta.AdaBoostM1[DecisionStump]"


def removeColumns(DataFrame, typeArray, stringOfInterest):
    """Drop, in place, the columns of ``DataFrame`` that are not of interest.

    Args:
        DataFrame: pandas DataFrame to modify in place.
        typeArray: column names to examine (strings).
        stringOfInterest: substring marking a column to keep; every name in
            ``typeArray`` that does not contain it is dropped.

    Returns:
        None. ``DataFrame`` itself is modified.

    Raises:
        KeyError: if a name selected for removal is not a column of
            ``DataFrame`` (raised by ``DataFrame.drop``).
    """
    unwanted = [name for name in typeArray if stringOfInterest not in name]
    if unwanted:
        DataFrame.drop(columns=unwanted, inplace=True)


def _csvPath(model, metric):
    """Return the name of the intermediate CSV file for a model/metric pair."""
    return model + "-" + metric + ".csv"


def _selectModel(table, model, metric):
    """Return an independent frame holding ``model``'s rows and ``metric``'s columns."""
    # A list indexer always yields a DataFrame, even when only one row matches,
    # and .copy() makes the in-place drop act on an independent object instead
    # of on a selection of the full table.
    subset = table.loc[[model]].copy()
    removeColumns(subset, list(table.columns), metric)
    return subset


def _readSample(path):
    """Read back the first column of an intermediate CSV as a 1-D array."""
    # The file was written with a header line, so let read_csv consume it
    # (default header=0); otherwise the column name would become a data point
    # and every value would be read as a string.
    return pandas.read_csv(path).values[:, 0]


def _removeIfPresent(path):
    """Delete ``path``; a file that was never created is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _showDensity(frame, model, metric):
    """Show the density plot of ``frame`` labelled with the metric and model."""
    frame.plot.density()
    pyplot.xlabel(metric + " Values")
    pyplot.ylabel("Density")
    pyplot.title(metric + " Density Distribution of " + model)
    pyplot.show()


def main():
    """Run the comparison: load, filter, test, report, plot and clean up."""
    # Get the whole DataFrame
    df = pandas.read_csv("ExperimentResultsCondensed.csv", index_col=0)

    # Remove irrelevant rows and columns
    df1 = _selectModel(df, ROI_1, COI)
    df2 = _selectModel(df, ROI_2, COI)

    path1 = _csvPath(ROI_1, COI)
    path2 = _csvPath(ROI_2, COI)
    try:
        # Make CSV files
        df1.to_csv(path1, index=False)
        df2.to_csv(path2, index=False)

        # Read CSV files
        # The samples are kept as separate arrays: the KS test does not need
        # them to have the same length, which a shared DataFrame would require.
        sample1 = _readSample(path1)
        sample2 = _readSample(path2)

        # Kolmogorov-Smirnov test since we have Non-Gaussian, independent,
        # distinctive variance datasets
        value, pvalue = ks_2samp(sample1, sample2)
        # Corresponding confidence level: 95%
        alpha = 0.05

        # Output the results
        print('\n')
        print('\033[1m' + '>>>TEST STATISTIC: ')
        print(value)
        print(">>>P-VALUE: ")
        print(pvalue)
        if pvalue > alpha:
            print('\t>>Samples are likely drawn from the same distributions (fail to reject H0 - NOT SIGNIFICANT)')
        else:
            print('\t>>Samples are likely drawn from different distributions (reject H0 - SIGNIFICANT)')

        # Plot files
        _showDensity(df1, ROI_1, COI)
        _showDensity(df2, ROI_2, COI)
    finally:
        # Delete files, also when a step above failed
        _removeIfPresent(path1)
        _removeIfPresent(path2)


if __name__ == "__main__":
    main()
python t检验筛选变量_如何对多个变量和多个模型执行(修改)t检验
最新推荐文章于 2024-03-12 19:05:40 发布