使用Tkinter设计基于python的GUI交互

在数据分析的过程中,往往需要对所建立的模型进行可视化,并调整其中的某些参数。

通常情况下,在Python中可以通过Matplotlib来绘制图像。然而该绘制过程是静态的,也就是每次调整完参数后,都需要重新调用绘图语句进行绘图展示。我们的目标是结合GUI组件,实现对模型参数的交互式绘图。这样,可以在展示出的GUI界面中动态地调整模型的参数,并实时绘制图像。

最终的实现效果如下(原文此处为运行效果截图,转载过程中已丢失):


# coding=gbk  
''''' 
Created on 2016年10月28日 
 
@author: ldk 
'''  
   
import time    
from sklearn import metrics    
import pickle as pickle
from sklearn.externals import joblib ##保存模型
from matplotlib.widgets import Button ##添加按钮功能包

import pandas as pd
import random

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics import roc_curve, auc
from sklearn.cross_validation import KFold,StratifiedKFold 

from scipy import interp
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
from Tkinter import *
  
  
###############################################################################    
# Multinomial Naive Bayes Classifier    
def naive_bayes_classifier(train_x, train_y):
    """Fit a multinomial naive Bayes model (alpha=0.01) on the training set."""
    from sklearn.naive_bayes import MultinomialNB
    clf = MultinomialNB(alpha=0.01)
    clf.fit(train_x, train_y)
    return clf
###############################################################################    
    
###############################################################################        
# KNN Classifier    
def knn_classifier(train_x, train_y):
    """Fit a k-nearest-neighbours classifier with default hyper-parameters."""
    from sklearn.neighbors import KNeighborsClassifier
    clf = KNeighborsClassifier()
    clf.fit(train_x, train_y)
    return clf
###############################################################################    
    
###############################################################################        
# Logistic Regression Classifier    
def logistic_regression_classifier(train_x, train_y):
    """Fit an L2-regularized logistic regression model on the training set."""
    from sklearn.linear_model import LogisticRegression
    clf = LogisticRegression(penalty='l2')
    clf.fit(train_x, train_y)
    return clf
###############################################################################    
    
    
###############################################################################    
# Random Forest Classifier    
def random_forest_classifier(train_x, train_y):
    """Fit a 20-tree random forest on the training set."""
    from sklearn.ensemble import RandomForestClassifier
    clf = RandomForestClassifier(n_estimators=20)
    clf.fit(train_x, train_y)
    return clf
###############################################################################    
    
    
###############################################################################    
# Decision Tree Classifier    
def decision_tree_classifier(train_x, train_y):
    """Fit a CART decision tree with default settings on the training set."""
    from sklearn import tree
    clf = tree.DecisionTreeClassifier()
    clf.fit(train_x, train_y)
    return clf
###############################################################################    
    
    
###############################################################################    
# GBDT(Gradient Boosting Decision Tree) Classifier    
def gradient_boosting_classifier(train_x, train_y):
    """Fit a 200-stage GBDT (gradient boosted decision trees) classifier."""
    from sklearn.ensemble import GradientBoostingClassifier
    clf = GradientBoostingClassifier(n_estimators=200)
    clf.fit(train_x, train_y)
    return clf
###############################################################################    
    
    
###############################################################################    
# SVM Classifier    
def svm_classifier(train_x, train_y):
    """Fit an RBF-kernel SVM with probability estimates enabled."""
    from sklearn.svm import SVC
    clf = SVC(kernel='rbf', probability=True)
    clf.fit(train_x, train_y)
    return clf
###############################################################################    
    
###############################################################################    
# SVM Classifier using cross validation    
def svm_cross_validation(train_x, train_y):
    """Grid-search C and gamma for an RBF SVM, print the best parameters,
    then refit a fresh SVM with the winning pair and return it."""
    from sklearn.grid_search import GridSearchCV
    from sklearn.svm import SVC
    search_space = {'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                    'gamma': [0.001, 0.0001]}
    searcher = GridSearchCV(SVC(kernel='rbf', probability=True),
                            search_space, n_jobs=1, verbose=1)
    searcher.fit(train_x, train_y)
    best = searcher.best_estimator_.get_params()
    for name, value in list(best.items()):
        print(name, value)
    # Refit on the full training set with the selected hyper-parameters.
    tuned = SVC(kernel='rbf', C=best['C'], gamma=best['gamma'], probability=True)
    tuned.fit(train_x, train_y)
    return tuned
###############################################################################

def shuffle_data(inputfile):
    """Randomly shuffle the rows of a CSV file in place.

    Reads ``inputfile``, permutes its rows, and writes the result back to
    the same path (header kept, index dropped).

    Parameters
    ----------
    inputfile : str
        Path to the CSV file to shuffle.
    """
    # ``error_bad_lines`` was removed in modern pandas, and ``mode='wb+'``
    # is invalid for text-mode CSV output under Python 3 — both dropped.
    csv_input = pd.read_csv(inputfile)
    # sample(frac=1) returns a full random permutation of the rows; this
    # replaces the old "append a random column, sort by it, delete it" trick.
    shuffled = csv_input.sample(frac=1).reset_index(drop=True)
    shuffled.to_csv(inputfile, index=False)
    
###############################################################################    
def read_data(data_file, droplabel):
    """Load a CSV data set and split it 70/30 into train and test parts.

    Rows whose ``label`` equals ``droplabel`` are removed from both splits
    (reducing the problem to binary classification), and the non-feature
    metadata columns are dropped from the feature matrices.

    Parameters
    ----------
    data_file : str
        Path to a CSV file with a 'label' column plus the metadata columns
        'SampleDate', 'picktime', 'classes' and 'temp_label'.
    droplabel :
        Class label to exclude from both splits.

    Returns
    -------
    (train_x, train_y, test_x, test_y)
    """
    frame = pd.read_csv(data_file)

    # First 70% of rows are training data, the rest is the test set.
    split_at = int(len(frame) * 0.7)
    train_part = frame[:split_at]
    test_part = frame[split_at:]

    # Columns that carry bookkeeping information, not features.
    meta_columns = ['SampleDate', 'picktime', 'classes', 'temp_label']

    train_y = train_part.label
    train_x = train_part.drop('label', axis=1)
    # Binary classification: keep only rows whose label differs from droplabel.
    keep = train_y != droplabel
    train_x, train_y = train_x[keep], train_y[keep]
    for column in meta_columns:
        train_x = train_x.drop(column, axis=1)

    test_x = test_part.drop('label', axis=1)
    test_y = test_part.label
    keep = test_y != droplabel
    test_x, test_y = test_x[keep], test_y[keep]
    for column in meta_columns:
        test_x = test_x.drop(column, axis=1)

    return train_x, train_y, test_x, test_y
###############################################################################    
   

###############################################################################    
def read_data_selectfeature(data_file, dimention, droplabel):
    """Load a CSV data set, select the best ``dimention`` features by chi2,
    and split the result 70/30 into train and test arrays.

    Parameters
    ----------
    data_file : str
        Path to a CSV file with a 'label' column plus the metadata columns
        'SampleDate', 'picktime', 'classes' and 'temp_label'.
    dimention : int
        Number of features to keep (``k`` of SelectKBest).
    droplabel :
        Class label to exclude so the task becomes binary.

    Returns
    -------
    (train_x, train_y, test_x, test_y) as numpy arrays; the y arrays have
    shape (n, 1).
    """
    frame = pd.read_csv(data_file)

    # Feature matrix: everything except the label and metadata columns.
    features = frame.drop('label', axis=1)
    for column in ('SampleDate', 'picktime', 'classes', 'temp_label'):
        features = features.drop(column, axis=1)
    labels = frame.label

    # Keep only two classes before feature scoring.
    keep = labels != droplabel
    features, labels = features[keep], labels[keep]

    # Univariate chi-squared selection down to `dimention` columns.
    selected = SelectKBest(chi2, k=dimention).fit_transform(features, labels)

    # Re-attach the labels as the last column so the 70/30 split keeps
    # features and labels aligned row by row.
    combined = np.c_[selected, labels]

    split_at = int(len(combined) * 0.7)
    train_rows = combined[:split_at]
    test_rows = combined[split_at:]

    train_x, train_y = train_rows[:, :-1], train_rows[:, -1:]
    test_x, test_y = test_rows[:, :-1], test_rows[:, -1:]

    return train_x, train_y, test_x, test_y
###############################################################################

def read_Kf(data_file, droplabel):
    """Demonstrate 3-fold cross-validation index generation on a CSV file.

    Parameters
    ----------
    data_file : str
        Path to a CSV file with a 'label' column.
    droplabel :
        Unused; kept for interface compatibility with read_data().

    NOTE(review): the original body printed the undefined names
    ``train``/``test`` (a guaranteed NameError); fixed to print the fold
    index arrays that KFold actually yields.
    """
    data = pd.read_csv(data_file)
    y = data.label
    # KFold yields index arrays, not the data rows themselves.
    kf = KFold(len(y), n_folds=3, shuffle=True)
    for train_index, test_index in kf:
        print("train,test ", train_index, test_index)
    

###############################################################################    
#清洗数据,滤出异常点样本,关键点在于.iloc[]的使用,可以定位数据样本点第几行,然后可以像操作二维array一样操作DataFrame
def DataClean(inputfile):
    """Filter anomalous sample rows of a spectral CSV file in place.

    Rows whose 'Wavelength' value falls outside the open interval
    (500, 1050) are zeroed out entirely; the result is written back with
    only the 'Wavelength', 'average' and 'label' columns.

    Parameters
    ----------
    inputfile : str
        Path to the CSV file to clean (overwritten in place).
    """
    # ``error_bad_lines`` was removed in modern pandas and ``mode='wb+'``
    # is invalid for text-mode CSV output under Python 3 — both dropped.
    csv_input = pd.read_csv(inputfile)
    # Vectorized replacement for the original row-by-row .iloc loop:
    # one boolean mask instead of an O(n) Python iteration.
    wavelength = csv_input['Wavelength']
    out_of_band = ~((wavelength > 500) & (wavelength < 1050))
    csv_input.loc[out_of_band, :] = 0
    # `columns=` restricts the output to the listed feature columns.
    csv_input.to_csv(inputfile, index=False,
                     columns=['Wavelength', 'average', 'label'])
###############################################################################    

###############################################################################    
def PlotROC(classifier, test_y, predict, label, subfigue):
    """Draw one classifier's ROC curve on `subfigue` and accumulate it
    into the module-level mean-TPR running sum.

    Parameters: `classifier` is the name used in the legend, `label` is
    the positive-class label passed to roc_curve, and `subfigue` is a
    matplotlib axes object.
    """
    global mean_tpr, mean_fpr
    # roc_curve needs the positive-class label spelled out explicitly.
    fpr, tpr, _thresholds = roc_curve(test_y, predict, pos_label=label)

    # Interpolate this curve's TPR onto the shared FPR grid and add it to
    # the running sum, so DispROC() can average across classifiers later.
    mean_tpr += interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0  # every ROC curve starts at the origin
    area = auc(fpr, tpr)

    # Plot the individual curve; `area` only annotates the legend.
    subfigue.plot(fpr, tpr, lw=1,
                  label='ROC  %s (area = %0.3f)' % (classifier, area))
###############################################################################    

###############################################################################    
def DispROC(subfigue):
    """Finalize the ROC axes: chance diagonal, averaged mean-ROC curve,
    axis limits, labels and legend.

    Reads the module-level accumulators `mean_tpr`/`mean_fpr` filled by
    PlotROC() and divides by `len(cv)`.
    """
    global cv, mean_tpr, mean_fpr

    # Chance line from (0, 0) to (1, 1).
    diagonal = [0, 1]
    subfigue.plot(diagonal, diagonal, '--', color=(0.6, 0.6, 0.6), label='Luck')

    # Average the summed, interpolated TPR values, then pin the endpoint.
    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0  # force the averaged curve to end at (1, 1)
    mean_auc = auc(mean_fpr, mean_tpr)

    # Averaged ROC curve with its AUC in the legend.
    subfigue.plot(mean_fpr, mean_tpr, 'k--',
                  label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    subfigue.set_xlim([-0.05, 1.15])
    subfigue.set_ylim([-0.05, 1.15])
    subfigue.set_xlabel('False Positive Rate')
    subfigue.set_ylabel('True Positive Rate')
    subfigue.set_title('Receiver operating characteristic example')
    subfigue.legend(loc="lower right")

###############################################################################

def button_all_press():
    """Pickle the whole dict of trained models to `model_save_file`.

    Reads the module-level `model_save` (classifier name -> model) and
    `model_save_file` (destination path).
    """
    global model_save, model_save_file
    # Context manager closes the file handle; the original leaked the
    # handle it opened inline. Py2-style print also converted to print().
    with open(model_save_file, 'wb') as out_file:
        pickle.dump(model_save, out_file)
    print('model is saved!')

def button_smodel_press():
    """Pickle the single model named in the entry box to `model_save_file`.

    Reads the classifier name from the module-level `inputEntry` widget;
    if the name is not a key of `model_save`, resets the entry to 'RF'
    and aborts instead of crashing.
    """
    global model_save, model_save_file
    # Read the requested classifier name from the Tk entry widget.
    save_modelname = inputEntry.get()
    # The original wrapped a bare expression in try/except, which never
    # raises, so a bad name still crashed below with a KeyError.
    # Validate with an explicit membership test and bail out early.
    if save_modelname not in model_save:
        print("请输入正确的分类器")
        inputEntry.delete(0, END)
        inputEntry.insert(0, 'RF')
        return

    # Context manager closes the file (the original leaked the handle).
    with open(model_save_file, 'wb') as out_file:
        pickle.dump(model_save[save_modelname], out_file)
    print('model is saved!')

def draw_button_saveall():
    """Place the "save all" button on the root window.

    The widget is kept in the module-level `button` so it is not
    garbage-collected.
    """
    global button, root  # must be global so the widget reference survives
    # .grid() returns None, so the widget must be assigned BEFORE being
    # placed — the original chained call stored None in `button`.
    button = Button(root, text="save all", command=button_all_press)
    button.grid(row=2, column=1, columnspan=3)

def draw_button_savesingle():
    """Place the "save singlemodel" button on the root window.

    The widget is kept in the module-level `button` so it is not
    garbage-collected.
    """
    global button, root  # must be global so the widget reference survives
    # .grid() returns None; assign the widget first, then place it
    # (the original chained call stored None in `button`).
    button = Button(root, text="save singlemodel", command=button_smodel_press)
    button.grid(row=2, column=2, columnspan=3)


def trainmodel(subfigue):
    """Shuffle the data, train every configured classifier, print
    precision/recall/accuracy, and draw each ROC curve on `subfigue`.

    Relies on module-level state: `data_file`, `dimention`, `droplabel`,
    the ROC accumulators `mean_tpr`/`mean_fpr`, `positive_label`,
    `test_classifiers`/`classifiers` and `model_save`/`model_save_file`.

    Parameters
    ----------
    subfigue : matplotlib axes
        Axes the ROC curves are drawn onto.
    """
    print("all model train start")
    # NOTE: original global statement listed `data_file` twice; deduplicated.
    global data_file, dimention, droplabel, mean_tpr, mean_fpr, positive_label
    # Re-shuffle the CSV on disk before every training round.
    shuffle_data(data_file)
    # Chi-squared feature selection down to `dimention` features.
    dimention = 1000
    train_x_s, train_y_s, test_x_s, test_y_s = read_data_selectfeature(
        data_file, dimention, droplabel)
    for classifier in test_classifiers:
        print('******************* %s ********************' % classifier)
        start_time = time.time()
        model = classifiers[classifier](train_x_s, train_y_s)
        print('training took %fs!' % (time.time() - start_time))
        predict = model.predict(test_x_s)
        # Keep the fitted model around so the save buttons can pickle it.
        if model_save_file is not None:
            model_save[classifier] = model
        precision = metrics.precision_score(test_y_s, predict)
        recall = metrics.recall_score(test_y_s, predict)
        print('precision: %.2f%%, recall: %.2f%%' % (100 * precision, 100 * recall))
        accuracy = metrics.accuracy_score(test_y_s, predict)
        print('accuracy: %.2f%%' % (100 * accuracy))
        PlotROC(classifier, test_y_s, predict, positive_label, subfigue)

    print("all model train end")
    
    
def drawPic():
    """Clear the embedded figure, retrain all models, and redraw the ROC plot.

    Uses function attributes assigned in the main block: `drawPic.f`
    (matplotlib Figure) and `drawPic.canvas` (FigureCanvasTkAgg);
    `drawPic.a` holds the current axes between redraws.
    """
    drawPic.f.clf()
    drawPic.a = drawPic.f.add_subplot(111)
    trainmodel(drawPic.a)
    drawPic.a.set_title('Model Classifier')
    # NOTE(review): the original line `drawPic.a.set_xmargin` was a bare
    # attribute access with no call and no effect; removed.
    DispROC(drawPic.a)
    # canvas.show() was deprecated/removed in newer matplotlib; draw()
    # is the long-standing equivalent.
    drawPic.canvas.draw()
 
        
if __name__ == '__main__':
##    datafilename = 'outputhard10_25.csv'

    datafilename = 'multi_apple_union.csv'
##    data_file = "L:\\Python\\testdownscale\\"+datafilename
    data_file = "L:\\Python\\creditcard\\" + datafilename

    # Shuffle the CSV rows on disk before the train/test split reads them.
    shuffle_data(data_file)
##    read_Kf(data_file,'temp_label')

    thresh = 0.5
    model_save_file = "L:\\Python\\creditcard\\model\\savemodel.txt"
    model_save = {}  # classifier name -> fitted model, filled by trainmodel()

    test_classifiers = ['NB', 'KNN', 'LR', 'RF', 'DT', 'SVM']  #'SVMCV',  , 'GBDT'
    classifiers = {'NB':naive_bayes_classifier,
                  'KNN':knn_classifier,
                   'LR':logistic_regression_classifier,
                   'RF':random_forest_classifier,
                   'DT':decision_tree_classifier,
                  'SVM':svm_classifier,
##                 'SVMCV':svm_cross_validation,
##                 'GBDT':gradient_boosting_classifier
    }


    ### Positive-class label passed to roc_curve (pos_label) ##################
    positive_label = 2

    ### Class label dropped so the task becomes binary ########################
    droplabel = 3
    train_x, train_y, test_x, test_y = read_data(data_file,droplabel)


##    print "train_y_s",train_y_s

    # ROC accumulators shared (as module globals) with PlotROC/DispROC.
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    y_target = np.r_[train_y,test_y]
    cv = StratifiedKFold(y_target, n_folds=6)  # DispROC divides by len(cv)

    # NOTE(review): matplotlib.use() only takes effect before pyplot is
    # imported, and pyplot is imported at the top of this file — verify
    # the TkAgg backend is actually active here.
    matplotlib.use('TkAgg')
    root=Tk()

    # Put a matplotlib canvas on the Tk window and lay it out with .grid().
    drawPic.f = Figure(figsize = [10,7],dpi=100)
    drawPic.canvas = FigureCanvasTkAgg(drawPic.f, master=root)
    drawPic.canvas.show()  # NOTE(review): deprecated in newer matplotlib; draw() is the modern call
    drawPic.canvas.get_tk_widget().grid(row=0, columnspan=3)


    # Place the label, entry box and buttons; preset the entry's default value
    # and wire the button callbacks.
    Label(root,text=u"请输入分类器:").grid(row=1,column=0)  ## the u'' prefix keeps the Chinese label as Unicode so Tk renders it without mojibake
    inputEntry=Entry(root)
    inputEntry.grid(row=1,column=1)
    inputEntry.insert(0,'RF')
    drawPic()  # initial training round + first ROC plot

    draw_button_saveall()
    draw_button_savesingle()
    Button(root,text=u"重新训练",command=drawPic).grid(row=1,column=2,columnspan=3)



    # Start the Tk event loop.
    root.mainloop()



# Print predicted labels for a quick manual comparison against the truth.
##    import numpy as np
##    model = classifiers['LR'](train_x, train_y)
##    predict = model.predict(test_x)
##    print "LR :"
##    print "Predict:",test_x,predict.T

##    if model_save_file != None:
##        pickle.dump(model_save, open(model_save_file  , 'wb'))


展开阅读全文

没有更多推荐了,返回首页