# Neuron Classification

# -*- coding: utf-8 -*-
"""
Created on Thurs May 26 15:28:03 2016

@author: HM
"""

print(__doc__)
import sys
import os
import time
from sklearn import metrics
import numpy as np
import cPickle as pickle
import pandas as pd
import numpy as np
from pandas import DataFrame,Series
import matplotlib.pyplot as plt

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# the interpreter-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
# NOTE: redundant, os is already imported at the top of the file.
import os
# Switch to the data directory; read_data() opens its CSV files by bare
# file name, so they are resolved relative to this directory.
# "\d" is not a recognised escape sequence, so the backslash is kept literally.
os.chdir("E:\data")

# Multinomial Naive Bayes Classifier
def naive_bayes_classifier(train_x, train_y):
    """Fit a multinomial naive Bayes model (alpha=0.01) and return it.

    train_x holds the training samples and train_y the matching labels;
    both are handed unchanged to the estimator's ``fit``.
    """
    from sklearn import naive_bayes

    clf = naive_bayes.MultinomialNB(alpha=0.01)
    clf.fit(train_x, train_y)
    return clf


# KNN Classifier
def knn_classifier(train_x, train_y):
    """Fit a k-nearest-neighbours classifier with library defaults and return it.

    train_x holds the training samples and train_y the matching labels.
    """
    from sklearn import neighbors

    clf = neighbors.KNeighborsClassifier()
    clf.fit(train_x, train_y)
    return clf


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Fit an L2-penalised logistic regression model and return it.

    train_x holds the training samples and train_y the matching labels.
    """
    from sklearn import linear_model

    clf = linear_model.LogisticRegression(penalty='l2')
    clf.fit(train_x, train_y)
    return clf


# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Fit a 10-tree random forest with a fixed seed and return it.

    train_x holds the training samples and train_y the matching labels.
    Trees are grown without a depth limit (max_depth=None) and
    random_state=0 keeps the result reproducible between runs.
    """
    from sklearn.ensemble import RandomForestClassifier

    # min_samples_split must be an int >= 2 in scikit-learn >= 0.18; the old
    # value of 1 raises ValueError there. A node holding a single sample can
    # never be split, so 2 is the smallest meaningful setting.
    model = RandomForestClassifier(
        n_estimators=10,
        max_depth=None,
        min_samples_split=2,
        random_state=0,
    )
    model.fit(train_x, train_y)
    return model


# Decision Tree Classifier1
def decision_tree_classifier1(train_x, train_y):
    """Fit a decision tree classifier using the Gini criterion and return it.

    train_x holds the training samples and train_y the matching labels.
    """
    from sklearn.tree import DecisionTreeClassifier

    clf = DecisionTreeClassifier(criterion='gini')
    clf.fit(train_x, train_y)
    return clf

# Decision Tree Classifier2
def decision_tree_classifier2(train_x, train_y):
    """Fit a decision tree classifier using the entropy criterion and return it.

    train_x holds the training samples and train_y the matching labels.
    """
    from sklearn.tree import DecisionTreeClassifier

    clf = DecisionTreeClassifier(criterion='entropy')
    clf.fit(train_x, train_y)
    return clf

# Decision Tree Regressor (registered under the key 'DT3')
def DecisionTreeRegressor(train_x, train_y):
    """Fit a decision tree *regressor* with library defaults and return it.

    Unlike the neighbouring helpers this builds a regression model, not a
    classifier. train_x holds the training samples and train_y the targets.
    """
    # Aliased so the imported class is not confused with this function,
    # which carries the same name.
    from sklearn.tree import DecisionTreeRegressor as _TreeRegressor

    reg = _TreeRegressor()
    reg.fit(train_x, train_y)
    return reg

# GBDT(Gradient Boosting Decision Tree) Classifier
def gradient_boosting_classifier(train_x, train_y):
    """Fit a gradient boosting classifier with 200 estimators and return it.

    train_x holds the training samples and train_y the matching labels.
    """
    from sklearn import ensemble

    clf = ensemble.GradientBoostingClassifier(n_estimators=200)
    clf.fit(train_x, train_y)
    return clf

# GNB(GaussianNB) Classifier
def GaussianNB(train_x, train_y):
    """Fit a Gaussian naive Bayes model with library defaults and return it.

    train_x holds the training samples and train_y the matching labels.
    """
    # Reach the estimator through its module so the class name does not
    # shadow this function's own name inside the body.
    from sklearn import naive_bayes

    clf = naive_bayes.GaussianNB()
    clf.fit(train_x, train_y)
    return clf

# SVM Classifier
def svm_classifier(train_x, train_y):
    """Fit an RBF-kernel SVM with probability estimates enabled and return it.

    train_x holds the training samples and train_y the matching labels.
    """
    from sklearn import svm

    clf = svm.SVC(kernel='rbf', probability=True)
    clf.fit(train_x, train_y)
    return clf

# SVM Classifier using cross validation
def svm_cross_validation(train_x, train_y):
    """Grid-search C and gamma for an RBF SVM, then fit and return the best one.

    A GridSearchCV over the C / gamma grid below is fitted on the training
    data, every parameter of the best estimator is printed, and a fresh SVC
    built with the winning C and gamma is fitted on the full training set.

    train_x holds the training samples and train_y the matching labels.
    Returns the fitted SVC.
    """
    # sklearn.grid_search was removed in scikit-learn 0.20; GridSearchCV now
    # lives in sklearn.model_selection. Fall back to the old location so that
    # older installations keep working.
    try:
        from sklearn.model_selection import GridSearchCV
    except ImportError:
        from sklearn.grid_search import GridSearchCV
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    param_grid = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    }
    grid_search = GridSearchCV(model, param_grid, n_jobs=1, verbose=1)
    grid_search.fit(train_x, train_y)

    best_parameters = grid_search.best_estimator_.get_params()
    for para, val in best_parameters.items():
        # Single-argument print works as a statement (Python 2) and as a
        # function call (Python 3) and yields the same "name value" line.
        print('%s %s' % (para, val))

    model = SVC(kernel='rbf', C=best_parameters['C'],
                gamma=best_parameters['gamma'], probability=True)
    model.fit(train_x, train_y)
    return model


def read_data():
    """Load the feature CSVs and pair them with the hard-coded class labels.

    Reads "X_train.csv", "X_test.csv" and "X_test2.csv" from the current
    working directory. Training features come from X_train.csv and test
    features from X_test2.csv; X_test.csv is read but not returned.

    Returns a tuple (train_x, train_y, test_x, test_y) of numpy arrays.
    """
    train_frame = pd.read_csv("X_train.csv")
    small_test_frame = pd.read_csv("X_test.csv")  # alternate test set, currently unused
    test_frame = pd.read_csv("X_test2.csv")

    # Hand-assigned labels for the 44 training rows, in file order.
    train_labels = [
        2, 2, 2, 2, 2,
        5, 5, 5,
        1, 1, 1, 1, 1,
        3, 3, 3, 3, 3, 3, 3, 3, 3,
        4,
        5, 5, 5,
        7, 7, 7, 7, 7, 7, 7,
        4, 4, 4, 4,
        6, 6, 6, 6, 6, 6, 6,
    ]
    # Labels for the alternate (X_test.csv) test set; kept for reference only.
    small_test_labels = [2, 4, 1, 3, 5, 6, 7]
    # Labels for the 60 X_test2.csv rows: ten rows each of classes 1-5 and 7.
    test_labels = [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    ]

    return (
        np.array(train_frame),
        np.array(train_labels),
        np.array(test_frame),
        np.array(test_labels),
    )

if __name__ == '__main__':

    # Persisting the fitted models with pickle is currently disabled.

    # Short key -> training helper. Every helper takes (train_x, train_y)
    # and returns a fitted model.
    classifiers = {
        'NB': naive_bayes_classifier,
        'KNN': knn_classifier,
        'LR': logistic_regression_classifier,
        'RF': random_forest_classifier,
        'DT1': decision_tree_classifier1,
        'DT2': decision_tree_classifier2,
        'DT3': DecisionTreeRegressor,
        'SVM': svm_classifier,
        'SVMCV': svm_cross_validation,
        'GBDT': gradient_boosting_classifier,
        'GNB': GaussianNB,
    }
    # Only these entries are trained and evaluated in this run.
    test_classifiers = ['RF', 'DT1', 'GBDT']

    print('reading training and testing data...')
    train_x, train_y, test_x, test_y = read_data()
    num_train, num_feat = train_x.shape
    num_test, num_feat = test_x.shape
    # Precision and recall are reported only for two-class problems.
    is_binary_class = (len(np.unique(train_y)) == 2)
    print('******************** Data Info *********************')
    print('#training data: %d, #testing_data: %d, dimension: %d'
          % (num_train, num_test, num_feat))

    for name in test_classifiers:
        print('******************* %s ********************' % name)
        train_fn = classifiers[name]
        start_time = time.time()
        model = train_fn(train_x, train_y)
        elapsed = time.time() - start_time
        print('training took %fs!' % elapsed)
        predict = model.predict(test_x)

        if is_binary_class:
            precision = metrics.precision_score(test_y, predict)
            recall = metrics.recall_score(test_y, predict)
            print('precision: %.2f%%, recall: %.2f%%'
                  % (100 * precision, 100 * recall))
        accuracy = metrics.accuracy_score(test_y, predict)
        print('accuracy: %.2f%%' % (100 * accuracy))
# (Blog-page residue removed here: comment box and payment-widget text that was not part of the script.)