# -*- coding: utf-8 -*-
"""
Created on Wed May 18 07:42:02 2022
@author: 733
"""
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Fit eight scikit-learn classifiers on the training CSV, print each model's
# predictions for the test CSV, and save the MLP predictions to a text file.

inputfile = "C://Users//dingnanwei//Desktop//data-train.csv"
datatestfile = "C://Users//dingnanwei//Desktop//data-test.csv"
outputfile = 'C://Users//dingnanwei//Desktop//predictions.txt'

# --- Training data ---------------------------------------------------------
data = pd.read_csv(inputfile, sep=',')
# Label-based slice: every column from the first one through 'f13' inclusive.
# NOTE(review): assumes 'target' is located after 'f13'; otherwise the label
# would leak into the features -- confirm against the CSV layout.
X = data.loc[:, :'f13']
# Keep the target 1-D (a Series). Wrapping it in a DataFrame hands the
# estimators an (n, 1) column vector, which scikit-learn flags with a
# DataConversionWarning for most of them.
y = data.loc[:, 'target']

# --- Data to predict -------------------------------------------------------
newdata = pd.read_csv(datatestfile, sep=',')
# Missing values in the test file are replaced by 0 before prediction.
# NOTE(review): the training features get no such treatment; presumably the
# training CSV has no missing values -- confirm.
newdata = newdata.replace(np.nan, 0)
newX = newdata.loc[:, :'f13']

# --- Models ----------------------------------------------------------------
# All estimators use library defaults except KNN, which uses 3 neighbours.
neigh = KNeighborsClassifier(n_neighbors=3)
clf = GaussianNB()
logis = LogisticRegression()
det = DecisionTreeClassifier()
rdf = RandomForestClassifier()
gdb = GradientBoostingClassifier()
svc = SVC()
mlp = MLPClassifier()

# (label used in the printed output, estimator), in the order they are fitted.
classifiers = [
    ("KNN", neigh),
    ("GNB", clf),
    ("LGR", logis),
    ("DET", det),
    ("RDF", rdf),
    ("GDB", gdb),
    ("SVC", svc),
    ("MLP", mlp),
]

# Fit each model and keep its predictions so they can be reused below.
# To inspect class probabilities instead, call model.predict_proba(newX)
# (SVC only offers it when constructed with probability=True).
predictions = {}
for label, model in classifiers:
    model.fit(X, y)
    predictions[label] = model.predict(newX)
    print(f"Pred_{label}:", predictions[label])

# --- Output ----------------------------------------------------------------
# Only the MLP predictions are saved, one integer label per line. The context
# manager guarantees the file is closed even if savetxt raises.
with open(outputfile, mode='w') as file_handle:
    np.savetxt(file_handle, predictions["MLP"], fmt="%d", delimiter=" ")
# Data mining: the strengths and weaknesses of various classifiers.