import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.externals import joblib
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
import sys
# Command-line handling: the script needs exactly four arguments after its
# own name -- the training CSV path, the number of training rows to read,
# the test CSV path and the number of test rows to read.
if len(sys.argv) != 5:
    # The body of this check has to be indented; with the statements at
    # column 0 Python rejects the file with an IndentationError.
    print("Usage:"+sys.argv[0]+" <train set path> <train set read line numbers> <test set path> <test set read line numbers>")
    print("Example:"+sys.argv[0]+" train.csv 10000 test.csv 100")
    sys.exit(1)
datapath = sys.argv[1]
trainLines = sys.argv[2]  # kept as a string; converted with int() at the read_csv call
testpath = sys.argv[3]
testLines = sys.argv[4]  # kept as a string; converted with int() at the read_csv call
# ---------------------------- Data Set Build ----------------------------
# Read at most `trainLines` rows of the header-less training CSV.
data = pd.read_csv(datapath, header=None, nrows=int(trainLines))
print("train set line numbers is "+str(len(data)))
# `DataFrame.ix` was deprecated in pandas 0.20 and removed in pandas 1.0.
# Because the file is read with header=None the columns are labelled
# 0, 1, 2, ..., so the label-based, end-inclusive `.loc` slice selects the
# same columns `.ix` did: columns 0..5 become the features and columns
# 6 onward become the targets.
x = data.loc[:, 0:5]
y = data.loc[:, 6:]
# Hold out 10% of the rows for validation; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=0)
# ------------------------------ Model Train ------------------------------
# To try a different classification algorithm (an SVM, for example), swap
# the estimator that is constructed here.
forest_params = {"n_estimators": 100, "max_depth": 2, "random_state": 0}
clf = RandomForestClassifier(**forest_params)
print("start train model")
clf.fit(x_train, y_train)

# -------------------------------- Predict --------------------------------
print("start predict data")
# Score the fitted model on x_test / y_test and print it with two decimals.
validate_score = clf.score(x_test, y_test)
print('Validate Score: %.2f' % validate_score)
# ------------------------------ Model Save ------------------------------
# Persist the fitted estimator into the current working directory.
model_save_path = "./"
# Report the class of the estimator that is actually being saved. The
# previous message hard-coded a misspelled "Decison Tree" even though `clf`
# is a random forest (or whichever estimator was swapped in for training).
print(type(clf).__name__ + " Model save...")
# The file name keeps its "tree_" prefix so anything that loads the saved
# model by name keeps working; os.path.join yields the same
# "./tree_train_model.m" path the string concatenation produced.
save_path_name = os.path.join(model_save_path, "tree_" + "train_model.m")
joblib.dump(clf, save_path_name)
# --------------------------------- Test ---------------------------------
# Read at most `testLines` rows of the header-less test CSV.
data = pd.read_csv(testpath, header=None, nrows=int(testLines))
print("test set line numbers is "+str(len(data)))
# `DataFrame.ix` was removed in pandas 1.0. With header=None the columns are
# labelled 0, 1, 2, ..., so the label-based, end-inclusive `.loc` slice
# selects the same columns: 0..5 as features, 6 onward as targets.
xtest = data.loc[:, 0:5]
ytest = data.loc[:, 6:]
# Column 6 of the targets, as a plain Python list of label values.
arrytest = ytest[6].tolist()
# Write the labels taken from the test set, one per line, to originLable.txt.
# The `with` block closes the file even if a write raises, which the manual
# open()/close() pair did not guarantee.
with open('originLable.txt', mode='w') as file_handle:
    file_handle.writelines(str(i) + "\n" for i in arrytest)
# Predict labels for the test features and print the model's score on the
# test set with two decimals.
ypredict = clf.predict(xtest)
print('Test Score: %.2f' % clf.score(xtest, ytest))
# Write the predictions, one per line, to predictLable.txt. The `with` block
# closes the file even if a write raises.
with open('predictLable.txt', mode='w') as file_handle:
    file_handle.writelines(str(i) + "\n" for i in ypredict)
# ---------------------------------- END ----------------------------------
print("END")