import numpy as np
import pandas as pd
import argparse
from time import time
from datetime import datetime
from sklearn.metrics import confusion_matrix as CM # confusion-matrix computation
from sklearn.metrics import accuracy_score as ACCS # accuracy-score computation
# Command-line interface: two required positional arguments, the feature
# matrix and the label file.
parser = argparse.ArgumentParser(
    description='ML training and testing using RF,SVM,LR with input of matrix and target'
)
parser.add_argument(
    "allmatrix",
    help="input all the datamatrix in dataframe",
)
parser.add_argument(
    "target",
    help="input label data with np.txt format",
)
# Parsed immediately at module level; the rest of the script reads `args`.
args = parser.parse_args()
def timerecord():
    """Print the wall-clock time elapsed since ``time0`` as ``MM:SS:microseconds``.

    ``time0`` is a module-level start timestamp that must be assigned before
    this function is called; it is compared against ``time()``, so it is
    expected to be a value previously returned by ``time()``.

    The duration is split arithmetically rather than being passed through
    ``datetime.fromtimestamp``: that call interprets the number as an absolute
    point in local time, so the minutes field would be shifted in time zones
    with a non-whole-hour UTC offset and would wrap around after 60 minutes.
    Here the minutes field keeps counting past 59 instead of wrapping.
    """
    # time() is not monotonic; clamp so a backwards clock step cannot yield
    # a negative duration.
    elapsed = max(time() - time0, 0.0)
    total_us = int(round(elapsed * 10**6))
    minutes, rest_us = divmod(total_us, 60 * 10**6)
    seconds, micros = divmod(rest_us, 10**6)
    print("time cost: {:02d}:{:02d}:{:06d}".format(minutes, seconds, micros))
# Read both inputs named on the command line. The feature matrix is a CSV
# whose first row is the header and whose first column is the row index; the
# labels are a plain-text array parsed as int32.
allmatrix = pd.read_csv(args.allmatrix, header=0, index_col=0)
target = np.loadtxt(args.target, dtype='int32')

# Report the dimensions of the two inputs, one per line.
print(
    "allmatrix shape: {}".format(allmatrix.shape),
    "target shape: {}".format(target.shape),
    sep="\n",
)

# Class balance: label 0 is reported as "chrom" and label 1 as "plas"
# (presumably chromosome / plasmid -- confirm against the label file).
print("chrom NO.: ", np.count_nonzero(target == 0))
print("plas NO.: ", np.count_nonzero(target == 1))
# Random forest section: announce it, pull in the scikit-learn pieces it
# uses, and split the data into training and test sets.
print("random forest training result")
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed random_state makes the
# split repeatable from run to run.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    allmatrix,
    target,
    random_state=420,
    test_size=0.3,
)
rfc = R