资料:http://scikit-learn.org/dev/documentation.html
因为数学建模的关系,才临时了解了 Python 的一个开源项目 scikit-learn,
还有很多东西没有弄懂,以后再补充。
写的第一个测试代码:
import sys

import numpy as np
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier

# Send everything printed below to out.txt. The handle is intentionally left
# installed (and therefore open) as sys.stdout for the rest of the run.
sys.stdout = open('out.txt', 'w')

# Each row of train1.txt is one sample: the feature columns followed by the
# label in the last column. The context manager closes the file once loaded.
with open('train1.txt', 'r') as f:
    data = np.loadtxt(f)
X = data[:, :-1]
y = data[:, -1]

# Size the 2-fold split from the data actually loaded rather than a
# hard-coded 99, so larger files are not silently truncated and smaller
# files do not index past the end of X / y.
kf = cross_validation.KFold(len(y), n_folds=2)

clf = RandomForestClassifier(n_jobs=-1, n_estimators=10)
for train_index, test_index in kf:
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Refit on this fold's training half, then predict its held-out half.
    clf = clf.fit(X_train, y_train)
    ans = clf.predict(X_test)
    # Predicted labels first, then the true labels, for side-by-side
    # comparison. Single-argument print(...) gives the same output under
    # both Python 2 and Python 3.
    print(ans)
    print(y_test)