1 算法模型
我嘗試過的算法模型有:LogisticRegression-邏輯迴歸,KNeighborsClassifier-K近鄰算法,GaussianProcessClassifier-高斯過程(搭配RBF核函數),GradientBoostingRegressor-梯度提升迴歸。算法的具體內容可以百度搜索。
2 模型訓練
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.externals import joblib
import os,cv2
import numpy as np
from sklearn.model_selection import cross_val_score
from collections import Counter
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import GradientBoostingRegressor
import sklearn
def load_data(img_path):
    """Load a folder-per-class image dataset into feature and label arrays.

    ``img_path`` is expected to contain one sub-directory per class. Each
    image inside a sub-directory is read with ``cv2.imread``, flattened to
    one row and scaled by 1/255. Every image of the same sub-directory gets
    the same integer label; labels are assigned 0, 1, 2, ... following the
    sorted sub-directory names so the mapping is reproducible between runs.

    Args:
        img_path: Root directory holding the per-class sub-directories.

    Returns:
        A tuple ``(x, y)`` of numpy arrays: ``x`` holds one flattened image
        per row and ``y`` the matching class labels. Both are empty when no
        readable image is found.

    Note:
        ``np.array`` only produces a 2-D matrix when all images have the
        same shape; resize beforehand if the dataset mixes sizes.
    """
    x = []
    y = []
    n = 0
    # os.listdir() returns entries in arbitrary order; sort them so that a
    # given folder always receives the same label.
    for class_name in sorted(os.listdir(img_path)):
        class_dir = os.path.join(img_path, class_name)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not classes.
            continue
        for img_name in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, img_name)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable / non-image file with None.
                print('skip unreadable image:', image_path)
                continue
            x.append(image.reshape(-1) / 255.)
            y.append(n)
        n += 1
    return np.array(x), np.array(y)
def train(x, y, model_path=r'D:\MODEL\Y_IT_044_V1.model'):
    """Fit a logistic-regression classifier, save it and print per-class accuracy.

    The data is split 70/30 into train and test parts with
    ``train_test_split``. The model is fitted on the train part, dumped to
    ``model_path`` with joblib, and then evaluated on the held-out part,
    printing the accuracy of every class present in the test labels.

    Args:
        x: Feature matrix, one sample per row.
        y: Class labels aligned with the rows of ``x``.
        model_path: Where the fitted model is written. Defaults to the
            path originally hard-coded in this script.

    Returns:
        The fitted estimator.
    """
    print("starting train!")
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    # Other estimators that were tried in this spot: sklearn.svm.LinearSVC(),
    # sklearn.svm.SVC(kernel='linear', gamma='auto'),
    # sklearn.svm.NuSVC(gamma='auto'), GradientBoostingRegressor(...),
    # GaussianProcessClassifier(1.0 * RBF(1.0)), KNeighborsClassifier(3) and
    # LogisticRegression(penalty='l2', solver='lbfgs').
    #
    # dual=True is only implemented by the liblinear solver. The solver is
    # named explicitly because newer scikit-learn releases default to lbfgs,
    # which rejects dual=True. warm_start was dropped: liblinear ignores it.
    Log = LogisticRegression(penalty='l2', dual=True, solver='liblinear')
    Log.fit(X_train, y_train)
    prediction = Log.predict(X_test)

    # Make sure the target folder exists before dumping the model.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(Log, model_path)
    print("Compete the training! The model is saved!")
    print("-"*100)

    print("starting eval!")
    # Count samples and misclassifications per true class in a single pass
    # instead of rebuilding the counter for every class.
    totals = Counter(y_test)
    errors = Counter(
        truth for truth, guess in zip(y_test, prediction) if truth != guess
    )
    for k in sorted(totals):
        acc = 1 - errors[k] / totals[k]
        print('classes', k, 'eval acc: ', acc)
    return Log
if __name__ == '__main__':
    # Script entry point: build the dataset from the folder-per-class image
    # tree, then fit, save and evaluate the model.
    data_root = r'D:\data'
    train(*load_data(data_root))
3 模型測試
from sklearn.preprocessing import StandardScaler
import cv2, os
from sklearn.externals import joblib
import numpy as np
def model_Predict(img_path, model_path, classes):
    """Check how often a saved model predicts ``classes`` for a folder of images.

    Every file directly inside ``img_path`` is read with ``cv2.imread``,
    flattened to one float64 row and scaled by 1/255. The model stored at
    ``model_path`` then predicts a label per image. The name of each image
    whose prediction differs from ``classes`` is printed, followed by the
    overall accuracy.

    Args:
        img_path: Directory whose files are all expected to belong to the
            single class ``classes``.
        model_path: Path of a model file written with ``joblib.dump``.
        classes: The label every image in ``img_path`` should receive.

    Returns:
        The fraction of images predicted as ``classes``.

    Raises:
        ValueError: If ``img_path`` contains no readable image.
    """
    names = []
    x = []
    # List the directory once and keep the names aligned with the feature
    # rows, so misclassified files can be reported without re-listing.
    for name in sorted(os.listdir(img_path)):
        img = cv2.imread(os.path.join(img_path, name))
        if img is None:
            # cv2.imread signals an unreadable / non-image file with None.
            print('skip unreadable image:', name)
            continue
        # The flattened length must match what the model was trained on;
        # resize here if needed, e.g. img = cv2.resize(img, (80, 80)).
        names.append(name)
        x.append(img.reshape(-1).astype(np.float64) / 255.)
    if not x:
        raise ValueError('no readable images found in %r' % (img_path,))
    x = np.array(x)

    # NOTE: joblib files are pickle-based; only load models from a trusted source.
    Log = joblib.load(model_path)
    predict = Log.predict(x)

    count = 0
    for name, label in zip(names, predict):
        if label != classes:
            print(name)
            count += 1
    acc = 1 - count / len(predict)
    print('acc:', acc)
    return acc
if __name__ == '__main__':
    # Run the test against the folder of one single class.
    # NOTE(review): the model file name here differs from the one written by
    # the training script (Y_IT_044_V1.model) - confirm which one is intended.
    model_Predict(
        img_path=r'D:\data',
        model_path=r'D:\MODEL\R_IT_044_v0.model',
        classes=1,
    )