简单球类识别(机器学习)

图片数据(自己从网站里找的图片):

第一步:修改图片统一格式(像素大小、命名)

# coding=utf-8
import os
from PIL import Image


folds = "./Datas/"

# Step 1: normalise every image under ./Datas/<class>/ to 256x256 and
# rename the files to sequential numbers (0.jpg, 1.jpg, ...).
for fold in os.listdir(folds):  # `fold` is a class sub-directory name
    i = 0
    for file in os.listdir(folds + fold):  # `file` is an image file name
        src = folds + fold + "/" + file
        dst = folds + fold + "/" + str(i) + ".jpg"
        # Context manager releases the file handle before the source is
        # removed (an open handle blocks deletion on Windows).
        with Image.open(src) as photo:
            # JPEG cannot store palette/alpha images; convert to RGB first,
            # then resize to the uniform 256x256 shape.
            im_photo = photo.convert("RGB").resize((256, 256))
            im_photo.save(dst)  # uniform naming scheme
        i = i + 1
        # Bug fix: only delete the source when it is not the file we just
        # wrote — an image already named "0.jpg" would otherwise be lost.
        if src != dst:
            os.remove(src)


第二步:将图片转换为可训练的一维数据

# Build the training feature matrix: each image becomes a flattened,
# normalised 2-D colour histogram over its first two channels.
xx_train = []
for path in X_train:
    # np.fromfile + cv2.imdecode copes with non-ASCII file names that
    # cv2.imread cannot open directly (encoding issue).
    raw = np.fromfile(path, dtype=np.uint8)
    img = cv2.resize(cv2.imdecode(raw, cv2.IMREAD_COLOR), (256, 256),
                     interpolation=cv2.INTER_CUBIC)
    hist = cv2.calcHist([img], [0, 1], None, [256, 256],
                        [0.0, 255.0, 0.0, 255.0])
    xx_train.append((hist / 255).flatten())

第三步:选择算法进行训练


# Decision-tree baseline: entropy splits, depth capped at 10, small leaves
# disallowed to limit overfitting on the tiny dataset.
tree = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=10,
    min_samples_split=4,
    min_samples_leaf=4,
    random_state=3,
)
tree.fit(xx_train, y_train)
y_pred = tree.predict(xx_test)
print("决策树")
print(tree.score(xx_test, y_test))
print()




# k-nearest-neighbours classifier (distance-weighted votes, KD-tree search).
knn = KNeighborsClassifier(n_neighbors=6, weights="distance", algorithm="kd_tree")
knn.fit(xx_train, y_train)
# Bug fix: the original called tree.predict here, so y_pred belonged to the
# decision tree while the knn score was printed below.
y_pred = knn.predict(xx_test)
print("knn")
print(knn.score(xx_test, y_test))
print()
#

# Random forest: 81 entropy trees, depth capped at 10, fixed seed.
rf = RandomForestClassifier(
    n_estimators=81,
    criterion='entropy',
    max_depth=10,
    random_state=7,
)
rf.fit(xx_train, y_train)
y_pred = rf.predict(xx_test)
print("随机森林")
print(rf.score(xx_test, y_test))
print()

# 网格搜索进行超参数调优
# param_grid = {
#     'n_estimators': [81, 100, 150],
#     "criterion":['entropy',"gini"],
#     "max_depth":[7,8,9,10,11],
#
#     "random_state":[7,8,9,10,11]
#
#
# }
#
# # 创建随机森林分类器对象
# clf = RandomForestClassifier()
#
# # 进行网格搜索
# grid_search = GridSearchCV(clf, param_grid, cv=5)
# grid_search.fit(xx_train, y_train)
#
# # 输出最佳参数组合和最佳准确率
# print("Best parameters:", grid_search.best_params_)
# print("Best accuracy:", grid_search.best_score_)


#
# Multinomial naive-Bayes baseline — the histogram features are
# non-negative, which this model requires.
nb = MultinomialNB()
nb.fit(xx_train, y_train)
y_pred = nb.predict(xx_test)
nb_score = nb.score(xx_test, y_test)
print("nb")

print(nb_score)
print()
#
# Logistic-regression baseline with sklearn defaults.
lr = LogisticRegression()
lr.fit(xx_train, y_train)
y_pred = lr.predict(xx_test)
lr_score = lr.score(xx_test, y_test)
print("lr")
print(lr_score)
print()
#
# #
# Hard-voting ensemble: the predicted class is the majority vote of the
# tree, forest and logistic-regression members.
members = [("decision", tree), ("rf", rf), ("lr", lr)]
vote = VotingClassifier(estimators=members, voting="hard")
vote.fit(xx_train, y_train)
y_pred = vote.predict(xx_test)
vote_score = accuracy_score(y_test, y_pred)
print(vote.score(xx_test, y_test))
print("vote", vote_score)
#
#

# Bagging meta-ensemble: 4 bootstrap-resampled copies of the random forest.
bagging = BaggingClassifier(rf, n_estimators=4, random_state=2)
bagging.fit(xx_train, y_train)
y_pred = bagging.predict(xx_test)
# NOTE(review): variable name is a typo for "bagging_score"; kept because it
# is a module-level name other snippets may reference.
baggin_score = accuracy_score(y_test, y_pred)
print("bagging", baggin_score)
#
#
# Gradient boosting with sklearn defaults (100 stages of depth-3 trees).
Gradient = GradientBoostingClassifier()
Gradient.fit(xx_train, y_train)
y_pred = Gradient.predict(xx_test)
gradient_score = accuracy_score(y_test, y_pred)
# Bug fix: printed label was misspelled "grandient".
print("gradient", gradient_score)
#
# Stacking with a single base learner and the same forest as the final
# estimator — a degenerate stack, reproduced as in the original experiment.
stacking = StackingClassifier([("rf", rf)], final_estimator=rf)
stacking.fit(xx_train, y_train)
y_pred = stacking.predict(xx_test)
stacking_score = accuracy_score(y_test, y_pred)
print("stack", stacking_score)

第四步:进行参数调优

注:对于不同数据,参数的选择是不同的

方法一:for循环一个个参数进行调优

方法二:网格搜索进行超参数调优

# 网格搜索进行超参数调优
# Exhaustive hyper-parameter search for the random forest.
param_grid = {
    'n_estimators': [81, 100, 150],
    "criterion": ['entropy', "gini"],
    "max_depth": [7, 8, 9, 10, 11],
    "random_state": [7, 8, 9, 10, 11],
}

# Fresh, untuned forest for the search to refit on each grid point.
clf = RandomForestClassifier()

# 5-fold cross-validated search over every parameter combination.
grid_search = GridSearchCV(clf, param_grid, cv=5)
grid_search.fit(xx_train, y_train)

# Report the winning combination and its cross-validated accuracy.
print("Best parameters:", grid_search.best_params_)
print("Best accuracy:", grid_search.best_score_)

第五步:进行可视化

import matplotlib.image as mpimg

# Show every test image with its predicted label; correct predictions get a
# black title, wrong ones red.
n_images = len(y_pred)
# Bug fix: the grid was hard-coded to 3x6, so plt.subplot raised ValueError
# as soon as the test set held more than 18 images.  Size rows to the data.
n_cols = 6
n_rows = (n_images + n_cols - 1) // n_cols  # ceiling division
plt.figure(figsize=(n_cols * 10, n_rows * 10))

for i in range(n_images):
    ax = plt.subplot(n_rows, n_cols, i + 1)
    ax.imshow(mpimg.imread(X_test[i]))
    # The ground-truth label is the class folder name embedded in the path.
    true_label = X_test[i].split("Datas/")[1].split("/")[0]
    ax.set_title(f'predict:{y_pred[i]}', fontsize=40,
                 color='k' if true_label == y_pred[i] else 'r')
plt.show()

第六步:定义预测函数


test = []  # kept for backward compatibility; no longer used by exchange()


def exchange(url):
    """Predict the ball class of a single image file and print the result.

    Parameters
    ----------
    url : str
        Path to the image file.

    Returns
    -------
    The prediction array from the random forest (also printed), so callers
    can use the value instead of parsing stdout.
    """
    # np.fromfile + cv2.imdecode copes with non-ASCII paths that cv2.imread
    # cannot open directly (encoding issue).
    raw = np.fromfile(url, dtype=np.uint8)
    image = cv2.resize(cv2.imdecode(raw, cv2.IMREAD_COLOR), (256, 256),
                       interpolation=cv2.INTER_CUBIC)
    hist = cv2.calcHist([image], [0, 1], None, [256, 256],
                        [0.0, 255.0, 0.0, 255.0])
    # Bug fix: the original appended to a module-level list and removed the
    # element afterwards; a local one-sample batch is equivalent and cannot
    # leak state if predict() raises.
    sample = [(hist / 255).flatten()]
    prediction = rf.predict(sample)
    print(prediction)
    return prediction


exchange("b.jpg")
exchange("b.jpg")

全部代码:

# coding=utf-8
import os

import cv2
import matplotlib
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, BaggingClassifier, GradientBoostingClassifier, \
    StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # CJK font so Chinese labels render
# CJK fonts commonly lack a proper Unicode minus glyph; disable it so
# negative numbers on the axes display correctly.
matplotlib.rcParams['axes.unicode_minus'] = False

# Collect (image path, class label) pairs; the label is the sub-folder name.
folds = "./Datas/"
x = []
y = []
for fold in os.listdir(folds):
    for file in os.listdir(folds + fold):
        # Keep the path (not pixel data) so the images can be re-read
        # later for display.  NOTE(review): these are relative, not
        # absolute, paths despite the original comment.
        x.append(folds + fold + "/" + file)
        y.append(fold)
print(x)
print(y)

x = np.array(x)
y = np.array(y)
# 70/30 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1)

def _image_hist_features(path):
    """Return the flattened, normalised 2-D colour histogram of one image.

    np.fromfile + cv2.imdecode is used instead of cv2.imread so that
    non-ASCII file names are decoded correctly (encoding issue).
    """
    raw = np.fromfile(path, dtype=np.uint8)
    image = cv2.resize(cv2.imdecode(raw, cv2.IMREAD_COLOR), (256, 256),
                       interpolation=cv2.INTER_CUBIC)
    hist = cv2.calcHist([image], [0, 1], None, [256, 256],
                        [0.0, 255.0, 0.0, 255.0])
    return (hist / 255).flatten()


# The train and test loops were copy-pasted duplicates; share one helper.
xx_train = [_image_hist_features(p) for p in X_train]
xx_test = [_image_hist_features(p) for p in X_test]



# Decision-tree baseline: entropy splits, depth capped at 10, small leaves
# disallowed to limit overfitting on the tiny dataset.
tree = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=10,
    min_samples_split=4,
    min_samples_leaf=4,
    random_state=3,
)
tree.fit(xx_train, y_train)
y_pred = tree.predict(xx_test)
print("决策树")
print(tree.score(xx_test, y_test))
print()

#
import matplotlib.image as mpimg

# Show every test image with its predicted label; correct predictions get a
# black title, wrong ones red.
n_images = len(y_pred)
# Bug fix: the grid was hard-coded to 3x6, so plt.subplot raised ValueError
# as soon as the test set held more than 18 images.  Size rows to the data.
n_cols = 6
n_rows = (n_images + n_cols - 1) // n_cols  # ceiling division
plt.figure(figsize=(n_cols * 10, n_rows * 10))

for i in range(n_images):
    ax = plt.subplot(n_rows, n_cols, i + 1)
    ax.imshow(mpimg.imread(X_test[i]))
    # The ground-truth label is the class folder name embedded in the path.
    true_label = X_test[i].split("Datas/")[1].split("/")[0]
    ax.set_title(f'predict:{y_pred[i]}', fontsize=40,
                 color='k' if true_label == y_pred[i] else 'r')
plt.show()



# k-nearest-neighbours classifier (distance-weighted votes, KD-tree search).
knn = KNeighborsClassifier(n_neighbors=6, weights="distance", algorithm="kd_tree")
knn.fit(xx_train, y_train)
# Bug fix: the original called tree.predict here, so y_pred belonged to the
# decision tree while the knn score was printed below.
y_pred = knn.predict(xx_test)
print("knn")
print(knn.score(xx_test, y_test))
print()
#

# Random forest: 81 entropy trees, depth capped at 10, fixed seed.
rf = RandomForestClassifier(
    n_estimators=81,
    criterion='entropy',
    max_depth=10,
    random_state=7,
)
rf.fit(xx_train, y_train)
y_pred = rf.predict(xx_test)
print("随机森林")
print(rf.score(xx_test, y_test))
print()

# # 网格搜索进行超参数调优
# param_grid = {
#     'n_estimators': [81, 100, 150],
#     "criterion":['entropy',"gini"],
#     "max_depth":[7,8,9,10,11],
#
#     "random_state":[7,8,9,10,11]
#
#
# }
#
# # 创建随机森林分类器对象
# clf = RandomForestClassifier()
#
# # 进行网格搜索
# grid_search = GridSearchCV(clf, param_grid, cv=5)
# grid_search.fit(xx_train, y_train)
#
# # 输出最佳参数组合和最佳准确率
# print("Best parameters:", grid_search.best_params_)
# print("Best accuracy:", grid_search.best_score_)
#
#
# #
# nb=MultinomialNB()
# nb.fit(xx_train,y_train)
# y_pred=nb.predict(xx_test)
# print("nb")
#
# print(nb.score(xx_test,y_test))
# print()
# #
# lr=LogisticRegression()
# lr.fit(xx_train,y_train)
# y_pred=lr.predict(xx_test)
# print("lr")
# print(lr.score(xx_test,y_test))
# print()
# #
# # #
# vote=VotingClassifier(estimators=[("decision",tree),("rf",rf),("lr",lr)],voting="hard")
# vote.fit(xx_train,y_train)
# y_pred=vote.predict(xx_test)
# vote_score=accuracy_score(y_test,y_pred)
# print(vote.score(xx_test,y_test))
# print("vote",vote_score)
# #
# #
#
# bagging=BaggingClassifier(rf,n_estimators=4,random_state=2)
# bagging.fit(xx_train,y_train)
# y_pred=bagging.predict(xx_test)
# baggin_score=accuracy_score(y_test,y_pred)
# print("bagging",baggin_score)
# #
# #
# Gradient=GradientBoostingClassifier()
# Gradient.fit(xx_train,y_train)
# y_pred=Gradient.predict(xx_test)
# gradient_score=accuracy_score(y_test,y_pred)
# print("grandient",gradient_score)
# #
# stacking=StackingClassifier([("rf",rf)],final_estimator=rf)
# stacking.fit(xx_train,y_train)
# y_pred=stacking.predict(xx_test)
# stacking_score=accuracy_score(y_test,y_pred)
# print("stack",stacking_score)

test = []  # kept for backward compatibility; no longer used by exchange()


def exchange(url):
    """Predict the ball class of a single image file and print the result.

    Parameters
    ----------
    url : str
        Path to the image file.

    Returns
    -------
    The prediction array from the random forest (also printed), so callers
    can use the value instead of parsing stdout.
    """
    # np.fromfile + cv2.imdecode copes with non-ASCII paths that cv2.imread
    # cannot open directly (encoding issue).
    raw = np.fromfile(url, dtype=np.uint8)
    image = cv2.resize(cv2.imdecode(raw, cv2.IMREAD_COLOR), (256, 256),
                       interpolation=cv2.INTER_CUBIC)
    hist = cv2.calcHist([image], [0, 1], None, [256, 256],
                        [0.0, 255.0, 0.0, 255.0])
    # Bug fix: the original appended to a module-level list and removed the
    # element afterwards; a local one-sample batch is equivalent and cannot
    # leak state if predict() raises.
    sample = [(hist / 255).flatten()]
    prediction = rf.predict(sample)
    print(prediction)
    return prediction


exchange("b.jpg")
exchange("b.jpg")

  • 9
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值