分享一个基于python的机器学习识别简单验证码的算法

注:源码基于Willson Luo的样例扩展而来,本文只做学习借鉴使用

# -*- coding: utf-8 -*-

from PIL import Image

import os

import requests

import time

import cv2

from sklearn import svm

from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score

import numpy as np

from sklearn.ensemble import RandomForestClassifier

from sklearn.externals import joblib


 

def yzm_download(down_url):

# url = 'https://app.singlewindow.cn/cas/plat_cas_verifycode_gen'

res = requests.get(down_url, stream=True)

with open('yzm_download'+os.sep+str(int(time.time() * 1000000))+'.jpg', 'wb') as f:

for chunk in res.iter_content(chunk_size=1024):

if chunk:

f.write(chunk)

f.flush()

# f.close()

return


 

def yzm_clean(yzm_file):

print(os.getcwd())

img = cv2.imread(yzm_file, 0) # 直接读为灰度图像

ret, thresh1 = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)

cv2.imwrite(yzm_file, thresh1)

return


 

def yzm_split(yzm_file):

img = Image.open(yzm_file)

im = img.convert("L")

split_lines = [4, 18, 32, 46, 60]

y_min = 1

y_max = 23

for x_min, x_max in zip(split_lines[:-1], split_lines[1:]):

im.crop([x_min, y_min, x_max, y_max]).save(

'yzm_split' + os.sep + str('zz_')+str(int(time.time() * 1000000))+'.jpg',

'jpeg') # (str(c)+'.jpg')

return
 

def yzm_binary(yzm_file):

im = Image.open(yzm_file)

img = np.array(im)

rows, cols = img.shape

for i in range(rows):

for j in range(cols):

if (img[i, j] <= 128):

img[i, j] = 0

else:

img[i, j] = 1

binpix = np.ravel(img)

return binpix


 

def yzm_matrix(yzm_folder):

samples_list = []

samples_filename = os.listdir(yzm_folder)

for i in range(len(samples_filename)):

samples_list.append(yzm_binary(yzm_folder + os.sep + samples_filename[i]))

samples = np.array(samples_list)

print(samples.shape)

return samples


 

def yzm_vector(yzm_folder):

vector_list = []

vector_filename = os.listdir(yzm_folder)

for i in range(len(vector_filename)):

vector_list.append(vector_filename[i].split("_")[0])

vectors = np.array(vector_list)

print(vectors.shape)

return vectors


 

def yzm_gnb(yzm_Xdata,yzm_ydata):

Xtrain, Xtest, ytrain, ytest = train_test_split(yzm_Xdata,yzm_ydata,

test_size=0.3) # random_state=1)

model = GaussianNB()

model.fit(Xtrain,ytrain)

y_model = model.predict(Xtest)

score = accuracy_score(ytest, y_model)

print("Gaussian Naive Bayes Accuracy is:",score)

joblib.dump(model, "GNB_train_model.m")

return


 

def yzm_svm(yzm_Xdata,yzm_ydata):

Xtrain, Xtest, ytrain, ytest = train_test_split(yzm_Xdata,yzm_ydata,

test_size=0.3) # random_state=1)

clf = svm.SVC()

clf.fit(Xtrain, ytrain)

y_model = clf.predict(Xtest)

score = accuracy_score(ytest, y_model)

print("Support Vector Machine Accuracy is:",score)

joblib.dump(clf, "SVM_train_model.m")

return


 

def yzm_rf(yzm_Xdata,yzm_ydata):

Xtrain, Xtest, ytrain, ytest = train_test_split(yzm_Xdata,yzm_ydata,

test_size=0.3) # random_state=1)

model = RandomForestClassifier(n_estimators=1000)

model.fit(yzm_Xdata, yzm_ydata)

y_model = model.predict(Xtest)

score = accuracy_score(ytest, y_model)

print("Radon Forest Accuracy is:",score)

joblib.dump(model, "RF_train_model.m")

return


 

def yzm_gnb_pred(yzm_file):

X_tmp = []

img_orig = yzm_binary(yzm_file)

X_tmp.append(img_orig)

X_pred = np.array(X_tmp)

gnb_model = joblib.load("GNB_train_model.m")

return gnb_model.predict(X_pred)

 

def yzm_rf_pred(yzm_file):

X_tmp = []

img_orig = yzm_binary(yzm_file)

X_tmp.append(img_orig)

X_pred = np.array(X_tmp)

rf_model = joblib.load("RF_train_model.m")

return rf_model.predict(X_pred)


 

def yzm_svm_pred(yzm_file):

X_tmp = []

img_orig = yzm_binary(yzm_file)

X_tmp.append(img_orig)

X_pred = np.array(X_tmp)

svm_model = joblib.load("SVM_train_model.m")

return svm_model.predict(X_pred)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值