# 贝叶斯实现手写体识别 (Handwritten digit recognition with naive Bayes)

# 【贝叶斯】 (Bayes)

import pandas as pd
import numpy as np
import cv2
import random
import time

from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score

 

def binaryzation(img):
    """Convert a grayscale image into an inverted 0/1 mask.

    Produces the same values as OpenCV's ``THRESH_BINARY_INV`` with a
    threshold of 50 and a max value of 1, but is computed with NumPy so the
    result does not depend on ``cv2.threshold`` writing into its ``dst``
    argument in place, and so arrays of any shape are handled.

    Args:
        img: NumPy array of pixel intensities.

    Returns:
        A new ``np.uint8`` array with the same shape as ``img``: 1 where the
        pixel (after the cast to ``uint8``) is <= 50, and 0 where it is > 50.
    """
    # Same cast as before, so out-of-range values wrap exactly as they did.
    cv_img = img.astype(np.uint8)
    return (cv_img <= 50).astype(np.uint8)
def Train(trainset, train_labels):
    """Estimate the naive-Bayes statistics from labelled images.

    Every image is binarized with ``binaryzation`` and, for its class, the
    number of times each pixel position takes the value 0 or 1 is counted.
    The counts are then turned into scaled per-pixel frequencies.

    Args:
        trainset: Indexable collection of flattened images; ``trainset[i]``
            is passed to ``binaryzation`` and must yield at least
            ``feature_len`` pixel values.
        train_labels: Iterable of integer class labels in
            ``range(class_num)``, aligned with ``trainset`` by position.

    Returns:
        A tuple ``(prior_probability, conditional_probability)``:

        * ``prior_probability`` has shape ``(class_num,)`` and holds the
          number of training samples per class (raw counts, not normalized).
        * ``conditional_probability`` has shape
          ``(class_num, feature_len, 2)``; entry ``[c][j][v]`` is the
          fraction of class-``c`` samples whose pixel ``j`` equals ``v``,
          multiplied by 10000 and offset by 1 so that no entry is zero.
          For a class with no training samples every entry is 1.
    """
    prior_probability = np.zeros(class_num)  # per-class sample counts
    conditional_probability = np.zeros((class_num, feature_len, 2))  # pixel-value counts
    pixel_index = np.arange(feature_len)

    # Accumulate the class counts and the per-pixel value counts.
    for i, label in enumerate(train_labels):
        img = binaryzation(trainset[i])
        prior_probability[label] += 1
        # Each (pixel position, value) pair occurs exactly once per image, so
        # a fancy-indexed increment is equivalent to the per-pixel loop.
        conditional_probability[label, pixel_index, img[:feature_len]] += 1

    # After binarization a pixel is either 0 or 1, so the two counts sum to
    # the number of samples seen for that class.
    totals = conditional_probability.sum(axis=2, keepdims=True)
    # Guard against classes with no samples: leave their fraction at 0
    # instead of dividing by zero.
    fractions = np.divide(
        conditional_probability,
        totals,
        out=np.zeros_like(conditional_probability),
        where=totals > 0,
    )
    conditional_probability = fractions * 10000 + 1
    return prior_probability, conditional_probability

# Compute the (unnormalized) probability score of an image for one class
def caculate_probability(img, label, prior=None, conditional=None):
    """Return the naive-Bayes score of a binarized image for one class.

    The score is the class prior multiplied by the conditional entry of
    every pixel, each truncated to an ``int`` so the product is an exact
    Python integer.

    Args:
        img: Sequence of binarized pixel values (0 or 1).
        label: Class index to score.
        prior: Per-class prior table, indexed by ``label``. Defaults to the
            module-level ``prior_probability``.
        conditional: Table indexed as ``[label][pixel_position][value]``.
            Defaults to the module-level ``conditional_probability``.

    Returns:
        The integer score; larger means the class is more likely.
    """
    if prior is None:
        prior = prior_probability
    if conditional is None:
        conditional = conditional_probability

    probability = int(prior[label])
    for i, pixel in enumerate(img):
        probability *= int(conditional[label][i][pixel])
    # Return only after every pixel has been folded into the product.
    return probability


def predict(testset, prior_probability, conditional_probability):
    """Predict a class label for every image in ``testset``.

    Args:
        testset: Iterable of flattened images; each one is binarized with
            ``binaryzation`` before scoring.
        prior_probability: Per-class prior table; its length determines how
            many classes are scored.
        conditional_probability: Table indexed as
            ``[label][pixel_position][value]``.

    Returns:
        ``np.ndarray`` with one predicted label per image. On ties the
        lowest class index wins.
    """
    num_classes = len(prior_probability)
    predictions = []
    for img in testset:
        img = binaryzation(img)  # binarize the image
        max_label = 0
        max_probability = caculate_probability(
            img, 0, prior_probability, conditional_probability)

        for j in range(1, num_classes):
            probability = caculate_probability(
                img, j, prior_probability, conditional_probability)
            if max_probability < probability:
                max_label = j
                max_probability = probability

        predictions.append(max_label)
    return np.array(predictions)

# Number of classes the classifier distinguishes.
class_num = 10
# Number of pixels per flattened image (784, i.e. 28 * 28).
feature_len = 28 * 28

if __name__ == '__main__':
    # Stage 1: load the CSV and split it into train / test partitions.
    print('start read data')
    load_start = time.time()

    frame = pd.read_csv(r"C:\Users\Administrator\Desktop\train.csv", header=0)
    table = frame.values
    labels = table[:, 0]   # first column is used as the label
    imgs = table[:, 1:]    # remaining columns are used as the pixel features

    # Hold out 33% of the rows for testing; the rest is used for training.
    train_features, test_features, train_labels, test_labels = train_test_split(
        imgs, labels, test_size=0.33, random_state=23323)
    load_end = time.time()
    print('read data cost', load_end - load_start, 'second', '\n')

    # Stage 2: fit the model.
    print('start training')
    # These names are module globals that caculate_probability can read, so
    # they must not be renamed.
    prior_probability, conditional_probability = Train(train_features, train_labels)
    train_end = time.time()
    print('training cost', train_end - load_end, 'second', '\n')

    # Stage 3: classify the held-out images.
    print('start predicting')
    test_predict = predict(test_features, prior_probability, conditional_probability)
    predict_end = time.time()
    print('predicting cost', predict_end - train_end, 'second', '\n')

    # Stage 4: report accuracy on the held-out set.
    score = accuracy_score(test_labels, test_predict)
    print('the accuracy score is', score)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值