C++实现基于KNN的手写体识别

一、系统结构

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

二、数据获取与预处理

在这里插入图片描述
在这里插入图片描述

三、KNN算法与K折交叉验证

在这里插入图片描述
在这里插入图片描述
源码:

#include "pch.h"
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
using namespace std;
//#define k 10   //key KNN parameter (disabled; k is the runtime variable declared below)
// mn is not referenced anywhere else in this listing.
#define mn 10
#define K_flod 15  //number of folds used by the K-fold cross-validation
// Number of nearest neighbours that vote in K_select; main assigns it before each run.
int k;
double train_weigh[8] = {1,1,1,1,1,1,1,1};//per-character vote weights for the training set; K_select multiplies each class's vote count by its entry
// One record of the recogniser. The same type is used for data-set samples
// (array a), test samples (array ceshi) and distance records (array dis_o).
struct node {
    string nn;       // the 0/1 matrix read from one txt file, stored as a single string
    string name;     // class name of the sample (its true label)
    string shuxing;  // class name finally decided for a test-set sample (per the author's notes)
    string sbname;   // recognition result for a tested character; written by K_select
    double dis;      // distance between a test character and a training character; the key Par/Qsort sort by
    double x1;       // share of the k distances recognised as the current test character (per the author's notes)
    double x2;       // share of the k distances recognised as other characters (per the author's notes)
    double diss;     // Manhattan distance between a test character and a training character
    bool s;          // k_K_flod sets this to true for samples in the current test fold

    // Text fields start as the placeholder "000". Every numeric field starts
    // at 0, including diss, which the previous constructor left uninitialised.
    node()
        : nn("000"), name("000"), shuxing("000"), sbname("000"),
          dis(0.0), x1(0.0), x2(0.0), diss(0.0), s(false) {}
};
node dis_o[720];//distance records (Euclidean or Manhattan); K_select sorts this array by its dis field
node ceshi[720];//the test set
double tru[720];//accuracy of each round of the K-fold cross-validation
double err[720];//error rate of each round of the K-fold cross-validation
double K_tru[8];//final accuracy of each character for the current k
//double K_err[8];//final error rate of each character for the current k
string typp[8] = { "bei","jing","xin","xi","ke","ji","da","xue" };//the character classes that can be recognised
node a[720];//the whole data set
// Loads the whole data set into the global array a: 8 character classes with
// 90 samples each, stored class by class (a[0..89] = "bei", a[90..179] =
// "jing", ...). For every sample the whitespace-separated tokens of
// <class>_<01..90>.txt are appended to a[slot].nn and the class name is
// stored in a[slot].name.
//
// node's constructor seeds nn with "000", so the file contents follow that
// prefix. The original note describes each file as a 128*128 binary matrix,
// while dis()/diss() only look at the first 1024 characters.
// NOTE(review): confirm the real matrix size.
//
// A file that cannot be opened is reported on cerr. The sample then keeps
// only the seed string but still receives its class name.
void read() {
    // File-name prefixes; their order fixes the layout of a and must stay in
    // step with the per-class order used by the rest of the program.
    static const char *const kClassNames[8] = { "bei", "jing", "xin", "xi", "ke", "ji", "da", "xue" };
    const int kSamplesPerClass = 90;
    const string dataDir = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\";

    int slot = 0;
    for (int c = 0; c < 8; ++c) {
        for (int i = 1; i <= kSamplesPerClass; ++i, ++slot) {
            // Sample files are numbered with two digits: 01, 02, ..., 90.
            const string index = (i < 10 ? "0" : "") + to_string(i);
            const string path = dataDir + kClassNames[c] + "_" + index + ".txt";

            ifstream in(path);
            if (!in) {
                cerr << "read(): cannot open " << path << endl;
            }
            string token;
            while (in >> token) {
                a[slot].nn += token;
            }
            a[slot].name = kClassNames[c];
        }
    }
}
// Sets the vote weight of each of the eight characters: prints a prompt and
// then reads eight values from standard input into train_weigh[0..7], in
// index order.
void InitTrWei() {
    cout << "请分别输入8个汉字的权重值: ";
    for (int idx = 0; idx < 8; ++idx) {
        cin >> train_weigh[idx];
    }
}
//计算欧式距离
double dis (node x1,node x2) {
 const char *ar1,*ar2;
 double sum = 0;
 int j;
 ar1 = x1.nn.c_str();
 ar2 = x2.nn.c_str();
 for (j = 0; j < 1024; j++) {
  sum += (ar1[j] - ar2[j])*(ar1[j] - ar2[j]);
 }
 return sqrt(sum);
}
//计算曼哈顿距离
double diss(node x1, node x2) {
 const char *ar1, *ar2;
 double sum = 0;
 int j;
 ar1 = x1.nn.c_str();
 ar2 = x2.nn.c_str();
 for (j = 0; j < 1024; j++) {
  sum += fabs(ar1[j] - ar2[j]);
 }
 return sum;
}
// Quicksort partition step over a[low..high], ordered by ascending dis.
//
// Uses a[low] as the pivot and returns the pivot's final index. Only the dis
// and name fields are moved between slots; every other field of each node
// stays where it was. For that reason only the pivot's dis and name are
// saved, instead of copying the whole node (and its pixel string) on every
// call.
int Par(node *a, int low, int high) {
    const double pivotDis = a[low].dis;
    const string pivotName = a[low].name;

    while (low < high) {
        // Scan from the right for a key smaller than the pivot and drop it
        // into the hole on the left.
        while (low < high && a[high].dis >= pivotDis) --high;
        a[low].name = a[high].name;
        a[low].dis = a[high].dis;

        // Scan from the left for a key larger than the pivot and drop it
        // into the hole on the right.
        while (low < high && a[low].dis <= pivotDis) ++low;
        a[high].dis = a[low].dis;
        a[high].name = a[low].name;
    }

    // low == high: this is the pivot's final slot.
    a[low].dis = pivotDis;
    a[low].name = pivotName;
    return low;
}
// Recursive quicksort of a[low..high] (inclusive bounds), using Par to place
// one pivot per call. Ranges with fewer than two elements are left untouched.
void Qsort(node *a, int low, int high) {
    if (low >= high) {
        return;
    }
    const int pivotIndex = Par(a, low, high);
    Qsort(a, low, pivotIndex - 1);
    Qsort(a, pivotIndex + 1, high);
}
// Decides the k-nearest-neighbour result for the tested character p.
//
// Sorts the first (90 - 90 / K_flod) * 8 entries of dis_o by ascending dis,
// counts the class names of the first k entries, multiplies every count by the
// matching train_weigh entry and writes the name of the best-scoring class to
// p.sbname. A class only wins with a score strictly greater than the current
// best, so ties go to the earlier class and "bei" is chosen when no score is
// positive.
void K_select(node& p) {
    static const char *const labels[8] = { "bei", "jing", "xin", "xi", "ke", "ji", "da", "xue" };
    double votes[8] = { 0 };

    Qsort(dis_o, 0, (90 - 90 / K_flod) * 8 - 1);

    // Tally the labels of the k closest records.
    for (int n = 0; n < k; ++n) {
        for (int c = 0; c < 8; ++c) {
            if (dis_o[n].name == labels[c]) {
                votes[c]++;
            }
        }
    }

    // Apply the per-class weights and pick the highest score.
    int best = 0;
    double bestScore = 0;
    for (int c = 0; c < 8; ++c) {
        votes[c] *= train_weigh[c];
        if (votes[c] > bestScore) {
            bestScore = votes[c];
            best = c;
        }
    }

    p.sbname = labels[best];
}
// Evaluates the test set: over the first (90 / K_flod) * 8 entries of ceshi,
// returns the fraction whose recognised name (sbname) equals the true name.
double ceshiFenxi() {
    const int total = (90 / K_flod) * 8;
    int hits = 0;
    int misses = 0;
    for (int idx = 0; idx < total; ++idx) {
        if (ceshi[idx].name == ceshi[idx].sbname) {
            ++hits;
        } else {
            ++misses;
        }
    }
    return static_cast<double>(hits) / static_cast<double>(hits + misses);
}
//K折交叉验证
void k_K_flod(){
 int x1 = 0;//记录交叉验证第几折
 int i,j,t,jj,e;
 double b[8] = { 0 };
 for (i = 0; i < 720; i++) {
  a[i].s = false;
 }
 for (x1 = 0; x1 < K_flod;x1++) {
  t = 0;
  //选出第x1折交叉验证的测试集
  for (e = 0; e < 8; e++) {
   for (j = 90 - (K_flod - x1)*(90 / K_flod)+e*90; j < 90 - (K_flod - x1)*(90 / K_flod) + 90 / K_flod+e*90; j++) {
    a[j].s = true;
   }
  }
  for (e = 0; e < 8; e++) {
   double tru = 0, err = 0;
   //计算第x1折交叉验证的每个字的测试集准确率
   for (i = 90 - (K_flod - x1)*(90 / K_flod) + e * 90; i < 90 - (K_flod - x1)*(90 / K_flod) + 90 / K_flod + e * 90; i++) {
    if (a[i].name == a[i].sbname)
     tru++;
    else
     err++;
   }
   b[e] += tru / (tru + err);
   }
  for (i = 0; i < 720; i++) {
   a[i].s = false;
  }
 }
 for (i = 0; i < 8; i++) {
  K_tru[i] = b[i] / K_flod;
 }
}
// Entry point: loads the data set, then runs the K-fold cross-validation for
// every k from 5 to 40. After each run the eight per-character accuracies in
// K_tru are printed to the console and written to the result file, followed by
// two blank lines.
int main() {
    read();  // load the whole data set
    string ader = "F:\\计算机综合实践\\15折交交叉验证_曼哈顿距离.txt";
    ofstream rf(ader);
    if (!rf) {
        // Keep going: the results are still printed to the console.
        cerr << "main(): cannot open " << ader << " for writing" << endl;
    }
    for (int j = 5; j <= 40; j++) {
        k = j;
        k_K_flod();  // fills K_tru with the accuracy of each character for this k
        for (int i = 0; i < 8; i++) {
            cout << K_tru[i] << endl;
            rf << K_tru[i] << endl;
        }
        rf << endl << endl;
        cout << endl << endl;
    }
    return 0;
}
  • 4
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值