C++单刷《机器学习实战》——kNN算法完整代码

最新推荐文章于 2024-01-28 14:33:45 发布

wengtengfan

最新推荐文章于 2024-01-28 14:33:45 发布

阅读量2k

点赞数

分类专栏：模式识别机器学习

本文链接：https://blog.csdn.net/u014080185/article/details/60467308

版权

模式识别同时被 2 个专栏收录

4 篇文章 0 订阅

订阅专栏

机器学习

4 篇文章 1 订阅

订阅专栏

#include <iostream>
#include <cmath>
#include<map>
#include<string>
#include<sstream>
#include<fstream>
#include<vector>
#include<algorithm>
using namespace std;


// Toy training set for the two-feature demo: four 2-D points, one per row.
double group[4][2] = {
    { 1.0, 1.1 },
    { 1.0, 1.0 },
    { 0, 0 },
    { 0, 0.1 },
};
// Class label of each row of `group`, in the same order.
string labels[4] = {
    "A",
    "A",
    "B",
    "B",
};


// One sample: three numeric features plus its class label.
// Every member has a default so that a default-constructed `man` never
// carries indeterminate values, even when a later stream extraction into it
// stops early.
struct man
{
    double fly = 0.0;       // first numeric feature
    double game = 0.0;      // second numeric feature
    double icecream = 0.0;  // third numeric feature
    string eval;            // class label; empty until read or predicted
};


// Partial selection sort, in place.
// After the call, data[0..k-1] holds the k smallest values of the array in
// ascending order; the remaining slots hold the other values in unspecified
// order. Stopping after k passes avoids sorting the whole array.
//   data: array to reorder
//   n:    number of elements in data
//   k:    how many leading positions to settle
void sort(double* data, int n, int k)
{
    for (int i = 0; i < k && i < n; i++)
    {
        for (int j = i + 1; j < n; j++)
        {
            if (data[i] > data[j])
            {
                // The temporary must be a double: an int temporary would
                // truncate the value being moved on every swap.
                const double temp = data[i];
                data[i] = data[j];
                data[j] = temp;
            }
        }
    }
}


// Computes the ascending sort order of `data` without modifying it.
// On return, sorted_index2[r] is the index in `data` of the element with
// rank r (r = 0 is the smallest). Equal values keep their original relative
// order, so the lower index comes first.
//   data:          values to rank (read only)
//   sorted_index2: output array with room for n indices
//   n:             number of elements; nothing is written when n <= 0
// Note: if data contains NaN, every comparison against it is false, so
// several elements receive the same rank and some output slots stay unwritten.
void sortIndex(double* data, int* sorted_index2, int n)
{
    if (n <= 0)
        return;

    // position[i] = number of elements that must precede data[i].
    // A vector releases its storage automatically (the former new[] buffer
    // was released with scalar delete, which is undefined behaviour).
    vector<int> position(n);
    for (int i = 0; i < n; i++)
    {
        int before = 0;
        for (int j = 0; j < n; j++)
        {
            if (data[j] < data[i] || (data[j] == data[i] && j < i))
                ++before;
        }
        position[i] = before;
    }

    // Invert the permutation directly: element i belongs at rank position[i].
    for (int i = 0; i < n; i++)
        sorted_index2[position[i]] = i;
}


// Reads samples from a whitespace-separated text file, one sample per line:
//   <fly> <game> <icecream> <label>
// Returns the samples in file order. An unreadable file (or a null name)
// yields an empty vector. Lines that do not contain all four fields are
// skipped, so a blank or truncated line can never inject a record with
// missing values into the data set.
vector<man> readFile(const char* file_name)
{
    vector<man> data_list;
    if (file_name == 0)
        return data_list;

    ifstream file(file_name);
    if (!file.is_open())
        return data_list;

    string data_str;
    while (getline(file, data_str))
    {
        istringstream record(data_str);
        man data;
        // The chained extraction is false as soon as one field fails to parse.
        if (record >> data.fly >> data.game >> data.icecream >> data.eval)
            data_list.push_back(data);
    }
    return data_list;
}


void data2matrix(vector<man> data_list, double* dataSet, string labels[], double& length_fly, double& length_game, double& length_icecream)
//将结构体数组转化为二维矩阵，并归一化
//data_list：结构体数组，dataSet：转化为的二维矩阵，labels：标签数组，length_fly....:样本特征最大值与最小值之差
{
int index = 0;
auto it = data_list.begin();
double min_fly = it->fly;
double max_fly = it->fly;
double min_game = it->game;
double max_game = it->game;
double min_icecream = it->icecream;
double max_icecream = it->icecream;


for (; it != data_list.end(); ++it)
{
if (min_fly > it->fly)
min_fly = it->fly;
if (max_fly < it->fly)
max_fly = it->fly;
if (min_game > it->game)
min_game = it->game;
if (max_game < it->game)
max_game = it->game;
if (min_icecream > it->icecream)
min_icecream = it->icecream;
if (max_icecream < it->icecream)
max_icecream = it->icecream;
}
    length_fly = max_fly - min_fly;
length_game = max_game - min_game;
length_icecream = max_icecream - min_icecream;


for (auto it = data_list.begin(); it != data_list.end(); ++it)
{
*(dataSet + index * 3) = it->fly / length_fly;
*(dataSet + index * 3 + 1) = it->game / length_game;
*(dataSet + index * 3 + 2) = it->icecream / length_icecream;
labels[index] = it->eval;
++index;
}
}


// Converts one sample into a 3-element feature vector, dividing each
// feature by the given range.
//   person: sample to convert
//   data:   output, must have room for 3 doubles
//   length_fly, length_game, length_icecream: per-feature ranges
// A range of 0 leaves the feature unscaled instead of dividing by zero.
void data2matrix2(man person, double* data,double length_fly, double length_game, double length_icecream)
{
    const double scale_fly = (length_fly != 0.0) ? length_fly : 1.0;
    const double scale_game = (length_game != 0.0) ? length_game : 1.0;
    const double scale_icecream = (length_icecream != 0.0) ? length_icecream : 1.0;

    data[0] = person.fly / scale_fly;
    data[1] = person.game / scale_game;
    data[2] = person.icecream / scale_icecream;
}


// k-nearest-neighbours classifier using Euclidean distance.
//   inX:         query point, `size` features
//   dataSet:     row-major sample matrix, dataSetSize rows of `size` features
//   labels:      label of each row of dataSet
//   k:           number of neighbours that vote
//   size:        number of features per point
//   dataSetSize: number of rows in dataSet
// Returns the label with the most votes among the k nearest rows. When
// several labels share the top count, the lexicographically smallest wins.
// Samples at equal distance are ranked by ascending row index. k is clamped
// to dataSetSize; an empty string is returned when k <= 0 or dataSetSize <= 0.
string classify(double* inX,double* dataSet,string labels[],int k,int size,int dataSetSize)
{
    if (k <= 0 || dataSetSize <= 0)
        return string();

    // Never ask for more neighbours than there are samples.
    const int neighbours = min(k, dataSetSize);

    // Distance from the query to every sample.
    vector<double> dist(dataSetSize);
    for (int row = 0; row < dataSetSize; row++)
    {
        const double* sample = dataSet + row * size;
        double sum = 0;
        for (int col = 0; col < size; col++)
        {
            const double diff = inX[col] - sample[col];
            sum += diff * diff;
        }
        const double d = sqrt(sum);
        // A NaN distance would break the ordering below; treat it as
        // infinitely far away so such samples are chosen last.
        dist[row] = std::isnan(d) ? HUGE_VAL : d;
    }

    // Bring the indices of the `neighbours` closest samples to the front.
    vector<int> order(dataSetSize);
    for (int row = 0; row < dataSetSize; row++)
        order[row] = row;
    std::partial_sort(order.begin(), order.begin() + neighbours, order.end(),
        [&dist](int lhs, int rhs)
        {
            if (dist[lhs] != dist[rhs])
                return dist[lhs] < dist[rhs];
            return lhs < rhs;
        });

    // Count the votes of the nearest neighbours.
    map<string, int> label_count;
    for (int i = 0; i < neighbours; i++)
        ++label_count[labels[order[i]]];

    // The map is walked in key order and only a strictly larger count
    // replaces the current winner, which gives the documented tie-break.
    auto map_it = label_count.begin();
    string label = map_it->first;
    int max_count = map_it->second;
    for (; map_it != label_count.end(); ++map_it)
    {
        if (max_count < map_it->second)
        {
            max_count = map_it->second;
            label = map_it->first;
        }
    }
    return label;
}


int main()
{
/*string result;
string line;
double point[2];
cout << "please input the coodinate of the pixel" << endl;
while (getline(cin, line))
{
istringstream record(line);
record >> point[0];
record >> point[1];
result = classify(point, &group[0][0], labels, 3, 2, 4);
cout << "the result is: " << result << endl;
cout << "please input the coodinate of the pixel" << endl;
}*/




vector<man> data_list;
data_list = readFile("datingTestSet.txt");
int size = 3;
int dataSetSize = data_list.size();
double* dataSet = new double[dataSetSize*3];
string* labels = new string[dataSetSize];
double length_fly = 0;
double length_game = 0;
double length_icecream = 0;


data2matrix(data_list, dataSet, labels, length_fly, length_game, length_icecream);


//测试，ratio为测试集占数据集总量
double ratio = 0.1;
string result;
int error_count = 0;
int num_test = dataSetSize * ratio;
string label;
for (int i = 0; i < num_test; i++)
{
result = classify(dataSet + i * 3, dataSet + num_test * 3, labels + num_test, 3, 3, dataSetSize - num_test);
cout <<i<<"times  "<<"The classifier came back with: "<< result <<",the real answer is "<<labels[i]<< endl;
label = labels[i];
if (result != label)
{
++error_count;
}
}
double err_rate = (double)error_count / (double)num_test;
cout << "The total error rate is： " << err_rate << endl;


man person;
double* data = new double[3];


string line;
cout << "Please input the time of fly,game and the consume of icrcreame" << endl;
while (getline(cin, line))
{
cout << "Please input the time of fly,game and the consume of icrcreame" << endl;
istringstream record(line);
record >> person.fly;
record >> person.game;
record >> person.icecream;
data2matrix2(person, data, length_fly, length_game, length_icecream);
result = classify(data, dataSet, labels, 3, 3, dataSetSize);
person.eval = result;
cout << result << endl;
}


delete dataSet;
delete data;


return 0;
}

wengtengfan

关注

0
点赞
踩
6

收藏

觉得还不错? 一键收藏
0
评论
C++单刷《机器学习实战》——kNN算法完整代码

#include <iostream> #include <cmath> #include <map> #include <string> #include <sstream> #include <fstream> #include <vector> #include <algorithm> using namespace std; double group[4][2] = { { 1.0, 1.1 }, { 1.0, 1.0 }, { 0, 0 }, { 0, 0.1 } }; string labels[4]
复制链接

扫一扫