// C++ implementation of the KNN (k-nearest neighbours) algorithm
#include<time.h>
#include<algorithm>
#include<cmath>
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<map>
#include<random>
#include<string>
#include<utility>
#include<vector>
using namespace std;
// Splits `s` into the fields separated by the delimiter character `mode`.
//
// Parameters:
//   s    - text to split. It is only read and is left unchanged; the
//          reference stays non-const solely to keep the existing signature.
//   mode - delimiter character.
// Returns the fields in order of appearance, without the delimiter.
// Adjacent delimiters yield an empty field. An empty input, or the empty
// remainder after a trailing delimiter, yields no field.
vector<string> split(string &s,char mode){
    vector<string> res;
    string::size_type start = 0;
    while(start < s.size()){
        const string::size_type pos = s.find(mode,start);
        if(pos == string::npos){
            // No further delimiter: the rest of the text is the last field.
            res.push_back(s.substr(start));
            break;
        }
        // Copy only the characters before the delimiter, never the delimiter itself.
        res.push_back(s.substr(start,pos - start));
        start = pos + 1;
    }
    return res;
}
// Reads `in` line by line, splits every non-empty line on ',' and appends the
// resulting fields to `lines` as one row per line.
//
// Parameters:
//   in    - stream to read until it is exhausted or a read fails.
//   lines - receives one vector of fields per non-empty line (appended).
// Returns the number of rows appended to `lines`.
int getFileRows(ifstream &in,vector<vector<string>> &lines){
    int rows = 0;
    string line;
    // Looping on the read itself, rather than on eof(), stops on any failure
    // as well as at end of input: a stream that never opened cannot spin
    // forever and no phantom row is produced after the final newline.
    while(getline(in,line)){
        // A stream opened in binary mode keeps the '\r' of CRLF line endings.
        if(!line.empty() && line.back() == '\r'){
            line.pop_back();
        }
        if(line.empty()){
            continue;   // blank lines carry no fields
        }
        lines.push_back(split(line,','));
        rows++;
    }
    return rows;
}
// Loads a comma-separated dataset and randomly partitions its rows into a
// training set and a test set.
//
// Parameters:
//   filepath    - path of the file to read.
//   rate        - a row goes to the training set when rand()/RAND_MAX is
//                 below this value, otherwise to the test set.
//   trainingSet - receives the rows drawn for training (appended).
//   testSet     - receives all other rows (appended).
// Every row is converted to 5 doubles with atof. Rows with fewer than 5
// fields are skipped. If the file cannot be opened, a message is written to
// cerr and both sets are left untouched.
void loadDataset(string &filepath,double &rate,vector<vector<double>> &trainingSet,vector<vector<double>> &testSet){
    const size_t columns = 5;
    // Honor the caller-supplied path.
    ifstream input(filepath.c_str(),ios::in | ios::binary);
    if(!input.is_open()){
        cerr << "loadDataset: cannot open " << filepath << endl;
        return;
    }
    vector<vector<string>> lines;
    getFileRows(input,lines);
    input.close();
    // Seed from the clock so each run produces a different split.
    srand(static_cast<unsigned int>(time(nullptr)));
    for(const vector<string> &fields : lines){
        // Guard the fixed-width indexing below against short or empty rows.
        if(fields.size() < columns){
            continue;
        }
        vector<double> row(columns);
        for(size_t j = 0;j < columns;j++){
            row[j] = atof(fields[j].c_str());
        }
        if(rand()/double(RAND_MAX) < rate){
            trainingSet.push_back(row);
        }
        else{
            testSet.push_back(row);
        }
    }
}
// Euclidean distance between the first `length` components of two vectors.
//
// Parameters:
//   instance1, instance2 - vectors to compare; both are indexed from 0 to
//                          length - 1 without bounds checking.
//   length               - number of leading components taken into account.
// Returns the square root of the summed squared differences (0 when
// length <= 0).
double calculateDistance(vector<double> &instance1,vector<double> &instance2,int length){
    double sumOfSquares = 0;
    int dim = 0;
    while(dim < length){
        const double diff = instance1[dim] - instance2[dim];
        sumOfSquares += pow(diff,2);
        ++dim;
    }
    return sqrt(sumOfSquares);
}
// Returns the training rows closest to `testInstance`.
//
// Distance is calculateDistance over the first testInstance.size() - 1
// values, i.e. every value of the test row except its last one.
//
// Parameters:
//   trainingSet  - candidate rows.
//   testInstance - row whose neighbours are wanted.
//   k            - number of neighbours requested.
// Returns up to k rows ordered by ascending distance; rows at equal distance
// keep their training-set order. Fewer than k rows are returned when the
// training set is smaller than k, and none when k <= 0.
vector<vector<double>> getNeighbors(vector<vector<double>> &trainingSet,vector<double> &testInstance,int k){
    // Rank (distance, row index) pairs so ordering never copies or swaps whole rows.
    vector<pair<double,size_t>> ranked;
    ranked.reserve(trainingSet.size());
    const int len = static_cast<int>(testInstance.size()) - 1;
    for(size_t i = 0;i < trainingSet.size();i++){
        ranked.push_back(make_pair(calculateDistance(testInstance,trainingSet[i],len),i));
    }
    // Asking for more neighbours than there are rows must not read past the end.
    size_t count = 0;
    if(k > 0){
        count = min(static_cast<size_t>(k),ranked.size());
    }
    // Only the first `count` entries need to be in order. The default pair
    // comparison sorts by distance, then by index, which makes ties deterministic.
    partial_sort(ranked.begin(),ranked.begin() + count,ranked.end());
    vector<vector<double>> neighbors;
    neighbors.reserve(count);
    for(size_t i = 0;i < count;i++){
        neighbors.push_back(trainingSet[ranked[i].second]);
    }
    return neighbors;
}
// Majority vote over the class labels of `neighbors`.
//
// Parameters:
//   neighbors - rows whose value at index 4 is the class label; rows too
//               short to hold that index are ignored.
// Returns the label with the most votes. Ties go to the smallest label.
// Returns 0 when no row contributes a vote.
double getResponse(vector<vector<double>> &neighbors){
    const size_t labelIndex = 4;
    // Keyed by the label's own double value: an integer key would truncate a
    // label such as 1.5 to 1 and return a class that is not in the data.
    map<double,int> classVotes;
    for(const vector<double> &neighbor : neighbors){
        if(neighbor.size() > labelIndex){
            classVotes[neighbor[labelIndex]]++;
        }
    }
    int maxVote = 0;
    double res = 0;
    // The map iterates in ascending label order and only a strictly larger
    // count replaces the current winner, so ties resolve to the smallest label.
    for(const auto &vote : classVotes){
        if(vote.second > maxVote){
            maxVote = vote.second;
            res = vote.first;
        }
    }
    return res;
}
// Percentage of test rows whose label (value at index 4) equals the
// prediction at the same position.
//
// Parameters:
//   testSet     - rows carrying the true label at index 4.
//   predictions - predicted labels, matched to testSet by position.
// Returns a value in [0,100]. An empty test set yields 0 instead of the NaN
// that 0/0 would produce. Test rows without a matching prediction, or too
// short to hold a label, count as incorrect.
double getAccuracy(vector<vector<double>> &testSet,vector<double> &predictions){
    if(testSet.empty()){
        return 0.0;
    }
    const size_t labelIndex = 4;
    // Compare only positions present in both containers.
    const size_t compared = min(testSet.size(),predictions.size());
    int correct = 0;
    for(size_t i = 0;i < compared;i++){
        if(testSet[i].size() > labelIndex && testSet[i][labelIndex] == predictions[i]){
            correct++;
        }
    }
    return correct / static_cast<double>(testSet.size()) * 100.0;
}
// Predicts a class label for every row of `testSet` by majority vote among
// its nearest rows in `trainSet`.
//
// Parameters:
//   trainSet - rows used as neighbour candidates.
//   testSet  - rows to classify.
//   k        - number of neighbours consulted per row (default 3). Values
//              below 0 are treated as 0 and values above trainSet.size()
//              are lowered to it.
// Returns one prediction per test row, in test-set order.
vector<double> pridict(vector<vector<double>> &trainSet,vector<vector<double>> &testSet,int k = 3){
    // Never request more neighbours than there are training rows.
    if(k < 0){
        k = 0;
    }
    if(static_cast<size_t>(k) > trainSet.size()){
        k = static_cast<int>(trainSet.size());
    }
    vector<double> predictions;
    predictions.reserve(testSet.size());
    for(vector<double> &testRow : testSet){
        vector<vector<double>> neighbors = getNeighbors(trainSet,testRow,k);
        predictions.push_back(getResponse(neighbors));
    }
    return predictions;
}
// Program entry point: loads the dataset from "./irisdata.txt" with a 0.8
// training rate, prints both partitions, classifies every test row and
// prints each prediction next to the true label, followed by the accuracy.
int main(){
    double rate = 0.8;
    string filepath = "./irisdata.txt";
    vector<vector<double>> trainSet;
    vector<vector<double>> testSet;
    loadDataset(filepath,rate,trainSet,testSet);

    // Prints one row per line, each value followed by a single space.
    auto printRows = [](const vector<vector<double>> &rows){
        for(size_t r = 0;r < rows.size();++r){
            for(size_t c = 0;c < rows[r].size();++c){
                cout << rows[r][c] << " ";
            }
            cout << endl;
        }
    };

    cout << "------------trainSet:--------------" << endl;
    printRows(trainSet);
    cout << "------------testSet:--------------" << endl;
    printRows(testSet);

    vector<double> predictions = pridict(trainSet,testSet);

    cout << "------------测试结果为::--------------" << endl;
    for(size_t row = 0;row < testSet.size();++row){
        const vector<double> &sample = testSet[row];
        cout << "测试数据" << row << ":";
        // The first four values are printed here; index 4 is printed as the true label below.
        for(int col = 0;col < 4;++col){
            cout << sample[col] << " ";
        }
        cout << "预测值:" << predictions[row] << " " << "真实值:" << sample[4] << endl;
    }

    double accuracy = getAccuracy(testSet,predictions);
    cout << "准确率为:" << accuracy << endl;
}