前言:
最近要实现一个算法,“对一系列点拟合出一条线,且区分出不属于该线的点”。在网上找了许多资料,用数学公式解释原理以及用matlab实现的居多,本文章主要解释用最小二乘法的进行点拟合成线,matlab 和 c++两个版本的代码实现。
使用矩阵实现:
根据公式A = (X'*X)-1*X'*Y(这个公式可以拟合一条最接近点的曲线,而不仅仅是直线,具体请参照链接),便得到了系数矩阵A,A是一个2行1列的矩阵,其中的两个值分别是直线的k、b值。
测试样例点:(27 39) (8 5) (8 9) (16 22) (44 71) (35 44) (43 57) (19 24) (27 39) (37 52)
matlab:
x =[
27,1;
8,1;
7,1;
16,1;
44,1;
35,1;
43,1;
19,1;
27,1;
37,1]
xt = x.'
y = [39;5;9;22;71;44;57;24;39;52]
xt * x
inv(xt * x)
b = inv(xt * x) * xt * y
结果:
为了使用实现对矩阵的操作,分别选用了Eigen库 和 OpenCv库两个版本:
EIgen库:
#include <iostream>
#include <Eigen/Dense>
using namespace Eigen;
using namespace Eigen::internal;
using namespace Eigen::Architecture;
int main(){
MatrixXd x(10,2);
for(int i = 0; i<10 ; ++i ) x(i,1) = 1;
x(0,0) = 27;
x(1,0) = 8;
x(2,0) = 7;
x(3,0) = 16;
x(4,0) = 44;
x(5,0) = 35;
x(6,0) = 43;
x(7,0) = 19;
x(8,0) = 27;
x(9,0) = 37;
MatrixXd xt = x.transpose();
MatrixXd y(10,1);
y(0,0) = 39;
y(1,0) = 5;
y(2,0) = 9;
y(3,0) = 22;
y(4,0) = 71;
y(5,0) = 44;
y(6,0) = 57;
y(7,0) = 24;
y(8,0) = 39;
y(9,0) = 52;
MatrixXd b(2,1);
b = (xt * x).inverse() * xt * y; //主要公式
for(int i = 0 ; i<2 ;++i){
for(int j = 0 ; j<2 ;++j){
std::cout<<(xt * x)(i,j)<<" ";
}
std::cout<<std::endl;
}
for(int i = 0 ; i<2 ;++i){
for(int j = 0 ; j<2 ;++j){
std::cout<<(xt * x).inverse()(i,j)<<" ";
}
std::cout<<std::endl;
}
std::cout<<b(0,0)<<","<<b(1,0)<<std::endl;
system("pause");
return 0;
}
结果:
Opencv Mat库:
#include <iostream>
#include<opencv2/highgui/highgui.hpp>
using namespace cv;
int main(){
double x[10][2] = {
27,1,
8,1,
7,1,
16,1,
44,1,
35,1,
43,1,
19,1,
27,1,
37,1
};
CvMat vx = cvMat(10,2,CV_64FC1,x);
double y[10][1]={
39,
5,
9,
22,
71,
44,
57,
24,
39,
52
};
CvMat vy = cvMat(10,1,CV_64FC1,y);
double temp[2][2]={0,0,0,0}; //用于对矩阵初始化
double temp2[2][2]={0,0,0,0};
double temp3[2][10]={0,0,0,0};
double temp4[2][1]={0,0};
CvMat xt_multiply_x = cvMat(2,2,CV_64FC1,temp);
CvMat x_inverse = cvMat(2,2,CV_64FC1,temp2);
CvMat multiply1 = cvMat(2,10,CV_64FC1,temp3);
CvMat b = cvMat(2,1,CV_64FC1,temp4);
cvGEMM(&vx,&vx,1,&xt_multiply_x,1,&xt_multiply_x,1); //函数解释请参考链接
//cvCrossProduct(&xt, &x, &xt_multiply_x);
cvInvert(&xt_multiply_x,&x_inverse);
cvGEMM(&x_inverse, &vx,1, &multiply1,1,&multiply1,2);
cvGEMM(&multiply1, &vy,1, &b,1,&b);
for(int i = 0 ; i<2 ;++i){
for(int j = 0 ; j<2 ;++j){
std::cout<<cvmGet(&xt_multiply_x,i,j)<<" ";
}
std::cout<<std::endl;
}
for(int i = 0 ; i<2 ;++i){
for(int j = 0 ; j<2 ;++j){
std::cout<<cvmGet(&x_inverse,i,j)<<" ";
}
std::cout<<std::endl;
}
std::cout<<cvmGet(&b,0,0)<<","<<cvmGet(&b,1,0)<<std::endl;
system("pause");
return 0;
}
使用最小二乘公式实现:
根据公式得到的a,b分别为直线的k、b值。这个公式的原理是:
1. 要求的方程为y=ax+b。
2.假设残差为e,yi=a*xi+b+ei -> ei = yi - a*xi - b。
3.只要求 使∑ei^2(残差平方和)最小 的a b的值。
4.设Q = ∑ei^2,要使Q最小,只需要分别对 a b 求导 使得求导后的两方程为0,联立求出a b。
#include <iostream>
#include <algorithm>
#include <valarray>
#include <vector>
struct Point{
int x,y;
};
int main()
{
double x[]={27,8,7,16,44,35,43,19,27,37,};
double y[]={39,5,9,22,71,44,57,24,39,52};
std::valarray<double> data_x(x,10);
std::valarray<double> data_y(y,10);
float A = 0.0;
float B = 0.0;
float C = 0.0;
float D = 0.0;
A = (data_x*data_x).sum();
B = data_x.sum();
C = (data_x*data_y).sum();
D = data_y.sum();
float k, b, tmp = 0;
if (tmp = (A*data_x.size() - B*B))
{
k = (C*data_x.size() - B*D) / tmp;
b = (A*D - C*B) / tmp;
}
else
{
k = 1;
b = 0;
}
std::cout<<k<<","<<b<<std::endl;
system("pause");
return 0;
}
结果:
实现前言中提到的算法:
不知道为什么,要编两次才会出结果,第一次跑opencv的界面不会出现,没有搭建opencv环境的小伙伴可以把关于画图的部分注释掉。
#include <iostream>
#include <time.h>
#include <algorithm>
#include <Eigen/Dense>
#include <valarray>
#include<opencv2/highgui/highgui.hpp>
#define random(x) ((rand())%x)
using namespace cv;
using namespace Eigen;
using namespace Eigen::internal;
using namespace Eigen::Architecture;
//输入一系列点的坐标,输出这些点所拟合的线的k b值
std::vector<double> leastSquareFitting(std::vector<Point> &rPoints){
std::vector<double > resLine(2);
int num_points = rPoints.size();
std::valarray<float> data_x(num_points);
std::valarray<float> data_y(num_points);
for (int i = 0; i < num_points; i++)
{
data_x[i] = rPoints[i].x;
data_y[i] = rPoints[i].y;
}
float A = 0.0;
float B = 0.0;
float C = 0.0;
float D = 0.0;
A = (data_x*data_x).sum();
B = data_x.sum();
C = (data_x*data_y).sum();
D = data_y.sum();
float k, b, tmp = 0;
if (tmp = (A*data_x.size() - B*B))
{
k = (C*data_x.size() - B*D) / tmp;
b = (A*D - C*B) / tmp;
}
else
{
k = 1;
b = 0;
}
resLine[0] = k;
resLine[1] = b;
return resLine;
}
//第一个参数为系列点,第二个参数为在一条线上的点,第三个参数为在这条线之外的点
void RegressionPoints(const std::vector<Point> &rPoints,std::vector<Point> &rInlinePoints,std::vector<Point>&rOutlinePoints){
int length = rPoints.size();
std::vector<bool > is_inline;
for(int i = 0 ;i<length;++i) is_inline.push_back(true);
double avg = 0.0;
std::vector<double> kb;
bool find_new_points = false;
while(!find_new_points){
//线内点
std::vector<int> InlinePointsIndex;
rInlinePoints.clear();
for(int i = 0 ; i < length ;++i){
if(is_inline[i]){
rInlinePoints.push_back(rPoints[i]);
InlinePointsIndex.push_back(i);
}
}
//线的k b值
kb = leastSquareFitting(rInlinePoints);
//残差
std::vector<double > def;
for(int i = 0 ;i < rInlinePoints.size();++i){
def.push_back(abs(kb[0] * rInlinePoints[i].x + kb[1] - rInlinePoints[i].y));
avg += def[i];
}
//判断是否有新的线外点
avg = avg / rInlinePoints.size();
find_new_points = false;
for(int i = 0 ; i < rInlinePoints.size();++i){
if(2.0 * avg < def[i]){
is_inline[InlinePointsIndex[i]] = false;
find_new_points = true;
}
}
}
//线外点
rOutlinePoints.clear();
for(int i = 0 ; i < length ;++i){
if(!is_inline[i]) rOutlinePoints.push_back(rPoints[i]);
}
//---------------------------------------用opencv在窗口中画出,便于观察
//画图
cv::Mat cdst = cv::Mat::zeros(480, 720, CV_8UC3);
//画出线内点
for (int i = 0; i < rInlinePoints.size(); i++){
cv::Point center = cv::Point((int)rInlinePoints[i].x , (int)rInlinePoints[i].y);
circle(cdst, center, 3, cv::Scalar(0, 255, 255), -1);
}
//画出线外点
for (int i = 0; i < rOutlinePoints.size(); i++){
cv::Point center = cv::Point((int)rOutlinePoints[i].x , (int)rOutlinePoints[i].y);
circle(cdst, center, 3, cv::Scalar(255, 0, 255), -1);
}
//画线
cv::Point pt1, pt2;
pt1.x = 0;
pt1.y = kb[0]*pt1.x + kb[1];
pt2.x = 400;
pt2.y = kb[0]*pt2.x + kb[1];
line(cdst, pt1, pt2, cv::Scalar(255, 255/2,255/2), 2, 8);
imshow("LinesCluster", cdst);
cv::namedWindow("LinesCluster", 1);
cv::waitKey(1000);
//----------------------------输出线内点、线外点、线的kb值
for(auto w:rInlinePoints) std::cout<<w.x<<","<<w.y<<std::endl;
std::cout<<std::endl;
for(auto w:rOutlinePoints) std::cout<<w.x<<","<<w.y<<std::endl;
std::cout<<std::endl;
std::cout<<kb[0]<<","<<kb[1]<<std::endl;
}
int main()
{
std::vector<Point> temp(10),in_line_points,rOutlinePoints;
Point crd;
srand(unsigned(time(0)));
for(int i = 0;i<10;++i){
temp[i].x = random(50) * 5;
temp[i].y = 1.3 * temp[i].x + random(100) - 50;
}
RegressionPoints(temp,in_line_points,rOutlinePoints);
system("pause");
return 0;
}
结果: