数据拟合: 直线拟合--多项式拟合
1.问题概述
在实际问题中,常常需要从一组观察数据 $(x_i, y_i)\ (i = 1, 2, \dots, N)$ 出发,寻求函数 $y = f(x)$ 的一个近似表达式 $y = \varphi(x)$。
2.理论与方法
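• 使残差 $e_i = y_i - \varphi(x_i)$ 的最大绝对值为最小,即 $\max\limits_{1 \le i \le N} |e_i| = \min$
• 使残差的绝对值之和为最小,即 $\sum\limits_{i=1}^{N} |e_i| = \min$
• 使残差的平方和为最小,即 $\sum\limits_{i=1}^{N} e_i^2 = \min$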
分析以上三种准则,前两种提法比较自然,但由于含有绝对值运算,不便于实际应用;基于第三种准则来选取拟合曲线的方法则称作曲线拟合的最小二乘法。
3.算法与设计
1) 直线拟合
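设拟合直线为 $y = a + bx$,令 $Q = \sum (y_i - a - b x_i)^2$ 对 $a$、$b$ 的偏导数为零,即成立
$$\begin{cases} aN + b\sum x_i = \sum y_i \\ a\sum x_i + b\sum x_i^2 = \sum x_i y_i \end{cases}$$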
式中,求和表示关于下标i从1到N求和。
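2) 多项式拟合:对于 $m$ 次拟合多项式 $y = \sum\limits_{j=0}^{m} a_j x^j$,使总误差
$$Q = \sum_{i=1}^{N} \Big( y_i - \sum_{j=0}^{m} a_j x_i^j \Big)^2$$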
为最小。由于Q可以视作关于aj(j = 0,1,...,m)的多元函数,故上述拟合多项式的构造问题可归结为多元函数的极值问题。
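令 $\partial Q / \partial a_j = 0\ (j = 0, 1, \dots, m)$,即得到关于系数 $a_j$ 的线性方程组
$$\sum_{k=0}^{m} a_k \sum_{i=1}^{N} x_i^{j+k} = \sum_{i=1}^{N} x_i^j y_i, \quad j = 0, 1, \dots, m,$$
该方程组通常称作正则方程组。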
4.案例分析
Example:
Data set1
Memory Capacity in GBytes | Price in US dollars |
2 | 9.99 |
4 | 10.99 |
8 | 19.99 |
16 | 29.99 |
1. What's the relationship between memory capacity and cost? Please fit a linear model and higher-order polynomial models to the data.
由实验结果可以得出:在多项式拟合曲线的过程中,不一定是次数越高,结果越准确;结合实际问题与残差值的衡量,可以找到对于具体问题几阶的拟合效果更好。
根据函数图像可以得出表格:
二阶:
Memory Capacity in GBytes | Price in US dollars |
2 | 9.54 |
4 | 12.52 |
8 | 18.49 |
16 | 30.42 |
三阶:
Memory Capacity in GBytes | Price in US dollars |
2 | 9.06 |
4 | 12.61 |
8 | 19.17 |
16 | 30.1 |
四阶:
Memory Capacity in GBytes | Price in US dollars |
2 | 9.99 |
4 | 11 |
8 | 19.98 |
16 | 30 |
通过与原表格的数据进行分析:
不难看出四阶的函数表达式最接近原始数据。
即:y = 13.0376 - 2.75*x + 0.666667*x^2 - 0.0267857*x^3(这里的"四阶"指含四个系数的三次多项式,它恰好通过全部四个数据点,因此残差为零。)
5.代码
/**
 * Least-squares fit of a response as a linear combination of an intercept
 * and four predictors, using the data stored in "e:\\sat.txt".
 *
 * File layout expected by the reads below: five consecutive blocks of 105
 * whitespace-separated numbers. The first four blocks are the predictor
 * columns (labelled high_gpa, math_sat, verb_sat, comp_gpa in the output),
 * the fifth block is the response.
 *
 * The coefficients are obtained by forming the normal equations
 * (A^T A) x = A^T y and solving them with Gaussian elimination (partial
 * pivoting) followed by back substitution. The fitted equation is printed
 * to std::cout. On a file or numerical failure a message is written to
 * std::cerr and nothing is printed to std::cout.
 *
 * For reference, the single-predictor (straight line) fit that used to be
 * kept here as disabled code has the closed form
 *   a = (Sxx*Sy - Sxy*Sx) / (N*Sxx - Sx*Sx)
 *   b = (N*Sxy  - Sx*Sy)  / (N*Sxx - Sx*Sx)
 * with Sx, Sy, Sxx, Sxy the sums of x, y, x*x and x*y over the N samples.
 */
void _2()
{
    const int kSamples = 105; // number of observations in the data file
    const int kTerms = 5;     // intercept + four predictors

    // Design matrix A: column 0 is the constant term, columns 1..4 are the
    // predictor blocks in the order they appear in the file.
    double design[kSamples][kTerms];
    double observed[kSamples];

    std::ifstream file("e:\\sat.txt");
    if (!file)
    {
        std::cerr << "_2: cannot open e:\\sat.txt" << std::endl;
        return;
    }
    for (int row = 0; row < kSamples; row++)
    {
        design[row][0] = 1;
    }
    for (int col = 1; col < kTerms; col++)
    {
        for (int row = 0; row < kSamples; row++)
        {
            file >> design[row][col];
        }
    }
    for (int row = 0; row < kSamples; row++)
    {
        file >> observed[row];
    }
    if (!file)
    {
        // A short or malformed file would otherwise leave part of the
        // arrays uninitialised and silently produce a meaningless fit.
        std::cerr << "_2: e:\\sat.txt does not contain 5 x 105 numeric values" << std::endl;
        return;
    }
    file.close();

    // Normal equations: normal = A^T A, rhs = A^T y.
    double normal[kTerms][kTerms] = {};
    double rhs[kTerms] = {};
    for (int i = 0; i < kTerms; i++)
    {
        for (int j = 0; j < kTerms; j++)
        {
            for (int k = 0; k < kSamples; k++)
            {
                normal[i][j] += design[k][i] * design[k][j];
            }
        }
        for (int k = 0; k < kSamples; k++)
        {
            rhs[i] += design[k][i] * observed[k];
        }
    }

    // Forward elimination with partial pivoting. Only the upper triangle
    // (columns >= k) is maintained; entries below the diagonal are left
    // stale and are never read again.
    for (int k = 0; k < kTerms; k++)
    {
        int pivotRow = k;
        double pivotMag = normal[k][k] < 0 ? -normal[k][k] : normal[k][k];
        for (int r = k + 1; r < kTerms; r++)
        {
            const double mag = normal[r][k] < 0 ? -normal[r][k] : normal[r][k];
            if (mag > pivotMag)
            {
                pivotMag = mag;
                pivotRow = r;
            }
        }
        if (!(pivotMag > 0))
        {
            // Zero (or NaN) pivot: the predictor columns are linearly
            // dependent, so the system has no unique solution.
            std::cerr << "_2: normal equations are singular" << std::endl;
            return;
        }
        if (pivotRow != k)
        {
            for (int j = k; j < kTerms; j++)
            {
                const double held = normal[k][j];
                normal[k][j] = normal[pivotRow][j];
                normal[pivotRow][j] = held;
            }
            const double heldRhs = rhs[k];
            rhs[k] = rhs[pivotRow];
            rhs[pivotRow] = heldRhs;
        }
        for (int i = k + 1; i < kTerms; i++)
        {
            const double factor = normal[i][k] / normal[k][k];
            for (int j = k + 1; j < kTerms; j++)
            {
                normal[i][j] -= factor * normal[k][j];
            }
            rhs[i] -= factor * rhs[k];
        }
    }

    // Back substitution. It must start from the eliminated right-hand side
    // (rhs), not from the raw observations, and must only use the entries
    // to the right of the diagonal.
    double coef[kTerms] = {};
    for (int i = kTerms - 1; i >= 0; i--)
    {
        double acc = rhs[i];
        for (int j = i + 1; j < kTerms; j++)
        {
            acc -= normal[i][j] * coef[j];
        }
        coef[i] = acc / normal[i][i];
    }

    // NOTE(review): only the high_gpa term is printed with a '*' separator;
    // the literals are kept as they were so the program's output format
    // does not change.
    std::cout << std::endl;
    std::cout << "拟合方程(关于high_gpa,math_sat,verb_sat,comp_gpa):";
    std::cout << std::endl << "y = " << coef[0] << " + " << coef[1] << "*high_gpa" << " + " << coef[2] << "math_sat" << " + " << coef[3] << "verb_sat" << " + " << coef[4] << "comp_gpa" << std::endl;
    return;
}
End