Fisher最优分割法原理:
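1.定义类的直径: 设有序样本为 $x_1, x_2, \dots, x_n$, 某一类由相邻样本 $\{x_i, x_{i+1}, \dots, x_j\}$ $(i \le j)$ 组成, 其均值为 $\bar{x}_{ij} = \frac{1}{j-i+1}\sum_{t=i}^{j} x_t$, 则该类的直径定义为 $D(i,j) = \sum_{t=i}^{j} (x_t - \bar{x}_{ij})^2$
2.定义分类法的损失函数为: 将 $n$ 个有序样本分为 $k$ 类, 各类的起点为 $1 = i_1 < i_2 < \dots < i_k \le n$ (并记 $i_{k+1} = n+1$), 则该分法 $b(n,k)$ 的损失函数为 $L[b(n,k)] = \sum_{t=1}^{k} D(i_t,\ i_{t+1}-1)$
3.费希尔算法的递推公式: 记 $P(n,k)$ 为使损失函数达到最小的分法, 则 $L[P(n,2)] = \min_{2 \le j \le n} \{ D(1,j-1) + D(j,n) \}$, $L[P(n,k)] = \min_{k \le j \le n} \{ L[P(j-1,k-1)] + D(j,n) \}$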
Fisher最优分割法的聚类步骤 (如下图):
C语言代码如下 (测试用例的损失函数曲线的拐点不便于直接计算, 因此这里通过观察曲线来确定拐点, 从而得出需要分的类数为4):
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
// Number of samples; equals the number of initializers given for sample[].
#define NUM 11
// Large sentinel used by calLoss() as the starting value of its minimum search.
#define INF 9999
// S[k][l]: smallest loss calLoss() found for cutting samples 0..l into k+1 classes.
double S[NUM][NUM] = {0};
// J[k][l]: the split index j that produced S[k][l], i.e. the index of the
// first sample of the last class.
int J[NUM][NUM];
// Class diameter: D[i][j] is the sum of squared deviations from the mean of
// samples i..j, filled by getDistance().
double D[NUM][NUM] = {0};
// Ordered sample set that is partitioned.
double sample[NUM] = {9.3, 1.8, 1.9, 1.7, 1.5, 1.3, 1.4, 2.0, 1.9, 2.3, 2.1};
void getDistance(); // fills D with the class diameters
void calLoss(); // fills S and J with the loss-function values
/*
 * Runs the Fisher optimal-partition example on the global sample[]:
 * builds the diameter table D and the loss tables S/J, recovers the class
 * boundaries for K classes, prints S and J, and finally prints the samples
 * with one class per output line.
 *
 * Returns 0.
 */
int main(void)
{
    /* Number of classes; chosen by inspecting the computed loss values,
     * which show a turning point when the samples are cut into four classes. */
    enum { K = 4 };
    /* breakPoint[c] is the index of the first sample of class c;
     * breakPoint[0] stays 0 because the first class starts at sample 0. */
    int breakPoint[K] = {0};
    int i, j;
    int last; /* index of the last sample of the prefix still to be split */

    getDistance();
    calLoss();

    /*
     * Back-track through J: the last class of the (i+1)-class split of
     * samples 0..last starts at J[i][last]; the samples before it are then
     * split into i classes, and so on.
     */
    last = NUM - 1;
    for (i = K - 1; i > 0; i--) {
        /* When the prefix holds no more samples than classes, every sample
         * is its own class, so no table lookup is needed (and J may not
         * have been filled for such a cell). */
        breakPoint[i] = (last <= i) ? last : J[i][last];
        last = breakPoint[i] - 1;
    }

    /* Dump the loss table. */
    for (i = 0; i < NUM; i++) {
        for (j = 0; j < NUM; j++) {
            printf(" %0.3lf ", S[i][j]);
        }
        printf("\n");
    }

    /* Dump the split-index table. */
    for (i = 0; i < NUM; i++) {
        for (j = 0; j < NUM; j++) {
            printf(" %d ", J[i][j]);
        }
        printf("\n");
    }

    /* Print the classification: start a new line at every class boundary. */
    printf("\n分类结果如下:\n");
    j = 1;
    for (i = 0; i < NUM; i++) {
        /* j < K keeps the lookup inside breakPoint once the last boundary
         * has been passed. */
        if (j < K && i == breakPoint[j]) {
            j++;
            printf("\n");
        }
        printf("%0.3lf ", sample[i]);
    }
    printf("\n");

    return 0;
}
/*
 * Computes the loss-function tables from the diameter table D.
 *
 * For every k in 1..NUM-2 and every l in k..NUM-1:
 *   S[k][l] = min over j in k..l of (cost of samples 0..j-1) + D[j][l]
 *   J[k][l] = the first j that attains that minimum
 * where the cost of samples 0..j-1 is D[0][j-1] for k == 1 (a single
 * leading class) and S[k-1][j-1] otherwise. S[k][l] is therefore the
 * smallest loss of cutting samples 0..l into k+1 classes, and J[k][l] is
 * the index of the first sample of the last class.
 *
 * Cells with l < k (more classes than samples) are left untouched.
 * D must already be filled (see getDistance()).
 */
void calLoss()
{
    int k, l, j;

    for (k = 1; k < NUM - 1; k++) {
        /* k+1 classes need at least k+1 samples, so the shortest feasible
         * prefix ends at index k. */
        for (l = k; l < NUM; l++) {
            double best = 0.0;
            int bestJ = k;

            /* The last class is j..l; the k classes before it need at
             * least k samples, hence j >= k. */
            for (j = k; j <= l; j++) {
                double head = (k == 1) ? D[0][j - 1] : S[k - 1][j - 1];
                double cand = head + D[j][l];

                /* Seed the minimum with the first candidate instead of a
                 * fixed sentinel so the result is never capped and bestJ
                 * always refers to this cell. */
                if (j == k || cand < best) {
                    best = cand;
                    bestJ = j;
                }
            }

            S[k][l] = best;
            J[k][l] = bestJ;
        }
    }
}
/*
 * Fills the class-diameter table: for every pair i < j, D[i][j] becomes the
 * sum of squared deviations of sample[i..j] from their mean.
 *
 * Each entry is assigned rather than accumulated, so calling the function
 * again recomputes the same values. Entries with i >= j are not written.
 */
void getDistance()
{
    int i, j, t;

    for (i = 0; i < NUM - 1; i++) {
        for (j = i + 1; j < NUM; j++) {
            int count = j - i + 1; /* number of samples in i..j */
            double sum = 0;
            double mean;
            double ss = 0;

            for (t = i; t <= j; t++) {
                sum += sample[t];
            }
            mean = sum / count;

            for (t = i; t <= j; t++) {
                ss += (sample[t] - mean) * (sample[t] - mean);
            }
            D[i][j] = ss;
        }
    }
}
结果分析:
分类结果如下: