大道至简——SOM极简示例源码
想上传资源赚点积分,却找不到门路,原来要在电脑上操作才行。
(在电脑上与流氓软件殊死搏斗,之后就不能上网了。)忙了两天,算了,积分不要了,直接贴上来吧。
// SOFM_demo_Ver0.CPP,加载到新建VC6控制台项目即可运行
// 自组织映射网络(SOM)原理极简演示版——来自《神经网络模式识别及其实现》
// 对 SOFM.CPP 做了些修改,以便更好理解 (尚未拜读上面的书本)
// 大致改动:变量名、内部过程的调整,中文注释
//
// 2021-2-19 LiuQG, softez@163.com
/*
简介:
SOM 目的是让大量样本自动聚类,供后续应用。
此演示程序原理与步骤
将目标类模式空间映射到一个 5x5 的“网格”,其中每1格都是一个可能的类,
初始时每1个类的被赋予小的随机值。
训练:
从样本集中取1个样本,与“网格”中每1个类比对,与哪个类最相似
,哪个类(“格”)就是胜者,给它奖励——而且它周围的类都有赏
(让它们向输入者“演化”——距离拉近一点点)。
对样本集中所有样本进行1次上述操作,作为一轮。
这样继续,进行n轮操作,某些类模式逐渐聚集起来,形成多个密集区,
每个密集区的中心元素即代表一个聚类。
调整学习率和封赏范围:
随着训练轮次的递进,奖赏比例递减,但起初的500轮减得不多;
奖赏的范围每100轮缩减1次,直到只奖赏胜者,周围其他无赏。
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#define MAXPATTS 100 // 样本集容量上限
#define MAXNEURONS_IN 10 // 输入层节点上限
#define MAXNEURONS_X 15 // 输出层(二维的)各向节点数上限
#define MAXNEURONS_Y 15
#define MAXEPOCHS 1500 // 训练轮数
#define ETAMIN .005 // 每轮权重调节比例的缩减量下限
// 样本的结构(直接用1维数组会导致后面出现3维数组,晕。又为简单起见,直接引用定数组)
// One sample (or weight) vector. Fixed capacity keeps the demo free of
// dynamic allocation; only the first SizeVector/InputLayerSize slots are used.
struct PattVector {
    double v[MAXNEURONS_IN];
};
// Container for the training sample set, plus a shuffle table that lets the
// trainer present samples in randomized order.
class PATTERN {
friend class SOFM;
private:
PattVector P[MAXPATTS];  // the sample set
int NumPatterns;         // number of patterns actually loaded
int Shuffle[MAXPATTS];   // index permutation for randomized presentation order
int SizeVector;          // dimensionality of each pattern vector
// Return a uniform random index in [0, N-1].
// BUG FIX vs original: (a) N*rand() overflowed int where RAND_MAX is large
// (e.g. glibc: 2^31-1), so the product is now formed in double; (b) the
// out-of-range clamp assigned j=N, one past the last valid index.
unsigned int Random(int N) {
unsigned int j;
j = (unsigned int)((double)rand() / ((double)RAND_MAX + 1.0) * N);
if (j >= (unsigned int)N) j = N - 1; // defensive clamp to last valid index
return j;
};
public:
PATTERN();
int GetPatterns(char *);      // load pattern set from file
int GetRandPats(int,int);     // random patterns: arg1=# of patterns, arg2=dimension
PattVector &Query(int patt);  // returns P[patt]
PattVector &QueryR(int patt); // returns P[Shuffle[patt]]
void ReShuffle(int N);        // shuffle the presentation order
};
// Start with the identity permutation: sample k is presented k-th.
PATTERN::PATTERN() {
int k = 0;
while (k < MAXPATTS) {
Shuffle[k] = k;
++k;
}
}
int PATTERN::GetPatterns(char *fname){
FILE *fp;
int i,j;
double x;
fp=fopen(fname,“r”);
if (fp==NULL) return 0; // Test for failure.
fscanf(fp,"%d",&NumPatterns);
fscanf(fp,"%d",&SizeVector);
for (i=0; i<NumPatterns; i++) { // 读取样本(模式),数量为 NumPatterns
// 读取第 i 个样本,样本的维数=SizeVector
for (j=0; j<SizeVector; j++) { // create a pattern
fscanf(fp,"%lg",&x); // consisting of all elements
P[i].v[j]=x;
}
}
fclose(fp);
return 1;
}
// Generate n1 random patterns of dimension n2; every component is a
// uniform random value in [0, 1].
int PATTERN::GetRandPats(int n1, int n2) {
NumPatterns = n1;
SizeVector  = n2;
for (int row = 0; row < n1; row++)
for (int col = 0; col < n2; col++)
P[row].v[col] = (double)rand() / RAND_MAX; // uniform in [0,1]
return 1;
}
// Shuffle the presentation order by performing N random pair swaps
// inside the Shuffle index table.
void PATTERN::ReShuffle(int N){
for (int k = 0; k < N; k++) {
int p = Random(NumPatterns);
int q = Random(NumPatterns);
int t = Shuffle[p];
Shuffle[p] = Shuffle[q];
Shuffle[q] = t;
}
}
// Fetch a pattern by its original position in the set.
PattVector &PATTERN::Query(int idx) {
return P[idx];
}
// Fetch a pattern through the shuffle table (randomized order).
PattVector &PATTERN::QueryR(int idx) {
return P[Shuffle[idx]];
}
// 2-D index of a node in the output (competition) layer grid.
// The members are public, so the original `friend class SOFM;` declaration
// granted nothing and has been removed.
struct OutputLayer_NodeIndex {
int ix, iy;
};
// Self-Organizing Feature Map (Kohonen SOM): a 2-D grid of prototype
// weight vectors trained competitively against a PATTERN sample set.
class SOFM {
private:
//Sofm_Outlay
PattVector W[MAXNEURONS_X][MAXNEURONS_Y]; // weight matrix: one prototype vector per output-grid cell;
// each cell represents one candidate class; samples are compared against every cell
int OutputLayerSizeX, OutputLayerSizeY; // output-grid dimensions actually in use (set by SetParms)
//int Lattice; //Square Vs triangular lattice
PattVector Yin; // input-layer neurons: holds the one sample currently presented
int InputLayerSize; // input-layer size == dimensionality of the sample vectors
int R; // neighborhood radius for the reward/weight update around the winner
int MaxEpoch;
int epoch; // current training epoch (set by RunTrian)
double eta; // the learning rate
double delta_eta; // amount the learning rate is decreased each epoch
double Erosion; // accumulator controlling when the neighborhood shrinks
int StochFlg; // present vectors in random order if 1
PATTERN *Pattern; // training-sample source (not owned; set by SetPattern)
int LoadInLayer(int); // copy pattern #arg into the input layer Yin
double EucNorm(OutputLayer_NodeIndex &); // Euclidean distance: Yin vs one grid node
double distance(PattVector &a, PattVector &b); // Euclidean distance between two vectors
OutputLayer_NodeIndex FindWinner(); // grid coordinates of the winning (closest) neuron
void UpdateOutlayer(OutputLayer_NodeIndex &); // reward the winner and its neighborhood
void AdaptParms(); // per-epoch decay of learning rate / neighborhood radius
public:
SOFM();
void SetPattern(PATTERN *);
void SetParms(int, int, double);
void PrintWeights();
void PrintWinner();
void RunTrian();
};
// Defaults: no neighborhood erosion accumulated yet, and samples are
// presented in randomized order.
SOFM::SOFM() {
Erosion = 0;
StochFlg = 1;
}
// Attach a pattern source; the input-layer size follows its vector size.
void SOFM::SetPattern(PATTERN *p) {
InputLayerSize = p->SizeVector;
Pattern = p;
}
// Configure the map: an X-by-Y output grid with initial learning rate LR.
// The reward neighborhood is a box of radius R centered on the winner.
// All weights start as small random values in [0, 0.1].
void SOFM::SetParms(int X, int Y, double LR)
{
OutputLayerSizeX = X;
OutputLayerSizeY = Y;
R = 3;             // initial reward-neighborhood radius
eta = LR;          // initial learning rate
delta_eta = 0.005; // per-epoch learning-rate decrement
for (int col = 0; col < X; col++) {
for (int row = 0; row < Y; row++) {
for (int d = 0; d < InputLayerSize; d++)
W[col][row].v[d] = (double)rand() / (10.0 * (double)RAND_MAX);
}
}
}
// Copy sample #P from the pattern set into the input layer Yin.
// BUG FIX vs original: the calls appeared as Query§/QueryR§ — mojibake
// for Query(P)/QueryR(P) — and did not compile.
int SOFM::LoadInLayer(int P){
if (StochFlg)
Yin = Pattern->QueryR(P); // shuffled order
else
Yin = Pattern->Query(P);  // original order
return 1;
}
// Called once per epoch: decay the learning rate eta and, every 100 epochs
// (Erosion accumulates 0.01 per call), shrink the reward neighborhood by
// one, down to radius 0 (winner-only updates).
// BUG FIX vs original: `R–` used an en dash instead of `--`, and the printf
// format string used curly quotes; neither compiled. The comment claiming
// "first 1000 epochs" also disagreed with the `epoch < 500` test.
void SOFM::AdaptParms(){
Erosion += .01;
if (Erosion>=1.0) { // reached once every 100 epochs
Erosion =0.0;
if (R>0)
R--; // shrink the reward neighborhood
printf("New neighborhood. Radius=%d", R);
}
if (epoch <500) // during the first 500 epochs decay more slowly
eta -= delta_eta/10.0;
else
eta -= delta_eta;
if (eta < ETAMIN)
eta =ETAMIN; // never let the learning rate decay to zero
}
// Dump the weight matrix: for each grid row, print one text line per input
// dimension (first line = dim1, second = dim2, ...), each line holding the
// values of that dimension across the row's nodes.
// BUG FIX vs original: the inner printf format strings used curly "smart
// quotes" and did not compile.
void SOFM::PrintWeights() {
int ix,iy,k;
printf(" 竞争层逐行节点输出,(本例二维)首行dim1,次行dim2…:\n");
for (iy=0; iy<OutputLayerSizeY; iy++) {
for (k=0; k<InputLayerSize; k++){
for (ix=0; ix<OutputLayerSizeX; ix++)
printf("%5.2f ", W[ix][iy].v[k]);
printf("\n");
}
printf("\n");
}
}
void SOFM::RunTrian(){
OutputLayer_NodeIndex Winner;
int np =Pattern->NumPatterns;
epoch=0;
while (epoch<=MAXEPOCHS){
for (int i=0; i<np; i++){
LoadInLayer(i);
Winner =FindWinner();
UpdateOutlayer(Winner);
}
if((epoch % 50)==0) {
printf("\nEpoch=%d\n",epoch);
PrintWeights();
}
epoch++;
if (StochFlg)
Pattern->ReShuffle(np);
AdaptParms();
}
}
// Reward the winner and its neighborhood: every node within radius R of
// the winner (box clipped to the grid) is nudged a fraction eta of the way
// toward the current input Yin — i.e. made a little more like the sample.
void SOFM::UpdateOutlayer(OutputLayer_NodeIndex &Winner){
int lox = Winner.ix - R;  if (lox < 0) lox = 0;
int hix = Winner.ix + R;  if (hix > OutputLayerSizeX - 1) hix = OutputLayerSizeX - 1;
int loy = Winner.iy - R;  if (loy < 0) loy = 0;
int hiy = Winner.iy + R;  if (hiy > OutputLayerSizeY - 1) hiy = OutputLayerSizeY - 1;
for (int ix = lox; ix <= hix; ix++)
for (int iy = loy; iy <= hiy; iy++)
for (int k = 0; k < InputLayerSize; k++)
W[ix][iy].v[k] += eta * (Yin.v[k] - W[ix][iy].v[k]);
}
// Scan every output node and return the grid index of the one whose weight
// vector lies closest (Euclidean) to the current input Yin.
OutputLayer_NodeIndex SOFM::FindWinner(){
OutputLayer_NodeIndex best_idx = {-1, -1};
double best_d = 1.0e99; // larger than any reachable distance
for (int cx = 0; cx < OutputLayerSizeX; cx++) {
for (int cy = 0; cy < OutputLayerSizeY; cy++) {
OutputLayer_NodeIndex cur = {cx, cy};
double d = EucNorm(cur);
if (d < best_d) {
best_d = d;
best_idx = cur;
}
}
}
return best_idx;
}
// Euclidean distance between two pattern vectors, taken over the first
// InputLayerSize components.
double SOFM::distance(PattVector &a, PattVector &b){
double sum_sq = 0.0;
for (int k = 0; k < InputLayerSize; k++) {
const double diff = a.v[k] - b.v[k];
sum_sq += diff * diff;
}
return sqrt(sum_sq);
}
// Euclidean distance between the current input Yin and the weight vector
// of the output-layer node addressed by idx.
double SOFM::EucNorm(OutputLayer_NodeIndex &idx){
return distance(Yin, W[idx.ix][idx.iy]);
}
//=================================================================
// GLOBAL OBJECTS
//=================================================================
PATTERN InPat; // the training sample set (loaded in main)
SOFM FMap; // the self-organizing feature map being trained
// Entry point. Usage: SOFM PATTERN_FILE
// Loads the pattern file named on the command line, configures a 5x5 map
// with initial learning rate 0.9, and runs training.
// BUG FIXES vs original: added the required `int` return type (implicit
// int is not valid C++), replaced the curly quotes around the usage string,
// and the GetPatterns() result is now checked instead of silently ignored.
int main(int argc, char *argv[]) {
if (argc>1) {
if (!InPat.GetPatterns(argv[1])) {  // establish the pattern set
printf("ERROR: cannot read pattern file %s\n", argv[1]);
return 1;
}
FMap.SetPattern(&InPat);  // inform the feature map about the patterns
FMap.SetParms(5,5,0.900); // init feature-map parameters
FMap.RunTrian();          // run the feature map with training enabled
}
else {
printf("USAGE: SOFM PATTERN_FILE");
}
return 0;
}