模型的核心部分。代码并不多,基本思路就是层层训练,前一层的输出作为下一层的输入。hidden layer和dA共享的是相同的网络结构。有些需要注意的地方已经在代码中加入注释了。另外,还发现了原来代码实现中的一个bug,已经更正。
代码如下:
// Greedy layer-wise pre-training: each dA layer i is trained on the
// activations produced by the already-trained layers below it.
//
// input            : N training samples, row-major, each of length n_ins
// lr               : learning rate for the denoising auto-encoders
// corruption_level : input corruption ratio used by dA training
// epochs           : number of passes over the N samples per layer
void SdA::pretrain (
	int *input,
	double lr,
	double corruption_level,
	int epochs
)
{
	// BUG FIX: layer_input must start as a null pointer so the per-sample
	// release below is safe on the very first iteration.
	int *layer_input = 0;           // activations feeding the layer being trained
	int prev_layer_input_size;
	int *prev_layer_input;
	int *train_X = new int[n_ins];  // scratch copy of the current sample

	for(int i=0; i<n_layers; i++) // layer-wise, i
	{
		for(int epoch=0; epoch<epochs; epoch++) // training epochs, epoch
		{
			for(int n=0; n<N; n++) // input x1...xN, iterate each sample, n
			{
				// copy the nth input sample
				for(int m=0; m<n_ins; m++)
					train_X[m] = input[n * n_ins + m];
				// Propagate the sample bottom-up through the already trained
				// layers 0..i-1 to obtain the input of the layer being
				// trained (layer i); then train layer i as a denoising
				// auto-encoder on that input.
				for(int l=0; l<=i; l++) // l
				{
					if(l == 0) // bottom: the raw input is the layer input
					{
						// BUG FIX: the previous sample's buffer was leaked
						// here; release it before allocating a new one
						// (delete[] on a null pointer is a no-op).
						delete[] layer_input;
						layer_input = new int[n_ins];
						for(int j=0; j<n_ins; j++)
							layer_input[j] = train_X[j];
					}
					else // the remaining layers
					{
						// the previous layer_input becomes the input of layer l
						if(l == 1)
							prev_layer_input_size = n_ins;
						else
							prev_layer_input_size = hidden_layer_sizes[l-2];
						prev_layer_input = new int[prev_layer_input_size];
						for(int j=0; j<prev_layer_input_size; j++)
							prev_layer_input[j] = layer_input[j];
						delete[] layer_input;
						// sample the activations of layer l-1 into layer_input
						layer_input = new int[hidden_layer_sizes[l-1]];
						sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
						delete[] prev_layer_input;
					}
				} // for l
				// train the current layer as a denoising auto-encoder
				dA_layers[i]->train(layer_input, lr, corruption_level);
			} // for N
		} // for epochs
	} // for n_layers
	delete[] train_X;
	delete[] layer_input;
}
// Supervised fine-tuning: forward each sample through the pre-trained
// hidden layers (whose weights are used as-is here, unlike in pretrain
// where they are being learned) and train the logistic-regression output
// layer on the resulting top-layer activations.
//
// input  : N training samples, row-major, each of length n_ins
// label  : N one-hot labels, row-major, each of length n_outs
// lr     : learning rate for the logistic-regression layer
// epochs : number of passes over the N samples
void SdA::finetune(
	int *input,
	int *label,
	double lr,
	int epochs)
{
	// BUG FIX: layer_input must start as a null pointer so the per-sample
	// release below is safe on the very first iteration.
	int *layer_input = 0;            // top-layer activations of current sample
	int *prev_layer_input;
	int *train_X = new int[n_ins];   // scratch copy of the current sample
	int *train_Y = new int[n_outs];  // scratch copy of the current label

	for(int epoch=0; epoch<epochs; epoch++)
	{
		for(int n=0; n<N; n++) // input x1...xN
		{
			// copy the nth sample and its label
			for(int m=0; m<n_ins; m++)
				train_X[m] = input[n * n_ins + m];
			for(int m=0; m<n_outs; m++)
				train_Y[m] = label[n * n_outs + m];
			// forward the sample layer by layer up to the last hidden layer
			for(int i=0; i<n_layers; i++)
			{
				if(i == 0)
				{
					prev_layer_input = new int[n_ins];
					for(int j=0; j<n_ins; j++)
						prev_layer_input[j] = train_X[j];
					// BUG FIX: the previous sample's top-layer buffer was
					// leaked here; release it before it is overwritten below
					// (delete[] on a null pointer is a no-op).
					delete[] layer_input;
				}
				else
				{
					prev_layer_input = new int[hidden_layer_sizes[i-1]];
					for(int j=0; j<hidden_layer_sizes[i-1]; j++)
						prev_layer_input[j] = layer_input[j];
					delete[] layer_input;
				}
				layer_input = new int[hidden_layer_sizes[i]];
				sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
				delete[] prev_layer_input;
			} // for n_layers
			// train the output (last) layer by logistic regression
			log_layer->train(layer_input, train_Y, lr);
		} // for N
		// lr *= 0.95; // optional learning-rate decay (kept disabled)
	} // for epoch
	delete[] layer_input;
	delete[] train_X;
	delete[] train_Y;
}
// Inference: forward a single sample through every hidden layer using
// deterministic sigmoid activations (no sampling, unlike training), then
// through the logistic-regression layer; y receives the softmax outputs.
//
// x : one input sample of length n_ins
// y : output buffer of length log_layer->n_out (class probabilities)
void SdA::predict (
	int *x,
	double *y
)
{
	// copy the (int) input sample into a double working buffer
	double *layer_input = new double[n_ins];
	for(int j=0; j<n_ins; j++)
		layer_input[j] = x[j];

	// layer activation: compute the output value layer by layer
	for(int i=0; i<n_layers; i++)
	{
		double *layer_output = new double[sigmoid_layers[i]->n_out];
		for(int k=0; k<sigmoid_layers[i]->n_out; k++)
		{
			// the accumulator must be reset per output unit, not per layer
			// (this was the bug fixed in the original post)
			double linear_output = 0.0;
			for(int j=0; j<sigmoid_layers[i]->n_in; j++)
			{
				linear_output +=
					sigmoid_layers[i]->W[k][j] * layer_input[j];
			}
			linear_output += sigmoid_layers[i]->b[k];
			layer_output[k] = sigmoid(linear_output);
		}
		delete[] layer_input;
		// hand the buffer to the next iteration directly instead of the
		// element-wise copy + extra allocation the original performed
		layer_input = layer_output;
	} // for n_layers

	// the logistic regression layer
	for(int i=0; i<log_layer->n_out; i++)
	{
		y[i] = 0;
		for(int j=0; j<log_layer->n_in; j++)
		{
			y[i] += log_layer->W[i][j] * layer_input[j];
		}
		y[i] += log_layer->b[i];
	}
	log_layer->softmax(y);
	delete[] layer_input;
}