本篇介绍第三个算法,感知机算法,同样仅介绍两个主要函数:预测函数和训练函数。
需要说明,本算法仍使用随机梯度下降算法,与上篇多元线性回归有很多相似之处。
算法介绍
激活函数可以如下表示:
$activation = bias + \sum_{i=1}^{n} weight_i \times x_i$
阶跃函数
$prediction = \begin{cases} 1.0 & \text{if } activation \ge 0.0 \\ 0.0 & \text{otherwise} \end{cases}$
函数
读取csv
- 以下三个函数分别为获取行数、获取列数、获取文本内容。
// Shared CSV state: the parsed table and its dimensions.
// NOTE(review): these globals are not referenced by the functions shown
// below, which use their own locals instead — presumably leftovers from
// an earlier revision; confirm before removing.
double **dataset;
int row,col;
/* Count the number of lines (rows) in a CSV file.
 * filename: path to the CSV file.
 * Returns the line count, or -1 if the file cannot be opened. */
int get_row(char *filename)
{
    char line[1024];
    int rows = 0;
    FILE *stream = fopen(filename, "r");
    if (stream == NULL) {
        /* Original dereferenced a NULL stream in fgets on a missing
         * file; fail with a sentinel instead of crashing. */
        return -1;
    }
    while (fgets(line, 1024, stream)) {
        rows++;
    }
    fclose(stream);
    return rows;
}
/* Count the comma-separated columns in the first line of a CSV file.
 * filename: path to the CSV file.
 * Returns the column count, 0 for an empty file, or -1 if the file
 * cannot be opened. */
int get_col(char *filename)
{
    char line[1024];
    int cols = 0;
    FILE *stream = fopen(filename, "r");
    if (stream == NULL) {
        return -1;
    }
    /* The original ignored the fgets result: on an empty file it ran
     * strtok over an uninitialized buffer — undefined behavior. */
    if (fgets(line, 1024, stream)) {
        char *token = strtok(line, ",");
        while (token) {
            token = strtok(NULL, ",");
            cols++;
        }
    }
    fclose(stream);
    return cols;
}
/* Parse a CSV file into a pre-allocated 2-D array of doubles.
 * line:     caller-supplied scratch buffer (>= 1024 bytes).
 * data:     pre-allocated [rows][cols] array receiving the values.
 * filename: path to the CSV file.
 * Silently does nothing if the file cannot be opened. */
void get_two_dimension(char* line, double** data, char *filename)
{
    FILE* stream = fopen(filename, "r");
    if (stream == NULL) {
        return; /* original crashed in fgets on a missing file */
    }
    int i = 0;
    while (fgets(line, 1024, stream)) { /* one CSV row per line */
        int j = 0;
        /* ",\n" strips the trailing newline from the last field;
         * the original's strdup/free of an unused copy is removed
         * (dead code, and strdup is POSIX, not ISO C). */
        for (char *tok = strtok(line, ",\n"); tok && *tok;
             tok = strtok(NULL, ",\n"), j++) {
            data[i][j] = atof(tok); /* field text -> double */
        }
        i++;
    }
    fclose(stream);
}
EXAMPLE
/* Example driver: load "data.csv" into a dynamically allocated 2-D
 * array, print its dimensions and contents, then release the memory.
 * The original body was corrupted (undefined `sum`/`length`, a stray
 * `return mean`, unbalanced braces) and under-allocated the row-pointer
 * array with sizeof(int *); both are fixed here. */
int main()
{
    char filename[] = "data.csv";
    char line[1024];
    int row = get_row(filename);
    int col = get_col(filename);
    if (row <= 0 || col <= 0) {
        printf("failed to read %s\n", filename);
        return 1;
    }
    /* Row pointers are double*, so allocate sizeof(double *). */
    double **data = (double **)malloc(row * sizeof(double *));
    for (int i = 0; i < row; ++i) {
        data[i] = (double *)malloc(col * sizeof(double));
    }
    get_two_dimension(line, data, filename);
    printf("row = %d\n", row);
    printf("col = %d\n", col);
    for (int i = 0; i < row; i++) {
        for (int j = 0; j < col; j++) {
            printf("%f%c", data[i][j], j == col - 1 ? '\n' : ' ');
        }
    }
    /* Release in reverse order of allocation. */
    for (int i = 0; i < row; i++) {
        free(data[i]);
    }
    free(data);
    return 0;
}
随机梯度下降估计回归系数
更新方程:
$error(t) = prediction(t) - expected(t)$
$b_1(t+1) = b_1(t) - learning\ rate \times error(t) \times x_1(t)$
$b_0(t+1) = b_0(t) - learning\ rate \times error(t)$
// Train perceptron weights with stochastic gradient descent.
// Parameters: dataset, attributes per row (label included), weight
// vector (bias stored in weights[0]), learning rate, epoch count,
// number of training rows. Prints the final weights when done.
void train_weights(double **data, int col,double *weights, double l_rate, int n_epoch, int train_size) {
    for (int epoch = 0; epoch < n_epoch; epoch++) {
        // One full pass over the training rows per epoch.
        for (int sample = 0; sample < train_size; sample++) {
            double predicted = predict(col, data[sample], weights);
            // Last column of the row holds the expected label.
            double error = data[sample][col - 1] - predicted;
            // Bias update has no input term.
            weights[0] += l_rate * error;
            // Per-feature weight updates.
            for (int feat = 0; feat < col - 1; feat++) {
                weights[feat + 1] += l_rate * error * data[sample][feat];
            }
        }
    }
    for (int w = 0; w < col; w++) {
        printf("weights[%d]=%f\n", w, weights[w]);
    }
}
预测函数
// Predict the class (0.0 or 1.0) for one sample row.
// Parameters: attributes per row, the row's feature values, and the
// weight vector (bias stored in weights[0]).
double predict(int col,double *array, double *weights) {
    // Weighted sum of the features plus the bias term.
    double activation = weights[0];
    for (int i = 0; i < col - 1; i++) {
        activation += weights[i + 1] * array[i];
    }
    // Step function: fire when the activation is non-negative.
    return activation >= 0.0 ? 1.0 : 0.0;
}