接下来用代码来实现它吧~
#!/usr/bin/python
# coding=utf8
import sys;
import random;
import math;
EPS = 1e-9  # Tiny tolerance used to decide whether a floating-point number equals 0
def load_data(filename, data, dim):
    '''
    Parse a sparse, tab-separated sample file and append its samples to ``data``.

    Input line format: label<TAB>index1:value1<TAB>index2:value2..., where
    each index is a feature number starting from 1. Blank lines are skipped.

    Args:
        filename: Path of the text file to read.
        data: List that is extended in place with one ``(label, sample)``
            tuple per input line. ``sample`` is a dense list
            ``[v0, v1, ..., v[dim]]`` of ``dim + 1`` floats; ``v0`` is always
            1.0 and features missing from the line are 0.0.
        dim: Number of features; valid feature indices are 1..dim.

    Returns:
        None. Results are delivered through ``data``.

    Raises:
        ValueError: If a label, index or value cannot be parsed as a number.
        IndexError: If a field has no ``:`` separator, or if a feature index
            falls outside 1..dim.
    '''
    # The context manager guarantees the file is closed, even on a parse error.
    with open(filename, 'rt') as infile:
        for line in infile:
            line = line.rstrip("\r\n\t ")
            if not line:
                # Tolerate blank lines (e.g. a trailing one at end of file).
                continue
            fields = line.split("\t")
            label = int(fields[0])  # Label is expected to be 1 or -1.
            sample = [0.0] * (dim + 1)
            # Slot 0 holds the constant x0 feature so that W*X + b can be
            # written as a single dot product W*X.
            sample[0] = 1.0
            for field in fields[1:]:
                kv = field.split(":")
                idx = int(kv[0])
                val = float(kv[1])
                # Index 0 would clobber the bias slot and a negative index
                # would silently wrap around, so reject both explicitly.
                # IndexError matches what an index > dim already raised.
                if not 1 <= idx <= dim:
                    raise IndexError(
                        "feature index %d out of range [1, %d]" % (idx, dim))
                sample[idx] = val
            data.append((label, sample))
def svm_train(data4train, dim, W, iterations, lm, lr):
'''
目标函数: obj(<X,y>, W) = (对所有<X,y>SUM{
max{
0, 1 - W*X*y}}) + lm / 2 * ||W||^2, 即:hinge+L2
'''
X = [0.0 for v in range(0, dim + 1)]; # <sample, label> => <X, y>
grad =