参考 GitLab 上的代码:因为不能使用 cython,所以用 Python 2 重新实现了一遍;之后准备加入多分类方法,稍后再写。
FM 原理详解:这篇文章《FM 推荐算法原理》讲得特别好,推荐阅读。
代码如下:
# encoding: utf-8
# Author: czw
import numpy as np
import sys
from time import time
from sklearn.model_selection import train_test_split
import random
from scipy.sparse import csr_matrix
import copy
from math import exp, log
from util import _squared_loss, _log_loss
# fm model
# User-facing option strings mapped to the int codes used internally
# (ints kept for parity with the original cython implementation).
LEARNING_RATE_TYPES = {"optimal": 0, "invscaling": 1, "constant": 2}
# NOTE(review): "mult_classification" deliberately aliases binary
# "classification" (code 1) -- multi-class is a TODO per the file header;
# giving it its own code would currently fall into unhandled else-branches.
TASKS = {"regression": 0, "classification" : 1, "mult_classification" : 1}
class FM_model(object):
def __init__(self, num_factors=10,
num_iter=10,
k0=True,
k1=True,
init_stdev=0.1,
validation_size=0.01,
learning_rate_schedule="optimal",
initial_learning_rate=0.01,
power_t=0.5,
t0=0.001,
task='classification',
verbose=True,
shuffle_training=False,
seed = 28):
self.num_factors = num_factors
self.num_iter = num_iter
self.sum = np.zeros(self.num_factors)
self.sum_sqr = np.zeros(self.num_factors)
self.k0 = k0
self.k1 = k1
self.init_stdev = init_stdev
self.validation_size = validation_size
self.task = task
self.shuffle_training = shuffle_training
self.seed = seed
# Learning rate Parameters
self.learning_rate_schedule = learning_rate_schedule
self.eta0 = initial_learning_rate
self.power_t = power_t
self.t = 1.0
self.learning_rate = initial_learning_rate
self.t0 = t0
# for train
self.sum = np.zeros(self.num_factors)
self.sum_sqr = np.zeros(self.num_factors)
# Regularization Parameters (start with no regularization)
self.reg_0 = 0.001
self.reg_w = 0.001
self.reg_v = np.repeat(0.001, num_factors)
# local parameters in the lambda_update step
self.lambda_w_grad = 0.0
self.lambda_v_grad = 0.0
self.sum_f = 0.0
self.sum_f_dash_f = 0.0
self.verbose = verbose
# converse param
self.k0 = self._bool_to_int(self.k0)
self.k1 = self._bool_to_int(self.k1)
self.shuffle_training = self._bool_to_int(self.shuffle_training)
self.verbose = self._bool_to_int(self.verbose)
self.learning_rate_schedule = self._get_learning_rate_type(self.learning_rate_schedule)
self.task = self._get_task(self.task)
# model weight
# todo
def _validate_params(self):
# for Verifying the param
pass
def _get_learning_rate_type(self, learning_rate):
"""Map learning rate string to int for cython"""
try:
return LEARNING_RATE_TYPES[learning_rate]
except KeyError:
raise ValueError("learning rate %s "
"is not supported. " % learning_rate)
def _bool_to_int(self, bool_arg):
"""Map bool to int for cython"""
if bool_arg == True:
return 1
else:
return 0
def _get_task(self, task):
"""Map task string to int for cython"""
try:
return TASKS[task]
except KeyError:
raise ValueError("task %s "
"is not supported. " % task)
def _prepare_y(self, y):
'''
for binary classification
'''
y_i = np.ones(y.shape, dtype=np.float64, order="C")
y_i[y != 1] = -1.0
return y_i
def _predict_instance(self, x_data_ptr, x_ind_ptr, xnnz):
result = 0.
_sum = np.zeros(self.num_factors)
_sum_sqr = np.zeros(self.num_factors)
if self.k0:
result += self.w0
if self.k1:
for i in range(xnnz):
feature = x_ind_ptr[i]
result += self.w[feature]*x_data_ptr[i]
for f in range(self.num_factors):
_sum[f] = 0.
_sum_sqr[f] = 0.
for i in range(xnnz):
feature = x_ind_ptr[i]
d = self.v[f, feature]*x_data_ptr[i]
_sum[f] += d
_sum_sqr[f] += d*d
result += 0.5*(_sum[f]*_sum[f] - _sum_sqr[f])
self.sum = copy.copy(_sum)
return result
def _predict_scaled(self, x_data_ptr, x_ind_ptr, xnnz):
'''
for what??
'''
result = 0.
sum_ = np.zeros(self.num_factors)
sum_sqr_ = np.zeros(self.num_factors)
if self.k0 > 0:
result += self.w0
if self.k1 > 0:
for i in range(xnnz):
feature = x_ind_ptr[i]
w_dash = w[feature] - self.learning_rate*(self.grad_w[feature]+2*self.reg_w*self.w[feature])
result += w_dash * x_data_ptr[i]
for f in range(self.num_factors):
sum_[f] = 0.
sum_sqr_[f] = 0.
for i in range(xnnz):
feature = x_ind_ptr[i]
v_dash = v[f,feature] - learning_rate * (grad_v[f,feature] + 2 * reg_v[f] * v[f,feature])
d = v_dash * x_data_ptr[i]
sum_[f] += d
sum_sqr_[f] += d*d
result += 0.5 * (sum_[f]*sum_[f] - sum_sqr_[f])
return result
def _sgd_theta_step(self, x_data_ptr, x_ind_ptr, xnnz, y):
w0 = copy.copy(self.w0)
w = copy.copy(self.w)
v = copy.copy(self.v)
grad_w = copy.copy(self.grad_w)
grad_v = copy.copy(self.grad_v)
p = self._predict_instance(x_data_ptr, x_ind_ptr, xnnz)
if self.task == TASKS['regression']:
p = min(self.max_target, p)
p = max(self.min_target, p)
mult = 2 * (p-y)
# logit loss / sigmoid
# mult : dloss/dp
elif self.task == TASKS['classification']:
p = (1.0 / (1.0+exp(-p)))
mult = y*((1.0 / (1.0+exp(-y*p))) - 1.0)
else:
pass
# optimal: eta = 1.0/(t+t0) [default] (t+=1)
# constant: eta = eta0
# invscaling: eta = eta0 / pow(t, power_t)
if self.learning_rate_schedule == LEARNING_RATE_TYPES['optimal']:
learning_rate = 1.0/(self.t+self.t0)
elif self.learning_rate_schedule == LEARNING_RATE_TYPES['invscaling']:
learning_rate = self.learning_rate / pow(self.t, self.power_t)
# todo : multi_class
if self.verbose:
if self.task == TASKS['regression']:
self.sumloss += _squared_loss(p, y)
elif self.task == TASKS['classification']:
self.sumloss += _log_loss(p,y)
else:
pass
if self.k0:
grad_0 = mult
w0 -= learning_rate * (grad_0 + 2*self.reg_0*w0)
if self.k1:
for i in range(xnnz):
feature = x_ind_ptr[i]
grad_w[feature] = mult * x_data_ptr[i]
w[feature] -= learning_rate*(grad_w[feature] + 2*self.reg_w*w[feature])
for f in range(self.num_factors):
for i in range(xnnz):
feature = x_ind_ptr[i]
grad_v[f, feature] = mult*x_data_ptr[i]*(self.sum[f]-v[f, feature]*x_data_ptr[i])
v[f, feature] -= learning_rate*(grad_v[f, feature] + 2 * self.reg_v[f] * v[f,feature])
self.learning_rate = copy.copy(learning_rate)
self.w0 = copy.copy(w0)
self.w = copy.copy(w)
self.v = copy.copy(v)
self.grad_w = copy.copy(grad_w)
self.grad_v = copy.copy(grad_v)
self.t += 1
self.count += 1
# for adjust reg_weight
def _sgd_lambda_step(self, val_x_data_ptr, val_x_ind_ptr, val_xnnz, val_y):
w = copy.copy(self.w)
v = copy.copy(self.v)
grad_w = copy.copy(self.grad_w)
grad_v = copy.copy(self.grad_v)
learning_rate = self.learning_rate
reg_0 = copy.copy(self.reg_0)
reg_w = copy.copy(self.reg_w)
reg_v = copy.copy(self.reg_v)
lambda_w_grad = 0.
lambda_v_grad = 0.
p = self._predict_scaled(val_x_data_ptr, val_x_ind_ptr, val_xnnz)
if self.task == TASKS['regression']:
p = min(self.max_target, p)
p = max(self.min_target, p)
grad_loss = 2*(p-val_y)
elif self.task == TASKS['classification']:
grad_loss = val_y*((1.0/(1.0+exp(-val_y*p)))-1.0)
else:
pass
if self.k1>0:
lambda_w_grad = 0.0
for i in range(val_xnnz):
feature = val_x_ind_ptr[i]
lambda_w_grad += val_x_data_ptr[i] * w[feature]
lambda_w_grad = -2 * learning_rate * lambda_w_grad
reg_w -= learning_rate * grad_loss * lambda_w_grad
reg_w = max(0.0, reg_w)
for f in xrange(self.num_factors):
sum_f = 0.0
sum_f_dash = 0.0
sum_f_dash_f = 0.0
for i in range(val_xnnz):
feature = val_x_ind_ptr[i]
v_dash = v[f,feature] - learning_rate * (grad_v[f,feature] + 2 * reg_v[f] * v[f,feature])
sum_f_dash += v_dash * val_x_data_ptr[i]
sum_f += v[f,feature] * val_x_data_ptr[i]
sum_f_dash_f += v_dash * val_x_data_ptr[i] * v[f,feature] * val_x_data_ptr[i]
lambda_v_grad = -2 * learning_rate * (sum_f_dash * sum_f - sum_f_dash_f)
reg_v[f] -= learning_rate * grad_loss * lambda_v_grad
reg_v[f] = max(0.0, reg_v[f])
self.reg_w = copy.copy(reg_w)
self.reg_v = copy.copy(reg_v)
def fit(self, X, y):
'''
# todo readme doc
# todo: load 参数进行继续训练
# todo: sample_weight 样本权重未加
'''
if type(y) != np.ndarray:
y = np.array(y)
if self.task == 'classification':
y = self._prepare_y(y)
# for regression: limit the value of predict in (min, max)
self.max_target = max(y)
self.min_target = min(y)
if self.verbose:
print("Creating validation dataset of %.2f of training for adaptive regularization" % self.validation_size)
X_train, X_val, y_train, y_val = train_test_split(
X, y, test_size=self.validation_size)
train_dataset = CSRDataset(X_train, y_train)
val_dataset = CSRDataset(X_val, y_val)
n_samples = train_dataset.n_samples
self.num_attribute = X_train.shape[1]
self.grad_w = np.zeros(self.num_attribute)
self.grad_v = np.zeros((self.num_factors, self.num_attribute))
self.w0 = 0.0
self.w = np.zeros(self.num_attribute)
np.random.seed(seed=self.seed)
self.v = np.random.normal(scale=self.init_stdev,size=(self.num_factors, self.num_attribute))
self.count = 0
# for refresh the data
x_data_ptr = [0]
x_ind_ptr = [0]
xnnz = [0]
y = [0]
val_x_data_ptr = [0]
val_x_ind_ptr = [0]
val_xnnz = [0]
val_y = [0]
for epoch in range(self.num_iter):
if self.verbose:
print('-- Epoch %d' % (epoch+1))
self.count = 0
self.sumloss = 0.
if self.shuffle_training:
# shuffle train_set
pass
for i in range(n_samples):
train_dataset.next(x_data_ptr, x_ind_ptr, xnnz, y)
self._sgd_theta_step(x_data_ptr[0], x_ind_ptr[0], xnnz[0], y[0])
# if epoch > 0:
# val_dataset.next(val_x_data_ptr, val_x_ind_ptr, val_xnnz, val_y)
# self._sgd_lambda_step(val_x_data_ptr[0], val_x_ind_ptr[0], val_xnnz[0], val_y[0])
if self.verbose:
error_type = "MSE" if self.task == TASKS['regression'] else "log loss"
print "Training %s: %.5f" % (error_type, (self.sumloss / self.count))
def predict_proba(self, X):
X_dataset = CSRDataset(X, np.ones(X.shape[0]))
n_samples = X_dataset.n_samples
x_data_ptr = [0]
x_ind_ptr = [0]
xnnz = [0]
y = [0]
return_preds = np.zeros(n_samples)
for i in range(n_samples):
X_dataset.next(x_data_ptr, x_ind_ptr, xnnz, y)
p = self._predict_instance(x_data_ptr[0], x_ind_ptr[0], xnnz[0])
if self.task == TASKS['regression']:
p = min(self.max_target, p)
p = max(self.min_target, p)
elif self.task == TASKS['classification']:
p = (1.0 / (1.0 + exp(-p)))
return_preds[i] = p
return return_preds
def predict(self, X):
proba = self.predict_proba(X)
if self.task == TASKS['regression']:
return proba
elif self.task == TASKS['classification']:
proba[proba < 0.5] = 0
proba[proba >= 0.5] = 1
return proba
else:
pass
class CSRDataset():
    '''
    Sequential cursor over a CSR sparse matrix and its targets.

    Each call to ``next`` writes one sample's non-zero values, their
    column indices, the non-zero count and the target into the
    caller-supplied one-element lists, wrapping around after the last
    sample.
    # todo: put it in other file
    '''
    def __init__(self, X, y):
        # Dense / list input is normalised to CSR first.
        if isinstance(X, (np.ndarray, list)):
            X = csr_matrix(X)
        assert isinstance(X, csr_matrix), '**************\n训练数据格式错误\n************'
        self.n_samples = y.shape[0]
        self.current_index = -1          # cursor position; -1 = before start
        self.X_data_ptr = X.data         # non-zero values
        self.X_indptr_ptr = X.indptr     # row start offsets into data/indices
        self.X_indices_ptr = X.indices   # column indices of the non-zeros
        self.y_data_ptr = y
        # visit order; index[k] is the sample served at cursor position k
        self.index = np.arange(0, self.n_samples, dtype=np.int32)

    def next(self, x_data_ptr, x_ind_ptr, nnz, y):
        '''
        Advance the cursor (with wrap-around) and emit the next sample
        through the four one-element output lists.
        # todo: sample_weight
        '''
        pos = self.current_index
        # wrap back to the first sample once the end is reached
        if pos >= self.n_samples - 1:
            pos = -1
        pos += 1
        row = self.index[pos]
        start = self.X_indptr_ptr[row]
        stop = self.X_indptr_ptr[row + 1]
        y[0] = self.y_data_ptr[row]
        x_data_ptr[0] = self.X_data_ptr[start:stop]
        x_ind_ptr[0] = self.X_indices_ptr[start:stop]
        nnz[0] = stop - start
        self.current_index = pos