FTRL: Principles and Engineering Implementation (LR/FM + FTRL)

 

1. LR + FTRL: Algorithm Principles and Engineering Implementation
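Before the code, recall the per-coordinate FTRL-Proximal update from the Google paper referenced in the docstring below; the `update` method is essentially a line-by-line implementation of it. With the logloss gradient $g_i = (p - y)\,x_i$ and per-coordinate state $z_i$, $n_i$:

$$\sigma_i = \frac{\sqrt{n_i + g_i^2} - \sqrt{n_i}}{\alpha},\qquad z_i \leftarrow z_i + g_i - \sigma_i w_i,\qquad n_i \leftarrow n_i + g_i^2$$

and the lazy weight is then rebuilt as

$$w_i = \begin{cases} 0 & \text{if } |z_i| \le \lambda_1 \\[4pt] -\dfrac{z_i - \operatorname{sgn}(z_i)\,\lambda_1}{(\beta + \sqrt{n_i})/\alpha + \lambda_2} & \text{otherwise.} \end{cases}$$

The $\lambda_1$ threshold drives coordinates with small $|z_i|$ exactly to zero, which is what lets FTRL learn sparse models online.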

from datetime import datetime
from csv import DictReader
from math import exp, log, sqrt
import gzip
import random
import json
import argparse


class FTRLProximal(object):
    """
    FTRL Proximal engineer project with logistic regression
    Reference:
    https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/41159.pdf

    """

    def __init__(self, alpha, beta, L1, L2, D,
                 interaction=False, dropout=1.0,
                 dayfeature=True,
                 device_counters=False):

        # parameters
        self.alpha = alpha
        self.beta = beta
        self.L1 = L1
        self.L2 = L2
        self.dayfeature = dayfeature
        self.device_counters = device_counters

        # feature related parameters
        self.D = D
        self.interaction = interaction
        self.dropout = dropout

        # model
        self.n = [0.] * D
        self.z = [0.] * D
        self.w = [0.] * D

    def _indices(self, x):
        '''
        A helper generator that yields the indices in x
        The purpose of this generator is to make the following
        code a bit cleaner when doing feature interaction.
        '''

        for i in x:
            yield i

        if self.interaction:
            D = self.D
            L = len(x)
            for i in range(1, L):  # skip bias term, so we start at 1
                for j in range(i + 1, L):
                    # one-hot encode interactions with hash trick
                    yield abs(hash(str(x[i]) + '_' + str(x[j]))) % D

    def predict(self, x, dropped=None):
        """
        use x and the current lazy weights to estimate P(y = 1 | x)
        :param x: list of feature indices (one-hot encoded via the hash trick)
        :param dropped: optional dropout mask aligned with self._indices(x)
        :return: bounded sigmoid probability
        """
        # wTx is the inner product of w and x
        wTx = 0.
        for j, i in enumerate(self._indices(x)):

            if dropped is not None and dropped[j]:
                continue

            wTx += self.w[i]

        if dropped is not None:
            wTx /= self.dropout

        # bounded sigmoid function, this is the probability estimation
        return 1. / (1. + exp(-max(min(wTx, 35.), -35.)))

    def update(self, x, y):
        """
        update z, n and rebuild the lazy weights w from one example
        :param x: list of feature indices
        :param y: label, 0. or 1.
        :return: None
        """

        ind = [i for i in self._indices(x)]

        if self.dropout == 1:
            dropped = None
        else:
            dropped = [random.random() > self.dropout for i in range(0, len(ind))]

        p = self.predict(x, dropped)

        # gradient under logloss
        g = p - y

        # update z and n
        for j, i in enumerate(ind):

            # implement dropout as overfitting prevention
            if dropped is not None and dropped[j]:
                continue

            g_i = g  # feature values are all 1 under one-hot encoding, so the gradient for index i is just g
            sigma = (sqrt(self.n[i] + g_i * g_i) - sqrt(self.n[i])) / self.alpha
            self.z[i] += g_i - sigma * self.w[i]
            self.n[i] += g_i * g_i

            sign = -1. if self.z[i] < 0 else 1.  # get sign of z[i]

            # build w on the fly using z and n, hence the name - lazy weights -
            if sign * self.z[i] <= self.L1:
                # w[i] vanishes due to L1 regularization
                self.w[i] = 0.
            else:
                # apply the L1, L2 regularization to z and get w
                self.w[i] = (sign * self.L1 - self.z[i]) \
                            / ((self.beta + sqrt(self.n[i])) / self.alpha + self.L2)

    def save_model(self, save_file):
        """
        保存weight数据到本地
        :param save_file:
        :return:
        """
        with open(save_file, "w") as f:
            w = {k: v for k, v in enumerate(self.w) if v != 0}
            z = {k: v for k, v in enumerate(self.z) if v != 0}
            n = {k: v for k, v in enumerate(self.n) if v != 0}
            data = {
                'w': w,
                'z': z,
                'n': n
            }
            json.dump(data, f)

    def load_weight(self, model_file, D):
        """
        load the model state saved by save_model
        :param model_file: path of the saved JSON file
        :param D: feature dimension, used to rebuild the dense lists
        :return: None
        """
        with open(model_file, "r") as f:
            data = json.load(f)
            # JSON object keys are strings, so rebuild the dense lists
            # from the sparse {index: value} dicts written by save_model
            self.w = [0.] * D
            self.z = [0.] * D
            self.n = [0.] * D
            for k, v in data.get('w', {}).items():
                self.w[int(k)] = v
            for k, v in data.get('z', {}).items():
                self.z[int(k)] = v
            for k, v in data.get('n', {}).items():
                self.n[int(k)] = v

    @staticmethod
    def loss(y, y_pred):
        """
        log loss for LR model
        :param y:
        :param y_pred:
        :return:
        """
        p = max(min(y_pred, 1. - 10e-15), 10e-15)
        return -log(p) if y == 1. else -log(1. - p)


def data(f_train, D, dayfilter=None, dayfeature=True, counters=False):
    ''' GENERATOR: Apply the hash trick to each original csv row
                   and, for simplicity, one-hot-encode everything

        INPUT:
            f_train: file object of the training or testing csv
            D: the max index that we can hash to

        YIELDS:
            t: row counter
            ID: id of the instance, mainly useless
            x: a list of hashed and one-hot-encoded 'indices'
               we only need the index since all values are either 0 or 1
            y: y = 1 if we have a click, else y = 0
    '''

    device_ip_counter = {}
    device_id_counter = {}

    for t, row in enumerate(DictReader(f_train)):
        # process id
        ID = row['id']
        del row['id']

        # process clicks
        y = 0.
        if 'click' in row:
            if row['click'] == '1':
                y = 1.
            del row['click']

        # turn hour really into hour, it was originally YYMMDDHH

        date = row['hour'][0:6]
        row['hour'] = row['hour'][6:]

        if dayfilter is not None and date not in dayfilter:
            continue

        if dayfeature:
            # extract date
            row['wd'] = str(int(date) % 7)
            row['wd_hour'] = "%s_%s" % (row['wd'], row['hour'])

        if counters:
            d_ip = row['device_ip']
            d_id = row["device_id"]
            try:
                device_ip_counter[d_ip] += 1
                device_id_counter[d_id] += 1
            except KeyError:
                device_ip_counter[d_ip] = 1
                device_id_counter[d_id] = 1
            row["ipc"] = str(min(device_ip_counter[d_ip], 8))
            row["idc"] = str(min(device_id_counter[d_id], 8))

        # build x
        x = [0]  # 0 is the index of the bias term
        for key in row:
            value = row[key]
            # one-hot encode everything with hash trick
            index = abs(hash(key + '_' + value)) % D
            x.append(index)
        yield t, ID, x, y
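
The post stops at the data generator, so here is a minimal driver sketch showing how data() and FTRLProximal fit together for one online pass. This is an illustrative assumption rather than part of the referenced implementation: the file name, hyperparameters, and logging interval are placeholders for an Avazu-style gzipped CTR log.

# minimal driver sketch (illustrative, not from the original post)
if __name__ == '__main__':
    D = 2 ** 20  # size of the hash space
    learner = FTRLProximal(alpha=0.1, beta=1.0, L1=1.0, L2=1.0, D=D)
    with gzip.open('train.csv.gz', 'rt') as f_train:  # illustrative path
        loss_sum, count = 0., 0
        for t, ID, x, y in data(f_train, D):
            p = learner.predict(x)          # predict before updating,
            loss_sum += learner.loss(y, p)  # so the logloss is progressive
            learner.update(x, y)
            count += 1
            if count % 100000 == 0:
                print(f'{datetime.now()} seen: {count} logloss: {loss_sum / count:.5f}')
    learner.save_model('ftrl_model.json')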

2. FM + FTRL: Algorithm Principles and Engineering Implementation

Dataset: MovieLens 100K Dataset
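
As a quick refresher before the code (standard FM background, consistent with the predict method below): the degree-2 FM prediction is

$$\hat{y}(x) = w_0 + \sum_{i=1}^{p} w_i x_i + \sum_{i=1}^{p}\sum_{j=i+1}^{p} \langle v_i, v_j \rangle\, x_i x_j$$

and the pairwise term can be rewritten in $O(kp)$ time as

$$\frac{1}{2} \sum_{f=1}^{k} \left( \Big(\sum_{i=1}^{p} v_{f,i}\, x_i\Big)^2 - \sum_{i=1}^{p} v_{f,i}^2\, x_i^2 \right)$$

which is exactly what predict computes with two matmuls against V of shape [k, p]. The FTRL part itself is delegated to TensorFlow's FtrlOptimizer in optimize.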

# -*- coding: utf-8 -*-

"""
this is an FM + FTRL model written in a structured TensorFlow coding style, with support for online feature encoding
"""

import functools
import tensorflow as tf
import numpy as np
import os
import pandas as pd


def doublewrap(function):
    """
    A decorator for decorators: it lets the decorated decorator be used
    without parentheses if no arguments are provided. All arguments must be optional.
    """

    @functools.wraps(function)
    def decorator(*args, **kwargs):
        if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
            return function(args[0])
        else:
            return lambda wrapee: function(wrapee, *args, **kwargs)

    return decorator


@doublewrap
def define_scope(function, scope=None, *args, **kwargs):
    """
    A decorator for functions that define TensorFlow operations. The wrapped
    function will only be executed once. Subsequent calls to it will directly
    return the result so that operations are added to the graph only once.
    The operations added by the function live within a tf.variable_scope(). If
    this decorator is used with arguments, they will be forwarded to the
    variable scope. The scope name defaults to the name of the wrapped
    function.
    """
    attribute = '_cache_' + function.__name__
    name = scope or function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        if not hasattr(self, attribute):
            with tf.variable_scope(name, *args, **kwargs):
                setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator
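
# A minimal illustration of the pattern (a hypothetical class, not part of
# the referenced implementation): each @define_scope method becomes a
# cached property whose ops are added to the graph exactly once, inside
# their own variable scope.
class _ToyExample:
    def __init__(self):
        self.answer  # first access builds the op; later accesses reuse it

    @define_scope
    def answer(self):
        return tf.constant(42.0, name="answer")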


class FM_FTRL:
    def __init__(self, x, y, p, k):
        """

        :param x: input x
        :param y: label
        :param p: number of columns
        :param k: dim of v for FM pair interaction vector
        """
        self.x = x
        self.y = y
        self.p = p
        self.k = k
        # touch the lazy properties so the whole graph
        # is built once inside the constructor
        self.predict
        self.optimize
        self.w0
        self.W
        self.V
        self.norm
        self.error
        self.loss

    @define_scope
    def predict(self):
        """
        this function used to predict data
        :return:
        """
        x = self.x
        self.w0 = tf.Variable(tf.zeros([1]))
        self.W = tf.Variable(tf.zeros([self.p]))
        self.V = tf.Variable(tf.random_normal([self.k, self.p], stddev=0.01))
        linear_terms = tf.add(self.w0,
                              tf.reduce_sum(tf.multiply(self.W, x), 1, keepdims=True)
                              )
        pair_terms = tf.multiply(0.5,
                                 tf.reduce_sum(
                                     tf.subtract(
                                         tf.pow(tf.matmul(x, tf.transpose(self.V)), 2),
                                         tf.matmul(tf.pow(x, 2), tf.transpose(tf.pow(self.V, 2)))
                                     ),
                                     1, keepdims=True  # sum over the k factors, keeping shape [None, 1]
                                 ))
        predict = tf.add(linear_terms, pair_terms)
        return predict

    @define_scope
    def norm(self):
        """

        :return:
        """
        lambda_w = tf.constant(0.001, name="lambda_w")
        lambda_v = tf.constant(0.001, name="lambda_v")
        l2_norm = tf.reduce_sum(
            tf.add(
                tf.multiply(lambda_w, tf.pow(self.W, 2)),
                tf.multiply(lambda_v, tf.pow(self.V, 2))
            )
        )
        return l2_norm

    @define_scope
    def error(self):
        y = self.y
        y_hat = self.predict
        error = tf.reduce_mean(
            tf.square(
                tf.subtract(y, y_hat)
            )
        )
        return error

    @define_scope
    def loss(self):
        loss = tf.add(self.error, self.norm)
        return loss

    @define_scope
    def optimize(self, mode="ftrl"):
        if mode == 'ftrl':
            opt = tf.train.FtrlOptimizer(learning_rate=0.1).minimize(self.loss)
        else:
            opt = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.loss)
        return opt


def hash_java(key):
    """
    a Java-style string hash (multiplier 37 rather than Java's 31), masked
    to 32 bits so the result is always non-negative, which matters for the
    one-hot hash trick used below
    :param key: string to hash
    :return: non-negative 32-bit integer
    """
    h = 0
    for c in key:
        h = ((h * 37) + ord(c)) & 0xFFFFFFFF
    return h


def main():
    """

    :return:
    """
    epochs = 20
    batch_size = 1000

    D = 3000  # size of the one-hot hash space (the model width p equals D)
    k = 2     # dimension of each FM factor vector

    cols = ['user', 'item', 'rating', 'timestamp']
    use_cols = ['user', 'item', 'rating']
    features = ['user', 'item']

    data_dir = os.path.abspath(f"{os.path.abspath(os.path.dirname(os.path.realpath(__file__)))}/../../Data/fm/ml-100k")

    x = tf.placeholder('float', shape=[None, D])
    y = tf.placeholder('float', shape=[None, 1])
    model = FM_FTRL(x=x, y=y, p=D, k=k)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    num_lines = sum(1 for l in open(f'{data_dir}/ua.base')) - 1
    print(f"total train lines number is {num_lines}")
    for epoch in range(epochs):
        total_batch = 0
        avg_cost = 0.
        # shuffle the row indices so each epoch visits batches in a new random order
        index_random = np.random.permutation(num_lines)

        for row_index in range(0, index_random.shape[0], batch_size):
            # keep only the current batch rows by skipping all the others
            skip_rows = np.concatenate([index_random[:row_index], index_random[row_index + batch_size:]])
            row = pd.read_csv(f'{data_dir}/ua.base', delimiter='\t', names=cols,
                              usecols=use_cols,
                              skiprows=skip_rows)
            total_batch += 1
            bY = row['rating'].values.reshape(-1, 1)
            # one-hot encode every example of the batch with the hash trick
            bX = np.zeros([len(row), D])
            for i, (_, r) in enumerate(row.iterrows()):
                for f in features:
                    hash_index = hash_java(str(r[f]) + '_' + f) % D
                    bX[i, hash_index] = 1
            mse, loss_val, w, v, _ = sess.run([model.error, model.loss, model.W, model.V, model.optimize],
                                              feed_dict={x: bX, y: bY})
            avg_cost += loss_val
        # display logs once per epoch
        if (epoch + 1) % 1 == 0:
            print(f"total batch is {total_batch}")
            print(f"Epoch: {epoch + 1}, cost = {avg_cost / total_batch}")
    print('MSE (last training batch): ', mse)
    print('Learnt weights:', w, w.shape)
    print('Learnt factors:', v, v.shape)
    # print(f"auc value is {tf.summary.scalar('AUC', auc)}")

    errors = []
    test = pd.read_csv(f'{data_dir}/ua.test', delimiter='\t', names=cols, usecols=['user', 'item', 'rating'],
                       chunksize=batch_size)
    for row in test:
        bY = row['rating'].values.reshape(-1, 1)
        bX = np.zeros([len(row), D])
        for i, (_, r) in enumerate(row.iterrows()):
            for f in features:
                # must match the training-time encoding exactly
                hash_index = hash_java(str(r[f]) + '_' + f) % D
                bX[i, hash_index] = 1
        errors.append(sess.run(model.error, feed_dict={x: bX, y: bY}))

    RMSE = np.sqrt(np.array(errors).mean())
    print(f"test RMSE: {RMSE}")
    sess.close()


if __name__ == '__main__':
    main()

 

References:

1. In-depth Understanding of FTRL
   https://www.jianshu.com/p/befb9e02d858

2. FTRL Explained: the Online Learning Algorithm Widely Used in Industry
   https://www.cnblogs.com/EE-NovRain/p/3810737.html

3. The Road to FTRL: LR -> SGD -> TG -> FOBOS -> RDA -> FTRL
   https://blog.csdn.net/ningyanggege/article/details/81133785

4. LR + FTRL: Algorithm Principles and Engineering Implementation
   https://zhuanlan.zhihu.com/p/55135954

5. FM + FTRL: Algorithm Principles and Engineering Implementation
   https://zhuanlan.zhihu.com/p/58508137
