Integrating ALS & LR in Java to Recommend Merchants to Users

Recall (ALS) Integration

  • The recall results computed offline earlier were saved in MySQL;
  • The service just reads them back out (a sketch of the backing entity follows the RecommendService code below);
package tech.lixinlei.dianping.recommand;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import tech.lixinlei.dianping.dal.RecommendModelMapper;
import tech.lixinlei.dianping.model.RecommendModel;

@Service
public class RecommendService {

    @Autowired
    private RecommendModelMapper recommendModelMapper;

    /**
     * Recall: fetch the list of recommended shopIds for the given userId.
     * @param userId
     * @return
     */
    public List<Integer> recall(Integer userId){
        RecommendModel recommendModel = recommendModelMapper.selectByPrimaryKey(userId);
        if(recommendModel == null){
            // No personalized recall result for this user: fall back to the default row
            // (key 9999999), presumably a global recommendation list for cold-start users
            recommendModel = recommendModelMapper.selectByPrimaryKey(9999999);
        }
        String[] shopIdArr = recommendModel.getRecommend().split(",");
        List<Integer> shopIdList = new ArrayList<>();
        for(int i = 0; i < shopIdArr.length; i++) {
            shopIdList.add(Integer.valueOf(shopIdArr[i]));
        }
        return shopIdList;
    }

}
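
The RecommendService above relies on the MyBatis-generated RecommendModel and RecommendModelMapper, which are not listed in the post. As a rough sketch, assuming the offline ALS job writes one row per user with the recommended shop ids stored as a comma-separated string, the entity might look roughly like this (everything except getRecommend() is an assumption):

```java
package tech.lixinlei.dianping.model;

// Sketch of the entity backing the offline recall table; not the project's actual class.
// Assumes one row per userId, with the recommended shop ids as a comma-separated string.
public class RecommendModel {

    private Integer id;        // primary key; the userId passed to selectByPrimaryKey
    private String recommend;  // e.g. "12,55,103,7"

    public Integer getId() { return id; }
    public void setId(Integer id) { this.id = id; }

    public String getRecommend() { return recommend; }
    public void setRecommend(String recommend) { this.recommend = recommend; }
}
```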

Ranking (LR) Integration

package tech.lixinlei.dianping.recommand;

import org.apache.spark.ml.classification.LogisticRegressionModel;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors;
import org.apache.spark.sql.SparkSession;
import org.springframework.stereotype.Service;

import javax.annotation.PostConstruct;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

@Service
public class RecommendSortService {

    private SparkSession spark;

    private LogisticRegressionModel lrModel;


    @PostConstruct
    public void init(){
        // Create a local SparkSession and load the trained LR model from disk
        spark = SparkSession.builder().master("local").appName("DianpingApp").getOrCreate();
        lrModel = LogisticRegressionModel.load("file:///home/lixinlei/project/gitee/dianping/src/main/resources/lrmode");
    }

    public List<Integer> sort(List<Integer> shopIdList, Integer userId){

        // Build the 11-dimensional feature vector x expected by the LR model, then call its prediction method
        List<ShopSortModel> list = new ArrayList<>();
        for(Integer shopId : shopIdList){
            // Dummy feature values; in a real system the user's gender/age and the shop's rating/price, etc.
            // would be fetched from the database or cache and transformed into this vector (see the sketch after this class)
            Vector v = Vectors.dense(1,0,0,0,0,1,0.6,0,0,1,0);
            Vector result = lrModel.predictProbability(v);
            // arr[1] is the predicted probability of the positive class
            double[] arr = result.toArray();
            double score = arr[1];
            ShopSortModel shopSortModel = new ShopSortModel();
            shopSortModel.setShopId(shopId);
            shopSortModel.setScore(score);
            list.add(shopSortModel);
        }

        list.sort(new Comparator<ShopSortModel>() {
            @Override
            public int compare(ShopSortModel o1, ShopSortModel o2) {
                if(o1.getScore() < o2.getScore()){
                    return 1;
                }else if(o1.getScore() > o2.getScore()){
                    return -1;
                }else{
                    return 0;
                }
            }
        });

        return list.stream().map(shopSortModel -> shopSortModel.getShopId()).collect(Collectors.toList());
    }

}
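
ShopSortModel is referenced above but not listed in the post; it is just a (shopId, score) holder. A minimal sketch of what it presumably looks like:

```java
package tech.lixinlei.dianping.recommand;

// Minimal sketch of the (shopId, score) holder used by RecommendSortService;
// the project's real class may differ, this is only an assumption.
public class ShopSortModel {

    private Integer shopId;
    private double score;

    public Integer getShopId() { return shopId; }
    public void setShopId(Integer shopId) { this.shopId = shopId; }

    public double getScore() { return score; }
    public void setScore(double score) { this.score = score; }
}
```

To replace the dummy Vectors.dense(...) call with real features, the 11 dimensions have to be assembled exactly the way the offline LR training job encoded them. The sketch below only assumes one plausible encoding (gender one-hot + age-bucket one-hot + normalized rating + price-bucket one-hot, which adds up to 11); the class, method and bucket layout are hypothetical:

```java
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors;

// Hypothetical feature assembly; the real layout must match whatever the offline
// training job used. Assumed: gender one-hot (2) + age bucket one-hot (4)
// + normalized rating (1) + price bucket one-hot (4) = 11 dimensions.
public class FeatureBuilder {

    public static Vector buildFeatures(int gender, int ageBucket, double rating, int priceBucket) {
        double[] x = new double[11];
        x[gender] = 1.0;            // gender index: 0 or 1
        x[2 + ageBucket] = 1.0;     // ageBucket in 0..3
        x[6] = rating / 5.0;        // rating scaled to [0,1]
        x[7 + priceBucket] = 1.0;   // priceBucket in 0..3
        return Vectors.dense(x);
    }
}
```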

Modifying the Original recommend Method

  • Recall first, then rank;
package tech.lixinlei.dianping.service.impl;

// imports and the other fields/methods of ShopServiceImpl are unchanged and omitted here

@Service
public class ShopServiceImpl implements ShopService {

    @Autowired
    RecommendService recommendService;

    @Autowired
    RecommendSortService recommendSortService;

    /**
     * Recall first, then rank. The userId is hard-coded to 148 here as a placeholder
     * for the logged-in user.
     * @param longitude
     * @param latitude
     * @return
     */
    @Override
    public List<ShopModel> recommend(BigDecimal longitude, BigDecimal latitude) {
        List<Integer> shopIdList = recommendService.recall(148);
        shopIdList = recommendSortService.sort(shopIdList,148);
        List<ShopModel> shopModelList = shopIdList.stream().map(id->{
            ShopModel shopModel = get(id);
            shopModel.setIconUrl("/static/image/shopcover/xchg.jpg");
            shopModel.setDistance(100);
            return shopModel;
        }).collect(Collectors.toList());
//        List<ShopModel> shopModelList = shopModelMapper.recommend(longitude, latitude);
//        shopModelList.forEach(shopModel -> {
//            shopModel.setSellerModel(sellerService.get(shopModel.getSellerId()));
//            shopModel.setCategoryModel(categoryService.get(shopModel.getCategoryId()));
//        });
        return shopModelList;
    }

}

Other Recommendation Approaches in Spark

Besides ALS, other recommendation approaches can be built on Spark, including:

1. Content-based recommendation: recommend items whose attributes are similar to those of the items a user already likes. In Spark this can be implemented with MLlib's TF-IDF features plus a cosine-similarity computation.
2. Collaborative filtering: recommend items based on user-item interactions (ratings, clicks, etc.). ALS is the collaborative-filtering implementation that ships with Spark MLlib; other matrix-factorization variants such as SVD++ or latent-factor models have to be implemented on top of Spark or taken from third-party libraries.
3. Hybrid recommendation: combine several recommenders, e.g. content-based plus collaborative filtering, to improve accuracy and coverage. A simple way to do this is to take a weighted sum of the scores produced by each recommender.

Below is a content-based recommendation example in PySpark:

```python
from pyspark.sql import SparkSession
from pyspark.ml.feature import HashingTF, IDF, Tokenizer
from pyspark.sql.functions import col, collect_list, concat_ws, udf
from pyspark.sql.types import FloatType

spark = SparkSession.builder.master("local").appName("ContentBasedDemo").getOrCreate()

# Movie dataset
movies = spark.createDataFrame([
    (0, "The Shawshank Redemption", "drama"),
    (1, "The Godfather", "drama"),
    (2, "The Dark Knight", "action"),
    (3, "The Lord of the Rings: The Fellowship of the Ring", "adventure"),
    (4, "The Matrix", "action"),
    (5, "Inception", "action"),
    (6, "Forrest Gump", "drama"),
    (7, "The Lord of the Rings: The Return of the King", "adventure"),
    (8, "The Godfather: Part II", "drama"),
    (9, "The Lord of the Rings: The Two Towers", "adventure")
], ["movieId", "title", "genre"])

# User rating dataset
ratings = spark.createDataFrame([
    (0, 0, 5), (0, 1, 4), (0, 2, 3), (0, 3, 5), (0, 4, 4), (0, 5, 3),
    (1, 0, 4), (1, 1, 5), (1, 2, 4), (1, 3, 3), (1, 4, 4), (1, 5, 5),
    (2, 0, 3), (2, 1, 4), (2, 3, 5), (2, 4, 3), (2, 5, 4),
    (3, 1, 5), (3, 3, 4), (3, 4, 5), (3, 5, 5),
    (4, 0, 4), (4, 1, 3), (4, 2, 5), (4, 3, 4), (4, 4, 3), (4, 5, 4)
], ["userId", "movieId", "rating"])

# Turn movie genres into TF-IDF feature vectors
tokenizer = Tokenizer(inputCol="genre", outputCol="words")
hashingTF = HashingTF(inputCol="words", outputCol="rawFeatures", numFeatures=20)
wordsData = tokenizer.transform(movies)
featurizedData = hashingTF.transform(wordsData)
idf = IDF(inputCol="rawFeatures", outputCol="features")
idfModel = idf.fit(featurizedData)
rescaledData = idfModel.transform(featurizedData)

# Build each user's genre profile: concatenate the genres of the movies the user rated,
# then run the same TF-IDF pipeline over that text
userGenres = (ratings.join(movies, "movieId")
              .groupBy("userId")
              .agg(concat_ws(" ", collect_list("genre")).alias("genre")))
userFeatures = (idfModel.transform(hashingTF.transform(tokenizer.transform(userGenres)))
                .select("userId", col("features").alias("userFeatures")))

# Similarity between each movie and each user profile
# (dot product of the TF-IDF vectors, an unnormalized cosine-style score)
dot_udf = udf(lambda x, y: float(x.dot(y)), FloatType())
similarity = (rescaledData.select("movieId", "features")
              .crossJoin(userFeatures)
              .select("movieId", "userId",
                      dot_udf("features", "userFeatures").alias("similarity")))

# Recommend the 3 most similar movies for user 0
recommendations = (similarity.filter(col("userId") == 0)
                   .orderBy(col("similarity").desc())
                   .limit(3))
recommendedMovieIds = [row.movieId for row in recommendations.collect()]

# Show the recommended movies
movies.filter(col("movieId").isin(recommendedMovieIds)).show()
```

The example builds TF-IDF vectors for the movies and for each user's genre profile, scores every movie against each user profile with a dot-product similarity, and recommends the 3 highest-scoring movies for the user.
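
To make the hybrid idea in point 3 concrete for this project, here is a purely illustrative Java sketch (class, method and weight parameters are hypothetical, not part of the project) of fusing two recommenders' per-shop scores with a weighted sum:

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

// Purely illustrative weighted-sum fusion of two score maps (shopId -> score).
// The weights are assumptions; in practice they would be tuned offline.
public class HybridRecommender {

    public static List<Integer> fuse(Map<Integer, Double> cfScores,
                                     Map<Integer, Double> contentScores,
                                     double cfWeight, double contentWeight) {
        Map<Integer, Double> fused = new HashMap<>();
        cfScores.forEach((shopId, s) -> fused.merge(shopId, cfWeight * s, Double::sum));
        contentScores.forEach((shopId, s) -> fused.merge(shopId, contentWeight * s, Double::sum));
        // Sort shop ids by fused score, highest first
        return fused.entrySet().stream()
                .sorted(Map.Entry.<Integer, Double>comparingByValue().reversed())
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
    }
}
```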