协同过滤推荐算法

协同过滤推荐算法通常分为基于用户相似度的推荐和基于物品相似度的推荐,此外还有基于内容相似度的推荐。下面的示例实现的是基于用户相似度的推荐。


import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;

/**
 * User-based collaborative filtering on Spark.
 *
 * <p>Input lines have the form {@code "<user> <item> <score>"} (single-space separated, integer
 * score). For every user the job finds the nearest other users by root-mean-square difference of
 * the scores on the items both have rated, and then proposes the items those neighbours rated
 * which the user has not rated yet. A proposed item is scored as
 * {@code neighbourScore - distance}; when several neighbours propose the same item the highest
 * score wins.
 */
public class SparkTest {

    /** Maximum number of nearest neighbours kept for each user. */
    private static final int MAX_NEIGHBOURS = 10;

    /** Sentinel distance for two users that have no rated item in common. */
    private static final long NO_OVERLAP = -1L;

    /**
     * Runs the recommendation job on the lines supplied by {@code Data.getList()} and prints one
     * {@code (user,{item=score, ...})} tuple per user, best-scored item first.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local[1]");
        sparkConf.setAppName("test");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        try {
            // One input line is "user item score", e.g. "张三 001 9".
            List<String> lines = Data.getList();
            for (Tuple2<String, Map<String, Integer>> recommendation : recommend(sc, lines).collect()) {
                System.out.println(recommendation);
            }
        } finally {
            // Always release the context, even when the job fails.
            sc.stop();
        }
    }

    /**
     * Builds the full pipeline: parse, group ratings per user, compute pairwise distances, keep
     * the nearest neighbours, and score the neighbours' items the user has not rated.
     *
     * @param sc    active Spark context
     * @param lines raw rating lines
     * @return user mapped to (item mapped to predicted score), ordered by descending score
     */
    private static JavaPairRDD<String, Map<String, Integer>> recommend(JavaSparkContext sc, List<String> lines) {
        // user -> (item, score); lines that do not have exactly three fields are skipped.
        JavaPairRDD<String, Tuple2<String, Integer>> ratings = sc.parallelize(lines)
                .filter(SparkTest::isRatingLine)
                .mapToPair(SparkTest::parseRating);

        // user -> {item -> score}; reused several times below, hence cached.
        Map<String, Integer> noRatings = new ConcurrentHashMap<>();
        JavaPairRDD<String, Map<String, Integer>> userRatings = ratings
                .aggregateByKey(noRatings, SparkTest::addRating, SparkTest::mergeRatings)
                .cache();

        // (userA, userB) -> distance, each unordered pair once, pairs without shared items removed.
        JavaPairRDD<Tuple2<String, String>, Long> distances = userRatings.cartesian(userRatings)
                .filter(SparkTest::isOrderedPair)
                .mapToPair(SparkTest::scorePair)
                .filter(SparkTest::hasOverlap)
                .cache();

        // Make the relation symmetric again: user -> (neighbour, distance) in both directions.
        JavaPairRDD<String, Tuple2<String, Long>> neighbourEdges = distances
                .mapToPair(SparkTest::forwardEdge)
                .union(distances.mapToPair(SparkTest::backwardEdge));

        // user -> {neighbour -> distance}, limited to the MAX_NEIGHBOURS closest ones.
        Map<String, Long> noNeighbours = new HashMap<>();
        JavaPairRDD<String, Map<String, Long>> nearest = neighbourEdges
                .aggregateByKey(noNeighbours, SparkTest::addNeighbour, SparkTest::mergeNeighbours)
                .mapValues(SparkTest::nearestNeighbours);

        // Pair every user profile (neighbours + own history) with every rating map, keep only the
        // pairs where the second user is one of the first user's neighbours, score the candidates
        // and merge them per user.
        return nearest.join(userRatings)
                .cartesian(userRatings)
                .filter(SparkTest::isNeighbourPair)
                .mapToPair(SparkTest::scoreCandidates)
                .reduceByKey(SparkTest::mergeByBestScore)
                .mapValues(SparkTest::rankByScore);
    }

    /** Returns true when the line consists of exactly three space-separated fields. */
    private static boolean isRatingLine(String line) {
        return line != null && line.split(" ").length == 3;
    }

    /** Converts a validated "user item score" line to user -> (item, score). */
    private static Tuple2<String, Tuple2<String, Integer>> parseRating(String line) {
        String[] fields = line.split(" ");
        return new Tuple2<>(fields[0], new Tuple2<>(fields[1], Integer.parseInt(fields[2])));
    }

    /** Rounded mean of two scores (floating-point division, so x.5 rounds up instead of truncating). */
    private static int averageScore(int first, int second) {
        return Math.round((first + second) / 2.0f);
    }

    /** Adds one (item, score) to a user's map; a repeated item is averaged with the stored score. */
    private static Map<String, Integer> addRating(Map<String, Integer> ratings, Tuple2<String, Integer> rating) {
        ratings.merge(rating._1, rating._2, SparkTest::averageScore);
        return ratings;
    }

    /** Merges two partial rating maps of the same user, averaging items present in both. */
    private static Map<String, Integer> mergeRatings(Map<String, Integer> left, Map<String, Integer> right) {
        for (Map.Entry<String, Integer> entry : left.entrySet()) {
            right.merge(entry.getKey(), entry.getValue(), SparkTest::averageScore);
        }
        return right;
    }

    /** Keeps a pair only when the first user name sorts strictly before the second one. */
    private static boolean isOrderedPair(
            Tuple2<Tuple2<String, Map<String, Integer>>, Tuple2<String, Map<String, Integer>>> pair) {
        return pair._1._1.compareTo(pair._2._1) < 0;
    }

    /**
     * Rounded root-mean-square difference of the scores on the items present in both maps.
     *
     * @return the distance, or {@link #NO_OVERLAP} when the maps share no item
     */
    private static long rmsDistance(Map<String, Integer> first, Map<String, Integer> second) {
        long squaredSum = 0;
        int shared = 0;
        for (Map.Entry<String, Integer> entry : second.entrySet()) {
            Integer other = first.get(entry.getKey());
            if (other != null) {
                long difference = (long) entry.getValue() - other;
                squaredSum += difference * difference;
                shared++;
            }
        }
        if (shared == 0) {
            // Dividing by zero here would yield NaN, which Math.round turns into distance 0.
            return NO_OVERLAP;
        }
        return Math.round(Math.sqrt((double) squaredSum / shared));
    }

    /** Maps a pair of user profiles to (userA, userB) -> distance. */
    private static Tuple2<Tuple2<String, String>, Long> scorePair(
            Tuple2<Tuple2<String, Map<String, Integer>>, Tuple2<String, Map<String, Integer>>> pair) {
        Tuple2<String, String> users = new Tuple2<>(pair._1._1, pair._2._1);
        return new Tuple2<>(users, rmsDistance(pair._1._2, pair._2._2));
    }

    /** Returns true when the two users of the pair share at least one rated item. */
    private static boolean hasOverlap(Tuple2<Tuple2<String, String>, Long> distance) {
        return distance._2 != NO_OVERLAP;
    }

    /** (userA, userB) -> d becomes userA -> (userB, d). */
    private static Tuple2<String, Tuple2<String, Long>> forwardEdge(Tuple2<Tuple2<String, String>, Long> distance) {
        return new Tuple2<>(distance._1._1, new Tuple2<>(distance._1._2, distance._2));
    }

    /** (userA, userB) -> d becomes userB -> (userA, d). */
    private static Tuple2<String, Tuple2<String, Long>> backwardEdge(Tuple2<Tuple2<String, String>, Long> distance) {
        return new Tuple2<>(distance._1._2, new Tuple2<>(distance._1._1, distance._2));
    }

    /** Records one neighbour and its distance; keyed by neighbour so equal distances do not collide. */
    private static Map<String, Long> addNeighbour(Map<String, Long> neighbours, Tuple2<String, Long> edge) {
        neighbours.put(edge._1, edge._2);
        return neighbours;
    }

    /** Merges two partial neighbour maps of the same user. */
    private static Map<String, Long> mergeNeighbours(Map<String, Long> left, Map<String, Long> right) {
        left.putAll(right);
        return left;
    }

    /** Keeps the {@link #MAX_NEIGHBOURS} closest neighbours (ties broken by name), closest first. */
    private static Map<String, Long> nearestNeighbours(Map<String, Long> distances) {
        List<Map.Entry<String, Long>> ordered = new ArrayList<>(distances.entrySet());
        ordered.sort((a, b) -> {
            int byDistance = Long.compare(a.getValue(), b.getValue());
            return byDistance != 0 ? byDistance : a.getKey().compareTo(b.getKey());
        });
        Map<String, Long> nearest = new LinkedHashMap<>();
        for (Map.Entry<String, Long> entry : ordered.subList(0, Math.min(MAX_NEIGHBOURS, ordered.size()))) {
            nearest.put(entry.getKey(), entry.getValue());
        }
        return nearest;
    }

    /** Returns true when the second user of the pair is one of the first user's kept neighbours. */
    private static boolean isNeighbourPair(
            Tuple2<Tuple2<String, Tuple2<Map<String, Long>, Map<String, Integer>>>,
                    Tuple2<String, Map<String, Integer>>> pair) {
        return pair._1._2._1.containsKey(pair._2._1);
    }

    /**
     * Scores the neighbour's items that the user has not rated yet as
     * {@code neighbourScore - distance}. Must only be called on pairs accepted by
     * {@link #isNeighbourPair}.
     */
    private static Tuple2<String, Map<String, Integer>> scoreCandidates(
            Tuple2<Tuple2<String, Tuple2<Map<String, Long>, Map<String, Integer>>>,
                    Tuple2<String, Map<String, Integer>>> pair) {
        Map<String, Integer> history = pair._1._2._2;
        int distance = Math.toIntExact(pair._1._2._1.get(pair._2._1));
        Map<String, Integer> candidates = new HashMap<>();
        for (Map.Entry<String, Integer> rated : pair._2._2.entrySet()) {
            if (!history.containsKey(rated.getKey())) {
                candidates.put(rated.getKey(), rated.getValue() - distance);
            }
        }
        return new Tuple2<>(pair._1._1, candidates);
    }

    /** Union of two candidate maps; an item proposed by both keeps its higher score. */
    private static Map<String, Integer> mergeByBestScore(Map<String, Integer> left, Map<String, Integer> right) {
        Map<String, Integer> merged = new HashMap<>(left);
        for (Map.Entry<String, Integer> entry : right.entrySet()) {
            merged.merge(entry.getKey(), entry.getValue(), Integer::max);
        }
        return merged;
    }

    /** Returns the candidates ordered by descending score, ties broken by item name. */
    private static Map<String, Integer> rankByScore(Map<String, Integer> candidates) {
        List<Map.Entry<String, Integer>> ordered = new ArrayList<>(candidates.entrySet());
        ordered.sort((a, b) -> {
            int byScore = Integer.compare(b.getValue(), a.getValue());
            return byScore != 0 ? byScore : a.getKey().compareTo(b.getKey());
        });
        Map<String, Integer> ranked = new LinkedHashMap<>();
        for (Map.Entry<String, Integer> entry : ordered) {
            ranked.put(entry.getKey(), entry.getValue());
        }
        return ranked;
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小钻风巡山

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值