大数据Spark处理算法005-查找共同好友

package cn.spark.study.core;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFlatMapFunction;

import scala.Tuple2;

public class SparkFriendRecommendation {
    
    public static void main(String[] args){
        
        SparkConf conf = new SparkConf().setAppName("SparkFriendRecommendation");
        
        JavaSparkContext jsc = new JavaSparkContext(conf);
        
        if(args.length < 1){
            System.out.println("err");
            System.exit(1);
        }
        JavaRDD<String> records = jsc.textFile(args[0],1);
        
        JavaPairRDD<Long,Tuple2<Long,Long>> pairs = records.flatMapToPair(new PairFlatMapFunction<String,Long,Tuple2<Long,Long>>(){

            private static final long serialVersionUID = 1L;

            @Override
            public Iterable<Tuple2<Long, Tuple2<Long, Long>>> call(String s) throws Exception {
                String[] tokens = s.split(" ");
                Long person = Long.parseLong(tokens[0]);
                
                String friedsAsString = tokens[1];
                String[] friendsTokenized = friedsAsString.split(",");
                
                List<Long> friends = new ArrayList<Long>();
                
                List<Tuple2<Long,Tuple2<Long,Long>>> mapperOutput = new ArrayList<Tuple2<Long,Tuple2<Long,Long>>>();
                
                for (String f : friendsTokenized){
                    Long fl = Long.parseLong(f);
                    friends.add(fl);
                    Tuple2<Long,Long> v1 = new Tuple2<Long,Long>(fl,-1L);
                    Tuple2<Long,Tuple2<Long,Long>> v = new Tuple2<Long,Tuple2<Long,Long>>(person,v1);
                    mapperOutput.add(v);
                }
                for(int i = 0;i < friends.size();i++){
                    for(int j = i+1; j < friends.size();j++){
                        Tuple2<Long,Long> pf1 = new Tuple2<Long,Long>(friends.get(j),person);
                        mapperOutput.add(new Tuple2<Long,Tuple2<Long,Long>>(friends.get(i),pf1));
                        
                        Tuple2<Long,Long> pf2 = new Tuple2<Long,Long>(friends.get(i),person);
                        mapperOutput.add(new Tuple2<Long,Tuple2<Long,Long>>(friends.get(j),pf2));
                        
                    }
                }
                return mapperOutput;
            }
            
        });
        
        //debug2
        List<Tuple2<Long, Tuple2<Long, Long>>> debug2 = pairs.collect();
        for(Tuple2<Long, Tuple2<Long, Long>> r : debug2){
            System.out.println("debug2 key = " + r._1 + "\t value = " + r._2);
        }
        
        JavaPairRDD<Long, Iterable<Tuple2<Long, Long>>> grouped = pairs.groupByKey();
        //debug3
        List<Tuple2<Long, Iterable<Tuple2<Long, Long>>>> debug3 = grouped.collect();
        for(Tuple2<Long, Iterable<Tuple2<Long, Long>>> r : debug3){
            System.out.println("debug3 key = " + r._1 + "\t value = " + r._2);
        }
        
        JavaPairRDD<Long,String> rec = grouped.mapValues(new Function<Iterable<Tuple2<Long,Long>>,String>(){

            private static final long serialVersionUID = 1L;

            @Override
            public String call(Iterable<Tuple2<Long, Long>> s) throws Exception {
                final Map<Long,List<Long>> mutualFriends = new  HashMap<Long,List<Long>>();
                
                for(Tuple2<Long,Long> t2 : s){
                    final Long toUser = t2._1;
                    final Long mutualFriend = t2._2;
                    final boolean alreadyFriend = (mutualFriend == -1);
                    if(mutualFriends.containsKey(toUser)){
                        if(alreadyFriend){
                            mutualFriends.put(toUser, null);
                        }
                        else if(mutualFriends.get(toUser) != null){
                            mutualFriends.get(toUser).add(mutualFriend);
                        }
                    }
                    else{
                        if(alreadyFriend){
                            mutualFriends.put(toUser, null);
                        }
                        else {
                            List<Long> list1 = new ArrayList<Long>(Arrays.asList(mutualFriend));
                            mutualFriends.put(toUser, list1);
                        }
                    }
                            
                }
//                mutualFriends.forEach((Key,Value)->{
//                    for(Long l : Value){
//                        System.out.println("Map<Long,List<Long>>" + l);
//                    }
//                });
                return buildRecommendations(mutualFriends);
            }
            
        });
        
        //debug4
        List<Tuple2<Long, String>> debug4 = rec.collect();
        for(Tuple2<Long, String> r : debug4){
            System.out.println("last key = " + r._1 + "\t value = " + r._2);
        }
        
        jsc.close();
        
    }
    static String buildRecommendations(Map<Long,List<Long>> mutualFriends){
        StringBuilder r = new StringBuilder();
        for(Map.Entry<Long, List<Long>> entry : mutualFriends.entrySet()){
            if(entry.getValue() == null){
                continue;
            }
            r.append(entry.getKey());
            r.append("(");
            r.append(entry.getValue().size());
            r.append(":");
            r.append(entry.getValue());
            r.append("),");
            
        }
        return r.toString();
    }
}
测试数据:

1 2,3,4,5,6,7,8
2 1,3,4,5,7
3 1,2
4 1,2,6
5 1,2
6 1,4
7 1,2
8 1

本例中,每行第一个数字为用户ID,空格后为该用户的好友列表(以逗号分隔)。例如用户1的好友分别为2,3,4,5,6,7,8,其余各行以此类推。

脚本:

/usr/local/spark1.5/bin/spark-submit \
--class cn.spark.study.core.SparkFriendRecommendation \
--num-executors 3 \
--driver-memory 100m \
--executor-memory 100m \
--executor-cores 3 \
/usr/local/spark-text/java/SparkFriendRecommendation/SparkFriendRecommendation.jar hdfs://spark01:9000/tSparkFriendRecommendation.txt

运行结果:

debug2 key = 1   value = (2,-1)
debug2 key = 1   value = (3,-1)
debug2 key = 1   value = (4,-1)
debug2 key = 1   value = (5,-1)
debug2 key = 1   value = (6,-1)
debug2 key = 1   value = (7,-1)
debug2 key = 1   value = (8,-1)
debug2 key = 2   value = (3,1)
debug2 key = 3   value = (2,1)
debug2 key = 2   value = (4,1)
debug2 key = 4   value = (2,1)
debug2 key = 2   value = (5,1)
debug2 key = 5   value = (2,1)
debug2 key = 2   value = (6,1)
debug2 key = 6   value = (2,1)
debug2 key = 2   value = (7,1)
debug2 key = 7   value = (2,1)
debug2 key = 2   value = (8,1)
debug2 key = 8   value = (2,1)
debug2 key = 3   value = (4,1)
debug2 key = 4   value = (3,1)
debug2 key = 3   value = (5,1)
debug2 key = 5   value = (3,1)
debug2 key = 3   value = (6,1)
debug2 key = 6   value = (3,1)
debug2 key = 3   value = (7,1)
debug2 key = 7   value = (3,1)
debug2 key = 3   value = (8,1)
debug2 key = 8   value = (3,1)
debug2 key = 4   value = (5,1)
debug2 key = 5   value = (4,1)
debug2 key = 4   value = (6,1)
debug2 key = 6   value = (4,1)
debug2 key = 4   value = (7,1)
debug2 key = 7   value = (4,1)
debug2 key = 4   value = (8,1)
debug2 key = 8   value = (4,1)
debug2 key = 5   value = (6,1)
debug2 key = 6   value = (5,1)
debug2 key = 5   value = (7,1)
debug2 key = 7   value = (5,1)
debug2 key = 5   value = (8,1)
debug2 key = 8   value = (5,1)
debug2 key = 6   value = (7,1)
debug2 key = 7   value = (6,1)
debug2 key = 6   value = (8,1)
debug2 key = 8   value = (6,1)
debug2 key = 7   value = (8,1)
debug2 key = 8   value = (7,1)
debug2 key = 2   value = (1,-1)
debug2 key = 2   value = (3,-1)
debug2 key = 2   value = (4,-1)
debug2 key = 2   value = (5,-1)
debug2 key = 2   value = (7,-1)
debug2 key = 1   value = (3,2)
debug2 key = 3   value = (1,2)
debug2 key = 1   value = (4,2)
debug2 key = 4   value = (1,2)
debug2 key = 1   value = (5,2)
debug2 key = 5   value = (1,2)
debug2 key = 1   value = (7,2)
debug2 key = 7   value = (1,2)
debug2 key = 3   value = (4,2)
debug2 key = 4   value = (3,2)
debug2 key = 3   value = (5,2)
debug2 key = 5   value = (3,2)
debug2 key = 3   value = (7,2)
debug2 key = 7   value = (3,2)
debug2 key = 4   value = (5,2)
debug2 key = 5   value = (4,2)
debug2 key = 4   value = (7,2)
debug2 key = 7   value = (4,2)
debug2 key = 5   value = (7,2)
debug2 key = 7   value = (5,2)
debug2 key = 3   value = (1,-1)
debug2 key = 3   value = (2,-1)
debug2 key = 1   value = (2,3)
debug2 key = 2   value = (1,3)
debug2 key = 4   value = (1,-1)
debug2 key = 4   value = (2,-1)
debug2 key = 4   value = (6,-1)
debug2 key = 1   value = (2,4)
debug2 key = 2   value = (1,4)
debug2 key = 1   value = (6,4)
debug2 key = 6   value = (1,4)
debug2 key = 2   value = (6,4)
debug2 key = 6   value = (2,4)
debug2 key = 5   value = (1,-1)
debug2 key = 5   value = (2,-1)
debug2 key = 1   value = (2,5)
debug2 key = 2   value = (1,5)
debug2 key = 6   value = (1,-1)
debug2 key = 6   value = (4,-1)
debug2 key = 1   value = (4,6)
debug2 key = 4   value = (1,6)
debug2 key = 7   value = (1,-1)
debug2 key = 7   value = (2,-1)
debug2 key = 1   value = (2,7)
debug2 key = 2   value = (1,7)
debug2 key = 8   value = (1,-1)

 

debug3 key = 4   value = [(2,1), (3,1), (5,1), (6,1), (7,1), (8,1), (1,2), (3,2), (5,2), (7,2), (1,-1), (2,-1), (6,-1), (1,6)]
debug3 key = 1   value = [(2,-1), (3,-1), (4,-1), (5,-1), (6,-1), (7,-1), (8,-1), (3,2), (4,2), (5,2), (7,2), (2,3), (2,4), (6,4), (2,5), (4,6), (2,7)]
debug3 key = 6   value = [(2,1), (3,1), (4,1), (5,1), (7,1), (8,1), (1,4), (2,4), (1,-1), (4,-1)]
debug3 key = 3   value = [(2,1), (4,1), (5,1), (6,1), (7,1), (8,1), (1,2), (4,2), (5,2), (7,2), (1,-1), (2,-1)]
debug3 key = 7   value = [(2,1), (3,1), (4,1), (5,1), (6,1), (8,1), (1,2), (3,2), (4,2), (5,2), (1,-1), (2,-1)]
debug3 key = 8   value = [(2,1), (3,1), (4,1), (5,1), (6,1), (7,1), (1,-1)]
debug3 key = 5   value = [(2,1), (3,1), (4,1), (6,1), (7,1), (8,1), (1,2), (3,2), (4,2), (7,2), (1,-1), (2,-1)]
debug3 key = 2   value = [(3,1), (4,1), (5,1), (6,1), (7,1), (8,1), (1,-1), (3,-1), (4,-1), (5,-1), (7,-1), (1,3), (1,4), (6,4), (1,5), (1,7)]

 

last key = 4     value = 3(2:[1, 2]),5(2:[1, 2]),7(2:[1, 2]),8(1:[1]),
last key = 1     value = 
last key = 6     value = 2(2:[1, 4]),3(1:[1]),5(1:[1]),7(1:[1]),8(1:[1]),
last key = 3     value = 4(2:[1, 2]),5(2:[1, 2]),6(1:[1]),7(2:[1, 2]),8(1:[1]),
last key = 7     value = 3(2:[1, 2]),4(2:[1, 2]),5(2:[1, 2]),6(1:[1]),8(1:[1]),
last key = 8     value = 2(1:[1]),3(1:[1]),4(1:[1]),5(1:[1]),6(1:[1]),7(1:[1]),
last key = 5     value = 3(2:[1, 2]),4(2:[1, 2]),6(1:[1]),7(2:[1, 2]),8(1:[1]),
last key = 2     value = 6(2:[1, 4]),8(1:[1]),

本例中,给用户4推荐的好友之一为3,两人的共同好友个数为2,分别是1和2。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值