MapReduce算法
输入为键值对:key是用户,value是该用户的好友列表。在map阶段,为用户和他的每一个好友构造新的key(用户,好友),并对两个ID排序,使(A,B)与(B,A)得到同一个key;value是该用户的好友列表(也可以去掉该好友本人,不影响交集结果)。在归约器中对相同key的所有value求交集,得到的就是这两个用户的共同好友。
map(key, value){
  person=key;
  reducevalue=value;   // (<friend1><friend2>...<friendn>)
  foreach friend in reducevalue{
    reducekey=buildSortedKey(person,friend);
    emit(reducekey, reducevalue);
  }
}
reduce(key,value){
  // value=(<list1><list2>...<listm>)
  outputkey=key;
  outputvalue=intersection(list1,list2,...,listm);
  emit(outputkey,outputvalue);
}
Spark实现方案
public class FindCommonFriends{
public static void main(String[] args) throws Exception{
//确认输入参数
//创建sparkcontext上下文对象
JavaSparkContext ctx = new JavaSparkContext();
//从text创建RDD
JavaRDD<String> person = ctx.textfile("",1);
//建立键值对,key为用户和客户对,value为客户列表
JavaPairRDD<Tuple2<Long,Long>, Iterable<Long>> pairs=person.flatMapToPair(
new PairFlatMap Function<String, Tuple2<Long,Long>,Iterable<Long>>(){
public Iterable<Tuple2<Tuple2<String,String>,List<String>>>call(String s){
String[] tokens=s.split(",");
long person = Long.parselong(tokens[0]);
String friendAsString= tokens[1];
String[] friendsAsTokenized= friendAsString.split(" ");
if (friendsAsTokenized.length ==1){
Tuple2<Long,Long>key = buidSortedTuple(person, Long.parseLong(frindsAsTokenized[0]));
return Arrays.asList(
new Tuple2<Tuple2<Long,Long>,Iterable<Long>>(key, new ArrayList<Long>()))
};
List<Long> friends = new ArrayList<Long>();
for (String f : friendAsTokenized){
friends.add(Long.parselong(f));
}
List<Tuple2<Tuple2<Long,Long>,Iterable<Long>>> result=
new ArrayList<Tuple2<Tuple2<Long,Long>,Iterable<Long>>>();
for(Long f:friends){
result.add(new Tuple2<Tuple2<Long,Long>,Iterable<Long>>)(key,friends);}
return result;
}
})
//规约键值对
JavaPairRDD<Tuple2<Long,Long>,Iterable<Iterable<Long>>> grouped=pairs.groupByKey();
//从pair的客户列表中创建交集
JavaPairRDD<Tuple2<Long,Long>,Iterable<Long>> commonFriends=pairs.reduceByKey<
new Function2<<Iterable<long>>,Iterable<Long>>(){
public Iterable<Long> call(Iterable<Long>a, Iterable<Long> b){
Set<Long>x =Stets.newHashSet(a);
Set<Long> intersection = new HashSet<Long>();
for (Long item :b){
if (x.contains(item)){
intersection.add(item);}
}
}
return intersection;
}
}
//
//
//
//
}
}