参考文章:http://blog.fens.me/hadoop-mapreduce-recommend/
最近在学习 Hadoop,并用 Hadoop 实现了一个电影推荐系统,详细步骤请查看上面的链接。在最后一步计算推荐值时,我对代码十分不解,一直不明白它的意思;经过好几天的学习,终于弄懂了,于是随手把自己的理解记录下来,以便日后回顾。本文使用的 Hadoop 版本是 1.1.2。
最后一步的代码如下:
package com.hadoop.mapreduce.examples.movie_recommendation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class ComputeRecommend {
// In-memory co-occurrence table, keyed by the first item ID of each "itemID1:itemID2" record.
// Each value is the list of Cooccurrence entries that MyMapper.map() appended for that item.
// Being static, the map is shared by every map() call running in the same JVM.
// NOTE(review): the user-vector branch of map() iterates cooccurrenceMatrix.get(itemID) with no
// null check, so it appears to assume the co-occurrence rows for that item were already read by
// this same JVM -- confirm the input ordering / mapper layout guarantees this.
// NOTE(review): the value type is the raw List; presumably it only ever holds Cooccurrence -- verify.
private final static Map<Integer, List> cooccurrenceMatrix = new HashMap<Integer, List>();
public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] tokens = value.toString().split(",");
System.out.println(tokens[0]);
String[] v1 = tokens[0].split(":");
String[] v2 = tokens[1].split(":");
if (v1.length > 1) {// cooccurrence
int itemID1 = Integer.parseInt(v1[0].trim());
int itemID2 = Integer.parseInt(v1[1].trim());
int num = Integer.parseInt(tokens[1].trim());
List list = null;
if (!cooccurrenceMatrix.containsKey(itemID1)) {
list = new ArrayList();
} else {
list = cooccurrenceMatrix.get(itemID1);
}
list.add(new Cooccurrence(itemID1, itemID2, num));
cooccurrenceMatrix.put(itemID1, list);
}
if (v2.length > 1) {// userVector
int itemID = Integer.parseInt(tokens[0]);
int userID = Integer.parseInt(v2[0]);
double pref = Double.parseDouble(v2[1]);
for (Object cob : cooccurrenceMatrix.get(itemID)) {