[8] Hadoop Programming: User-Based Collaborative Filtering (UserCF)

User-based collaborative filtering (UserCF) recommends to a user the items liked by other users whose interests are similar to his.
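
In matrix form, this is the standard UserCF formulation that the steps below implement: for every user u and item i, predict a score

\[ \hat{r}_{u,i} = \sum_{v} \mathrm{sim}(u,v)\, r_{v,i}, \qquad \mathrm{sim}(u,v) = \frac{\sum_{j} r_{u,j}\, r_{v,j}}{\sqrt{\sum_{j} r_{u,j}^{2}}\, \sqrt{\sum_{j} r_{v,j}^{2}}} \]

where r_{v,i} is user v's score for item i and sim(u,v) is the cosine similarity of the two users' rating rows.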

Simulated scenario

Users: A, B, C, D, E, F

Items: 1, 2, 3, 4, 5, 6

Behavior scores: click = 1.0, search = 3.0, favorite = 5.0, pay = 10.0


The user behavior list is as follows:

User    Item    Behavior
A       1       click
A       3       favorite
A       4       search
B       2       search
B       5       search
C       1       favorite
C       6       pay
D       1       pay
D       5       favorite
E       3       favorite
E       4       click
F       2       favorite
F       3       search
F       6       click
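
Step 1 below expects this list already converted to CSV lines of the form userID,itemID,score. A minimal sketch of that conversion, assuming the score table above (the class BuildActionList and its hard-coded tables are illustrative, not part of the original project):

import java.util.Map;

public class BuildActionList {
    // behavior -> score, per the table above (requires Java 9+ for Map.of)
    private static final Map<String, Integer> SCORES = Map.of(
            "click", 1, "search", 3, "favorite", 5, "pay", 10);

    public static void main(String[] args) {
        String[][] behaviors = {
                {"A", "1", "click"}, {"A", "3", "favorite"}, {"A", "4", "search"},
                {"B", "2", "search"}, {"B", "5", "search"},
                {"C", "1", "favorite"}, {"C", "6", "pay"},
                {"D", "1", "pay"}, {"D", "5", "favorite"},
                {"E", "3", "favorite"}, {"E", "4", "click"},
                {"F", "2", "favorite"}, {"F", "3", "search"}, {"F", "6", "click"}};
        // print one CSV line per action: userID,itemID,score
        for (String[] b : behaviors) {
            System.out.println(b[0] + "," + b[1] + "," + SCORES.get(b[2]));
        }
    }
}

Running it prints exactly the actionList.txt lines used as step-1 input.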


Algorithm steps

1. Build the user-item rating matrix from the behavior list (rows = users, columns = items):

        1     2     3     4     5     6
A       1     0     5     3     0     0
B       0     3     0     0     3     0
C       5     0     0     0     0    10
D      10     0     0     0     5     0
E       0     0     5     1     0     0
F       0     5     3     0     0     1
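
For example, user A clicked item 1 (1.0), favorited item 3 (5.0), and searched for item 4 (3.0), so row A is (1, 0, 5, 3, 0, 0). If a user acted on the same item several times, the scores are summed.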


2. Build the user-user similarity matrix from the rating matrix

Compute the cosine similarity of every pair of users (a worked example follows the matrix):

        A       B       C       D       E       F
A       1       0       0.08    0.15    0.93    0.43
B       0       1       0       0.32    0       0.6
C       0.08    0       1       0.4     0       0.15
D       0.15    0.32    0.4     1       0       0
E       0.93    0       0       0       1       0.5
F       0.43    0.6     0.15    0       0.5     1
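
Each entry is the cosine of two rows of the rating matrix above. As a worked check of the A-E entry (only items 3 and 4 are rated by both users):

\[ \mathrm{sim}(A,E) = \frac{5 \times 5 + 3 \times 1}{\sqrt{1^{2} + 5^{2} + 3^{2}}\, \sqrt{5^{2} + 1^{2}}} = \frac{28}{\sqrt{35}\, \sqrt{26}} \approx 0.93 \]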


3. Similarity matrix × rating matrix = recommendation list (a worked example follows the matrix):

        1       2       3       4       5       6
A       2.9     2.2     11.0    3.9     0.8     1.2
B       3.2     6.0     1.8     0       4.6     0.6
C       9.1     0.8     0.9     0.2     2.0     10.2
D       12.2    1.0     0.8     0.5     6.0     4.0
E       0.9     2.5     11.2    3.8     0       0.5
F       1.2     6.8     7.7     1.82    1.8     2.5
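
Each cell is the dot product of one row of the similarity matrix with one column of the rating matrix. For the A row, item-1 column (dropping the zero terms):

\[ \hat{r}_{A,1} = 1 \times 1 + 0.08 \times 5 + 0.15 \times 10 = 2.9 \]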


4. In the recommendation list, zero out the items each user has already acted on (i.e., the items that already have a score in the rating matrix):

        1       2       3       4       5       6
A       0       2.2     0       0       0.8     1.2
B       3.2     0       1.8     0       0       0.6
C       0       0.8     0.9     0.2     2.0     0
D       0       1.0     0.8     0.5     0       4.0
E       0.9     2.5     0       0       0       0.5
F       1.2     0       0       1.82    1.8     0
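
Reading off the largest remaining score in each row, the top recommendation is item 2 for A (2.2), item 1 for B (3.2), item 5 for C (2.0), item 6 for D (4.0), item 2 for E (2.5), and item 4 for F (1.82).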


Project directory (one package per step, each with its mapper, reducer, and job driver):

UserCF
├── step1: Mapper1.java, Reducer1.java, MR1.java
├── step2: Mapper2.java, Reducer2.java, MR2.java
├── step3: Mapper3.java, Reducer3.java, MR3.java
├── step4: Mapper4.java, Reducer4.java, MR4.java
└── step5: Mapper5.java, Reducer5.java, MR5.java

The input file (/UserCF/step1_input/actionList.txt) is the behavior list converted to CSV, one action per line:

A,1,1
A,3,5
A,4,3
B,2,3
B,5,3
C,1,5
C,6,10
D,1,10
D,5,5
E,3,5
E,4,1
F,2,5
F,3,3
F,6,1

MapReduce steps

1. Build the rating matrix from the user behavior list

Input: userID,itemID,score

Output: userID (rows), itemID (columns), score

Code:

mapper1

package step1;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author liyijie
 * @date 2018-05-13 22:36:18
 * @email 37024760@qq.com
 *
 * Build the user-item rating matrix from the user behavior list.
 */
public class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();

    /**
     * key:   byte offset of the input line
     * value: e.g. "A,1,1" (user A clicked item 1, score 1)
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] values = value.toString().split(",");
        String userID = values[0];
        String itemID = values[1];
        String score = values[2];

        // key: userID (row)    value: itemID_score (column_value)
        outKey.set(userID);
        outValue.set(itemID + "_" + score);

        context.write(outKey, outValue);
    }
}

reducer1

package step1;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author liyijie
 * @date 2018-05-13 22:56:28
 * @email 37024760@qq.com
 *
 * Build the user-item rating matrix from the user behavior list.
 */
public class Reducer1 extends Reducer<Text, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();

    // key: userID (row)    values: itemID_score pairs (columns)
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        String userID = key.toString();

        // itemID -> accumulated score (a user may act on the same item several times)
        Map<String, Integer> map = new HashMap<>();

        for (Text value : values) {
            String[] split = value.toString().split("_");
            String itemID = split[0];
            String score = split[1];

            if (map.get(itemID) == null) {
                map.put(itemID, Integer.parseInt(score));
            } else {
                Integer preScore = map.get(itemID);
                map.put(itemID, preScore + Integer.parseInt(score));
            }
        }

        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            String itemID = entry.getKey();
            String score = String.valueOf(entry.getValue());
            sb.append(itemID).append("_").append(score).append(",");
        }
        String line = null;
        if (sb.toString().endsWith(",")) {
            line = sb.substring(0, sb.length() - 1);
        }

        // key: userID (row)    value: itemID_score,itemID_score,...
        outKey.set(userID);
        outValue.set(line);

        context.write(outKey, outValue);
    }
}

mr1

package step1;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author liyijie
 * @date 2018-05-13 23:07:13
 * @email 37024760@qq.com
 *
 * Build the user-item rating matrix from the user behavior list.
 */
public class MR1 {
    private static String inputPath = "/UserCF/step1_input/actionList.txt";
    private static String outputPath = "/UserCF/step1_output";
    private static String hdfs = "hdfs://node1:9000";

    public int run() {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", hdfs);
            Job job = Job.getInstance(conf, "step1");

            // configure the mapper and reducer classes
            job.setJarByClass(MR1.class);
            job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
            job.setMapperClass(Mapper1.class);
            job.setReducerClass(Reducer1.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileSystem fs = FileSystem.get(conf);
            Path inpath = new Path(inputPath);
            if (fs.exists(inpath)) {
                FileInputFormat.addInputPath(job, inpath);
            } else {
                System.out.println(inpath + " does not exist");
            }

            // remove any previous output so the job can be rerun
            Path outpath = new Path(outputPath);
            fs.delete(outpath, true);
            FileOutputFormat.setOutputPath(job, outpath);

            return job.waitForCompletion(true) ? 1 : -1;
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return -1;
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        int result = new MR1().run();
        if (result == 1) {
            System.out.println("step1 succeeded");
        } else {
            System.out.println("step1 failed");
        }
    }
}

Output (TextOutputFormat separates key and value with a tab; HashMap iteration means the column order may vary):

A	1_1,3_5,4_3
B	2_3,5_3
C	1_5,6_10
D	1_10,5_5
E	3_5,4_1
F	2_5,3_3,6_1


2. Build the user-user similarity matrix from the rating matrix

Input: output of step 1

Cache: output of step 1

(The input and the cache are the same file: user-user similarity compares rows of the rating matrix with each other, so the job streams the matrix as its input and joins it against a cached copy of itself.)

Output: userID (rows), userID (columns), similarity

Code:

mapper2

package step2;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author liyijie
 * @date 2018-05-13 23:43:51
 * @email 37024760@qq.com
 *
 * Build the user-user similarity matrix from the rating matrix.
 */
public class Mapper2 extends Mapper<LongWritable, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();
    // cached copy of the rating matrix, one user row per element
    private List<String> cacheList = new ArrayList<String>();

    private DecimalFormat df = new DecimalFormat("0.00");

    /**
     * Runs once before any map() call: read the cached rating matrix
     * (symlinked as "itemUserScore1") into a Java list.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        FileReader fr = new FileReader("itemUserScore1");
        BufferedReader br = new BufferedReader(fr);

        // each line: userID TAB itemID_score,itemID_score,...
        String line = null;
        while ((line = br.readLine()) != null) {
            cacheList.add(line);
        }

        br.close();
        fr.close();
    }

    /**
     * key:   byte offset
     * value: userID TAB itemID_score,itemID_score,...   (one row of the rating matrix)
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] rowAndLine1 = value.toString().split("\t");

        // row of the left matrix: userID
        String user1 = rowAndLine1[0];
        // columns: itemID_score pairs
        String[] itemScores1 = rowAndLine1[1].split(",");

        // Euclidean norm of the left row
        double denominator1 = 0;
        for (String itemScore : itemScores1) {
            String score = itemScore.split("_")[1];
            denominator1 += Double.valueOf(score) * Double.valueOf(score);
        }
        denominator1 = Math.sqrt(denominator1);

        for (String line : cacheList) {
            String[] rowAndLine2 = line.split("\t");
            String user2 = rowAndLine2[0];
            String[] itemScores2 = rowAndLine2[1].split(",");

            // Euclidean norm of the cached row
            double denominator2 = 0;
            for (String itemScore : itemScores2) {
                String score = itemScore.split("_")[1];
                denominator2 += Double.valueOf(score) * Double.valueOf(score);
            }
            denominator2 = Math.sqrt(denominator2);

            // dot product of the two rows (the numerator of the cosine)
            int numerator = 0;
            for (String itemScore1 : itemScores1) {
                String itemID1 = itemScore1.split("_")[0];
                String score1 = itemScore1.split("_")[1];

                // look for the same item in the cached row
                for (String itemScore2 : itemScores2) {
                    if (itemScore2.startsWith(itemID1 + "_")) {
                        String score2 = itemScore2.split("_")[1];
                        numerator += Integer.valueOf(score1) * Integer.valueOf(score2);
                    }
                }
            }

            double cos = numerator / (denominator1 * denominator2);
            if (cos == 0) {
                continue;
            }

            // cos is one cell of the similarity matrix: row user1, column user2
            outKey.set(user1);
            outValue.set(user2 + "_" + df.format(cos));
            // output: key = row (userID), value = column_value (userID_similarity)
            context.write(outKey, outValue);
        }
    }
}

reducer2

package step2;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author liyijie
 * @date 2018-05-13 23:43:59
 * @email 37024760@qq.com
 *
 * Build the user-user similarity matrix from the rating matrix.
 */
public class Reducer2 extends Reducer<Text, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();

    // key: userID (row)    values: userID_similarity pairs (columns)
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder sb = new StringBuilder();

        for (Text text : values) {
            sb.append(text).append(",");
        }

        String line = null;
        if (sb.toString().endsWith(",")) {
            line = sb.substring(0, sb.length() - 1);
        }

        outKey.set(key);
        outValue.set(line);

        context.write(outKey, outValue);
    }
}

mr2

package step2;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author liyijie
 * @date 2018-05-13 23:44:07
 * @email 37024760@qq.com
 *
 * Build the user-user similarity matrix from the rating matrix.
 */
public class MR2 {
    private static String inputPath = "/UserCF/step1_output";
    private static String outputPath = "/UserCF/step2_output";
    // the rating matrix from step 1 doubles as the distributed cache
    private static String cache = "/UserCF/step1_output/part-r-00000";

    private static String hdfs = "hdfs://node1:9000";

    public int run() {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", hdfs);
            Job job = Job.getInstance(conf, "step2");

            // symlinks must be enabled so the cached file is visible under its alias
            FileSystem.enableSymlinks();
            System.out.println("symlinks enabled: " + FileSystem.areSymlinksEnabled());

            // register the plain cache file under the alias "itemUserScore1"
            // (addCacheFile, not addCacheArchive: part-r-00000 is not an archive)
            job.addCacheFile(new URI(cache + "#itemUserScore1"));

            // configure the mapper and reducer classes
            job.setJarByClass(MR2.class);
            job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
            job.setMapperClass(Mapper2.class);
            job.setReducerClass(Reducer2.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileSystem fs = FileSystem.get(conf);
            Path inpath = new Path(inputPath);
            if (fs.exists(inpath)) {
                FileInputFormat.addInputPath(job, inpath);
            } else {
                System.out.println(inpath + " does not exist");
            }

            // remove any previous output so the job can be rerun
            Path outpath = new Path(outputPath);
            fs.delete(outpath, true);
            FileOutputFormat.setOutputPath(job, outpath);

            return job.waitForCompletion(true) ? 1 : -1;
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return -1;
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        int result = new MR2().run();
        if (result == 1) {
            System.out.println("step2 succeeded");
        } else {
            System.out.println("step2 failed");
        }
    }
}

Output (zero similarities are skipped; the column order may vary):

A	A_1.00,C_0.08,D_0.15,E_0.93,F_0.43
B	B_1.00,D_0.32,F_0.60
C	A_0.08,C_1.00,D_0.40,F_0.15
D	A_0.15,B_0.32,C_0.40,D_1.00
E	A_0.93,E_1.00,F_0.50
F	A_0.43,B_0.60,C_0.15,E_0.50,F_1.00


3. Transpose the rating matrix

Input: output of step 1

Output: itemID (rows), userID (columns), score

Code:

mapper3

package step3;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author liyijie
 * @date 2018-05-13 22:36:18
 * @email 37024760@qq.com
 *
 * Transpose the rating matrix.
 */
public class Mapper3 extends Mapper<LongWritable, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();

    /**
     * key:   byte offset
     * value: userID TAB itemID_score,itemID_score,...   e.g. "A	1_1,3_5,4_3"
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] rowAndLine = value.toString().split("\t");

        // row of the input matrix: userID
        String userID = rowAndLine[0];
        // columns: itemID_score pairs
        String[] lines = rowAndLine[1].split(",");

        for (int i = 0; i < lines.length; i++) {
            String itemID = lines[i].split("_")[0];
            String score = lines[i].split("_")[1];

            // swap row and column: key = itemID (new row), value = userID_score (new column_value)
            outKey.set(itemID);
            outValue.set(userID + "_" + score);

            context.write(outKey, outValue);
        }
    }
}

reducer3

package step3;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author liyijie
 * @date 2018-05-13 22:56:28
 * @email 37024760@qq.com
 *
 * Transpose the rating matrix.
 */
public class Reducer3 extends Reducer<Text, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();

    // key: itemID (row)    values: userID_score pairs (columns)
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder sb = new StringBuilder();

        for (Text text : values) {
            sb.append(text).append(",");
        }
        String line = null;
        if (sb.toString().endsWith(",")) {
            line = sb.substring(0, sb.length() - 1);
        }

        outKey.set(key);
        outValue.set(line);

        context.write(outKey, outValue);
    }
}

mr3

package step3;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author liyijie
 * @date 2018-05-13 23:07:13
 * @email 37024760@qq.com
 *
 * Transpose the rating matrix.
 */
public class MR3 {
    private static String inputPath = "/UserCF/step1_output";
    private static String outputPath = "/UserCF/step3_output";
    private static String hdfs = "hdfs://node1:9000";

    public int run() {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", hdfs);
            Job job = Job.getInstance(conf, "step3");

            // configure the mapper and reducer classes
            job.setJarByClass(MR3.class);
            job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
            job.setMapperClass(Mapper3.class);
            job.setReducerClass(Reducer3.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileSystem fs = FileSystem.get(conf);
            Path inpath = new Path(inputPath);
            if (fs.exists(inpath)) {
                FileInputFormat.addInputPath(job, inpath);
            } else {
                System.out.println(inpath + " does not exist");
            }

            // remove any previous output so the job can be rerun
            Path outpath = new Path(outputPath);
            fs.delete(outpath, true);
            FileOutputFormat.setOutputPath(job, outpath);

            return job.waitForCompletion(true) ? 1 : -1;
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return -1;
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        int result = new MR3().run();
        if (result == 1) {
            System.out.println("step3 succeeded");
        } else {
            System.out.println("step3 failed");
        }
    }
}

Output (the column order may vary):

1	A_1,C_5,D_10
2	B_3,F_5
3	A_5,E_5,F_3
4	A_3,E_1
5	B_3,D_5
6	C_10,F_1


4. User-user similarity matrix × rating matrix

Input: output of step 2

Cache: output of step 3

Output: userID (rows), itemID (columns), score

Code:

mapper4

package step4;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author liyijie
 * @date 2018-05-13 23:43:51
 * @email 37024760@qq.com
 *
 * Multiply the user-user similarity matrix by the rating matrix (transposed in step 3).
 */
public class Mapper4 extends Mapper<LongWritable, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();
    // cached copy of the transposed rating matrix, one item row per element
    private List<String> cacheList = new ArrayList<String>();

    private DecimalFormat df = new DecimalFormat("0.00");

    /**
     * Runs once before any map() call: read the cached transposed rating
     * matrix (symlinked as "itemUserScore2") into a Java list.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        FileReader fr = new FileReader("itemUserScore2");
        BufferedReader br = new BufferedReader(fr);

        // each line: itemID TAB userID_score,userID_score,...
        String line = null;
        while ((line = br.readLine()) != null) {
            cacheList.add(line);
        }

        br.close();
        fr.close();
    }

    /**
     * key:   byte offset
     * value: userID TAB userID_similarity,...   (one row of the similarity matrix)
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] rowAndLine1 = value.toString().split("\t");

        // row of the left matrix: userID
        String user1 = rowAndLine1[0];
        // columns: userID_similarity pairs
        String[] userSims = rowAndLine1[1].split(",");

        for (String line : cacheList) {
            String[] rowAndLine2 = line.split("\t");
            // row of the cached matrix: itemID; columns: userID_score pairs
            String itemID = rowAndLine2[0];
            String[] userScores = rowAndLine2[1].split(",");

            // one cell of the product: sum over users of similarity * score
            double result = 0;
            for (String userSim : userSims) {
                String user2 = userSim.split("_")[0];
                String sim = userSim.split("_")[1];

                for (String userScore : userScores) {
                    if (userScore.startsWith(user2 + "_")) {
                        String score = userScore.split("_")[1];
                        result += Double.valueOf(sim) * Double.valueOf(score);
                    }
                }
            }

            if (result == 0) {
                continue;
            }
            // result is one cell of the recommendation list: row user1, column itemID
            outKey.set(user1);
            outValue.set(itemID + "_" + df.format(result));
            // output: key = row (userID), value = column_value (itemID_score)
            context.write(outKey, outValue);
        }
    }
}

reducer4

package step4;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author liyijie
 * @date 2018-05-13 23:43:59
 * @email 37024760@qq.com
 *
 * Multiply the user-user similarity matrix by the rating matrix (transposed in step 3).
 */
public class Reducer4 extends Reducer<Text, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();

    // key: userID (row)    values: itemID_score pairs (columns)
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder sb = new StringBuilder();

        for (Text text : values) {
            sb.append(text).append(",");
        }

        String line = null;
        if (sb.toString().endsWith(",")) {
            line = sb.substring(0, sb.length() - 1);
        }

        outKey.set(key);
        outValue.set(line);

        context.write(outKey, outValue);
    }
}

mr4

package step4;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author liyijie
 * @date 2018-05-13 23:44:07
 * @email 37024760@qq.com
 *
 * Multiply the user-user similarity matrix by the rating matrix (transposed in step 3).
 */
public class MR4 {
    private static String inputPath = "/UserCF/step2_output";
    private static String outputPath = "/UserCF/step4_output";
    // the transposed rating matrix from step 3 serves as the distributed cache
    private static String cache = "/UserCF/step3_output/part-r-00000";

    private static String hdfs = "hdfs://node1:9000";

    public int run() {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", hdfs);
            Job job = Job.getInstance(conf, "step4");

            // symlinks must be enabled so the cached file is visible under its alias
            FileSystem.enableSymlinks();
            System.out.println("symlinks enabled: " + FileSystem.areSymlinksEnabled());

            // register the plain cache file under the alias "itemUserScore2"
            // (addCacheFile, not addCacheArchive: part-r-00000 is not an archive)
            job.addCacheFile(new URI(cache + "#itemUserScore2"));

            // configure the mapper and reducer classes
            job.setJarByClass(MR4.class);
            job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
            job.setMapperClass(Mapper4.class);
            job.setReducerClass(Reducer4.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileSystem fs = FileSystem.get(conf);
            Path inpath = new Path(inputPath);
            if (fs.exists(inpath)) {
                FileInputFormat.addInputPath(job, inpath);
            } else {
                System.out.println(inpath + " does not exist");
            }

            // remove any previous output so the job can be rerun
            Path outpath = new Path(outputPath);
            fs.delete(outpath, true);
            FileOutputFormat.setOutputPath(job, outpath);

            return job.waitForCompletion(true) ? 1 : -1;
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return -1;
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        int result = new MR4().run();
        if (result == 1) {
            System.out.println("step4 succeeded");
        } else {
            System.out.println("step4 failed");
        }
    }
}

Output


5. Using the rating matrix, zero out the scores of items each user has already acted on in the output of step 4 (in the MapReduce implementation those cells are simply dropped rather than written as zeros)

Input: output of step 4

Cache: output of step 1

Output: userID (rows), itemID (columns), score (the final recommendation list)

Code:

mapper5

package step5;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author liyijie
 * @date 2018-05-13 23:43:51
 * @email 37024760@qq.com
 *
 * Using the rating matrix, zero out the items each user has already acted on
 * in the step-4 output (implemented by dropping those cells).
 */
public class Mapper5 extends Mapper<LongWritable, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();
    // cached copy of the step-1 rating matrix, one user row per element
    private List<String> cacheList = new ArrayList<String>();

    /**
     * Runs once before any map() call: read the cached rating matrix
     * (symlinked as "itemUserScore3") into a Java list.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        FileReader fr = new FileReader("itemUserScore3");
        BufferedReader br = new BufferedReader(fr);

        // each line: userID TAB itemID_score,itemID_score,...
        String line = null;
        while ((line = br.readLine()) != null) {
            cacheList.add(line);
        }

        br.close();
        fr.close();
    }

    /**
     * key:   byte offset
     * value: userID TAB itemID_score,...   (one row of the step-4 recommendation list)
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] rowAndLine1 = value.toString().split("\t");

        // row of the recommendation list: userID
        String user1 = rowAndLine1[0];
        // columns: itemID_score pairs
        String[] itemScores1 = rowAndLine1[1].split(",");

        for (String line : cacheList) {
            String[] rowAndLine2 = line.split("\t");
            // row of the rating matrix: userID
            String user2 = rowAndLine2[0];
            String[] itemScores2 = rowAndLine2[1].split(",");

            // only compare the two rows that belong to the same user
            if (user1.equals(user2)) {
                // walk the recommended items of this user
                for (String itemScore1 : itemScores1) {
                    boolean acted = false;
                    String itemID1 = itemScore1.split("_")[0];
                    String score1 = itemScore1.split("_")[1];

                    // has the user already acted on this item?
                    for (String itemScore2 : itemScores2) {
                        String itemID2 = itemScore2.split("_")[0];
                        if (itemID1.equals(itemID2)) {
                            acted = true;
                        }
                    }
                    // keep only the items the user has not acted on yet
                    if (!acted) {
                        outKey.set(user1);
                        outValue.set(itemID1 + "_" + score1);
                        // output: key = row (userID), value = column_value (itemID_score)
                        context.write(outKey, outValue);
                    }
                }
            }
        }
    }
}

reducer5

package step5;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author liyijie
 * @date 2018-05-13 23:43:59
 * @email 37024760@qq.com
 *
 * Using the rating matrix, zero out the items each user has already acted on
 * in the step-4 output.
 */
public class Reducer5 extends Reducer<Text, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();

    // key: userID (row)    values: itemID_score pairs (columns of the final list)
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder sb = new StringBuilder();

        for (Text text : values) {
            sb.append(text).append(",");
        }

        String line = null;
        if (sb.toString().endsWith(",")) {
            line = sb.substring(0, sb.length() - 1);
        }

        outKey.set(key);
        outValue.set(line);

        context.write(outKey, outValue);
    }
}

mr5

package step5;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author liyijie
 * @date 2018-05-13 23:44:07
 * @email 37024760@qq.com
 *
 * Using the rating matrix, zero out the items each user has already acted on
 * in the step-4 output.
 */
public class MR5 {
    private static String inputPath = "/UserCF/step4_output";
    private static String outputPath = "/UserCF/step5_output";
    // the rating matrix from step 1 serves as the distributed cache
    private static String cache = "/UserCF/step1_output/part-r-00000";

    private static String hdfs = "hdfs://node1:9000";

    public int run() {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", hdfs);
            Job job = Job.getInstance(conf, "step5");

            // symlinks must be enabled so the cached file is visible under its alias
            FileSystem.enableSymlinks();
            System.out.println("symlinks enabled: " + FileSystem.areSymlinksEnabled());

            // register the plain cache file under the alias "itemUserScore3"
            // (addCacheFile, not addCacheArchive: part-r-00000 is not an archive)
            job.addCacheFile(new URI(cache + "#itemUserScore3"));

            // configure the mapper and reducer classes
            job.setJarByClass(MR5.class);
            job.setJar("F:\\eclipseworkspace\\UserCF\\UserCF.jar");
            job.setMapperClass(Mapper5.class);
            job.setReducerClass(Reducer5.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileSystem fs = FileSystem.get(conf);
            Path inpath = new Path(inputPath);
            if (fs.exists(inpath)) {
                FileInputFormat.addInputPath(job, inpath);
            } else {
                System.out.println(inpath + " does not exist");
            }

            // remove any previous output so the job can be rerun
            Path outpath = new Path(outputPath);
            fs.delete(outpath, true);
            FileOutputFormat.setOutputPath(job, outpath);

            return job.waitForCompletion(true) ? 1 : -1;
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return -1;
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        int result = new MR5().run();
        if (result == 1) {
            System.out.println("step5 succeeded");
        } else {
            System.out.println("step5 failed");
        }
    }
}

Output (the final recommendation list)
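
The five jobs have to run in order, since each step reads the HDFS output of an earlier one. A minimal driver sketch that chains them (UserCFDriver is not part of the original project; it assumes the five step packages are on the classpath):

public class UserCFDriver {
    public static void main(String[] args) {
        // run the five MapReduce jobs in order; each run() returns 1 on success, -1 on failure
        if (new step1.MR1().run() != 1) { System.out.println("step1 failed, aborting"); return; }
        if (new step2.MR2().run() != 1) { System.out.println("step2 failed, aborting"); return; }
        if (new step3.MR3().run() != 1) { System.out.println("step3 failed, aborting"); return; }
        if (new step4.MR4().run() != 1) { System.out.println("step4 failed, aborting"); return; }
        if (new step5.MR5().run() != 1) { System.out.println("step5 failed, aborting"); return; }
        System.out.println("UserCF pipeline finished; the recommendation list is in /UserCF/step5_output");
    }
}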



