Hadoop 2.6: Matrix Multiplication with MapReduce, Part 1: Matrix Transpose

Project repository: https://github.com/tudoupaisimalingshu/hadoop_matrix

Matrix Multiplication

I. Theoretical background


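For a left matrix A with M rows and N columns and a right matrix B with N rows and P columns, the product C = A*B is the M*P matrix whose entry at row i, column j is the dot product of row i of A and column j of B:

C[i][j] = A[i][0]*B[0][j] + A[i][1]*B[1][j] + ... + A[i][N-1]*B[N-1][j]

So computing a single entry of C consumes one full row of the left matrix and one full column of the right matrix.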

II. How do we implement A[M][N]*B[N][P] in a program?

import java.util.Arrays;

public class Matrix {

	public static void main(String[] args) {
		int[][] matrix1 = {
				{1,2,-2,0},
				{3,3,4,-3},
				{-2,0,2,3},
				{5,3,-1,2},
				{-4,2,0,2}};//left matrix, 5*4
		int[][] matrix2 = {
				{0,3,-1,2,-3},
				{1,3,5,-2,-1},
				{0,1,4,-1,2},
				{-2,2,-1,1,2}};//right matrix, 4*5
		int[][] matrix3 = new int[5][5];//result matrix, 5*5
		for(int i=0;i<5;i++)//compute each row of the result matrix
		{
			int[] row = matrix1[i];//row i of the left matrix
			System.out.println("row=" + Arrays.toString(row));
			for(int j=0;j<5;j++)//compute each column of the result matrix
			{
				int[] line = new int[4];//column j of the right matrix
				for(int k=0;k<4;k++)
				{
					line[k] = matrix2[k][j];
				}//the column vector is stored vertically, so a loop gathers its elements
				System.out.println("line=" + Arrays.toString(line));
				int result_i_j = 0;//the dot product for this cell
				for(int m=0;m<4;m++)
				{
					result_i_j += row[m] * line[m];//accumulate the products
				}
				System.out.println("result_i_j=" + result_i_j);
				System.out.println("--------------------");
				matrix3[i][j] = result_i_j;//store the value at the corresponding position
			}
		}
		
		//print the result matrix
		for(int i=0;i<5;i++)
		{
			for(int j=0;j<5;j++)
			{
				System.out.print(matrix3[i][j] + "\t");
			}
			System.out.println();
		}
	}
}


Output:

row=[1, 2, -2, 0]
line=[0, 1, 0, -2]
result_i_j=2
--------------------
line=[3, 3, 1, 2]
result_i_j=7
--------------------
line=[-1, 5, 4, -1]
result_i_j=1
--------------------
line=[2, -2, -1, 1]
result_i_j=0
--------------------
line=[-3, -1, 2, 2]
result_i_j=-9
--------------------
row=[3, 3, 4, -3]
line=[0, 1, 0, -2]
result_i_j=9
--------------------
line=[3, 3, 1, 2]
result_i_j=16
--------------------
line=[-1, 5, 4, -1]
result_i_j=31
--------------------
line=[2, -2, -1, 1]
result_i_j=-7
--------------------
line=[-3, -1, 2, 2]
result_i_j=-10
--------------------
row=[-2, 0, 2, 3]
line=[0, 1, 0, -2]
result_i_j=-6
--------------------
line=[3, 3, 1, 2]
result_i_j=2
--------------------
line=[-1, 5, 4, -1]
result_i_j=7
--------------------
line=[2, -2, -1, 1]
result_i_j=-3
--------------------
line=[-3, -1, 2, 2]
result_i_j=16
--------------------
row=[5, 3, -1, 2]
line=[0, 1, 0, -2]
result_i_j=-1
--------------------
line=[3, 3, 1, 2]
result_i_j=27
--------------------
line=[-1, 5, 4, -1]
result_i_j=4
--------------------
line=[2, -2, -1, 1]
result_i_j=7
--------------------
line=[-3, -1, 2, 2]
result_i_j=-16
--------------------
row=[-4, 2, 0, 2]
line=[0, 1, 0, -2]
result_i_j=-2
--------------------
line=[3, 3, 1, 2]
result_i_j=-2
--------------------
line=[-1, 5, 4, -1]
result_i_j=12
--------------------
line=[2, -2, -1, 1]
result_i_j=-10
--------------------
line=[-3, -1, 2, 2]
result_i_j=14
--------------------
2	7	1	0	-9	
9	16	31	-7	-10	
-6	2	7	-3	16	
-1	27	4	7	-16	
-2	-2	12	-10	14	


III. Problems with the traditional program:

1. It cannot run concurrently; the loops always execute one iteration after another.

2. If the matrices are too large to fit in memory, they have to be kept in files. That is tolerable for the left matrix: each iteration reads one row into memory, and the next iteration reads the next row. But as the program shows, for the right matrix we need column vectors, which means traversing every row of the file and taking one element from each to assemble a single column; when the file is large, this is far too slow.


IV. Solutions

1. For problem 1, introduce Hadoop, a framework for parallel execution; its Map and Reduce operations can run concurrently.

2. For problem 2, transpose the right matrix, turning its column vectors into row vectors; see the sketch below.
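A minimal sketch of solution 2 in plain Java, reusing the matrices from section II (the class name TransposedMultiply is made up): once the right matrix is transposed, every entry of the result is a dot product of two rows, so both operands can be read strictly row by row, which is exactly what a line-oriented file layout supports.

public class TransposedMultiply {

	public static void main(String[] args) {
		int[][] a = {
				{1,2,-2,0},
				{3,3,4,-3},
				{-2,0,2,3},
				{5,3,-1,2},
				{-4,2,0,2}};//left matrix, 5*4
		int[][] b = {
				{0,3,-1,2,-3},
				{1,3,5,-2,-1},
				{0,1,4,-1,2},
				{-2,2,-1,1,2}};//right matrix, 4*5

		//transpose b once: bt[j] holds column j of b, stored as a row
		int[][] bt = new int[5][4];
		for(int k=0;k<4;k++)
			for(int j=0;j<5;j++)
				bt[j][k] = b[k][j];

		//now every result entry is a dot product of two rows, a[i] and bt[j],
		//so both matrices could be streamed from files one line at a time
		for(int[] row : a)
		{
			for(int[] col : bt)
			{
				int sum = 0;
				for(int m=0;m<4;m++)
					sum += row[m] * col[m];//accumulate the products
				System.out.print(sum + "\t");
			}
			System.out.println();
		}
	}
}

Running it prints the same 5*5 result matrix as the program above.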



V. Matrix multiplication with Hadoop MapReduce

1. The storage format of a matrix
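Concretely, each matrix row is stored as one line of text: the row index, a tab, then the row's elements as comma-separated columnIndex_value pairs (the format can be read off the mapper comments below). Under that format, and reconstructing the file contents as an assumption from those comments, matrix.txt for the 4*5 right matrix of section II would contain:

1	1_0,2_3,3_-1,4_2,5_-3
2	1_1,2_3,3_5,4_-2,5_-1
3	1_0,2_1,3_4,4_-1,5_2
4	1_-2,2_2,3_-1,4_1,5_2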



Why put all the columns of a row on a single line?

The matrix file may be very large, in which case HDFS splits it into chunks. If all the elements of a row were not written on one line, elements belonging to the same row could land in different splits, and extra time and space would be spent finding them and stitching them back together; in other words, an additional reduce step would be needed just to reassemble rows.


Why tag every element of a row with its column index?

Because Hadoop works in parallel, there is no guarantee that the elements of a row stay in order between the map split and the reduce merge, so each element must carry its own index. In Hadoop the row number is unique, and adding the explicit column tag is enough to keep the data correct throughout parallel processing.


2. The MapReduce implementation of matrix transpose


package hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class Step1 {
	public static class Mapper1 extends Mapper<LongWritable,Text,Text,Text>
	{
		private Text outKey = new Text();
		private Text outValue = new Text();
		
		/*
			Matrix to transpose:
			0	3	-1	2	-3
			1	3	5	-2	-1
			0	1	4	-1	2
			-2	2	-1	1	2
		*/
		/*
			Target matrix:
			0	1	0	-2
			3	3	1	2
			-1	5	4	-1
			2	-2	-1	1
			-3	-1	2	2
		*/
		//For each input line; take the first row as an example:
		//key : byte offset of the line in the file (not used here)
		//value : "1	1_0,2_3,3_-1,4_2,5_-3"
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			String[] rowAndLine = value.toString().split("\t");
			//rowAndLine : {"1","1_0,2_3,3_-1,4_2,5_-3"}
			String row = rowAndLine[0];
			//row : "1"
			String[] lines = rowAndLine[1].split(",");
			//lines : {"1_0","2_3","3_-1","4_2","5_-3"}
			for(String line : lines)//for each element; take "1_0" as an example
			{
				String column = line.split("_")[0];
				//column : "1"
				String valueStr = line.split("_")[1];
				//valueStr : "0"
				outKey.set(column);
				//the column index becomes the row index
				outValue.set(row + "_" + valueStr);
				//the row index becomes the column tag
				context.write(outKey, outValue);
				//emits (1,"1_0")
			}
			//when the loop finishes, {"1_0","2_3","3_-1","4_2","5_-3"} has produced
			//(1,"1_0") (2,"1_3") (3,"1_-1") (4,"1_2") (5,"1_-3")
			//where (2,"1_3") means: output row 2, column 1, value 3
			/*
			Target transposed matrix:
			0	1	0	-2
			3	3	1	2
			-1	5	4	-1
			2	-2	-1	1
			-3	-1	2	2
			*/
			//these pairs are exactly the first column of the transposed matrix
		}
		/*
			All map calls together produce:
			("1","1_0")	("2","1_3")	("3","1_-1")	("4","1_2")	("5","1_-3")
			("1","2_1")	("2","2_3")	("3","2_5")	("4","2_-2")	("5","2_-1")
			("1","3_0")	("2","3_1")	("3","3_4")	("4","3_-1")	("5","3_2")
			("1","4_-2")	("2","4_2")	("3","4_-1")	("4","4_1")	("5","4_2")
		*/

	}
	

	/*
		The reduce task merges all key/value pairs produced by the map phase
		into the stored form of the transposed matrix.
		Values sharing the same key are grouped into one collection, e.g.:
		key : "1"
		values : {"3_0","1_0","4_-2","2_1"}
		Note: this is exactly why the elements carry column tags; the order of
		values is not necessarily the matrix's original column order.
	*/
	
	public static class Reducer1 extends Reducer<Text,Text,Text,Text>
	{
		private Text outKey = new Text();
		private Text outValue = new Text();
		
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			
			StringBuilder sb = new StringBuilder();
			for(Text text : values)
			{
				sb.append(text + ",");
			}
			//sb : "3_0,1_0,4_-2,2_1,"
			//note the trailing comma
			String line = "";
			if(sb.toString().endsWith(","))
			{
				line = sb.substring(0, sb.length() - 1);
			}
			//trailing comma removed
			//line : "3_0,1_0,4_-2,2_1"
			outKey.set(key);
			outValue.set(line);
			//("1","3_0,1_0,4_-2,2_1")
			context.write(outKey, outValue);
		}
		
	}
	
	private static final String INPATH = "input/matrix.txt";//input file path
	private static final String OUTPATH = "output/step1";//output directory path
	private static final String HDFS = "hdfs://pc1:9000";//HDFS base URI
	
	public void run() throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		//input and output paths; the relative input path is resolved
		//against the configured default filesystem
		String[] otherArgs = {"input/matrix.txt", "hdfs://pc1:9000/output/step1"};

		Job job = Job.getInstance(conf, "step1");//create the job and set its name
		job.setJarByClass(Step1.class);
		job.setMapperClass(Mapper1.class);//Mapper class of the job
		job.setReducerClass(Reducer1.class);//Reducer class of the job

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setOutputKeyClass(Text.class);//output key type
		job.setOutputValueClass(Text.class);//output value type

		job.setOutputFormatClass(SequenceFileOutputFormat.class);
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));//input path
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//output path

		//delete a pre-existing output directory so the job can be rerun
		Path outPath = new Path(otherArgs[1]);
		FileSystem fs = outPath.getFileSystem(conf);
		if(fs.exists(outPath))
		{
			fs.delete(outPath, true);
		}

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
	
	public static void main(String[] args)
	{
		try {
			new Step1().run();
		} catch (ClassNotFoundException | IOException | InterruptedException e) {
			e.printStackTrace();
		}
	}
	
}
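To run Step1 on a cluster, the class would typically be packaged into a jar and submitted through the hadoop launcher. A hypothetical invocation (the jar name hadoop_matrix.jar is made up):

hadoop jar hadoop_matrix.jar hadoop.Step1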


Run results:




Use hadoop fs -text <file path> to view the transpose result:
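The output was written with SequenceFileOutputFormat, so hadoop fs -cat would show binary; -text deserializes the Text keys and values. A hypothetical invocation (part-r-00000 is the standard default name for a single reducer's output file):

hadoop fs -text hdfs://pc1:9000/output/step1/part-r-00000

Each printed line should be one row of the transposed matrix, for example (as in the reducer comments, the order of entries within a line is not guaranteed):

1	3_0,1_0,4_-2,2_1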



Next, the matrix multiplication itself (click to open the follow-up post).
