Hadoop 实现矩阵相乘

29 篇文章 1 订阅
8 篇文章 0 订阅

包括两点:

1、mapreduce实现矩阵相乘

2、python脚本生成矩阵

mapreduce实现矩阵相乘中数据组织方式变换的过程如下图所示:


mapreduce 实现代码:

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class MatrixMutiply {
	/*
	 * 矩阵存放在一个文件里面。
	 * 刚开始两个矩阵放在两个文件里面,hadoop会为两个文件做两次map导致先做一次map和reduce,
	 * 这样另外一个矩阵就没有数据,后面的reduce会出现问题
	 * 矩阵存放的形式是:
	 * A,1,1,2   表示A矩阵第一行第一列数据为2
	 * A,1,2,1
	 * A,2,1,3
	 * A,2,2,4
	 * 这样存放的目的是防止一次map在读取数据时分片而导致数据读取不完整
	 * 矩阵由python脚本产生,python脚本见BuildMatrix.py
	 * 
	 * */
	
	/**
	 * Map stage of the matrix product C = A x B.
	 *
	 * <p>Each input record is a line {@code name,row,col,value}, for example
	 * {@code A,1,2,7}. Every element is replicated to each cell of C whose dot
	 * product needs it:
	 * <ul>
	 *   <li>{@code A,i,k,v} is written under the keys {@code "i,j"} for
	 *       j = 1..colNumB with the value {@code "A:k,v"};</li>
	 *   <li>{@code B,k,j,v} is written under the keys {@code "i,j"} for
	 *       i = 1..rowNumA with the value {@code "B:k,v"}.</li>
	 * </ul>
	 *
	 * <p>The dimensions default to 4 rows for A and 3 columns for B. They can be
	 * overridden without recompiling through the job configuration properties
	 * {@code matrix.rowNumA} and {@code matrix.colNumB}.
	 */
	public static class MatrixMapper extends Mapper<Object, Text, Text, Text>{
		/** Configuration property holding the number of rows of matrix A. */
		private static final String ROW_NUM_A_PROPERTY = "matrix.rowNumA";
		/** Configuration property holding the number of columns of matrix B. */
		private static final String COL_NUM_B_PROPERTY = "matrix.colNumB";

		private int rowNumA = 4;  // matrix A row count (default)
		private int colNumB = 3;  // matrix B column count (default)
		// Reused for every write; context.write() serializes the current content.
		private final Text mapOutputkey = new Text();
		private final Text mapOutputvalue = new Text();

		/**
		 * Reads the matrix dimensions from the job configuration, keeping the
		 * built-in defaults when a property is not set.
		 */
		@Override
		protected void setup(Mapper<Object, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			rowNumA = context.getConfiguration().getInt(ROW_NUM_A_PROPERTY, rowNumA);
			colNumB = context.getConfiguration().getInt(COL_NUM_B_PROPERTY, colNumB);
		}

		/**
		 * Emits one key/value pair per result cell that depends on the element
		 * described by {@code value}.
		 *
		 * @param key     input key supplied by the input format; not used
		 * @param value   one or more {@code name,row,col,value} records separated by newlines
		 * @param context sink for the replicated elements
		 */
		@Override
		protected void map(Object key, Text value,
				Mapper<Object, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			for (String item : value.toString().split("\n")) {
				String[] elemString = item.split(",");
				// Trim each field so trailing '\r' or stray spaces do not end up in keys or values.
				for (int f = 0; f < elemString.length; f++) {
					elemString[f] = elemString[f].trim();
				}
				boolean isA = elemString[0].equals("A");   // compare content with equals, never ==
				boolean isB = elemString[0].equals("B");
				if (elemString.length < 4 || !(isA || isB)) {
					// Blank, truncated or unknown records are reported and skipped
					// instead of failing the task with an index error.
					System.out.println("mapout else else :--------------->"+ item);
					continue;
				}

				if (isA) {
					// A(i,k) contributes to every column j of result row i.
					mapOutputvalue.set("A:" + elemString[2] + "," + elemString[3]);
					for (int j = 1; j <= colNumB; j++) {
						mapOutputkey.set(elemString[1] + "," + j);
						context.write(mapOutputkey, mapOutputvalue);
					}
				} else {
					// B(k,j) contributes to every row i of result column j.
					mapOutputvalue.set("B:" + elemString[1] + "," + elemString[3]);
					for (int i = 1; i <= rowNumA; i++) {
						mapOutputkey.set(i + "," + elemString[2]);
						context.write(mapOutputkey, mapOutputvalue);
					}
				}
			}
		}
	}
	
	/**
	 * Reduce stage of the matrix product C = A x B.
	 *
	 * <p>For one result cell {@code "i,j"} the incoming values are the elements
	 * of row i of A ({@code "A:k,v"}) and of column j of B ({@code "B:k,v"}).
	 * The reducer pairs them by the shared index k and writes the dot product
	 * as text.
	 */
	public static class MatixReducer extends Reducer<Text, Text, Text, Text> {

		// Scratch maps from the shared index k to the element value. They are
		// cleared at the start of every reduce() call so that one key never
		// sees the elements collected for a previous key.
		private final HashMap<String, Integer> rowOfA = new HashMap<String, Integer>();
		private final HashMap<String, Integer> columnOfB = new HashMap<String, Integer>();
		private final Text reduceOutputvalue = new Text();

		/**
		 * Computes the dot product for the result cell identified by {@code key}.
		 *
		 * @param key     result position as {@code "row,col"}
		 * @param value   the {@code "A:k,v"} / {@code "B:k,v"} elements for this cell
		 * @param context sink for the {@code (position, sum)} pair
		 * @throws NumberFormatException if an element value is not an integer
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> value,
				Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			rowOfA.clear();
			columnOfB.clear();

			for (Text item : value) {
				String val = item.toString();
				boolean fromA = val.startsWith("A:");
				// Check the prefix before slicing; anything else (e.g. "0") is ignored.
				if (!fromA && !val.startsWith("B:")) {
					continue;
				}
				String[] kv = val.substring(2).split(",");
				int element = Integer.parseInt(kv[1].trim());
				if (fromA) {
					rowOfA.put(kv[0].trim(), element);
				} else {
					columnOfB.put(kv[0].trim(), element);
				}
			}

			// Accumulate in a long so large products do not silently wrap around.
			long sum = 0;
			for (String index : rowOfA.keySet()) {
				Integer fromB = columnOfB.get(index);
				// An index present in only one matrix is an implicit zero term.
				if (fromB != null) {
					sum += (long) rowOfA.get(index) * fromB;
				}
			}

			reduceOutputvalue.set(String.valueOf(sum));
			context.write(key, reduceOutputvalue);
		}
	}
	
	/**
	 * Configures and submits the matrix multiplication job.
	 *
	 * @param args generic Hadoop options followed by the input path and the output path
	 * @throws Exception if the job cannot be set up or submitted
	 */
	public static void main(String[] args) throws Exception{
		Configuration configuration = new Configuration();
		// Strip the generic Hadoop options; what remains must be <in> and <out>.
		String[] paths = new GenericOptionsParser(configuration, args).getRemainingArgs();
		if (paths.length != 2) {
			System.err.println("Usage: matrix <in> <out>");
			System.exit(2);
		}

		Job matrixJob = Job.getInstance(configuration, "matrix");
		matrixJob.setJarByClass(MatrixMutiply.class);

		// No combiner is configured: the original author notes the approach does not need one.
		matrixJob.setMapperClass(MatrixMapper.class);
		matrixJob.setReducerClass(MatixReducer.class);

		// These two settings apply to the map output as well as the reduce output,
		// so the mapper and the reducer must emit the same key and value types.
		matrixJob.setOutputKeyClass(Text.class);
		matrixJob.setOutputValueClass(Text.class);

		FileInputFormat.addInputPath(matrixJob, new Path(paths[0]));
		FileOutputFormat.setOutputPath(matrixJob, new Path(paths[1]));

		// waitForCompletion blocks until the job has finished and reports whether it succeeded.
		boolean succeeded = matrixJob.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);
	}
}


运行上述代码的脚本:

# Compile with the JDK compiler on the Hadoop classpath
# (presumably HADOOP_CLASSPATH must include the JDK's tools.jar; verify for your setup).
hadoop com.sun.tools.javac.Main MatrixMutiply.java
jar cf matrix.jar MatrixMutiply*.class
# Remove the output of a previous run; -f keeps the very first run from
# failing when /matrixoutput does not exist yet.
hadoop fs -rm -r -f /matrixoutput
# Quote the glob so that it is expanded by Hadoop against HDFS, not by the local shell.
hadoop jar matrix.jar MatrixMutiply '/matrixinput/*' /matrixoutput

细节的地方要注意:判断字符串相等时,要用equals来判断



产生矩阵的python脚本

# coding:utf-8
__author__ = 'taohao'
import random


class BuildMatrix(object):
    """Generate random integer matrices as text records for the Hadoop job.

    Each element is written on its own line as ``name,rowNum,colNum,value``
    with 1-based row and column numbers.
    """

    def _append_matrix(self, name, row, col, path):
        """Append a ``row`` x ``col`` matrix called ``name`` to ``path``.

        Elements are written in row-major order and each value is drawn with
        ``random.randint(1, 10)``. The file is opened in append mode so that
        several matrices can share one file, and it is closed even when a
        write fails.
        """
        with open(path, 'a') as fd:
            for i in range(row):
                for j in range(col):
                    value = random.randint(1, 10)
                    fd.write('{0},{1},{2},{3}\n'.format(name, i + 1, j + 1, value))

    def build_matrix_a(self, row, col, path='Matrix.txt'):
        """
        Append a random matrix named ``A`` to ``path``.

        matrix:
            1  0  2
            -1 3  1
        turn to ->Matrix name,rowNum,colNum,elementNum
        for example:
            A,1,1,1
            A,1,2,0
            A,1,3,2
            A,2,1,-1
            A,2,2,3
            A,2,3,1

        (The example only illustrates the format; generated values are
        random integers from 1 to 10.)

        :param row: number of rows to generate
        :param col: number of columns to generate
        :param path: file to append to; defaults to ``Matrix.txt``
        :return: None
        """
        self._append_matrix('A', row, col, path)

    def build_matrix_b(self, row, col, path='Matrix.txt'):
        """
        Append a random matrix named ``B`` to ``path``, in the same format
        as ``build_matrix_a``.

        :param row: number of rows to generate
        :param col: number of columns to generate
        :param path: file to append to; defaults to ``Matrix.txt``
        :return: None
        """
        self._append_matrix('B', row, col, path)


if __name__ == '__main__':
    # A is 4 x 2 and B is 2 x 3; the column count of A equals the row count of B.
    shape_a = (4, 2)
    shape_b = (2, 3)
    builder = BuildMatrix()
    # A is written first, then B is appended to the same file.
    builder.build_matrix_a(*shape_a)
    builder.build_matrix_b(*shape_b)

另一篇python脚本生成矩阵,矩阵相乘,请看: http://blog.csdn.net/thao6626/article/details/46472719






  • 2
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
Hadoop可以通过MapReduce框架来实现矩阵相乘,具体步骤如下:

1. 将矩阵A和矩阵B分别存储在HDFS中,并将它们进行分块,每个块的大小可以根据实际情况来确定。
2. 编写Map函数,将矩阵A和矩阵B的分块读入内存中,进行矩阵相乘操作,并将结果输出为键值对(key-value)的形式。其中,key表示输出矩阵的行和列,value表示矩阵相乘后的结果。
3. 编写Reduce函数,将Map函数输出的键值对进行合并,得到最终的矩阵相乘结果。

具体实现细节可以参考以下代码:

Map函数:

```
public static class MatrixMapper extends Mapper<LongWritable, Text, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();
    private int row = 0;
    private int col = 0;
    private int n = 0;
    private String flag = "";

    public void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        row = Integer.parseInt(conf.get("row"));
        col = Integer.parseInt(conf.get("col"));
        n = Integer.parseInt(conf.get("n"));
        flag = conf.get("flag");
    }

    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] tokens = line.split(",");
        if (flag.equals("A")) {
            int i = Integer.parseInt(tokens[0]);
            int j = Integer.parseInt(tokens[1]);
            int v = Integer.parseInt(tokens[2]);
            for (int k = 1; k <= n; k++) {
                outKey.set(i + "," + k);
                outValue.set("A," + j + "," + v);
                context.write(outKey, outValue);
            }
        } else {
            int j = Integer.parseInt(tokens[0]);
            int k = Integer.parseInt(tokens[1]);
            int v = Integer.parseInt(tokens[2]);
            for (int i = 1; i <= row; i++) {
                outKey.set(i + "," + k);
                outValue.set("B," + j + "," + v);
                context.write(outKey, outValue);
            }
        }
    }
}
```

Reduce函数:

```
public static class MatrixReducer extends Reducer<Text, Text, Text, Text> {
    private Text outValue = new Text();

    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int[] a = new int[n + 1];
        int[] b = new int[n + 1];
        for (Text value : values) {
            String[] tokens = value.toString().split(",");
            if (tokens[0].equals("A")) {
                int j = Integer.parseInt(tokens[1]);
                int v = Integer.parseInt(tokens[2]);
                a[j] = v;
            } else {
                int j = Integer.parseInt(tokens[1]);
                int v = Integer.parseInt(tokens[2]);
                b[j] = v;
            }
        }
        int sum = 0;
        for (int i = 1; i <= n; i++) {
            sum += a[i] * b[i];
        }
        outValue.set(String.valueOf(sum));
        context.write(key, outValue);
    }
}
```

调用MapReduce作业:

```
Configuration conf = new Configuration();
conf.set("row", String.valueOf(row));
conf.set("col", String.valueOf(col));
conf.set("n", String.valueOf(n));
Job job = Job.getInstance(conf, "MatrixMultiply");
job.setJarByClass(MatrixMultiply.class);
job.setMapperClass(MatrixMapper.class);
job.setReducerClass(MatrixReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
```

以上代码实现矩阵相乘MapReduce作业,其中参数row、col和n分别表示矩阵A的行数、矩阵B的列数和矩阵A的列数(也是矩阵B的行数)。

注意:上述Reduce函数使用了变量n,但该类中并未声明,需要像Mapper一样在setup中从Configuration读取;另外调用代码没有设置"flag",运行前需要补充相应的conf.set("flag", ...)。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值