11 篇文章 0 订阅
13 篇文章 8 订阅

# Problem

M*N的矩阵A分割成s*t, N*P的矩阵B分割成t*v，

# Input

<M><i><j><m_ij>，M为矩阵，i， j为矩阵M的元素下标，m_ij为矩阵M行列下标为i，j的非0元素

A,0,1,1.0
A,0,2,2.0
A,0,3,3.0
A,0,4,4.0
A,1,0,5.0
A,1,1,6.0
A,1,2,7.0
A,1,3,8.0
A,1,4,9.0
B,0,1,1.0
B,0,2,2.0
B,1,0,3.0
B,1,1,4.0
B,1,2,5.0
B,2,0,6.0
B,2,1,7.0
B,2,2,8.0
B,3,0,9.0
B,3,1,10.0
B,3,2,11.0
B,4,0,12.0
B,4,1,13.0
B,4,2,14.0


# Output

<i><j><m_ij>，结果矩阵行列下标i， j，以及对应的值m_ij

0,0,90.0
0,1,100.0
0,2,110.0
1,0,240.0
1,1,275.0
1,2,310.0

# Pseudocode

map(key, value):
// value is ("A", i, j, a_ij) or ("B", j, k, b_jk)
if value[0] == "A":
i = value[1]
j = value[2]
a_ij = value[3]
for k_per_v = 0 to p/v - 1:
emit((i/s, j/t, k_per_v), ("A", i%s, j%t, a_ij))
else:
j = value[1]
k = value[2]
b_jk = value[3]
for i_per_s = 0 to m/s - 1:
emit((i_per_s, j/t, k/v), ("B", j%t, k%v, b_jk))

reduce(key, values):
// key is (i_per_s, j_per_t, k_per_v)
// values is a list of ("A", i_mod_s, j_mod_t, a_ij) and ("B", j_mod_t, k_mod_v, b_jk)
list_A = [(i_mod_s, j_mod_t, a_ij) for (M, i_mod_s, j_mod_t, a_ij) in values if M == "A"]
list_B = [(j_mod_t, k_mod_v, b_jk) for (M, j_mod_t, k_mod_v, b_jk) in values if M == "B"]
hash = {}
for a in list_A:
for b in list_B:
if a[1] == b[0]: // a.j_mod_t == b.j_mod_t
hash[(a[0], b[1])] += a[2]*b[2]
for {(i_mod_s, k_mod_v): v} in hash:
emit((key[0]*s + i_mod_s, key[2]*v + k_mod_v), v)

map(key, value):
emit(key, value)

reduce(key, values):
result = 0
for value in values:
result += value
emit(key, result)

import java.io.IOException;
import java.util.*;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;

public class BlockMatrixMultiplication {

public static class Map extends Mapper<LongWritable, Text, Text, Text> {
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
int m = Integer.parseInt(conf.get("m"));
int p = Integer.parseInt(conf.get("p"));
int s = Integer.parseInt(conf.get("s"));
int t = Integer.parseInt(conf.get("t"));
int v = Integer.parseInt(conf.get("v"));
int mPerS = m/s; // Number of blocks in each column of A.
int pPerV = p/v; // Number of blocks in each row of B.
String line = value.toString();
String[] indicesAndValue = line.split(",");
Text outputKey = new Text();
Text outputValue = new Text();
if (indicesAndValue[0].equals("A")) {
int i = Integer.parseInt(indicesAndValue[1]);
int j = Integer.parseInt(indicesAndValue[2]);
for (int kPerV = 0; kPerV < pPerV; kPerV++) {
outputKey.set(Integer.toString(i/s) + "," + Integer.toString(j/t) + "," + Integer.toString(kPerV));
outputValue.set("A," + Integer.toString(i%s) + "," + Integer.toString(j%t) + "," + indicesAndValue[3]);
context.write(outputKey, outputValue);
}
} else {
int j = Integer.parseInt(indicesAndValue[1]);
int k = Integer.parseInt(indicesAndValue[2]);
for (int iPerS = 0; iPerS < mPerS; iPerS++) {
outputKey.set(Integer.toString(iPerS) + "," + Integer.toString(j/t) + "," + Integer.toString(k/v));
outputValue.set("B," + Integer.toString(j%t) + "," + Integer.toString(k%v) + "," + indicesAndValue[3]);
context.write(outputKey, outputValue);
}
}
}
}

public static class Reduce extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String[] value;
ArrayList<Entry<String, Float>> listA = new ArrayList<Entry<String, Float>>();
ArrayList<Entry<String, Float>> listB = new ArrayList<Entry<String, Float>>();
for (Text val : values) {
value = val.toString().split(",");
if (value[0].equals("A")) {
listA.add(new SimpleEntry<String, Float>(value[1] + "," + value[2], Float.parseFloat(value[3])));
} else {
listB.add(new SimpleEntry<String, Float>(value[1] + "," + value[2], Float.parseFloat(value[3])));
}
}
String[] iModSAndJModT;
String[] jModTAndKModV;
float a_ij;
float b_jk;
String hashKey;
HashMap<String, Float> hash = new HashMap<String, Float>();
for (Entry<String, Float> a : listA) {
iModSAndJModT = a.getKey().split(",");
a_ij = a.getValue();
for (Entry<String, Float> b : listB) {
jModTAndKModV = b.getKey().split(",");
b_jk = b.getValue();
if (iModSAndJModT[1].equals(jModTAndKModV[0])) {
hashKey = iModSAndJModT[0] + "," + jModTAndKModV[1];
if (hash.containsKey(hashKey)) {
hash.put(hashKey, hash.get(hashKey) + a_ij*b_jk);
} else {
hash.put(hashKey, a_ij*b_jk);
}
}
}
}
String[] blockIndices = key.toString().split(",");
String[] indices;
String i;
String k;
Configuration conf = context.getConfiguration();
int s = Integer.parseInt(conf.get("s"));
int v = Integer.parseInt(conf.get("v"));
Text outputValue = new Text();
for (Entry<String, Float> entry : hash.entrySet()) {
indices = entry.getKey().split(",");
i = Integer.toString(Integer.parseInt(blockIndices[0])*s + Integer.parseInt(indices[0]));
k = Integer.toString(Integer.parseInt(blockIndices[2])*v + Integer.parseInt(indices[1]));
outputValue.set(i + "," + k + "," + Float.toString(entry.getValue()));
context.write(null, outputValue);
}
}
}

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
// A is an m-by-n matrix; B is an n-by-p matrix.
conf.set("m", "2");
conf.set("n", "5");
conf.set("p", "3");
conf.set("s", "2"); // Number of rows in a block in A.
conf.set("t", "5"); // Number of columns in a block in A = number of rows in a block in B.
conf.set("v", "3"); // Number of columns in a block in B.

Job job = new Job(conf, "MatrixMatrixMultiplicationTwoStepsBlock");
job.setJarByClass(BlockMatrixMultiplication.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

FileOutputFormat.setOutputPath(job, new Path(args[1]));

job.waitForCompletion(true);
}
}

# Analysis

1、reduce job的数目 = 所有发送给reduce任务的unique keys 的数目，即 (m*n*p) / (s*t*v)
2、矩阵A发送给每个reduce元素的个数为st
3、矩阵B发送给每个reduce元素的个数为tv

communication cost：
1、The communication cost of the first Map tasks is

2、The communication cost of the first Reduce tasks is

3、The communication cost of the second Map tasks is

4、The communication cost of the second Reduce tasks is

# Reference

08-05
11-29
04-16 1428
08-15 825
01-06 3757
06-15 195
12-13 9626
03-20 1万+
04-18 936
09-26 138
12-01 899
03-24 2142
01-15 621
06-09 4万+
04-09 3991
12-22 6029
11-28 222

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

joeywen

¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、C币套餐、付费专栏及课程。