开源 mapreduce java_MapReduce应用

Luna Knight

于 2021-02-25 18:52:52 发布

阅读量93

点赞数

文章标签：开源 mapreduce java

本文链接：https://blog.csdn.net/weixin_42139357/article/details/114668568

版权

1、MapReduce实现矩阵相乘

一. 准备数据

#!/bin/bash
#
# Generate two random matrix files to be used as input for a matrix
# multiplication job.
#
# Usage: <script> ROWS_M COLS_M COLS_N
#   $1  number of rows in M
#   $2  number of columns in M (also the number of rows in N)
#   $3  number of columns in N
#
# Output (in the current directory):
#   M_$1_$2 and N_$2_$3, one element per line in the form
#   "row,col<TAB>value", where value is a random integer in 0..99.

if [ $# -ne 3 ]
then
    echo "there must be 3 arguments to generate the two matries file!"
    exit 1
fi

# Create the output files, truncating them if they already exist.
cat /dev/null > "M_${1}_${2}"
cat /dev/null > "N_${2}_${3}"

# Matrix M: $1 rows by $2 columns.
for i in $(seq 1 "$1")
do
    for j in $(seq 1 "$2")
    do
        s=$((RANDOM%100))
        echo -e "$i,$j\t$s" >> "M_${1}_${2}"
    done
done
echo "we have built the matrix file M"

# Matrix N: $2 rows by $3 columns.
for i in $(seq 1 "$2")
do
    for j in $(seq 1 "$3")
    do
        s=$((RANDOM%100))
        echo -e "$i,$j\t$s" >> "N_${2}_${3}"
    done
done
echo "we have built the matrix file N"

用以上脚本准备矩阵数据

M_3_2:

1,1	81

1,2	13

2,1	38

2,2	46

3,1	0

3,2	2

N_2_4:

1,1	99

1,2	38

1,3	34

1,4	19

2,1	21

2,2	4

2,3	36

2,4	64

二. 计算

public class Matrix {

private static class MatrixMapper extends

Mapper {

private static int colN = 0;

private static int rowM = 0;

@Override

protected void setup(

Mapper.Context context)

throws IOException, InterruptedException {

Configuration configuration = context.getConfiguration();

colN = configuration.getInt("colN", 0);

rowM = configuration.getInt("rowM", 0);

}

@Override

protected void map(LongWritable key, Text value,

Mapper.Context context)

throws IOException, InterruptedException {

FileSplit fileSplit = (FileSplit) context.getInputSplit();

String fileName = fileSplit.getPath().getName();

String[] strings = value.toString().split(",");

int i = Integer.parseInt(strings[0]);

String[] ser = strings[1].split("\t");

int j = Integer.parseInt(ser[0]);

int val = Integer.parseInt(ser[1]);

if (fileName.startsWith("M")) {

for (int count = 1; count <= colN; count++) {

context.write(new Text(i + "," + count), new Text("M," + j

+ "," + val + ""));

}

} else {

for (int count = 1; count <= rowM; count++) {

context.write(new Text(count + "," + j), new Text("N," + i

+ "," + val + ""));

}

private static class MatrixReduce extends

Reducer {

private static int rowM = 0;

@Override

protected void setup(

Reducer.Context context)

throws IOException, InterruptedException {

Configuration configuration = context.getConfiguration();

rowM = configuration.getInt("rowM", 0);

}

@Override

protected void reduce(Text key, Iterable values,

Reducer.Context context)

throws IOException, InterruptedException {

int sumValue = 0;

int[] m_Arr = new int[rowM + 1];

int[] n_Arr = new int[rowM + 1];

for (Text value : values) {

String string = value.toString();

String[] strings = string.split(",");

if (strings[0].equals("M")) {

m_Arr[Integer.parseInt(strings[1])] = Integer

.parseInt(strings[2]);

} else {

n_Arr[Integer.parseInt(strings[1])] = Integer

.parseInt(strings[2]);

}

for (int i = 1; i

sumValue += m_Arr[i] * n_Arr[i];

}

context.write(key, new IntWritable(sumValue));

}

public static void main(String[] args) throws IllegalArgumentException,

IOException, ClassNotFoundException, InterruptedException {

Configuration configuration = HadoopConfig.getConfiguration();

configuration.setInt("colN", 4);

configuration.setInt("rowN", 2);

configuration.setInt("colM", 2);

configuration.setInt("rowM", 3);

Job job = Job.getInstance(configuration, "矩阵相乘");

job.setJarByClass(Sort.class);

job.setMapperClass(MatrixMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

job.setReducerClass(MatrixReduce.class);

FileInputFormat.addInputPath(job, new Path("/matrix"));

FileOutputFormat.setOutputPath(job, new Path("/matrixOutput"));

job.waitForCompletion(true);

System.exit(job.waitForCompletion(true) ? 0 : 1);

}

三. 结果

1,1	8292

1,2	3130

1,3	3222

1,4	2371

2,1	4728

2,2	1628

2,3	2948

2,4	3666

3,1	42

3,2	8

3,3	72

3,4	128

2、MapReduce实现倒排索引

一、准备数据

file1:

one fish

two bird

two monkey

file2:

two peach

three watermelon

二、计算

public class InvertIndex {

private static class InvertIndexMapper extends

Mapper {

@Override

protected void map(LongWritable key, Text value,

Mapper.Context context)

throws IOException, InterruptedException {

FileSplit fileSplit = (FileSplit) context.getInputSplit();

String fileName = fileSplit.getPath().toString();

String[] words = value.toString().split(" ");

for (String string : words) {

context.write(new Text(string), new Text(fileName + "#" + key.toString()));

}

private static class InvertIndexReduce extends

Reducer {

@Override

protected void reduce(Text key, Iterable values,

Reducer.Context context)

throws IOException, InterruptedException {

StringBuilder stringBuilder = new StringBuilder();

for (Text text : values) {

stringBuilder.append(text.toString()).append(";");

}

context.write(key, new Text(stringBuilder.toString()));

}

public static void main(String[] args) throws IOException,

ClassNotFoundException, InterruptedException{

Configuration configuration = HadoopConfig.getConfiguration();

Job job = Job.getInstance(configuration, "倒排索引");

job.setJarByClass(InvertIndex.class);

job.setMapperClass(InvertIndexMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(Text.class);

job.setReducerClass(InvertIndexReduce.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(Text.class);

FileInputFormat.addInputPath(job, new Path("/data"));

FileOutputFormat.setOutputPath(job, new Path("/ouput"));

job.waitForCompletion(true);

System.exit(job.waitForCompletion(true) ? 0 : 1);

}

三、结果

bird	hdfs://127.0.0.1:8020/data/file1#9;

fish	hdfs://127.0.0.1:8020/data/file1#0;

monkey	hdfs://127.0.0.1:8020/data/file1#18;

one	hdfs://127.0.0.1:8020/data/file1#0;

peach	hdfs://127.0.0.1:8020/data/file2#0;

three	hdfs://127.0.0.1:8020/data/file2#10;

two	hdfs://127.0.0.1:8020/data/file2#0;hdfs://127.0.0.1:8020/data/file1#18;hdfs://127.0.0.1:8020/data/file1#9;

watermelon	hdfs://127.0.0.1:8020/data/file2#10;

3、MapReduce实现复杂倒排索引

一、准备数据

file1:

one fish

two bird

two monkey

file2:

two peach

three watermelon

二、计算

public class ComplexInvertIndex {

private static class FileNameRecordReader extends RecordReader {

LineRecordReader lineRecordReader = new LineRecordReader();

String fileName;

@Override

public void initialize(InputSplit split, TaskAttemptContext context)

throws IOException, InterruptedException {

lineRecordReader.initialize(split, context);

fileName = ((FileSplit) split).getPath().getName();

}

@Override

public boolean nextKeyValue() throws IOException, InterruptedException {

return lineRecordReader.nextKeyValue();

}

@Override

public Text getCurrentKey() throws IOException, InterruptedException {

return new Text(fileName);

}

@Override

public Text getCurrentValue() throws IOException, InterruptedException {

return lineRecordReader.getCurrentValue();

}

@Override

public float getProgress() throws IOException, InterruptedException {

return lineRecordReader.getProgress();

}

@Override

public void close() throws IOException {

lineRecordReader.close();

}

private static class FileNameInputFormat extends

FileInputFormat {

@Override

public RecordReader createRecordReader(InputSplit split,

TaskAttemptContext context) throws IOException,

InterruptedException {

FileNameRecordReader fileNameRecordReader = new FileNameRecordReader();

fileNameRecordReader.initialize(split, context);

return fileNameRecordReader;

}

private static class ComplexInvertIndexMapper extends

Mapper {

@Override

protected void map(Text key, Text value,

Mapper.Context context)

throws IOException, InterruptedException {

String[] strs = value.toString().split(" ");

for (String string : strs) {

context.write(new Text( string+"#"+key.toString() ),new IntWritable(1));

}

private static class ComplexInvertIndexCombiner extends

Reducer {

@Override

protected void reduce(Text key, Iterable values,

Reducer.Context context)

throws IOException, InterruptedException {

int sum = 0;

for (IntWritable value : values) {

sum += value.get();

}

context.write(key,new IntWritable(sum));

System.out.println(key.toString() + sum +"");

}

//把key的前面字段聚合，排序

private static class InvertIndexPartitioner extends

HashPartitioner {

@Override

public int getPartition(Text key, IntWritable value, int numReduceTasks) {

String[] strs = key.toString().split("#");

return super.getPartition(new Text(strs[0]), value, numReduceTasks);

}

private static class ComplexInvertIndexReduce extends

Reducer {

static Map map = new HashMap();

@Override

protected void reduce(Text key, Iterable values,

Reducer.Context context)

throws IOException, InterruptedException {

String[] strings = key.toString().split("#");

String word = strings[0];

String doc = strings[1];

int sum = 0;

for(IntWritable value : values){

sum = sum + value.get();

}

if( map.get(word) == null ){

map.put(word," ("+doc+","+sum+") ");

}else{

map.put(word,map.get(word)+" ("+doc+","+sum+") ");

}

@Override

protected void cleanup(

Reducer.Context context)

throws IOException, InterruptedException {

for(String key:map.keySet()){

context.write(new Text(key), new Text(map.get(key)));

}

public static void main(String[] args)throws IOException,

ClassNotFoundException, InterruptedException{

Configuration configuration = HadoopConfig.getConfiguration();

Job job = Job.getInstance(configuration, "复杂倒排索引");

job.setJarByClass(ComplexInvertIndex.class);

job.setInputFormatClass(FileNameInputFormat.class);

job.setMapperClass(ComplexInvertIndexMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(IntWritable.class);

job.setCombinerClass(ComplexInvertIndexCombiner.class);

job.setReducerClass(ComplexInvertIndexReduce.class);

job.setPartitionerClass(InvertIndexPartitioner.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(Text.class);

FileInputFormat.addInputPath(job, new Path("/data"));

FileOutputFormat.setOutputPath(job, new Path("/ouputdata"));

job.waitForCompletion(true);

System.exit(job.waitForCompletion(true) ? 0 : 1);

}

三、结果查看

monkey (file1,1)

bird (file1,1)

fish (file1,1)

one (file1,1)

peach (file2,1)

watermelon (file2,1)

three (file2,1)

two (file1,2) (file2,1)

Luna Knight

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
开源 mapreduce java_MapReduce应用

1、MapReduce实现矩阵相乘一. 准备数据#!/bin/bashif[$#-ne3]thenecho"theremustbe3argumentstogeneratethetwomatriesfile!"exit1ficat/dev/null>M_$1_$2cat/dev/null>N_$2_$3foriin`seq1$1`d...
复制链接

扫一扫