任务:
1.查找相同字母组成的单词
一本英文书籍包含成千上万个单词,现在我们需要在大量的单词中,找出相同字母组成的所有单词。
2.编写程序实现对输入文件的排序
现在有多个输入文件,每个文件中的每行内容均为一个整数。要求读取所有文件中的整数,进行升序排序后,输出到一个新的文件中,输出的数据格式为每行两个整数,第一个数字为第二个整数的排序位次,第二个整数为原待排列的整数。
数据集:
1.查找相同字母组成的单词
cat
tar
bar
act
rat
2.编写程序实现对输入文件的排序
file1.txt:
33
37
12
40
file2.txt:
4
16
39
5
file3.txt:
1
45
25
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Anagram{
/** 排序、分组*/
public static class AnagramMapper extends Mapper<LongWritable, Text, Text, Text>{
public void map(LongWritable key, Text value,Context context) throws IOException ,
InterruptedException {
String text = value.toString(); // 将Text转换成String
char[] textCharArr = text.toCharArray();//将String转换成字符数组,为排序作准备
Arrays.sort(textCharArr); // 使用 Arrays对数组进行排序
String sortedText = new String(textCharArr); // 排序后的字符串
context.write(new Text(sortedText), value);
}}
/** 统计相同字母组成的单词*/
public static class AnagramReduce extends Reducer<Text, Text, Text, Text>{
public void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
StringBuilder value = new StringBuilder(); // 值
int count = 0; // 计数
for(Text text : values){ // 拼接单词
if(value.length() > 0){ // 分割符,
value.append(",");
}
value.append(text);
count++; // 计数
}
//因为要统计相同字母组成的单词,所以相同字母组成的单词个数大于等于2才会输出
if(count > 1){
context.write(key, new Text(value.toString()));
}
}
}
public static void main(String[] args) throws Exception {
Configuration configuration=new Configuration();
Path inpath=new Path("hdfs://192.168.109.125:8020/input");
Path outpath=new Path("hdfs://192.168.109.125:8020/output");
Job job=Job.getInstance(configuration);
job.setJarByClass(Anagram.class);
// 指定mapper、reduce
job.setMapperClass(AnagramMapper.class);
job.setReducerClass(AnagramReduce.class);
// 指定mapper、reduce的输出类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// 指定输入、输出目录
FileInputFormat.addInputPath(job, inpath);
FileOutputFormat.setOutputPath(job, outpath);
//提交作业并等待执行完成。
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class NumberSort { public static class Map extends Mapper<Object, Text, IntWritable, IntWritable> { private static IntWritable data = new IntWritable(); public void map(Object key,Text value,Context context)throws IOException, InterruptedException { String line = value.toString(); System.out.println("line:"+ line); context.write(new IntWritable(Integer.parseInt(value.toString())),new IntWritable(1)); } } public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> { private static IntWritable linenumber = new IntWritable(1); @Override protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { for (IntWritable val : values) { context.write(linenumber, key); linenumber = new IntWritable(linenumber.get() + 1); } } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf=new Configuration(); Job job=Job.getInstance(conf); job.setJarByClass(NumberSort.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path("hdfs://192.168.109.125:8020/input")); FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.109.125:8020/output")); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
输出结果:
1.查找相同字母组成的单词
act act,cat
art rat,tar
2.编写程序实现对输入文件的排序
1 1
2 4
3 5
4 12
5 16
6 25
7 33
8 37
9 39
10 40
11 45