大数据第三章 mapreduce实践

ww20110863

已于 2024-07-23 19:14:29 修改

阅读量117

点赞数 3

文章标签： mapreduce 大数据

于 2024-07-19 22:16:36 首次发布

本文链接：https://blog.csdn.net/ww20110863/article/details/140560672

版权

mapreduce实战

需求：给定若干文件，统计每个单词在各个文件中出现的次数（即建立倒排索引）

比如有a.txt,b.txt,c.txt希望的输出结果为：
hello a.txt-->4 b.txt-->4 c.txt-->4
java c.txt-->1

第一次 MapReduce 先以 word--filename（单词--文件名）作为 key，统计每个单词在每个文件中出现的次数

/**
 * First-pass mapper of the inverted-index job.
 *
 * <p>For every word of every input line it emits the pair
 * {@code ("word--fileName", 1)}, where {@code fileName} is the name of the
 * file behind the current input split. Grouping on that composite key lets
 * the reducer count occurrences per (word, file) combination.
 */
public class OneIndexMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
	// Name of the file behind the current input split; assigned in setup().
	String name;

	/**
	 * Captures the name of the file this mapper is reading.
	 *
	 * @param context task context used to obtain the input split
	 * @throws IOException declared by the overridden method
	 * @throws InterruptedException declared by the overridden method
	 */
	@Override
	protected void setup(Mapper<LongWritable, Text, Text, IntWritable>.Context context)
			throws IOException, InterruptedException {

		// getInputSplit() is declared with the abstract InputSplit type because
		// input may come from files, a database, etc. This job assumes
		// file-based input, so the split is narrowed to FileSplit.
		FileSplit inputSplit = (FileSplit) context.getInputSplit();
		name = inputSplit.getPath().getName();
	}

	// Output key/value objects, reused for every emitted pair.
	Text k = new Text();
	IntWritable v = new IntWritable(1);

	/**
	 * Emits {@code ("word--fileName", 1)} for each word of the line.
	 *
	 * @param key     input key of the record (not used)
	 * @param value   one line of text, e.g. {@code "atguigu pingping"}
	 * @param context sink for the emitted pairs
	 * @throws IOException if writing to the context fails
	 * @throws InterruptedException if writing to the context is interrupted
	 */
	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
			throws IOException, InterruptedException {

		// 1. Read the line.
		String line = value.toString();

		// 2. Split on runs of whitespace. Splitting on a single space would
		//    yield empty tokens for blank lines and repeated spaces, and would
		//    leave tabs inside a word, which breaks the tab-separated output
		//    that the second pass parses.
		String[] fields = line.split("\\s+");

		// 3. Emit one pair per real word.
		for (String word : fields) {
			if (word.isEmpty()) {
				// Blank line or leading whitespace: nothing to count.
				continue;
			}

			// The composite key is the word joined to the file name.
			k.set(word + "--" + name);

			context.write(k, v);
		}
	}
}

/**
 * First-pass reducer of the inverted-index job: adds up all counts received
 * for one key and writes the pair (key, total).
 */
public class OneIndexReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

	// Output value object, overwritten for every key.
	IntWritable v = new IntWritable();

	/**
	 * Sums the values of a single key and emits the total.
	 *
	 * @param key     the key being reduced
	 * @param values  the counts collected for that key
	 * @param context sink for the (key, total) pair
	 * @throws IOException if writing to the context fails
	 * @throws InterruptedException if writing to the context is interrupted
	 */
	@Override
	protected void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {

		// Accumulate every partial count for this key.
		int total = 0;
		for (IntWritable count : values) {
			total = total + count.get();
		}

		// Publish the total under the unchanged key.
		v.set(total);
		context.write(key, v);
	}
}

/**
 * Driver for the first pass of the inverted-index job. It wires
 * {@link OneIndexMapper} and {@link OneIndexReducer} into a job, runs it and
 * exits with a status code reflecting the outcome.
 */
public class OneIndexDriver {

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path and {@code args[1]} the
	 *             output path; when fewer than two arguments are given the
	 *             sample local paths below are used instead
	 * @throws Exception if the job cannot be configured or executed
	 */
	public static void main(String[] args) throws Exception {

		// Use the sample paths only when none were supplied on the command
		// line; adjust them to match the local machine.
		if (args == null || args.length < 2) {
			args = new String[] { "e:/input/inputoneindex", "e:/output5" };
		}

		Configuration conf = new Configuration();

		Job job = Job.getInstance(conf);
		job.setJarByClass(OneIndexDriver.class);

		job.setMapperClass(OneIndexMapper.class);
		job.setReducerClass(OneIndexReducer.class);

		// Both the map output and the final output are (Text, IntWritable).
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Report the outcome through the exit code, as TwoIndexDriver does.
		boolean result = job.waitForCompletion(true);
		System.exit(result ? 0 : 1);

	}
}

第二次用第一次生成的结果作为输入，再进行一次 MapReduce，生成需要的格式（注意：按下面代码中的默认路径，需要先把第一次的输出 e:/output5 中的结果文件放到第二次的输入目录 e:/input/inputtwoindex 下）

/**
 * Second-pass mapper of the inverted-index job.
 *
 * <p>Each input line is a first-pass result of the form
 * {@code word--fileName<TAB>count}. The line is cut at the {@code "--"}
 * separator so that the word becomes the key and
 * {@code fileName<TAB>count} becomes the value.
 */
public class TwoIndexMapper extends Mapper<LongWritable, Text, Text, Text>{
	
	// Separator placed between word and file name by the first pass.
	private static final String SEPARATOR = "--";
	
	// Output key/value objects, reused for every emitted pair.
	Text k = new Text();
	Text v = new Text();
	
	/**
	 * Splits one first-pass line into (word, "fileName<TAB>count").
	 *
	 * @param key     input key of the record (not used)
	 * @param value   one line such as {@code "atguigu--a.txt<TAB>3"}
	 * @param context sink for the emitted pair
	 * @throws IOException if writing to the context fails
	 * @throws InterruptedException if writing to the context is interrupted
	 */
	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
			throws IOException, InterruptedException {
		
		// Example input:
		//   atguigu--a.txt	3
		//   atguigu--b.txt	2
		//   atguigu--c.txt	2
		
		// 1. Read the line.
		String line = value.toString();
		
		// 2. Locate the LAST separator, so a word that itself contains "--"
		//    or ends in "-" stays intact. This assumes file names do not
		//    contain "--".
		int cut = line.lastIndexOf(SEPARATOR);
		int valueStart = cut + SEPARATOR.length();
		if (cut < 0 || valueStart >= line.length()) {
			// Blank or malformed line: skip it instead of failing the task
			// with an ArrayIndexOutOfBoundsException.
			return;
		}
		
		// 3. Wrap the two halves.
		k.set(line.substring(0, cut));
		v.set(line.substring(valueStart));
		
		// 4. Emit.
		context.write(k, v);
	}
}

/**
 * Second-pass reducer of the inverted-index job: joins every
 * {@code fileName<TAB>count} value of a word into a single line of
 * {@code fileName-->count} entries.
 */
public class TwoIndexReducer extends Reducer<Text, Text, Text, Text>{
	
	// Output value object, overwritten for every key.
	Text v = new Text();
	
	/**
	 * Concatenates the per-file counts of one word.
	 *
	 * @param key     the word
	 * @param values  entries of the form {@code fileName<TAB>count}
	 * @param context sink for the (word, joined entries) pair
	 * @throws IOException if writing to the context fails
	 * @throws InterruptedException if writing to the context is interrupted
	 */
	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		
		// Input for one key, e.g.:
		//   atguigu   a.txt	3
		//             b.txt	2
		//             c.txt	2
		// Output:
		//   atguigu	c.txt-->2	b.txt-->2	a.txt-->3	
		
		// 1. Build the joined string. The buffer is local to this call, so
		//    the unsynchronized StringBuilder is sufficient.
		StringBuilder sb = new StringBuilder();
		
		for (Text value : values) {
			// Turn "fileName<TAB>count" into "fileName-->count"; each entry
			// is followed by a tab, the last one included.
			sb.append(value.toString().replace("\t", "-->")).append("\t");
		}
		
		v.set(sb.toString());

		// 2. Emit.
		context.write(key, v);
	}
}

/**
 * Driver for the second pass of the inverted-index job. It wires
 * {@link TwoIndexMapper} and {@link TwoIndexReducer} into a job, runs it and
 * exits with a status code reflecting the outcome.
 */
public class TwoIndexDriver {

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path (the first-pass results)
	 *             and {@code args[1]} the output path; when fewer than two
	 *             arguments are given the sample local paths below are used
	 * @throws Exception if the job cannot be configured or executed
	 */
	public static void main(String[] args) throws Exception {
		
		// Use the sample paths only when none were supplied on the command
		// line; adjust them to match the local machine.
		if (args == null || args.length < 2) {
			args = new String[] { "e:/input/inputtwoindex", "e:/output6" };
		}

		Configuration config = new Configuration();
		Job job = Job.getInstance(config);

		job.setJarByClass(TwoIndexDriver.class);
		job.setMapperClass(TwoIndexMapper.class);
		job.setReducerClass(TwoIndexReducer.class);

		// Both the map output and the final output are (Text, Text).
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Exit 0 on success, 1 on failure.
		boolean result = job.waitForCompletion(true);
		System.exit(result ? 0 : 1);

	}
}