MapReduce Programming: Using a Data Dictionary and Multi-Directory Output


Preface

This article is a set of study notes from learning MapReduce, recording what was covered.
Experiment environment:
1. Linux Ubuntu 16.04

2. Hadoop 3.0.0

3. Eclipse 4.5.1


I. Starting Hadoop

  1. Change to the Hadoop scripts directory: cd /apps/hadoop/sbin
  2. Start Hadoop: ./start-all.sh
  3. Run jps; after startup it lists the running daemons, as shown below
    [Screenshot: jps output]
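For reference, a typical jps listing after ./start-all.sh on a single-node setup looks something like this (the process IDs will differ, and the exact set of daemons depends on your configuration):

3201 NameNode
3345 DataNode
3556 SecondaryNameNode
3790 ResourceManager
3912 NodeManager
4023 Jps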

II. Environment Setup

  1. Open Eclipse -> Window -> Preferences;

  2. Select Hadoop Map/Reduce, set the Hadoop installation root directory to /apps/hadoop, click Apply, then OK;

  3. Click Window -> Show View -> Other -> MapReduce Tools -> Map/Reduce Locations; the corresponding tab appears;
    [Screenshot: Map/Reduce Locations tab]

  4. Click icon 1 in the tab from step 3, enter myhadoop as the location name, and under DFS Master enter 8020 for Port (this must agree with the HDFS address in core-site.xml; see the snippet after this list), then click Finish; the right-hand view from step 3 appears;
    [Screenshot: New Hadoop location dialog]

  5. Click icon 2 in the tab from step 3 and select the content shown below; the left-hand view from the step 3 figure appears.
    [Screenshot: DFS Locations selection]
    This completes the environment configuration.
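The 8020 port is the HDFS NameNode RPC port. It has to match the fs.defaultFS entry in /apps/hadoop/etc/hadoop/core-site.xml, which for the single-node setup assumed here would look like:

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:8020</value>
  </property>
</configuration>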

III. Using the Data Dictionary and Multi-Directory Output

  1. Create a new project named test and, inside it, a new package named multiple;
  2. Create a new class MaxMin (MaxMin.java), then write and save the code below:
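The listing has three parts: MWMapper keeps a map-local running maximum and minimum and emits both in cleanup(); MWReducer merges those partial results and uses MultipleOutputs to write the final maximum and minimum to separate directories; and WordWritable, at the bottom of the file, is the custom "dictionary" type that carries a key name together with its value.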
package multiple;
 
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
 
public class MaxMin {
	// Mapper: tracks the running max/min of the 7th field over all lines this
	// map task sees, then emits both once in cleanup() (in-mapper combining).
	public static class MWMapper extends Mapper<LongWritable, Text, Text, WordWritable> {
		private Text outkey = new Text();
		private WordWritable outval = new WordWritable();
		private String maxkey = "";
		private Double maxval = 0D;
		private String minkey = "";
		private Double minval = 0D;
 
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, WordWritable>.Context context)
				throws IOException, InterruptedException {
			String[] star = value.toString().split(" ");

			// Expect exactly 7 numbers per line: star[0] is the key,
			// star[6] is the value being compared.
			if (star.length == 7) {
				if (maxval < Double.parseDouble(star[6])) {
					maxval = Double.parseDouble(star[6]);
					maxkey = star[0];
				}
				// minval == 0 means "not initialized yet"; seed with the first value seen.
				if (minval <= 0D) {
					minval = Double.parseDouble(star[6]);
					minkey = star[0];
				}
				if (minval > Double.parseDouble(star[6])) {
					minval = Double.parseDouble(star[6]);
					minkey = star[0];
				}
			} else {
				// Count malformed lines in a counter instead of failing the job.
				context.getCounter("error_line", "word_line").increment(1);
			}
		}
		@Override
		protected void cleanup(Mapper<LongWritable, Text, Text, WordWritable>.Context context)
				throws IOException, InterruptedException {
			// Emit the map-local max and min under one shared key ("max_min")
			// so a single reduce call sees every mapper's candidates.
			outkey.set("max_min");
			outval.setKeyName(maxkey);
			outval.setKeyval(maxval);
			context.write(outkey, outval);

			outval.setKeyName(minkey);
			outval.setKeyval(minval);
			context.write(outkey, outval);
		}
	}
 
	public static class MWReducer extends Reducer<Text, WordWritable, WordWritable, NullWritable> {

		private WordWritable outkey = new WordWritable();
		private String maxkey = "";
		private Double maxval = 0D;
		private String minkey = "";
		private Double minval = 0D;

		// MultipleOutputs lets the reducer write extra files (maxout/..., minout/...)
		// inside the job's output directory, besides the default part-r-* output.
		private MultipleOutputs<Text, DoubleWritable> outputs = null;

		@Override
		protected void setup(Context context) throws IOException, InterruptedException {
			// Raw constructor on purpose: the named outputs use Text/DoubleWritable,
			// which differ from the job's declared reduce output types.
			outputs = new MultipleOutputs(context);
		}
		@Override
		protected void reduce(Text key, Iterable<WordWritable> values,
				Reducer<Text, WordWritable, WordWritable, NullWritable>.Context context)
				throws IOException, InterruptedException {
			// Every candidate arrives under the single key "max_min";
			// scan them all to find the global max and min.
			for (WordWritable w : values) {
				if (maxval < w.getKeyval()) {
					maxval = w.getKeyval();
					maxkey = w.getKeyName();
				}
				// minval == 0 means "not initialized yet".
				if (minval <= 0D) {
					minval = w.getKeyval();
					minkey = w.getKeyName();
				}
				if (minval > w.getKeyval()) {
					minval = w.getKeyval();
					minkey = w.getKeyName();
				}
			}
			// Write the max to <output>/maxout/max-r-* and also to the default output.
			outkey.setKeyName(maxkey);
			outkey.setKeyval(maxval);
			outputs.write(new Text(maxkey), new DoubleWritable(maxval), "maxout/max");
			context.write(outkey, NullWritable.get());
			// Write the min to <output>/minout/min-r-* and also to the default output.
			outkey.setKeyName(minkey);
			outkey.setKeyval(minval);
			outputs.write(new Text(minkey), new DoubleWritable(minval), "minout/min");
			context.write(outkey, NullWritable.get());
		}
		@Override
		protected void cleanup(Reducer<Text, WordWritable, WordWritable, NullWritable>.Context context)
				throws IOException, InterruptedException {
			// Closing MultipleOutputs flushes the extra files; without this the
			// maxout/minout files may end up empty.
			outputs.close();
		}
	}
	public static void main(String[] args) throws Exception {
		// Input/output locations on HDFS; the address must match fs.defaultFS.
		String dir_in = "hdfs://localhost:8020/multi/input";
		String dir_out = "hdfs://localhost:8020/multi/output";
		Path in = new Path(dir_in);
		Path out = new Path(dir_out);

		Configuration conf = new Configuration();
		// Delete any previous output directory so the job can be rerun.
		out.getFileSystem(conf).delete(out, true);

		Job job = Job.getInstance(conf, "maxword");

		job.setJarByClass(MaxMin.class);

		job.setMapperClass(MWMapper.class);
		job.setReducerClass(MWReducer.class);

		// The map output types differ from the final (reduce) output types,
		// so both pairs must be declared explicitly.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(WordWritable.class);
		job.setOutputKeyClass(WordWritable.class);
		job.setOutputValueClass(NullWritable.class);

		FileInputFormat.addInputPath(job, in);
		FileOutputFormat.setOutputPath(job, out);
		
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

 
// The "data dictionary" of the title: a custom Writable pairing a key name
// (the first number on a line) with its numeric value, so both travel
// together from mapper to reducer.
class WordWritable implements Writable {
 
	private String keyName = "";
	private Double keyval = 0D;
 
	public String getKeyName() {
		return keyName;
	}
 
	public void setKeyName(String keyName) {
		this.keyName = keyName;
	}
 
	public Double getKeyval() {
		return keyval;
	}
 
	public void setKeyval(Double keyval) {
		this.keyval = keyval;
	}
 
	@Override
	public void write(DataOutput out) throws IOException {
		// Serialize the fields in a fixed order for Hadoop's wire format.
		out.writeUTF(keyName);
		out.writeDouble(keyval);
	}
 
	@Override
	public void readFields(DataInput in) throws IOException {
		// Deserialize in exactly the same order as write().
		this.keyName = in.readUTF();
		this.keyval = in.readDouble();
	}
 
	@Override
	public String toString() {
		return " [keyName=" + keyName + ", keyval=" + keyval + "]";
	}
 
}
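A note on the two extra outputs: the third argument to outputs.write() is a base path relative to the job's output directory, so "maxout/max" and "minout/min" become subdirectories of /multi/output. After a successful run the output directory should contain roughly the following (a sketch; the part numbers depend on the number of reduce tasks):

/multi/output/_SUCCESS
/multi/output/part-r-00000          <- default reducer output (WordWritable.toString())
/multi/output/maxout/max-r-00000    <- named output for the maximum
/multi/output/minout/min-r-00000    <- named output for the minimum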
  3. Run cp /apps/hadoop/etc/hadoop/{core-site.xml,hdfs-site.xml,log4j.properties} /home/dolphin/workspace/test/src to copy the Hadoop configuration files into the project's src folder;
  4. Create the input directory on HDFS:
hadoop fs -mkdir /multi
hadoop fs -mkdir /multi/input
  5. Upload the data file to HDFS with hadoop fs -put /home/dolphin/Desktop/demo.txt /multi/input. The contents of demo.txt are:
1 3 5 7 9 11 13
2 4 6 8 10 12 14
3 9 27 2 4 8 16
  6. Run MaxMin.java; the max and min results appear under the output folder, as shown below.
    [Screenshot: run results in the output directory]
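As a sanity check, the expected values can be read off demo.txt: the last fields of the three lines are 13, 14, and 16, so the maximum is 16 (on the line whose first number is 3) and the minimum is 13 (on the line starting with 1). With the default TextOutputFormat the named outputs should therefore contain roughly:

hadoop fs -cat /multi/output/maxout/max-r-00000
3	16.0

hadoop fs -cat /multi/output/minout/min-r-00000
1	13.0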


Summary

This experiment compares the last number of each line of 7 numbers.

The largest such number is output as the value, with the first number of its line as the key, into the maxout folder.

The smallest such number is output as the value, with the first number of its line as the key, into the minout folder.
