前言
本文主要是学习MapReduce的学习笔记,对所学内容进行记录。
实验环境:
1.Linux Ubuntu 16.04
2.hadoop3.0.0
3.eclipse4.5.1
一、启动Hadoop
- 进入Hadoop启动目录
cd /apps/hadoop/sbin
- 启动Hadoop
./start-all.sh
- 输入‘jps’,启动后显示如下信息
二、环境搭建
-
打开eclipse->Window->Preferences;
-
选择Hadoop Map/Reduce,选择Hadoop包根目录,
/apps/hadoop
,点击Apply,点击OK; -
点击window–>show view–>other–>mapreduce tools–>map/reduce locations,之后页面会出现对应的标签页;
-
点击3中图标1,在Local name输入myhadoop,在DFS Master 框下Port输入8020,点击Finish,出现3中右侧页面;
-
点击3中
-
图标2,选择下图内容,出现第3步图中左侧内容
完成环境配置。
三、数据字典的使用和多目录输出
- 新建test项目,新建multiple包;
- 新建MaxMin类,即MaxMin.java,编写并保存如下代码:
package multiple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class MaxMin {
    /**
     * Mapper: scans 7-column space-separated lines and tracks, per map task,
     * the line whose 7th value (index 6) is the largest / smallest seen.
     * Both extremes are emitted under the single key "max_min" in cleanup()
     * so the reducer can combine the per-task results.
     */
    private static class MWMapper extends Mapper<LongWritable, Text, Text, WordWritable> {
        private final Text outkey = new Text();
        private final WordWritable outval = new WordWritable();
        private String maxkey = "";
        private double maxval = 0D;
        private String minkey = "";
        private double minval = 0D;
        // Tracks whether at least one valid record has been seen. The original
        // initialized both extremes to 0, so the maximum never updated on
        // all-negative data and the `minval <= 0D` branch kept re-seeding the
        // minimum on every record whenever the running minimum was <= 0.
        private boolean seen = false;

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, WordWritable>.Context context)
                throws IOException, InterruptedException {
            // split() never returns null, so the original `null != star` check was dead.
            String[] star = value.toString().split(" ");
            if (star.length == 7) {
                double v = Double.parseDouble(star[6]); // parse once instead of three times
                if (!seen || v > maxval) {
                    maxval = v;
                    maxkey = star[0];
                }
                if (!seen || v < minval) {
                    minval = v;
                    minkey = star[0];
                }
                seen = true;
            } else {
                // Malformed line: count it in the job counters instead of failing.
                context.getCounter("erro_line", "word_line").increment(1);
            }
        }

        @Override
        protected void cleanup(Mapper<LongWritable, Text, Text, WordWritable>.Context context)
                throws IOException, InterruptedException {
            if (!seen) {
                return; // no valid input: do not emit bogus ""/0.0 records
            }
            outkey.set("max_min");
            outval.setKeyName(maxkey);
            outval.setKeyval(maxval);
            context.write(outkey, outval);
            outval.setKeyName(minkey);
            outval.setKeyval(minval);
            context.write(outkey, outval);
        }
    }

    /**
     * Reducer: folds the per-mapper extremes into the global maximum and
     * minimum. The max goes to the "maxout/max-r-*" side output, the min to
     * "minout/min-r-*", and both records also go to the normal part-r-* output.
     */
    public static class MWReducer extends Reducer<Text, WordWritable, WordWritable, NullWritable> {
        private final WordWritable outkey = new WordWritable();
        // Raw type on purpose: the side outputs use Text/DoubleWritable while
        // the job's declared output types are WordWritable/NullWritable, so the
        // typed 3-arg write(...) would not compile against this context.
        @SuppressWarnings("rawtypes")
        private MultipleOutputs outputs = null;

        @Override
        @SuppressWarnings({"rawtypes", "unchecked"})
        protected void setup(Context context)
                throws IOException, InterruptedException {
            outputs = new MultipleOutputs(context);
        }

        @Override
        @SuppressWarnings("unchecked")
        protected void reduce(Text key, Iterable<WordWritable> values,
                Reducer<Text, WordWritable, WordWritable, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // Locals instead of instance fields: the original kept running
            // extremes in fields, carrying state across reduce() calls, which
            // would corrupt results if more than one key ever arrived. It also
            // shared the mapper's broken `minval <= 0D` re-seeding logic.
            String maxkey = "";
            double maxval = 0D;
            String minkey = "";
            double minval = 0D;
            boolean first = true;
            for (WordWritable w : values) {
                double v = w.getKeyval();
                if (first || v > maxval) {
                    maxval = v;
                    maxkey = w.getKeyName();
                }
                if (first || v < minval) {
                    minval = v;
                    minkey = w.getKeyName();
                }
                first = false;
            }
            if (first) {
                return; // no values for this key
            }
            outkey.setKeyName(maxkey);
            outkey.setKeyval(maxval);
            outputs.write(new Text(maxkey), new DoubleWritable(maxval), "maxout/max");
            context.write(outkey, NullWritable.get());
            outkey.setKeyName(minkey);
            outkey.setKeyval(minval);
            outputs.write(new Text(minkey), new DoubleWritable(minval), "minout/min");
            context.write(outkey, NullWritable.get());
        }

        @Override
        protected void cleanup(Reducer<Text, WordWritable, WordWritable, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // MultipleOutputs buffers its own record writers; it must be closed
            // or the side-output files may be left incomplete.
            if (outputs != null) {
                outputs.close();
            }
        }
    }

    /**
     * Job driver. Input and output paths may be supplied as the first and
     * second command-line arguments; the tutorial's hard-coded HDFS paths
     * remain the defaults, so existing invocations are unaffected.
     */
    public static void main(String[] args) throws Exception {
        String dirIn = args.length > 0 ? args[0] : "hdfs://localhost:8020/multi/input";
        String dirOut = args.length > 1 ? args[1] : "hdfs://localhost:8020/multi/output";
        Path in = new Path(dirIn);
        Path out = new Path(dirOut);
        Configuration conf = new Configuration();
        // Remove a stale output directory so the job does not fail on rerun.
        out.getFileSystem(conf).delete(out, true);
        Job job = Job.getInstance(conf, "maxword");
        job.setJarByClass(MaxMin.class);
        job.setMapperClass(MWMapper.class);
        job.setReducerClass(MWReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(WordWritable.class);
        job.setOutputKeyClass(WordWritable.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
/**
 * Hadoop Writable pairing a record's identifier (the first field of the input
 * line) with the numeric value being compared (the seventh field). Serialized
 * as a UTF string followed by a raw double.
 */
class WordWritable implements Writable {

    private String keyName = "";
    private Double keyval = 0D;

    /** Serializes this value: name first, then the double. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(keyName);
        out.writeDouble(keyval);
    }

    /** Deserializes in the same order write() produced: name, then double. */
    @Override
    public void readFields(DataInput in) throws IOException {
        keyName = in.readUTF();
        keyval = in.readDouble();
    }

    public String getKeyName() {
        return keyName;
    }

    public void setKeyName(String name) {
        keyName = name;
    }

    public Double getKeyval() {
        return keyval;
    }

    public void setKeyval(Double val) {
        keyval = val;
    }

    /** Text form used by TextOutputFormat when this object is an output key. */
    @Override
    public String toString() {
        return String.format(" [keyName=%s, keyval=%s]", keyName, keyval);
    }
}
- 运行指令
cp /apps/hadoop/etc/hadoop/{core-site.xml,hdfs-site.xml,log4j.properties} /home/dolphin/workspace/test/src
,将hadoop配置文件复制到src文件夹下; - 创建输入文件存放路径
hadoop fs -mkdir /multi
hadoop fs -mkdir /multi/input
- 将数据文件放入hadoop目录下,
hadoop fs -put /home/dolphin/Desktop/demo.txt /multi/input
,demo.txt内容如下:
1 3 5 7 9 11 13
2 4 6 8 10 12 14
3 9 27 2 4 8 16
- 运行MaxMin.java文件,得到最大值和最小值的比较结果,在output文件夹中如下所示
总结
本次实验实现的是对长度为7的一组数中最后的一个数的比较
将最大的数输出为value,其所在的数组中的第一个数作为key输出到maxout文件夹中
将最小的数输出为value,其所在的数组中的第一个数作为key输出到minout文件夹中