Hadoop (Part 5)

Hadoop_Day05


WordCount code

package org.hadoop.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WCDriver {
    public static void main(String[] args) throws Exception{
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);
        job.setJarByClass(WCDriver.class);
        job.setJobName("xljWC");
        job.setNumReduceTasks(1);

        job.setMapperClass(WCMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(WCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        Path path1 = new Path(args[0]);
        Path path2 = new Path(args[1]);

        // delete the output path first if it already exists, otherwise the job submission fails
        FileSystem fileSystem = FileSystem.get(conf);
        boolean b = fileSystem.exists(path2);
        if (b) {
            fileSystem.delete(path2, true);
        }
        FileInputFormat.addInputPath(job,path1);
        FileOutputFormat.setOutputPath(job,path2);

        job.waitForCompletion(true);

    }
}
package org.hadoop.wordcount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WCMap extends Mapper<LongWritable, Text,Text,LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // split the line on commas and emit each word with a count of 1
        String s = value.toString();
        String[] sArr = s.split(",");
        for (String s1 : sArr) {
            context.write(new Text(s1),new LongWritable(1L));
        }
    }
}
package org.hadoop.wordcount;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WCReduce extends Reducer<Text, LongWritable,Text,LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        // sum the 1s emitted by the mappers for this word
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        context.write(key,new LongWritable(sum));
    }
}
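
To try the job, package the three classes into a jar and submit it with hadoop jar, passing the input and output paths as the two program arguments, e.g. (the jar name and paths below are only placeholders):

hadoop jar wordcount.jar org.hadoop.wordcount.WCDriver /wc/input /wc/output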

Using a simplified WordCount to inspect the offsets

package org.hadoop.wordcount2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WCDriver1 {
    public static void main(String[] args) throws Exception{
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);
        job.setJarByClass(WCDriver1.class);
        job.setJobName("xljWC1");
        job.setNumReduceTasks(1);

        job.setMapperClass(WCMap1.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setReducerClass(WCReduce1.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        Path path1 = new Path(args[0]);
        Path path2 = new Path(args[1]);

        FileSystem fileSystem = FileSystem.get(conf);
        boolean b = fileSystem.exists(path2);
        if (b) {
            fileSystem.delete(path2, true);
        }
        FileInputFormat.addInputPath(job,path1);
        FileOutputFormat.setOutputPath(job,path2);

        job.waitForCompletion(true);

    }
}

package org.hadoop.wordcount2;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WCMap1 extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context) throws IOException, InterruptedException {
        // key is the byte offset at which this line starts in the file; value is the line itself
        context.write(new Text("offset " + key + "\tcontent " + value), NullWritable.get());
    }
}
package org.hadoop.wordcount2;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WCReduce1 extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Reducer<Text, NullWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException {
        // every key (offset + line) is unique, so each one is simply written straight through
        context.write(key, NullWritable.get());
    }
}
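
For example, if the input is an LF-terminated file with the two lines hello,tom and hello,jack (made-up sample contents), the job emits one record per input line, along the lines of:

offset 0    content hello,tom
offset 10    content hello,jack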


The offsets differ between CRLF (the Windows default line ending, \r\n) and LF (the Linux default, \n).
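
For instance, with the two-line sample file above, the second line starts at offset 10 under LF (the 9 bytes of hello,tom plus one byte for \n) but at offset 11 under CRLF (9 bytes plus two for \r\n); every preceding line costs one extra byte on Windows-formatted input.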


How YARN works

  • The MapReduce program is submitted on the client node. YarnRunner asks the ResourceManager for an Application; the ResourceManager returns the application's resource staging path and an application_id to YarnRunner. The client then uploads the resources the job needs to HDFS, and once the upload is finished it requests that the MapReduce ApplicationMaster be started (a client-side sketch of this submission step is shown after this list).
  • The ResourceManager turns the user's request into a task. One NodeManager picks up the task, creates a Container, and launches the MapReduce ApplicationMaster in it; the Container copies the job's resources from HDFS to the local node. The ApplicationMaster then asks the ResourceManager for resources to run the MapTasks; the ResourceManager hands the MapTask work to two other NodeManagers, which each pick up a task and create their own Containers. The ApplicationMaster sends program start scripts to those two NodeManagers, which launch the MapTasks, and each MapTask partitions and sorts its output.
  • Once all MapTasks have finished, the ApplicationMaster asks the ResourceManager for Containers to run the ReduceTasks; each ReduceTask fetches its partition's data from the MapTasks. When the program finishes, the ApplicationMaster asks the ResourceManager to deregister it.
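
As a companion to the first step, here is a minimal client-side sketch of submitting a job to YARN. It reuses the WCMap/WCReduce classes from above; the class name WCYarnSubmit and the ResourceManager hostname "rm-host" are made-up placeholders, not part of the original notes.

package org.hadoop.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WCYarnSubmit {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // run on the YARN cluster instead of the local job runner
        conf.set("mapreduce.framework.name", "yarn");
        // ResourceManager host; "rm-host" is a placeholder for the real hostname
        conf.set("yarn.resourcemanager.hostname", "rm-host");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WCYarnSubmit.class);      // this jar is staged to HDFS during submission
        job.setMapperClass(WCMap.class);
        job.setReducerClass(WCReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));   // must not exist yet

        // waitForCompletion submits the application to the ResourceManager and
        // blocks until the ApplicationMaster reports that the job has finished
        job.waitForCompletion(true);
    }
}

job.waitForCompletion(true) is the call that kicks off the whole flow described above: the job resources are uploaded to HDFS, the application is handed to the ResourceManager, and the client then polls for progress until the job finishes.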