MapReduce Job Submission
Seed some test data first:
[root@CentOS ~]# vim words
When I am down
You raise me up
I believe I can fly
Upload it to HDFS:
[root@CentOS ~]# hdfs dfs -mkdir /demo
[root@CentOS ~]# hdfs dfs -put words /demo/words
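You can check that the file arrived before running anything:
[root@CentOS ~]# hdfs dfs -cat /demo/words
When I am down
You raise me up
I believe I can fly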
1) Submitting as a jar package
Add the following call to the driver so Hadoop can locate the jar that contains the job's classes and ship it to the cluster:
job.setJarByClass(CustomJobSubmitter.class);
The complete driver is as follows:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class CustomJobSubmitter {
    public static void main(String[] args) throws Exception {
        // 1. Create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(CustomJobSubmitter.class);
        // 2. Set the input/output format classes, which decide how data is read and written
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // 3. Set the input and output paths (delete the output path if it already exists)
        Path src = new Path("/demo/words");
        Path res = new Path("/demo/res");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(res)) {
            fileSystem.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        // 4. Set the processing logic
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        // 5. Set the key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 6. Submit the job and wait for completion
        job.waitForCompletion(true);
    }

    public static class WordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit (word, 1) for every token on the line
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                context.write(new Text(token), new IntWritable(1));
            }
        }
    }

    public static class WordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all counts for the word
            int total = 0;
            for (IntWritable value : values) {
                total += value.get();
            }
            context.write(key, new IntWritable(total));
        }
    }
}
Package the project with Maven.
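For example, assuming a standard Maven project layout, run this from the project root; the jar is produced under target/:
mvn clean package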
Upload the built jar to the Linux machine (here via the lrzsz utility):
[root@CentOS ~]# rz
[root@CentOS ~]# hadoop jar job_submit-1.0-SNAPSHOT.jar com.fdh.submit.CustomJobSubmitter
18/05/25 22:33:33 INFO client.RMProxy: Connecting to ResourceManager at CentOS/192.168.199.128:8032
18/05/25 22:33:34 WARN mapreduce.JobSubmitter: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
18/05/25 22:33:34 INFO input.FileInputFormat: Total input paths to process : 1
18/05/25 22:33:34 INFO mapreduce.JobSubmitter: number of splits:1
18/05/25 22:33:35 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1527258142264_0002
18/05/25 22:33:36 INFO impl.YarnClientImpl: Submitted application application_1527258142264_0002
18/05/25 22:33:36 INFO mapreduce.Job: The url to track the job: http://CentOS:8088/proxy/application_1527258142264_0002/
18/05/25 22:33:36 INFO mapreduce.Job: Running job: job_1527258142264_0002
18/05/25 22:33:56 INFO mapreduce.Job: Job job_1527258142264_0002 running in uber mode : false
18/05/25 22:33:56 INFO mapreduce.Job: map 0% reduce 0%
18/05/25 22:34:14 INFO mapreduce.Job: map 100% reduce 0%
18/05/25 22:34:27 INFO mapreduce.Job: map 100% reduce 100%
18/05/25 22:34:28 INFO mapreduce.Job: Job job_1527258142264_0002 completed successfully
18/05/25 22:34:29 INFO mapreduce.Job: Counters: 49
        File System Counters
                FILE: Number of bytes read=135
                FILE: Number of bytes written=212181
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=145
                HDFS: Number of bytes written=69
                HDFS: Number of read operations=6
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=2
        Job Counters
                Launched map tasks=1
                Launched reduce tasks=1
                Data-local map tasks=1
                Total time spent by all maps in occupied slots (ms)=16336
                Total time spent by all reduces in occupied slots (ms)=8814
                Total time spent by all map tasks (ms)=16336
                Total time spent by all reduce tasks (ms)=8814
                Total vcore-seconds taken by all map tasks=16336
                Total vcore-seconds taken by all reduce tasks=8814
                Total megabyte-seconds taken by all map tasks=16728064
                Total megabyte-seconds taken by all reduce tasks=9025536
        Map-Reduce Framework
                Map input records=3
                Map output records=13
                Map output bytes=103
                Map output materialized bytes=135
                Input split bytes=94
                Combine input records=0
                Combine output records=0
                Reduce input groups=11
                Reduce shuffle bytes=135
                Reduce input records=13
                Reduce output records=11
                Spilled Records=26
                Shuffled Maps =1
                Failed Shuffles=0
                Merged Map outputs=1
                GC time elapsed (ms)=515
                CPU time spent (ms)=2280
                Physical memory (bytes) snapshot=263921664
                Virtual memory (bytes) snapshot=1687027712
                Total committed heap usage (bytes)=129011712
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters
                Bytes Read=51
        File Output Format Counters
                Bytes Written=69
View the MapReduce result:
[root@CentOS ~]# hdfs dfs -cat /demo/res/part-r-00000
I 3
When 1
You 1
am 1
believe 1
can 1
down 1
fly 1
me 1
raise 1
up 1
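As an aside, the WARN line in the submission log ("Implement the Tool interface and execute your application with ToolRunner to remedy this.") can be addressed by letting ToolRunner drive the job. A minimal sketch of that variant, assuming the WordsMapper and WordsReducer inner classes stay exactly as defined above:
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class CustomJobSubmitter extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic options (-D key=value, -files, ...) into the
        // Configuration before run() is invoked
        System.exit(ToolRunner.run(new CustomJobSubmitter(), args));
    }

    @Override
    public int run(String[] args) throws Exception {
        // Same job setup as above, but using the Configuration supplied by ToolRunner
        Job job = Job.getInstance(getConf());
        job.setJarByClass(CustomJobSubmitter.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        Path src = new Path("/demo/words");
        Path res = new Path("/demo/res");
        FileSystem fs = FileSystem.get(getConf());
        if (fs.exists(res)) {
            fs.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    // WordsMapper and WordsReducer inner classes unchanged from above
}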
A more advanced variant
Set up a launcher entry point, so that submitting the job only requires uploading the jar to the Linux machine and running a simple launcher:
[root@CentOS ~]# cat Start.java
public class Start {
    public static void main(String[] args) throws Exception {
        // Launch the hadoop jar command as a child process
        Process p = Runtime.getRuntime().exec(
                "hadoop jar job_submit-1.0-SNAPSHOT.jar com.fdh.submit.CustomJobSubmitter");
        // exec() returns immediately, so block until the submission finishes
        p.waitFor();
    }
}
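Runtime.exec() only spawns the child process and returns immediately; waitFor() blocks until the hadoop command exits. To actually see the job's console output, the child process's output streams would also have to be read.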
[root@CentOS ~]# javac Start.java
[root@CentOS ~]# java Start
Viewing the MapReduce result at this point gives the same output:
[root@CentOS ~]# hdfs dfs -cat /demo/res/part-r-00000
I 3
When 1
You 1
am 1
believe 1
can 1
down 1
fly 1
me 1
raise 1
up 1
2) Cross-platform job submission
Copy Hadoop's four configuration files from the cluster into the project directory:
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/core-site.xml
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/hdfs-site.xml
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/mapred-site.xml
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/yarn-site.xml
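For the conf.addResource("...") calls below to find these files, put them on the application classpath, e.g. under src/main/resources in a Maven project.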
Edit mapred-site.xml and add the following property:
<property>
    <name>mapreduce.app-submission.cross-platform</name>
    <value>true</value>
</property>
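This switch makes the MapReduce client generate platform-independent container launch commands and classpath settings, which is what allows a job client running on Windows to submit to a Linux cluster.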
Package the project with Maven's package goal and note the path of the generated jar (it is needed for the mapreduce.job.jar setting below).
After adding the configuration, run package again. The driver now loads the cluster configuration and points mapreduce.job.jar at the packaged jar:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class CustomJobSubmitter {
    public static void main(String[] args) throws Exception {
        // 1. Create the job, loading the cluster configuration copied from the server
        Configuration conf = new Configuration();
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        conf.addResource("mapred-site.xml");
        conf.addResource("yarn-site.xml");
        // Point the client at the local jar that must be shipped to the cluster
        conf.set("mapreduce.job.jar", "file:///E:/ideaWorkspace/hadoop/job_submit/target/job_submit-1.0-SNAPSHOT.jar");
        Job job = Job.getInstance(conf);
        job.setJarByClass(CustomJobSubmitter.class);
        // 2. Set the input/output format classes, which decide how data is read and written
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // 3. Set the input and output paths on the cluster's HDFS; core-site.xml already
        //    makes hdfs://CentOS:9000 the default file system, so the fully qualified
        //    output path can be checked and deleted directly
        Path src = new Path("hdfs://CentOS:9000/demo/words");
        Path res = new Path("hdfs://CentOS:9000/demo/res");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(res)) {
            fileSystem.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        // 4. Set the processing logic
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        // 5. Set the key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 6. Submit the job and wait for completion
        job.waitForCompletion(true);
    }

    public static class WordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit (word, 1) for every token on the line
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                context.write(new Text(token), new IntWritable(1));
            }
        }
    }

    public static class WordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all counts for the word
            int total = 0;
            for (IntWritable value : values) {
                total += value.get();
            }
            context.write(key, new IntWritable(total));
        }
    }
}
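With this in place the job can be submitted by running main() directly from the Windows IDE; the jar named in mapreduce.job.jar is uploaded to the cluster automatically, so no manual rz/upload step is needed.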
3) Local simulation (no real YARN cluster required; used during testing)
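Because no cluster configuration is loaded here, mapreduce.framework.name keeps its default value local and fs.defaultFS defaults to file:///, so the whole job runs inside the current JVM via LocalJobRunner, reading from and writing to the local file system.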
Directory layout: (screenshot omitted)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class CustomJobSubmitter {
    public static void main(String[] args) throws Exception {
        // 1. Create the job; with no cluster configuration this runs in local mode
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Set the input/output format classes, which decide how data is read and written
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // 3. Set the input and output paths on the local file system
        Path src = new Path("file:///E:/hadoop_test/demo/words");
        Path res = new Path("file:///E:/hadoop_test/demo/res");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(res)) {
            fileSystem.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        // 4. Set the processing logic
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        // 5. Set the key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 6. Submit the job and wait for completion
        job.waitForCompletion(true);
    }

    public static class WordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit (word, 1) for every token on the line
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                context.write(new Text(token), new IntWritable(1));
            }
        }
    }

    public static class WordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all counts for the word
            int total = 0;
            for (IntWritable value : values) {
                total += value.get();
            }
            context.write(key, new IntWritable(total));
        }
    }
}
When running it, an exception appears at this point (the original screenshot is omitted). The fix is simply to change the offending line so that it returns true.
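For reference: when running MapReduce locally on Windows this is typically the UnsatisfiedLinkError on NativeIO$Windows.access0, and the common workaround matching the "return true" fix is to copy org.apache.hadoop.io.nativeio.NativeIO from the Hadoop source into the project under the same package (so it shadows the class in the jar) and modify the access check. A sketch, assuming that is the exception seen here:
// In src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java (copied from the
// Hadoop 2.6.0 sources), inside the nested Windows class:
public static boolean access(String path, AccessRight desiredAccess)
        throws IOException {
    // The original body delegates to the native access0() call, which fails
    // when winutils.exe/hadoop.dll are not installed; always granting access
    // is good enough for local testing.
    return true;
}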