MapReduce Job Submission
Seed some test data first:
[root@CentOS ~]# vim words
When I am down
You raise me up
I believe I can fly
Upload it to HDFS:
[root@CentOS ~]# hdfs dfs -mkdir /demo
[root@CentOS ~]# hdfs dfs -put words /demo/words
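You can check that the file arrived before running anything:
[root@CentOS ~]# hdfs dfs -cat /demo/words
When I am down
You raise me up
I believe I can fly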
1) Submitting as a jar package
Add the following call to the driver so Hadoop can locate the jar that contains the job's classes and ship it to the cluster:
job.setJarByClass(CustomJobSubmitter.class);
The complete driver is as follows:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class CustomJobSubmitter {
    public static void main(String[] args) throws Exception {
        // 1. Create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(CustomJobSubmitter.class);
        // 2. Set the input/output format classes, which decide how data is read and written
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // 3. Set the input and output paths (delete the output path if it already exists)
        Path src = new Path("/demo/words");
        Path res = new Path("/demo/res");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(res)) {
            fileSystem.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        // 4. Set the processing logic
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        // 5. Set the key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 6. Submit the job and wait for completion
        job.waitForCompletion(true);
    }

    public static class WordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit (word, 1) for every token on the line
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                context.write(new Text(token), new IntWritable(1));
            }
        }
    }

    public static class WordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all counts for the word
            int total = 0;
            for (IntWritable value : values) {
                total += value.get();
            }
            context.write(key, new IntWritable(total));
        }
    }
}
Package the project with Maven.
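For example, assuming a standard Maven project layout, run this from the project root; the jar is produced under target/:
mvn clean package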
Upload the built jar to the Linux machine (here via the lrzsz utility):
[root@CentOS ~]# rz
[root@CentOS ~]# hadoop jar job_submit-1.0-SNAPSHOT.jar com.fdh.submit.CustomJobSubmitter
18/05/25 22:33:33 INFO client.RMProxy: Connecting to ResourceManager at CentOS/192.168.199.128:8032
18/05/25 22:33:34 WARN mapreduce.JobSubmitter: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
18/05/25 22:33:34 INFO input.FileInputFormat: Total input paths to process : 1
18/05/25 22:33:34 INFO mapreduce.JobSubmitter: number of splits:1
18/05/25 22:33:35 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1527258142264_0002
18/05/25 22:33:36 INFO impl.YarnClientImpl: Submitted application application_1527258142264_0002
18/05/25 22:33:36 INFO mapreduce.Job: The url to track the job: http://CentOS:8088/proxy/application_1527258142264_0002/
18/05/25 22:33:36 INFO mapreduce.Job: Running job: job_1527258142264_0002
18/05/25 22:33:56 INFO mapreduce.Job: Job job_1527258142264_0002 running in uber mode : false
18/05/25 22:33:56 INFO mapreduce.Job: map 0% reduce 0%
18/05/25 22:34:14 INFO mapreduce.Job: map 100% reduce 0%
18/05/25 22:34:27 INFO mapreduce.Job: map 100% reduce 100%
18/05/25 22:34:28 INFO mapreduce.Job: Job job_1527258142264_0002 completed successfully
18/05/25 22:34:29 INFO mapreduce.Job: Counters: 49
        File System Counters
                FILE: Number of bytes read=135
                FILE: Number of bytes written=212181
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=145
                HDFS: Number of bytes written=69
                HDFS: Number of read operations=6
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=2
        Job Counters
                Launched map tasks=1
                Launched reduce tasks=1
                Data-local map tasks=1
                Total time spent by all maps in occupied slots (ms)=16336
                Total time spent by all reduces in occupied slots (ms)=8814
                Total time spent by all map tasks (ms)=16336
                Total time spent by all reduce tasks (ms)=8814
                Total vcore-seconds taken by all map tasks=16336
                Total vcore-seconds taken by all reduce tasks=8814
                Total megabyte-seconds taken by all map tasks=16728064
                Total megabyte-seconds taken by all reduce tasks=9025536
        Map-Reduce Framework
                Map input records=3
                Map output records=13
                Map output bytes=103
                Map output materialized bytes=135
                Input split bytes=94
                Combine input records=0
                Combine output records=0
                Reduce input groups=11
                Reduce shuffle bytes=135
                Reduce input records=13
                Reduce output records=11
                Spilled Records=26
                Shuffled Maps =1
                Failed Shuffles=0
                Merged Map outputs=1
                GC time elapsed (ms)=515
                CPU time spent (ms)=2280
                Physical memory (bytes) snapshot=263921664
                Virtual memory (bytes) snapshot=1687027712
                Total committed heap usage (bytes)=129011712
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters
                Bytes Read=51
        File Output Format Counters
                Bytes Written=69
View the MapReduce result:
[root@CentOS ~]# hdfs dfs -cat /demo/res/part-r-00000
I 3
When 1
You 1
am 1
believe 1
can 1
down 1
fly 1
me 1
raise 1
up 1
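As an aside, the WARN line in the submission log ("Implement the Tool interface and execute your application with ToolRunner to remedy this.") can be addressed by letting ToolRunner drive the job. A minimal sketch of that variant, assuming the WordsMapper and WordsReducer inner classes stay exactly as defined above:
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class CustomJobSubmitter extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic options (-D key=value, -files, ...) into the
        // Configuration before run() is invoked
        System.exit(ToolRunner.run(new CustomJobSubmitter(), args));
    }

    @Override
    public int run(String[] args) throws Exception {
        // Same job setup as above, but using the Configuration supplied by ToolRunner
        Job job = Job.getInstance(getConf());
        job.setJarByClass(CustomJobSubmitter.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        Path src = new Path("/demo/words");
        Path res = new Path("/demo/res");
        FileSystem fs = FileSystem.get(getConf());
        if (fs.exists(res)) {
            fs.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    // WordsMapper and WordsReducer inner classes unchanged from above
}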
A more advanced variant
Set up a launcher entry point, so that submitting the job only requires uploading the jar to the Linux machine and running a simple launcher:
[root@CentOS ~]# cat Start.java
public class Start {
    public static void main(String[] args) throws Exception {
        // Launch the hadoop jar command as a child process
        Process p = Runtime.getRuntime().exec(
                "hadoop jar job_submit-1.0-SNAPSHOT.jar com.fdh.submit.CustomJobSubmitter");
        // exec() returns immediately, so block until the submission finishes
        p.waitFor();
    }
}
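Runtime.exec() only spawns the child process and returns immediately; waitFor() blocks until the hadoop command exits. To actually see the job's console output, the child process's output streams would also have to be read.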
[root@CentOS ~]# javac Start.java
[root@CentOS ~]# java Start
Viewing the MapReduce result at this point gives the same output:
[root@CentOS ~]# hdfs dfs -cat /demo/res/part-r-00000
I 3
When 1
You 1
am 1
believe 1
can 1
down 1
fly 1
me 1
raise 1
up 1
2) Cross-platform job submission
Copy Hadoop's four configuration files from the cluster into the project directory:
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/core-site.xml
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/hdfs-site.xml
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/mapred-site.xml
[root@CentOS ~]# sz /usr/hadoop-2.6.0/etc/hadoop/yarn-site.xml
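For the conf.addResource("...") calls below to find these files, put them on the application classpath, e.g. under src/main/resources in a Maven project.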
Edit mapred-site.xml and add the following property:
<property>
    <name>mapreduce.app-submission.cross-platform</name>
    <value>true</value>
</property>
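This switch makes the MapReduce client generate platform-independent container launch commands and classpath settings, which is what allows a job client running on Windows to submit to a Linux cluster.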
Package the project with Maven's package goal and note the path of the generated jar (it is needed for the mapreduce.job.jar setting below).
After adding the configuration, run package again. The driver now loads the cluster configuration and points mapreduce.job.jar at the packaged jar:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class CustomJobSubmitter {
    public static void main(String[] args) throws Exception {
        // 1. Create the job, loading the cluster configuration copied from the server
        Configuration conf = new Configuration();
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        conf.addResource("mapred-site.xml");
        conf.addResource("yarn-site.xml");
        // Point the client at the local jar that must be shipped to the cluster
        conf.set("mapreduce.job.jar", "file:///E:/ideaWorkspace/hadoop/job_submit/target/job_submit-1.0-SNAPSHOT.jar");
        Job job = Job.getInstance(conf);
        job.setJarByClass(CustomJobSubmitter.class);
        // 2. Set the input/output format classes, which decide how data is read and written
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // 3. Set the input and output paths on the cluster's HDFS; core-site.xml already
        //    makes hdfs://CentOS:9000 the default file system, so the fully qualified
        //    output path can be checked and deleted directly
        Path src = new Path("hdfs://CentOS:9000/demo/words");
        Path res = new Path("hdfs://CentOS:9000/demo/res");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(res)) {
            fileSystem.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        // 4. Set the processing logic
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        // 5. Set the key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 6. Submit the job and wait for completion
        job.waitForCompletion(true);
    }

    public static class WordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit (word, 1) for every token on the line
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                context.write(new Text(token), new IntWritable(1));
            }
        }
    }

    public static class WordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all counts for the word
            int total = 0;
            for (IntWritable value : values) {
                total += value.get();
            }
            context.write(key, new IntWritable(total));
        }
    }
}
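With this in place the job can be submitted by running main() directly from the Windows IDE; the jar named in mapreduce.job.jar is uploaded to the cluster automatically, so no manual rz/upload step is needed.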
3) Local simulation (no real YARN cluster required; used during testing)
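Because no cluster configuration is loaded here, mapreduce.framework.name keeps its default value local and fs.defaultFS defaults to file:///, so the whole job runs inside the current JVM via LocalJobRunner, reading from and writing to the local file system.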
Directory layout: (screenshot omitted)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class CustomJobSubmitter {
    public static void main(String[] args) throws Exception {
        // 1. Create the job; with no cluster configuration this runs in local mode
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Set the input/output format classes, which decide how data is read and written
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // 3. Set the input and output paths on the local file system
        Path src = new Path("file:///E:/hadoop_test/demo/words");
        Path res = new Path("file:///E:/hadoop_test/demo/res");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(res)) {
            fileSystem.delete(res, true);
        }
        TextInputFormat.addInputPath(job, src);
        TextOutputFormat.setOutputPath(job, res);
        // 4. Set the processing logic
        job.setMapperClass(WordsMapper.class);
        job.setReducerClass(WordsReducer.class);
        // 5. Set the key/value types emitted by the Mapper and the Reducer
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 6. Submit the job and wait for completion
        job.waitForCompletion(true);
    }

    public static class WordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit (word, 1) for every token on the line
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                context.write(new Text(token), new IntWritable(1));
            }
        }
    }

    public static class WordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all counts for the word
            int total = 0;
            for (IntWritable value : values) {
                total += value.get();
            }
            context.write(key, new IntWritable(total));
        }
    }
}
When running it, an exception appears at this point (the original screenshot is omitted). The fix is simply to change the offending line so that it returns true.
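For reference: when running MapReduce locally on Windows this is typically the UnsatisfiedLinkError on NativeIO$Windows.access0, and the common workaround matching the "return true" fix is to copy org.apache.hadoop.io.nativeio.NativeIO from the Hadoop source into the project under the same package (so it shadows the class in the jar) and modify the access check. A sketch, assuming that is the exception seen here:
// In src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java (copied from the
// Hadoop 2.6.0 sources), inside the nested Windows class:
public static boolean access(String path, AccessRight desiredAccess)
        throws IOException {
    // The original body delegates to the native access0() call, which fails
    // when winutils.exe/hadoop.dll are not installed; always granting access
    // is good enough for local testing.
    return true;
}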