package cn.coder.mapreduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class JobMain extends Configured implements Tool {
    // This method configures and submits one MapReduce job.
    @Override
    public int run(String[] args) throws Exception {
        // 1. Create the job object from the inherited Configuration, giving it a job name
        Job job = Job.getInstance(super.getConf(), "wordcount");
        // Tell Hadoop which jar contains the job classes (required when running on a cluster)
        job.setJarByClass(JobMain.class);
        // 2. Configure the job
        // Step 1: specify the input format and the input path
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://hadoop01:8020/wordcount"));
        // Step 2: specify the Mapper class
        job.setMapperClass(WordCountMapper.class);
        // Set the map output key type (K2)
        job.setMapOutputKeyClass(Text.class);
        // Set the map output value type (V2)
        job.setMapOutputValueClass(LongWritable.class);
        // Steps 3-6: the shuffle phase (partition, sort, combine, group) uses the defaults
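        // By default that means HashPartitioner for partitioning and the key's
        // compareTo for sorting; an optional combiner could be added with
        // job.setCombinerClass(...) — for word count, the reducer itself can be reused.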
        // Step 7: specify the Reducer class and the final output types
        job.setReducerClass(WordCountReduce.class);
        // Set the output key type (K3)
        job.setOutputKeyClass(Text.class);
        // Set the output value type (V3)
        job.setOutputValueClass(LongWritable.class);
        // Step 8: specify the output format
        job.setOutputFormatClass(TextOutputFormat.class);
        // Set the output path
        TextOutputFormat.setOutputPath(job, new Path("hdfs://hadoop01:8020/wordcount_out"));
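        // Note: the output directory must not already exist, or the job fails
        // at submission with a FileAlreadyExistsException.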
        // Submit the job and block until it finishes
        boolean b = job.waitForCompletion(true);
        // Exit code: 0 on success, 1 on failure
        return b ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        // Launch the job; ToolRunner passes this Configuration to the Configured
        // parent, which is where super.getConf() in run() reads it from
        Configuration conf = new Configuration();
        // Cluster run mode; this can also be set once in mapred-site.xml
        conf.set("mapreduce.framework.name", "yarn");
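        // Without this setting the framework name defaults to "local", which runs
        // the job in-process and is handy for testing; "yarn" requires a reachable
        // ResourceManager.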
        int run = ToolRunner.run(conf, new JobMain(), args);
        System.exit(run);
    }
}
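
The driver references WordCountMapper and WordCountReduce, which are presumably defined elsewhere in the project. For reference, here is a minimal sketch of what those two classes usually look like for word count; the tokenization (splitting on whitespace) is an assumption, and both classes are kept package-private so the listing compiles as a single file:

package cn.coder.mapreduce;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
// K1/V1 = line offset / line text; K2/V2 = word / 1
class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private final Text word = new Text();
    private static final LongWritable ONE = new LongWritable(1);
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (word, 1) for every whitespace-separated token in the line
        for (String token : value.toString().split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}
// K2/V2 = word / counts; K3/V3 = word / total
class WordCountReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum all the 1s emitted for this word
        long sum = 0;
        for (LongWritable one : values) {
            sum += one.get();
        }
        context.write(key, new LongWritable(sum));
    }
}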
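With mapreduce.framework.name set to yarn, the compiled job is packaged into a jar and submitted from a node with the Hadoop client configured, along the lines of hadoop jar wordcount.jar cn.coder.mapreduce.JobMain (the jar name here is a placeholder). This is also why job.setJarByClass(JobMain.class) is set in run(): it tells the framework which jar to ship to the cluster so the NodeManagers can load WordCountMapper and WordCountReduce.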