package cn.tedu.flow;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class FlowDriver {
public static void main(String[] args) throws Exception {
//1.创建作业对象
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "flow_job");
//2.设置入口类
job.setJarByClass(cn.tedu.flow.FlowDriver.class);
//3.设定Mapper类
job.setMapperClass(cn.tedu.flow.FlowMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FlowBean.class);
//4.设定Reducer类
job.setReducerClass(cn.tedu.flow.FlowReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FlowBean.class);
//5.设定输入位置
FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.157.136:9000/flow.tx
Reference note — Mapper: in MapReduce, the Mapper component converts input
records into intermediate key-value pairs, which form the basis for the
subsequent Shuffle and Reduce phases. Understanding its input/output formats
and how it handles different data types is key to optimizing processing
efficiency in jobs like the one above.