package cn.itcast.bigdata.mr.flowsum;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that sums up-flow and down-flow per phone number.
 *
 * <p>Input: tab-separated lines where column 2 (index 1) is the phone number,
 * and the 3rd- and 2nd-from-last columns are the up-flow and down-flow values.
 * Output: one line per phone number with the aggregated {@link FlowBean}.
 *
 * <p>Usage: {@code FlowCount <input dir> <output dir>}
 */
public class FlowCount {

    static class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

        // Reused output objects. Hadoop serializes the key/value pair inside
        // context.write(), so reusing a single instance per task avoids one
        // allocation per input record (the original reused the bean but still
        // allocated a new Text for every record — now both are reused).
        final Text k = new Text();
        final FlowBean bean = new FlowBean();

        /**
         * Emits (phoneNumber, FlowBean(upFlow, dFlow)) for each input line.
         *
         * @param key     byte offset of the line in the input split (unused)
         * @param value   one raw tab-separated input line
         * @param context Hadoop task context used to emit the output pair
         * @throws IOException          on write failure
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Convert the line to a String and split it into fields.
            String line = value.toString();
            String[] fields = line.split("\t");

            // Phone number is the second column.
            String phoneNbr = fields[1];

            // Up-flow / down-flow are addressed from the END of the record
            // because the number of leading columns varies between records.
            long upFlow = Long.parseLong(fields[fields.length - 3]);
            long dFlow = Long.parseLong(fields[fields.length - 2]);

            bean.set(upFlow, dFlow);
            k.set(phoneNbr);
            context.write(k, bean);
        }
    }

    static class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

        // Reused output value, same rationale as in the mapper.
        final FlowBean bean = new FlowBean();

        /**
         * Sums all FlowBeans grouped under one phone number, e.g.
         * &lt;183323,bean1&gt;&lt;183323,bean2&gt;&lt;183323,bean3&gt;...
         *
         * @param key     the phone number
         * @param values  all flow records for that phone number
         * @param context Hadoop task context used to emit the aggregated pair
         * @throws IOException          on write failure
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<FlowBean> values, Context context)
                throws IOException, InterruptedException {
            long sum_upFlow = 0;
            long sum_dFlow = 0;

            // Accumulate up-flow and down-flow across all beans for this key.
            // NOTE: Hadoop reuses the FlowBean instance it hands out during
            // iteration, so we read the values immediately rather than storing
            // the bean references.
            for (FlowBean b : values) {
                sum_upFlow += b.getUpFlow();
                sum_dFlow += b.getdFlow();
            }

            bean.set(sum_upFlow, sum_dFlow);
            context.write(key, bean);
        }
    }

    /**
     * Configures and submits the job. Expects two arguments:
     * args[0] = input directory, args[1] = output directory.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Local mode (the defaults — listed here for reference only):
        // conf.set("mapreduce.framework.name", "local");
        // conf.set("fs.defaultFs", "file:///"); // where to read/write data

        // Cluster mode (submit to YARN from the IDE).
        // These three settings must point at the cluster:
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.hostname", "linux1"); // resource manager host
        conf.set("fs.defaultFS", "hdfs://linux1:9000/");
        System.setProperty("HADOOP_USER_NAME", "hadoop"); // submit as this user
        // On Windows the generated launch script is not valid Linux shell
        // syntax, so YARNRunner.java must be patched; not needed on macOS.

        Job job = Job.getInstance(conf);

        // Where the job's jar lives — four deployment scenarios:
        // 1. Running a local single-node Hadoop from the IDE: no setting needed.
        // 2. Jar built locally, submitted to the cluster from the IDE:
        job.setJar("/Users/mx/Desktop/hadoop.jar");
        // 3. Jar run on the cluster via `java -jar` (needs the four *-site.xml files):
        // job.setJar("/home/hadoop/hadoop.jar");
        // 4. Jar run on the cluster via `hadoop jar` (the common case):
        // job.setJarByClass(FlowCount.class);

        // Mapper / Reducer classes for this job.
        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        // Mapper output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // Final output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input directory of the raw data.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // Output directory for the results (must not already exist).
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job (configuration + jar) to YARN and wait for completion.
        /* job.submit(); */
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
MapReduce 示例
最新推荐文章于 2022-03-06 23:49:26 发布