Counters
- Built-in counters
- Custom counters, defined in one of two ways (sketched below):
  - via the context object's getCounter method, which takes two arguments: the counter group and the counter name
  - via an enum
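A minimal sketch of both approaches inside a mapper; the CounterMapper class, the group name, and the counter names are illustrative, not taken from an existing job:

package com.bigdata.mapreduce.counter;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class CounterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Approach 2: an enum declares the counters; the enum's class name becomes the group
    public enum LineQuality {
        TOTAL_LINES,
        EMPTY_LINES
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Approach 1: getCounter(group, name) with two string arguments
        context.getCounter("MY_COUNTERS", "INPUT_RECORDS").increment(1);

        // Approach 2: getCounter(enum constant)
        context.getCounter(LineQuality.TOTAL_LINES).increment(1);
        if (value.toString().trim().isEmpty()) {
            context.getCounter(LineQuality.EMPTY_LINES).increment(1);
        }

        context.write(value, NullWritable.get());
    }
}

Counter values from all tasks are aggregated by the framework and printed with the job summary after waitForCompletion returns.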
Sorting and Serialization
Input data: tab-separated lines, each containing a word and a number (as consumed by the mapper below)
Requirements:
- Sort the first column lexicographically
- When the first column is equal, sort the second column in ascending order
- Define a sortbean class implementing the WritableComparable interface: give it member variables matching the two columns, and override compareTo together with write and readFields
package com.bigdata.mapreduce.sort;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class sortbean implements WritableComparable<sortbean> {

    private String word;
    private int num;

    /*
     * Sorting rules: sort by the first column lexicographically;
     * when the first column is equal, sort the second column in ascending order.
     */
    @Override
    public int compareTo(sortbean o) {
        // Compare the first column using String's built-in compareTo
        int result = this.word.compareTo(o.word);
        // If the first column is equal, fall back to the second column (ascending):
        // a positive result means this.num > o.num, so this record sorts after o;
        // a negative result means this.num < o.num, so this record sorts before o
        if (result == 0) {
            return this.num - o.num;
        }
        return result;
    }

    // Serialization: write the fields out in a fixed order
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(word);
        dataOutput.writeInt(num);
    }

    // Deserialization: read the fields back in the same order
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.word = dataInput.readUTF();
        this.num = dataInput.readInt();
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getNum() {
        return num;
    }

    public void setNum(int num) {
        this.num = num;
    }

    @Override
    public String toString() {
        return word + '\t' + num;
    }
}
- Mapper class
package com.bigdata.mapreduce.sort;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class s_mapper extends Mapper<LongWritable, Text, sortbean, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the text line (V1) and wrap the fields in a sortbean object
        String[] split = value.toString().split("\t");
        sortbean sb = new sortbean();
        sb.setWord(split[0]);
        sb.setNum(Integer.parseInt(split[1]));
        // Write K2 (the bean) and V2 (a null placeholder) to the context object
        context.write(sb, NullWritable.get());
    }
}
- Reducer class
package com.bigdata.mapreduce.sort;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class s_reducer extends Reducer<sortbean, NullWritable, sortbean, NullWritable> {
    @Override
    protected void reduce(sortbean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // The framework has already sorted and grouped the keys; emit each distinct key
        context.write(key, NullWritable.get());
    }
}
- Driver class
package com.bigdata.mapreduce.sort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class s_job extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        // Create the job object
        Job job = Job.getInstance(super.getConf(), "sort");
        // Configure the job
        // 1. Set the input format class and input path
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("file:///D:\\mapreduce_demo\\input"));
        // 2. Set the mapper class and its output types (K2, V2)
        job.setMapperClass(s_mapper.class);
        job.setMapOutputKeyClass(sortbean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // 3. Partition  4. Sort: once sortbean defines the comparison rule,
        //    nothing else is needed here
        // 5. Combine  6. Group: defaults
        // 7. Set the reducer class and its output types (K3, V3)
        job.setReducerClass(s_reducer.class);
        job.setOutputKeyClass(sortbean.class);
        job.setOutputValueClass(NullWritable.class);
        // 8. Set the output format class and output path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("file:///D:\\mapreduce_demo\\sort_out"));
        // Wait for the job to finish
        boolean flag = job.waitForCompletion(true);
        return flag ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration entries = new Configuration();
        int run = ToolRunner.run(entries, new s_job(), args);
        System.exit(run);
    }
}
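With this in place, the files under sort_out come out ordered by word, with ties broken by num in ascending order.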
Combiner
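A combiner runs reducer-style aggregation on each map task's local output before the shuffle, shrinking the data sent over the network. Its input and output types must both match the map output types (K2, V2), and the operation must be associative and commutative, such as a sum. A minimal sketch, assuming a job whose map output is Text keys with IntWritable counts (the SumCombiner class and its package are illustrative):

package com.bigdata.mapreduce.combiner;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// A combiner is written exactly like a reducer; the framework may run it zero
// or more times per map task, so it must not change the final result
public class SumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();  // pre-aggregate locally on the map side
        }
        context.write(key, new IntWritable(sum));
    }
}

It is registered in the driver with job.setCombinerClass(SumCombiner.class). When the reduce logic is itself a plain sum, as in FlowCountReducer below, the reducer class can often double as the combiner.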
Comprehensive Example
- flowbean (requirement 1: sum the four flow fields for each phone number)
package com.bigdata.mapreduce.example1;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class flowbean implements Writable {

    private Integer upFlow;
    private Integer downFlow;
    private Integer upCountFlow;
    private Integer downCountFlow;

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(upFlow);
        dataOutput.writeInt(downFlow);
        dataOutput.writeInt(upCountFlow);
        dataOutput.writeInt(downCountFlow);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upFlow = dataInput.readInt();
        this.downFlow = dataInput.readInt();
        this.upCountFlow = dataInput.readInt();
        this.downCountFlow = dataInput.readInt();
    }

    public Integer getUpFlow() { return upFlow; }
    public void setUpFlow(Integer upFlow) { this.upFlow = upFlow; }
    public Integer getDownFlow() { return downFlow; }
    public void setDownFlow(Integer downFlow) { this.downFlow = downFlow; }
    public Integer getUpCountFlow() { return upCountFlow; }
    public void setUpCountFlow(Integer upCountFlow) { this.upCountFlow = upCountFlow; }
    public Integer getDownCountFlow() { return downCountFlow; }
    public void setDownCountFlow(Integer downCountFlow) { this.downCountFlow = downCountFlow; }

    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + upCountFlow + "\t" + downCountFlow;
    }
}
- mapper
package com.bigdata.mapreduce.example1;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FlowCountMapper extends Mapper<LongWritable, Text, Text, flowbean> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split("\t");
        // Column 1 holds the phone number (K2); columns 6-9 hold the four flow fields (V2)
        flowbean fb = new flowbean();
        Text text = new Text(split[1]);
        fb.setUpFlow(Integer.parseInt(split[6]));
        fb.setDownFlow(Integer.parseInt(split[7]));
        fb.setUpCountFlow(Integer.parseInt(split[8]));
        fb.setDownCountFlow(Integer.parseInt(split[9]));
        context.write(text, fb);
    }
}
- reducer
package com.bigdata.mapreduce.example1;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowCountReducer extends Reducer<Text, flowbean, Text, flowbean> {
    @Override
    protected void reduce(Text key, Iterable<flowbean> values, Context context) throws IOException, InterruptedException {
        // Sum the four flow fields across all records for this phone number
        flowbean fb = new flowbean();
        int upFlow = 0;
        int downFlow = 0;
        int upCountFlow = 0;
        int downCountFlow = 0;
        for (flowbean f : values) {
            upFlow += f.getUpFlow();
            downFlow += f.getDownFlow();
            upCountFlow += f.getUpCountFlow();
            downCountFlow += f.getDownCountFlow();
        }
        fb.setUpFlow(upFlow);
        fb.setDownFlow(downFlow);
        fb.setUpCountFlow(upCountFlow);
        fb.setDownCountFlow(downCountFlow);
        context.write(key, fb);
    }
}
- jobmain
package com.bigdata.mapreduce.example1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class FlowJobMain extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        // Create the job object
        Job job = Job.getInstance(super.getConf(), "example1");
        // Configure the job
        // Set the input format class and input path
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(""));
        // Set the mapper class and its output types (K2, V2)
        job.setMapperClass(FlowCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(flowbean.class);
        // Partition, sort, combine, group: defaults
        // Set the reducer class and its output types (K3, V3)
        job.setReducerClass(FlowCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(flowbean.class);
        // Set the output format class and output path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(""));
        // Wait for the job to finish
        boolean flag = job.waitForCompletion(true);
        return flag ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration entries = new Configuration();
        int run = ToolRunner.run(entries, new FlowJobMain(), args);
        System.exit(run);
    }
}
- FlowBean (requirement 2: sort the output of requirement 1 by upFlow in descending order)
package com.bigdata.mapreduce.example2;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements WritableComparable<FlowBean> {

    private Integer upFlow;
    private Integer downFlow;
    private Integer upCountFlow;
    private Integer downCountFlow;

    // Sort by upFlow in descending order: comparing o against this
    // reverses the natural ascending order
    @Override
    public int compareTo(FlowBean o) {
        return o.upFlow - this.upFlow;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(upFlow);
        dataOutput.writeInt(downFlow);
        dataOutput.writeInt(upCountFlow);
        dataOutput.writeInt(downCountFlow);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upFlow = dataInput.readInt();
        this.downFlow = dataInput.readInt();
        this.upCountFlow = dataInput.readInt();
        this.downCountFlow = dataInput.readInt();
    }

    public Integer getUpFlow() { return upFlow; }
    public void setUpFlow(Integer upFlow) { this.upFlow = upFlow; }
    public Integer getDownFlow() { return downFlow; }
    public void setDownFlow(Integer downFlow) { this.downFlow = downFlow; }
    public Integer getUpCountFlow() { return upCountFlow; }
    public void setUpCountFlow(Integer upCountFlow) { this.upCountFlow = upCountFlow; }
    public Integer getDownCountFlow() { return downCountFlow; }
    public void setDownCountFlow(Integer downCountFlow) { this.downCountFlow = downCountFlow; }

    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + upCountFlow + "\t" + downCountFlow;
    }
}
- mapper
package com.bigdata.mapreduce.example2;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class smapper extends Mapper<LongWritable, Text, FlowBean, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Input is the output of requirement 1: phone, upFlow, downFlow,
        // upCountFlow, downCountFlow. The bean becomes K2 so the framework
        // sorts on it; the phone number rides along as V2
        String[] split = value.toString().split("\t");
        FlowBean fb = new FlowBean();
        Text text = new Text(split[0]);
        fb.setUpFlow(Integer.parseInt(split[1]));
        fb.setDownFlow(Integer.parseInt(split[2]));
        fb.setUpCountFlow(Integer.parseInt(split[3]));
        fb.setDownCountFlow(Integer.parseInt(split[4]));
        context.write(fb, text);
    }
}
- reducer
package com.bigdata.mapreduce.example2;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class sreducer extends Reducer<FlowBean, Text, Text, FlowBean> {
    @Override
    protected void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Swap K and V back so the output is phone number first, then the flows
        for (Text value : values) {
            context.write(value, key);
        }
    }
}
- jobmain
package com.bigdata.mapreduce.example2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class sjobmain extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(super.getConf(), "sortflow");
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(""));
        job.setMapperClass(smapper.class);
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);
        // Steps 3-6 (partition, sort, combine, group): FlowBean.compareTo
        // drives the sort; the rest stay at their defaults
        job.setReducerClass(sreducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(""));
        boolean flag = job.waitForCompletion(true);
        return flag ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration entries = new Configuration();
        int run = ToolRunner.run(entries, new sjobmain(), args);
        System.exit(run);
    }
}
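Note that the output is totally ordered only because this job uses the default single reduce task; with several reducers, each output file would be sorted independently.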
Requirement 3 (partition by phone-number prefix): building on requirement 1, simply add a partitioner class and adjust the driver class accordingly.
- partitioner
package com.bigdata.mapreduce.example3;

import com.bigdata.mapreduce.example1.flowbean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class flowPartitioner extends Partitioner<Text, flowbean> {
    @Override
    public int getPartition(Text text, flowbean bean, int numPartitions) {
        // Route each phone number to one of four partitions by its prefix
        if (text.toString().startsWith("135")) {
            return 0;
        } else if (text.toString().startsWith("136")) {
            return 1;
        } else if (text.toString().startsWith("137")) {
            return 2;
        } else {
            return 3;
        }
    }
}
- jobmain
package com.bigdata.mapreduce.example3;

import com.bigdata.mapreduce.example1.FlowCountMapper;
import com.bigdata.mapreduce.example1.FlowCountReducer;
import com.bigdata.mapreduce.example1.flowbean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class FlowJobMain extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        // Create the job object
        Job job = Job.getInstance(super.getConf(), "flow");
        // Configure the job
        // Set the input format class and input path
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(""));
        // Set the mapper class and its output types (K2, V2)
        job.setMapperClass(FlowCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(flowbean.class);
        // Set the custom partitioner class
        job.setPartitionerClass(flowPartitioner.class);
        // Run four reduce tasks, one per partition
        job.setNumReduceTasks(4);
        // Set the reducer class and its output types (K3, V3)
        job.setReducerClass(FlowCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(flowbean.class);
        // Set the output format class and output path
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(""));
        // Wait for the job to finish
        boolean flag = job.waitForCompletion(true);
        return flag ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration entries = new Configuration();
        int run = ToolRunner.run(entries, new FlowJobMain(), args);
        System.exit(run);
    }
}
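With four reduce tasks, the output directory contains one file per partition, part-r-00000 through part-r-00003: phone numbers starting with 135, 136, and 137 land in the first three, and everything else in the last.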