This updated version mainly fixes the questionable parts of the previous article.
In the previous article, the cleanup method was overridden in the Reduce class to perform the secondary sort; that met the requirement, but it was not a sound approach.
In this article, the work is split across two MapReduce jobs instead.
Background: in Hadoop, every MapReduce task is initialized as a job, and each job can be divided into two phases, a map phase and a reduce phase, each represented by its own function. The map function receives input in <key,value> form and likewise produces intermediate output in <key,value> form. Hadoop then collects all values that share the same intermediate key and passes them to the reduce function, which receives input of the form <key,(list of values)>, processes that list of values, and produces zero or one outputs per invocation, again in <key,value> form.
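To make the two-job pipeline concrete, suppose each line of the input under /demo03/in has the form name income pay (the companies and amounts below are hypothetical sample data):

companyA 1000 800
companyA 500 200
companyB 2000 1500

The first job sums income and pay per company and computes profit = income - pay; the second job re-sorts those per-company totals by profit in descending order, breaking ties by income in descending order.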
The TradeBean class:
package com.wqs.myWritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

// A custom type that is both serializable (Writable) and sortable (Comparable),
// so it can be used as a MapReduce key or value.
public class TradeBean implements WritableComparable<TradeBean> {
    private String name;
    private int income;
    private int pay;
    private int profit;

    // Hadoop requires a no-arg constructor to instantiate the bean during deserialization.
    public TradeBean() {
    }

    public TradeBean(String name, int income, int pay, int profit) {
        this.name = name;
        this.income = income;
        this.pay = pay;
        this.profit = profit;
    }

    // Deserialize fields in exactly the order write() serialized them.
    @Override
    public void readFields(DataInput in) throws IOException {
        name = in.readUTF();
        income = in.readInt();
        pay = in.readInt();
        profit = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeInt(income);
        out.writeInt(pay);
        out.writeInt(profit);
    }

    // Sort by profit descending, breaking ties by income descending.
    @Override
    public int compareTo(TradeBean tradeBean) {
        if (this.profit > tradeBean.getProfit()) return -1;
        else if (this.profit < tradeBean.getProfit()) return 1;
        else if (this.income > tradeBean.getIncome()) return -1;
        else if (this.income < tradeBean.getIncome()) return 1;
        else return 0;
    }

    @Override
    public String toString() {
        return name + " " + income + " " + pay + " " + profit;
    }

    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public int getIncome() {
        return income;
    }
    public void setIncome(int income) {
        this.income = income;
    }
    public int getPay() {
        return pay;
    }
    public void setPay(int pay) {
        this.pay = pay;
    }
    public int getProfit() {
        return profit;
    }
    public void setProfit(int profit) {
        this.profit = profit;
    }
}
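One caveat: TradeBean is also the map output key of the second job, and the default HashPartitioner routes keys to reducers via hashCode(). With the default single reducer this does not matter, but if the number of reducers were increased, it would be safer to override hashCode() (and equals()) consistently with compareTo(). A minimal sketch, not part of the original class:

@Override
public int hashCode() {
    // Derive the hash only from the fields compareTo() uses,
    // so keys that compare equal also hash alike.
    return 31 * profit + income;
}

@Override
public boolean equals(Object obj) {
    if (!(obj instanceof TradeBean)) return false;
    TradeBean other = (TradeBean) obj;
    return profit == other.profit && income == other.income;
}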
The Map class:
package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Map extends Mapper<Object, Text, Text, TradeBean> {
    private TradeBean bean = new TradeBean();
    private Text name = new Text();

    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line has the form: name income pay
        String line = value.toString();
        String[] temp = line.split(" ");
        name.set(temp[0]);
        bean.setName(temp[0]);
        bean.setIncome(Integer.valueOf(temp[1]));
        bean.setPay(Integer.valueOf(temp[2]));
        // Profit is unknown at this stage; the reducer computes it from the totals.
        bean.setProfit(0);
        // Reusing the same bean/Text objects is safe: context.write serializes immediately.
        context.write(name, bean);
    }
}
The Reduce class:
package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reduce extends Reducer<Text, TradeBean, TradeBean, NullWritable> {
    @Override
    protected void reduce(Text k2, Iterable<TradeBean> vs2, Context context)
            throws IOException, InterruptedException {
        String name = null;
        int income = 0;
        int pay = 0;
        int profit = 0;
        // Sum all income and pay records for this company.
        for (TradeBean tradeBean : vs2) {
            income += tradeBean.getIncome();
            pay += tradeBean.getPay();
        }
        name = k2.toString();
        profit = income - pay;
        context.write(new TradeBean(name, income, pay, profit), NullWritable.get());
    }
}
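Because the reducer's value type is NullWritable, the default TextOutputFormat writes only the key's toString() on each line, so job 1's output under /demo03/out consists of space-separated name income pay profit records. For the hypothetical sample input above, it would contain:

companyA 1500 1000 500
companyB 2000 1500 500

These lines are exactly what Map2 parses below.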
The Map2 class:
package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Map2 extends Mapper<Object, Text, TradeBean, NullWritable> {
    private TradeBean bean = new TradeBean();

    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Each line of job 1's output has the form: name income pay profit
        String line = value.toString();
        String[] temp = line.split(" ");
        bean.setName(temp[0]);
        bean.setIncome(Integer.valueOf(temp[1]));
        bean.setPay(Integer.valueOf(temp[2]));
        bean.setProfit(Integer.valueOf(temp[3]));
        // Emitting the bean as the key lets the shuffle sort records via TradeBean.compareTo.
        context.write(bean, NullWritable.get());
    }
}
The Reduce2 class:
package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class Reduce2 extends Reducer<TradeBean, NullWritable, TradeBean, NullWritable> {
    @Override
    protected void reduce(TradeBean k2, Iterable<NullWritable> vs2, Context context)
            throws IOException, InterruptedException {
        // Beans whose compareTo() returns 0 (same profit and income) are grouped into a
        // single reduce call, so write once per value rather than once per call; Hadoop
        // refreshes the key object's fields as the value iterator advances, so no record is lost.
        for (NullWritable v : vs2) {
            context.write(k2, v);
        }
    }
}
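Continuing the hypothetical sample, both companies end up with a profit of 500, so the income tiebreaker in compareTo decides the order and the final output in /demo03/out2 would read:

companyB 2000 1500 500
companyA 1500 1000 500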
The Main class:
package com.wqs.myWritableComparable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Main {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        System.setProperty("hadoop.home.dir", "E:/hadoop-2.7.7");
        // Hard-coded paths for local debugging; they override any command-line arguments.
        args = new String[] { "/demo03/in/", "/demo03/out", "/demo03/out2" };
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 3) {
            System.err.println("Usage: Main <in> <out> <out2>");
            System.exit(2);
        }
        // Job 1: aggregate income/pay per company and compute profit.
        Job job = Job.getInstance(conf);
        job.setJarByClass(Main.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(TradeBean.class);
        job.setOutputKeyClass(TradeBean.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.222.128:9000" + otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.222.128:9000" + otherArgs[1]));
        // Job 2: re-sort job 1's output by profit, then income (both descending).
        Job job2 = Job.getInstance(conf);
        job2.setJarByClass(Main.class);
        job2.setMapperClass(Map2.class);
        job2.setReducerClass(Reduce2.class);
        job2.setMapOutputKeyClass(TradeBean.class);
        job2.setMapOutputValueClass(NullWritable.class);
        job2.setOutputKeyClass(TradeBean.class);
        job2.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job2, new Path("hdfs://192.168.222.128:9000" + otherArgs[1]));
        FileOutputFormat.setOutputPath(job2, new Path("hdfs://192.168.222.128:9000" + otherArgs[2]));
        // Run job2 only after job finishes successfully.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}
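Assuming the classes are packaged into a jar (the jar name here is hypothetical), the chained jobs can be submitted with a single command. Note that as written, main() overrides the command-line paths with the hard-coded ones, so the trailing arguments only take effect if that line is removed:

hadoop jar demo03.jar com.wqs.myWritableComparable.Main /demo03/in/ /demo03/out /demo03/out2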