Data:
// name income expense date
zhangsan 6000 0 2016-05-01
lisi 2000 0 2016-05-01
lisi 0 100 2016-05-01
zhangsan 3000 0 2016-05-01
wangwu 9000 0 2016-05-01
wangwu 0 200 2016-05-01
zhangsan 200 400 2016-05-01
Requirement:
Compute each user's total income, total expense, and profit, and list the most profitable users first (sort by profit in descending order; if profits are equal, sort by income in descending order).
Analysis:
We implement the WritableComparable interface. Writable is Hadoop's serialization contract: any structured object used in Hadoop must implement it so that the object can be serialized to a byte stream and a byte stream can be deserialized back into the object. WritableComparable extends Writable with comparability, and every key type in MapReduce must implement it. Being serializable means implementing the two methods write() (serialize) and readFields() (deserialize); being comparable means implementing compareTo(), which encodes the comparison and sort rule. A key that implements both can therefore be shipped between tasks and sorted by the framework.
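To make the contract concrete, here is a minimal sketch (a hypothetical RoundTripDemo class, not part of the job) that serializes a TradeBean to a byte array via write() and restores it via readFields(), which is exactly what Hadoop does when it ships keys and values between tasks:
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
public class RoundTripDemo {
    public static void main(String[] args) throws IOException {
        TradeBean in = new TradeBean("zhangsan", 6000, 0, 6000);
        // write() pushes the fields onto a byte stream
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        in.write(new DataOutputStream(buf));
        // readFields() rebuilds an equivalent bean from those bytes
        TradeBean out = new TradeBean();
        out.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
        System.out.println(out); // zhangsan 6000 0 6000
    }
}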
Code:
TradeBean class:
package com.wqs.myWritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class TradeBean implements WritableComparable<TradeBean> {
    private String name;
    private int income;
    private int pay;
    private int profit;

    public TradeBean() {
        // no-arg constructor required by Hadoop to instantiate the bean before readFields()
    }

    public TradeBean(String name, int income, int pay, int profit) {
        this.name = name;
        this.income = income;
        this.pay = pay;
        this.profit = profit;
    }
    @Override
    public void readFields(DataInput in) throws IOException {
        // deserialize: field order must match write()
        name = in.readUTF();
        income = in.readInt();
        pay = in.readInt();
        profit = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // serialize the fields to the byte stream
        out.writeUTF(name);
        out.writeInt(income);
        out.writeInt(pay);
        out.writeInt(profit);
    }
    @Override
    public int compareTo(TradeBean tradeBean) {
        // sort by profit descending; break ties by income descending
        if (this.profit > tradeBean.getProfit()) return -1;
        else if (this.profit < tradeBean.getProfit()) return 1;
        else if (this.income > tradeBean.getIncome()) return -1;
        else if (this.income < tradeBean.getIncome()) return 1;
        else return 0;
    }
    @Override
    public String toString() {
        return name + " " + income + " " + pay + " " + profit;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getIncome() {
        return income;
    }

    public void setIncome(int income) {
        this.income = income;
    }

    public int getPay() {
        return pay;
    }

    public void setPay(int pay) {
        this.pay = pay;
    }

    public int getProfit() {
        return profit;
    }

    public void setProfit(int profit) {
        this.profit = profit;
    }
}
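A quick way to sanity-check compareTo() is to sort a small list by hand (a hypothetical CompareDemo class, not part of the job); note how zhangsan and wangwu tie on profit and the tie is broken by income:
import java.util.ArrayList;
import java.util.Collections;
public class CompareDemo {
    public static void main(String[] args) {
        ArrayList<TradeBean> beans = new ArrayList<>();
        beans.add(new TradeBean("lisi", 2000, 100, 1900));
        beans.add(new TradeBean("wangwu", 9000, 200, 8800));
        beans.add(new TradeBean("zhangsan", 9200, 400, 8800));
        Collections.sort(beans); // uses TradeBean.compareTo
        for (TradeBean bean : beans) {
            System.out.println(bean);
        }
        // zhangsan 9200 400 8800
        // wangwu 9000 200 8800
        // lisi 2000 100 1900
    }
}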
Map class:
package com.wqs.myWritableComparable;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class Map extends Mapper<Object, Text, Text, TradeBean> {
    private TradeBean bean = new TradeBean();
    private Text name = new Text();

    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // each input line: name income expense date
        String line = value.toString();
        String[] temp = line.split(" ");
        name.set(temp[0]);
        bean.setName(temp[0]);
        bean.setIncome(Integer.valueOf(temp[1]));
        bean.setPay(Integer.valueOf(temp[2]));
        bean.setProfit(0); // profit is computed in the reducer
        context.write(name, bean);
    }
}
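For the first three lines of the sample data, the mapper emits the (key, value) pairs below; the shuffle then groups them by name, so each reduce() call sees all of one user's records (profit is left at 0 here because it is computed in the reducer):
(zhangsan, zhangsan 6000 0 0)
(lisi, lisi 2000 0 0)
(lisi, lisi 0 100 0)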
Reduce class:
package com.wqs.myWritableComparable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class Reduce extends Reducer<Text, TradeBean, TradeBean, NullWritable> {
    private ArrayList<TradeBean> tradeBeans = new ArrayList<>();

    @Override
    protected void reduce(Text k2, Iterable<TradeBean> vs2, Context context)
            throws IOException, InterruptedException {
        // sum this user's income and expense across all of their records
        int income = 0;
        int pay = 0;
        for (TradeBean tradeBean : vs2) {
            income += tradeBean.getIncome();
            pay += tradeBean.getPay();
        }
        String name = k2.toString();
        int profit = income - pay;
        tradeBeans.add(new TradeBean(name, income, pay, profit));
    }
    /**
     * cleanup() runs once after all reduce() calls have finished; we use it to
     * sort tradeBeans and emit the final output. The map phase keyed records by
     * name so that the reducer could aggregate each user's data, which means the
     * reduce output would otherwise be ordered by name rather than by profit as
     * the requirement asks, so we sort the collected results before writing them.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        Collections.sort(tradeBeans); // uses TradeBean.compareTo
        for (TradeBean tradeBean : tradeBeans) {
            context.write(tradeBean, NullWritable.get());
        }
    }
}
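Note that collecting every aggregated bean in tradeBeans and sorting in cleanup() only yields a globally sorted result when the job runs with a single reducer (the MapReduce default, and what this job uses); with multiple reducers, each one would sort only its own partition.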
Main class:
package com.wqs.myWritableComparable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Main {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        System.setProperty("hadoop.home.dir", "E:/hadoop-2.7.7");
        args = new String[] { "/demo03/in/", "/demo03/out" }; // hard-coded HDFS paths for testing
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Main <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf);
        job.setJarByClass(Main.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(TradeBean.class);
        job.setOutputKeyClass(TradeBean.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.222.128:9000" + args[0]));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.222.128:9000" + args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
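With the sample data above, the run should produce the following output (zhangsan: 9200 - 400 = 8800; wangwu: 9000 - 200 = 8800; lisi: 2000 - 100 = 1900; the profit tie between zhangsan and wangwu is broken by income):
zhangsan 9200 400 8800
wangwu 9000 200 8800
lisi 2000 100 1900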