MapReduce的一些高级特性
如果一个类实现了的Hadoop的序列化机制(接口:Writable),这个类的对象就可以作为输入和输出的值
public class Employee implements Writable
举例1:使用Employee类来封装员工信息,并且作为Map和Reduce的输入和输出
一定注意:序列化的顺序和反序列化的顺序要一致
/**
 * Deserialization: restores this employee's fields from {@code input}.
 * Values are read in the same sequence in which {@code write(DataOutput)}
 * emits them; the two methods must always stay in step.
 *
 * @param input stream holding a previously serialized employee
 * @throws IOException if reading from {@code input} fails
 */
@Override
public void readFields(DataInput input) throws IOException {
    empno = input.readInt();
    ename = input.readUTF();
    job = input.readUTF();
    mgr = input.readInt();
    hiredate = input.readUTF();
    sal = input.readInt();
    comm = input.readInt();
    deptno = input.readInt();
}
/**
 * Serialization: writes this employee's fields to {@code output}.
 * The field sequence here defines the wire order that
 * {@code readFields(DataInput)} has to follow when reading back.
 *
 * @param output stream that receives the serialized employee
 * @throws IOException if writing to {@code output} fails
 */
@Override
public void write(DataOutput output) throws IOException {
    output.writeInt(empno);
    output.writeUTF(ename);
    output.writeUTF(job);
    output.writeInt(mgr);
    output.writeUTF(hiredate);
    output.writeInt(sal);
    output.writeInt(comm);
    output.writeInt(deptno);
}
排序:注意:按照key2进行排序
默认排序:数字 升序
如果要改变默认的排序规则,需要创建一个自己的比较器
package demo.sort.hadoop.number;
import org.apache.hadoop.io.IntWritable;
/**
 * Custom ordering for numeric keys: delegates to {@link IntWritable.Comparator}
 * on the raw serialized bytes and flips the sign of its answer, so the
 * resulting order is the reverse of the parent comparator's order.
 */
public class MyNumberComparator extends IntWritable.Comparator {

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Ask the parent for its ordering first, then invert it.
        int parentOrder = super.compare(b1, s1, l1, b2, s2, l2);
        return -parentOrder;
    }
}
在mian函数中:
// Register our own comparator class as the job's sort comparator
job.setSortComparatorClass(MyNumberComparator.class);
ps:如果mapper中的输出value2无输出则:NullWritable
// k1 v1 k2 v2???是什么 ----> 空值
public class NumberMapper extends Mapper<LongWritable, Text, IntWritable, NullWritable> {
字符串 字典顺序
字符串与数字类似,只需稍加改动即可
//对字符进行排序,定义自己规则
public class MyNumberComparator extends Text.Comparator{
对象的排序:按照员工的薪水排序
首先更改Employee类里的序列化
public class Employee implements WritableComparable<Employee> {
更改排序规则(单个排序规则 eg.以薪水sal排序)
/**
 * Orders employees by salary, ascending.
 *
 * <p>Uses {@link Integer#compare(int, int)} so that the {@code Comparable}
 * contract holds: swapping the operands flips the sign of the result. The
 * previous version returned 1 in both directions when two salaries were
 * equal. Salary ties are broken by employee number, so two different
 * employees still never compare as equal; 0 is returned only when both
 * salary and employee number match.
 *
 * @param o the employee to compare against
 * @return a negative value, zero, or a positive value as this employee sorts
 *         before, together with, or after {@code o}
 */
@Override
public int compareTo(Employee o) {
    int bySalary = Integer.compare(this.sal, o.getSal());
    if (bySalary != 0) {
        return bySalary;
    }
    // Tie-breaker keeps the ordering total and deterministic.
    return Integer.compare(this.empno, o.empno);
}
如果是多个排序规则(eg.先以部门号deptno排序,再以薪水sal排序)
注意部门号判定中无‘=’
/**
 * Orders employees by department number first and by salary second, both
 * ascending.
 *
 * <p>Each level falls through to the next only when the current level is
 * equal. {@link Integer#compare(int, int)} is used so that the
 * {@code Comparable} contract holds: swapping the operands flips the sign of
 * the result. The previous version returned 1 in both directions when
 * department and salary were equal. Remaining ties are broken by employee
 * number, so two different employees still never compare as equal.
 *
 * @param o the employee to compare against
 * @return a negative value, zero, or a positive value as this employee sorts
 *         before, together with, or after {@code o}
 */
@Override
public int compareTo(Employee o) {
    int byDept = Integer.compare(this.deptno, o.deptno);
    if (byDept != 0) {
        return byDept;
    }
    int bySalary = Integer.compare(this.sal, o.getSal());
    if (bySalary != 0) {
        return bySalary;
    }
    // Tie-breaker keeps the ordering total and deterministic.
    return Integer.compare(this.empno, o.empno);
}
附录:对象排序代码:
Employee.java
package sort.text;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
 * One employee record, usable as a MapReduce key.
 *
 * <p>Sample input line:
 * {@code 7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30}
 *
 * <p>Ordering ({@link #compareTo(Employee)}): department number, then salary,
 * then employee number, all ascending. {@link #equals(Object)} and
 * {@link #hashCode()} are defined over the same three fields so they stay
 * consistent with the ordering and give a stable hash across JVMs.
 *
 * <p>Serialization ({@link #write(DataOutput)} / {@link #readFields(DataInput)})
 * covers all eight fields; both methods must process them in the same order.
 */
public class Employee implements WritableComparable<Employee> {

    private int empno;       // employee number
    private String ename;    // employee name
    private String job;      // job title
    private int mgr;         // manager's employee number (presumably; column 4 of the input)
    private String hiredate; // hire date, kept as the raw text from the input
    private int sal;         // salary
    private int comm;        // commission
    private int deptno;      // department number

    @Override
    public String toString() {
        return "Employee [empno=" + empno + ", ename=" + ename + ", sal=" + sal + ", deptno=" + deptno + "]";
    }

    /**
     * Compares by department number, then salary, then employee number.
     *
     * <p>{@link Integer#compare(int, int)} is used at every level so the
     * {@code Comparable} contract holds (the result changes sign when the
     * operands are swapped, and 0 is returned for equal keys). The employee
     * number is the final tie-breaker, so two different employees with the
     * same department and salary still do not compare as equal.
     *
     * @param o the employee to compare against
     * @return a negative value, zero, or a positive value as this employee
     *         sorts before, together with, or after {@code o}
     */
    @Override
    public int compareTo(Employee o) {
        int byDept = Integer.compare(this.deptno, o.deptno);
        if (byDept != 0) {
            return byDept;
        }
        int bySalary = Integer.compare(this.sal, o.sal);
        if (bySalary != 0) {
            return bySalary;
        }
        return Integer.compare(this.empno, o.empno);
    }

    /**
     * Two employees are equal when the fields used by
     * {@link #compareTo(Employee)} (deptno, sal, empno) all match.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Employee)) {
            return false;
        }
        Employee other = (Employee) obj;
        return this.deptno == other.deptno
                && this.sal == other.sal
                && this.empno == other.empno;
    }

    /**
     * Hash over the same fields as {@link #equals(Object)}. It is computed
     * from field values only, so it is identical in every JVM.
     */
    @Override
    public int hashCode() {
        int result = 17;
        result = 31 * result + deptno;
        result = 31 * result + sal;
        result = 31 * result + empno;
        return result;
    }

    /**
     * Deserialization: reads all fields from {@code input} in the order in
     * which {@link #write(DataOutput)} writes them.
     *
     * @param input stream holding a previously serialized employee
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.empno = input.readInt();
        this.ename = input.readUTF();
        this.job = input.readUTF();
        this.mgr = input.readInt();
        this.hiredate = input.readUTF();
        this.sal = input.readInt();
        this.comm = input.readInt();
        this.deptno = input.readInt();
    }

    /**
     * Serialization: writes all fields to {@code output}. The order here must
     * match the order used by {@link #readFields(DataInput)}.
     *
     * @param output stream that receives the serialized employee
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeInt(this.empno);
        output.writeUTF(this.ename);
        output.writeUTF(this.job);
        output.writeInt(this.mgr);
        output.writeUTF(this.hiredate);
        output.writeInt(this.sal);
        output.writeInt(this.comm);
        output.writeInt(this.deptno);
    }

    public int getEmpno() {
        return empno;
    }

    public void setEmpno(int empno) {
        this.empno = empno;
    }

    public String getEname() {
        return ename;
    }

    public void setEname(String ename) {
        this.ename = ename;
    }

    public String getJob() {
        return job;
    }

    public void setJob(String job) {
        this.job = job;
    }

    public int getMgr() {
        return mgr;
    }

    public void setMgr(int mgr) {
        this.mgr = mgr;
    }

    public String getHiredate() {
        return hiredate;
    }

    public void setHiredate(String hiredate) {
        this.hiredate = hiredate;
    }

    public int getSal() {
        return sal;
    }

    public void setSal(int sal) {
        this.sal = sal;
    }

    public int getComm() {
        return comm;
    }

    public void setComm(int comm) {
        this.comm = comm;
    }

    public int getDeptno() {
        return deptno;
    }

    public void setDeptno(int deptno) {
        this.deptno = deptno;
    }
}
SortEmployeeMain.java
package sort.text;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the employee sort job: wires {@link SortEmployeeMapper} into a
 * {@link Job} whose keys are {@link Employee} objects and whose values are
 * {@link NullWritable}, then runs the job and waits for it to finish.
 */
public class SortEmployeeMain {

    /** Input path used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT = "hdfs://192.168.231.128:9000/data/input/emp.csv";

    /** Output path used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT = "hdfs://192.168.231.128:9000/data/output/javaio";

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path; with no arguments
     *             the built-in default paths are used
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(SortEmployeeMain.class);

        job.setMapperClass(SortEmployeeMapper.class);
        // Declare the mapper's output types (k2, v2) explicitly.
        job.setMapOutputKeyClass(Employee.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Declare the job's final output types as well, so they match what is written.
        job.setOutputKeyClass(Employee.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Propagate job success or failure through the process exit status
        // instead of discarding the result of waitForCompletion.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
SortEmployeeMapper.java
package sort.text;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Parses one comma-separated line into an {@link Employee} and emits it as
 * the output key with a {@link NullWritable} value.
 *
 * <p>Expected columns, in order: empno, ename, job, mgr, hiredate, sal, comm,
 * deptno.
 */
public class SortEmployeeMapper extends Mapper<LongWritable, Text, Employee, NullWritable> {

    /** Number of comma-separated columns a record must have. */
    private static final int FIELD_COUNT = 8;

    /**
     * Converts one input line into an {@link Employee} key.
     *
     * <p>Lines with fewer than {@value #FIELD_COUNT} columns (for example
     * blank lines) are not emitted; they are tallied in the
     * {@code SortEmployeeMapper / MALFORMED_RECORDS} counter instead of
     * failing the task with an index or parse error. A non-numeric
     * {@code mgr} becomes -1 and a non-numeric {@code comm} becomes 0. A
     * non-numeric empno, sal or deptno still raises
     * {@link NumberFormatException}.
     *
     * @param key1    input key supplied by the framework (not used here)
     * @param value1  one line of input text
     * @param context sink for the output pair and for counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        String[] words = value1.toString().split(",");
        if (words.length < FIELD_COUNT) {
            context.getCounter("SortEmployeeMapper", "MALFORMED_RECORDS").increment(1);
            return;
        }

        Employee e = new Employee();
        e.setEmpno(Integer.parseInt(words[0]));
        e.setEname(words[1]);
        e.setJob(words[2]);
        e.setMgr(parseIntOrDefault(words[3], -1));
        e.setHiredate(words[4]);
        e.setSal(Integer.parseInt(words[5]));
        e.setComm(parseIntOrDefault(words[6], 0));
        e.setDeptno(Integer.parseInt(words[7]));

        context.write(e, NullWritable.get());
    }

    /** Parses {@code text} as an int, returning {@code fallback} when it is not a valid integer. */
    private static int parseIntOrDefault(String text, int fallback) {
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException ignored) {
            // Optional column is empty or non-numeric: use the caller's default.
            return fallback;
        }
    }
}