为什么要优化?
数据格式
address.txt
1 America
2 China
3 Germen
number.txt
1 Spark 1
2 Hadoop 1
3 Flink 2
4 Kafka 3
5 Tachyon 2
输出格式:
发现程序运行时始终无法把结果写到 HDFS 上,有问题的代码如下:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* FileName: JoinImproved
* Author: hadoop
* Email: 3165845957@qq.com
* Date: 18-10-7 下午2:37
* Description:
*/
public class JoinImproved {

    /**
     * Turns each input line into a ({@link MemberKey}, {@link MemberInformation}) pair.
     * A line with exactly two tab-separated fields is handled as an address record;
     * any other line is handled as a member record whose third field is the address number.
     */
    public static class JoinImprovedMapper extends Mapper<LongWritable, Text, MemberKey, MemberInformation> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            MemberInformation info = new MemberInformation();
            MemberKey joinKey = new MemberKey();
            if (fields.length == 2) {
                // Address record: <addressNo> <addressName>
                info.setAddressNo(fields[0]);
                info.setAddressName(fields[1]);
                info.setFlag(1);
                joinKey.setKeyID(Integer.parseInt(fields[0]));
                joinKey.setFlag(false);
            } else {
                // Member record: <memberNo> <memberName> <addressNo>
                info.setMemberNo(fields[0]);
                info.setMemberName(fields[1]);
                info.setAddressNo(fields[2]);
                joinKey.setKeyID(Integer.parseInt(fields[2]));
                joinKey.setFlag(true);
            }
            context.write(joinKey, info);
        }
    }

    /**
     * Treats the first value of every group as the record carrying the address name and
     * writes each following value with that address name filled in.
     */
    public static class JoinImprovedReducer extends Reducer<MemberKey, MemberInformation, NullWritable, Text> {
        @Override
        protected void reduce(MemberKey key, Iterable<MemberInformation> values, Context context) throws IOException, InterruptedException {
            MemberInformation head = null;
            for (MemberInformation current : values) {
                if (head == null) {
                    // Copy the first value so it can be read on later iterations.
                    head = new MemberInformation(current);
                    System.out.println("111111111111111");
                    continue;
                }
                MemberInformation joined = new MemberInformation(current);
                joined.setAddressName(head.getAddressName());
                context.write(NullWritable.get(), new Text(joined.toString()));
                System.out.println("0000000000000000000000");
            }
        }
    }

    /**
     * Configures and submits the join job.
     *
     * @param args generic Hadoop options followed by one or more input paths and one output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.out.println("Usage: JoinImproved <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf);
        job.setJobName("JJoinImproved");
        job.setJarByClass(JoinImproved.class);

        // Map side
        job.setMapperClass(JoinImprovedMapper.class);
        job.setMapOutputKeyClass(MemberKey.class);
        job.setMapOutputValueClass(MemberInformation.class);

        // Reduce side; values are grouped by key id only
        job.setGroupingComparatorClass(GroupComparator.class);
        job.setReducerClass(JoinImprovedReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        // Every argument except the last is an input path; the last is the output path.
        int outputIndex = otherArgs.length - 1;
        for (int i = 0; i < outputIndex; i++) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[outputIndex]));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
/**
 * Grouping comparator that considers two {@link MemberKey}s equal when their key ids match,
 * ignoring the flag.
 *
 * NOTE(review): this is the listing the surrounding article presents as the failing version;
 * the single-argument {@code super(MemberKey.class)} call is intentionally left unchanged.
 */
class GroupComparator extends WritableComparator {

    public GroupComparator() {
        super(MemberKey.class);
    }

    public GroupComparator(MemberKey memberKey) {
        super(memberKey.getClass());
    }

    /** Orders keys by ascending key id; returns 0 for identical ids. */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        int left = ((MemberKey) a).getKeyID();
        int right = ((MemberKey) b).getKeyID();
        if (left < right) {
            return -1;
        }
        return left == right ? 0 : 1;
    }
}
/**
 * Composite map-output key: an integer key id plus a boolean flag.
 * Keys sort by id first; for equal ids the key whose flag is {@code true} sorts first.
 */
class MemberKey implements WritableComparable<MemberKey> {

    private int keyID;
    private boolean flag;

    public MemberKey() {
    }

    public MemberKey(int keyID, boolean flag) {
        this.keyID = keyID;
        this.flag = flag;
    }

    public int getKeyID() {
        return keyID;
    }

    public void setKeyID(int keyID) {
        this.keyID = keyID;
    }

    public boolean isFlag() {
        return flag;
    }

    public void setFlag(boolean flag) {
        this.flag = flag;
    }

    /** Hashes on the key id only, so keys with the same id share a hash regardless of flag. */
    @Override
    public int hashCode() {
        return keyID;
    }

    @Override
    public int compareTo(MemberKey memberKey) {
        if (keyID != memberKey.keyID) {
            return keyID > memberKey.keyID ? 1 : -1;
        }
        if (flag == memberKey.flag) {
            return 0;
        }
        // Same id, different flag: the flagged key goes first.
        return flag ? -1 : 1;
    }

    /** Serialises the id followed by the flag. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(keyID);
        dataOutput.writeBoolean(flag);
    }

    /** Reads the fields back in the order {@link #write} emits them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        keyID = dataInput.readInt();
        flag = dataInput.readBoolean();
    }
}
/**
 * Map-output value holding the member and address columns of one record, plus an int flag.
 * Unset string fields default to the empty string.
 */
class MemberInformation implements WritableComparable {

    private String memberNo = "";
    private String memberName = "";
    private String addressNo = "";
    private String addressName = "";
    // The original note reads "0 = department, 1 = worker".
    // NOTE(review): confirm this against how the mapper actually sets the flag.
    private int flag = 0;

    public MemberInformation() {
    }

    public MemberInformation(String workerNo, String workerName, String departmentNo, String addressName, int flag) {
        this.memberNo = workerNo;
        this.memberName = workerName;
        this.addressNo = departmentNo;
        this.addressName = addressName;
        this.flag = flag;
    }

    /** Copy constructor: duplicates every field of {@code information}. */
    public MemberInformation(MemberInformation information) {
        this(information.memberNo,
                information.memberName,
                information.addressNo,
                information.addressName,
                information.flag);
    }

    public String getMemberNo() {
        return memberNo;
    }

    public void setMemberNo(String memberNo) {
        this.memberNo = memberNo;
    }

    public String getMemberName() {
        return memberName;
    }

    public void setMemberName(String memberName) {
        this.memberName = memberName;
    }

    public String getAddressNo() {
        return addressNo;
    }

    public void setAddressNo(String addressNo) {
        this.addressNo = addressNo;
    }

    public String getAddressName() {
        return addressName;
    }

    public void setAddressName(String addressName) {
        this.addressName = addressName;
    }

    public int getFlag() {
        return flag;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }

    /** Always returns 0; instances carry no ordering. */
    @Override
    public int compareTo(Object o) {
        return 0;
    }

    /** Serialises the four string fields followed by the flag. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(memberNo);
        dataOutput.writeUTF(memberName);
        dataOutput.writeUTF(addressNo);
        dataOutput.writeUTF(addressName);
        dataOutput.writeInt(flag);
    }

    /** Reads the fields back in the order {@link #write} emits them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        memberNo = dataInput.readUTF();
        memberName = dataInput.readUTF();
        addressNo = dataInput.readUTF();
        addressName = dataInput.readUTF();
        flag = dataInput.readInt();
    }

    /** Returns the four string fields separated by single spaces; the flag is not included. */
    @Override
    public String toString() {
        return String.join(" ", memberNo, memberName, addressNo, addressName);
    }
}
修改后的代码如下:
package DT;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* FileName: JoinImproved
* Author: hadoop
* Email: 3165845957@qq.com
* Date: 18-10-7 下午2:37
* Description:
*/
public class JoinImproved {

    /** Counter group under which skipped and unmatched records are reported. */
    private static final String COUNTER_GROUP = "JoinImproved";

    /**
     * Turns each input line into a ({@link MemberKey}, {@link MemberInformation}) pair keyed by
     * address number.
     *
     * <ul>
     *   <li>Exactly two tab-separated fields: address record {@code <addressNo> <addressName>},
     *       emitted with flag {@code true} so it sorts ahead of the members of that address.</li>
     *   <li>Three or more fields: member record {@code <memberNo> <memberName> <addressNo>},
     *       emitted with flag {@code false}.</li>
     *   <li>Anything else (blank or short lines, non-numeric address number) is skipped and
     *       counted under {@code MALFORMED_RECORDS} instead of failing the task.</li>
     * </ul>
     */
    public static class JoinImprovedMapper extends Mapper<LongWritable, Text, MemberKey, MemberInformation> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length < 2) {
                // Blank or truncated line: indexing fields[1]/fields[2] would throw.
                context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
                return;
            }

            boolean isAddress = fields.length == 2;
            String addressNo = isAddress ? fields[0] : fields[2];
            int keyId;
            try {
                keyId = Integer.parseInt(addressNo);
            } catch (NumberFormatException e) {
                // A non-numeric join column cannot be keyed; skip the record rather than kill the task.
                context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
                return;
            }

            MemberInformation info = new MemberInformation();
            info.setAddressNo(addressNo);
            if (isAddress) {
                info.setAddressName(fields[1]);
            } else {
                info.setMemberNo(fields[0]);
                info.setMemberName(fields[1]);
            }

            MemberKey joinKey = new MemberKey();
            joinKey.setKeyID(keyId);
            joinKey.setFlag(isAddress);
            context.write(joinKey, info);
        }
    }

    /**
     * Joins the members of one address number with that address's name.
     *
     * <p>Values arrive grouped by key id, with address records (flag {@code true}) sorted first.
     * Every member record is written with the address name filled in. A member whose group has
     * no address record is still written, with an empty address name, and is counted under
     * {@code UNMATCHED_MEMBERS}.
     */
    public static class JoinImprovedReducer extends Reducer<MemberKey, MemberInformation, NullWritable, Text> {

        /** Reused output buffer; its contents are written out on every {@code context.write}. */
        private final Text outputLine = new Text();

        @Override
        protected void reduce(MemberKey key, Iterable<MemberInformation> values, Context context) throws IOException, InterruptedException {
            String addressName = "";
            boolean addressSeen = false;
            for (MemberInformation item : values) {
                // NOTE(review): relies on the framework refreshing `key` in place for each value,
                // so the flag seen here belongs to the current value. Confirm against the Hadoop
                // version in use.
                if (key.isFlag()) {
                    addressName = item.getAddressName();
                    addressSeen = true;
                    continue;
                }
                if (!addressSeen) {
                    context.getCounter(COUNTER_GROUP, "UNMATCHED_MEMBERS").increment(1);
                }
                item.setAddressName(addressName);
                outputLine.set(item.toString());
                context.write(NullWritable.get(), outputLine);
            }
        }
    }

    /**
     * Configures and submits the join job.
     *
     * @param args generic Hadoop options followed by one or more input paths and one output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: JoinImproved <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf);
        job.setJarByClass(JoinImproved.class);
        job.setJobName("JoinImproved");
        // Group reducer input by key id only, so an address and its members share one reduce call.
        job.setGroupingComparatorClass(GroupComparator.class);

        // Map side
        job.setMapperClass(JoinImprovedMapper.class);
        job.setMapOutputKeyClass(MemberKey.class);
        job.setMapOutputValueClass(MemberInformation.class);

        // Reduce side
        job.setReducerClass(JoinImprovedReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        // Every argument except the last is an input path; the last is the output path.
        int outputIndex = otherArgs.length - 1;
        for (int i = 0; i < outputIndex; i++) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[outputIndex]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
/**
 * Grouping comparator that considers two {@link MemberKey}s equal when their key ids match,
 * ignoring the flag.
 */
class GroupComparator extends WritableComparator {

    /** Registers {@link MemberKey} as the key class, passing {@code true} as the second argument. */
    protected GroupComparator() {
        super(MemberKey.class, true);
    }

    /** Orders keys by ascending key id; returns 0 for identical ids. */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        MemberKey left = (MemberKey) a;
        MemberKey right = (MemberKey) b;
        return Integer.compare(left.getKeyID(), right.getKeyID());
    }
}
/**
 * Composite map-output key: the address number plus a flag telling which table the record
 * came from ({@code true} = address table, {@code false} = person table).
 *
 * <p>Keys sort by address number first; for equal numbers the address-table key sorts ahead
 * of person-table keys. {@link #hashCode()} uses the address number only, so both kinds of
 * record for one address hash alike.
 */
class MemberKey implements WritableComparable<MemberKey> {

    private int keyID;
    private boolean flag; // true: address, false: person

    public MemberKey() {
        super();
    }

    public MemberKey(int addressNo, boolean flag) {
        super();
        this.keyID = addressNo;
        this.flag = flag;
    }

    /** Serialises the id followed by the flag. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(keyID);
        dataOutput.writeBoolean(flag);
    }

    /** Reads the fields back in the order {@link #write} emits them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.keyID = dataInput.readInt();
        this.flag = dataInput.readBoolean();
    }

    /** Hashes on the address number only. */
    @Override
    public int hashCode() {
        return this.keyID;
    }

    /**
     * Two keys are equal when both id and flag match, consistent with {@link #compareTo}
     * returning 0 and with {@link #hashCode()}.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof MemberKey)) {
            return false;
        }
        MemberKey other = (MemberKey) obj;
        return this.keyID == other.keyID && this.flag == other.flag;
    }

    /**
     * Orders by address number, then puts the address-table key ({@code flag == true}) first.
     * Uses {@link Integer#compare} rather than subtraction, which overflows for ids that are
     * far apart (for example {@code Integer.MIN_VALUE} against {@code 1}) and yields the wrong sign.
     */
    @Override
    public int compareTo(MemberKey o) {
        int byId = Integer.compare(this.keyID, o.getKeyID());
        if (byId != 0) {
            return byId;
        }
        // Arguments swapped on purpose: true must sort before false.
        return Boolean.compare(o.isFlag(), this.flag);
    }

    public int getKeyID() {
        return keyID;
    }

    public void setKeyID(int keyID) {
        this.keyID = keyID;
    }

    public boolean isFlag() {
        return flag;
    }

    public void setFlag(boolean flag) {
        this.flag = flag;
    }
}
/**
 * Map-output value holding the member and address columns of one record.
 * Unset fields default to the empty string.
 */
class MemberInformation implements WritableComparable {

    private String memberNo = "";
    private String memberName = "";
    private String addressNo = "";
    private String addressName = "";

    public MemberInformation() {
    }

    public MemberInformation(String numberNo, String numberName, String addressNo,
                             String addressName) {
        this.memberNo = numberNo;
        this.memberName = numberName;
        this.addressNo = addressNo;
        this.addressName = addressName;
    }

    /** Copy constructor: duplicates every field of {@code bean}, read through its getters. */
    public MemberInformation(MemberInformation bean) {
        this(bean.getMemberNo(), bean.getMemberName(), bean.getAddressNo(), bean.getAddressName());
    }

    public String getMemberNo() {
        return this.memberNo;
    }

    public void setMemberNo(String memberNo) {
        this.memberNo = memberNo;
    }

    public String getMemberName() {
        return this.memberName;
    }

    public void setMemberName(String memberName) {
        this.memberName = memberName;
    }

    public String getAddressNo() {
        return this.addressNo;
    }

    public void setAddressNo(String addressNo) {
        this.addressNo = addressNo;
    }

    public String getAddressName() {
        return this.addressName;
    }

    public void setAddressName(String addressName) {
        this.addressName = addressName;
    }

    /** Serialises the four string fields in declaration order. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.memberNo);
        out.writeUTF(this.memberName);
        out.writeUTF(this.addressNo);
        out.writeUTF(this.addressName);
    }

    /** Reads the fields back in the order {@link #write} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        memberNo = in.readUTF();
        memberName = in.readUTF();
        addressNo = in.readUTF();
        addressName = in.readUTF();
    }

    /** Always returns 0; instances carry no ordering. */
    @Override
    public int compareTo(Object o) {
        return 0;
    }

    /** Returns the four fields separated by single spaces. */
    @Override
    public String toString() {
        StringBuilder line = new StringBuilder();
        line.append(memberNo).append(" ");
        line.append(memberName).append(" ");
        line.append(addressNo).append(" ");
        line.append(addressName);
        return line.toString();
    }
}
问题出现在 GroupComparator 继承 WritableComparator 时对父类构造方法的调用上
有问题的继承:`super(MemberKey.class);`
修改后的代码继承:`super(MemberKey.class, true);`