Implementing reduce-side and map-side joins
Reduce-side join implementation
In a reduce-side join, the mappers tag each record with its source and key everything by the join field (the product id, pid), so that matching order and product records arrive in the same reduce() call, where they are stitched together.
JavaBean class
The Bean class carries the union of the order fields (id, date, pid, amount) and the product fields (name, categoryId, price); whichever side a record comes from, the other side's fields stay null until the reducer fills them in.
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Bean implements WritableComparable<Bean> {
    // Order-side fields
    private String id;
    private String date;
    private String pid;
    private String amount;
    // Product-side fields
    private String name;
    private String categoryId;
    private String price;

    @Override
    public String toString() {
        return "Bean{" +
                "id='" + id + '\'' +
                ", date='" + date + '\'' +
                ", pid='" + pid + '\'' +
                ", amount='" + amount + '\'' +
                ", name='" + name + '\'' +
                ", categoryId='" + categoryId + '\'' +
                ", price=" + price +
                '}';
    }

    public Bean() {
    }

    // Built from a product record; the order-side fields stay null.
    public Bean(String name, String categoryId, String price) {
        this.name = name;
        this.categoryId = categoryId;
        this.price = price;
    }

    // Built from an order record; the product-side fields stay null.
    public Bean(String id, String date, String pid, String amount) {
        this.id = id;
        this.date = date;
        this.pid = pid;
        this.amount = amount;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    public String getPid() {
        return pid;
    }

    public void setPid(String pid) {
        this.pid = pid;
    }

    public String getAmount() {
        return amount;
    }

    public void setAmount(String amount) {
        this.amount = amount;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getCategoryId() {
        return categoryId;
    }

    public void setCategoryId(String categoryId) {
        this.categoryId = categoryId;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        // writeUTF cannot serialize null, so null fields go out as the
        // literal string "null"; the reducer relies on that marker.
        dataOutput.writeUTF(id + "");
        dataOutput.writeUTF(date + "");
        dataOutput.writeUTF(pid + "");
        dataOutput.writeUTF(amount + "");
        dataOutput.writeUTF(name + "");
        dataOutput.writeUTF(categoryId + "");
        dataOutput.writeUTF(price + "");
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        // Fields must be read in exactly the order they were written.
        this.id = dataInput.readUTF();
        this.date = dataInput.readUTF();
        this.pid = dataInput.readUTF();
        this.amount = dataInput.readUTF();
        this.name = dataInput.readUTF();
        this.categoryId = dataInput.readUTF();
        this.price = dataInput.readUTF();
    }

    @Override
    public int compareTo(Bean o) {
        // Bean is only ever used as a map output value, never as a key,
        // so no meaningful ordering is needed.
        return 1;
    }
}
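For reference, both jobs assume comma-separated input files shaped as follows. The concrete values below are made up for illustration; only the field order matters, because it is what the mappers hard-code.

orders.txt (id,date,pid,amount):
1001,20240301,p0001,2
1002,20240302,p0002,3

product.txt (pid,name,categoryId,price):
p0001,xiaomi,c001,2000
p0002,apple,c001,5000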
Map class
The mapper tells the two inputs apart by the name of the file its split came from, then keys every record by pid.
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class Map extends Mapper<LongWritable, Text, Text, Bean> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Identify the source table by the input file name of this split.
        FileSplit fs = (FileSplit) context.getInputSplit();
        String name = fs.getPath().getName();
        if (name.contains("orders")) {
            // Order line: id,date,pid,amount -- the join key pid is field 2.
            String[] split = value.toString().split(",");
            context.write(new Text(split[2]), new Bean(split[0], split[1], split[2], split[3]));
        } else if (name.contains("product")) {
            // Product line: pid,name,categoryId,price -- the join key pid is field 0.
            String[] split = value.toString().split(",");
            context.write(new Text(split[0]), new Bean(split[1], split[2], split[3]));
        }
    }
}
Reduce class
For each pid the reducer receives one product bean plus any number of order beans; it buffers the orders, copies the product fields onto each one, and emits one joined record per order.
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class Reduce extends Reducer<Text, Bean, Bean, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<Bean> values, Context context) throws IOException, InterruptedException {
        Bean product = new Bean();
        List<Bean> orders = new ArrayList<>();
        for (Bean value : values) {
            // Order beans carry a real id; product beans were serialized with a
            // null id, which writeUTF turned into the literal string "null".
            if (null != value.getId() && !value.getId().equals("null")) {
                // Copy the bean: Hadoop reuses the iterator's value object,
                // so storing the reference itself would lose earlier orders.
                orders.add(new Bean(value.getId(), value.getDate(), value.getPid(), value.getAmount()));
            } else {
                product.setName(value.getName());
                product.setCategoryId(value.getCategoryId());
                product.setPrice(value.getPrice());
            }
        }
        // Emit one joined record per order, not just one per product,
        // so products with several orders are joined correctly.
        for (Bean order : orders) {
            order.setName(product.getName());
            order.setCategoryId(product.getCategoryId());
            order.setPrice(product.getPrice());
            context.write(order, NullWritable.get());
        }
    }
}
Driver class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Driver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(Driver.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Bean.class);
        job.setOutputKeyClass(Bean.class);
        job.setOutputValueClass(NullWritable.class);
        // Local paths for a test run: the input directory holds both the orders
        // and product files, and the output directory must not already exist.
        FileInputFormat.addInputPath(job, new Path("D:\\桌面\\input"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\桌面\\output"));
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
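With the hypothetical sample data above, the job emits one joined record per order, rendered by Bean.toString(), along the lines of:

Bean{id='1001', date='20240301', pid='p0001', amount='2', name='xiaomi', categoryId='c001', price=2000}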
Map-side join implementation
In a map-side join, the small product table is shipped to every map task through the distributed cache and loaded into an in-memory HashMap in setup(); the join then happens entirely on the map side, with no reduce phase and no shuffle of the small table.
Map class
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;

public class Map extends Mapper<LongWritable, Text, Text, Text> {
    private HashMap<String, String> hashMap = new HashMap<>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Load the cached product table into memory once per map task.
        // context.getCacheFiles() replaces the deprecated DistributedCache API.
        URI[] files = context.getCacheFiles();
        FileSystem fs = FileSystem.get(files[0], context.getConfiguration());
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(files[0])), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Product line: pid,name,categoryId,price -- index by pid.
                String[] split = line.split(",");
                hashMap.put(split[0], split[1] + "\t" + split[2] + "\t" + split[3]);
            }
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Order line: id,date,pid,amount -- look the product up by pid.
        String[] split = value.toString().split(",");
        String s = hashMap.get(split[2]);
        if (s != null) {
            // Inner join: orders whose pid has no product record are dropped.
            context.write(new Text(split[2]), new Text(s + "\t" + split[0] + "\t" + split[1] + "\t" + split[3]));
        }
    }
}
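With the hypothetical product.txt shown earlier, setup() leaves the in-memory table holding entries like p0001 -> "xiaomi\tc001\t2000", so each map() call performs a constant-time hash lookup instead of a shuffle-and-sort.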
Driver class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.net.URI;

public class Driver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Driver.class);
        job.setMapperClass(Map.class);
        // Map-only job: the mapper writes the final Text/Text output directly.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // No reducer means no shuffle, which is the point of a map-side join.
        job.setNumReduceTasks(0);
        // Ship the small product table to every map task via the distributed
        // cache (Job.addCacheFile replaces the deprecated DistributedCache API).
        job.addCacheFile(new URI("hdfs://192.168.10.231:8020/product.txt"));
        FileInputFormat.addInputPath(job, new Path("D:\\桌面\\input\\orders.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\桌面\\output"));
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
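With the same hypothetical sample, each output line carries the pid key followed by the tab-separated product and order fields, e.g. (tabs shown as \t):

p0001\txiaomi\tc001\t2000\t1001\t20240301\t2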