Data
------------
[customers.txt]
1,tom,12
2,tom,13
3,tom,14
4,tom,15
[orders.txt]
1,no001,12.23,1
2,no001,12.23,1
3,no001,12.23,2
4,no001,12.23,2
5,no001,12.23,2
6,no001,12.23,3
7,no001,12.23,3
8,no001,12.23,3
9,no001,12.23,3
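The notes do not spell out the column layout; judging from the code below, the first field of customers.txt is the customer id (cid) and the last field of orders.txt is the cid of the customer who placed the order (the remaining columns are presumably a name/age and an order number/amount). Both jobs below emit joined rows of the form customerInfo,orderInfo, so order 1,no001,12.23,1 combined with customer 1,tom,12 becomes:
1,tom,12,1,no001,12.23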
Map-side join
---------------
1. Create the Mapper
package com.hdfs.mr.mapjoin;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
/**
* Map-side join: the customer table is cached in memory in setup() and joined to each order record in map().
*/
public class MapJoinMapper extends Mapper<LongWritable,Text,Text,NullWritable> {
private Map<String,String> allCustomers = new HashMap<String,String>();
//load every customer record into memory at task start, keyed by cid
protected void setup(Context context) throws IOException, InterruptedException {
try {
Configuration conf = context.getConfiguration();
FileSystem fs = FileSystem.get(conf);
FSDataInputStream fis = fs.open(new Path("file:///d:/mr/mapjoin/customers.txt"));
//wrap the stream in a buffered reader
BufferedReader br = new BufferedReader(new InputStreamReader(fis));
String line = null ;
while((line = br.readLine()) != null){
//cid is the first comma-separated field
String cid = line.substring(0,line.indexOf(","));
allCustomers.put(cid,line);
}
} catch (Exception e) {
e.printStackTrace();
}
}
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//one order record per input line
String line = value.toString();
//extract the customer id (last comma-separated field)
String cid = line.substring(line.lastIndexOf(",") + 1);
//order info without the trailing cid
String orderInfo = line.substring(0,line.lastIndexOf(","));
//join: customer + "," + order
String customerInfo = allCustomers.get(cid);
context.write(new Text(customerInfo + "," + orderInfo),NullWritable.get());
}
}
2. Create the App
package com.hdfs.mr.mapjoin;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* MapJoinApp: driver for the map-side join job (map-only, no reducer).
*/
public class MapJoinApp {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("fs.defaultFS","file:///");
Job job = Job.getInstance(conf);
//set the job's properties
job.setJobName("MapJoinApp"); //job name
job.setJarByClass(MapJoinApp.class); //class used to locate the jar
//add the input path
FileInputFormat.addInputPath(job,new Path(args[0]));
//set the output path
FileOutputFormat.setOutputPath(job,new Path(args[1]));
//map-only job: no reduce tasks
job.setNumReduceTasks(0);
job.setMapperClass(MapJoinMapper.class); //mapper class
job.setMapOutputKeyClass(Text.class); //map output key type
job.setMapOutputValueClass(NullWritable.class); //map output value type
job.waitForCompletion(true);
}
}
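Note: MapJoinMapper reads customers.txt from the hard-coded local path file:///d:/mr/mapjoin/customers.txt, which only works for a local run. On a cluster the small side table is usually shipped via the distributed cache instead; a minimal sketch, assuming customers.txt has already been uploaded to HDFS (the HDFS path below is hypothetical):
//in the driver, before submitting the job
job.addCacheFile(new java.net.URI("hdfs:///data/mapjoin/customers.txt"));
//in MapJoinMapper.setup(), open the cached file instead of the local d: path
java.net.URI[] cacheFiles = context.getCacheFiles();
FileSystem fs = FileSystem.get(cacheFiles[0], context.getConfiguration());
FSDataInputStream fis = fs.open(new Path(cacheFiles[0]));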
Reduce-side join
-----------------------
1. Define a custom key
package com.hdfs.mr.mapjoin.reducejoin;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* Composite key: carries either a customer record (type 0) or an order record (type 1).
*/
public class ComboKey2 implements WritableComparable<ComboKey2> {
//0-customer 1-order
private int type ;
private int cid ;
private int oid ;
private String customerInfo = "" ;
private String orderInfo = "" ;
public int compareTo(ComboKey2 o) {
int type0 = o.type ;
int cid0= o.cid;
int oid0 = o.oid;
String customerInfo0 = o.customerInfo;
String orderInfo0 = o.orderInfo ;
//same customer?
if(cid == cid0){
//two orders of the same customer: order by oid
if(type == type0){
return oid - oid0 ;
}
//a customer and one of its orders: the customer sorts first
else{
if(type ==0)
return -1 ;
else
return 1 ;
}
}
//different customers: order by cid
else{
return cid - cid0 ;
}
}
public void write(DataOutput out) throws IOException {
out.writeInt(type);
out.writeInt(cid);
out.writeInt(oid);
out.writeUTF(customerInfo);
out.writeUTF(orderInfo);
}
public void readFields(DataInput in) throws IOException {
this.type = in.readInt();
this.cid = in.readInt();
this.oid = in.readInt();
this.customerInfo = in.readUTF();
this.orderInfo = in.readUTF();
}
//getters/setters used by the mapper, partitioner and reducer
public int getType() { return type; }
public void setType(int type) { this.type = type; }
public int getCid() { return cid; }
public void setCid(int cid) { this.cid = cid; }
public int getOid() { return oid; }
public void setOid(int oid) { this.oid = oid; }
public String getCustomerInfo() { return customerInfo; }
public void setCustomerInfo(String customerInfo) { this.customerInfo = customerInfo; }
public String getOrderInfo() { return orderInfo; }
public void setOrderInfo(String orderInfo) { this.orderInfo = orderInfo; }
}
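With this compareTo, keys sort first by cid; within one cid the customer key (type 0) comes before all of that customer's order keys, which are then ordered by oid. A quick hypothetical check (not part of the original notes):
ComboKey2 c = new ComboKey2();
c.setType(0); c.setCid(1);
ComboKey2 o = new ComboKey2();
o.setType(1); o.setCid(1); o.setOid(2);
System.out.println(c.compareTo(o)); //negative: the customer sorts ahead of its orders
System.out.println(o.compareTo(c)); //positive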
2. Define a custom partitioner
package com.hdfs.mr.mapjoin.reducejoin;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
* Partition by cid so that a customer and all of its orders go to the same reducer.
*/
public class CIDPartitioner extends Partitioner<ComboKey2,NullWritable>{
public int getPartition(ComboKey2 key, NullWritable nullWritable, int numPartitions) {
return key.getCid() % numPartitions;
}
}
3. Create the Mapper
package com.hdfs.mr.mapjoin.reducejoin;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
/**
* ReduceJoinMapper: tags each input line as customer or order and emits a ComboKey2.
*/
public class ReduceJoinMapper extends Mapper<LongWritable,Text,ComboKey2,NullWritable> {
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//raw input line
String line = value.toString() ;
//determine from the split path whether this line is a customer or an order
FileSplit split = (FileSplit)context.getInputSplit();
String path = split.getPath().toString();
//customer record
ComboKey2 key2 = new ComboKey2();
if(path.contains("customers")){
String cid = line.substring(0,line.indexOf(","));
String custInfo = line ;
key2.setType(0);
key2.setCid(Integer.parseInt(cid));
key2.setCustomerInfo(custInfo);
}
//order info
else{
String cid = line.substring(line.lastIndexOf(",") + 1);
String oid = line.substring(0, line.indexOf(","));
String oinfo = line.substring(0, line.lastIndexOf(","));
key2.setType(1);
key2.setCid(Integer.parseInt(cid));
key2.setOid(Integer.parseInt(oid));
key2.setOrderInfo(oinfo);
}
context.write(key2,NullWritable.get());
}
}
4. Create the Reducer
package com.hdfs.mr.mapjoin.reducejoin;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
/**
* ReduceJoinReducer: reduce-side join implementation.
*/
public class ReduceJoinReducer extends Reducer<ComboKey2,NullWritable,Text,NullWritable> {
protected void reduce(ComboKey2 key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
Iterator<NullWritable> it = values.iterator();
//the first key in the group is the customer record (type 0 sorts first, see ComboKey2.compareTo)
it.next();
int type = key.getType();
int cid = key.getCid() ;
String cinfo = key.getCustomerInfo() ;
//each remaining value belongs to one order key; the framework re-deserializes the key
//object as the values are iterated, so getOrderInfo() yields a different order each time
while(it.hasNext()){
it.next();
String oinfo = key.getOrderInfo();
context.write(new Text(cinfo + "," + oinfo),NullWritable.get());
}
}
}
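For example, with the sample data above, the reduce group for cid=1 is the customer key 1,tom,12 followed by its two order keys, so the reducer should emit (expected output, not verified here):
1,tom,12,1,no001,12.23
1,tom,12,2,no001,12.23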
5. Create the sort comparator
package com.hdfs.mr.mapjoin.reducejoin;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
* Sort comparator for the composite key; delegates to ComboKey2.compareTo().
*/
public class ComboKey2Comparator extends WritableComparator {
protected ComboKey2Comparator() {
super(ComboKey2.class, true);
}
public int compare(WritableComparable a, WritableComparable b) {
ComboKey2 k1 = (ComboKey2) a;
ComboKey2 k2 = (ComboKey2) b;
return k1.compareTo(k2);
}
}
6. Create the grouping comparator
package com.hdfs.mr.mapjoin.reducejoin;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
* Grouping comparator on cid: a customer key and all of its order keys form one reduce group.
*/
public class CIDGroupComparator extends WritableComparator{
protected CIDGroupComparator() {
super(ComboKey2.class, true);
}
public int compare(WritableComparable a, WritableComparable b) {
ComboKey2 k1 = (ComboKey2) a;
ComboKey2 k2 = (ComboKey2) b;
return k1.getCid() - k2.getCid();
}
}
7. Create the App
package com.hdfs.mr.mapjoin.reducejoin;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* ReduceJoinApp: driver for the reduce-side join job.
*/
public class ReduceJoinApp {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("fs.defaultFS","file:///");
Job job = Job.getInstance(conf);
//set the job's properties
job.setJobName("ReduceJoinApp"); //job name
job.setJarByClass(ReduceJoinApp.class); //class used to locate the jar
//add the input path
FileInputFormat.addInputPath(job,new Path("D:\\mr\\reducejoin"));
//set the output path
FileOutputFormat.setOutputPath(job,new Path("D:\\mr\\reducejoin\\out"));
job.setMapperClass(ReduceJoinMapper.class); //mapper class
job.setReducerClass(ReduceJoinReducer.class); //reducer class
//map output types
job.setMapOutputKeyClass(ComboKey2.class);
job.setMapOutputValueClass(NullWritable.class);
//reduce output types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
//partitioner: route by cid
job.setPartitionerClass(CIDPartitioner.class);
//grouping comparator: group by cid only
job.setGroupingComparatorClass(CIDGroupComparator.class);
//sort comparator: customer first, then its orders by oid
job.setSortComparatorClass(ComboKey2Comparator.class);
job.setNumReduceTasks(2); //number of reducers
job.waitForCompletion(true);
}
}
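With 2 reduce tasks and the cid % numPartitions partitioner, cids 2 and 4 land in partition 0 and cids 1 and 3 in partition 1; customer 4 has no orders, so it contributes nothing. The expected output for the sample data (a sketch, not verified here):
[part-r-00000]
2,tom,13,3,no001,12.23
2,tom,13,4,no001,12.23
2,tom,13,5,no001,12.23
[part-r-00001]
1,tom,12,1,no001,12.23
1,tom,12,2,no001,12.23
3,tom,14,6,no001,12.23
3,tom,14,7,no001,12.23
3,tom,14,8,no001,12.23
3,tom,14,9,no001,12.23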