1. Requirement
When joining a large table with a small table, the small table can be shipped to the map side and loaded into memory, so each join lookup can be done quickly without a reduce-side shuffle of both tables.
For example: for each user's shopping list, compute the total amount paid per product.
2. Approach
The product table file can be shipped to every map task's working directory via addCacheFile and loaded into an in-memory HashMap; each incoming user order line is then matched against that HashMap.
Partition by user; group by user and product.
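To make the data flow concrete, here is a hypothetical sketch of the file layouts the code below assumes (all file names, products, and numbers are invented; fields are tab-separated, matching the `splitField` setting and the `split("\t")` calls in the code):

```
# product.txt — the small table shipped via addCacheFile: productName \t unitPrice
iphone    5000
book      30

# order lines under /joininmapper/input1/ and input2/: userId \t productName \t quantity
u001      iphone    2
u001      book      3
u002      iphone    1
```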
3. Code
1. ShopingBean.class
```java
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Author liufu
 */
public class ShopingBean implements WritableComparable<ShopingBean> {
    private String userId;
    private String productName;
    private int payCount;

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(userId);
        out.writeUTF(productName);
        out.writeInt(payCount);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Read the fields in exactly the order they were written
        this.userId = in.readUTF();
        this.productName = in.readUTF();
        this.payCount = in.readInt();
    }

    @Override
    public int compareTo(ShopingBean o) {
        // Sort by userId first (descending)
        int userIdCompare = o.getUserId().compareTo(this.userId);
        if (userIdCompare != 0) {
            return userIdCompare;
        }
        // Then by productName within the same user (descending)
        int productCompare = o.getProductName().compareTo(this.productName);
        if (productCompare != 0) {
            return productCompare;
        }
        // Finally by payCount within the same user and product (descending);
        // Integer.compare returns 0 when they are equal, respecting the contract
        return Integer.compare(o.getPayCount(), this.payCount);
    }

    public String getUserId() { return userId; }

    public void setUserId(String userId) { this.userId = userId; }

    public String getProductName() { return productName; }

    public void setProductName(String productName) { this.productName = productName; }

    public int getPayCount() { return payCount; }

    public void setPayCount(int payCount) { this.payCount = payCount; }
}
```
2. JoinInMapMapper.class
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;

/**
 * @Author liufu
 */
public class JoinInMapMapper extends Mapper<LongWritable, Text, ShopingBean, IntWritable> {
    HashMap<String, Integer> productMap = null;
    ShopingBean k = null;
    IntWritable v = null;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        productMap = new HashMap<>();
        k = new ShopingBean();
        v = new IntWritable();
        // Get the conf object created on the driver side (JoinInMapRun)
        Configuration conf = context.getConfiguration();
        String addCacheFile = conf.get("addCacheFile");
        String splitField = conf.get("splitField");
        // Read the small file shipped via addCacheFile (localized into the
        // task's working directory) and load it into the in-memory HashMap
        BufferedReader bf = new BufferedReader(new FileReader(addCacheFile));
        String tmpLine;
        while ((tmpLine = bf.readLine()) != null) {
            String[] fields = tmpLine.split(splitField);
            productMap.put(fields[0], Integer.parseInt(fields[1]));
        }
        bf.close();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String orderLine = value.toString();
        String[] fields = orderLine.split("\t");
        k.setUserId(fields[0]);
        k.setProductName(fields[1]);
        // The join: look up this product's unit price in the in-memory map
        Integer price = productMap.get(fields[1]);
        if (price == null) {
            // Product missing from the small table; skip this order line
            return;
        }
        k.setPayCount(price * Integer.parseInt(fields[2]));
        v.set(k.getPayCount());
        context.write(k, v);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        productMap.clear();
        productMap = null;
        k = null;
        v = null;
    }
}
```
3. JoinInMapReducer.class
```java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Author liufu
 */
public class JoinInMapReducer extends Reducer<ShopingBean, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(ShopingBean key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // All values in one call belong to the same (userId, productName)
        // group, as defined by the grouping comparator below
        int allCount = 0;
        for (IntWritable count : values) {
            allCount += count.get();
        }
        context.write(new Text(key.getUserId() + " " + key.getProductName()), new IntWritable(allCount));
    }
}
```
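With the hypothetical sample data sketched in section 2, the reducer would write one line per (user, product) group; the exact line order across output files depends on the partitioner and the descending sort:

```
u002 iphone    5000
u001 iphone    10000
u001 book      90
```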
4. JoinInMapPartitioner.class
```java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * @Author liufu
 */
public class JoinInMapPartitioner extends Partitioner<ShopingBean, IntWritable> {
    // The key and value types match the map output types
    @Override
    public int getPartition(ShopingBean shopingBean, IntWritable intWritable, int numPartitions) {
        // & Integer.MAX_VALUE clears the sign bit so a negative hashCode cannot
        // yield a negative partition; the parentheses matter, because % binds
        // tighter than &
        return (shopingBean.getUserId().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}
```
5. JoinInMapGroupComparetor.class
```java
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * @Author liufu
 */
public class JoinInMapGroupComparetor extends WritableComparator {
    // This constructor is required; without it compare() throws a
    // NullPointerException, because no ShopingBean instances get created
    // for deserialization and the casts below have nothing to work on
    public JoinInMapGroupComparetor() {
        super(ShopingBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        ShopingBean pre = (ShopingBean) a;
        ShopingBean after = (ShopingBean) b;
        // Group by (userId, productName): only a return value of 0 means
        // "same group"; payCount is deliberately ignored here so that all
        // payments for one user's product land in a single reduce() call
        int userIdCompare = pre.getUserId().compareTo(after.getUserId());
        if (userIdCompare != 0) {
            return userIdCompare;
        }
        return pre.getProductName().compareTo(after.getProductName());
    }
}
```
6. JoinInMapRun.class
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @Author liufu
 */
public class JoinInMapRun {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        conf.set("addCacheFile", "product.txt");
        conf.set("splitField", "\t");

        Job job = Job.getInstance(conf, "joinInMapper");
        // Locate the jar on the classpath via the driver class
        job.setJarByClass(JoinInMapRun.class);
        // Ship the small table to every map task's working directory
        job.addCacheFile(new URI("hdfs://192.168.0.186:9000/joininmapper/product.txt"));

        job.setPartitionerClass(JoinInMapPartitioner.class);
        job.setGroupingComparatorClass(JoinInMapGroupComparetor.class);

        // Map-side and reduce-side classes for the job
        job.setMapperClass(JoinInMapMapper.class);
        job.setReducerClass(JoinInMapReducer.class);

        // Declare the map and reduce output types so the framework can
        // instantiate them by reflection
        job.setMapOutputKeyClass(ShopingBean.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // How the job reads and writes its data
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Where the data comes from; setInputPaths and addInputPath both work
        FileInputFormat.setInputPaths(job, new Path("/joininmapper/input1/"), new Path("/joininmapper/input2/"));
        // Where the output goes
        FileOutputFormat.setOutputPath(job, new Path("/joininmapper/output/"));

        try {
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        } catch (InterruptedException | ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
```
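For completeness, a minimal sketch of how the job might be submitted, assuming the classes are packaged into a jar named join-in-map.jar (the jar name and local file names are assumptions; the HDFS paths come from the driver above):

```
# The small table must exist at the URI passed to addCacheFile
hdfs dfs -put product.txt hdfs://192.168.0.186:9000/joininmapper/product.txt
hdfs dfs -put orders1.txt /joininmapper/input1/
hdfs dfs -put orders2.txt /joininmapper/input2/

# Submit the job; results appear under /joininmapper/output/part-r-00000 etc.
hadoop jar join-in-map.jar JoinInMapRun
```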