6. Map-side join: shipping the small file to map tasks with addCacheFile

1. Requirement

When a large table is joined with a small table, the small table can be shipped to the map side and loaded into memory, so the join matching happens quickly during the map phase without shuffling the small table.

For example: for each user, compute the total amount paid for each product in their shopping list.
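
For concreteness, here is a hypothetical data layout (file names and values are made up, but the tab-separated formats match what the code below parses). product.txt carries productName and unit price; each order line carries userId, productName, and quantity:

    product.txt (the small table):
    apple	5
    banana	3

    order files (the large table, one line per purchase):
    u001	apple	2
    u001	apple	1
    u001	banana	4
    u002	apple	3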

2. Approach

The product table file can be shipped to every map task's working directory via addCacheFile and loaded into an in-memory HashMap; each incoming user order line is then joined by a simple HashMap lookup (a standalone sketch of this lookup appears after the Mapper below).

Partition by user; group by (user, product).

3. Code

  • 1. ShopingBean.class

      import org.apache.hadoop.io.WritableComparable;
    
      import java.io.DataInput;
      import java.io.DataOutput;
      import java.io.IOException;
    
      /**
       * @Author liufu
       */
      public class ShopingBean implements WritableComparable<ShopingBean> {
    
          private String userId;
          private String productName;
          private int payCount;
    
          @Override
          public void write(DataOutput out) throws IOException {
              out.writeUTF(userId);
              out.writeUTF(productName);
              out.writeInt(payCount);
          }
    
          @Override
          public void readFields(DataInput in) throws IOException {
              this.userId = in.readUTF();
              this.productName = in.readUTF();
              this.payCount = in.readInt();
          }
    
          @Override
          public int compareTo(ShopingBean o) {
              // Sort by user first (descending)
              int useridCompare = this.userId.compareTo(o.getUserId());
              if (useridCompare != 0){
                  return -useridCompare;
              }

              // Then, for the same user, by product name (descending)
              int produceCompare = this.productName.compareTo(o.getProductName());
              if (produceCompare != 0){
                  return -produceCompare;
              }

              // Finally, for the same user and product, by total amount (descending);
              // returning 0 on ties keeps compareTo a consistent total order
              return Integer.compare(o.getPayCount(), this.payCount);
          }
    
    
          public String getUserId() {
              return userId;
          }
    
          public void setUserId(String userId) {
              this.userId = userId;
          }
    
          public String getProductName() {
              return productName;
          }
    
          public void setProductName(String productName) {
              this.productName = productName;
          }
    
          public int getPayCount() {
              return payCount;
          }
    
          public void setPayCount(int payCount) {
              this.payCount = payCount;
          }
      }
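
    As a quick sanity check (this snippet is not part of the original post), the bean can be pushed through its own write/readFields pair with plain java.io streams:

      import java.io.*;

      public class ShopingBeanRoundTrip {
          public static void main(String[] args) throws IOException {
              ShopingBean in = new ShopingBean();
              in.setUserId("u001");
              in.setProductName("apple");
              in.setPayCount(12);

              // Serialize with write(), then rebuild a fresh bean with readFields()
              ByteArrayOutputStream buffer = new ByteArrayOutputStream();
              in.write(new DataOutputStream(buffer));

              ShopingBean out = new ShopingBean();
              out.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

              System.out.println(out.getUserId() + " " + out.getProductName() + " " + out.getPayCount()); // u001 apple 12
          }
      }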
    
  • 2. JoinInMapMapper.class

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.io.IntWritable;
      import org.apache.hadoop.io.LongWritable;
      import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapreduce.Mapper;
    
      import java.io.BufferedReader;
      import java.io.FileReader;
      import java.io.IOException;
      import java.util.HashMap;
    
      /**
       * @Author liufu
       */
      public class JoinInMapMapper extends Mapper<LongWritable, Text, ShopingBean, IntWritable>{
          HashMap<String, Integer> productMap = null;
          ShopingBean k = null;
          IntWritable v = null;
          @Override
          protected void setup(Context context) throws IOException, InterruptedException {
              productMap = new HashMap<>();
              k = new ShopingBean();
              v = new IntWritable();
    
              // Grab the conf object created in the driver (JoinInMapRun)
              Configuration conf = context.getConfiguration();
              String addCacheFile = conf.get("addCacheFile");
              String splitField = conf.get("splitField");

              // Read the small file shipped by addCacheFile (it sits in the task's
              // working directory) and load it into the in-memory HashMap
              BufferedReader bf = new BufferedReader(new FileReader(addCacheFile));
              String tmpLine = null;
              while ((tmpLine = bf.readLine()) != null){
                  String[] fields = tmpLine.split(splitField);
                  productMap.put(fields[0], Integer.parseInt(fields[1]));
              }
              bf.close();
          }
    
          @Override
          protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
              // One order line: userId \t productName \t quantity
              String productLine = value.toString();
              String[] fields = productLine.split("\t");

              k.setUserId(fields[0]);
              k.setProductName(fields[1]);
              // The actual join: look up the unit price in the cached product table
              Integer price = productMap.get(fields[1]);
              k.setPayCount(price * Integer.parseInt(fields[2]));

              v.set(k.getPayCount());
              context.write(k, v);
          }
    
          @Override
          protected void cleanup(Context context) throws IOException, InterruptedException {
              productMap.clear();
              productMap = null;
              k = null;
              v = null;
          }
      }
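
    The setup/map pair above boils down to the following standalone logic. This sketch (file names product.txt and orders.txt are hypothetical) loads the small table into a HashMap and joins each order line against it, which is exactly the lookup the mapper performs:

      import java.io.*;
      import java.util.HashMap;

      public class LocalJoinSketch {
          public static void main(String[] args) throws IOException {
              // Load the small table: productName -> unit price
              HashMap<String, Integer> productMap = new HashMap<>();
              try (BufferedReader bf = new BufferedReader(new FileReader("product.txt"))) {
                  String line;
                  while ((line = bf.readLine()) != null) {
                      String[] fields = line.split("\t");
                      productMap.put(fields[0], Integer.parseInt(fields[1]));
                  }
              }

              // Stream the large table and join by lookup: userId \t productName \t quantity
              try (BufferedReader orders = new BufferedReader(new FileReader("orders.txt"))) {
                  String line;
                  while ((line = orders.readLine()) != null) {
                      String[] f = line.split("\t");
                      Integer price = productMap.get(f[1]);
                      if (price != null) {
                          System.out.println(f[0] + "\t" + f[1] + "\t" + price * Integer.parseInt(f[2]));
                      }
                  }
              }
          }
      }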
    
  • 3. JoinInMapReducer.class

      import org.apache.hadoop.io.IntWritable;
      import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapreduce.Reducer;
    
      import java.io.IOException;
    
      /**
       * @Author liufu
       */
      public class JoinInMapReducer extends Reducer<ShopingBean, IntWritable, Text, IntWritable>{
    
          @Override
          protected void reduce(ShopingBean key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
              // Sum every payCount in this (user, product) group
              int allCount = 0;
              for (IntWritable count : values){
                  allCount += count.get();
              }
    
              context.write(new Text(key.getUserId() + " " + key.getProductName()), new IntWritable(allCount));
          }
      }
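
    For instance, with the hypothetical sample data from section 1, the grouping comparator merges the two (u001, apple) records, so one reduce() call would receive the values [10, 5] and emit `u001 apple` with the total 15.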
    
  • 4. JoinInMapPartitioner.class

      import org.apache.hadoop.io.IntWritable;
      import org.apache.hadoop.mapreduce.Partitioner;
    
      /**
       * @Author liufu
       */
      public class JoinInMapPartitioner extends Partitioner<ShopingBean, IntWritable>{ // key/value match the map output types
          @Override
          public int getPartition(ShopingBean shopingBean, IntWritable intWritable, int numPartitions) {
              // Mask off the sign bit before taking the modulus; the parentheses are
              // required because % binds tighter than &
              return (shopingBean.getUserId().hashCode() & Integer.MAX_VALUE) % numPartitions;
          }
      }
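
    The `& Integer.MAX_VALUE` mask matters because String.hashCode() can be negative, and a negative partition index would fail the job. A quick demonstration (not from the original post):

      public class PartitionDemo {
          public static void main(String[] args) {
              // "polygenelubricants".hashCode() happens to be Integer.MIN_VALUE
              String userId = "polygenelubricants";
              int numPartitions = 4;
              System.out.println(userId.hashCode());                                        // -2147483648
              System.out.println((userId.hashCode() & Integer.MAX_VALUE) % numPartitions);  // 0, always in [0, 3]
          }
      }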
    
  • 5. JoinInMapGroupComparetor.class

      import org.apache.hadoop.io.WritableComparable;
      import org.apache.hadoop.io.WritableComparator;
    
      /**
       * @Author liufu
       */
      public class JoinInMapGroupComparetor extends WritableComparator{
    
          // This constructor is mandatory; without it the framework cannot instantiate
          // the key objects, and the type cast in compare() fails with a NullPointerException
          public JoinInMapGroupComparetor(){
              super(ShopingBean.class, true);
          }
    
          @Override
          public int compare(WritableComparable a, WritableComparable b) {
              ShopingBean pre = (ShopingBean) a;
              ShopingBean after = (ShopingBean) b;
    
              int userIdCompare = pre.getUserId().compareTo(after.getUserId());
              int productCompare = pre.getProductName().compareTo(after.getProductName());
    
              // Must NOT be: return userIdCompare == 0 && productCompare == 0 ? -1 : 1;
              // only a return value of 0 means "same group" (1 means greater, -1 means less).
              // The framework compares adjacent keys of the already-sorted stream,
              // so any nonzero value simply starts a new group.
              return userIdCompare == 0 && productCompare == 0 ? 0 : 1;
          }
      }
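
    A quick check (again, not from the original post) that two beans differing only in payCount fall into the same reduce group:

      public class GroupingDemo {
          public static void main(String[] args) {
              ShopingBean a = new ShopingBean();
              a.setUserId("u001"); a.setProductName("apple"); a.setPayCount(10);

              ShopingBean b = new ShopingBean();
              b.setUserId("u001"); b.setProductName("apple"); b.setPayCount(99);

              // 0 means "same group": both records reach one reduce() call
              System.out.println(new JoinInMapGroupComparetor().compare(a, b)); // 0
          }
      }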
    
  • 6. JoinInMapRun.class

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.io.IntWritable;
      import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapreduce.Job;
      import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
      import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
      import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
      import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    
      import java.io.IOException;
      import java.net.URI;
      import java.net.URISyntaxException;
    
      /**
       * @Author liufu
       */
      public class JoinInMapRun {
          public static void main(String[] args) throws IOException, URISyntaxException {
              Configuration conf = new Configuration();
              conf.set("addCacheFile","product.txt");
              conf.set("splitField","\t");
              Job job = Job.getInstance(conf, "joinInMapper");

              // Locate the job jar via the main class on the classpath
              job.setJarByClass(JoinInMapRun.class);
    
              // Ship the small product table to every task; it is localized into the
              // task working directory under its base name, product.txt
              job.addCacheFile(new URI("hdfs://192.168.0.186:9000/joininmapper/product.txt"));
              job.setPartitionerClass(JoinInMapPartitioner.class);
              job.setGroupingComparatorClass(JoinInMapGroupComparetor.class);
    
              // Map-side and reduce-side classes for the job
              job.setMapperClass(JoinInMapMapper.class);
              job.setReducerClass(JoinInMapReducer.class);
    
              // Declare the map and reduce output types so the framework can create them by reflection
              job.setMapOutputKeyClass(ShopingBean.class);
              job.setMapOutputValueClass(IntWritable.class);
              job.setOutputKeyClass(Text.class);
              job.setOutputValueClass(IntWritable.class);
    
              // How the job reads its input and writes its output
              job.setInputFormatClass(TextInputFormat.class);
              job.setOutputFormatClass(TextOutputFormat.class);
    
              // Where the job's data comes from; bind the input directories with setInputPaths (addInputPaths also works)
              FileInputFormat.setInputPaths(job, new Path("/joininmapper/input1/"),new Path("/joininmapper/input2/"));
    
              // Where the output goes
              FileOutputFormat.setOutputPath(job, new Path("/joininmapper/output/"));
    
              try {
                  boolean b = job.waitForCompletion(true);
                  System.exit(b ? 0 : 1);
              } catch (InterruptedException | ClassNotFoundException e) {
                  e.printStackTrace();
              }
          }
      }
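
    With the hypothetical sample data from section 1, the reducers would emit lines like the following (the per-user order reflects the descending sort in ShopingBean.compareTo; which output file a user lands in depends on the partitioner):

      u002 apple	15
      u001 banana	12
      u001 apple	15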
    

Reposted from: https://my.oschina.net/liufukin/blog/799161
