MapReduce从HBase多路径导出数据到Hive

package com.zz.hbase.ccrc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.zz.hive.Constants;
import com.zz.util.DateUtil;


public class HBaseToHiveJob {

    /**
     * Mapper that converts every scanned HBase row into one delimited text line.
     *
     * <p>The value stored under the {@code event} qualifier is used as the map output key
     * (the target table name); the remaining qualifiers of the row are laid out in the
     * column order defined for that table.
     */
    public static class HBaseToHiveMapper extends TableMapper<Text, Text> {

        /** Column family read from every row. */
        private static final byte[] FAMILY = Bytes.toBytes("aaaa");

        /** Qualifier whose value names the target table. */
        private static final String EVENT_QUALIFIER = "event";

        /** Placeholder written for a column that has no value in the row. */
        private static final String MISSING_VALUE = "-";

        /** Counter group used to report rows that produced no output. */
        private static final String COUNTER_GROUP = "HBaseToHive";

        // Writables are reused across map() calls; context.write() serializes them immediately.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Emits {@code (tableName, delimitedLine)} for one HBase row.
         *
         * <p>Rows without cells in the family, or whose event value has no known column
         * layout, are skipped and counted instead of being emitted as blank lines.
         *
         * @param key     row key of the HBase row (not used)
         * @param value   cells of the row
         * @param context task context used to emit the record and update counters
         * @throws IOException          if the record cannot be written
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            // getFamilyMap may return null for an empty Result, so guard before iterating.
            Map<byte[], byte[]> cells = value.getFamilyMap(FAMILY);
            if (cells == null) {
                context.getCounter(COUNTER_GROUP, "ROWS_WITHOUT_FAMILY").increment(1);
                return;
            }

            String tableName = "";
            Map<String, String> columnValues = new HashMap<String, String>();
            for (Entry<byte[], byte[]> cell : cells.entrySet()) {
                // Bytes.toString decodes as UTF-8 regardless of the platform default charset.
                String cellValue = Bytes.toString(cell.getValue());
                if (cellValue == null) {
                    continue;
                }
                String qualifier = Bytes.toString(cell.getKey());
                if (EVENT_QUALIFIER.equals(qualifier)) {
                    tableName = cellValue;
                } else {
                    columnValues.put(qualifier, cellValue);
                }
            }

            List<String> columns = columnsFor(tableName);
            if (columns == null || columns.isEmpty()) {
                // No layout for this event: skip rather than write an empty line.
                context.getCounter(COUNTER_GROUP, "ROWS_WITH_UNKNOWN_EVENT").increment(1);
                return;
            }

            outKey.set(tableName);
            outValue.set(joinColumns(columns, columnValues));
            context.write(outKey, outValue);
        }

        /**
         * Looks up the ordered column list for a table (event) name.
         *
         * @param tableName value of the row's {@code event} qualifier
         * @return the column names in output order, or {@code null} when the table is unknown
         */
        private List<String> columnsFor(String tableName) {
            if ("Summary".equals(tableName)) {
                // Predefined column list for the Summary table; other tables follow the same pattern.
                return com.zz.option.Constants.SUMMARY_COLUMN;
            }
            return null;
        }

        /**
         * Joins the row's values in column order, separated by {@code Constants.BLANK_CHAR}.
         * A column with no value in the row is written as {@code "-"}.
         *
         * @param columns ordered column names of the target table
         * @param values  qualifier-to-value map of the current row
         * @return the delimited line, without a trailing separator
         */
        private String joinColumns(List<String> columns, Map<String, String> values) {
            StringBuilder line = new StringBuilder();
            boolean first = true;
            for (String column : columns) {
                // Separator goes between values, so nothing has to be trimmed afterwards.
                if (!first) {
                    line.append(Constants.BLANK_CHAR);
                }
                first = false;
                String columnValue = values.get(column);
                line.append(columnValue != null ? columnValue : MISSING_VALUE);
            }
            return line.toString();
        }
    }

    /**
     * Reducer that writes every line of a table into its own sub-directory of the job
     * output, using {@link MultipleOutputs} with the base path
     * {@code <tableName>/<tableName>}.
     */
    public static class HBaseToHiveReducer extends Reducer<Text, Text, NullWritable, Text> {
        private MultipleOutputs<NullWritable, Text> out;

        /**
         * Creates the {@link MultipleOutputs} instance bound to this task.
         *
         * @param context task context of the reducer
         */
        @Override
        protected void setup(Context context) {
            out = new MultipleOutputs<NullWritable, Text>(context);
        }

        /**
         * Writes all lines of one table under {@code <tableName>/<tableName>}.
         *
         * <p>Groups with an empty table name are skipped and counted: the base path would
         * otherwise be {@code "/"}, which is not a relative location under the job output
         * directory.
         *
         * @param key     table name taken from the row's event value
         * @param values  delimited lines belonging to that table
         * @param context task context used for counters
         * @throws IOException          if a line cannot be written
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)throws IOException, InterruptedException {
            String tableName = key.toString();
            if (tableName.isEmpty()) {
                long skipped = 0;
                for (Text ignored : values) {
                    skipped++;
                }
                context.getCounter("HBaseToHive", "RECORDS_WITHOUT_TABLE_NAME").increment(skipped);
                return;
            }

            String path = tableName + "/" + tableName;
            for (Text val : values) {
                // The value is written immediately, so no defensive copy of the reused Text is needed.
                out.write(NullWritable.get(), val, path);
            }
        }

        /**
         * Closes all record writers opened through {@link MultipleOutputs}.
         *
         * @param context task context of the reducer
         * @throws IOException          if closing a writer fails
         * @throws InterruptedException if the task is interrupted while closing
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Configures and runs the export job: scans family {@code aaaa} of HBase table
     * {@code Fetch} and writes the rows as text files below the given output path.
     *
     * <p>Any existing content at the output path is deleted recursively before the job
     * starts. The JVM exits with 0 on success, 1 on job failure and 2 on invalid usage.
     *
     * @param args {@code args[0]} is the output path
     * @throws IOException            if the file system or the job cannot be accessed
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 1 || args[0].trim().isEmpty()) {
            System.err.println("Usage: HBaseToHiveJob <output-path>");
            System.exit(2);
            return;
        }

        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "Fetch HBase2Hive" + DateUtil.getNowTime());
        job.setJarByClass(HBaseToHiveJob.class);

        Scan scan = new Scan();
        scan.setCaching(500);
        // Block caching is disabled for this full-table MapReduce scan.
        scan.setCacheBlocks(false);
        scan.addFamily(Bytes.toBytes("aaaa"));

        // The job refuses to start when the output directory exists, so it is removed first.
        // NOTE: this deletes whatever path was passed in, recursively.
        Path output = new Path(args[0]);
        final FileSystem fileSystem = output.getFileSystem(conf);
        fileSystem.delete(output, true);

        TableMapReduceUtil.initTableMapperJob("Fetch", scan, HBaseToHiveMapper.class, Text.class, Text.class, job, false);
        job.setReducerClass(HBaseToHiveReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, output);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值