【HBase】HBase Advanced Programming

This article shows how to combine HBase with MapReduce to move data from HBase to HDFS and from HDFS to HBase, walks through exchanging data between HBase and MySQL, and covers integrating HBase with Hive, including the underlying principles, configuration, and verification steps.

1. Using HBase with MapReduce

1.1 HBase -> HDFS

Sample rows from the HBase table user_info (partial output of a scan):

hbase(main):002:0> scan 'user_info'
ROW                                                   COLUMN+CELL  
  baiyc_20150716_0008                                 column=base_info:name, timestamp=1581066464791, value= baiyc8                                                                                              
  baiyc_20150716_0008                                 column=extra_info:Hobbies, timestamp=1581066467105, value=sport                                                                                            
 rk0001                                               column=base_info:name, timestamp=1581066463942, value=wangbaoqiang                                                                                         
 rk0002                                               column=base_info:name, timestamp=1581066463987, value=zhangsan                                                                                             
 user0001                                             column=base_info:name, timestamp=1581066463864, value=zhangsan1                                                                                            
 zhangsan_20150701_0001                               column=base_info:age, timestamp=1581066464253, value=21     
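
If you need to reproduce this table before running the job, a minimal sketch in the HBase shell could look like the following (the column families base_info and extra_info and the sample rows are taken from the scan above; the timestamps will of course differ):

create 'user_info', 'base_info', 'extra_info'
put 'user_info', 'rk0001', 'base_info:name', 'wangbaoqiang'
put 'user_info', 'rk0002', 'base_info:name', 'zhangsan'
put 'user_info', 'user0001', 'base_info:name', 'zhangsan1'
put 'user_info', 'zhangsan_20150701_0001', 'base_info:age', '21'
put 'user_info', 'baiyc_20150716_0008', 'base_info:name', 'baiyc8'
put 'user_info', 'baiyc_20150716_0008', 'extra_info:Hobbies', 'sport'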

Requirement: read the data of the user_info table from HBase and write it out to HDFS.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.List;

/**
 * HBaseToHDFS Test
 * @date: 2020/2/24
 */
public class HBaseToHDFSMR {
   

    /**
     * Mapper:
     * HBase ships a pre-built base class for reading from an HBase table:
     * public abstract class TableMapper<KEYOUT, VALUEOUT> extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT>
     * public class ImmutableBytesWritable implements WritableComparable<ImmutableBytesWritable>
     */
    static class HBaseToHDFSMRMapper extends TableMapper<Text, NullWritable> {
   
        /**
         * @param key     the rowkey
         * @param value   one Result instance is passed in on every call of map();
         *                it holds the rowkey, family, qualifier, value and timestamp of the row's cells
         * @param context
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
   
            //String rowkey = Bytes.toString(key.copyBytes());

            // iterate over every cell contained in this row's Result
            List<Cell> cells = value.listCells();
            for (int i = 0; i < cells.size(); i++) {
   
                Cell cell = cells.get(i);
                String rowkey_result = Bytes.toString(value.getRow()) + "\t"
                        + Bytes.toString(CellUtil.cloneFamily(cell)) + "\t"
                        + Bytes.toString(CellUtil.cloneQualifier(cell)) + "\t"
                        + Bytes.toString(CellUtil.cloneValue(cell)) + "\t"
                        + cell.getTimestamp();
                context.write(new Text(rowkey_result), NullWritable.get());
            }
        }
    }

    /**
     * Reducer: writes each line through unchanged, so duplicate lines collapse into one
     */
    static class HBaseToHDFSMRReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
   
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
   
            context.write(key, NullWritable.get());
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // the HDFS user, ZooKeeper quorum and output path below are placeholders; adjust them for your cluster
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181");

        Job job = Job.getInstance(conf);
        job.setJarByClass(HBaseToHDFSMR.class);

        // read the whole user_info table and hand each row (one Result) to the TableMapper
        Scan scan = new Scan();
        TableMapReduceUtil.initTableMapperJob("user_info", scan,
                HBaseToHDFSMRMapper.class, Text.class, NullWritable.class, job);

        job.setReducerClass(HBaseToHDFSMRReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // delete the output directory if it already exists, then register it as the job output
        Path outputPath = new Path("/hbase2hdfs/output");
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}
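
After packaging the class into a jar and submitting it (the HBase client jars need to be on the MapReduce classpath, for example via HADOOP_CLASSPATH=$(hbase mapredcp)), each cell of user_info becomes one line in HDFS, in the rowkey, family, qualifier, value, timestamp order built by the Mapper. With the placeholder output path /hbase2hdfs/output used above, the result can be checked roughly like this:

hdfs dfs -cat /hbase2hdfs/output/part-r-00000

baiyc_20150716_0008	base_info	name	baiyc8	1581066464791
baiyc_20150716_0008	extra_info	Hobbies	sport	1581066467105
rk0001	base_info	name	wangbaoqiang	1581066463942
rk0002	base_info	name	zhangsan	1581066463987
user0001	base_info	name	zhangsan1	1581066463864
zhangsan_20150701_0001	base_info	age	21	1581066464253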