1. Java API operations on HBase
public class HbaseDemoTest {
// Declare a static configuration
static Configuration conf = null;
private static final String ZK_CONNECT_STR =
"bigdata02:2181,bigdata03:2181,bigdata04:2181,bigdata05:2181";
static {
conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", ZK_CONNECT_STR);
}
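// Note: this class uses the classic HBase 1.x client API (HBaseAdmin, HTable, Put.add, Result.list).
// In HBase 2.x these are removed or deprecated in favor of ConnectionFactory / Admin / Table,
// Put.addColumn and Result.listCells.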
/**
* Create a table
*
* @tableName table name
* @family list of column families
*/
public static void createTable(String tableName, String[] family) throws Exception {
HBaseAdmin admin = new HBaseAdmin(conf);
HTableDescriptor desc = new HTableDescriptor(tableName);
for (int i = 0; i < family.length; i++) {
desc.addFamily(new HColumnDescriptor(family[i]));
}
if (admin.tableExists(tableName)) {
System.out.println("table Exists!")
System.exit(0);
} else {
admin.createTable(desc);
System.out.println("create table Success!");
}
}
/**
* Add data to a table (suitable for fixed tables whose column families are known in advance)
*
* @rowKey row key
* @tableName table name
* @column1 list of columns in the first column family
* @value1 list of values for the columns in the first column family
* @column2 list of columns in the second column family
* @value2 list of values for the columns in the second column family
*/
public static void addData(
String rowKey, String tableName, String[] column1, String[] value1, String[] column2, String[] value2) throws IOException {
// Set the row key
Put put = new Put(Bytes.toBytes(rowKey));
// HTable handles record-level operations such as put, get, scan and delete
HTable table = new HTable(conf, Bytes.toBytes(tableName));
// Get all column families of the table
HColumnDescriptor[] columnFamilies = table.getTableDescriptor().getColumnFamilies();
for (int i = 0; i < columnFamilies.length; i++) {
// Get the column family name
String familyName = columnFamilies[i].getNameAsString();
// Put data into the "article" column family
if (familyName.equals("article")) {
for (int j = 0; j < column1.length; j++) {
put.add(Bytes.toBytes(familyName),
Bytes.toBytes(column1[j]), Bytes.toBytes(value1[j]));
}
}
// Put data into the "author" column family
if (familyName.equals("author")) {
for (int j = 0; j < column2.length; j++) {
put.add(Bytes.toBytes(familyName),
Bytes.toBytes(column2[j]), Bytes.toBytes(value2[j]));
}
}
}
table.put(put);
System.out.println("add data Success!");
}
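// Example call (hypothetical values; assumes the table was created with the column
// families "article" and "author" that addData() checks for):
// addData("rowkey_1", "blog", new String[]{"title", "content"},
//         new String[]{"HBase Notes", "..."}, new String[]{"name", "nickname"},
//         new String[]{"huangbo", "hb"});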
/**
* Scan an entire HBase table
*
* @tableName table name
*/
public static void getResultScan(String tableName) throws IOException {
Scan scan = new Scan();
ResultScanner rs = null;
HTable table = new HTable(conf, Bytes.toBytes(tableName));
try {
rs = table.getScanner(scan);
for (Result r : rs) {
for (KeyValue kv : r.list()) {
printKeyValue(kv);
}
}
} finally {
if (rs != null) {
rs.close();
}
}
}
/**
* Scan an HBase table over a row key range
*
* @tableName table name
* Note: the start row is inclusive, the stop row is exclusive
*/
public static void getResultScan(String tableName, String start_rowkey,
String stop_rowkey) throws IOException {
Scan scan = new Scan();
scan.setStartRow(Bytes.toBytes(start_rowkey));
scan.setStopRow(Bytes.toBytes(stop_rowkey));
ResultScanner rs = null;
HTable table = new HTable(conf, Bytes.toBytes(tableName));
try {
rs = table.getScanner(scan);
printResultScanner(rs);
} finally {
if (rs != null) {
rs.close();
}
}
}
/**
* Query a single column of a row
*
* @tableName table name
* @rowKey row key
*/
public static void getResultByColumn(String tableName, String rowKey, String
familyName, String columnName) throws IOException {
HTable table = new HTable(conf, Bytes.toBytes(tableName));
Get get = new Get(Bytes.toBytes(rowKey));
// Get the cell for the given column family and qualifier
get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
Result result = table.get(get);
for (KeyValue kv : result.list()) {
printKeyValue(kv);
}
}
/**
* Update a single column in a table
*
* @tableName table name
* @rowKey row key
* @familyName column family name
* @columnName column name
* @value the new value
*/
public static void updateTable(
String tableName, String rowKey, String familyName, String
columnName, String value) throws IOException {
HTable table = new HTable(conf, Bytes.toBytes(tableName));
Put put = new Put(Bytes.toBytes(rowKey));
put.add(Bytes.toBytes(familyName), Bytes.toBytes(columnName),
Bytes.toBytes(value));
table.put(put);
System.out.println("update table Success!");
}
/**
* Query multiple versions of a column
*
* @tableName table name
* @rowKey row key
* @familyName column family name
* @columnName column name
*/
public static void getResultByVersion(String tableName, String rowKey,
String familyName, String columnName) throws IOException {
HTable table = new HTable(conf, Bytes.toBytes(tableName));
Get get = new Get(Bytes.toBytes(rowKey));
get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
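// Note: the number of versions actually returned is capped by the column family's VERSIONS setting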
get.setMaxVersions(5);
Result result = table.get(get);
for (KeyValue kv : result.list()) {
printKeyValue(kv);
}
}
/**
* Delete an entire row (all columns for the given row key)
*
* @tableName table name
* @rowKey row key
*/
public static void deleteAllColumn(String tableName, String rowKey) throws
IOException {
HTable table = new HTable(conf, Bytes.toBytes(tableName));
Delete deleteAll = new Delete(Bytes.toBytes(rowKey));
table.delete(deleteAll);
System.out.println("all columns are deleted!");
}
/**
* Delete a table
*
* @tableName table name
*/
public static void deleteTable(String tableName) throws IOException {
HBaseAdmin admin = new HBaseAdmin(conf);
admin.disableTable(tableName);
admin.deleteTable(tableName);
System.out.println(tableName + " is deleted!");
}
}
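The methods above call two helpers, printKeyValue and printResultScanner, whose bodies are not shown. A minimal sketch of what they might look like (to be placed inside HbaseDemoTest; the exact output format is an assumption):
private static void printKeyValue(KeyValue kv) {
    // Print one cell: row key, family:qualifier, timestamp and value
    System.out.println(Bytes.toString(kv.getRow()) + " "
            + Bytes.toString(kv.getFamily()) + ":"
            + Bytes.toString(kv.getQualifier()) + " "
            + kv.getTimestamp() + " "
            + Bytes.toString(kv.getValue()));
}
private static void printResultScanner(ResultScanner rs) {
    // Print every cell of every row returned by the scanner
    for (Result r : rs) {
        for (KeyValue kv : r.list()) {
            printKeyValue(kv);
        }
    }
}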
2. Filter queries
Comparison filters and dedicated (special-purpose) filters
2.1 Comparison operators (CompareOp)
LESS <
LESS_OR_EQUAL <=
EQUAL =
NOT_EQUAL <>
GREATER_OR_EQUAL >=
GREATER >
NO_OP no operation
2.2 Comparators
BinaryComparator compares against the given byte array lexicographically, using Bytes.compareTo(byte[])
BinaryPrefixComparator same as above, but only compares the data from the left, up to the length of the prefix
NullComparator checks whether the given value is null
BitComparator performs a bitwise comparison
RegexStringComparator regular-expression comparator; only supports EQUAL and NOT_EQUAL
SubstringComparator checks whether the given substring occurs in the value; only supports EQUAL and NOT_EQUAL
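To illustrate how an operator from 2.1 combines with a comparator from this list inside a filter (a sketch; the pattern and column values are made up), a ValueFilter with a RegexStringComparator keeps only cells whose value matches the regular expression:
// Keep only cells whose value starts with "zhang" (hypothetical data)
Filter regexFilter = new ValueFilter(CompareOp.EQUAL,
        new RegexStringComparator("^zhang.*"));
scan.setFilter(regexFilter);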
2.3 Comparison filters
Row key filter: RowFilter
Filter filter1 = new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("user0000")));
scan.setFilter(filter1);
Column family filter: FamilyFilter
Filter filter1 = new FamilyFilter(CompareOp.LESS,
new BinaryComparator(Bytes.toBytes("base_info")));
scan.setFilter(filter1);
Column qualifier filter: QualifierFilter
Filter filter = new QualifierFilter(CompareOp.LESS_OR_EQUAL,
new BinaryComparator(Bytes.toBytes("name")));
scan.setFilter(filter);
Value filter: ValueFilter
Filter filter = new ValueFilter(CompareOp.EQUAL,
new SubstringComparator("zhangsan"));
scan.setFilter(filter);
Timestamp filter: TimestampsFilter
List<Long> tss = new ArrayList<Long>();
tss.add(1495398833002L);
Filter filter1 = new TimestampsFilter(tss);
scan.setFilter(filter1);
2.4 Dedicated filters
Single column value filter: SingleColumnValueFilter -- returns the entire row for rows that satisfy the condition
SingleColumnValueFilter filter = new SingleColumnValueFilter(
Bytes.toBytes("colfam1"),
Bytes.toBytes("col-5"),
CompareFilter.CompareOp.NOT_EQUAL,
new SubstringComparator("val-5"));
filter.setFilterIfMissing(true); // If this is not set to true, rows that do not contain the specified column are also returned
scan.setFilter(filter);
Single column value exclude filter: SingleColumnValueExcludeFilter -- returns matching rows with the tested column excluded from the result
SingleColumnValueExcludeFilter filter = new SingleColumnValueExcludeFilter(
Bytes.toBytes("colfam1"),
Bytes.toBytes("col-5"),
CompareFilter.CompareOp.NOT_EQUAL,
new SubstringComparator("val-5"));
filter.setFilterIfMissing(true); // If this is not set to true, rows that do not contain the specified column are also returned
scan.setFilter(filter);
Prefix filter: PrefixFilter -- matches on the row key
Filter filter = new PrefixFilter(Bytes.toBytes("row1"));
scan.setFilter(filter);
Column prefix filter: ColumnPrefixFilter
Filter filter = new ColumnPrefixFilter(Bytes.toBytes("qual2"));
scan.setFilter(filter);
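The snippets above only construct a filter and hand it to a Scan object. Putting it together, a minimal end-to-end example (a sketch using the same classic client API and static conf from section 1; the table name, row key prefix and value are assumptions) could look like this:
public static void scanWithFilter(String tableName) throws IOException {
    HTable table = new HTable(conf, Bytes.toBytes(tableName));
    Scan scan = new Scan();
    // Combine several filters; MUST_PASS_ALL means every filter must accept the cell (logical AND)
    FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    filterList.addFilter(new PrefixFilter(Bytes.toBytes("user")));
    filterList.addFilter(new ValueFilter(CompareOp.EQUAL, new SubstringComparator("zhangsan")));
    scan.setFilter(filterList);
    ResultScanner rs = null;
    try {
        rs = table.getScanner(scan);
        for (Result r : rs) {
            for (KeyValue kv : r.list()) {
                printKeyValue(kv);
            }
        }
    } finally {
        if (rs != null) {
            rs.close();
        }
        table.close();
    }
}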
3. Integrating HBase with Hive
Hive and HBase are integrated through their respective public APIs, with HBaseStorageHandler handling the communication between them. HiveHBaseTableInputFormat splits the HBase table and obtains RecordReader objects to read the data. The HBase table is split by region, so the number of regions determines how many MapTasks the MapReduce job will have.
3.1 Prepare the HBase table and data
Create the table:
create 'mingxing', {NAME => 'base_info', VERSIONS => 1}, {NAME => 'extra_info', VERSIONS => 1}
Insert the sample data:
put 'mingxing','rk001','base_info:name','huangbo'
put 'mingxing','rk001','base_info:age','33'
put 'mingxing','rk001','extra_info:math','44'
put 'mingxing','rk001','extra_info:province','beijing'
put 'mingxing','rk002','base_info:name','xuzheng'
put 'mingxing','rk002','base_info:age','44'
put 'mingxing','rk003','base_info:name','wangbaoqiang'
put 'mingxing','rk003','base_info:age','55'
put 'mingxing','rk003','base_info:gender','male'
put 'mingxing','rk004','extra_info:math','33'
put 'mingxing','rk004','extra_info:province','tianjin'
put 'mingxing','rk004','extra_info:children','3'
put 'mingxing','rk005','base_info:name','liutao'
put 'mingxing','rk006','extra_info:name','liujialing'
Operations on the Hive side
Specify the ZooKeeper quorum used by HBase (the default port 2181 can be omitted):
set hbase.zookeeper.quorum=bigdata02:2181,bigdata03:2181,bigdata04:2181;
Specify the root znode used by HBase in ZooKeeper:
set zookeeper.znode.parent=/hbase;
Add the required handler jar:
add jar /home/bigdata/apps/apache-hive-2.3.6-bin/lib/hive-hbase-handler-2.3.6.jar;
Mapping all column families:
create external table mingxing(rowkey string, base_info map<string, string>,
extra_info map<string, string>)
row format delimited fields terminated by '\t'
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ("hbase.columns.mapping" = ":key,base_info:,extra_info:")
tblproperties("hbase.table.name"="mingxing","hbase.mapred.output.outputtable"="mingxing");
In the mapping ":key,base_info:,extra_info:", :key maps to the HBase row key, and a family name followed by a bare colon (base_info:, extra_info:) maps the entire column family to a Hive map<string, string> column.
Mapping some columns from some column families:
create external table mingxing1(rowkey string, name string, province string)
row format delimited fields terminated by '\t'
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ("hbase.columns.mapping" = ":key,base_info:name,extra_info:province")
tblproperties("hbase.table.name"="mingxing","hbase.mapred.output.outputtable"="mingxing");
Explanation of some parameters:
org.apache.hadoop.hive.hbase.HBaseStorageHandler: the storage handler that manages the Hive-to-HBase mapping
hbase.columns.mapping: defines the mapping from HBase column families and columns to Hive columns
hbase.table.name: the name of the HBase table
4. HBaseToHDFS: writing HBase table data to HDFS
Read data from HBase, process it, and then write the results to HDFS. Code implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.List;
/**
* Description: a MapReduce program that reads data from HBase and stores it in HDFS
*/
public class HBaseDataToHDFSMR {
public static final String ZK_CONNECT =
"bigdata02:2181,bigdata03:2181,bigdata04:2181";
public static final String ZK_CONNECT_KEY = "hbase.zookeeper.quorum";
public static final String HDFS_CONNECT = "hdfs://hadoop277ha/";
public static final String HDFS_CONNECT_KEY = "fs.defaultFS";
public static void main(String[] args) throws Exception {
// Put the Hadoop cluster configuration files core-site.xml and hdfs-site.xml into the resources directory.
Configuration conf = HBaseConfiguration.create();
conf.set(ZK_CONNECT_KEY, ZK_CONNECT);
conf.set(HDFS_CONNECT_KEY, HDFS_CONNECT);
System.setProperty("HADOOP_USER_NAME", "bigdata");
Job job = Job.getInstance(conf);
job.setJarByClass(HBaseDataToHDFSMR.class);
// The input data comes from the HBase table "user_info"
Scan scan = new Scan();
TableMapReduceUtil.initTableMapperJob("user_info", scan,
HBaseDataToHDFSMRMapper.class, Text.class,
NullWritable.class, job);
// RecordReader --- TableRecordReader
// InputFormat --- TableInputFormat
// Write the output to HDFS
FileOutputFormat.setOutputPath(job, new Path("/hbase2hdfs/output2"));
boolean waitForCompletion = job.waitForCompletion(true);
System.exit(waitForCompletion ? 0 : 1);
}
/**
* The mapper's input key-value types are fixed: ImmutableBytesWritable, Result
* The mapper's output key-value types can be chosen by the user
*/
static class HBaseDataToHDFSMRMapper extends TableMapper<Text, NullWritable> {
/**
* keyType: LongWritable -- ImmutableBytesWritable: the row key
* valueType: Text -- Result: all key-value pairs read for that row key from the HBase table
*/
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context)
throws IOException, InterruptedException {
// byte[] rowkey = Bytes.copy(key, 0, key.getLength());
String rowkey = Bytes.toString(key.copyBytes());
List<Cell> listCells = value.listCells();
Text text = new Text();
// Output one line per cell: rowkey \t family \t qualifier \t value \t timestamp
for (Cell cell : listCells) {
String family = new String(CellUtil.cloneFamily(cell));
String qualifier = new String(CellUtil.cloneQualifier(cell));
String v = new String(CellUtil.cloneValue(cell));
long ts = cell.getTimestamp();
text.set(rowkey + "\t" + family + "\t" + qualifier + "\t" + v +
"\t" + ts);
context.write(text, NullWritable.get());
}
}
}
}
5. HDFSToHBase: writing HDFS data into HBase
Read data from HDFS, process it, and then write it into HBase. Code implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
* Before running the program, two things must be done:
* 1. Put the students.txt file into the /bigdata/student/input/ directory on HDFS
* 2. Create the HBase table:
* create 'student', 'info'
*/
public class HDFSDataToHBaseMR extends Configured implements Tool {
public static void main(String[] args) throws Exception {
int run = ToolRunner.run(new HDFSDataToHBaseMR(), args);
System.exit(run);
}
@Override
public int run(String[] arg0) throws Exception {
Configuration config = HBaseConfiguration.create();
config.set("hbase.zookeeper.quorum",
"bigdata02:2181,bigdata03:2181,bigdata04:2181");
System.setProperty("HADOOP_USER_NAME", "bigdata");
Job job = Job.getInstance(config, "HDFSDataToHBaseMR");
job.setJarByClass(HDFSDataToHBaseMR.class);
job.setMapperClass(HDFSDataToHBase_Mapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
// 设置数据的输出组件
TableMapReduceUtil.initTableReducerJob("student",
HDFSDataToHBase_Reducer.class, job);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Put.class);
FileInputFormat.addInputPath(job, new Path("/bigdata/student/input"));
boolean isDone = job.waitForCompletion(true);
return isDone ? 0 : 1;
}
static class HDFSDataToHBase_Mapper extends Mapper<LongWritable, Text, Text,
NullWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws
IOException,
InterruptedException {
context.write(value, NullWritable.get());
}
}
static class HDFSDataToHBase_Reducer extends TableReducer<Text,
NullWritable, NullWritable> {
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
throws IOException, InterruptedException {
// Each input line is comma-separated: rowkey,name,sex,age,department
String[] split = key.toString().split(",");
Put put = new Put(split[0].getBytes());
put.addColumn("info".getBytes(), "name".getBytes(),
split[1].getBytes());
put.addColumn("info".getBytes(), "sex".getBytes(),
split[2].getBytes());
put.addColumn("info".getBytes(), "age".getBytes(),
split[3].getBytes());
put.addColumn("info".getBytes(), "department".getBytes(),
split[4].getBytes());
context.write(NullWritable.get(), put);
}
}
}
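As noted in the reducer, each line of students.txt is expected to contain five comma-separated fields: row key, name, sex, age, and department. A hypothetical input line matching that format (the values are made up):
95001,huangbo,male,33,computer_science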