# Primary key
rowkey design
sqoop ->hbase
hive ->hbase
java api ->hbase
console ->hbase
# Accessing HBase from the console
hbase shell
# Storage structure
Database: namespace
Table: table
Column: column family
The relationship between an HTable and its Regions is similar to that between an HDFS file and its blocks
HMaster responsibilities
1. Manages user create/delete/alter/query operations on tables
2. Manages HRegionServer load balancing and adjusts Region distribution
3. Assigns the new Regions after a Region split
4. Migrates the Regions of a failed HRegionServer after it goes down (fault tolerance)
HRegionServer: write path
  HRegion * N
    HRegion1
      MemStore => write cache; =flush threshold=> StoreFile
      StoreFiles =compaction threshold=> merged StoreFile =size threshold=> region split
    HRegion2
    ...
  HLog (WAL => Write Ahead Log)
client -write data-> HRegionServer-x -> HRegion -> MemStore -> ... (flushed later)
                              |-in-> HLog (backup)
                                        |-out-> HMaster -migrate-> HRegionServer2 -> ... (HLog replayed to recover the Regions)
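# The client-side knob for the HLog step above is Put durability; a minimal hedged sketch (org.apache.hadoop.hbase.client.Durability; the Table handle and sample row are the ones used in the Java API section further down):
Put p = new Put(Bytes.toBytes("6"));
p.addColumn(Bytes.toBytes("base"), Bytes.toBytes("name"), Bytes.toBytes("tom"));
p.setDurability(Durability.SYNC_WAL);   // sync the edit to the HLog (WAL) before acking the client
// p.setDurability(Durability.SKIP_WAL); // faster, but MemStore-only data is lost if the server crashes
table.put(p);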
# Operating HBase
# There are only insert (append) operations; versioning is built from the operation type and timestamp, ordered in time as 0, 1, 2, ...
# Historical data can be traced by switching between versions
# The maximum number of retained versions per cell can be configured
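# A hedged Java sketch of reading back several versions of one cell (it uses the Table object created in the Java API section further down, assumes the 'base' family keeps more than one version, and the row/column names are just the sample data used later):
Get get = new Get(Bytes.toBytes("1"));
get.addColumn(Bytes.toBytes("base"), Bytes.toBytes("name"));
get.setMaxVersions(3); // ask for up to 3 versions of the cell
Result result = table.get(get);
for (Cell cell : result.rawCells()) { // one Cell per returned version
    System.out.println(cell.getTimestamp() + " -> " + Bytes.toString(CellUtil.cloneValue(cell)));
}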
# Fault tolerance
# Why HBase writes are fast: a write returns to the client as soon as the data is in the MemStore (memory); the later flush, compaction and split are all deferred.
# Why HBase reads are fast: frequently read blocks are cached in the BlockCache, and the Bloom filter quickly rules out StoreFiles/blocks that cannot contain the key; data already in the BlockCache is returned straight from memory.
# HBase client
#DDL
# List all tables
list
# List namespaces
list_namespace
# Create a namespace
create_namespace 'kb16nb'
# List the tables under a given namespace
list_namespace_tables 'kb16nb'
# Create a table
create 'kb16nb:student','base','bigdata','cloud'
# Describe the table definition
describe 'kb16nb:student'
# Full create syntax
create 'kb16nb:student',
{NAME => 'base', BLOOMFILTER => 'ROW', IN_MEMORY => 'true', VERSIONS => '1', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'},
{NAME => 'bigdata', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'},
{NAME => 'cloud', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
# BLOOMFILTER: NONE, ROW (default), or ROWCOL
# The Bloom filter quickly tells whether a key can possibly be in an HFile (StoreFile), so blocks that cannot match are skipped
# If you usually read whole rows, ROWCOL is not recommended: it needs more memory, and ROW covers the ROWCOL use case but not the other way round
# Not suitable for tables with frequent deletes; set it to NONE there, because the Bloom filter would have to be rebuilt
# IN_MEMORY
# The BlockCache (LruBlockCache, enabled by default) has three priorities:
# Single Access: a block read from HDFS for the first time; evicted first when space is needed
# Multi Access: a Single block promoted when it is accessed again; evicted second
# In-memory Access: blocks of column families marked IN_MEMORY, regardless of access count; evicted last
# Java API: HColumnDescriptor.setInMemory(true);
# At table creation, e.g.: create 't', {NAME=>'f',IN_MEMORY=>'true'}
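# A hedged Java sketch of the same column-family settings through the HColumnDescriptor API mentioned above (BloomType is org.apache.hadoop.hbase.regionserver.BloomType; the table name is hypothetical; admin is the Admin object from the Java API section below):
HColumnDescriptor base = new HColumnDescriptor("base");
base.setBloomFilterType(BloomType.ROW); // BLOOMFILTER => 'ROW'
base.setInMemory(true);                 // IN_MEMORY => 'true'
base.setMaxVersions(1);                 // VERSIONS => '1'
base.setBlockCacheEnabled(true);        // BLOCKCACHE => 'true'
HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("kb16nb:student_api"));
desc.addFamily(base);
admin.createTable(desc);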
is_enabled 'kb16nb:student' # is the table enabled?
is_disabled 'kb16nb:student' # is the table disabled?
enable 'kb16nb:student' # enable the table
disable 'kb16nb:student' # disable the table
drop 'kb16nb:student' # drop the table (it must be disabled first)
#DML
truncate 'kb16nb:student' # remove all data from the table
delete 'kb16nb:student','rowkey','columnfamily:colname' # delete the latest version of a cell (a column must be given)
deleteall 'kb16nb:student','rowkey' # delete an entire row (all columns, all versions)
deleteall 'kb16nb:student','rowkey','columnfamily:colname' # delete all versions of a cell
put 'kb16nb:student','1','base:name','zhangsan'
scan 'kb16nb:student' # scan the whole table
count 'kb16nb:student' # count the rows
The most direct way to get a row count is to run the count command in the hbase shell.
hbase> count 't1'
hbase> count 't1', INTERVAL => 100000
hbase> count 't1', CACHE => 1000
hbase> count 't1', INTERVAL => 10, CACHE => 1000
# INTERVAL is how often progress is reported (every N rows, default 1000) and CACHE is the scanner cache size. This is a full client-side scan, so it is slow and not recommended for large tables
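# A hedged Java sketch of what count effectively does: a full scan counted on the client, which is why it is slow; CACHE corresponds to scanner caching here (Table handle as in the Java API section below):
Scan countScan = new Scan();
countScan.setCaching(1000); // fetch 1000 rows per RPC, like CACHE => 1000
long rowCount = 0;
try (ResultScanner scanner = table.getScanner(countScan)) {
    for (Result r : scanner) {
        rowCount++;
    }
}
System.out.println("row count = " + rowCount);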
get 'kb16nb:student','2','base:name'
scan 'kb16nb:student',{COLUMN=>'base'}
scan 'kb16nb:student',{COLUMN=>'base:name'}
scan 'kb16nb:student',{COLUMN=>'base:name',LIMIT=>2,STARTROW=>'2'}
scan 'kb16nb:student',{
COLUMN=>'base:name',
LIMIT=>3,
STARTROW=>'2', # inclusive
STOPROW=>'4'} # exclusive
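# A hedged Java counterpart of the range scan above, assuming the HBase 2.x client API (Table handle as in the Java API section below); the start row is inclusive and the stop row exclusive, same as STARTROW/STOPROW:
Scan rangeScan = new Scan();
rangeScan.addColumn(Bytes.toBytes("base"), Bytes.toBytes("name"));
rangeScan.withStartRow(Bytes.toBytes("2")); // inclusive
rangeScan.withStopRow(Bytes.toBytes("4"));  // exclusive
rangeScan.setLimit(3);                      // LIMIT => 3
try (ResultScanner scanner = table.getScanner(rangeScan)) {
    for (Result r : scanner) {
        System.out.println(Bytes.toString(r.getRow()) + " -> "
                + Bytes.toString(r.getValue(Bytes.toBytes("base"), Bytes.toBytes("name"))));
    }
}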
put 'kb16nb:student','1','base:name','zhangsan'
put 'kb16nb:student','2','base:name','lisa'
put 'kb16nb:student','3','base:name','jack'
put 'kb16nb:student','4','base:name','rose'
put 'kb16nb:student','5','base:name','jane'
put 'kb16nb:student','1','base:age',18
put 'kb16nb:student','2','base:age',19
put 'kb16nb:student','3','base:age',20
put 'kb16nb:student','4','base:age',21
put 'kb16nb:student','5','base:age',22
put 'kb16nb:student','1','base:product','bigdata'
put 'kb16nb:student','2','base:product','cloud'
put 'kb16nb:student','3','base:product','bigdata'
put 'kb16nb:student','4','base:product','cloud'
put 'kb16nb:student','5','base:product','bigdata'
put 'kb16nb:student','1','bigdata:hive',88
put 'kb16nb:student','3','bigdata:hive',76
put 'kb16nb:student','5','bigdata:hive',65
put 'kb16nb:student','1','bigdata:hbase',73
put 'kb16nb:student','3','bigdata:hbase',71
put 'kb16nb:student','5','bigdata:hbase',78
put 'kb16nb:student','2','cloud:net',77
put 'kb16nb:student','4','cloud:net',66
put 'kb16nb:student','2','cloud:shell',74
put 'kb16nb:student','4','cloud:shell',68
# Run from a shell script
echo "list_namespace_tables 'kb16nb'"|hbase shell -n
echo "list" |hbase shell -n
echo "describe 'kb16nb:student'"|hbase shell -n
# Create a table in Hive mapped to HBase
create external table kb16.hive_map_hbase_01(
stuid int,
stuname string,
stuage int,
product string
)stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ("hbase.columns.mapping"=":key,base:name,base:age,base:product")
tblproperties("hbase.table.name"="kb16nb:student");
# Internal handling of the Bloom filter false-positive rate
# Java API access to HBase (mainly for writing data in bulk)
# Create the configuration object and the connection object
Configuration config = HBaseConfiguration.create();
config.set("hbase.zookeeper.quorum", "single01:2181");
Connection hbaseCon = ConnectionFactory.createConnection(config);
// Admin object for DDL operations
Admin admin = hbaseCon.getAdmin();
admin.xxx(tableName); // e.g. tableExists / disableTable / deleteTable, see the sketch after the TableName below
// Create the table name object
final String HTable="kb16nb:student";
TableName tableName = TableName.valueOf(HTable);
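// A hedged sketch of typical Admin calls with the tableName above (destructive calls commented out):
boolean exists = admin.tableExists(tableName);
System.out.println("table exists: " + exists);
// if (admin.isTableEnabled(tableName)) admin.disableTable(tableName);
// admin.deleteTable(tableName); // a table must be disabled before it can be dropped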
// Work with table data
Table table=hbaseCon.getTable(tableName);
// Put a single row
Put row = new Put(rowkey); // rowkey: byte[]
row.addColumn(columnfamily, column, value); // all arguments are byte[]
...
table.put(row);
// Multiple rows (small batches)
List<Put> rows = new ArrayList<>();
rows.add(row);
...
table.put(rows);
// Batch processing
// Create the exception listener for HBase bulk inserts with a lambda
// (it is a functional interface in Java, so a lambda expression works)
BufferedMutator.ExceptionListener listener = (e, mutator) -> {
    // exception message (cause)
    String msg = e.getMessage();
    // number of failed mutations
    int numExceptions = e.getNumExceptions();
    // record the row keys of the failed mutations so they can be inspected and retried later
    // logged with Log4j
    logger.error("HBASE MUTATE EXCEPTION : " + msg + "," + numExceptions);
    if (numExceptions > 0) {
        StringBuilder builder = new StringBuilder();
        builder.append(Bytes.toString(e.getRow(0).getRow()));
        final String SEP = ",";
        for (int i = 1; i < numExceptions; i++) { // start at 1: row 0 is already appended
            builder.append(SEP);
            builder.append(Bytes.toString(e.getRow(i).getRow()));
        }
        logger.error(builder.toString());
    }
};
final int BUFFER_SIZE=8*1024;
BufferedMutatorParams bmp=new BufferedMutatorParams(tableName)
.listener(listener).writeBufferSize(BUFFER_SIZE);
BufferedMutator mutator=hbaseCon.getBufferedMutator(bmp);
// Create the list of Puts
List<Put> list=new ArrayList<>(BUFFER_SIZE);
...
// Hand the data to the mutator
mutator.mutate(list);
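// The BufferedMutator only buffers the Puts client-side; nothing is guaranteed to reach the RegionServer until the buffer fills, flush() is called, or the mutator is closed. A hedged sketch of the cleanup these notes stop short of:
mutator.flush();  // force-send whatever is still buffered
mutator.close();  // flushes again and releases the write buffer
table.close();
hbaseCon.close();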
# Log4j logging
# 1. Add the Log4j jar dependency
# 2. Configuration: resources/log4j.properties
log4j.rootLogger=INFO, stdout, logfile
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=log/hd.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
# 3. Create the logger object
private static final Logger logger=Logger.getLogger(App.class);
# Optimization
Pre-splitting tables
# n split points => n+1 regions
create_namespace 'kb16'
create 'kb16:pre_split_n1','cf',SPLITS=>['10','20','30']
/hbase/data/kb16/pre_split_n1 contains (HDFS listing):
.tabledesc
.tmp
57ee8f6d69fc35bd5ded359b4a0aeb1b
7427fc9e45153f8c118f806abf722884
dae423145aece1af11340aa519b63f98
ec85df19801bcfc46a7b3da0017912f4
# 3 split points ('10','20','30') => 4 region directories
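# A hedged Java sketch of the same pre-split create through the older HTableDescriptor API used elsewhere in these notes (the table name is hypothetical; admin as in the Java API section above):
HTableDescriptor preSplit = new HTableDescriptor(TableName.valueOf("kb16:pre_split_n1_api"));
preSplit.addFamily(new HColumnDescriptor("cf"));
byte[][] splitKeys = {Bytes.toBytes("10"), Bytes.toBytes("20"), Bytes.toBytes("30")};
admin.createTable(preSplit, splitKeys); // 3 split points => 4 regions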
#shell
hbase org.apache.hadoop.hbase.util.RegionSplitter kb16:pre_split_hsp1 HexStringSplit -c 3 -f base
#hbase shell
create 'kb16:pre_split_pdp','base',{NUMREGIONS=>4,SPLITALGO=>'DecimalStringSplit'}
# Row key design
As short as possible, ideally within 16 bytes
Random number
UUID
MD5
Business-ordered data (auto-increment primary key)
Consider the region split policy
DelimitedKeyPrefixRegionSplitPolicy (e.g. key 343_f34343 splits on the prefix before the delimiter '_')
KeyPrefixRegionSplitPolicy
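# A hedged Java sketch of attaching one of these split policies to a table; the configuration key strings are my assumption of the standard policy properties (admin as in the Java API section above, table name hypothetical):
HTableDescriptor orders = new HTableDescriptor(TableName.valueOf("kb16:orders"));
orders.addFamily(new HColumnDescriptor("cf"));
// split on the part of the row key before the first '_' (e.g. '343' in '343_f34343')
orders.setRegionSplitPolicyClassName(
        "org.apache.hadoop.hbase.regionserver.DelimitedKeyPrefixRegionSplitPolicy");
orders.setValue("DelimitedKeyPrefixRegionSplitPolicy.delimiter", "_");
// alternative: KeyPrefixRegionSplitPolicy with a fixed prefix length
// orders.setValue("KeyPrefixRegionSplitPolicy.prefix_length", "3");
admin.createTable(orders);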