scala版见url : https://blog.csdn.net/xiushuiguande/article/details/79766469
------------------------------------------
Admin(操作namespace、table、column)
列出所有的名称空间
NamespaceDescriptor[]nsds = admin.listNamespaceDescriptors();
创建一个名称空间
NamespaceDescriptor nsd = NamespaceDescriptor.create("ns2").build();
admin.createNamespace(nsd);
删除某个名称空间
admin.deleteNamespace("ns1");
列出某个名称空间下所有表
HTableDescriptor[]htds = admin.listTableDescriptorsByNamespace("hbase");
for (HTableDescriptor htd:htds){
System.out.println(htd.getNameAsString());
}
//获取表的描述器对象
TableName tableName = TableName.valueOf("ns1:t_userinfo");
HTableDescriptor htb = admin.getTableDescriptor(tableName);
//创建表的描述器对象
HTableDescriptor htb = new HTableDescriptor(tableName);
//获取列族描述器
HColumnDescriptor hc = htb.getFamily(Bytes.toBytes("other_info"));
//获取表的所有列族
HColumnDescriptor[] hcds = htd.getColumnFamilies();
//创建列族描述器
HColumnDescriptor hc = new HColumnDescriptor(Bytes.toBytes("base_info"));
//删除列族
HColumnDescriptor other_info2 = htb.removeFamily(Bytes.toBytes("other_info2"));
admin.modifyTable(tableName,htb);
设置布隆过滤器
hc.setBloomFilterType(BloomType.ROW);
//将列族描述器对象添加到表的描述器中
htb.addFamily(hc).addFamily(hc1);
//创建表
admin.createTable(htb);
//修改表
htb.addFamily(hc1);
admin.modifyTable(tableName,htb);
//删除表
//判断表是否存在
if (admin.tableExists(tb)){
logger.info("表存在");
//判断表是否已启用,删除前必须先禁用
if (admin.isTableEnabled(tb)){
logger.info("表已启用,先禁用再删除");
admin.disableTable(tb);
}
admin.deleteTable(tb);
}
-----------------------------------------
Table(操作表中的数据)
//添加数据
Put rk000006 = new Put(Bytes.toBytes("rk000006"));
rk000006.addColumn(Bytes.toBytes("base_info"),Bytes.toBytes("name"),Bytes.toBytes("zhangwuji"));
table.put(rk000006);
//指定行间列族列值
Get get = new Get(Bytes.toBytes("user0000008"));
get.addColumn(Bytes.toBytes("base_info"),Bytes.toBytes("name"));
//get指定行键数据
Get get = new Get(Bytes.toBytes("rk000007"));
Result rs = table.get(get);
//获取cell的迭代器
CellScanner cellScanner = rs.cellScanner();
while (cellScanner.advance()){
//获取当前的cell
Cell current = cellScanner.current();
System.out.print("\t"+new String(CellUtil.cloneFamily(current),"utf-8"));
System.out.print(":"+new String(CellUtil.cloneQualifier(current),"utf-8"));
System.out.print("\t"+new String(CellUtil.cloneValue(current),"utf-8"));
System.out.println("");
}
//scan扫描表
Scan scan = new Scan();
scan.setStartRow(Bytes.toBytes("user0000001"));
//比user00000020大
scan.setStopRow(Bytes.toBytes("user00000020"+"\001"));
scan.addColumn(Bytes.toBytes("base_info"),Bytes.toBytes("name"));
ResultScanner scanner = table.getScanner(scan);
//获取result的迭代器
Iterator<Result> iterator = scanner.iterator();
while (iterator.hasNext()){
Result result = iterator.next();
//获取cell的迭代器
CellScanner cellScanner = result.cellScanner();
while (cellScanner.advance()){
//获取当前的cell
Cell current = cellScanner.current();
System.out.print("\t"+new String(CellUtil.cloneFamily(current),"utf-8"));
System.out.print(":"+new String(CellUtil.cloneQualifier(current),"utf-8"));
System.out.print("\t"+new String(CellUtil.cloneValue(current),"utf-8"));
System.out.println("");
}
}
//删除数据
Delete del = new Delete(Bytes.toBytes("rk000005"));
del.addColumn(Bytes.toBytes("base_info"),Bytes.toBytes("name"));
table.delete(del);
----------hbase初始化工具类-----------------------------
package util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.log4j.Logger;
import java.io.IOException;
/**
* @Description
* @Author cqh <caoqingghai@1000phone.com>
* @Version V1.0
* @Since 1.0
* @Date 2019/7/15 14:33
*/
/**
 * Utility holder for a single shared HBase {@link Connection} plus helpers to
 * obtain/close {@link Admin} and {@link Table} handles.
 *
 * <p>The Connection is heavyweight and thread-safe; one instance per JVM is
 * the recommended usage. Admin/Table handles are lightweight and must be
 * closed by their callers via the corresponding close* method.
 */
public class HbaseTools {
    private static Logger logger = Logger.getLogger(HbaseTools.class);
    private static final String CONNECT_KEY = "hbase.zookeeper.quorum";
    private static final String CONNECT_VALUE = "mini1:2181,mini2:2181,mini3:2181";
    // Shared, long-lived connection; do NOT close it per Table/Admin.
    private static Connection conn = null;

    static {
        // HBaseConfiguration.create() loads hbase-default.xml/hbase-site.xml;
        // a plain `new Configuration()` would silently miss every HBase default.
        Configuration conf = HBaseConfiguration.create();
        // Point the client at the ZooKeeper quorum.
        conf.set(CONNECT_KEY, CONNECT_VALUE);
        try {
            conn = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            logger.error("获取连接对象异常", e);
        }
    }

    /**
     * Returns an Admin handle bound to the shared connection, or null when it
     * could not be obtained. Callers must close it with {@link #closeAdmin}.
     * @return an Admin, or null on failure
     */
    public static Admin getAdmin() {
        Admin admin = null;
        try {
            admin = conn.getAdmin();
        } catch (IOException e) {
            logger.error("获取Admin对象异常", e);
        }
        return admin;
    }

    /**
     * Closes an Admin handle. Deliberately does NOT close the shared
     * connection: closing it here would break every other Table/Admin user
     * of this utility for the rest of the JVM's lifetime.
     * @param admin handle to close; null is tolerated
     */
    public static void closeAdmin(Admin admin) {
        if (admin == null) {
            return;
        }
        try {
            admin.close();
        } catch (IOException e) {
            logger.error("关闭Admin对象异常", e);
        }
    }

    /**
     * Returns a Table handle for the given name, or null on failure.
     * Table handles are not thread-safe; callers must close them with
     * {@link #closeTable}.
     * @param tableName fully-qualified table name (namespace:table)
     * @return a Table, or null on failure
     */
    public static Table getTable(TableName tableName) {
        Table table = null;
        try {
            table = conn.getTable(tableName);
        } catch (IOException e) {
            logger.error("获取Table对象异常", e);
        }
        return table;
    }

    /**
     * Closes a Table handle; the shared connection stays open for reuse.
     * @param table handle to close; null is tolerated
     */
    public static void closeTable(Table table) {
        if (table == null) {
            return;
        }
        try {
            table.close();
        } catch (IOException e) {
            logger.error("关闭Table对象异常", e);
        }
    }

    /**
     * Closes the shared connection. Call exactly once, on application
     * shutdown, after all Table/Admin handles have been closed.
     */
    public static void closeConnection() {
        if (conn != null) {
            try {
                conn.close();
            } catch (IOException e) {
                logger.error("关闭Connection对象异常", e);
            }
        }
    }
}
-----------------过滤器、比较器结合使用-----------------------------------
package filter;
/**
* @Description
* @Author cqh <caoqingghai@1000phone.com>
* @Version V1.0
* @Since 1.0
* @Date 2019/7/16 11:32
*/
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import util.HbaseTools;
import java.io.IOException;
import java.util.Iterator;
/**
* select * from t_userinfo where age <60 and name like 'cls%'
* select * from t_userinfo where (age <60 and name like 'cls%') or sex=1
*/
/**
 * Demo: combine column-value conditions with a FilterList, the HBase
 * equivalent of a SQL WHERE clause with AND/OR.
 */
public class filterList {
    public static void main(String[] args) {
        // AND chain: every filter in the list must accept the row.
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        // OR chain kept for reference: any single filter passing is enough.
        FilterList filterList1 = new FilterList(FilterList.Operator.MUST_PASS_ONE);
        // Condition 1: base_info:age < "60" (lexicographic byte comparison).
        SingleColumnValueFilter sf = new SingleColumnValueFilter(
                Bytes.toBytes("base_info"),
                Bytes.toBytes("age"),
                CompareFilter.CompareOp.LESS,
                Bytes.toBytes("60")
        );
        // Condition 2: base_info:name > "cls" — approximates `name like 'cls%'`.
        SingleColumnValueFilter sf1 = new SingleColumnValueFilter(
                Bytes.toBytes("base_info"),
                Bytes.toBytes("name"),
                CompareFilter.CompareOp.GREATER,
                Bytes.toBytes("cls")
        );
        // By default a row whose column is missing would still be returned;
        // setFilterIfMissing(true) drops rows that lack the tested column.
        sf.setFilterIfMissing(true);
        sf1.setFilterIfMissing(true);
        // Add both conditions to the AND chain.
        filterList.addFilter(sf);
        filterList.addFilter(sf1);
        // Attach the filter chain to a scanner over the whole table.
        Scan scan = new Scan();
        scan.setFilter(filterList);
        Table tb = HbaseTools.getTable(TableName.valueOf("ns1:t_userinfo"));
        try {
            ResultScanner scanner = tb.getScanner(scan);
            try {
                // Print family:qualifier\tvalue for every cell of every row.
                for (Result result : scanner) {
                    CellScanner cellScanner = result.cellScanner();
                    while (cellScanner.advance()) {
                        Cell current = cellScanner.current();
                        System.out.print("\t" + new String(CellUtil.cloneFamily(current), "utf-8"));
                        System.out.print(":" + new String(CellUtil.cloneQualifier(current), "utf-8"));
                        System.out.print("\t" + new String(CellUtil.cloneValue(current), "utf-8"));
                        System.out.println("");
                    }
                }
            } finally {
                // ResultScanner holds a server-side lease; always release it.
                scanner.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the Table handle obtained above.
            HbaseTools.closeTable(tb);
        }
    }
}
--------
package filter;
/**
* @Description
* @Author cqh <caoqingghai@1000phone.com>
* @Version V1.0
* @Since 1.0
* @Date 2019/7/16 14:28
*/
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import util.HbaseTools;
import java.io.IOException;
import java.util.Iterator;
/**
* 多种列值比较器
*/
/**
 * Demo: the comparator variants usable with SingleColumnValueFilter.
 */
public class SingleColumnValueFilterDemo {
    public static void main(String[] args) {
        // 1. Regex comparator: value matches the regular expression.
        RegexStringComparator rsc = new RegexStringComparator("^cls");
        // 2. Substring comparator: value contains the substring.
        SubstringComparator sc = new SubstringComparator("in");
        // 3. Binary comparator: exact byte-for-byte comparison.
        BinaryComparator bc = new BinaryComparator(Bytes.toBytes("cls3"));
        // 4. Binary-prefix comparator: value starts with the given bytes.
        BinaryPrefixComparator bpc = new BinaryPrefixComparator(Bytes.toBytes("zhao"));
        // Filter rows by base_info:name using the prefix comparator.
        SingleColumnValueFilter sf = new SingleColumnValueFilter(
                Bytes.toBytes("base_info"),
                Bytes.toBytes("name"),
                CompareFilter.CompareOp.EQUAL,
                bpc
        );
        // Drop rows that do not have the tested column at all.
        sf.setFilterIfMissing(true);
        Scan scan = new Scan();
        scan.setFilter(sf);
        Table tb = HbaseTools.getTable(TableName.valueOf("ns1:t_userinfo"));
        try {
            ResultScanner scanner = tb.getScanner(scan);
            try {
                // Print family:qualifier\tvalue for every cell of every row.
                for (Result result : scanner) {
                    CellScanner cellScanner = result.cellScanner();
                    while (cellScanner.advance()) {
                        Cell current = cellScanner.current();
                        System.out.print("\t" + new String(CellUtil.cloneFamily(current), "utf-8"));
                        System.out.print(":" + new String(CellUtil.cloneQualifier(current), "utf-8"));
                        System.out.print("\t" + new String(CellUtil.cloneValue(current), "utf-8"));
                        System.out.println("");
                    }
                }
            } finally {
                // ResultScanner holds a server-side lease; always release it.
                scanner.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the Table handle obtained above.
            HbaseTools.closeTable(tb);
        }
    }
}
------------
package filter;
/**
* @Description
* @Author cqh <caoqingghai@1000phone.com>
* @Version V1.0
* @Since 1.0
* @Date 2019/7/16 14:48
*/
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import util.HbaseTools;
import java.io.IOException;
import java.util.Iterator;
/**
* 列族过滤器
*/
/**
 * Demo: FamilyFilter — keep only cells whose column family matches the
 * comparator.
 */
public class FamilyFilterDemo {
    public static void main(String[] args) {
        // Alternative comparators, kept for reference:
        RegexStringComparator rsc = new RegexStringComparator("^cls");
        SubstringComparator sc = new SubstringComparator("in");
        // Exact byte comparison against the family name "extra_info".
        BinaryComparator bc = new BinaryComparator(Bytes.toBytes("extra_info"));
        BinaryPrefixComparator bpc = new BinaryPrefixComparator(Bytes.toBytes("zhao"));
        // Only cells from the family equal to "extra_info" pass.
        FamilyFilter ff = new FamilyFilter(CompareFilter.CompareOp.EQUAL, bc);
        Scan scan = new Scan();
        scan.setFilter(ff);
        Table tb = HbaseTools.getTable(TableName.valueOf("ns1:t_userinfo"));
        try {
            ResultScanner scanner = tb.getScanner(scan);
            try {
                // Print family:qualifier\tvalue for every cell of every row.
                for (Result result : scanner) {
                    CellScanner cellScanner = result.cellScanner();
                    while (cellScanner.advance()) {
                        Cell current = cellScanner.current();
                        System.out.print("\t" + new String(CellUtil.cloneFamily(current), "utf-8"));
                        System.out.print(":" + new String(CellUtil.cloneQualifier(current), "utf-8"));
                        System.out.print("\t" + new String(CellUtil.cloneValue(current), "utf-8"));
                        System.out.println("");
                    }
                }
            } finally {
                // ResultScanner holds a server-side lease; always release it.
                scanner.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the Table handle obtained above.
            HbaseTools.closeTable(tb);
        }
    }
}
------------
package filter;
/**
* @Description
* @Author cqh <caoqingghai@1000phone.com>
* @Version V1.0
* @Since 1.0
* @Date 2019/7/16 15:21
*/
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.util.Bytes;
import util.HbaseTools;
import java.io.IOException;
import java.util.Iterator;
/**
* 分页过滤器
* 要求 :每页显示三条数据
*
* 1、第一页
* select * from t_userindo where rowkey > '' limit 3;
* 2、其他页
* select * from t_userindo where rowkey > maxrowkey limit 3;
*
* 3、循环什么时候结束?
* whlie(true)
* select * from t_userindo where rowkey > maxrowkey limit 3;
* print 3 行数据
* count 计数
* count<3的时候跳出循环
*/
/**
 * Demo: paginate a table with PageFilter, 3 rows per page.
 *
 * <p>Strategy: scan with a page-size filter, remember the largest rowkey
 * seen, then restart the scan just past it ("maxkey" + \001) for the next
 * page. A page with fewer than 3 rows is the last one.
 */
public class PageFilterDemo {
    /** Rows per page. */
    private static final int PAGE_SIZE = 3;

    public static void main(String[] args) {
        // Limit every scan round-trip to PAGE_SIZE rows.
        PageFilter pf = new PageFilter(PAGE_SIZE);
        Scan scan = new Scan();
        scan.setFilter(pf);
        Table table = HbaseTools.getTable(TableName.valueOf("ns1:t_userinfo"));
        int count = 0;
        String maxkey = "";
        try {
            while (true) {
                count = 0;
                // One scanner per page; close it before opening the next,
                // otherwise each page leaks a server-side scanner lease.
                ResultScanner results = table.getScanner(scan);
                try {
                    for (Result rs : results) {
                        count += 1;
                        // Track the last (largest) rowkey of this page.
                        maxkey = Bytes.toString(rs.getRow());
                        CellScanner cellScanner = rs.cellScanner();
                        while (cellScanner.advance()) {
                            Cell current = cellScanner.current();
                            System.out.print("\t" + new String(CellUtil.cloneFamily(current), "utf-8"));
                            System.out.print(":" + new String(CellUtil.cloneQualifier(current), "utf-8"));
                            System.out.print("\t" + new String(CellUtil.cloneValue(current), "utf-8"));
                            System.out.println("");
                        }
                    }
                } finally {
                    results.close();
                }
                // A short page means we have reached the end of the table.
                if (count < PAGE_SIZE) {
                    break;
                }
                // Restart strictly after maxkey: appending \001 yields the
                // smallest rowkey greater than maxkey.
                scan.setStartRow(Bytes.toBytes(maxkey + "\001"));
                System.out.println("下次循环开始");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the Table handle obtained above.
            HbaseTools.closeTable(table);
        }
    }
}
-----------------协处理器建立hbase的二级索引------------------------------
package observer;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import util.HbaseTools;
import java.io.IOException;
/**
* @Description
* @Author cqh <caoqingghai@1000phone.com>
* @Version V1.0
* @Since 1.0
* @Date 2019/7/17 16:15
*/
/*
利用协处理器建立hbase的二级索引
关注表,用户关注了哪儿些明星 t_guanzhu
A-B f1:From A f1:To B f1:age 25 f1:addr 北京 。。。。
现在需要查询出明星B被哪儿些用户关注了?
B-A f1:From B f1:To A
B-E f1:From B f1:To E
*/
/**
 * RegionObserver coprocessor that maintains a secondary index for the
 * "follow" table: a put with rowkey "A-B" (A follows B) triggers a mirror
 * put "B-A" into ns1:t_fensi with the f1:From / f1:To columns swapped, so
 * "who follows B" becomes a cheap prefix scan on the index table.
 */
public class IndexOBServer extends BaseRegionObserver {
    @Override
    public void prePut(ObserverContext<RegionCoprocessorEnvironment> e, Put put, WALEdit edit, Durability durability) throws IOException {
        // 1. Split the rowkey and reverse it: "A-B" ==> "B-A".
        String rowkey = Bytes.toString(put.getRow());
        String[] keys = rowkey.split("-");
        // Rowkeys not of the form "<follower>-<followee>" carry no
        // relationship to index; let the original put proceed untouched
        // instead of failing it with an ArrayIndexOutOfBoundsException.
        if (keys.length != 2) {
            return;
        }
        String newRowkey = keys[1] + "-" + keys[0];
        // 2. Build the index-row put, swapping From <-> To column values.
        Put put1 = new Put(Bytes.toBytes(newRowkey));
        boolean change = false;
        CellScanner cellScanner = put.cellScanner();
        while (cellScanner.advance()) {
            Cell cell = cellScanner.current();
            String columnName = new String(CellUtil.cloneQualifier(cell), "utf-8");
            String columnValue = new String(CellUtil.cloneValue(cell), "utf-8");
            if (columnName.equals("From")) {
                put1.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("To"), Bytes.toBytes(columnValue));
                change = true;
            } else if (columnName.equals("To")) {
                put1.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("From"), Bytes.toBytes(columnValue));
                change = true;
            }
        }
        // 3. Only write the index row when a From/To column was present.
        if (change) {
            Table table = HbaseTools.getTable(TableName.valueOf("ns1:t_fensi"));
            try {
                table.put(put1);
            } finally {
                // Close the per-call handle; leaking one Table per put would
                // exhaust resources on a busy region server.
                table.close();
            }
        }
    }
}