从来没有想过会去学习 Java 大数据相关的东西,但因为项目需要,不得不使用 HBase 存储数据,所以在这里记录一些简单的 CRUD 操作。
1、安装
1.1 Docker
docker pull harisekhon/hbase
docker run -d -h docker-hbase \
-p 2181:2181 \
-p 9090:9090 \
-p 9095:9095 \
-p 16000:16000 \
-p 16010:16010 \
-p 16020:16020 \
-p 16201:16201 \
-p 16301:16301 \
--name hbase \
--restart=always \
--network my_net --network-alias hbase \
harisekhon/hbase
## 访问
http://192.168.56.120:16010
1.2 直接安装在linux上
教程很多
或者:https://gitee.com/fyhcug/note (【单机 Zookeeper + Hadoop + Hbase】.md,该教程找不到出处了……)
2、Springboot
- yaml
hbase:
  zookeeper:
    quorum: 192.168.56.120
    property:
      clientPort: 2181
- Maven
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.4.4</version>
</dependency>
- HbaseUtil
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
* Created on 2021/7/17--20:57.
* hbase 工具类
* @author fengyuhao
* @Description
*/
@Component
public class HbaseUtil {
// Admin handle; each table-management helper re-acquires it from the connection before use.
private static Admin admin;
// Connection used by all static helpers; assigned once when the component is constructed.
private static Connection con;

/**
 * Builds the HBase client configuration from the injected ZooKeeper settings and opens the
 * connection stored in the static {@code con} field.
 *
 * @param zookeeperQuorum value of the {@code hbase.zookeeper.quorum} property
 * @param clientPort value of the {@code hbase.zookeeper.property.clientPort} property
 * @throws IOException if the connection cannot be created
 */
private HbaseUtil(@Value("${hbase.zookeeper.quorum}") String zookeeperQuorum,
        @Value("${hbase.zookeeper.property.clientPort}") String clientPort) throws IOException {
    final Configuration hbaseConf = HBaseConfiguration.create();
    hbaseConf.set("hbase.zookeeper.property.clientPort", clientPort);
    hbaseConf.set("hbase.zookeeper.quorum", zookeeperQuorum);
    HbaseUtil.con = ConnectionFactory.createConnection(hbaseConf);
}
/**
 * Creates a table with the given column families. When the table already exists, only a
 * notice is printed and nothing is created.
 *
 * <p>I/O failures are printed to stderr and not rethrown.
 *
 * @param tableName name of the table to create
 * @param cf names of the column families to create
 */
public static void createTable(String tableName, String[] cf) {
    TableName tb = TableName.valueOf(tableName);
    // Use a method-local Admin and close it via try-with-resources; previously the handle was
    // parked in a shared static field and never closed.
    try (Admin localAdmin = con.getAdmin()) {
        if (localAdmin.tableExists(tb)) {
            System.out.println("talbe is exists!");
            return;
        }
        List<ColumnFamilyDescriptor> families = new ArrayList<>();
        for (String family : cf) {
            families.add(ColumnFamilyDescriptorBuilder.newBuilder(family.getBytes(StandardCharsets.UTF_8)).build());
        }
        TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(tb);
        tableDescriptorBuilder.setColumnFamilies(families);
        localAdmin.createTable(tableDescriptorBuilder.build());
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Lists the names of the existing tables.
 *
 * @return the table names, or an empty array when the lookup fails (the failure is printed to
 *     stderr and not rethrown)
 */
public static TableName[] listTables() {
    TableName[] tableNames = new TableName[0];
    // Method-local Admin closed by try-with-resources; it was previously never closed.
    try (Admin localAdmin = con.getAdmin()) {
        tableNames = localAdmin.listTableNames();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return tableNames;
}
/**
 * Deletes a table if it exists, disabling it first when it is still enabled.
 *
 * <p>I/O failures are printed to stderr and not rethrown.
 *
 * @param tableName name of the table to delete
 */
public static void deleteTable(String tableName) {
    TableName tn = TableName.valueOf(tableName);
    // try-with-resources closes the Admin even when disable/delete throws; the previous
    // explicit close() was skipped on any exception.
    try (Admin localAdmin = con.getAdmin()) {
        if (localAdmin.tableExists(tn)) {
            // Disabling a table that is already disabled fails, which would abort the delete.
            if (localAdmin.isTableEnabled(tn)) {
                localAdmin.disableTable(tn);
            }
            localAdmin.deleteTable(tn);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Inserts (or overwrites) a single cell.
 *
 * <p>I/O failures are printed to stderr and not rethrown.
 *
 * @param tableName table name
 * @param rowkey row key
 * @param cf column family
 * @param column column qualifier
 * @param value cell value, stored as the bytes produced by {@code Bytes.toBytes(String)}
 */
public static void put(String tableName, String rowkey, String cf, String column, String value) {
    // try-with-resources releases the Table even when the write fails.
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        table.put(put);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Inserts (or overwrites) many cells in one batch.
 *
 * <p>Each value is serialized with Jackson's {@code writeValueAsString}, so it is stored as
 * JSON text. Each row key is also printed to stdout. I/O and serialization failures are
 * printed to stderr and not rethrown.
 *
 * @param tableName table name
 * @param data rows to write, shaped as {@code Map(rowkey, Map("cf:column", value))}; a key
 *     without a colon is treated as a column family with no qualifier
 */
public static void batchPut(String tableName, Map<String, Map<String, Object>> data) {
    // try-with-resources releases the Table even when serialization or the write fails.
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        ObjectMapper objectMapper = new ObjectMapper();
        List<Put> puts = new ArrayList<>();
        for (Map.Entry<String, Map<String, Object>> row : data.entrySet()) {
            System.out.println(row.getKey());
            // One Put per row (instead of one per column) keeps all columns of a row together.
            Put put = new Put(Bytes.toBytes(row.getKey()));
            for (Map.Entry<String, Object> column : row.getValue().entrySet()) {
                // Split on the first colon only, so a qualifier that itself contains ':' is
                // not truncated.
                String[] keys = column.getKey().split(":", 2);
                byte[] qualifier = keys.length > 1 ? Bytes.toBytes(keys[1]) : null;
                put.addColumn(Bytes.toBytes(keys[0]), qualifier,
                        Bytes.toBytes(objectMapper.writeValueAsString(column.getValue())));
            }
            // A Put without any column is rejected by the client, so skip rows with no columns.
            if (!put.isEmpty()) {
                puts.add(put);
            }
        }
        table.put(puts);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Scans the whole table and returns its cells.
 *
 * <p>Every cell becomes one map with the String entries {@code rowKey}, {@code family},
 * {@code column} and {@code value}. Previously a single map per row was reused for all of
 * the row's cells, so only the last cell of each row was returned.
 *
 * <p>A failure to open the table or scanner is printed to stderr and yields an empty list.
 *
 * @param tableName table name
 * @return one map per cell, in scan order
 */
public static List<Map<String, Object>> scanTable(String tableName) {
    TableName tb = TableName.valueOf(tableName);
    List<Map<String, Object>> list = new ArrayList<>();
    // Both the table and the scanner stay open for the whole iteration and are closed
    // afterwards; the scanner was previously never closed.
    try (Table table = con.getTable(tb);
         ResultScanner scanner = table.getScanner(new Scan())) {
        for (Result result : scanner) {
            for (Cell cell : result.rawCells()) {
                Map<String, Object> map = new HashMap<>(8);
                map.put("rowKey", Bytes.toString(CellUtil.cloneRow(cell)));
                map.put("family", Bytes.toString(CellUtil.cloneFamily(cell)));
                map.put("column", Bytes.toString(CellUtil.cloneQualifier(cell)));
                map.put("value", Bytes.toString(CellUtil.cloneValue(cell)));
                list.add(map);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return list;
}
/**
 * Fetches one row by its row key.
 *
 * @param tableName table name
 * @param rowKey row key
 * @return the row, or {@code null} when the row is empty or the lookup fails (the failure is
 *     printed to stderr and not rethrown)
 */
public static Result getByRowkey(String tableName, String rowKey) {
    Result result = null;
    // try-with-resources releases the Table even when the get fails.
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        result = table.get(new Get(rowKey.getBytes(StandardCharsets.UTF_8)));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return (result == null || result.isEmpty()) ? null : result;
}
/**
 * Fetches one row by its row key, restricted to a single column family.
 *
 * @param tableName table name
 * @param rowKey row key
 * @param cf column family to read
 * @return the row, or {@code null} when the row is empty or the lookup fails (the failure is
 *     printed to stderr and not rethrown)
 */
public static Result getByRowkey(String tableName, String rowKey, String cf) {
    Result result = null;
    // try-with-resources releases the Table even when the get fails.
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        Get get = new Get(rowKey.getBytes(StandardCharsets.UTF_8));
        get.addFamily(cf.getBytes(StandardCharsets.UTF_8));
        result = table.get(get);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return (result == null || result.isEmpty()) ? null : result;
}
/**
 * Fetches one row by its row key, restricted to a single column.
 *
 * @param tableName table name
 * @param rowKey row key
 * @param cf column family of the column to read
 * @param column qualifier of the column to read
 * @return the row, or {@code null} when the row is empty or the lookup fails (the failure is
 *     printed to stderr and not rethrown)
 */
public static Result getByRowkey(String tableName, String rowKey, String cf, String column) {
    Result result = null;
    // try-with-resources releases the Table even when the get fails.
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        Get get = new Get(rowKey.getBytes(StandardCharsets.UTF_8));
        get.addColumn(cf.getBytes(StandardCharsets.UTF_8), column.getBytes(StandardCharsets.UTF_8));
        result = table.get(get);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return (result == null || result.isEmpty()) ? null : result;
}
/**
 * Finds rows whose row key matches a regular expression.
 *
 * <p>The matching rows are read completely into memory before this method returns, so that
 * the scanner and the table can be closed here. Previously the iterator of a still-open
 * scanner was handed out, and the caller had no way to close that scanner.
 *
 * @param tableName table name
 * @param pattern regular expression applied to the row key
 * @return an iterator over the matching rows, or {@code null} when the scan cannot be opened
 *     (the failure is printed to stderr and not rethrown)
 */
public static Iterator<Result> getByRowkeyPattern(String tableName, String pattern) {
    Scan scan = new Scan();
    scan.setFilter(new RowFilter(CompareOperator.EQUAL, new RegexStringComparator(pattern)));
    List<Result> matches = null;
    try (Table table = con.getTable(TableName.valueOf(tableName));
         ResultScanner scanner = table.getScanner(scan)) {
        List<Result> collected = new ArrayList<>();
        for (Result row : scanner) {
            collected.add(row);
        }
        matches = collected;
    } catch (IOException e) {
        e.printStackTrace();
    }
    return matches == null ? null : matches.iterator();
}
/**
 * Finds rows whose row key lies strictly between two keys ({@code start < rowkey < end}).
 *
 * <p>The matching rows are read completely into memory before this method returns, so that
 * the scanner and the table can be closed here. Previously the iterator of a still-open
 * scanner was handed out, and the caller had no way to close that scanner.
 *
 * @param tableName table name
 * @param start lower bound of the row key (exclusive)
 * @param end upper bound of the row key (exclusive)
 * @return an iterator over the matching rows, or {@code null} when the scan cannot be opened
 *     (the failure is printed to stderr and not rethrown)
 */
public static Iterator<Result> getByRowkeyRange(String tableName, String start, String end) {
    byte[] startRow = start.getBytes(StandardCharsets.UTF_8);
    byte[] endRow = end.getBytes(StandardCharsets.UTF_8);

    Scan scan = new Scan();
    // Bound the scan itself so that only the requested key range is read instead of filtering
    // a full table scan. An empty bound is left unset because an empty start/stop row means
    // "unbounded" to the scanner; the row filters below still enforce the comparison.
    if (startRow.length > 0) {
        scan.withStartRow(startRow, false);
    }
    if (endRow.length > 0) {
        scan.withStopRow(endRow, false);
    }
    List<Filter> filters = new ArrayList<>();
    filters.add(new RowFilter(CompareOperator.GREATER, new BinaryComparator(startRow)));
    filters.add(new RowFilter(CompareOperator.LESS, new BinaryComparator(endRow)));
    scan.setFilter(new FilterList(filters));

    List<Result> matches = null;
    try (Table table = con.getTable(TableName.valueOf(tableName));
         ResultScanner scanner = table.getScanner(scan)) {
        List<Result> collected = new ArrayList<>();
        for (Result row : scanner) {
            collected.add(row);
        }
        matches = collected;
    } catch (IOException e) {
        e.printStackTrace();
    }
    return matches == null ? null : matches.iterator();
}
/**
 * Finds rows by cell timestamp range ({@code start <= timestamp < end}).
 *
 * <p>The matching rows are read completely into memory before this method returns, so that
 * the scanner and the table can be closed here. Previously the iterator of a still-open
 * scanner was handed out, and the caller had no way to close that scanner.
 *
 * @param tableName table name
 * @param start lower timestamp bound (inclusive); must not be {@code null}
 * @param end upper timestamp bound (exclusive); must not be {@code null}
 * @return an iterator over the matching rows, or {@code null} when the scan cannot be set up
 *     or opened (the failure is printed to stderr and not rethrown)
 */
public static Iterator<Result> getByTimestamp(String tableName, Long start, Long end) {
    List<Result> matches = null;
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        Scan scan = new Scan();
        scan.setTimeRange(start, end);
        try (ResultScanner scanner = table.getScanner(scan)) {
            List<Result> collected = new ArrayList<>();
            for (Result row : scanner) {
                collected.add(row);
            }
            matches = collected;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return matches == null ? null : matches.iterator();
}
/**
 * Deletes one row by its row key.
 *
 * <p>I/O failures are printed to stderr and not rethrown.
 *
 * @param tableName table name
 * @param rowKey row key of the row to delete
 */
public static void deleteByRowkey(String tableName, String rowKey) {
    // try-with-resources releases the Table even when the delete fails.
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        table.delete(new Delete(rowKey.getBytes(StandardCharsets.UTF_8)));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Deletes several rows by their row keys in one batch.
 *
 * <p>I/O failures are printed to stderr and not rethrown.
 *
 * @param tableName table name
 * @param rowKey row keys of the rows to delete
 */
public static void batchDeleteByRowkey(String tableName, String[] rowKey) {
    // try-with-resources releases the Table even when the delete fails.
    try (Table table = con.getTable(TableName.valueOf(tableName))) {
        List<Delete> deletes = new ArrayList<>();
        for (String key : rowKey) {
            deletes.add(new Delete(key.getBytes(StandardCharsets.UTF_8)));
        }
        table.delete(deletes);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}
增删改查常用的也就那么几个类和接口,顺着这些类、接口大致看一下它们的子类或实现类,基本就都能掌握了,这里不再赘述。
例如 Filter:根据其子类的名字,就能知道它是要根据 rowkey 怎么过滤了。
- Test
import com.cug.demo1.utils.HbaseUtil;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import java.util.*;
@SpringBootTest
class Demo1ApplicationTests {
/**
* 创建表
*/
@Test
public void createTable(){
String[] cf = new String[2];
cf[0] = "info";
cf[1] = "address";
HbaseUtil.createTable("test",cf);
}
/**
* 查看表信息
*/
@Test
public void tableList(){
TableName[] tableNames = HbaseUtil.listTables();
for(TableName tableName: tableNames){
System.out.println(tableName.getNameAsString());
}
}
/**
* 删除表
*/
@Test
public void deleteTable(){
HbaseUtil.deleteTable("test");
}
/**
* 添加一条数据
*/
@Test
public void put(){
HbaseUtil.put("tbl_user","keyss","info","name","zhangsan");
}
/**
* 批量添加数据
*/
@Test
public void batchPut(){
Map<String,Map<String,Object>> data = new HashMap<>(8);
Map<String,Object> list = new HashMap<>(4);
list.put("address","zhangsan");
Map<String,Object> list1 = new HashMap<>(4);
list1.put("address","lisi");
data.put("mykeys3",list);
data.put("mykeys4",list1);
HbaseUtil.batchPut("tbl_user",data);
}
/**
* 查询数据
*/
@Test
public void getByRowkey(){
// 根据 rowkey 查询一条数据
System.out.println(HbaseUtil.getByRowkey("tbl_user", "mykeys1"));
// rowkey 模糊查询
Iterator<Result> tbl_user = HbaseUtil.getByRowkeyPattern("tbl_user", "mykey*");
if(tbl_user != null){
while (tbl_user.hasNext()){
Result next = tbl_user.next();
System.out.println(Bytes.toString(CellUtil.cloneRow(next.rawCells()[0])));
System.out.println(Bytes.toString(CellUtil.cloneFamily(next.rawCells()[0])));
System.out.println(Bytes.toString(CellUtil.cloneQualifier(next.rawCells()[0])));
System.out.println(Bytes.toString(CellUtil.cloneValue(next.rawCells()[0])));
}
}
// rowkey 范围
System.out.println(HbaseUtil.getByRowkeyRange("tbl_user", "mykeys", "mykeys2"));
// 时间戳范围查询
System.out.println(HbaseUtil.getByTimestamp("tbl_user", 1626570049074L, 1626570273856L));
}
}
3、Snapshot
## 创建快照。快照数据保存在 hdfs://xxx/hbase/.hbase-snapshot/;在 Docker 容器中默认保存在 /hbase-data/.hbase-snapshot/
hbase> snapshot 'src_table', 'snapshot_src_table'
## 查看快照
hbase> list_snapshots
## 删除快照
hbase> delete_snapshot 'snapshot_src_table'
## 数据迁移
hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot snapshot_src_table -copy-to hdfs://192.168.1.132:8020/hbase -mappers 16 -bandwidth 20
## 数据恢复
hbase> clone_snapshot 'snapshot_src_table' , 'new_table_name'