HBase Java API and Performance Optimization
HBase Java API
Create a Maven project
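The project only needs the HBase client artifact. A minimal pom.xml fragment as a sketch, assuming an HBase 1.x client (which matches the HTableDescriptor/HColumnDescriptor API used below); the version shown is a placeholder, pick the one matching your cluster:

<dependencies>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <!-- assumption: align this version with your cluster's HBase release -->
        <version>1.2.0</version>
    </dependency>
</dependencies>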
Code
TestConn.java
package cn.kgc.kb11.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

/**
 * @Author ZhangPeng
 * @Date 2021/4/12
 * @Description Shared HBase connection/admin helpers plus a standalone table-creation demo.
 */
public class TestConn {
    public static Connection conn;
    public static Admin admin;

    public static void initConn() {
        if (null != conn) {
            System.out.println("conn has been initialized");
            return;
        }
        Configuration conf = HBaseConfiguration.create();
        // Option 1: set the ZooKeeper quorum directly ...
        // conf.set("hbase.zookeeper.quorum", "192.168.56.12");
        // conf.set("hbase.zookeeper.property.clientPort", "2181");
        // Option 2: load the cluster's own config file. Note that only XML files can
        // be passed to addResource(); hbase-env.sh is a shell script and would make
        // Configuration parsing fail, so it must not be added here.
        conf.addResource(new Path("file:///opt/software/hbase/conf/hbase-site.xml"));
        try {
            conn = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void initAdmin() {
        if (null == conn) {
            initConn();
        }
        if (null != admin) {
            System.out.println("admin has been initialized");
            return;
        }
        try {
            admin = conn.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        if (null != admin) {
            try {
                admin.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            // reset so that a later initAdmin() builds a fresh handle
            admin = null;
        }
        if (null != conn) {
            try {
                conn.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            conn = null;
        }
    }

    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.56.12");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        Connection conn = null;
        Admin admin = null;
        try {
            // get the connection
            conn = ConnectionFactory.createConnection(conf);
            // get the admin handle
            admin = conn.getAdmin();
            // build the table descriptor from the table name
            HTableDescriptor tableDes = new HTableDescriptor(TableName.valueOf("test"));
            HColumnDescriptor columnFamily1 = new HColumnDescriptor("name");
            HColumnDescriptor columnFamily2 = new HColumnDescriptor("addr");
            tableDes.addFamily(columnFamily1);
            tableDes.addFamily(columnFamily2);
            admin.createTable(tableDes);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (admin != null) {
                    admin.close();
                }
                if (conn != null) {
                    conn.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
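Note that HBaseConfiguration.create() also loads any hbase-site.xml found on the classpath, so a third option is to copy the cluster's hbase-site.xml into src/main/resources and create the connection with no explicit settings. A minimal sketch (the table name "test" is only an example):

// assumes hbase-site.xml sits on the classpath, e.g. in src/main/resources
Configuration conf = HBaseConfiguration.create();
try (Connection conn = ConnectionFactory.createConnection(conf);
     Admin admin = conn.getAdmin()) {
    System.out.println(admin.tableExists(TableName.valueOf("test")));
}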
CreateTable.java
package cn.kgc.kb11.hbase;

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;

import java.io.IOException;

/**
 * @Author ZhangPeng
 * @Date 2021/4/12
 * @Description Table creation essentials: table name and column family names.
 */
public class CreateTable {
    public static void create(String tableName, String... columnFamily) {
        TestConn.initAdmin();   // make sure the shared conn and admin exist
        Admin admin = TestConn.admin;
        if (tableName == null || columnFamily == null) {
            TestConn.close();
            return;
        }
        HTableDescriptor table = new HTableDescriptor(TableName.valueOf(tableName));
        for (int i = 0; i < columnFamily.length; i++) {
            if (columnFamily[i] == null) {
                continue;
            }
            table.addFamily(new HColumnDescriptor(columnFamily[i]));
        }
        try {
            admin.createTable(table);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            TestConn.close();
        }
    }
}
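A hypothetical driver for the helper above (the table and family names are examples only); create() initializes and closes the shared connection by itself:

public class CreateTableDemo {
    public static void main(String[] args) {
        // create table 'student' with column families 'info' and 'address'
        CreateTable.create("student", "info", "address");
    }
}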
InsertValue.java
package cn.kgc.kb11.hbase;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;

import java.io.IOException;

/**
 * @Author ZhangPeng
 * @Date 2021/4/12
 * @Description Equivalent of: put 'table', 'rowkey', 'family:column', 'value'
 */
public class InsertValue {
    public static void insert(String tbName, String rk, String cf, String column, String value) throws IOException {
        TestConn.initAdmin();
        Connection conn = TestConn.conn;
        Table table = conn.getTable(TableName.valueOf(tbName));
        Put put = new Put(rk.getBytes());
        put.addColumn(cf.getBytes(), column.getBytes(), value.getBytes());
        table.put(put);
        table.close();
        TestConn.close();
    }
}
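On the performance side promised by the title, the biggest write-path win is batching Puts instead of sending one RPC per row. A sketch of a companion method (the name insertBatch, the row count, and the generated keys are illustrative only; it needs java.util.List and java.util.ArrayList imports):

public static void insertBatch(String tbName, String cf, String column) throws IOException {
    TestConn.initAdmin();
    Table table = TestConn.conn.getTable(TableName.valueOf(tbName));
    List<Put> puts = new ArrayList<>();
    for (int i = 0; i < 10000; i++) {
        Put put = new Put(("row" + i).getBytes());
        put.addColumn(cf.getBytes(), column.getBytes(), ("value" + i).getBytes());
        puts.add(put);
    }
    // one batched call instead of 10000 individual round trips
    table.put(puts);
    table.close();
    TestConn.close();
}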
DropTable.java
package cn.kgc.kb11.hbase;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;

import java.io.IOException;

/**
 * @Author ZhangPeng
 * @Date 2021/4/12
 * @Description Equivalent of: disable 'table' then drop 'table'
 */
public class DropTable {
    public static void drop(String tbName) throws IOException {
        TestConn.initAdmin();
        Admin admin = TestConn.admin;
        if (!admin.tableExists(TableName.valueOf(tbName))) {
            System.out.println("table " + tbName + " does not exist!");
            TestConn.close();
            return;
        }
        // a table must be disabled before it can be deleted
        if (!admin.isTableDisabled(TableName.valueOf(tbName))) {
            admin.disableTable(TableName.valueOf(tbName));
        }
        admin.deleteTable(TableName.valueOf(tbName));
        TestConn.close();
    }
}
GetRowValue.java
package cn.kgc.kb11.hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.io.IOException;
import java.util.List;

/**
 * @Author ZhangPeng
 * @Date 2021/4/12
 * @Description Equivalent of: get 'table', 'rowkey'
 */
public class GetRowValue {
    public static void printRowValue(String tbName, String rk, String... cfAndColumn) throws IOException {
        TestConn.initAdmin();
        Connection conn = TestConn.conn;
        Table table = conn.getTable(TableName.valueOf(tbName));
        Get get = new Get(rk.getBytes());
        if (cfAndColumn != null && cfAndColumn.length > 0) {
            if (cfAndColumn.length == 1) {
                // only a column family given: fetch the whole family
                get.addFamily(cfAndColumn[0].getBytes());
            } else if (cfAndColumn.length == 2) {
                // family and column given: fetch a single cell
                get.addColumn(cfAndColumn[0].getBytes(), cfAndColumn[1].getBytes());
            } else {
                throw new HBaseIOException("params are not correct!");
            }
        }
        Result result = table.get(get);
        List<Cell> cells = result.listCells();
        for (int i = 0; i < cells.size(); i++) {
            // CellUtil.clone* copies only the relevant slice of the cell's backing array;
            // reading getRowArray()/getFamilyArray() directly would dump the whole buffer
            String row = new String(CellUtil.cloneRow(cells.get(i)));
            String family = new String(CellUtil.cloneFamily(cells.get(i)));
            String qualifier = new String(CellUtil.cloneQualifier(cells.get(i)));
            String value = new String(CellUtil.cloneValue(cells.get(i)));
            System.out.println(row + " " + family + ":" + qualifier + "=" + value);
        }
        table.close();
        TestConn.close();
    }
}
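The varargs parameter lets one method cover three shell forms; hypothetical calls against the student table used later in this section:

GetRowValue.printRowValue("student", "row1");                    // get 'student','row1'
GetRowValue.printRowValue("student", "row1", "address");         // the whole 'address' family
GetRowValue.printRowValue("student", "row1", "address", "city"); // a single cell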
ScanTable.java
package cn.kgc.kb11.hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.io.IOException;
import java.util.List;

/**
 * @Author ZhangPeng
 * @Date 2021/4/12
 * @Description Equivalent of: scan 'table'
 */
public class ScanTable {
    public static void scan(String tbName) throws IOException {
        TestConn.initAdmin();
        Connection conn = TestConn.conn;
        Table table = conn.getTable(TableName.valueOf(tbName));
        Scan scan = new Scan();
        ResultScanner scanner = table.getScanner(scan);
        Result temp;
        while ((temp = scanner.next()) != null) {
            List<Cell> cells = temp.listCells();
            for (int i = 0; i < cells.size(); i++) {
                // copy just the row key bytes of each cell
                String row = new String(CellUtil.cloneRow(cells.get(i)));
                System.out.println(row);
            }
        }
        scanner.close();
        table.close();
        TestConn.close();
    }
}
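The matching read-path optimization is scanner caching: fetching a block of rows per RPC instead of one at a time. A sketch of the two settings as they would slot into scan() above (the caching value 500 is an arbitrary example):

Scan scan = new Scan();
scan.setCaching(500);        // rows returned per RPC round trip
scan.setCacheBlocks(false);  // skip the block cache for one-off full table scans
ResultScanner scanner = table.getScanner(scan);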
How Hive Integrates with HBase
- The two systems communicate through the public API interfaces each already exposes
- Implemented by the hive-hbase-handler-*.jar utility classes in Hive's lib directory
- HBaseStorageHandler
  - the class that implements the Hive-HBase integration
  - an implementation of the HiveStorageHandler interface
Create an external table in Hive
create external table stu(
rowkey string,
address_country string,
address_city string,
address_road string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties(
"hbase.columns.mapping"=":key,address:country,address:city,address:road")
tblproperties("hbase.table.name"="student");
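For an external table the mapped HBase table must already exist; Hive only attaches to it. The student table here was created ahead of time in the HBase shell, along the lines of (a hedged reconstruction, not the original session):

hbase(main):001:0> create 'student', 'address'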
Query the table in Hive
select * from stu;
Result:
hive> select * from stu;
OK
row1 japan nanjing xiaoxing Road
Time taken: 0.298 seconds, Fetched: 1 row(s)
Insert data in Hive
hive> insert into stu values('row2','china','shanghai','huaihai mid Road');
Query ID = root_20210412174646_49278cba-c6f6-4ecd-ab88-b57f6fa24e15
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1618187091347_0015, Tracking URL = http://hadoop12:8088/proxy/application_1618187091347_0015/
Kill Command = /opt/software/hadoop/bin/hadoop job -kill job_1618187091347_0015
Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 0
2021-04-12 17:46:52,362 Stage-0 map = 0%, reduce = 0%
2021-04-12 17:47:01,031 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 1.6 sec
MapReduce Total cumulative CPU time: 1 seconds 600 msec
Ended Job = job_1618187091347_0015
MapReduce Jobs Launched:
Stage-Stage-0: Map: 1 Cumulative CPU: 1.6 sec HDFS Read: 4434 HDFS Write: 0 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 600 msec
OK
Time taken: 20.0 seconds
hive> select * from stu;
OK
row1 japan nanjing xiaoxing Road
row2 china shanghai huaihai mid Road
Time taken: 0.192 seconds, Fetched: 2 row(s)
Check the result in HBase
hbase(main):003:0> scan 'student'
ROW COLUMN+CELL
row1 column=address:city, timestamp=1617866338642, value=nanjing
row1 column=address:country, timestamp=1617943828426, value=japan
row1 column=address:road, timestamp=1617866366699, value=xiaoxing Road
row2 column=address:city, timestamp=1618220820003, value=shanghai
row2 column=address:country, timestamp=1618220820003, value=china
row2 column=address:road, timestamp=1618220820003, value=huaihai mid Road
2 row(s) in 0.1490 seconds