Java访问HBASE
Java访问HBASE
前言
通过 Java 连接 HBase,可以方便地将数据文件中的内容映射(导入)到 HBase 表中。
以下是本篇文章正文内容,下面案例可供参考
一、创建maven工程
<name>java2hbase</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<!-- Source files are treated as UTF-8 by the build -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Compile at the Java 8 language level (source and target) -->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<!-- HBase client API: the org.apache.hadoop.hbase.* classes imported by Java2HBase -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.3.5</version>
</dependency>
<!-- Provides org.apache.hadoop.conf.Configuration, which Java2HBase imports -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.1.3</version>
</dependency>
<!-- NOTE(review): not referenced directly by the Java code in this article; presumably a runtime
     requirement of the Hadoop security classes. Keep the version aligned with hadoop-common. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>3.1.3</version>
</dependency>
</dependencies>
<build>
二、删除 test 文件夹,在 main 目录下新建 resources 文件夹,并在其中创建 log4j.properties 文件,内容如下
# Root logger: INFO and above, written to the console and to the log file.
# The logfile appender must be listed here, otherwise its configuration below is never used.
log4j.rootLogger=INFO, stdout, logfile
# Console appender
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# File appender; the path is relative to the working directory of the JVM
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=log/hd.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
三、编写访问 HBase 的 Java 类
其中主要的方法有创建命名空间,创建表,删除表,插入一条数据,插入一批数据
package cn.kgc.base;
import com.jcraft.jsch.IO;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.checkerframework.checker.units.qual.A;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Static helpers for basic HBase operations: creating a namespace, creating and dropping a
 * table, writing a single cell, and bulk-loading a delimited text file.
 *
 * <p>Every public method opens its own connection and closes it before returning. Failures
 * are reported on the console (stack trace and/or message); {@link IOException}s are not
 * propagated to the caller.
 */
public class Java2HBase {
    /** Number of puts collected before they are handed to the buffered mutator. */
    private static final int BATCH_CAPACITY = 1000;
    /** Write buffer size passed to the buffered mutator (8 MiB). */
    private static final long WRITE_BUFFER_SIZE = 8L * 1024 * 1024;
    /** Timestamp format used in the batch retry messages. */
    private static final DateTimeFormatter TIMESTAMP =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    private static Configuration config = null;

    /**
     * Creates the HBase configuration used by all other methods.
     *
     * @param items configuration overrides, each in {@code key=value} form; only the first
     *              {@code '='} separates key and value, so values may themselves contain {@code '='}
     * @throws IllegalArgumentException if an item has no {@code '='} or an empty key
     */
    public static void init(String... items) {
        // Build into a local first so a malformed item does not leave a half-applied configuration.
        Configuration fresh = HBaseConfiguration.create();
        for (String item : items) {
            int eq = item.indexOf('=');
            if (eq <= 0) {
                throw new IllegalArgumentException(
                        "config item [ " + item + " ] is not in key=value form");
            }
            fresh.set(item.substring(0, eq), item.substring(eq + 1));
        }
        config = fresh;
    }

    /**
     * Closes the given resources in order, printing (not propagating) any failure.
     * {@code null} entries are skipped: callers pass resources that may never have been opened.
     */
    private static void close(AutoCloseable... closes) {
        for (AutoCloseable resource : closes) {
            if (resource == null) {
                continue;
            }
            try {
                resource.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Opens a new connection, falling back to the default configuration if init() was not called. */
    private static Connection con() throws IOException {
        if (config == null) {
            config = HBaseConfiguration.create();
        }
        return ConnectionFactory.createConnection(config);
    }

    /** Returns the admin handle of the given connection. */
    private static Admin admin(Connection con) throws IOException {
        return con.getAdmin();
    }

    /** Returns whether {@code nameSpace} is one of the names in {@code nss}. */
    private static boolean nameSpaceExists(String nameSpace, String[] nss) {
        return Arrays.asList(nss).contains(nameSpace);
    }

    /**
     * Creates a namespace. If it already exists, or creation fails, the error is printed.
     *
     * @param nameSpace name of the namespace to create
     */
    public static void createNameSpace(String nameSpace) {
        Connection con = null;
        Admin admin = null;
        try {
            con = con();
            admin = admin(con);
            if (nameSpaceExists(nameSpace, admin.listNamespaces())) {
                throw new IOException("namespace[ " + nameSpace + " ]created in failure for existence");
            }
            admin.createNamespace(NamespaceDescriptor.create(nameSpace).build());
            System.out.println("namespace[ " + nameSpace + " ]created in success");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(admin, con);
        }
    }

    /**
     * Creates a table with at least one column family. If the table already exists, or
     * creation fails, the error is printed.
     *
     * @param tableName      table name, optionally qualified as {@code namespace:table}
     * @param columnFamily   first (mandatory) column family
     * @param columnFamilies additional column families
     */
    public static void createTable(String tableName, String columnFamily, String... columnFamilies) {
        Connection con = null;
        Admin admin = null;
        try {
            con = con();
            admin = admin(con);
            TableName tn = TableName.valueOf(tableName);
            if (admin.tableExists(tn)) {
                throw new IOException("table [ " + tableName + " ] created in failure for existence");
            }
            // Collect the descriptors of all requested column families.
            List<ColumnFamilyDescriptor> families = new ArrayList<>(columnFamilies.length + 1);
            families.add(ColumnFamilyDescriptorBuilder.of(columnFamily));
            for (String family : columnFamilies) {
                families.add(ColumnFamilyDescriptorBuilder.of(family));
            }
            // Build the table descriptor from the table name and its column families.
            TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tn);
            builder.setColumnFamilies(families);
            admin.createTable(builder.build());
            System.out.println("table [ " + tableName + " ] created in success");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(admin, con);
        }
    }

    /**
     * Drops a table, disabling it first if it is enabled. If the table does not exist, or the
     * operation fails, the error is printed.
     *
     * @param tableName table name, optionally qualified as {@code namespace:table}
     */
    public static void dropTable(String tableName) {
        Connection con = null;
        Admin admin = null;
        try {
            con = con();
            admin = admin(con);
            TableName tn = TableName.valueOf(tableName);
            if (!admin.tableExists(tn)) {
                throw new IOException("table [ " + tableName + " ] dropped in failure for absence");
            }
            if (admin.isTableEnabled(tn)) {
                admin.disableTable(tn);
                System.out.println("table [ " + tableName + " ] enabled and is disabled in success");
            }
            admin.deleteTable(tn);
            System.out.println("table [ " + tableName + " ] dropped in success");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(admin, con);
        }
    }

    /**
     * Checks whether a table exists, using a short-lived admin handle on the given connection.
     *
     * @throws IOException if the check itself fails, so that callers do not mistake a
     *                     communication error for a missing table
     */
    private static boolean tableExists(Connection con, TableName tableName) throws IOException {
        Admin admin = null;
        try {
            admin = admin(con);
            return admin.tableExists(tableName);
        } finally {
            close(admin);
        }
    }

    /**
     * Writes a single cell. The outcome (success or failure) is printed to standard error.
     *
     * @param tableName table name, optionally qualified as {@code namespace:table}
     * @param rowKey    row key
     * @param family    column family
     * @param column    column qualifier
     * @param value     cell value
     */
    public static void put(String tableName, String rowKey, String family, String column, String value) {
        String msg = "put [ " + rowKey + "=> " + family + " => " + column + " => (" + value + ") ] into table [ " + tableName + " ]";
        TableName tn = TableName.valueOf(tableName);
        Connection con = null;
        Table table = null;
        try {
            con = con();
            if (!tableExists(con, tn)) {
                throw new IOException("table [ " + tableName + " ] not exist error");
            }
            table = con.getTable(tn);
            // Build a Put for the row key and add the single cell to it.
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), Bytes.toBytes(value));
            table.put(put);
            System.err.println(msg + " in success");
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println(msg + " in failure");
        } finally {
            close(table, con);
        }
    }

    /**
     * Builds the listener invoked when buffered puts fail: it logs the failure and hands each
     * failed row back to the mutator once more.
     */
    private static BufferedMutator.ExceptionListener retryListener(String tableName) {
        return (failure, retryMutator) -> {
            int failed = failure.getNumExceptions();
            System.err.println("put data into table [ " + tableName + " ] error "
                    + failed + " rows,retry put at "
                    + TIMESTAMP.format(LocalDateTime.now()));
            int resubmitted = 0;
            for (int i = 0; i < failed; i++) {
                Row row = failure.getRow(i);
                try {
                    // Only Put objects are ever submitted by putBatch, so the cast is safe here.
                    retryMutator.mutate((Put) row);
                    resubmitted++;
                } catch (IOException retryError) {
                    retryError.printStackTrace();
                    System.err.println("return put " + row + " error, please check it");
                }
            }
            System.err.println("put data into table [ " + tableName + " ] from error total "
                    + failed + " rows,finish " + resubmitted + " rows, at" + TIMESTAMP.format(LocalDateTime.now()));
        };
    }

    /**
     * Loads a delimited text file into an existing HBase table.
     *
     * <p>The target table is derived from the file name: the name is split on {@code '_'} and
     * {@code '.'}, and the first two tokens are used as {@code namespace:table} (for example
     * {@code dsj_test_1624725224354} is loaded into {@code dsj:test}); a trailing token such as
     * a timestamp keeps file names unique.
     *
     * <p>The first line of the file describes the columns: {@code :key,cf:col,...}. The first
     * field of every following line is the row key, the remaining fields are the values of the
     * corresponding header columns. Lines with an empty row key or with fewer fields than the
     * header are reported and skipped instead of aborting the whole load.
     *
     * @param file     path of the data file
     * @param regexSep regular expression that separates the fields of a line
     */
    public static void putBatch(String file, String regexSep) {
        File data = new File(file);
        Connection con = null;
        BufferedMutator mutator = null;
        BufferedReader br = null;
        try {
            // Validate the input file.
            if (!data.exists() || !data.isFile()) {
                throw new IOException(file + " not exist or not file error");
            }
            // Derive the HBase table name from the file name.
            String[] ns = data.getName().split("_|\\.");
            if (ns.length < 2) {
                throw new IOException("file name [ " + data.getName()
                        + " ] not in namespace_table form error");
            }
            String tableName = ns[0] + ":" + ns[1];
            TableName tn = TableName.valueOf(tableName);
            con = con();
            // The table must already exist.
            if (!tableExists(con, tn)) {
                throw new IOException("hbase table [ " + tableName + " ] not exist error");
            }
            // Parse the table structure from the first line of the file.
            br = new BufferedReader(new FileReader(data));
            String line = br.readLine();
            if (line == null) {
                throw new IOException("file [ " + file + " ] empty error");
            }
            String[] ps = line.split(regexSep);
            if (ps.length < 2) {
                throw new IOException("file [ " + file + " ] header has no column error");
            }
            // Convert the header columns once instead of once per data row; index 0 is the row key.
            byte[][] families = new byte[ps.length][];
            byte[][] qualifiers = new byte[ps.length][];
            for (int i = 1; i < ps.length; i++) {
                String[] ts = ps[i].split(":");
                if (ts.length < 2) {
                    throw new IOException("header column [ " + ps[i]
                            + " ] not in family:qualifier form error");
                }
                families[i] = Bytes.toBytes(ts[0]);
                qualifiers[i] = Bytes.toBytes(ts[1]);
            }
            BufferedMutatorParams bmp = new BufferedMutatorParams(tn)
                    .writeBufferSize(WRITE_BUFFER_SIZE)
                    .listener(retryListener(tableName));
            mutator = con.getBufferedMutator(bmp);
            int count = 0;
            int skipped = 0;
            int lineNo = 1; // the header was line 1
            List<Put> batch = new ArrayList<>(BATCH_CAPACITY);
            while (null != (line = br.readLine())) {
                lineNo++;
                // Limit -1 keeps trailing empty fields, so a row ending in empty values is not short.
                String[] arr = line.split(regexSep, -1);
                if (arr.length < ps.length || arr[0].isEmpty()) {
                    skipped++;
                    System.err.println("skip line " + lineNo + " of [ " + file
                            + " ]: empty row key or fewer than " + ps.length + " fields");
                    continue;
                }
                Put put = new Put(Bytes.toBytes(arr[0])); // arr[0] is the row key
                for (int i = 1; i < ps.length; i++) {
                    put.addColumn(families[i], qualifiers[i], Bytes.toBytes(arr[i]));
                }
                batch.add(put);
                if (batch.size() == BATCH_CAPACITY) {
                    mutator.mutate(batch);
                    count += batch.size();
                    batch.clear();
                }
            }
            if (!batch.isEmpty()) {
                mutator.mutate(batch);
                count += batch.size();
                batch.clear();
            }
            // Push out everything still buffered before reporting success.
            mutator.flush();
            if (skipped > 0) {
                System.err.println("batch put from [ " + file + " ] skipped " + skipped + " malformed rows");
            }
            System.err.println("batch put into [ " + tableName + "," + count + " rows ] " +
                    "from [ " + file + " ]in success");
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("batch put into [ " + file + " ] in failure");
        } finally {
            close(br, mutator, con);
        }
    }

    /** Demo entry point: configure the ZooKeeper quorum, then run one of the operations. */
    public static void main(String[] args) {
        init("hbase.zookeeper.quorum=192.168.91.180");
        // Create a namespace (comparable to a database)
        //createNameSpace("dsj");
        // Create a table
        //createTable("dsj:test","cf1","cf2","cf3");
        // Drop a table (it is disabled first, then deleted)
        //dropTable("dsj:test");
        //put("dsj:test","000002","cf2","gfName","angela");
        putBatch("D:\\zhongbo\\project\\hbase\\dsj_test_1624725224354", ",");
    }
}
下面是生成测试数据的代码,可以写在 App 类里:它会生成一个以逗号分隔的数据文件,首行为表结构,供上面的 putBatch 方法批量导入
package cn.kgc;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.Random;
/**
* Hello world!
*
*/
/**
 * Generates a sample data file for {@code Java2HBase.putBatch}.
 *
 * <p>The file is named {@code dsj_test_<currentTimeMillis>}. Its first line is the column
 * header ({@code :key,cf1:name,...}), followed by 1000 comma-separated data rows.
 */
public class App
{
    /**
     * Writes the sample file.
     *
     * @param args ignored
     * @throws IOException if the file cannot be created or written
     */
    public static void main( String[] args ) throws IOException {
        File target = new File("D:\\zhongbo\\project\\hbase\\dsj_test_" + System.currentTimeMillis());
        // try-with-resources closes (and flushes) the writer even if a write fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(target, true))) {
            bw.write(":key,cf1:name,cf1:age,cf2:pos,cf2:salary,cf3:rst");
            bw.newLine();
            Random rand = new Random();
            for (int i = 0; i < 1000; i++) {
                // Numbers are passed as strings: MessageFormat would otherwise apply locale
                // number formatting (e.g. "1,000"), which would add extra comma-separated fields.
                bw.write(MessageFormat.format
                        ("zbstu{0},henry{0},{1},market clerk,{2},how are you"
                                , String.valueOf(i)
                                , String.valueOf(18 + rand.nextInt(20))
                                , String.valueOf(1 + rand.nextInt(3))));
                bw.newLine();
            }
        }
    }
}
然后进入 HBase shell 进行验证:创建完命名空间后,可以用 list_namespace 查看;创建完表后,可以用 list 查看;添加完数据后,可以用 scan 'xxx:xxx' 查看(注意使用英文单引号)。
注意:添加、删除数据都不要直接在 HDFS 上操作。HDFS 只是 HBase 底层存放数据的地方,如果在 HDFS 上直接删除表和命名空间,HBase 里对应的命名空间就会损坏:既 drop 不掉,也不能再往里面添加表和数据。
总结
Java访问HBASE ,今天介绍到这里,特别是里面的批量插入数据,重要且经常使用!