一、Hbase架构
1、简单整体架构
![](https://i-blog.csdnimg.cn/blog_migrate/3aecb68156253e1d077d68068a710302.png)
2、hbase内部架构
![](https://i-blog.csdnimg.cn/blog_migrate/5c1604400d24e2cc4639e0cf750fba04.png)
二、hbase读写文件流程
1、写流程
![](https://i-blog.csdnimg.cn/blog_migrate/a4491fb0081e30c460d09574f37ac19d.png)
2、读流程
![](https://i-blog.csdnimg.cn/blog_migrate/a2e26137dd8dee8847486516437df7a1.png)
三、HbaseAPI编程
1、创建命名空间
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import java.io.IOException;
public class Code02CreateNamespace {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        // Configuration carries the ZooKeeper quorum used to locate the HBase cluster.
        // NOTE(review): HBaseConfiguration.create() is the documented factory and also
        // loads hbase-default/hbase-site settings; plain new Configuration() relies on
        // the explicitly set keys only — confirm this is intentional.
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1,node2,master");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Creates the "jan" namespace in HBase.
     *
     * @throws IOException if the namespace creation or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources guarantees both Admin and Connection are closed even when
        // createNamespace throws (the original leaked both on failure).
        try (Connection conn = connection;
             Admin admin = conn.getAdmin()) {
            NamespaceDescriptor namespace = NamespaceDescriptor.create("jan").build();
            admin.createNamespace(namespace);
        }
    }
}
2、创建表并给定多个列族
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import java.io.IOException;
public class Code04CreateTable {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Creates a table with one or more column families.
     *
     * @param tableName   table name, optionally namespace-qualified (e.g. "jan:tbl2")
     * @param familyNames one column-family name per vararg
     * @throws IOException if table creation fails
     */
    public static void createtable(String tableName, String... familyNames) throws IOException {
        // TableName wraps the String name; HTableDescriptor is what createTable expects.
        HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf(tableName));
        for (String familyName : familyNames) {
            hTableDescriptor.addFamily(new HColumnDescriptor(familyName));
        }
        // Bug fix: the original closed the shared static connection here, so any second
        // call to this helper failed. Only the Admin is closed; try-with-resources also
        // closes it when createTable throws.
        try (Admin admin = connection.getAdmin()) {
            admin.createTable(hTableDescriptor);
        }
    }

    public static void main(String[] args) throws IOException {
        try {
            createtable("jan:tbl2", "info1", "info2");
        } finally {
            // The connection is owned by main and closed exactly once, here.
            connection.close();
        }
    }
}
3、删除表
/**
 * Checks whether the given table (optionally namespace-qualified) exists.
 *
 * @param tableName table name, e.g. "jan:tbl2"
 * @return true when the table exists
 * @throws IOException if the cluster cannot be reached
 */
public static boolean exists(String tableName) throws IOException {
    // `admin` is a field declared elsewhere in the enclosing class.
    return admin.tableExists(TableName.valueOf(tableName));
}
/**
 * Deletes a table. HBase requires a table to be disabled before it can be deleted,
 * so the disable must run first — the order of the two calls is significant.
 *
 * @param tableName table name, e.g. "jan:tbl2"
 * @throws IOException if the disable or delete fails
 */
public static void deleteTable(String tableName) throws IOException {
    TableName target = TableName.valueOf(tableName);
    admin.disableTable(target);
    admin.deleteTable(target);
}
// Deletes the table only when it actually exists.
public static void main(String[] args) throws IOException {
    String tableName = "jan:tbl2";
    if (!exists(tableName)) {
        System.err.println("表不存在,请重新输入...");
        // Bug fix: the original fell through and still called deleteTable on a
        // missing table, which throws; bail out after reporting.
        return;
    }
    deleteTable(tableName);
}
4、给表中put数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class Code07PutData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Writes one row ("1002") with four columns into table jan:tbl1.
     *
     * @throws IOException if the put or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes Table and Connection even when table.put throws
        // (the original leaked both on failure).
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"))) {
            // A Put is keyed by the row key, passed as a byte array.
            Put put = new Put(Bytes.toBytes("1002"));
            // Each addColumn takes (family, qualifier, value).
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("zhangsan"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes("18"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes("man"));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("clazz"), Bytes.toBytes("1"));
            table.put(put);
        }
    }
}
工作中常用IO流
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
//工作中推荐使用如下方法:
public class Code13PutData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    /** Flush the buffered puts to HBase once this many have accumulated. */
    private static final int BATCH_SIZE = 100;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Bulk-loads students.txt into jan:tbl1, batching puts 100 at a time so each
     * RPC carries many rows instead of one.
     *
     * Expected line format: rowKey,name,age,gender,clazz (comma-separated).
     *
     * @throws IOException if reading the file or writing to HBase fails
     */
    public static void main(String[] args) throws IOException {
        // Bug fix: the original never closed the BufferedReader (and leaked the
        // Table/Connection on any exception); try-with-resources closes all three.
        // NOTE(review): FileReader uses the platform default charset — confirm the
        // input file's encoding matches.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             BufferedReader bufferedReader =
                     new BufferedReader(new FileReader("D:\\CodeSpace\\hbasedemo15\\data\\students.txt"))) {
            List<Put> putList = new ArrayList<>(BATCH_SIZE);
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                String[] columns = line.split(",");
                Put put = new Put(Bytes.toBytes(columns[0]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(columns[1]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(columns[2]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(columns[3]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("clazz"), Bytes.toBytes(columns[4]));
                putList.add(put);
                // Flush a full batch, then clear the buffer so rows are not re-sent.
                if (putList.size() >= BATCH_SIZE) {
                    table.put(putList);
                    putList.clear();
                }
            }
            // Flush the final partial batch, if any.
            if (!putList.isEmpty()) {
                table.put(putList);
            }
        }
    }
}
5、get表中的数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
//该样例中的方法只能获取部分列数据,列的信息需要手动给定
public class Code08GetData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Fetches row "1001" from jan:tbl1 and prints a few hand-picked columns.
     * Only the columns named below are read; unknown columns require a scan.
     *
     * @throws IOException if the get or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes Table and Connection even when table.get throws
        // (the original leaked both on failure).
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"))) {
            Get get = new Get(Bytes.toBytes("1001"));
            Result result = table.get(get);
            // getValue(family, qualifier) returns null for absent cells;
            // Bytes.toString(null) yields null rather than throwing.
            String rowKey = Bytes.toString(result.getRow());
            String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));
            String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));
            String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));
            System.out.println("rowKey =>" + rowKey + " " + name + ":" + age + ":" + gender);
        }
    }
}
6、delete表中数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class Code09deleteData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Deletes the entire row "1001" (all column families/versions) from jan:tbl1.
     *
     * @throws IOException if the delete or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes Table and Connection even when table.delete
        // throws (the original leaked both on failure).
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"))) {
            Delete delete = new Delete(Bytes.toBytes("1001"));
            table.delete(delete);
        }
    }
}
7、scan获取表中多列数据
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class Code11ScanData {

    /** Shared HBase connection, initialized once when the class is loaded. */
    private static Connection connection;

    static {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "node1:2181,node2:2181,master:2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("未正确获取到连接信息");
        }
    }

    /**
     * Scans every row of jan:tbl1 (up to 3 versions per cell) and prints each
     * cell's row key, family, qualifier, and value.
     *
     * @throws IOException if the scan or cluster communication fails
     */
    public static void main(String[] args) throws IOException {
        Scan scan = new Scan();
        scan.setMaxVersions(3);
        // Bug fix: the original never closed the ResultScanner (which holds a lease
        // on the region server) and leaked Table/Connection on exception.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("jan:tbl1"));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                String rowKey = Bytes.toString(result.getRow());
                // rawCells() returns every cell of the current row.
                for (Cell cell : result.rawCells()) {
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    // Bug fix: the original output ran "Qualifier" and "Value:" straight
                    // into the data with no separators, producing garbled lines.
                    System.out.println("rowKey:" + rowKey + " Family:" + family
                            + " Qualifier:" + qualifier + " Value:" + value);
                }
            }
        }
    }
}