package com.shujia;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/*
HBase的操作分类:
1.对表结构(元数据)的操作-->Admin对象
2.对表数据的操作-->Table对象
*/
/**
 * JUnit walkthrough of the HBase Java client API.
 *
 * <p>Operations fall into two groups:
 * <ol>
 *   <li>table-structure (metadata) operations, performed through {@link Admin};</li>
 *   <li>table-data operations, performed through {@link Table}.</li>
 * </ol>
 *
 * <p>Every test runs against the cluster configured in {@link #init()}; the connection and
 * the admin handle are opened before each test and released in {@link #close()}.
 * All String-to-byte conversions go through {@link Bytes#toBytes(String)} so that what is
 * written matches what {@link Bytes#toString(byte[])} decodes when reading.
 */
public class Demo02HBaseJavaAPI {
    /** Number of {@link Put}s buffered before one batch is sent to HBase in {@link #putStu()}. */
    private static final int BATCH_SIZE = 100;

    /** Number of comma-separated fields expected on each line of the students file. */
    private static final int STUDENT_FIELD_COUNT = 5;

    Connection conn;
    Admin admin;

    /**
     * Opens the cluster connection and the admin handle used by every test.
     *
     * @throws IOException if the connection or the admin handle cannot be created
     */
    @Before
    public void init() throws IOException {
        // Build the HBase client configuration.
        Configuration conf = HBaseConfiguration.create();
        // Address of the ZooKeeper quorum the HBase cluster is registered with.
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");
        // Establish the connection.
        conn = ConnectionFactory.createConnection(conf);
        // Admin handles metadata operations; it does not touch table data.
        admin = conn.getAdmin();
    }

    /**
     * Equivalent of the shell {@code list} command: prints the name of every table.
     *
     * @throws IOException if the table names cannot be fetched
     */
    @Test
    public void listAllTables() throws IOException {
        // Table names are represented as TableName objects in the client API.
        TableName[] tableNames = admin.listTableNames();
        for (TableName tableName : tableNames) {
            System.out.println(tableName.getNameAsString());
        }
    }

    /**
     * Creates table {@code tb1} with a single column family {@code cf1} whose TTL is 10 seconds.
     * Does nothing (apart from printing a notice) when the table already exists, so the test
     * can be re-run.
     *
     * @throws IOException if the existence check or the creation fails
     */
    @Test
    public void createTable() throws IOException {
        TableName tb1Name = TableName.valueOf("tb1");
        if (admin.tableExists(tb1Name)) {
            System.out.println("表已存在,不需要重复创建");
            return;
        }
        // HTableDescriptor describes the table structure.
        HTableDescriptor tb1 = new HTableDescriptor(tb1Name);
        // HColumnDescriptor describes the properties of one column family.
        HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
        cf1.setTimeToLive(10);
        tb1.addFamily(cf1);
        admin.createTable(tb1);
    }

    /**
     * Prints whether tables {@code tb1} and {@code tb10} exist.
     *
     * @throws IOException if the existence check fails
     */
    @Test
    public void existsTable() throws IOException {
        System.out.println(admin.tableExists(TableName.valueOf("tb1")));
        System.out.println(admin.tableExists(TableName.valueOf("tb10")));
    }

    /**
     * Alters {@code tb1}: sets the TTL of column family {@code cf1} to 100 seconds and adds a
     * new column family {@code cf2} (skipped when {@code cf2} is already present).
     *
     * <p>The method name keeps its original spelling so existing references stay valid.
     *
     * @throws IOException if the descriptor cannot be read or the modification fails
     */
    @Test
    public void modityTable() throws IOException {
        TableName tb1Name = TableName.valueOf("tb1");
        // Start from the current table structure.
        HTableDescriptor tb1 = admin.getTableDescriptor(tb1Name);
        // Locate cf1 and reset its TTL.
        for (HColumnDescriptor cf : tb1.getColumnFamilies()) {
            if ("cf1".equals(cf.getNameAsString())) {
                System.out.println("cf1原有的ttl为:" + cf.getTimeToLive());
                cf.setTimeToLive(100);
            }
        }
        // Add cf2 only if it is missing, so re-running the test does not fail on a duplicate.
        if (!tb1.hasFamily(Bytes.toBytes("cf2"))) {
            tb1.addFamily(new HColumnDescriptor("cf2"));
        }
        admin.modifyTable(tb1Name, tb1);
    }

    /**
     * Inserts one row ({@code 001}) with three columns into {@code tb1:cf1}.
     *
     * @throws IOException if the table cannot be opened or the write fails
     */
    @Test
    public void put() throws IOException {
        byte[] cf1 = Bytes.toBytes("cf1");
        // Data operations go through a Table handle, which must be closed after use.
        try (Table tb1 = conn.getTable(TableName.valueOf("tb1"))) {
            Put put = new Put(Bytes.toBytes("001"));
            put.addColumn(cf1, Bytes.toBytes("name"), Bytes.toBytes("张三"));
            put.addColumn(cf1, Bytes.toBytes("age"), Bytes.toBytes("22"));
            put.addColumn(cf1, Bytes.toBytes("clazz"), Bytes.toBytes("文科一班"));
            tb1.put(put);
        }
    }

    /**
     * Reads a single cell ({@code cf1:name} of row {@code 001}) from {@code tb1} and prints it.
     *
     * @throws IOException if the table cannot be opened or the read fails
     */
    @Test
    public void getData() throws IOException {
        byte[] cf1 = Bytes.toBytes("cf1");
        byte[] name = Bytes.toBytes("name");
        try (Table tb1 = conn.getTable(TableName.valueOf("tb1"))) {
            Get get = new Get(Bytes.toBytes("001"));
            get.addColumn(cf1, name);
            Result rs = tb1.get(get);
            System.out.println(Bytes.toString(rs.getValue(cf1, name)));
        }
    }

    /**
     * Creates table {@code stu} (column family {@code info}) if needed, then loads
     * {@code data/students.txt} into it.
     *
     * <p>Each line is expected to be {@code id,name,age,gender,clazz}; the id becomes the row
     * key. Rows are written in batches of {@link #BATCH_SIZE} instead of one RPC per row.
     * Blank lines are ignored and lines with too few fields are reported on stderr and skipped.
     * The file is read as UTF-8.
     *
     * @throws IOException if the table cannot be created/opened, the file cannot be read,
     *                     or a batch write fails
     */
    @Test
    public void putStu() throws IOException {
        TableName stuName = TableName.valueOf("stu");
        // Create the table only when it does not exist yet.
        if (!admin.tableExists(stuName)) {
            admin.createTable(new HTableDescriptor(stuName).addFamily(new HColumnDescriptor("info")));
        } else {
            System.out.println("表已存在,不需要重复创建");
        }

        // Column identifiers are loop-invariant, so encode them once.
        byte[] info = Bytes.toBytes("info");
        byte[] nameCol = Bytes.toBytes("name");
        byte[] ageCol = Bytes.toBytes("age");
        byte[] genderCol = Bytes.toBytes("gender");
        byte[] clazzCol = Bytes.toBytes("clazz");

        try (Table stu = conn.getTable(stuName);
             BufferedReader br = Files.newBufferedReader(
                     Paths.get("data/students.txt"), StandardCharsets.UTF_8)) {
            // Buffer of pending rows awaiting the next batch write.
            List<Put> puts = new ArrayList<>(BATCH_SIZE);
            String line;
            while ((line = br.readLine()) != null) {
                String[] splits = line.split(",");
                if (splits.length < STUDENT_FIELD_COUNT) {
                    if (!line.trim().isEmpty()) {
                        System.err.println("Skipping malformed line: " + line);
                    }
                    continue;
                }

                Put put = new Put(Bytes.toBytes(splits[0]));
                put.addColumn(info, nameCol, Bytes.toBytes(splits[1]));
                put.addColumn(info, ageCol, Bytes.toBytes(splits[2]));
                put.addColumn(info, genderCol, Bytes.toBytes(splits[3]));
                put.addColumn(info, clazzCol, Bytes.toBytes(splits[4]));
                puts.add(put);

                // Flush once a full batch has accumulated.
                if (puts.size() >= BATCH_SIZE) {
                    stu.put(puts);
                    puts.clear();
                }
            }
            // The row count is usually not a multiple of the batch size: flush the remainder.
            if (!puts.isEmpty()) {
                stu.put(puts);
            }
        }
    }

    /**
     * Scans a row-key range of {@code stu} and prints it twice, showing two ways of reading
     * a {@link Result}: by known column name, and by iterating its cells with {@link CellUtil}.
     *
     * @throws IOException if the table cannot be opened or a scan fails
     */
    @Test
    public void scanTableData() throws IOException {
        byte[] info = Bytes.toBytes("info");

        // Scan from 1500100001 to 1500100030 (stop row inclusive), at most 10 rows.
        Scan scan = new Scan();
        scan.withStartRow(Bytes.toBytes("1500100001"));
        scan.withStopRow(Bytes.toBytes("1500100030"), true);
        scan.setLimit(10);

        try (Table stu = conn.getTable(TableName.valueOf("stu"))) {
            // Way 1: fetch each expected column by name. One Result is one row.
            try (ResultScanner rss = stu.getScanner(scan)) {
                for (Result rs : rss) {
                    String id = Bytes.toString(rs.getRow());
                    String name = Bytes.toString(rs.getValue(info, Bytes.toBytes("name")));
                    String age = Bytes.toString(rs.getValue(info, Bytes.toBytes("age")));
                    String gender = Bytes.toString(rs.getValue(info, Bytes.toBytes("gender")));
                    String clazz = Bytes.toString(rs.getValue(info, Bytes.toBytes("clazz")));
                    System.out.println(id + "," + name + "," + age + "," + gender + "," + clazz);
                }
            }

            // Way 2: walk the cells with CellUtil; suitable when rows do not share the same columns.
            try (ResultScanner rss = stu.getScanner(scan)) {
                for (Result rs : rss) {
                    // All cells contained in the current row.
                    List<Cell> cells = rs.listCells();
                    String id = Bytes.toString(rs.getRow());
                    for (Cell cell : cells) {
                        // Each cell is one column value.
                        String value = Bytes.toString(CellUtil.cloneValue(cell));
                        String cf = Bytes.toString(CellUtil.cloneFamily(cell));
                        String qua = Bytes.toString(CellUtil.cloneQualifier(cell));
                        System.out.println(id + "," + cf + ":" + qua + "," + value);
                    }
                }
            }
        }
    }

    /**
     * Deletes the whole row {@code 1500100001} from {@code stu}.
     *
     * @throws IOException if the table cannot be opened or the delete fails
     */
    @Test
    public void deleteData() throws IOException {
        try (Table stu = conn.getTable(TableName.valueOf("stu"))) {
            Delete delete = new Delete(Bytes.toBytes("1500100001"));
            stu.delete(delete);
        }
    }

    /**
     * Releases the admin handle and the connection after each test.
     *
     * <p>Either field may still be {@code null} when {@link #init()} failed part-way, and the
     * connection is closed even if closing the admin handle throws.
     *
     * @throws IOException if closing the admin handle or the connection fails
     */
    @After
    public void close() throws IOException {
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}
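// Residue from the source web page, kept as comments so the file still compiles:
// "Two ways of fetching data"
// "Latest recommended article published on 2022-06-13 11:28:25"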