hbase的api操作

最新推荐文章于 2024-10-06 02:59:46 发布

IT小鸟鸟

最新推荐文章于 2024-10-06 02:59:46 发布

阅读量2.3k

点赞数 1

分类专栏： HBASE

本文链接：https://blog.csdn.net/u013111855/article/details/104740508

版权

HBASE 专栏收录该内容

9 篇文章 0 订阅

订阅专栏

hbase的api操作

hbase的api列表

几个主要 Hbase API 类和数据模型之间的对应关系：
在这里插入图片描述

TestHbaseConnection

两大对象：

HBaseConfiguration：
	hbase的加载配置文件的对象，用于加载默认配置文件 hbase-default.xml

Connection：
	hbase的连接对象

package hbase.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

/**
 * Minimal connectivity check: builds an HBase configuration, opens a
 * connection to the cluster and prints the connection object.
 */
public class TestHbaseConn {
	/**
	 * Opens a connection, prints it, and closes it again.
	 *
	 * @param args unused
	 * @throws IOException if the connection cannot be created or closed
	 */
	public static void main(String[] args) throws IOException {
		// Loads the HBase configuration files: hbase-default.xml and hbase-site.xml.
		Configuration conf = HBaseConfiguration.create();
		// ZooKeeper quorum address; this is normally configured in hbase-site.xml.
		conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
		// try-with-resources guarantees the connection is released even if
		// something inside the block throws.
		try (Connection conn = ConnectionFactory.createConnection(conf)) {
			System.out.println(conn);
		}
	}

}

TestHbaseDDL

hbase的两个核心操作对象：

hbaseadmin|admin

是hbase的ddl（namespace）操作的句柄对象
是hbase的管理对象，用于 namespace 和 table 的创建和定义

htable|table

hbase的相关操作对象
ddl
    HTableDescriptor  表描述器：指定表名、列族
    HColumnDescriptor  列族描述器：列族名|存储属性

package hbase.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HBaseAdmin;

/*
 * DDL examples driven through HBaseAdmin: namespace and table management.
 */
public class TestHbaseDDL {
	/**
	 * Lists namespaces and tables, creates {@code test_api_table}, then lists
	 * the tables again.
	 *
	 * @param args unused
	 * @throws IOException if connecting or any admin operation fails
	 */
	public static void main(String[] args) throws IOException {
		// Loads the HBase configuration files: hbase-default.xml and hbase-site.xml.
		Configuration conf = HBaseConfiguration.create();
		// ZooKeeper quorum address.
		conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");

		// try-with-resources closes in reverse declaration order: the admin
		// handle first, then the connection it was obtained from. Both are
		// released even when one of the calls below throws.
		try (Connection conn = ConnectionFactory.createConnection(conf);
				HBaseAdmin admin = (HBaseAdmin) conn.getAdmin()) {
//			createNS(admin, "test_api");
			listNS(admin);
			System.out.println("############");
//			dropNS(admin, "test_api");
			listT(admin);
			System.out.println("**********");
			createT(admin, "test_api_table");
			listT(admin);
//			dropT(admin, "test_api");
		}
	}

	/**
	 * Creates a namespace (hbase shell equivalent: {@code create_namespace "name"}).
	 *
	 * @param admin admin handle used to issue the request
	 * @param name  name of the namespace to create
	 * @throws IOException if the admin call fails
	 */
	public static void createNS(HBaseAdmin admin, String name) throws IOException {
		// The namespace descriptor carries the namespace name.
		NamespaceDescriptor descriptor = NamespaceDescriptor.create(name).build();
		admin.createNamespace(descriptor);
	}

	/**
	 * Prints the name of every namespace, one per line.
	 *
	 * @param admin admin handle used to issue the request
	 * @throws IOException if the admin call fails
	 */
	public static void listNS(HBaseAdmin admin) throws IOException {
		// One descriptor per namespace.
		NamespaceDescriptor[] namespaces = admin.listNamespaceDescriptors();
		for (NamespaceDescriptor ns : namespaces) {
			System.out.println(ns.getName());
		}
	}

	/**
	 * Deletes a namespace.
	 *
	 * @param admin admin handle used to issue the request
	 * @param name  name of the namespace to delete
	 * @throws IOException if the admin call fails
	 */
	public static void dropNS(HBaseAdmin admin, String name) throws IOException {
		admin.deleteNamespace(name);
	}

	/**
	 * Creates a table with the two column families {@code info01} and {@code info02}.
	 * No existence check is made here; see {@code Admin#createTable} for the
	 * behaviour when the table already exists.
	 *
	 * @param admin admin handle used to issue the request
	 * @param name  name of the table to create
	 * @throws IOException if the admin call fails
	 */
	public static void createT(HBaseAdmin admin, String name) throws IOException {
		TableName tname = TableName.valueOf(name);
		// The table descriptor holds the table name and its column families.
		HTableDescriptor tableDescriptor = new HTableDescriptor(tname);
		// Each column-family descriptor holds a family name (and its storage settings).
		tableDescriptor.addFamily(new HColumnDescriptor("info01"));
		tableDescriptor.addFamily(new HColumnDescriptor("info02"));
		admin.createTable(tableDescriptor);
	}

	/**
	 * Prints the name of every table, one per line.
	 *
	 * @param admin admin handle used to issue the request
	 * @throws IOException if the admin call fails
	 */
	public static void listT(HBaseAdmin admin) throws IOException {
		TableName[] tableNames = admin.listTableNames();
		for (TableName tableName : tableNames) {
			System.out.println(tableName.getNameAsString());
		}
	}

	/**
	 * Deletes a table if it exists; does nothing otherwise.
	 *
	 * @param admin admin handle used to issue the request
	 * @param name  name of the table to delete
	 * @throws IOException if any admin call fails
	 */
	public static void dropT(HBaseAdmin admin, String name) throws IOException {
		TableName tname = TableName.valueOf(name);
		if (!admin.tableExists(tname)) {
			return;
		}
		// A table must be disabled before it can be deleted. Skip the disable
		// step when it is already disabled so that the drop still goes through.
		if (admin.isTableEnabled(tname)) {
			admin.disableTable(tname);
		}
		admin.deleteTable(tname);
	}

}

TestHbaseDML

DML操作主要包括：

插入数据，对应shell为 put，可以单条插入，也可以批量插入
查询数据，对应shell为 get，可以单条获取数据，也可以批量获取数据
扫描数据，对应shell为 scan，可以单条数据扫描，也可以全表扫描，也可以指定行键范围扫描

package hbase.test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;

/*
 * DML examples. The handle for DML operations is HTable / Table.
 */
public class TestHbaseDML {
	/** Number of buffered puts between manual flushes in {@link #putDatas02(HTable)}. */
	private static final int FLUSH_BATCH_SIZE = 3000;

	/**
	 * Inserts a single row (hbase shell equivalent:
	 * {@code put "table", rowkey, "family:qualifier", value, ts}).
	 *
	 * @param table table to write to
	 * @throws IOException if the put fails
	 */
	public static void putOneData(HTable table) throws IOException {
		// A Put wraps all the cells to be written for one row key (here rk002).
		Put p = new Put("rk002".getBytes());
		// addColumn arguments: column family, column qualifier, value.
		p.addColumn("base_info".getBytes(), "name".getBytes(), "zs".getBytes());
		p.addColumn("base_info".getBytes(), "age".getBytes(), "13".getBytes());
		table.put(p);
	}

	/**
	 * Batch insert, approach 1: collect 1000 puts in a list and submit them
	 * with a single call. Prints the elapsed time in milliseconds.
	 *
	 * @param table table to write to
	 * @throws IOException if the batch put fails
	 */
	public static void putOneDatas(HTable table) throws IOException {
		long start = System.currentTimeMillis();
		List<Put> list = new ArrayList<Put>();
		for (int i = 0; i < 1000; i++) {
			Put p = new Put(("rk0000" + i).getBytes());
			p.addColumn("data1".getBytes(), "name".getBytes(),
					("zs" + i).getBytes());
			p.addColumn("info1".getBytes(), "age".getBytes(),
					((i + 10) + "").getBytes());
			// Author's measurement: submitting each put individually (table.put(p) here)
			// took 8360 ms; collecting them and submitting once took 1326 ms.
			list.add(p);
		}
		table.put(list);
		long end = System.currentTimeMillis();
		System.out.println(end - start);
	}

	/**
	 * Batch insert, approach 2: turn auto-flush off so puts are buffered on the
	 * client, and flush manually every {@value #FLUSH_BATCH_SIZE} puts.
	 * Prints the elapsed time in milliseconds.
	 *
	 * <p>Auto-flush is left disabled on {@code table} when this method returns.
	 *
	 * @param table table to write to
	 * @throws IOException if a put or flush fails
	 */
	public static void putDatas02(HTable table) throws IOException {
		long start = System.currentTimeMillis();
		// Auto-flush defaults to true (every table.put is submitted immediately).
		// With false, table.put only writes to the local buffer and the data
		// must be submitted explicitly with flushCommits().
		table.setAutoFlushTo(false);
		for (int i = 0; i < 10000; i++) {
			Put p = new Put(("rk00" + i).getBytes());
			p.addColumn("data1".getBytes(), "name".getBytes(),
					("zs" + i).getBytes());
			p.addColumn("info1".getBytes(), "age".getBytes(),
					((i + 1) + "").getBytes());
			table.put(p);
			// Count completed puts (i + 1) so the first flush happens after a
			// full batch rather than right after the very first put.
			if ((i + 1) % FLUSH_BATCH_SIZE == 0) {
				// Actually submit the locally buffered data to HBase.
				table.flushCommits();
			}
		}
		// Submit whatever is left over from the last partial batch.
		table.flushCommits();
		long end = System.currentTimeMillis();
		System.out.println(end - start);
	}

	/**
	 * Fetches the row {@code rk002} and prints each of its cells as
	 * family, qualifier and value separated by tabs. Prints nothing when the
	 * row has no cells.
	 *
	 * @param table table to read from
	 * @throws IOException if the get fails
	 */
	public static void getData(HTable table) throws IOException {
		Get g = new Get("rk002".getBytes());
		// A Result holds the data of one row key, i.e. several cells.
		Result result = table.get(g);
		printCells(result);
	}

	/**
	 * Fetches several rows with one call and prints all their cells.
	 * Rows that return no cells are skipped.
	 *
	 * @param table table to read from
	 * @throws IOException if the batch get fails
	 */
	public static void getDatas(HTable table) throws IOException {
		List<Get> list = new ArrayList<Get>();
		list.add(new Get("rk0001".getBytes()));
		list.add(new Get("rk002".getBytes()));

		// Restrict the third get to a single column.
		Get nameOnly = new Get("user0001".getBytes());
		nameOnly.addColumn("base_info".getBytes(), "name".getBytes());
		list.add(nameOnly);

		// One Result per Get, each holding the cells of that row.
		Result[] results = table.get(list);
		for (Result r : results) {
			printCells(r);
		}
	}

	/**
	 * Scans the row-key range [rk0001, zhangsan_20150701_0004) and prints
	 * every row: a separator line, the row key, then its cells.
	 *
	 * @param table table to scan
	 * @throws IOException if the scan fails
	 */
	public static void scanData(HTable table) throws IOException {
		// A Scan built with the no-arg constructor is a full-table scan;
		// conditions set on it narrow what is returned.
		Scan scan = new Scan();
		scan.setStartRow("rk0001".getBytes()); // start key is inclusive
		scan.setStopRow("zhangsan_20150701_0004".getBytes()); // stop key is exclusive
		// scan.setTimeRange(minStamp, maxStamp) restricts by timestamp; the
		// original author notes it is rarely used because rows are ordered by
		// row key, so a timestamp range needs a full-table scan.

		// A ResultScanner yields one Result per row; close it when done so the
		// scan is released.
		try (ResultScanner scanner = table.getScanner(scan)) {
			for (Result row : scanner) {
				System.out.println("-----------------------------");
				System.out.println(new String(row.getRow()));
				printCells(row);
			}
		}
	}

	/**
	 * Prints every cell of {@code row} as family, qualifier and value
	 * separated by tabs, one cell per line.
	 */
	private static void printCells(Result row) {
		List<Cell> cells = row.listCells();
		// listCells() returns null when the result holds no cells
		// (for example a get on a row key that does not exist).
		if (cells == null) {
			return;
		}
		for (Cell c : cells) {
			System.out.print(new String(c.getFamily()) + "\t");
			System.out.print(new String(c.getQualifier()) + "\t");
			System.out.println(new String(c.getValue()));
		}
	}

	/*
	 * Unfinished stub kept from the original:
	 * public static void deleteData(HTable table){
	 *     table.delete(delete);
	 * }
	 */

	/**
	 * Runs one of the examples above against the {@code user_info} table.
	 *
	 * @param args unused
	 * @throws IOException if connecting or the selected example fails
	 */
	public static void main(String[] args) throws IOException {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");

		// The table handle is bound to one table name, so every operation
		// below targets that table. try-with-resources closes the table first
		// and then the connection, even when an example throws.
		try (Connection conn = ConnectionFactory.createConnection(conf);
				HTable table = (HTable) conn.getTable(TableName.valueOf("user_info"))) {
			//putOneData(table);
			//putOneDatas(table);
			//putDatas02(table);
			//getData(table);
			//getDatas(table);
			scanData(table);
		}
	}
}

TestHbaseFilter

    比较过滤器
        比较操作符  > < == != >= <=
        FilterBase
            CompareFilter
                RowFilter  		行健过滤器
                FamilyFilter   	列族过滤器
                QualifierFilter 列名过滤器 
                ValueFilter 	列值过滤器（列对应的值）
                
            scan.setfilter()	设置过滤器

专用过滤器:
        SingleColumnValueFilter： 单列值过滤器，过滤  某一列的值的，会返回满足条件的整行
        SingleColumnValueExcludeFilter：单列值排除器 ，-----返回排除了该列的结果（与单列值过滤器相反）
        PrefixFilter：前缀过滤器，针对行键
        ColumnPrefixFilter：列前缀过滤器
        PageFilter：分页过滤器，用来设计每一页显示的数据条数
            随便搜索“魑魅魍魉”，假如总共10000条数据
            设计每页显示的数据条数：   20   30   50。。。
            底部显示页码：1,2,3,4,5.。。。。10
            PageFilter是用来设计每一页显示的数据条数的

package hbase.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import javax.ws.rs.core.NewCookie;

import org.apache.directory.api.asn1.util.Oid;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.thrift.generated.Hbase.Processor.getRow;
import org.apache.hadoop.hdfs.server.namenode.status_jsp;

/*
 * HBase filter examples.
 *
 * A filter is attached to a Scan to restrict what the table scan returns,
 * comparable to a WHERE clause:
 *
 *   select * from stu where age>19;
 */
public class TestHbaseFilter {
	/**
	 * Runs one of the filter examples against the {@code user_info} table.
	 *
	 * @param args unused
	 * @throws IOException if connecting or the scan fails
	 */
	public static void main(String[] args) throws IOException {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");

		// try-with-resources closes the table first and then the connection,
		// even when the example throws.
		try (Connection conn = ConnectionFactory.createConnection(conf);
				HTable table = (HTable) conn.getTable(TableName.valueOf("user_info"))) {
			//ScanDataWithFilter01(table);
			scanWithFilter02(table);
		}
	}

	/**
	 * Scans with a {@link ValueFilter} that keeps cells whose value is
	 * byte-wise greater than or equal to {@code "rk0001"}, and prints each
	 * returned row: a separator, the row key, then its cells.
	 *
	 * @param table table to scan
	 * @throws IOException if the scan fails
	 */
	public static void ScanDataWithFilter01(HTable table) throws IOException {
		Scan scan = new Scan();
		// NOTE(review): the literal looks like a row key but is compared against
		// cell values here; confirm whether a RowFilter was intended.
		ValueFilter filter = new ValueFilter(CompareOp.GREATER_OR_EQUAL,
				new BinaryComparator("rk0001".getBytes()));
		scan.setFilter(filter);

		// Close the scanner once every row has been consumed.
		try (ResultScanner scanner = table.getScanner(scan)) {
			for (Result row : scanner) {
				System.out.println("********************");
				System.out.println("行键是："+new String(row.getRow()));
				printCells(row);
			}
		}
	}

	/**
	 * Scans with a {@link ValueFilter} that keeps cells whose value is
	 * byte-wise greater than {@code "22"}, and prints the matching rows.
	 *
	 * @param table table to scan
	 * @throws IOException if the scan fails
	 */
	public static void ScanDataWithFilter(HTable table) throws IOException {
		// A Scan built with the no-arg constructor is a full-table scan;
		// filters set on it narrow what is returned.
		Scan scan = new Scan();

		/*
		 * Comparison filters take two arguments:
		 *   1. CompareOp - the comparison operator, one of
		 *        LESS, LESS_OR_EQUAL, EQUAL, NOT_EQUAL,
		 *        GREATER_OR_EQUAL, GREATER, NO_OP
		 *   2. ByteArrayComparable - how to compare; BinaryComparator compares
		 *      the complete byte[].
		 *
		 * Other comparison filters, e.g. "row key >= rk0001":
		 *
		 * RowFilter filter1=new RowFilter(CompareOp.GREATER_OR_EQUAL,
		 *		new BinaryComparator("rk0001".getBytes()));
		 * FamilyFilter filter2=new FamilyFilter(CompareOp.NOT_EQUAL,
		 *		new BinaryComparator("extra_info".getBytes()));
		 * QualifierFilter filter3=new QualifierFilter(
		 *		CompareOp.EQUAL, new BinaryComparator("age".getBytes()));
		 */

		// A value filter is applied to all columns: every column value is
		// compared with the given value.
		ValueFilter filter4 = new ValueFilter(CompareOp.GREATER,
				new BinaryComparator("22".getBytes()));

		// Several filters can be combined with a FilterList:
		//   argument 1: the logical relation between them,
		//               MUST_PASS_ALL (and) or MUST_PASS_ONE (or)
		//   argument 2: the filters, as varargs
		//  FilterList list=new FilterList(Operator.MUST_PASS_ALL,filter1,filter2,filter3,filter4);

		scan.setFilter(filter4);
		//scan.setFilter(filter2);
		//scan.setFilter(list); // setFilter() takes any Filter, including a FilterList

		scanAndPrint(table, scan);
	}

	/**
	 * Scans with a {@link PageFilter} of size 5 and prints the returned rows.
	 *
	 * @param table table to scan
	 * @throws IOException if the scan fails
	 */
	public static void scanWithFilter02(HTable table) throws IOException {
		// A Scan built with the no-arg constructor is a full-table scan;
		// filters set on it narrow what is returned.
		Scan scan = new Scan();

		// Single-column value filter: returns whole rows, comparable to
		// select * from stu where age>22;
		/*SingleColumnValueFilter filter=new SingleColumnValueFilter(
				"base_info".getBytes(),
				"age".getBytes(), CompareOp.GREATER,
				"22".getBytes());*/

		// Row-key prefix filter: the argument is the row-key prefix (or a whole row key).
		/*PrefixFilter filter = new PrefixFilter("zhangsan".getBytes());
		scan.setFilter(filter);*/

		// Page filter: the argument is the page size. Without a start row the
		// scan begins at the first row, so this yields the first page.
		PageFilter filter = new PageFilter(5);

		// To show the second page, start just after the last row key of the
		// first page (00000100 in the author's data); the start row is inclusive.
		//scan.setStartRow(("00000100"+"\001").getBytes());

		scan.setFilter(filter);

		scanAndPrint(table, scan);
	}

	/**
	 * Runs {@code scan} on {@code table} and prints each returned row as:
	 * the row key, a separator line, then its cells. The scanner is closed
	 * before returning.
	 */
	private static void scanAndPrint(HTable table, Scan scan) throws IOException {
		// A ResultScanner yields one Result per row.
		try (ResultScanner scanner = table.getScanner(scan)) {
			for (Result row : scanner) {
				System.out.println(new String(row.getRow()));
				System.out.println("-----------------------------");
				printCells(row);
			}
		}
	}

	/**
	 * Prints every cell of {@code row} as family, qualifier and value
	 * separated by tabs, one cell per line.
	 */
	private static void printCells(Result row) {
		List<Cell> cells = row.listCells();
		// listCells() returns null when the result holds no cells.
		if (cells == null) {
			return;
		}
		for (Cell c : cells) {
			System.out.print(new String(c.getFamily()) + "\t");
			System.out.print(new String(c.getQualifier()) + "\t");
			System.out.println(new String(c.getValue()));
		}
	}

}