大数据之hbase_hbase的java客户端基础操作2

最新推荐文章于 2024-01-05 16:17:16 发布

普罗米修斯之火

最新推荐文章于 2024-01-05 16:17:16 发布

阅读量197

点赞数

分类专栏： Hbase 文章标签： hbase

本文链接：https://blog.csdn.net/WuBoooo/article/details/108310176

版权

Hbase 专栏收录该内容

8 篇文章 1 订阅

订阅专栏

先封装两个方法,获取连接对象和遍历result对象,打印输出

import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Static helpers shared by the HBase client examples: opening a connection
 * and printing the cells of a {@link Result}.
 */
public class HbaseUtils {

    /** ZooKeeper quorum (host:port list) through which the client reaches the cluster. */
    private static final String ZOOKEEPER_QUORUM = "linux01:2181,linux02:2181,linux03:2181";

    /** Not instantiable: this class only exposes static helpers. */
    private HbaseUtils() {
    }

    /**
     * Creates a new HBase connection configured with the ZooKeeper quorum above.
     *
     * <p>The caller owns the returned connection and should close it when done.
     *
     * @return a newly created connection, never {@code null}
     * @throws java.io.UncheckedIOException if the connection cannot be created;
     *         the original {@link IOException} is kept as the cause
     */
    public static Connection getConnection() {
        // Start from the default HBase configuration and point it at ZooKeeper,
        // which the client uses to discover the cluster.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", ZOOKEEPER_QUORUM);
        try {
            return ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            // Fail here with the real cause instead of returning null and letting
            // the caller hit an unrelated NullPointerException later.
            throw new java.io.UncheckedIOException(
                    "Failed to create HBase connection via ZooKeeper quorum " + ZOOKEEPER_QUORUM, e);
        }
    }

    /**
     * Prints every cell of the given result to standard output, one line per
     * cell, formatted as {@code family-qualifier-value}.
     *
     * <p>Iterates over the cell array rather than the result's internal cursor,
     * so the same result can be printed more than once.
     *
     * @param result the row to print; {@code null} or an empty result prints nothing
     */
    public static void showData(Result result) {
        if (result == null) {
            return;
        }
        Cell[] cells = result.rawCells();
        if (cells == null) {
            // An empty Result may expose no cell array at all.
            return;
        }
        for (Cell cell : cells) {
            // CellUtil.clone* copies just this cell's family/qualifier/value bytes
            // into fresh arrays before they are decoded to strings.
            String family = Bytes.toString(CellUtil.cloneFamily(cell));
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println(family + "-" + qualifier + "-" + value);
        }
    }
}

get方法获取hbase表中数据

/**
 * Example: reads rows {@code r1} and {@code r2} from table {@code tb_user2}
 * with a single batched get and prints every returned cell.
 */
public class TableDemo {

    /**
     * Runs the batched-get example.
     *
     * @param args unused
     * @throws Exception if the table cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        // Connection and Table both hold client-side resources; try-with-resources
        // closes them (table first, then connection) even when the read fails.
        try (Connection conn = HbaseUtils.getConnection();
             Table table = conn.getTable(TableName.valueOf("tb_user2"))) {
            // One Get per row key, collected so they can be sent in one call.
            List<Get> gets = new ArrayList<>();
            gets.add(new Get("r1".getBytes()));
            gets.add(new Get("r2".getBytes()));

            Result[] results = table.get(gets);
            for (Result result : results) {
                HbaseUtils.showData(result);
            }
        }
    }
}

scan方法获取hbase表中数据

四种获取值的方式(下面的示例为了演示把它们写在了一起),实际使用时可根据具体情况进行取舍

  		// 1. A Scan with no range or column restrictions reads the whole table.
        Scan scan = new Scan();
        // 2. Restrict the scan to the "name" column of column family "nf2".
        scan.addColumn("nf2".getBytes(), "name".getBytes());
        // 3. Select every column of column family "nf2".
        // NOTE(review): options 2-4 are alternatives shown together for illustration;
        // keep only the ones the query actually needs.
        scan.addFamily("nf2".getBytes());
        // 4. Limit the row range: start row is inclusive, stop row is exclusive.
        scan.withStartRow("r1".getBytes());
        scan.withStopRow("r2".getBytes());
        // The scanner holds resources and must be closed once iteration is done;
        // try-with-resources does that even if printing a row throws.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result row : scanner) {
                HbaseUtils.showData(row);
            }
        }

get方法和scan方法相比,get方法的效率更高:scan方法在没有限定行范围时需要全表检索,然后过滤出我们想要的值;而get方法是直接按行键(rowkey)获取我们想要的值

put方法,将数据写入表中

        // A Put is created for one row key and then filled with cells for that row.
        // The target column family must already exist, otherwise the write fails.
        Put rowR3 = new Put("r3".getBytes())
                .addColumn("nf1".getBytes(), "age".getBytes(), "18".getBytes())
                .addColumn("nf1".getBytes(), "name".getBytes(), "zs".getBytes())
                .addColumn("nf2".getBytes(), "job".getBytes(), "teacher".getBytes());
        // A second row, so both can be written with a single call.
        Put rowR4 = new Put("r4".getBytes())
                .addColumn("nf1".getBytes(), "age".getBytes(), "20".getBytes())
                .addColumn("nf1".getBytes(), "name".getBytes(), "li".getBytes())
                .addColumn("nf2".getBytes(), "job".getBytes(), "teacher".getBytes());
        // Collect the Puts and hand the whole batch to the table.
        ArrayList<Put> batch = new ArrayList<>();
        batch.add(rowR3);
        batch.add(rowR4);
        table.put(batch);

使用缓存批次导入数据,减少hbase服务器的压力

		// A BufferedMutator buffers writes on the client and sends them in batches,
        // so the server is not contacted once per row.
        // try-with-resources closes the mutator even when a write fails.
        try (BufferedMutator mutator = conn.getBufferedMutator(TableName.valueOf("tb_user2"))) {
            // Each Put targets one row key; the column families used must already
            // exist, otherwise the write fails.
            Put put3 = new Put("r5".getBytes());
            put3.addColumn("nf1".getBytes(), "age".getBytes(), "18".getBytes());
            put3.addColumn("nf1".getBytes(), "name".getBytes(), "zs".getBytes());
            put3.addColumn("nf2".getBytes(), "job".getBytes(), "teacher".getBytes());
            Put put4 = new Put("r6".getBytes());
            put4.addColumn("nf1".getBytes(), "age".getBytes(), "20".getBytes());
            put4.addColumn("nf1".getBytes(), "name".getBytes(), "li".getBytes());
            put4.addColumn("nf2".getBytes(), "job".getBytes(), "teacher".getBytes());
            ArrayList<Put> putss = new ArrayList<>();
            putss.add(put3);
            putss.add(put4);
            mutator.mutate(putss);
            // Buffered writes stay on the client until the buffer fills up, so flush
            // explicitly to push them to HBase now.
            mutator.flush();
        }

普罗米修斯之火

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
大数据之hbase_hbase的java客户端基础操作2

先封装两个方法,获取连接对象和遍历result对象,打印输出import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.Cell;import org.apache.hadoop.hbase.CellUtil;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.client.Connection;i
复制链接

扫一扫

专栏目录