真实项目:通过 HBase 操作表获取数据

24 篇文章 0 订阅
6 篇文章 0 订阅
package com.newegg.modesty.service;

import com.newegg.modesty.config.AutoPartsConfig;
import com.newegg.modesty.hbase.HbaseOperation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Entry-point service that wires the HBase configuration into an
 * {@link HbaseOperation} and kicks off a full table scan.
 *
 * @author Modesty.P.Gao
 * @version 1.0
 * @date 2021/12/10 9:43
 */
@Service
public class AuroPartsService {

    /** Holds the externally-configured HBase host and table name. */
    @Autowired
    private AutoPartsConfig config;

    /**
     * Runs the scan job: builds an operation object from the configured
     * host/table, opens the connection, then scans the whole table.
     */
    public void startJob() {
        final HbaseOperation operation =
                new HbaseOperation(config.getHbaseHost(), config.getHbaseTable());
        operation.initConnect();
        operation.scanTable();
    }

}

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>HbaseDemoWork</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.12.RELEASE</version>
    </parent>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <exclusions>
                <!-- Excluded to avoid clashing with the logging binding pulled in by hbase-client. -->
                <exclusion>
                    <artifactId>logback-classic</artifactId>
                    <groupId>ch.qos.logback</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0-cdh5.9.3</version>
            <exclusions>
                <exclusion>
                    <artifactId>jdk.tools</artifactId>
                    <groupId>jdk.tools</groupId>
                </exclusion>
                <!-- The old servlet-api conflicts with the one provided by spring-boot-starter-web. -->
                <exclusion>
                    <artifactId>servlet-api</artifactId>
                    <groupId>javax.servlet</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <!-- Bumped from 1.2.78: versions before 1.2.83 are affected by
                 deserialization RCE vulnerabilities (e.g. CVE-2022-25845). -->
            <version>1.2.83</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <!-- Bumped from 4.11: versions before 4.13.1 are affected by
                 CVE-2020-15250 (TemporaryFolder information disclosure). -->
            <version>4.13.2</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
package com.newegg.modesty.constant;

/**
 * HBase column-family / qualifier names and result-map counter keys shared by
 * the scan jobs.
 *
 * @author Modesty.P.Gao
 * @version 1.0
 * @date 2021/12/10 9:05
 */
public final class Constant {

    /** Utility holder: never instantiated. */
    private Constant() {
    }

    // Keys of the per-column counters in the result maps returned by the scan jobs.
    public static final String NAME_COUNT = "NameCount";
    public static final String PHONE_COUNT = "PhoneCount";
    public static final String AGE_COUNT = "AgeCount";
    public static final String EMAIL_COUNT = "EmailCount";

    // HBase column families and qualifiers of the scanned table.
    public static final String FAMILY_INFO = "information";
    public static final String FAMILY_CONTACT = "contact";
    public static final String COLUMN_AGE = "age";
    public static final String COLUMN_NAME = "name";
    public static final String COLUMN_PHONE = "phone";
    public static final String COLUMN_EMAIL = "email";


}

package com.newegg.modesty.config;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

/**
 * Externalized HBase settings, bound from application properties.
 *
 * @author Modesty.P.Gao
 * @version 1.0
 * @date 2021/12/10 9:00
 */
@Configuration
public class AutoPartsConfig {

    /** ZooKeeper quorum of the HBase cluster, bound from {@code hbase.hosts}. */
    @Value("${hbase.hosts}")
    private String hbaseHost;

    /** Name of the HBase table to scan, bound from {@code hbase.table}. */
    @Value("${hbase.table}")
    private String hbaseTable;

    public String getHbaseHost() {
        return this.hbaseHost;
    }

    public void setHbaseHost(String hbaseHost) {
        this.hbaseHost = hbaseHost;
    }

    public String getHbaseTable() {
        return this.hbaseTable;
    }

    public void setHbaseTable(String hbaseTable) {
        this.hbaseTable = hbaseTable;
    }
}

package com.newegg.modesty.hbase;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.newegg.modesty.constant.Constant;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.tomcat.util.threads.ThreadPoolExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.CollectionUtils;


import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Scans an HBase table in parallel (one task per region) and counts, per tracked
 * column, how many rows contain a non-empty value. Counters are accumulated in a
 * shared {@link ConcurrentHashMap} of {@link AtomicInteger}s.
 *
 * @author Modesty.P.Gao
 * @version 1.0
 * @date 2021/12/10 9:42
 */
public class HbaseOperation {

    private static final Logger LOGGER = LoggerFactory.getLogger(HbaseOperation.class);

    /** Rows fetched per scanner RPC; trades client memory for fewer round trips. */
    private static final int SCAN_CACHING = 5000;

    private Connection connection;
    private final String hbaseHost;
    private final String hbaseTable;

    // Fully qualified on purpose: this file explicitly imports Tomcat's
    // ThreadPoolExecutor, which would otherwise win over the wildcard
    // java.util.concurrent import. The JDK executor is the intended type.
    private final ExecutorService executorService =
            new java.util.concurrent.ThreadPoolExecutor(10, 10, 0L, TimeUnit.MINUTES,
                    new LinkedBlockingQueue<>(1024));

    /**
     * @param hbaseHost  comma-separated ZooKeeper quorum of the HBase cluster
     * @param hbaseTable name of the table to scan
     */
    public HbaseOperation(String hbaseHost, String hbaseTable) {
        this.hbaseHost = hbaseHost;
        this.hbaseTable = hbaseTable;
    }

    /** Builds the HBase configuration and (re)opens the cluster connection. */
    public void initConnect() {
        Configuration baseConf = HBaseConfiguration.create();
        baseConf.set("hbase.zookeeper.quorum", hbaseHost);
        try {
            connection = initConnect(connection, baseConf);
        } catch (IOException e) {
            LOGGER.error("connect hbase has error", e);
        }
    }

    /**
     * Closes any previous connection and opens a fresh one from {@code configuration}.
     *
     * @throws IOException if closing the old connection or opening the new one fails
     */
    private Connection initConnect(Connection connection, Configuration configuration) throws IOException {
        if (Objects.nonNull(configuration) && Objects.nonNull(connection)) {
            connection.close();
        }
        return ConnectionFactory.createConnection(configuration);
    }

    /**
     * Scans the configured table and returns the per-column row counters.
     *
     * @return counters keyed by {@link Constant#NAME_COUNT} etc.; an empty map on failure
     */
    public ConcurrentHashMap<String, AtomicInteger> scanTable() {
        Table table = null;
        LOGGER.info("begin to run hbase data");
        TableName tableName = TableName.valueOf(this.hbaseTable);
        try {
            table = connection.getTable(tableName);
            List<HRegionInfo> regionInfoList = connection.getAdmin().getTableRegions(tableName);
            LOGGER.info("fetch table regionInfoList:{}", regionInfoList);
            // CollectionUtils.isEmpty already handles null; no separate null check needed.
            if (!CollectionUtils.isEmpty(regionInfoList)) {
                return scanMultipleRegions(table, regionInfoList);
            }
        } catch (IOException e) {
            // Fix: failures were logged at INFO; raise to ERROR so they are visible.
            LOGGER.error("scan table has error,", e);
        } finally {
            // scanMultipleRegions blocks on all futures, so closing here is safe.
            if (Objects.nonNull(table)) {
                try {
                    table.close();
                } catch (IOException e) {
                    LOGGER.error("table close has error,", e);
                }
            }
        }
        return new ConcurrentHashMap<>();
    }

    /**
     * Submits one scan task per region on the pool and blocks until all finish.
     *
     * @param table          shared table handle (thread-safe for reads)
     * @param regionInfoList regions of the table, one task each
     * @return the populated counter map
     */
    private ConcurrentHashMap<String, AtomicInteger> scanMultipleRegions(final Table table, List<HRegionInfo> regionInfoList) {
        LOGGER.info("start run multiple region function.");
        ConcurrentHashMap<String, AtomicInteger> result = new ConcurrentHashMap<>();
        result.put(Constant.NAME_COUNT, new AtomicInteger(0));
        result.put(Constant.PHONE_COUNT, new AtomicInteger(0));
        result.put(Constant.AGE_COUNT, new AtomicInteger(0));
        result.put(Constant.EMAIL_COUNT, new AtomicInteger(0));
        // LinkedHashMap keeps submission order so waiting/logging follows scan order.
        Map<String, Future<?>> futureMap = new LinkedHashMap<>();
        for (HRegionInfo hRegionInfo : regionInfoList) {
            Future<?> f = executorService.submit(() -> scanTableByRegion(hRegionInfo, result, table));
            futureMap.put(hRegionInfo.getRegionNameAsString(), f);
        }
        if (!CollectionUtils.isEmpty(futureMap)) {
            for (Map.Entry<String, Future<?>> entry : futureMap.entrySet()) {
                try {
                    LOGGER.info("start to wait region result:[{}]", entry.getKey());
                    entry.getValue().get();
                } catch (InterruptedException e) {
                    // Fix: restore the interrupt flag (the original only printed the
                    // stack trace, silently swallowing the interrupt).
                    LOGGER.error("interrupted while waiting for region [{}]", entry.getKey(), e);
                    Thread.currentThread().interrupt();
                } catch (ExecutionException e) {
                    // Fix: a task failure is not an interrupt; the original wrongly
                    // interrupted the current thread here.
                    LOGGER.error("wait thread pool has error.", e);
                }
            }
            LOGGER.info("end to scan table");
        }
        print("End result", result);
        return result;
    }

    /** Logs the current value of each counter under the given message prefix. */
    private void print(String message, ConcurrentHashMap<String, AtomicInteger> resultMap) {
        LOGGER.info(message + ", nameCount:{}, phoneCount:{}, ageCount:{}, emailCount:{}", resultMap.get(Constant.NAME_COUNT),
                resultMap.get(Constant.PHONE_COUNT), resultMap.get(Constant.AGE_COUNT), resultMap.get(Constant.EMAIL_COUNT));
    }

    /**
     * Scans one region (or the whole table when {@code hRegionInfo} is null) and
     * feeds every row into {@link #transFormResult}.
     */
    private void scanTableByRegion(HRegionInfo hRegionInfo, ConcurrentHashMap<String, AtomicInteger> resultMap, Table finalTable) {
        LOGGER.info("start to scan region:[{}]", Objects.nonNull(hRegionInfo) ? hRegionInfo.getRegionNameAsString() : "all");
        long start = System.currentTimeMillis();
        String regionName = "all";
        Scan scan = new Scan();
        try {
            if (Objects.nonNull(hRegionInfo)) {
                regionName = hRegionInfo.getRegionNameAsString();
                LOGGER.info("region [{}] start:{}, end:{}", regionName,
                        Bytes.toString(hRegionInfo.getStartKey()), Bytes.toString(hRegionInfo.getEndKey()));
                scan.setStartRow(hRegionInfo.getStartKey());
                scan.setStopRow(hRegionInfo.getEndKey());
            }

            scan.setCaching(SCAN_CACHING);

            long rowCount = 0;
            long itemCount = 0;
            // Fix: the scanner was never closed; try-with-resources releases it.
            try (ResultScanner results = finalTable.getScanner(scan)) {
                for (Result result : results) {
                    transFormResult(result, resultMap);
                    rowCount++;
                    // Fix: itemCount was declared and logged but never incremented.
                    itemCount += result.size();
                    if (rowCount % 5000 == 0) {
                        // Throttle: pause briefly every 5000 rows to ease cluster load.
                        try {
                            Thread.sleep(1000);
                        } catch (InterruptedException e) {
                            // Fix: restore the interrupt flag and stop scanning instead
                            // of swallowing the interrupt with printStackTrace().
                            Thread.currentThread().interrupt();
                            LOGGER.warn("scan of region [{}] interrupted.", regionName);
                            break;
                        }
                        print(new String(result.getRow(), StandardCharsets.UTF_8), resultMap);
                    }
                }
            }

            LOGGER.info("end to scan region:[{}], row sum is {},hbase item sum is {}, used time {} s.",
                    regionName, rowCount, itemCount,
                    (System.currentTimeMillis() - start) / 1000);

        } catch (IOException e) {
            // Fix: an IO failure is not an interrupt; log it at ERROR and leave
            // the worker thread's interrupt status alone.
            LOGGER.error("search table has error,", e);
        }
    }

    /**
     * Increments the counter for every tracked column that carries a non-empty
     * value in {@code result}.
     */
    private void transFormResult(Result result, ConcurrentHashMap<String, AtomicInteger> resultMap) {
        try {
            byte[] infoNames = result.getValue(Constant.FAMILY_INFO.getBytes(StandardCharsets.UTF_8), Constant.COLUMN_NAME.getBytes(StandardCharsets.UTF_8));
            byte[] infoAge = result.getValue(Constant.FAMILY_INFO.getBytes(StandardCharsets.UTF_8), Constant.COLUMN_AGE.getBytes(StandardCharsets.UTF_8));
            byte[] contPhones = result.getValue(Constant.FAMILY_CONTACT.getBytes(StandardCharsets.UTF_8), Constant.COLUMN_PHONE.getBytes(StandardCharsets.UTF_8));
            byte[] contEmail = result.getValue(Constant.FAMILY_CONTACT.getBytes(StandardCharsets.UTF_8), Constant.COLUMN_EMAIL.getBytes(StandardCharsets.UTF_8));

            incrementIfPresent(infoNames, Constant.NAME_COUNT, resultMap);
            incrementIfPresent(infoAge, Constant.AGE_COUNT, resultMap);
            incrementIfPresent(contPhones, Constant.PHONE_COUNT, resultMap);
            incrementIfPresent(contEmail, Constant.EMAIL_COUNT, resultMap);
        } catch (Exception e) {
            // Fix: log the exception itself and decode the row key as UTF-8
            // explicitly instead of the platform default charset.
            LOGGER.error("Analysis row[{}] error.", new String(result.getRow(), StandardCharsets.UTF_8), e);
        }
    }

    /** Adds 1 to {@code counterKey} when {@code value} decodes to a non-empty UTF-8 string. */
    private void incrementIfPresent(byte[] value, String counterKey, ConcurrentHashMap<String, AtomicInteger> resultMap) {
        if (Objects.nonNull(value) && !new String(value, StandardCharsets.UTF_8).isEmpty()) {
            resultMap.get(counterKey).incrementAndGet();
        }
    }
}

以下是同一任务的带详细注释版本:

package com.newegg.modesty.hbase;

import com.newegg.modesty.Config.Config;
import com.newegg.modesty.property.Prop;
import org.apache.commons.configuration.ConfigurationFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.CollectionUtils;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Scan job variant: opens an HBase connection, scans the configured table
 * region-by-region on a thread pool, and counts how many rows carry a value
 * for each tracked column.
 *
 * @author Modesty.P.Gao
 * @version 1.0
 * @date 2021/12/14 13:49
 */
public class HbaseJob {

    private static final Logger LOGGER = LoggerFactory.getLogger(HbaseJob.class);

    /** Live HBase cluster connection, created by {@link #initConnect()}. */
    private Connection connection;
    /** ZooKeeper quorum of the HBase cluster. */
    private final String hbaseHost;
    /** Name of the table to scan. */
    private final String hbaseTable;
    /** Fixed-size pool: up to 10 concurrent region scans, the rest queued. */
    private final ExecutorService executorService =
            new ThreadPoolExecutor(10, 10, 0L, TimeUnit.MINUTES, new LinkedBlockingDeque<>(1024));

    /**
     * @param hbaseHost  comma-separated ZooKeeper quorum of the HBase cluster
     * @param hbaseTable name of the table to scan
     */
    public HbaseJob(String hbaseHost, String hbaseTable) {
        this.hbaseHost = hbaseHost;
        this.hbaseTable = hbaseTable;
    }

    /** Builds the HBase configuration and (re)opens the cluster connection. */
    public void initConnect() {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", hbaseHost);
        try {
            connection = initConnect(connection, configuration);
        } catch (IOException e) {
            // Fix: pass the exception as an argument (string concatenation
            // dropped the stack trace).
            LOGGER.error("connect has a error", e);
        }
    }

    /**
     * Closes any existing connection and opens a fresh one from {@code configuration}.
     *
     * @throws IOException if closing or reconnecting fails
     */
    private Connection initConnect(Connection connection, Configuration configuration) throws IOException {
        if (Objects.nonNull(configuration) && Objects.nonNull(connection)) {
            connection.close();
        }
        // Fix: the configuration was silently dropped here — the no-arg overload
        // ignores the quorum set in initConnect() and uses the classpath default.
        return ConnectionFactory.createConnection(configuration);
    }

    /**
     * Scans the configured table and returns the per-column row counters.
     *
     * @return counters keyed by {@code Prop.COUNT_*}; an empty map on failure
     */
    public HashMap<String, AtomicInteger> scanTable() {
        Table table = null;
        LOGGER.info("start rub hbase data");
        TableName tableName = TableName.valueOf(hbaseTable);
        try {
            table = connection.getTable(tableName);
            List<HRegionInfo> regionsInfoList = connection.getAdmin().getTableRegions(tableName);
            LOGGER.info("regionsInfoList:{}", regionsInfoList);
            if (!CollectionUtils.isEmpty(regionsInfoList)) {
                return scanMultipleRegions(table, regionsInfoList);
            }
        } catch (IOException e) {
            // Fix: keep the stack trace by passing the exception as an argument.
            LOGGER.error("scan table has error", e);
        } finally {
            // scanMultipleRegions waits for every future, so closing here is safe.
            if (Objects.nonNull(table)) {
                try {
                    table.close();
                } catch (IOException e) {
                    LOGGER.error("table close has error", e);
                }
            }
        }
        return new HashMap<>();
    }

    /**
     * Submits one scan task per region and blocks until all complete.
     *
     * @param table           shared table handle (thread-safe for reads)
     * @param regionsInfoList regions of the table, one task each
     * @return the populated counter map
     */
    private HashMap<String, AtomicInteger> scanMultipleRegions(Table table, List<HRegionInfo> regionsInfoList) {
        LOGGER.info("start scan multiple table");
        HashMap<String, AtomicInteger> result = new HashMap<>();
        // Fix: the map was keyed with Prop.COLUMN_* but read back through
        // Prop.COUNT_* in transFormResult()/print() (and the email counter was
        // missing entirely), so every increment NPE'd and was swallowed.
        // Pre-populate all four counters under the keys the readers use; the map
        // structure is never mutated afterwards, only the AtomicInteger values.
        result.put(Prop.COUNT_NAME, new AtomicInteger(0));
        result.put(Prop.COUNT_AGE, new AtomicInteger(0));
        result.put(Prop.COUNT_PHONE, new AtomicInteger(0));
        result.put(Prop.COUNT_EMAIL, new AtomicInteger(0));
        // LinkedHashMap keeps submission order so waiting/logging follows scan order.
        Map<String, Future<?>> futureMap = new LinkedHashMap<>();
        for (HRegionInfo hRegionInfo : regionsInfoList) {
            Future<?> f = executorService.submit(() -> scanTableByRegion(hRegionInfo, result, table));
            futureMap.put(hRegionInfo.getRegionNameAsString(), f);
        }
        if (!CollectionUtils.isEmpty(futureMap)) {
            for (Map.Entry<String, Future<?>> entry : futureMap.entrySet()) {
                LOGGER.info("start to  region result:[{}]", entry.getKey());
                try {
                    entry.getValue().get();
                } catch (InterruptedException e) {
                    // Fix: only an interrupt should restore the interrupt flag.
                    LOGGER.error("interrupted while waiting for region [{}]", entry.getKey(), e);
                    Thread.currentThread().interrupt();
                } catch (ExecutionException e) {
                    // A failed task is logged but does not stop the other regions.
                    LOGGER.error("region result has error", e);
                }
            }
            LOGGER.info("end to scan table");
        }
        print("End result", result);
        return result;
    }

    /** Logs the current value of each counter under the given message prefix. */
    private void print(String message, HashMap<String, AtomicInteger> resultMap) {
        // Fix: phoneCount was read from Prop.COLUMN_PHONE (a column name, absent
        // from the map) instead of the Prop.COUNT_PHONE counter key.
        LOGGER.info(message + ", nameCount:{}, phoneCount:{}, emailCount:{},ageCount:{}", resultMap.get(Prop.COUNT_NAME),
                resultMap.get(Prop.COUNT_PHONE), resultMap.get(Prop.COUNT_EMAIL), resultMap.get(Prop.COUNT_AGE));
    }

    /**
     * Scans one region (or the whole table when {@code hRegionInfo} is null) and
     * feeds every row into {@link #transFormResult}.
     */
    private void scanTableByRegion(HRegionInfo hRegionInfo, HashMap<String, AtomicInteger> resultMap, Table table) {
        LOGGER.info("start to scan region:[{}]", Objects.nonNull(hRegionInfo) ? hRegionInfo.getRegionNameAsString() : "all");
        long start = System.currentTimeMillis();
        // "all" means no region bounds, i.e. a full-table scan.
        String regionName = "all";
        Scan scan = new Scan();

        if (Objects.nonNull(hRegionInfo)) {
            regionName = hRegionInfo.getRegionNameAsString();
            LOGGER.info("region[{}] start{},end{}", regionName, Bytes.toString(hRegionInfo.getStartKey()), Bytes.toString(hRegionInfo.getEndKey()));
            scan.setStartRow(hRegionInfo.getStartKey());
            scan.setStopRow(hRegionInfo.getEndKey());
        }
        // Rows fetched per scanner RPC: trades client memory for fewer round trips.
        scan.setCaching(100);

        // Fix: the scanner was never closed; try-with-resources releases it.
        try (ResultScanner results = table.getScanner(scan)) {
            for (Result result : results) {
                transFormResult(result, resultMap);
            }
            LOGGER.info("end to scan region:[{}], used time {} s.", regionName,
                    (System.currentTimeMillis() - start) / 1000);
        } catch (IOException e) {
            LOGGER.error("table scan has error", e);
        }
    }

    /**
     * Increments the counter for every tracked column that carries a non-empty
     * value in {@code result}.
     */
    private void transFormResult(Result result, HashMap<String, AtomicInteger> resultMap) {
        try {
            // getValue(family, qualifier) returns the raw cell bytes, or null
            // when the row has no such cell.
            byte[] infoNames = result.getValue(Prop.FAMILY_INFO.getBytes(StandardCharsets.UTF_8), Prop.COLUMN_NAME.getBytes(StandardCharsets.UTF_8));
            byte[] infoAge = result.getValue(Prop.FAMILY_INFO.getBytes(StandardCharsets.UTF_8), Prop.COLUMN_AGE.getBytes(StandardCharsets.UTF_8));
            byte[] conPhone = result.getValue(Prop.FAMILY_CONST.getBytes(StandardCharsets.UTF_8), Prop.COLUMN_PHONE.getBytes(StandardCharsets.UTF_8));
            byte[] conEmail = result.getValue(Prop.FAMILY_CONST.getBytes(StandardCharsets.UTF_8), Prop.COLUMN_EMAIL.getBytes(StandardCharsets.UTF_8));

            incrementIfPresent(infoNames, Prop.COUNT_NAME, resultMap);
            incrementIfPresent(infoAge, Prop.COUNT_AGE, resultMap);
            incrementIfPresent(conPhone, Prop.COUNT_PHONE, resultMap);
            incrementIfPresent(conEmail, Prop.COUNT_EMAIL, resultMap);
        } catch (Exception e) {
            // Fix: keep the stack trace by passing the exception as an argument.
            LOGGER.error("add data has error", e);
        }
    }

    /** Adds 1 to {@code counterKey} when {@code value} decodes to a non-empty UTF-8 string. */
    private void incrementIfPresent(byte[] value, String counterKey, HashMap<String, AtomicInteger> resultMap) {
        if (Objects.nonNull(value) && !new String(value, StandardCharsets.UTF_8).isEmpty()) {
            resultMap.get(counterKey).incrementAndGet();
        }
    }
}

  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小高求学之路

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值