HBase和ElasticSearch整合构建二级索引

HBase和ElasticSearch整合构建二级索引

1 Maven的pom.xml文件配置

	 <dependencies>
        <!-- Elasticsearch transport client -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>6.0.0</version>
        </dependency>
        <!-- Logging backend (Log4j 2 core) -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.9.1</version>
        </dependency>
        <!-- JSON serialization / parsing utility (fastjson) -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <!-- Unit testing (declared without a <scope>, so it uses Maven's default scope) -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- Apache POI artifacts used to parse Excel workbooks; all three are pinned to the same version -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.8</version>
        </dependency>
        <!-- HBase client dependency -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Compile with Java 8 source and target levels -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

2 EsArticle

package com.bangzhang.center.es.bean;


public class EsArticle {
    private String id;
    private String title;
    private String from;
    private String time;
    private String readCount;
    private String content;

    public EsArticle(){

    }

    public EsArticle(String id, String title, String from, String time, String readCount, String content) {
        this.id = id;
        this.title = title;
        this.from = from;
        this.time = time;
        this.readCount = readCount;
        this.content = content;
    }

    public void setAll(String id, String title, String from, String time, String readCount, String content){
        this.setId(id);
        this.setTitle(title);
        this.setFrom(from);
        this.setTime(time);
        this.setReadCount(readCount);
        this.setContent(content);
    }


    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getFrom() {
        return from;
    }

    public void setFrom(String from) {
        this.from = from;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getReadCount() {
        return readCount;
    }

    public void setReadCount(String readCount) {
        this.readCount = readCount;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    @Override
    public String toString() {
        return "EsArticle{" +
                "id='" + id + '\'' +
                ", title='" + title + '\'' +
                ", from='" + from + '\'' +
                ", time='" + time + '\'' +
                ", readCount=" + readCount +
                ", content='" + content + '\'' +
                '}';
    }
}

3 解析Excel表的数据

package com.bangzhang.center.es.util;

import com.bangzhang.center.es.bean.EsArticle;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


/**
 * Reads article rows from an {@code .xlsx} workbook and converts them to {@link EsArticle} beans.
 */
public class ExcelParseUtil {

    /**
     * Parses the first sheet of the workbook stored at {@code path}.
     *
     * <p>Reading starts at row index 1; row 0 is never read (presumably a header row —
     * confirm against the spreadsheet). Columns 0..5 are mapped, in order, to
     * id, title, from, time, readCount and content.
     *
     * <p>Rows that do not exist in the sheet, and rows whose id cell is missing or blank,
     * are skipped because such a row cannot be keyed by id. Any other missing cell is
     * mapped to the empty string instead of failing the whole import.
     *
     * <p>NOTE(review): cell values are obtained through {@code toString()}, so numeric
     * cells may be rendered like {@code "1.0"} — confirm that ids are stored as text.
     *
     * @param path file system path of the {@code .xlsx} workbook
     * @return the parsed articles in sheet order; never {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public static List<EsArticle> parseExcel(String path) throws IOException {
        List<EsArticle> esArticles = new ArrayList<>();
        // try-with-resources releases the file handle even when parsing fails part-way.
        try (FileInputStream inputStream = new FileInputStream(path)) {
            XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
            XSSFSheet sheet = workbook.getSheetAt(0);
            int lastRowNum = sheet.getLastRowNum();
            for (int i = 1; i <= lastRowNum; i++) {
                XSSFRow row = sheet.getRow(i);
                if (row == null) {
                    // Blank rows inside the used range are reported as null.
                    continue;
                }
                String id = cellText(row, 0);
                if (id.trim().isEmpty()) {
                    // Without an id there is nothing to key the record on.
                    continue;
                }
                esArticles.add(new EsArticle(
                        id,
                        cellText(row, 1),
                        cellText(row, 2),
                        cellText(row, 3),
                        cellText(row, 4),
                        cellText(row, 5)));
            }
        }
        return esArticles;
    }

    /** Returns the text of the cell at {@code columnIndex}, or {@code ""} when the cell is absent. */
    private static String cellText(XSSFRow row, int columnIndex) {
        // Held as Object so no additional POI import is required; toString() still dispatches to the cell.
        Object cell = row.getCell(columnIndex);
        return cell == null ? "" : cell.toString();
    }
}

4 HbaseUtil工具类

package com.bangzhang.center.es.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;


/**
 * Static helpers for single-cell reads and writes against HBase.
 *
 * <p>One {@link Connection} is created lazily and reused by every call. Previously each
 * read or write built a brand-new connection and never closed it. {@link Table} handles
 * obtained internally are closed after use.
 */
public class HbaseUtil {

    /** ZooKeeper quorum used to locate the HBase cluster. */
    private static final String ZOOKEEPER_QUORUM = "node-01:2181,node-02:2181,node-03:2181";

    /** Shared connection; created on first use and guarded by {@link #getConnection()}. */
    private static Connection connection;

    /** Returns the shared connection, creating it (or re-creating it after a close) on demand. */
    private static synchronized Connection getConnection() throws IOException {
        if (connection == null || connection.isClosed()) {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", ZOOKEEPER_QUORUM);
            connection = ConnectionFactory.createConnection(conf);
        }
        return connection;
    }

    /**
     * Returns a handle to the named HBase table, backed by the shared connection.
     *
     * <p>The caller owns the returned {@link Table} and should close it when done;
     * closing the table does not close the shared connection.
     *
     * @param tbname name of the HBase table
     * @return a table handle
     * @throws IOException if the connection or the table handle cannot be obtained
     */
    public static Table getHbaseTable(String tbname) throws IOException {
        return getConnection().getTable(TableName.valueOf(tbname));
    }

    /**
     * Writes a single cell.
     *
     * @param tableName target table
     * @param rowkey    row key of the cell
     * @param family    column family
     * @param column    column qualifier
     * @param value     value to store, encoded with {@link Bytes#toBytes(String)}
     * @throws IOException if the write fails
     */
    public static  void writeToHbase(String tableName,String rowkey,String family,String column,String value) throws IOException {
        try (Table table = getHbaseTable(tableName)) {
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), Bytes.toBytes(value));
            table.put(put);
        }
    }

    /**
     * Reads a single cell and decodes it as a string.
     *
     * @param tableName table to read from
     * @param rowkey    row key of the cell
     * @param family    column family
     * @param column    column qualifier
     * @return the cell value, or {@code null} when the row or the column does not exist
     * @throws IOException if the read fails
     */
    public static String readFromHbase(String tableName,String rowkey,String family,String column) throws IOException {
        byte[] familyBytes = Bytes.toBytes(family);
        byte[] columnBytes = Bytes.toBytes(column);
        try (Table table = getHbaseTable(tableName)) {
            Get get = new Get(Bytes.toBytes(rowkey));
            // Ask the server for the requested column only instead of the whole row.
            get.addColumn(familyBytes, columnBytes);
            Result result = table.get(get);
            return Bytes.toString(result.getValue(familyBytes, columnBytes));
        }
    }
}

5 ESUtil

package com.bangzhang.center.es.util;

import com.bangzhang.center.es.bean.EsArticle;
import com.alibaba.fastjson.JSON;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;


public class ESUtil {

    static String indexName = "articles";
    static String typeName = "article";

    //获取ES的客户端
    public static TransportClient getClient() throws UnknownHostException {
        //构建配置setting对象
        Settings settings = Settings.builder().put("cluster.name","myes").build();
        //构建客户端对象
        TransportClient client = new PreBuiltTransportClient(settings)
                .addTransportAddress(new TransportAddress(InetAddress.getByName("node-01"),9300))
                .addTransportAddress(new TransportAddress(InetAddress.getByName("node-02"),9300))
                .addTransportAddress(new TransportAddress(InetAddress.getByName("node-03"),9300));
        return client;
    }


    //用于将数据写入ES
    public static void writeToES(List<EsArticle> esArticles) throws UnknownHostException {
        TransportClient client = getClient();
        //写入数据
        BulkRequestBuilder bulk = client.prepareBulk();
        for (EsArticle esArticle : esArticles) {
            //将JavaBean转为JsonString
            String jsonString = JSON.toJSONString(esArticle);
            IndexRequestBuilder requestBuilder = client.prepareIndex(indexName, typeName, esArticle.getId()).setSource(jsonString, XContentType.JSON);
            //将请求放入bulk
            bulk.add(requestBuilder);
        }
        //批量执行
        bulk.get();
    }

    //检索数据
    public  static  List<EsArticle> searchTitle(String keyword) throws UnknownHostException {
        //构建返回的对象
        List<EsArticle> esArticles = new ArrayList<>();
        //匹配
        TransportClient client = getClient();
        SearchResponse title = client.prepareSearch(indexName)
                .setTypes(typeName)
                .setQuery(QueryBuilders.termQuery("title", keyword))
                .get();
        SearchHit[] hits = title.getHits().getHits();
        for (SearchHit hit : hits) {
            String sourceAsString = hit.getSourceAsString();
            //将每条JSONString转为JavaBean
            EsArticle esArticle = JSON.parseObject(sourceAsString, EsArticle.class);
            //放入集合
            esArticles.add(esArticle);
        }

        //返回
        return  esArticles;
    }
}

6 LogstashTCP:基于TCP向Logstash发送数据

package com.bangzhang.center.es.tcp;

import java.io.PrintWriter;
import java.net.Socket;


/**
 * Small demo client that sends ten text lines to a TCP listener on {@code node-01:45454}.
 */
public class LogstashTCP {

    /**
     * Connects to the server, sends ten messages (one per line, flushed individually)
     * and then closes the writer and the socket.
     *
     * <p>Messages are encoded as UTF-8 explicitly so the Chinese text does not depend on
     * the platform default charset.
     *
     * @param args unused
     * @throws Exception if the connection cannot be established or a write fails
     */
    public static void main(String[] args) throws Exception{
        // try-with-resources closes the writer first and then the socket, even on failure.
        try (Socket client = new Socket("node-01", 45454);
             PrintWriter printWriter = new PrintWriter(
                     new java.io.OutputStreamWriter(
                             client.getOutputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            System.out.println("连接已建立...");
            for(int i=0;i<10;i++){
                printWriter.println("hello logstash , 这是第"+i+" 条消息");
                printWriter.flush();
            }
            // PrintWriter never throws on write errors; it only records them.
            if (printWriter.checkError()) {
                throw new java.io.IOException("Failed to send one or more messages to node-01:45454");
            }
        }
    }

}

7 HbaseAndES

package com.bangzhang.center.es.app;

import com.bangzhang.center.es.bean.EsArticle;
import com.bangzhang.center.es.util.ESUtil;
import com.bangzhang.center.es.util.ExcelParseUtil;
import com.bangzhang.center.es.util.HbaseUtil;

import java.io.IOException;
import java.util.List;


/**
 * Demo driver that combines HBase (full article storage) with Elasticsearch (title index):
 * articles are parsed from an Excel file, and a keyword search in Elasticsearch yields ids
 * whose {@code content} column is then fetched from HBase.
 */
public class HbaseAndES {

    static  String tableName = "articles";
    static  String family = "article";

    /**
     * Program entry point.
     *
     * @param args unused
     * @throws IOException if parsing the Excel file or talking to HBase/Elasticsearch fails
     */
    public static void main(String[] args) throws IOException {
        // Step 1: turn every row of the Excel sheet into an EsArticle bean.
        List<EsArticle> parsedArticles = ExcelParseUtil.parseExcel("datas/excel/hbaseEs.xlsx");
        // Step 2 is currently disabled: writeToHbaseAndES(parsedArticles) would store the
        // parsed beans in both HBase and Elasticsearch.
        // Step 3: look up article bodies for a fixed keyword.
        searchContent("电信");
    }

    /**
     * Finds articles by title keyword in Elasticsearch and prints, for each hit,
     * the {@code content} column read from HBase under the hit's id.
     */
    private static void searchContent(String keyword) throws IOException {
        for (EsArticle hit : ESUtil.searchTitle(keyword)) {
            // The Elasticsearch document id doubles as the HBase row key.
            System.out.println(HbaseUtil.readFromHbase(tableName, hit.getId(), family, "content"));
        }
    }

    /** Stores the articles in HBase first and then indexes them in Elasticsearch. */
    private static void writeToHbaseAndES(List<EsArticle> esArticles) throws IOException {
        writeHbase(esArticles);
        ESUtil.writeToES(esArticles);
    }

    /**
     * Writes each article to HBase as five separate cells
     * (title, from, time, readCount, content), keyed by the article id.
     */
    private static void writeHbase(List<EsArticle> esArticles) throws IOException {
        for (EsArticle article : esArticles) {
            String rowKey = article.getId();
            HbaseUtil.writeToHbase(tableName, rowKey, family, "title", article.getTitle());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "from", article.getFrom());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "time", article.getTime());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "readCount", article.getReadCount());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "content", article.getContent());
        }
    }
}

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值