HBase和ElasticSearch整合构建二级索引
1 maven的pom文件的配置
<!-- Libraries used by the example classes: Elasticsearch transport client, log4j, fastjson, JUnit, Apache POI and the HBase client. -->
<dependencies>
<!-- Elasticsearch transport client (provides TransportClient / PreBuiltTransportClient). -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.0.0</version>
</dependency>
<!-- Log4j 2 core. NOTE(review): 2.9.1 is an old release; check it against published log4j-core security advisories before reuse. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.9.1</version>
</dependency>
<!-- fastjson, used to convert EsArticle beans to and from JSON. NOTE(review): 1.2.47 is an old release; check fastjson security advisories before reuse. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
<!-- JUnit 4. NOTE(review): no <scope> is declared, so this ends up on the compile classpath; presumably test scope was intended. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<!-- Apache POI artifacts (all at 3.8) used to read the .xlsx workbook through the XSSF API. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.8</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.8</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.8</version>
</dependency>
<!-- HBase client API (Connection, Table, Put, Get). -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.0</version>
</dependency>
</dependencies>
<!-- Build settings: compile the sources as Java 1.8 (source and target level). -->
<build>
<plugins>
<!-- NOTE(review): no <version> is given for the compiler plugin, so the version Maven resolves by default is used. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
2 EsArticle
package com.bangzhang.center.es.bean;
public class EsArticle {
private String id;
private String title;
private String from;
private String time;
private String readCount;
private String content;
public EsArticle(){
}
public EsArticle(String id, String title, String from, String time, String readCount, String content) {
this.id = id;
this.title = title;
this.from = from;
this.time = time;
this.readCount = readCount;
this.content = content;
}
public void setAll(String id, String title, String from, String time, String readCount, String content){
this.setId(id);
this.setTitle(title);
this.setFrom(from);
this.setTime(time);
this.setReadCount(readCount);
this.setContent(content);
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getFrom() {
return from;
}
public void setFrom(String from) {
this.from = from;
}
public String getTime() {
return time;
}
public void setTime(String time) {
this.time = time;
}
public String getReadCount() {
return readCount;
}
public void setReadCount(String readCount) {
this.readCount = readCount;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
@Override
public String toString() {
return "EsArticle{" +
"id='" + id + '\'' +
", title='" + title + '\'' +
", from='" + from + '\'' +
", time='" + time + '\'' +
", readCount=" + readCount +
", content='" + content + '\'' +
'}';
}
}
3 解析Excel表的数据
package com.bangzhang.center.es.util;
import com.bangzhang.center.es.bean.EsArticle;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads article rows from an .xlsx workbook and turns them into {@link EsArticle} beans.
 */
public class ExcelParseUtil {

    /**
     * Parses the first sheet of the workbook at {@code path}.
     *
     * <p>Row 0 is skipped (the loop starts at row index 1). For every later row,
     * columns 0-5 are read as id, title, from, time, readCount and content, using
     * each cell's {@code toString()} value. Rows that the sheet does not define are
     * skipped, and a missing cell is read as an empty string.
     *
     * <p>NOTE(review): because values come from {@code toString()}, numeric cells
     * may render as e.g. {@code "1.0"} - confirm against the real workbook.
     *
     * @param path file-system path of the .xlsx file
     * @return one {@link EsArticle} per defined data row, in sheet order; never {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public static List<EsArticle> parseExcel(String path) throws IOException {
        List<EsArticle> esArticles = new ArrayList<>();
        // try-with-resources: the stream used to stay open for the life of the JVM.
        try (FileInputStream inputStream = new FileInputStream(path)) {
            XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
            XSSFSheet sheet = workbook.getSheetAt(0);
            int lastRowNum = sheet.getLastRowNum();
            for (int i = 1; i <= lastRowNum; i++) {
                XSSFRow row = sheet.getRow(i);
                if (row == null) {
                    // getRow returns null for rows that were never created (gaps in the sheet).
                    continue;
                }
                esArticles.add(new EsArticle(
                        cellText(row, 0),
                        cellText(row, 1),
                        cellText(row, 2),
                        cellText(row, 3),
                        cellText(row, 4),
                        cellText(row, 5)));
            }
        }
        return esArticles;
    }

    /** Returns the cell's {@code toString()} text, or an empty string when the cell is missing. */
    private static String cellText(XSSFRow row, int index) {
        Object cell = row.getCell(index);
        return cell == null ? "" : cell.toString();
    }
}
4 HbaseUtil工具类
package com.bangzhang.center.es.util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Small helper around the HBase client for single-cell reads and writes.
 *
 * <p>One {@link Connection} is created lazily and shared by all calls. Previously
 * every call opened a new connection that was never closed. {@link Table} handles
 * are obtained per operation and closed as soon as the operation finishes.
 */
public class HbaseUtil {

    /** Shared connection; created on first use, guarded by the class lock. */
    private static Connection connection;

    /** Returns the shared connection, (re)creating it if it is absent or has been closed. */
    private static synchronized Connection getConnection() throws IOException {
        if (connection == null || connection.isClosed()) {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum","node-01:2181,node-02:2181,node-03:2181");
            connection = ConnectionFactory.createConnection(conf);
        }
        return connection;
    }

    /**
     * Closes the shared connection, if one is open. A later call to any other
     * method of this class transparently opens a new one.
     *
     * @throws IOException if closing the connection fails
     */
    public static synchronized void closeConnection() throws IOException {
        if (connection != null) {
            try {
                connection.close();
            } finally {
                connection = null;
            }
        }
    }

    /**
     * Returns a handle for the named table, backed by the shared connection.
     * The caller is responsible for closing the returned {@link Table}.
     *
     * @param tbname name of the HBase table
     * @return a table handle
     * @throws IOException if the connection or the table handle cannot be obtained
     */
    public static Table getHbaseTable(String tbname) throws IOException {
        return getConnection().getTable(TableName.valueOf(tbname));
    }

    /**
     * Writes one cell.
     *
     * @param tableName table to write to
     * @param rowkey    row key
     * @param family    column family
     * @param column    column qualifier
     * @param value     cell value
     * @throws IOException if the put fails
     */
    public static void writeToHbase(String tableName,String rowkey,String family,String column,String value) throws IOException {
        try (Table table = getHbaseTable(tableName)) {
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), Bytes.toBytes(value));
            table.put(put);
        }
    }

    /**
     * Reads one cell and decodes it with {@code Bytes.toString}.
     *
     * @param tableName table to read from
     * @param rowkey    row key
     * @param family    column family
     * @param column    column qualifier
     * @return the decoded cell value; presumably {@code null} when the cell does not
     *         exist (per the HBase {@code Result.getValue} / {@code Bytes.toString} docs)
     * @throws IOException if the get fails
     */
    public static String readFromHbase(String tableName,String rowkey,String family,String column) throws IOException {
        try (Table table = getHbaseTable(tableName)) {
            Get get = new Get(Bytes.toBytes(rowkey));
            Result result = table.get(get);
            byte[] value = result.getValue(Bytes.toBytes(family), Bytes.toBytes(column));
            return Bytes.toString(value);
        }
    }
}
5 ESUtil
package com.bangzhang.center.es.util;
import com.bangzhang.center.es.bean.EsArticle;
import com.alibaba.fastjson.JSON;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
public class ESUtil {
static String indexName = "articles";
static String typeName = "article";
public static TransportClient getClient() throws UnknownHostException {
Settings settings = Settings.builder().put("cluster.name","myes").build();
TransportClient client = new PreBuiltTransportClient(settings)
.addTransportAddress(new TransportAddress(InetAddress.getByName("node-01"),9300))
.addTransportAddress(new TransportAddress(InetAddress.getByName("node-02"),9300))
.addTransportAddress(new TransportAddress(InetAddress.getByName("node-03"),9300));
return client;
}
public static void writeToES(List<EsArticle> esArticles) throws UnknownHostException {
TransportClient client = getClient();
BulkRequestBuilder bulk = client.prepareBulk();
for (EsArticle esArticle : esArticles) {
String jsonString = JSON.toJSONString(esArticle);
IndexRequestBuilder requestBuilder = client.prepareIndex(indexName, typeName, esArticle.getId()).setSource(jsonString, XContentType.JSON);
bulk.add(requestBuilder);
}
bulk.get();
}
public static List<EsArticle> searchTitle(String keyword) throws UnknownHostException {
List<EsArticle> esArticles = new ArrayList<>();
TransportClient client = getClient();
SearchResponse title = client.prepareSearch(indexName)
.setTypes(typeName)
.setQuery(QueryBuilders.termQuery("title", keyword))
.get();
SearchHit[] hits = title.getHits().getHits();
for (SearchHit hit : hits) {
String sourceAsString = hit.getSourceAsString();
EsArticle esArticle = JSON.parseObject(sourceAsString, EsArticle.class);
esArticles.add(esArticle);
}
return esArticles;
}
}
6 LogstashTCP基于TCP发送数据
package com.bangzhang.center.es.tcp;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.Socket;
import java.nio.charset.StandardCharsets;
/**
 * Demo client that sends ten text lines to a TCP listener on {@code node-01:45454}.
 */
public class LogstashTCP {

    /**
     * Connects, sends ten numbered messages (one per line, flushed individually) and
     * closes the connection.
     *
     * @param args unused
     * @throws Exception if the connection cannot be opened or a write fails
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the writer and the socket; both used to be left open.
        try (Socket client = new Socket("node-01", 45454);
             // Explicit UTF-8: the messages contain Chinese text, and the platform default
             // charset differs between machines. NOTE(review): assumes the receiving
             // Logstash codec expects UTF-8 - confirm against the pipeline config.
             PrintWriter printWriter = new PrintWriter(
                     new OutputStreamWriter(client.getOutputStream(), StandardCharsets.UTF_8))) {
            System.out.println("连接已建立...");
            for (int i = 0; i < 10; i++) {
                printWriter.println("hello logstash , 这是第"+i+" 条消息");
                printWriter.flush();
            }
            // PrintWriter never throws on write errors; it only records them.
            if (printWriter.checkError()) {
                throw new IOException("Failed to send one or more messages to node-01:45454");
            }
        }
    }
}
7 HbaseAndES
package com.bangzhang.center.es.app;
import com.bangzhang.center.es.bean.EsArticle;
import com.bangzhang.center.es.util.ESUtil;
import com.bangzhang.center.es.util.ExcelParseUtil;
import com.bangzhang.center.es.util.HbaseUtil;
import java.io.IOException;
import java.util.List;
/**
 * Entry point tying the pieces together: article attributes are stored in HBase,
 * the same articles are indexed in Elasticsearch, and a title search in Elasticsearch
 * yields the ids used as HBase row keys to fetch the content.
 */
public class HbaseAndES {
    static String tableName = "articles";
    static String family = "article";

    /** Workbook imported by {@code load} when no path is given. */
    private static final String DEFAULT_EXCEL_PATH = "datas/excel/hbaseEs.xlsx";

    /**
     * Optionally imports the workbook, then searches for the keyword "电信" and prints
     * the content of every matching article.
     *
     * <p>Usage: {@code HbaseAndES [load [excelPath]]}. Without arguments only the search
     * runs. Previously the workbook was parsed on every run and the result was discarded,
     * and the import step could never be reached.
     *
     * <p>NOTE(review): documents indexed by {@code load} may not be searchable until
     * Elasticsearch refreshes the index - confirm whether the search should wait.
     *
     * @param args optional {@code load} command followed by an optional workbook path
     * @throws IOException if the workbook, HBase or Elasticsearch cannot be accessed
     */
    public static void main(String[] args) throws IOException {
        boolean load = args.length > 0 && "load".equals(args[0]);
        if (load) {
            String path = args.length > 1 ? args[1] : DEFAULT_EXCEL_PATH;
            List<EsArticle> esArticles = ExcelParseUtil.parseExcel(path);
            writeToHbaseAndES(esArticles);
        }
        searchContent("电信");
    }

    /** Searches titles in Elasticsearch and prints each hit's {@code content} cell read from HBase. */
    private static void searchContent(String keyword) throws IOException {
        List<EsArticle> esArticles = ESUtil.searchTitle(keyword);
        for (EsArticle esArticle : esArticles) {
            // The Elasticsearch document id doubles as the HBase row key.
            String row = esArticle.getId();
            String content = HbaseUtil.readFromHbase(tableName, row, family, "content");
            System.out.println(content);
        }
    }

    /** Stores the articles in HBase first, then indexes them in Elasticsearch. */
    private static void writeToHbaseAndES(List<EsArticle> esArticles) throws IOException {
        writeHbase(esArticles);
        ESUtil.writeToES(esArticles);
    }

    /** Writes title, from, time, readCount and content of every article under its id as row key. */
    private static void writeHbase(List<EsArticle> esArticles) throws IOException {
        for (EsArticle esArticle : esArticles) {
            String rowKey = esArticle.getId();
            HbaseUtil.writeToHbase(tableName, rowKey, family, "title", esArticle.getTitle());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "from", esArticle.getFrom());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "time", esArticle.getTime());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "readCount", esArticle.getReadCount());
            HbaseUtil.writeToHbase(tableName, rowKey, family, "content", esArticle.getContent());
        }
    }
}