一、高级javaAPI操作
现有结构化数据内容如下:
name | age salary | team | position |
---|---|---|---|
张云雷 | 26 | 2000 | war |
特斯拉 20 | 500 | tim | sf |
于谦 | 25 | 2000 | cav |
爱迪生 | 40 | 1000 | tim |
爱因斯坦 | 21 | 300 | tim |
郭德纲 | 33 | 3000 | cav |
牛顿 | 21 | 500 | tim |
岳云鹏 | 29 | 1000 | war |
初始化一批数据到es索引库当中去
/**
* 批量添加数据
* @throws IOException
* @throws ExecutionException
* @throws InterruptedException
*/
@Test
public void addIndexDatas() throws IOException, ExecutionException, InterruptedException {
//获取settings
//配置es集群的名字
Settings settings = Settings.builder().put("cluster.name", "myes").build();
//获取客户端
TransportAddress transportAddress = new TransportAddress(InetAddress.getByName("node01"), 9300);
TransportAddress transportAddress2 = new TransportAddress(InetAddress.getByName("node02"), 9300);
TransportAddress transportAddress3 = new TransportAddress(InetAddress.getByName("node03"), 9300);
//获取client客户端
TransportClient client = new PreBuiltTransportClient(settings).addTransportAddress(transportAddress).addTransportAddress(transportAddress2).addTransportAddress(transportAddress3);
/**
* 创建索引
* */
client.admin().indices().prepareCreate("player").get();
//构建json的数据格式,创建映射
XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()
.startObject()
.startObject("player")
.startObject("properties")
.startObject("name").field("type","text").field("index", "true").field("fielddata","true").endObject()
.startObject("age").field("type","integer").endObject()
.startObject("salary").field("type","integer").endObject()
.startObject("team").field("type","text").field("index", "true").field("fielddata","true").endObject()
.startObject("position").field("type","text").field("index", "true").field("fielddata","true").endObject()
.endObject()
.endObject()
.endObject();
PutMappingRequest request = Requests.putMappingRequest("player")
.type("player")
.source(mappingBuilder);
client.admin().indices().putMapping(request).get();
//批量添加数据开始
BulkRequestBuilder bulkRequest = client.prepareBulk();
// either use client#prepare, or use Requests# to directly build index/delete requests
bulkRequest.add(client.prepareIndex("player", "player", "1")
.setSource(jsonBuilder()
.startObject()
.field("name", "郭德纲")
.field("age", 33)
.field("salary",3000)
.field("team" , "cav")
.field("position" , "sf")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "2")
.setSource(jsonBuilder()
.startObject()
.field("name", "于谦")
.field("age", 25)
.field("salary",2000)
.field("team" , "cav")
.field("position" , "pg")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "3")
.setSource(jsonBuilder()
.startObject()
.field("name", "岳云鹏")
.field("age", 29)
.field("salary",1000)
.field("team" , "war")
.field("position" , "pg")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "4")
.setSource(jsonBuilder()
.startObject()
.field("name", "孙越")
.field("age", 26)
.field("salary",2000)
.field("team" , "war")
.field("position" , "sg")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "5")
.setSource(jsonBuilder()
.startObject()
.field("name", "张云雷")
.field("age", 26)
.field("salary",2000)
.field("team" , "war")
.field("position" , "pf")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "6")
.setSource(jsonBuilder()
.startObject()
.field("name", "爱迪生")
.field("age", 40)
.field("salary",1000)
.field("team" , "tim")
.field("position" , "pf")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "7")
.setSource(jsonBuilder()
.startObject()
.field("name", "牛顿")
.field("age", 21)
.field("salary",500)
.field("team" , "tim")
.field("position" , "c")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "4")
.setSource(jsonBuilder()
.startObject()
.field("name", "爱因斯坦")
.field("age", 21)
.field("salary",300)
.field("team" , "tim")
.field("position" , "sg")
.endObject()
)
);
bulkRequest.add(client.prepareIndex("player", "player", "8")
.setSource(jsonBuilder()
.startObject()
.field("name", "特斯拉")
.field("age", 20)
.field("salary",500)
.field("team" , "tim")
.field("position" , "sf")
.endObject()
)
);
BulkResponse bulkResponse = bulkRequest.get();
client.close();
}
1、统计每个球队当中球员的数量
sql语句实现
select team, count(*) as player_count from player group by team;
使用javaAPI实现
@Test
public void groupAndCount() {
//1:构建查询提交
SearchRequestBuilder builder = client.prepareSearch("player").setTypes("player");
//2:指定聚合条件
TermsAggregationBuilder team = AggregationBuilders.terms("player_count").field("team");
//3:将聚合条件放入查询条件中
builder.addAggregation(team);
//4:执行action,返回searchResponse
SearchResponse searchResponse = builder.get();
Aggregations aggregations = searchResponse.getAggregations();
for (Aggregation aggregation : aggregations) {
StringTerms stringTerms = (StringTerms) aggregation;
List<StringTerms.Bucket> buckets = stringTerms.getBuckets();
for (StringTerms.Bucket bucket : buckets) {
System.out.println(bucket.getKey());
System.out.println(bucket.getDocCount());
}
}
}
2、统计每个球队中每个位置的球员数量
sql语句实现
select team, position, count(*) as pos_count from player group by team, position;
java代码实现
/**
* 统计每个球队中每个位置的球员数量
*/
@Test
public void teamAndPosition(){
SearchRequestBuilder builder = client.prepareSearch("player").setTypes("player");
TermsAggregationBuilder team = AggregationBuilders.terms("player_count").field("team");
TermsAggregationBuilder position = AggregationBuilders.terms("posititon_count").field("position");
team.subAggregation(position);
SearchResponse searchResponse = builder.addAggregation(team).addAggregation(position).get();
Aggregations aggregations = searchResponse.getAggregations();
for (Aggregation aggregation : aggregations) {
// System.out.println(aggregation.toString());
StringTerms stringTerms = (StringTerms) aggregation;
List<StringTerms.Bucket> buckets = stringTerms.getBuckets();
for (StringTerms.Bucket bucket : buckets) {
long docCount = bucket.getDocCount();
Object key = bucket.getKey();
System.out.println("当前队伍名称为" + key + "该队伍下有"+docCount + "个球员");
Aggregation posititon_count = bucket.getAggregations().get("posititon_count");
if(null != posititon_count){
StringTerms positionTrem = (StringTerms) posititon_count;
List<StringTerms.Bucket> buckets1 = positionTrem.getBuckets();
for (StringTerms.Bucket bucket1 : buckets1) {
Object key1 = bucket1.getKey();
long docCount1 = bucket1.getDocCount();
System.out.println("该队伍下面的位置为" + key1+"该位置下有" + docCount1 +"人");
}
}
}
}
}
3、分组求各种值
计算每个球队年龄最大值
select team, max(age) as max_age from player group by team;
/**
* 计算每个球队年龄最大值
*/
@Test
public void groupAndMax(){
SearchRequestBuilder builder = client.prepareSearch("player").setTypes("player");
TermsAggregationBuilder team = AggregationBuilders.terms("team_group").field("team");
MaxAggregationBuilder age = AggregationBuilders.max("max_age").field("age");
team.subAggregation(age);
SearchResponse searchResponse = builder.addAggregation(team).get();
Aggregations aggregations = searchResponse.getAggregations();
for (Aggregation aggregation : aggregations) {
StringTerms stringTerms = (StringTerms) aggregation;
List<StringTerms.Bucket> buckets = stringTerms.getBuckets();
for (StringTerms.Bucket bucket : buckets) {
Aggregation max_age = bucket.getAggregations().get("max_age");
System.out.println(max_age.toString());
}
}
}
4、统计每个球队年龄最小值
计算每个球队年龄最大/最小/总/平均的球员年龄
select team, min(age) as min_age from player group by team;
/**
* 统计每个球队中年龄最小值
*/
@Test
public void teamMinAge(){
SearchRequestBuilder builder = client.prepareSearch("player").setTypes("player");
TermsAggregationBuilder team = AggregationBuilders.terms("team_count").field("team");
MinAggregationBuilder age = AggregationBuilders.min("min_age").field("age");
TermsAggregationBuilder termAggregation = team.subAggregation(age);
SearchResponse searchResponse = builder.addAggregation(termAggregation).get();
Aggregations aggregations = searchResponse.getAggregations();
for (Aggregation aggregation : aggregations) {
System.out.println(aggregation.toString());
StringTerms stringTerms = (StringTerms) aggregation;
List<StringTerms.Bucket> buckets = stringTerms.getBuckets();
for (StringTerms.Bucket bucket : buckets) {
Aggregations aggregations1 = bucket.getAggregations();
for (Aggregation aggregation1 : aggregations1) {
System.out.println(aggregation1.toString());
}
}
}
}
5、分组求平均值
计算每个球队年龄最大/最小/总/平均的球员年龄
select team, avg(age) as max_age from player group by team;
/**
* 计算每个球队的年龄平均值
*/
@Test
public void avgTeamAge(){
SearchRequestBuilder builder = client.prepareSearch("player").setTypes("player");
TermsAggregationBuilder team_field = AggregationBuilders.terms("player_count").field("team");
AvgAggregationBuilder age_avg = AggregationBuilders.avg("age_avg").field("age");
team_field.subAggregation(age_avg);
SearchResponse searchResponse = builder.addAggregation(team_field).get();
Aggregations aggregations = searchResponse.getAggregations();
for (Aggregation aggregation : aggregations) {
System.out.println(aggregation.toString());
StringTerms stringTerms = (StringTerms) aggregation;
}
}
6、分组求和
计算每个球队球员的平均年龄,同时又要计算总年薪
select team, avg(age)as avg_age, sum(salary) as total_salary from player group by team;
/**
* 统计每个球队当中的球员平均年龄,以及队员总年薪
*/
@Test
public void avgAndSum(){
SearchRequestBuilder builder = client.prepareSearch("player").setTypes("player");
TermsAggregationBuilder team_group = AggregationBuilders.terms("team_group").field("team");
AvgAggregationBuilder avg_age = AggregationBuilders.avg("avg_age").field("age");
SumAggregationBuilder sumMoney = AggregationBuilders.sum("sum_money").field("salary");
TermsAggregationBuilder termsAggregationBuilder = team_group.subAggregation(avg_age).subAggregation(sumMoney);
SearchResponse searchResponse = builder.addAggregation(termsAggregationBuilder).get();
Aggregations aggregations = searchResponse.getAggregations();
for (Aggregation aggregation : aggregations) {
System.out.println(aggregation.toString());
}
}
7、聚合排序
计算每个球队总年薪,并按照总年薪倒序排列
select team, sum(salary) as total_salary from player group by team order by total_salary desc;
/**
* 计算每个球队总年薪,并按照年薪进行排序
*/
@Test
public void orderBySum(){
SearchRequestBuilder builder = client.prepareSearch("player").setTypes("player");
TermsAggregationBuilder teamGroup = AggregationBuilders.terms("team_group").field("team").order(BucketOrder.count(true));
SumAggregationBuilder sumSalary = AggregationBuilders.sum("sum_salary").field("salary");
TermsAggregationBuilder termsAggregationBuilder = teamGroup.subAggregation(sumSalary);
SearchResponse searchResponse = builder.addAggregation(termsAggregationBuilder).get();
Map<String, Aggregation> stringAggregationMap = searchResponse.getAggregations().asMap();
System.out.println(stringAggregationMap.toString());
Aggregations aggregations = searchResponse.getAggregations();
for (Aggregation aggregation : aggregations) {
System.out.println(aggregation.toString());
}
}
二、ES当中的地理位置搜索
ES不仅可以对我们的数据进行聚合操作,还可以针对我们的地理位置经纬度进行搜索,可以通过ES搜索我们附近的人等等各种基于地理位置的需求。
经纬度在线解析网站:http://www.gpsspg.com/maps.htm
1、创建索引库并添加数据
直接在kibana当中通过以下操作创建索引库,并添加一条数据
PUT platform_foreign_website
{
"mappings": {
"store":{
"properties": {
"id": {
"type": "text"
},
"storeName": {
"type": "text"
},
"location":{
"type": "geo_point"
}
}
}
}
}
添加数据:
40.0488115498,116.4320345091
PUT /platform_foreign_website/store/40?pretty
{"id": "40", "storeName": "北京市北京市朝阳区清河营东路2号院","longitude":116.4320345091,"latitude":40.0488115498,"isdelete":true,"location":{
"lat":40.0488115498,
"lon":116.4320345091
}
}
40.0461174292,116.4360685514
PUT /platform_foreign_website/store/41?pretty
{"id": "40", "storeName": "北京市北京市朝阳区北苑东路","longitude":116.4360685514,"latitude":40.0461174292,"isdelete":false,"location":{
"lat":40.0461174292,
"lon":116.4360685514
}
}
40.0519526142,116.4178513254
PUT /platform_foreign_website/store/42?pretty
{"id": "42", "storeName": "北京市北京市朝阳区立通路","longitude":116.4178513254,"latitude":40.0519526142,"isdelete":true,"location":{
"lat":40.0519526142,
"lon":116.4178513254
}
}
40.0503813013,116.4562592119
PUT /platform_foreign_website/store/43?pretty
{"id": "43", "storeName": "北京市北京市朝阳区来广营北路","longitude":116.4562592119,"latitude":40.0503813013,"isdelete":false,"location":{
"lat":40.0503813013,
"lon":116.4562592119
}
}
40.0385828363,116.4465266673
PUT /platform_foreign_website/store/44?pretty
{"id": "44", "storeName": "北京市北京市朝阳区顺白路","longitude":116.4465266673,"latitude":40.0385828363,"isdelete":false,"location":{
"lat":40.0385828363,
"lon":116.4465266673
}
}
查询所有的数据
GET /platform_foreign_website/store/_search?pretty
{
"query": {
"match_all": {}
}
}
2、添加以下jar包坐标依赖
<!-- https://mvnrepository.com/artifact/org.locationtech.spatial4j/spatial4j -->
<dependency>
<groupId>org.locationtech.spatial4j</groupId>
<artifactId>spatial4j</artifactId>
<version>0.6</version>
</dependency>
<dependency>
<groupId>com.vividsolutions</groupId>
<artifactId>jts</artifactId>
<version>1.13</version>
<exclusions>
<exclusion>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
</exclusion>
</exclusions>
</dependency>
这里说了 geo_distance
(找出与指定位置在给定距离内的点) 和 geo_polygon
(找出落在多边形中的点)两种方法,
elasticsearch其实还有另外两种 方法 geo_bounding_box
(找出落在指定矩形框中的坐标点) 和 geo_distance_range
(找出与指定点距离在给定最小距离和最大距离之间的点),因为需求没有涉及到暂时没有调研,以后会慢慢晚上
3、使用javaAPI来实现基于地理位置的搜索
经纬度在线解析 http://www.gpsspg.com/maps.htm
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.index.query.*;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
public class ESOperate {
public static void main(String[] args) throws UnknownHostException {
Settings settings = Settings.builder()
.put("cluster.name", "myes")
.build();
TransportClient client = new PreBuiltTransportClient(settings)
.addTransportAddress(new TransportAddress(InetAddress.getByName("node03"), 9300));
/**
* 找出落在指定矩形框当中的坐标点
*
40.0519526142,116.4178513254
40.0385828363,116.4465266673
*/
SearchResponse searchResponse = client.prepareSearch("platform_foreign_website")
.setTypes("store")
.setQuery(QueryBuilders.geoBoundingBoxQuery("location")
.setCorners(40.0519526142, 116.4178513254, 40.0385828363, 116.4465266673))
.get();
for(SearchHit searchHit : searchResponse.getHits().getHits()) {
System.out.println(searchHit.getSourceAsString());
}
/**|
* 找出坐落在多边形当中的坐标点
*
* 40.0519526142,116.4178513254
*
* 40.0519526142,116.4178513254
*
* 40.0385828363,116.4465266673
*
*
*/
List<GeoPoint> points = new ArrayList<GeoPoint>();
points.add(new GeoPoint(40.0519526142, 116.4178513254));
points.add(new GeoPoint(40.0519526142, 116.4178513254));
points.add(new GeoPoint(40.0385828363, 116.4465266673));
searchResponse = client.prepareSearch("platform_foreign_website")
.setTypes("store")
.setQuery(QueryBuilders.geoPolygonQuery("location", points))
.get();
for(SearchHit searchHit : searchResponse.getHits().getHits()) {
System.out.println(searchHit.getSourceAsString());
}
System.out.println("==================================================");
/**
* 以当前的点为中心,搜索落在半径范围内200公里的经纬度坐标点
*40.0488115498,116.4320345091
*/
searchResponse = client.prepareSearch("platform_foreign_website")
.setTypes("store")
.setQuery(QueryBuilders.geoDistanceQuery("location")
.point(40.0488115498, 116.4320345091)
.distance(200, DistanceUnit.KILOMETERS))
.get();
for(SearchHit searchHit : searchResponse.getHits().getHits()) {
System.out.println(searchHit.getSourceAsString());
}
client.close();
}
}
三、elasticsearch 的sql插件使用
对于这些复杂的查询,es使用javaAPI都可以实现,但是相较于sql语句来说,我们更加熟悉sql语句,所以es也提供了sql语句的开发,让我们通过sql语句即可实现ES的查询,接下来我们就来安装并学习sql的插件的使用方法吧!
在es版本6.3之前都不支持sql语句的开发,如果需要使用sql语句来开发es的数据查询,那么我们需要手动的自己安装插件,插件下载地址:https://github.com/NLPchina/elasticsearch-sql/
但是在6.3版本之后,es自带就安装了sql的插件,我们可以直接通过sql语句的方式实现es当中的数据查询。
对于sql语句的使用,es给我们提供了==三种方式=,接下来我们分别看看三种方式如何实现es数据的查询。
1、通过rest风格实现数据的查询
第一步:使用rest方式向索引库当中添加数据
使用kibana向索引库当中添加数据
PUT /library/book/_bulk?refresh
{"index":{"_id": "Leviathan Wakes"}}
{"name": "Leviathan Wakes", "author": "James S.A. Corey", "release_date": "2011-06-02", "page_count": 561}
{"index":{"_id": "Hyperion"}}
{"name": "Hyperion", "author": "Dan Simmons", "release_date": "1989-05-26", "page_count": 482}
{"index":{"_id": "Dune"}}
{"name": "Dune", "author": "Frank Herbert", "release_date": "1965-06-01", "page_count": 604}
第二步:使用rest风格方式查询数据
curl -X POST "node01:9200/_xpack/sql?format=txt" -H 'Content-Type: application/json' -d'
{
"query": "SELECT * FROM library WHERE release_date < \u00272000-01-01\u0027"
}'
2、使用sql脚本的方式进入sql客户端进行查询
我们也可以使用sql脚本的方式,进入sql客户端,通过sql语句的方式实现数据的查询。
第一步:node01进入sql脚本客户端
node01执行以下命令进入sql脚本客户端
cd /kkb/install/elasticsearch-6.7.0
bin/elasticsearch-sql-cli node01:9200
第二步:执行sql语句
sql> select * from library;
author | name | page_count | release_date
----------------+---------------+---------------+------------------------
Dan Simmons |Hyperion |482 |1989-05-26T00:00:00.000Z
James S.A. Corey|Leviathan Wakes|561 |2011-06-02T00:00:00.000Z
Frank Herbert |Dune |604 |1965-06-01T00:00:00.000Z
3、通过jdbc连接的方式
当然了,我们也可以通过jdbc连接的方式,通过java代码来实现ES当中数据的查询操作。
官网介绍:https://www.elastic.co/guide/en/elasticsearch/reference/6.7/sql-jdbc.html
使用javaAPI访问数据库当中的数据,会产生一个错误,参见这篇文章:https://www.cnblogs.com/hts-technology/p/9282421.html
第一步:导入jar包
在我们的maven依赖中添加以下坐标,导入es-sql的jar包
<repositories>
<repository>
<id>elastic.co</id>
<url>https://artifacts.elastic.co/maven</url>
</repository>
</repositories>
<dependency>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>x-pack-sql-jdbc</artifactId>
<version>6.7.0</version>
</dependency>
第二步:开发java代码,实现查询
通过java代码,使用jdbc连接es服务器,然后查询数据
@Test
public void esJdbc() throws SQLException {
EsDataSource dataSource = new EsDataSource();
String address = "jdbc:es://http://node01:9200" ;
dataSource.setUrl(address);
Properties connectionProperties = new Properties();
dataSource.setProperties(connectionProperties);
Connection connection = dataSource.getConnection();
Statement statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery("select * from library");
while(resultSet.next()){
String string = resultSet.getString(0);
String string1 = resultSet.getString(1);
int anInt = resultSet.getInt(2);
String string2 = resultSet.getString(4);
System.out.println(string + "\t" + string1 + "\t" + anInt + "\t" + string2);
}
connection.close();
}