全文检索基础
全文检索流程
相关概念
索引库
document对象(相当于一条记录)
field对象
term对象
ElasticSearch入门
核心概念
索引 index
类型 type
字段 Field
映射mapping
文档document
接近实时NRT
集群 cluster
节点 node
分片和复制 shards&replicas
安装(docker)
docker pull elasticsearch:5.6.8
安装es容器
docker run -id --name=es -p 9200:9200 -p 9300:9300 elasticsearch:5.6.8
开启远程连接
docker exec -it es /bin/bash
cd /usr/share/elasticsearch/config
elasticsearch容器内没有vi命令,需要先安装编辑器
apt-get update
apt-get install vim
vi elasticsearch.yml
打开transport.host: 0.0.0.0前面的注释
同时在下面添加
cluster.name: my-application
系统参数配置
在/etc/security/limits.conf追加
* soft nofile 65536
* hard nofile 65536
追加 /etc/sysctl.conf
vm.max_map_count=655360
执行下列命令,修改内核参数马上生效
sysctl -p
跨域配置
修改/usr/share/elasticsearch/config/elasticsearch.yml
http.cors.enabled: true
http.cors.allow-origin: "*"
network.host: 192.168.238.129
重启
docker restart es
elasticsearch-head插件 客户端操作
使用Postman工具进行Restful接口访问
ElasticSearch接口语法
创建索引index和映射mapping
请求url
PUT 192.168.238.129:9200/blog1
创建索引后设置mapping
请求的url
POST http://192.168.238.129:9200/blog2/hello/_mapping
删除索引index
DELETE 192.168.238.129:9200/blog1
创建文档document
POST 192.168.238.129:9200/blog1/article/1
请求体
修改文档document
POST 192.168.238.129:9200/blog1/article/1
请求体
删除文档document
DELETE 192.168.238.129:9200/blog1/article/1
查询文档-根据id
GET 192.168.238.129:9200/blog1/article/1
查询文档-querystring
POST 192.168.238.129:9200/blog1/article/_search
查询文档-term查询
POST 192.168.238.129:9200/blog1/article/_search
IK分词器
unzip elasticsearch-analysis-ik-5.6.8.zip
mv elasticsearch ik
将目录拷贝进容器plugins下
docker cp ik es:/usr/share/elasticsearch/plugins
测试
http://192.168.238.129:9200/_analyze?analyzer=ik_smart&pretty=true&text=我是程序员
http://192.168.238.129:9200/_analyze?analyzer=ik_max_word&pretty=true&text=我是程序员
指定分词器类型即可
Kibana(docker)
docker pull docker.io/kibana:5.6.8
docker run -it -d -e ELASTICSEARCH_URL=http://192.168.238.129:9200 --name kibana --restart=always -p 5601:5601 kibana:5.6.8
ELASTICSEARCH_URL:链接的ES地址
restart=always 开机启动
快捷键
测试访问
http://192.168.238.129:5601
DSL语句
获取所有索引
GET /_cat/indices?v
删除索引
DELETE /user
创建索引
PUT /user
创建索引和映射
新增文档数据
更新数据
查询
GET /user/userinfo/4
查询全部
GET /user/userinfo/_search
排序
分页
查询所有数据
GET _search
term查询(不分词)
match查询(分词)
query查询(分词)
range查询(按照指定范围查找一批数据)
exists查询(过滤可以用于查找某个域的数据)
bool查询
match查询(分词)
prefix查询(以什么字符开头的)
multi_match查询(match查询的基础上同时搜索多个字段,在多个字段中同时查一个)
filter查询
ElasticSearch编程
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>5.6.8</version>
</dependency>
/** Transport client used by the test methods; a new one is built before each test. */
TransportClient client;

/**
 * Builds the transport client and registers the node address so that every
 * test method, not only {@code createIndex}, has a node to talk to.
 *
 * @throws Exception if the host name cannot be resolved or the client cannot be created
 */
@Before
public void init() throws Exception {
    // Cluster name the client announces.
    // NOTE(review): presumably this has to equal cluster.name in elasticsearch.yml - confirm.
    Settings settings = Settings.builder().put("cluster.name", "my-elasticsearch").build();
    // Register the node's transport port (9300) up front; without an address
    // the client has no node to send requests to.
    client = new PreBuiltTransportClient(settings)
            .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("192.168.238.129"), 9300));
}
创建索引index
/**
 * Creates the {@code index_hello} index together with the mapping of its
 * {@code article} type.
 *
 * <p>The mapping declares three stored fields: {@code id} (integer) plus
 * {@code title} and {@code content} (text, analysed with {@code ik_smart}).
 * When the index does not exist yet it is created with the mapping attached;
 * when it already exists only the mapping is put.
 *
 * @throws Exception if the host cannot be resolved, the mapping cannot be
 *     built, or the cluster rejects the request
 */
@Test
public void createIndex() throws Exception {
    client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("192.168.238.129"), 9300));
    try {
        // Build the mapping: { "article": { "properties": { ... } } }
        XContentBuilder builder = XContentFactory.jsonBuilder()
                .startObject()
                    .startObject("article")
                        .startObject("properties")
                            .startObject("id")
                                .field("type", "integer").field("store", true)
                            .endObject()
                            .startObject("title")
                                // "text" replaces the "string" type that is deprecated in 5.x
                                .field("type", "text").field("store", true).field("analyzer", "ik_smart")
                            .endObject()
                            .startObject("content")
                                .field("type", "text").field("store", true).field("analyzer", "ik_smart")
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject();

        boolean indexExists = client.admin().indices().prepareExists("index_hello").get().isExists();
        if (indexExists) {
            // Index is already there: only (re)apply the mapping.
            client.admin().indices()
                    .preparePutMapping("index_hello")
                    .setType("article")
                    .setSource(builder)
                    .get();
        } else {
            // A mapping cannot be put on a missing index, so create both in one request.
            client.admin().indices()
                    .prepareCreate("index_hello")
                    .addMapping("article", builder)
                    .get();
        }
    } finally {
        // Release the client even when the request fails.
        client.close();
    }
}
建立文档
/**
 * Indexes one hand-built JSON document into {@code index_hello/article}
 * under the explicit document id {@code "2"} and then closes the client.
 *
 * @throws Exception if the JSON source cannot be built or the request fails
 */
@Test
public void testAddDocument() throws Exception {
    // Assemble the document body field by field.
    XContentBuilder source = XContentFactory.jsonBuilder();
    source.startObject();
    source.field("id", 21);
    source.field("title", "北方入秋速度加快");
    source.field("content", "阿联酋一架客机在纽约机场");
    source.endObject();

    // Index name, type and document id are passed in one call;
    // the id would be generated automatically if it were omitted.
    client.prepareIndex("index_hello", "article", "2")
            .setSource(source)
            .get();

    // Close the client.
    client.close();
}
使用Jackson转换实体
public class Article{
private Integer id;
private String title;
private String content;
}
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.8.1</version>
</dependency>
/**
 * Serialises an {@code Article} to JSON with Jackson and indexes it twice
 * into {@code index_hello/article}: once through the fluent setters (id
 * {@code "2"}) and once through the three-argument {@code prepareIndex}
 * shortcut (id {@code "3"}).
 *
 * @throws Exception if serialisation or either index request fails
 */
@Test
public void testAddDocument2() throws Exception {
    Article article = new Article(31, "北方入秋速度加快", "阿联酋一架客机在纽约机场");
    ObjectMapper objectMapper = new ObjectMapper();
    // The original declared this variable as "jsonDocuemnt" but read
    // "jsonDocument" below, which did not compile.
    String jsonDocument = objectMapper.writeValueAsString(article);

    // Write the document to the index, setting each coordinate explicitly.
    client.prepareIndex()
            // index name
            .setIndex("index_hello")
            // type
            .setType("article")
            // document id; generated automatically when not set
            .setId("2")
            // document body
            .setSource(jsonDocument, XContentType.JSON)
            // execute
            .get();

    // Same request in its short form, stored under a different id.
    client.prepareIndex("index_hello", "article", "3")
            .setSource(jsonDocument, XContentType.JSON)
            .get();

    // Close the client.
    client.close();
}
查询文档
结果显示
public void search(queryBuilder){
SearchResponse searchResponse = client.prepareSearch("index_hello")
.setTypes("article")
.setQuery(queryBuilder)
.get();
SearchHits searchHits = searchResponse.getHits();
System.out.println("总行数"+searchHits.getTotalHits());
Iterator<SearchHit> it = searchHits.iterator();
while(it.hasNext()){
SearchHit searchHits = it.next();
System.out.println(searchHits.getSourceAsString());
System.out.println("文档属性");
Map<String,Object> document = searchHit.getSOurce();
System.out.println(document.get("id"));
System.out.println(document.get("title"));
System.out.println(document.get("content"));
}
//关闭client
client.close();
}
term查询
@Test
public void testQueryByTerm() throws Exception {
QueryBuilder queryBuilder = QueryBuilders.termQuery("title","女护士");
search(queryBuilder)
}
queryString 查询
@Test
public void testQueryByQuertString() throws Exception {
QueryBuilder queryBuilder = QueryBuilders.quertStringQuery("女护士").defaultField("title");
search(queryBuilder)
}
match查询
@Test
public void testQueryByMatchQuery() throws Exception {
QueryBuilder queryBuilder = QueryBuilders.matchQuery("title","女护士");
search(queryBuilder)
}
id查询
@Test
public void testQueryById() throws Exception {
QueryBuilder queryBuilder = QueryBuilders.idsQuery().addIds("1"."2")
search(queryBuilder)
}
分页查询
/**
 * Runs a match-all query limited to the first five hits (offset 0, size 5)
 * and prints the total count plus each returned document.
 *
 * @throws Exception if the search fails
 */
@Test
public void testQueryByPage() throws Exception {
    QueryBuilder queryBuilder = QueryBuilders.matchAllQuery();
    SearchResponse searchResponse = client.prepareSearch("index_hello")
            .setTypes("article")
            .setQuery(queryBuilder)
            // offset of the first hit to return
            .setFrom(0)
            // number of hits per page
            .setSize(5)
            .get();
    SearchHits searchHits = searchResponse.getHits();
    System.out.println("总行数"+searchHits.getTotalHits());
    // Loop variable renamed: the original redeclared "searchHits" here and
    // then read an undeclared "searchHit".
    for (SearchHit searchHit : searchHits) {
        System.out.println(searchHit.getSourceAsString());
        System.out.println("文档属性");
        Map<String,Object> document = searchHit.getSource();
        System.out.println(document.get("id"));
        System.out.println(document.get("title"));
        System.out.println(document.get("content"));
    }
    // Close the client.
    client.close();
}
高亮显示
@Test
public void testQueryByHighlight() throws Exception {
QueryBuilder queryBuilder = QueryBuilders.matchQuery("title","女护士");
HighlightBuilder highlightBuilder = new HighlightBuilder();
highlightBuilder.field("title");
highlightBuilder.preTages("<em>")
highlightBuilder.postTags("</em>")
SearchResponse searchResponse = client.prepareSearch("index_hello")
.setTypes("article")
.setQuery(queryBuilder)
.highlighter(highlightBuilder)
.get();
SearchHits searchHits = searchResponse.getHits();
System.out.println("总行数"+searchHits.getTotalHits());
Iterator<SearchHit> it = searchHits.iterator();
while(it.hasNext()){
SearchHit searchHits = it.next();
System.out.println(searchHits.getSourceAsString());
System.out.println("文档属性");
Map<String,Object> document = searchHit.getSOurce();
System.out.println(document.get("id"));
System.out.println(document.get("title"));
System.out.println(document.get("content"));
System.out.println("高亮结果");
Map<String,HighlightBuilder> highlightBuilder = searchHit.getHighlightFields();
for(Map.Entry<String,HighlightFields> entry : highlightBuilder.entrySet()){
System.out.println(entry.getKey()+":\t"+Arrays.toString(entry.getValue().getFragments()));
}
}
//关闭client
client.close();
}
SpringDataElasticSearch
基本使用
springboot版本选择2.1.16
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
spring:
  data:
    elasticsearch:
      cluster-name: my-elasticsearch
      cluster-nodes: 192.168.238.129:9300
@Document(indexName="blog1",type="article")
public class Ariticle(){
@Id
@Field(type=FieldType.Long,sore=true)
private Integer id;
@Field(type=FieldType.Text,sore=true,analyzer="ik_smart")
private String title;
@Field(type=FieldType.Text,sore=true,analyzer="ik_smart")
private String content;
}
/**
 * Repository for {@code Article} documents with derived finder methods.
 *
 * <p>NOTE(review): the id type argument is kept as {@code Long} as written in
 * the original, while the visible {@code Article} entity declares its id as
 * {@code Integer} - confirm which one is intended.
 */
public interface ArticleDao extends ElasticsearchRepository<Article, Long> {

    /** Finds articles whose title is like the given text. */
    List<Article> findByTitleLike(String title);

    /** Finds articles whose title is like {@code title} or whose content matches {@code content}. */
    List<Article> findByTitleLikeOrContent(String title, String content);

    /** Finds one page of articles whose title or content matches the given values. */
    List<Article> findByTitleOrContent(String title, String content, Pageable pageable);
}
/** Template used to create the index, put the mapping and run native queries. */
@Autowired
private ElasticsearchTemplate template;

/** Repository used to save and look up {@code Article} documents. */
@Autowired
private ArticleDao dao;
/**
 * Creates the index for the {@code Article} entity and then puts its mapping,
 * in that order.
 */
@Test
public void createIndex() {
    Class<Article> entityType = Article.class;
    // Index first, mapping second.
    template.createIndex(entityType);
    template.putMapping(entityType);
}
内置保存
/**
 * Saves a single article (id 31) through the repository.
 *
 * @throws Exception declared by the original signature
 */
@Test
public void addDocument() throws Exception {
    dao.save(new Article(31, "北方入秋速度加快", "阿联酋一架客机在纽约机场"));
}
/** Prints every article returned by the derived title "like" finder. */
@Test
public void findByTitle() {
    List<Article> matches = dao.findByTitleLike("美丽的女护士");
    for (Article match : matches) {
        System.out.println(match);
    }
}
/** Prints every article returned by the title-like-or-content finder. */
@Test
public void findByTitleOrContent() {
    List<Article> matches = dao.findByTitleLikeOrContent("美丽的女护士", "男护士");
    for (Article match : matches) {
        System.out.println(match);
    }
}
/**
 * Prints one page (page index 1, five entries per page) of the articles
 * returned by the title-or-content finder.
 */
@Test
public void findByTitlePage() {
    // NOTE(review): PageRequest page indexes are presumably zero-based, so
    // index 1 would select the second page - confirm this is intended.
    List<Article> page = dao.findByTitleOrContent("美丽的女护士", "男护士", PageRequest.of(1, 5));
    for (Article article : page) {
        System.out.println(article);
    }
}
@Test
public void testNativeSearchQuery() throw Exception{
NativeSearchQuery query = new NativeSearchQueryBuilder()
.withQuery(QueryBuilders.queryStringQuery("女护士"),defaultField("title"))
.withPageable(PageRequest.of(1,5))
.build();
template.queryForList(query,Article.class).forEach(System.out::println);
}
聚合查询
@Document(indexName="blog2",type="car")
public class Car(){
@Id
@Field(type=FieldType.Long,sore=true)
private Integer id;
@Field(type=FieldType.Text,sore=true,analyzer="ik_smart")
private String name;
@Field(type=FieldType.Text,sore=true,analyzer="ik_smart",fielddata=true)
private String brand;
@Field(type=FieldType.Text,sore=true,analyzer="ik_smart",fielddata=true)
private String color;
@Field(type=FieldType.Double,sore=true,analyzer="ik_smart",fielddata=true)
private Double price;
}
/**
 * Repository for {@code Car} documents; declares no methods of its own.
 *
 * <p>NOTE(review): the id type argument is kept as {@code Long} as written in
 * the original, while the visible {@code Car} entity declares its id as
 * {@code Integer} - confirm which one is intended.
 */
public interface CarDao extends ElasticsearchRepository<Car, Long> {
}
@Test
public void testQueryByAggs(){
NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder().withQuery(QueryBuilders.matchAllQuery());
queryBuilder.withSourceFilter(new FetchSourceFilter(new String[],new String[]{"brand"}));
//添加聚合
queryBuilder.addAggregation(AggregationBuilders.terms("group_by_color").field("color"));
//执行结果
AggregatePage<Car> aggPage = (AggregatePage<Car>) carDao.search(queryBuilder.build());
//从聚合结果中获得bucket的名字对应的聚合
StringTerms agg = (StringTerms) aggPage.getAggregation("group_by_color");
List<StringTerms.Bucket> buckets = agg.getBuckets();
buckers.forEach(b->{
String color = b.getKeyAsString();
Long docCount = b.getDocCount();
System.out.println("color"+color+"总数"+docCount)
})
}
@Test
public void testQueryBySubAggs(){
NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder().withQuery(QueryBuilders.matchAllQuery());
queryBuilder.withSourceFilter(new FetchSourceFilter(new String[],new String[]{"brand"}));
//添加聚合
queryBuilder.addAggregation(AggregationBuilders.terms("group_by_color").field("color").subAggregation(AggregationBuilders.avg("avg_price").field("price")));
//执行结果
AggregatePage<Car> aggPage = (AggregatePage<Car>) carDao.search(queryBuilder.build());
//从聚合结果中获得bucket的名字对应的聚合
StringTerms agg = (StringTerms) aggPage.getAggregation("group_by_color");
List<StringTerms.Bucket> buckets = agg.getBuckets();
buckers.forEach(b->{
String color = b.getKeyAsString();
Long docCount = b.getDocCount();
//取得内部聚合
InternalAvg avg = (InternalAvg)b.getAggregations().asMap().get("avg_price");
System.out.println("color"+color+"总数"+docCount+"平均价格"+avg.getValue());
})
}