ES学习
ES安装
jdk必须是1.8或以上
ES是基于java开发的 java核心的jar包必须一致
ES默认端口:9200
head插件默认端口:9100(9300是ES节点间TCP通信的transport端口)
kibana默认端口:5601
1.启动
cnpm install
npm run start
设置跨域:
http.cors.enabled: true
http.cors.allow-origin: "*"
ik分词器
ik_smart和ik_max_word,ik_smart是最少切分,ik_max_word是最细粒度划分。
GET _analyze
{
"analyzer": "ik_smart",
"text": "中国共产党"
}
GET _analyze
{
"analyzer": "ik_max_word",
"text": "中国共产党"
}
ik分词器增加自己的配置
在IKAnalyzer.cfg.xml中加入自己写的dic配置文件注意编码格式要是utf-8才会生效中文
Rest风格
基本Rest命令
method | url地址 | 描述 |
---|---|---|
PUT | localhost:9200/索引名字/类型名字/文档id | 创建文档(制定文档ID) |
POST | localhost:9200/索引名称/类型名称 | 创建文档(随机ID) |
POST | localhost:9200/索引名称/类型名称/文档id/_update | 修改文档 |
DELETE | localhost:9200/索引名称/类型名称/文档id | 删除文档 |
GET | localhost:9200/索引名称/类型名称/文档id | 通过文档ID查询文档 |
POST | localhost:9200/索引名称/类型名称/_search | 查询所有数据 |
基本数据类型:
字符串类型:
text,keyword。
数值类型
long,integer,short,byte,double,float,half float,scaled float
日期类型
date
布尔类型
boolean
二进制类型
binary
等等
1.创建索引(新版以后type默认为_doc)
PUT /test1/type1/1
{
"name":"高帆",
"age":""
}
1.1指定字段类型创建索引规则
PUT /test2
{
"mappings": {
"properties": {
"name":{
"type": "text"
},
"age":{
"type": "integer"
},
"birth":{
"type": "date"
}
}
}
}
2.获得规则
GET请求获得索引信息
GET test2
//获取默认信息
GET _cat/indices?v
3.修改值第一种方法
PUT /test3/_doc/1
{
"name":"123",
"age":12,
"birth": "1999-7-7"
}
## 修改后返回值
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "1",
##版本号增加了
"_version" : 3,
##状态为update
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 2,
"_primary_term" : 1
}
3.1修改值第二种方法
POST /test3/_doc/1/_update
{
"doc":{
"name":"李四"
}
}
4.删除索引
DELETE /test1
关于文档的操作
基本操作:
简单查询
/_search 代表查询 q代表搜索 name代表字段,gf代表查询的值
GET gf/user/_search?q=name:gf
复杂操作查询(排序,分页,高亮,模糊查询,精准查询):
查询
GET gf/user/_search
{
//查询参数体 像mysql里的where
"query":{
"match": {
"name": "高帆"
}
},
//需要查询的列,mysql查询后跟上列名称
"_source": ["name","desc"],
//sort 进行排序
"sort":[{
//排序字段
"age":{
//排序方式 desc降序,asc升序
"order":"desc"
}
}
],
//分页参数,from相当于起始下标(offset),size:一页多少条数据
"from":0,
"size":1
}
{
"took" : 528,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.5884793,
//hits 索引和文档信息,查询结果总数 ,和一些基本信息
"hits" : [
{
"_index" : "gf",
"_type" : "user",
"_id" : "1",
//score,匹配度越高分值越高
"_score" : 1.5884793,
"_source" : {
"name" : "高帆12"
}
},
{
"_index" : "gf",
"_type" : "user",
"_id" : "4",
"_score" : 1.5884793,
"_source" : {
"name" : "高帆21"
}
}
]
}
}
多条件查询
match and操作
GET gf/user/_search
{
"query":{
//多条件查询 must 相当于sql中的and where name=“gf” and age=“1”
"bool": {
"must": [
{
"match": {
"name": "高帆"
}
},
{
"match": {
"age": 1
}
}
]
}
},
"_source": ["name","desc","age"],
"sort": [{
"age":{
"order":"desc"
}
}
]
}
should or操作
GET gf/user/_search
{
"query":{
"bool": {
//多条件查询 must 相当于sql中的or where name=“gf” or age=“1”
"should": [
{
"match": {
"name": "高帆"
}
},
{
"match": {
"age": 1
}
}
]
}
},
"_source": ["name","desc","age"],
"sort": [{
"age":{
"order":"desc"
}
}
]
}
must_not 不等于
GET gf/user/_search
{
"query":{
"bool": {
//不等于操作 where age <>1
"must_not": [
{
"match": {
"age": 1
}
}
]
}
},
"_source": ["name","desc","age"],
"sort": [{
"age":{
"order":"desc"
}
}
]
}
filter 过滤器
GET gf/user/_search
{
"query":{
"bool": {
"must": [
{
"match": {
"name": "高帆"
}
}
],
//过滤器 过滤 年龄大于(gt)1并且小于(lt)10的数据
//gt 大于,gte 大于等于,lt 小于,lte:小于等于
"filter": [
{
"range": {
"age": {
"lt": 10,
"gt": 1
}
}
}
]
}
},
"_source": ["name","desc","age"],
"sort": [{
"age":{
"order":"desc"
}
}
]
}
匹配多个条件
GET gf/user/_search
{
"query":{
"match": {
//多个条件用空格隔开只要满足其中一个结果就可以查询所有出来取分值最高的就可以了
"tags": "大 游"
}
}
}
精确查找
term查询是直接通过倒排索引指定的词条进行精确查找
term,直接查询准确的不使用分词器,
match,会使用分词器解析
两个类型
text:会被拆开查询如like
keyword:不会被拆开查询如=
GET testdb/_search
{
"query": {
"term": {
//desc 为 keyWord查询的是精准的
"desc":"张三在唱歌 desc"
}
}
}
//查询结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "testdb",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.6931471,
"_source" : {
"name" : "张三在唱歌 name",
"desc" : "张三在唱歌 desc"
}
}
]
}
}
//使用text查询
GET testdb/_search
{
"query": {
"term": {
"name":"张"
}
}
}
//查询结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.18232156,
"hits" : [
{
"_index" : "testdb",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.18232156,
"_source" : {
"name" : "张三在唱歌 name",
"desc" : "张三在唱歌 desc"
}
},
{
"_index" : "testdb",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.18232156,
"_source" : {
"name" : "张三在唱歌 name",
"desc" : "张三在唱歌 desc2"
}
}
]
}
}
精确查询多个值
GET testdb/_search
{
"query": {
"bool": {
"should": [
{
"term": {
"desc":"张三在唱歌 desc"
}
},
{
"term": {
"desc":"张三在唱歌 desc2"
}
}
]
}
}
}
高亮查询
GET testdb/_search
{
"query": {
"match": {
"name":"张"
}
},
//高亮查询
"highlight": {
//自定义高亮标签
"pre_tags": "<p class='key' style='color:red'>",
//结束标签
"post_tags": "</p>",
//高亮字段选择
"fields": {
"name":{}
}
}
}
高级客户端索引api
配置es
@Configuration
public class ElasticSearchConfig {
    /**
     * High-level REST client bound to the local ES node at localhost:9200.
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost localNode = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}
创建索引(表)
// Build a create-index request for the "gaofan" index
CreateIndexRequest gaofan = new CreateIndexRequest("gaofan");
// Execute the request through the IndicesClient
CreateIndexResponse createIndexResponse = restHighLevelClient.indices().create(gaofan, RequestOptions.DEFAULT);
System.out.println(createIndexResponse);
判断索引是否存在
// Check whether an index exists on the cluster.
@Test
void testExistIndex() throws IOException {
    // Build the request for the target index, then query the indices client.
    GetIndexRequest request = new GetIndexRequest("gaofan2");
    System.out.println(restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT));
}
删除索引
// Delete an index and report whether the cluster acknowledged the request.
@Test
void testDeleteIndex() throws IOException {
    DeleteIndexRequest request = new DeleteIndexRequest("ilm-history-2-000001");
    AcknowledgedResponse response = restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);
    System.out.println(response.isAcknowledged());
}
添加文档
// Index a single document (equivalent to: PUT /gaofan/_doc/1).
@Test
void testAddDocument() throws IOException {
    User user = new User("gf", 1);
    IndexRequest indexRequest = new IndexRequest("gaofan");
    indexRequest.id("1");                                // explicit document id
    indexRequest.timeout(TimeValue.timeValueSeconds(1)); // request timeout: 1s
    // Serialize the POJO to JSON and attach it as the document source.
    ObjectMapper objectMapper = new ObjectMapper();
    indexRequest.source(objectMapper.writeValueAsString(user), XContentType.JSON);
    // Send the request and inspect the response.
    IndexResponse response = restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
    System.out.println(response.toString());
    System.out.println(response.status()); // matches the status the REST command would return
}
判断文档是否存在
// Existence check for a single document, without fetching its body.
@Test
void testIsExists() throws IOException {
    GetRequest request = new GetRequest("gaofan", "1");
    // Skip _source and stored fields — only existence matters here.
    request.fetchSourceContext(new FetchSourceContext(false));
    request.storedFields("_none_");
    System.out.println(restHighLevelClient.exists(request, RequestOptions.DEFAULT));
}
获取文档消息
// Fetch a document by id and print its contents.
@Test
void testGetDocument() throws IOException {
    GetResponse response = restHighLevelClient.get(new GetRequest("gaofan", "1"), RequestOptions.DEFAULT);
    System.out.println(response.getSourceAsString()); // just the _source body
    System.out.println(response);                     // the complete response
}
更新文档
// Partial update of document 1 (equivalent to: POST /gaofan/_doc/1/_update).
@Test
void testUpdateDocument() throws IOException {
    UpdateRequest request = new UpdateRequest("gaofan", "1");
    request.timeout(TimeValue.timeValueSeconds(1));
    // Only the fields present in the serialized User are modified.
    ObjectMapper objectMapper = new ObjectMapper();
    request.doc(objectMapper.writeValueAsString(new User("张三", 12)), XContentType.JSON);
    System.out.println(restHighLevelClient.update(request, RequestOptions.DEFAULT).status());
}
删除文档
// Delete a single document by id.
@Test
void testDeleteDocument() throws IOException {
    DeleteRequest request = new DeleteRequest("gaofan", "1");
    request.timeout("1s");
    System.out.println(restHighLevelClient.delete(request, RequestOptions.DEFAULT).status());
}
批量插入
// Bulk-insert several documents with one request.
@Test
void testBulkRequest() throws IOException {
    ObjectMapper mapper = new ObjectMapper(); // JSON serializer
    BulkRequest bulk = new BulkRequest();
    bulk.timeout("10s");
    List<User> users = new ArrayList<>();
    users.add(new User("张三", 1));
    users.add(new User("李四", 2));
    users.add(new User("王五", 3));
    users.add(new User("刘六", 4));
    // Bulk update/delete work the same way — just swap the request type.
    int id = 0;
    for (User user : users) {
        id++;
        bulk.add(new IndexRequest("gaofan")
                .id(String.valueOf(id))
                .source(mapper.writeValueAsString(user), XContentType.JSON));
    }
    BulkResponse response = restHighLevelClient.bulk(bulk, RequestOptions.DEFAULT);
    System.out.println(response.hasFailures()); // false means every action succeeded
}
查询
// Run an exact-match (term) search against the "gaofan" index and print the hits.
@Test
void testSearch() throws IOException {
    SearchRequest searchRequest = new SearchRequest("gaofan");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    // QueryBuilders is the factory for query clauses; termQuery is an exact match.
    // The ".keyword" sub-field keeps Chinese text from being analyzed away.
    // (QueryBuilders.matchAllQuery() would match everything.)
    sourceBuilder.query(QueryBuilders.termQuery("name.keyword", "张三"));
    sourceBuilder.timeout(TimeValue.timeValueSeconds(60));
    // Paging — both calls here are the no-arg accessors, so defaults apply.
    sourceBuilder.size();
    sourceBuilder.from();
    searchRequest.source(sourceBuilder);
    SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    // Everything of interest lives under hits.
    ObjectMapper objectMapper = new ObjectMapper();
    System.out.println(objectMapper.writeValueAsString(response.getHits()));
    System.out.println("-----------------------------");
    for (SearchHit hit : response.getHits().getHits()) {
        System.out.println(hit.getSourceAsMap());
    }
}
实战模仿jd搜索
爬虫
导入解析网页的包
<!--解析网页-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
package com.gf.utils;
import com.gf.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Scrapes JD's product search page into {@code Content} records.
 *
 * @author Administrator
 */
@Component
public class HtmlParseUtil {
    /**
     * Fetches and parses JD's search results for the given keywords.
     *
     * @param keyWords search terms appended to the JD search URL
     * @return one Content per product list item; an empty list when the
     *         expected product container is absent from the page
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJd(String keyWords) throws Exception {
        // e.g. https://search.jd.com/Search?keyword=java
        String url = "https://search.jd.com/Search?keyword=" + keyWords;
        // Fetch + parse with a 30s timeout; Document behaves like the browser DOM.
        Document document = Jsoup.parse(new URL(url), 30000);
        Element goods = document.getElementById("J_goodsList");
        ArrayList<Content> goodsList = new ArrayList<>();
        // Robustness fix: the container can be missing (layout change, anti-bot
        // page) — the old code dereferenced null here and threw an NPE.
        if (goods == null) {
            return goodsList;
        }
        // One <li> per product. NOTE(review): for lazy-loaded images the real
        // URL is said to live in the source-data-lazy-img attribute — confirm
        // against the live page before relying on "src".
        for (Element el : goods.getElementsByTag("li")) {
            Content content = new Content();
            content.setImg(el.getElementsByTag("img").eq(0).attr("src"));
            content.setTitle(el.getElementsByClass("p-name").eq(0).text());
            content.setPrice(el.getElementsByClass("p-price").eq(0).text());
            goodsList.add(content);
        }
        return goodsList;
    }
}
将爬虫爬到的数据放入ES并搜索
package com.gf.service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.gf.pojo.Content;
import com.gf.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * ES-backed indexing and search over scraped JD product data.
 *
 * @author Administrator
 */
@Service
public class ContentService {
    /** Jackson's ObjectMapper is thread-safe; share one instead of re-creating it per call. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    @Resource
    private RestHighLevelClient restHighLevelClient;
    @Resource
    private HtmlParseUtil htmlParseUtil;

    /**
     * Scrapes JD for the keywords and bulk-indexes the results into the
     * "jd-goods" index.
     *
     * @param keyWords search terms passed to the crawler
     * @return true when every bulk action succeeded
     * @throws Exception if crawling or indexing fails
     */
    public Boolean parseContent(String keyWords) throws Exception {
        List<Content> contents = htmlParseUtil.parseJd(keyWords);
        BulkRequest bulkRequest = new BulkRequest();
        // Generous timeout for the whole batch.
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            bulkRequest.add(new IndexRequest("jd-goods")
                    .source(OBJECT_MAPPER.writeValueAsString(content), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Paged, highlighted exact-match search on the product title.
     *
     * @param keywords term matched exactly against the "title" field
     * @param pageNo   1-based page number; values below 1 are clamped to 1
     * @param pageSize number of hits per page
     * @return one map per hit (_source), with "title" replaced by its
     *         highlighted fragments when highlighting produced any
     * @throws IOException on transport errors
     */
    public List<Map<String, Object>> searchPage(String keywords, int pageNo, int pageSize) throws IOException {
        if (pageNo <= 1) {
            pageNo = 1;
        }
        SearchRequest searchRequest = new SearchRequest("jd-goods");
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // Bug fix: "from" is a document offset, not a page number. The old code
        // passed pageNo directly, so page 1 silently skipped the first hit.
        searchSourceBuilder.from((pageNo - 1) * pageSize);
        searchSourceBuilder.size(pageSize);
        // Exact match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keywords);
        searchSourceBuilder.query(termQueryBuilder);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        // Highlighting: wrap matched title fragments in a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        // Highlight even fields that were not part of the query match.
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // For every hit, splice the highlighted title (if any) into its source map.
        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            HighlightField title = hit.getHighlightFields().get("title");
            if (title != null) {
                StringBuilder newTitle = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    newTitle.append(fragment);
                }
                // Store a String, not a StringBuilder, so downstream JSON
                // serialization of the map is well-defined.
                sourceAsMap.put("title", newTitle.toString());
            }
            // Reuse the map we already modified instead of fetching it again.
            results.add(sourceAsMap);
        }
        return results;
    }
}