【ElasticSearch8.X】学习笔记
六、中文分词
为了能够更好地对中文进行搜索和查询,就需要在Elasticsearch中集成好的分词器插件,而 IK 分词器就是用于对中文提供支持的插件。
6.1、下载
选择下载的版本要与 Elasticsearch 版本对应。我们这里选择 8.1.0
下载地址
6.2、安装
如果是es集群的话,每台es都需要安装ik分词器
先在plugins里创建elasticsearch-analysis-ik文件夹
安装包上传到elk的 elasticsearch-analysis-ik中并解压
unzip elasticsearch-analysis-ik-8.1.0.zip
重启elk即可
6.3、使用
IK 分词器提供了两个分词算法:
- ik_smart: 最少切分
- ik_max_word:最细粒度划分
GET _analyze
{
"analyzer": "ik_smart",
"text": ["我是一个学生"]
}
GET _analyze
{
"analyzer": "ik_max_word",
"text": ["我是一个学生"]
}
6.4、自定义分词效果
集群都是一样操作
/usr/soft/elasticsearch8/plugins/elasticsearch-analysis-ik-8.1.0/config
新建分词eg.dic
修改配置文件:IKAnalyzer.cfg.xml
重启集群
七、文档得分
Lucene 和 ES 的得分机制是一个基于词频和逆文档词频的公式,简称为 TF-IDF 公式
公式中将查询作为输入,使用不同的手段来确定每一篇文档的得分,将每一个因素最后通过公式综合起来,返回该文档的最终得分。这个综合考量的过程,就是我们希望相关的文档被优先返回的考量过程。在 Lucene 和 ES 中这种相关性称为得分。
- TF (词频)
Term Frequency : 搜索文本中的各个词条(term)在被查询的文档文本中出现了多少次,出现次数越多,就越相关,得分会比较高
- IDF(逆文档频率)
Inverse Document Frequency : 搜索文本中的各个词条(term)在整个索引的所有文档中出现了多少次,出现的次数越多,说明越不重要,也就越不相关,得分就比较低。
分析文档数据打分过程
# 增加分析参数
GET /myindex/_search?explain=true
{
"query": {
"match_all": {}
}
}
八、新JavaAPI
8.1、maven
<dependencies>
<dependency>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>x-pack-sql-jdbc</artifactId>
<version>8.1.0</version>
</dependency>
<dependency>
<groupId>co.elastic.clients</groupId>
<artifactId>elasticsearch-java</artifactId>
<version>8.1.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.12.3</version>
</dependency>
<dependency>
<groupId>jakarta.json</groupId>
<artifactId>jakarta.json-api</artifactId>
<version>2.0.1</version>
</dependency>
</dependencies>
8.2、获取客户端操作
Java 通过客户端操作 Elasticsearch 也要获取到连接后才可以。咱们现在使用的是基于 https 安全的 Elasticsearch 服务,所以首先我们需要将之前的证书进行一个转换
cd /usr/soft/elasticsearch8/config/certs
openssl pkcs12 -in elastic-stack-ca.p12 -clcerts -nokeys -out java-ca.crt
将 java-ca.crt 下载下来,在项目文件下创建 certs 文件夹(与代码中读取的路径 certs/java-ca.crt 保持一致),并将 java-ca.crt 拷贝进去
8.2、UserBean
import java.io.Serializable;
/**
 * Simple serializable bean with an id, a name and an age.
 *
 * <p>Exposes a no-argument constructor plus getter/setter pairs for every
 * field, and a convenience constructor that sets all three fields.
 */
public class User implements Serializable {
    private Integer id;
    private String name;
    private Integer age;

    /** Creates a user with all fields left {@code null}. */
    public User() {
    }

    /**
     * Creates a fully populated user.
     *
     * @param id   the user id
     * @param name the user name
     * @param age  the user age
     */
    public User(Integer id, String name, Integer age) {
        this.id = id;
        this.name = name;
        this.age = age;
    }

    /** @return the user id, or {@code null} if it was never set */
    public Integer getId() {
        return id;
    }

    /** @param id the user id to store */
    public void setId(Integer id) {
        // The original setter was missing its opening brace and did not compile.
        this.id = id;
    }

    /** @return the user name, or {@code null} if it was never set */
    public String getName() {
        return name;
    }

    /** @param name the user name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the user age, or {@code null} if it was never set */
    public Integer getAge() {
        return age;
    }

    /** @param age the user age to store */
    public void setAge(Integer age) {
        this.age = age;
    }
}
8.3、代码
import co.elastic.clients.elasticsearch.*;
import co.elastic.clients.elasticsearch._types.FieldValue;
import co.elastic.clients.elasticsearch._types.query_dsl.MatchQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.Query;
import co.elastic.clients.elasticsearch.core.*;
import co.elastic.clients.elasticsearch.core.bulk.BulkOperation;
import co.elastic.clients.elasticsearch.core.bulk.CreateOperation;
import co.elastic.clients.elasticsearch.indices.*;
import co.elastic.clients.elasticsearch.indices.ExistsRequest;
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
import co.elastic.clients.transport.ElasticsearchTransport;
import co.elastic.clients.transport.endpoints.BooleanResponse;
import co.elastic.clients.transport.rest_client.RestClientTransport;
import org.apache.http.HttpHost;
import org.apache.http.auth.*;
import org.apache.http.client.*;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.impl.client.*;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.apache.http.ssl.*;
import org.elasticsearch.client.*;
import javax.net.ssl.SSLContext;
import java.io.InputStream;
import java.nio.file.*;
import java.security.KeyStore;
import java.security.cert.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Demo entry point that connects to an HTTPS-secured Elasticsearch node and
 * runs index and document operations through the Java API client.
 *
 * <p>{@link #main(String[])} opens the connection once, runs whichever
 * operations are enabled, and always closes the transport afterwards. The
 * individual operation methods therefore do not close the transport
 * themselves, so several of them can be enabled in the same run.
 */
public class ESClient {
    // Connection settings used by initESConnection().
    private static final String ES_HOST = "192.168.3.34";
    private static final int ES_PORT = 9200;
    private static final String ES_USERNAME = "elastic";
    // NOTE(review): credential is hard-coded in source; move it to external
    // configuration (environment variable / secret store) before real use.
    private static final String ES_PASSWORD = "A1Kw4X+fl+6HYxXPsDHD";
    // Path of the CA certificate that is loaded into the trust store.
    private static final String CA_CERT_PATH = "EXArea02-ELK8/certs/java-ca.crt";

    // Client for synchronous operations.
    private static ElasticsearchClient client;
    // Client for asynchronous operations.
    private static ElasticsearchAsyncClient asyncClient;
    // Transport shared by both clients; closed once at the end of main().
    private static ElasticsearchTransport transport;

    /**
     * Opens the connection, runs the enabled demo operations and closes the
     * transport, even when an operation throws.
     *
     * @param args unused
     * @throws Exception if connecting or any enabled operation fails
     */
    public static void main(String[] args) throws Exception {
        try {
            // Initialise the ES connection.
            initESConnection();
            // Create index.
            //createIndex();
            // Query index.
            //queryIndex();
            // Delete index.
            //deleteIndex();
            // Create document.
            //createDoc();
            // Bulk-create documents.
            //batchCreateDoc();
            // Delete document.
            //deleteDoc();
            // Query documents.
            queryDoc();
        } finally {
            closeConnection();
        }
    }

    /**
     * Closes the shared transport if it was created.
     *
     * @throws Exception if closing the transport fails
     */
    private static void closeConnection() throws Exception {
        // transport stays null when initESConnection() failed before creating it.
        if (transport != null) {
            transport.close();
        }
    }

    /**
     * Runs a match query for name = "zhangsan" on index "myindex" and prints
     * the search response.
     *
     * @throws Exception if the search request fails
     */
    private static void queryDoc() throws Exception {
        MatchQuery matchQuery = new MatchQuery.Builder()
                .field("name")
                .query(FieldValue.of("zhangsan"))
                .build();
        Query query = new Query.Builder().match(matchQuery).build();
        SearchRequest searchRequest = new SearchRequest.Builder()
                .index("myindex")
                .query(query)
                .build();
        SearchResponse<Object> search = client.search(searchRequest, Object.class);
        System.out.println(search);
    }

    /**
     * Deletes the document with id "1001" from index "myindex" and prints the
     * response.
     *
     * @throws Exception if the delete request fails
     */
    private static void deleteDoc() throws Exception {
        DeleteRequest deleteRequest = new DeleteRequest.Builder().index("myindex").id("1001").build();
        DeleteResponse deleteResponse = client.delete(deleteRequest);
        System.out.println("删除操作" + deleteResponse);
    }

    /**
     * Creates five {@link User} documents (ids "2001" to "2005") in index
     * "myindex" with a single bulk request and prints the response.
     *
     * @throws Exception if the bulk request fails
     */
    private static void batchCreateDoc() throws Exception {
        List<BulkOperation> operations = new ArrayList<>();
        for (int i = 1; i <= 5; i++) {
            // Typed builder instead of the raw type avoids unchecked conversions.
            CreateOperation.Builder<User> builder = new CreateOperation.Builder<>();
            builder.index("myindex");
            builder.id("200" + i);
            builder.document(new User(2000 + i, "zhangsan" + i, 10 + i));
            CreateOperation<User> createOperation = builder.build();
            operations.add(new BulkOperation.Builder().create(createOperation).build());
        }
        BulkRequest bulkRequest = new BulkRequest.Builder().operations(operations).build();
        BulkResponse bulkResponse = client.bulk(bulkRequest);
        System.out.println("数据操作成功:" + bulkResponse);
    }

    /**
     * Indexes one {@link User} document with id "1001" into index "myindex"
     * and prints the operation result.
     *
     * @throws Exception if the index request fails
     */
    private static void createDoc() throws Exception {
        User user = new User();
        user.setId(1001);
        user.setName("zhangsan");
        user.setAge(18);

        IndexRequest<User> indexRequest = new IndexRequest.Builder<User>()
                .index("myindex")
                .id("1001")
                .document(user)
                .build();
        IndexResponse index = client.index(indexRequest);
        System.out.println("文档操作结果:" + index.result());
    }

    /**
     * Deletes index "myindex1" and prints whether the request was acknowledged.
     *
     * @throws Exception if the delete-index request fails
     */
    private static void deleteIndex() throws Exception {
        DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest.Builder().index("myindex1").build();
        DeleteIndexResponse delete = client.indices().delete(deleteIndexRequest);
        boolean acknowledged = delete.acknowledged();
        System.out.println("删除索引成功:" + acknowledged);
    }

    /**
     * Fetches index "myindex1" and prints the response.
     *
     * @throws Exception if the get-index request fails
     */
    private static void queryIndex() throws Exception {
        GetIndexRequest getIndexRequest = new GetIndexRequest.Builder().index("myindex1").build();
        GetIndexResponse getIndexResponse = client.indices().get(getIndexRequest);
        System.out.println("索引查询成功:" + getIndexResponse);
    }

    /**
     * Creates index "myindex1" unless it already exists, printing the outcome
     * in either case.
     *
     * @throws Exception if the exists or create request fails
     */
    private static void createIndex() throws Exception {
        // Check whether the index already exists.
        ExistsRequest existsRequest = new ExistsRequest.Builder().index("myindex1").build();
        boolean exists = client.indices().exists(existsRequest).value();
        if (exists) {
            System.out.println("索引已经存在");
            return;
        }
        CreateIndexRequest request = new CreateIndexRequest.Builder().index("myindex1").build();
        CreateIndexResponse createIndexResponse = client.indices().create(request);
        System.out.println("创建索引成功:" + createIndexResponse.acknowledged());
    }

    /**
     * Builds the HTTPS transport and initialises the static
     * {@code transport}, {@code client} and {@code asyncClient} fields.
     *
     * <p>The CA certificate at {@code CA_CERT_PATH} is loaded into an in-memory
     * trust store, which backs the SSL context used by the REST client; basic
     * authentication uses {@code ES_USERNAME} / {@code ES_PASSWORD}.
     *
     * @throws Exception if the certificate cannot be read or the SSL context
     *                   cannot be built
     */
    private static void initESConnection() throws Exception {
        // Basic-auth credentials for every request.
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials(ES_USERNAME, ES_PASSWORD));

        // Load the CA certificate from disk.
        Path caCertificatePath = Paths.get(CA_CERT_PATH);
        CertificateFactory factory = CertificateFactory.getInstance("X.509");
        Certificate trustedCa;
        try (InputStream is = Files.newInputStream(caCertificatePath)) {
            trustedCa = factory.generateCertificate(is);
        }

        // Empty in-memory trust store holding only that CA.
        KeyStore trustStore = KeyStore.getInstance("pkcs12");
        trustStore.load(null, null);
        trustStore.setCertificateEntry("ca", trustedCa);
        final SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(trustStore, null).build();

        // NOTE(review): NoopHostnameVerifier turns off hostname verification;
        // acceptable for a lab setup only, remove it once the node certificate
        // matches the host being connected to.
        RestClientBuilder builder = RestClient.builder(new HttpHost(ES_HOST, ES_PORT, "https"))
                .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder
                        .setSSLContext(sslContext)
                        .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE)
                        .setDefaultCredentialsProvider(credentialsProvider));
        RestClient restClient = builder.build();

        transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
        // Client for synchronous operations.
        client = new ElasticsearchClient(transport);
        // Client for asynchronous operations.
        asyncClient = new ElasticsearchAsyncClient(transport);
    }
}