1、对于自建ElasticSearch的分词配置相信大家都知道了,不多说
2、对于购买的阿里云Elasticsearch服务,无法像自建集群那样直接修改词典文件,需要通过阿里云OpenAPI远程热更新IK分词词典
条件和步骤:
1、首先购买阿里云的OSS服务,创建存储空间,这个费用不高,只收取存储和上传下载费用
2、引入依赖jar包,开发上传文件到OSS的接口
<!-- ik分词通过oss维护 start-->
<dependency>
<groupId>com.aliyun.oss</groupId>
<artifactId>aliyun-sdk-oss</artifactId>
<version>3.8.0</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>aliyun-java-sdk-elasticsearch</artifactId>
<version>3.0.11</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>aliyun-java-sdk-core</artifactId>
<optional>true</optional>
<version>4.4.2</version>
</dependency>
<!-- ik分词通过oss维护 end-->
3、开发通知elasticsearch热更新IK分词插件的接口
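4、使用过程为:先调用上传文件接口(POST /es/ikWords/upload)把词典文件更新到OSS,再调用通知接口(GET /es/ikWords/refresh)让Elasticsearch从OSS热更新词典;如需查看当前词典,可调用下载接口(GET /es/ikWords/download)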
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.oss.ClientException;
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.model.GetObjectRequest;
import com.aliyun.oss.model.OSSObject;
import com.aliyuncs.DefaultAcsClient;
import com.aliyuncs.elasticsearch.model.v20170613.UpdateHotIkDictsRequest;
import com.aliyuncs.http.FormatType;
import com.aliyuncs.http.HttpResponse;
import com.aliyuncs.profile.DefaultProfile;
import com.aliyuncs.profile.IClientProfile;
import com.oa.common.error.exception.OaBusinessException;
import com.oa.common.util.DateUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
/**
* 功能: ElasticSearch分词字典热更新<br/>
*
* @author ql
* @create 2020-01-21 14:06
**/
@Slf4j
@RestController
@RequestMapping
public class ElasticIkDictEndpoint {
/**
* endpoint是访问OSS的域名
*/
@Value("${oss.endpoint}")
private String endpoint;
/**
* 阿里云平台访问key
*/
@Value("${oss.accessKeyId}")
private String accessKeyId;
/**
* 阿里云平台访问Secret
*/
@Value("${oss.accessKeySecret}")
private String accessKeySecret;
/**
* 存储空间
*/
@Value("${oss.bucketName}")
private String bucketName;
/**
* oss存储目录
*/
@Value("${oss.fileDir}")
private String fileDir;
/**
* es地域
*/
@Value("${elasticsearch.regionId}")
private String regionId;
/**
* es实例
*/
@Value("${elasticsearch.instanceId}")
private String esInstanceId;
/**
* 阿里云Elasticsearch API的服务接入地址(Endpoint)
*
* | 华东1(杭州) | elasticsearch.cn-hangzhou.aliyuncs.com |
* | 华东2(上海) | elasticsearch.cn-shanghai.aliyuncs.com |
*/
@Value("${elasticsearch.domain}")
private String esDomain;
/**
* ik主分词文件名称,需要和es插件配置上的文件名一致,否则会导致es实例重启
*/
@Value("${elasticsearch.ikword}")
private String mainFileName;
/**
* ik停止词文件名称,需要和es插件配置上的文件名一致,否则会导致es实例重启
*/
@Value("${elasticsearch.stopword}")
private String stopFileName;
/**
* 功能: 上传es分词文件<br/>
*
* @author ql
* @create 2019/12/25 18:34
* @param file 上传文件
* @param wordType 分词类型:MAIN -IK主分词词库;STOP-IK停用词库
* @return java.lang.String
**/
@PostMapping("/es/ikWords/upload")
public String uploadIkDict2Oss(@RequestParam("file") MultipartFile file,@RequestParam("wordType") String wordType) throws Exception{
log.info("OSS文件开始上传,时间={}", DateUtil.getTime());
String fileName = "";
switch (wordType){
case "MAIN" :
fileName = mainFileName;
break;
case "STOP" :
fileName = stopFileName;
break;
default:
throw new OaBusinessException("分词类型不对。");
}
// 生成OSSClient,您可以指定一些参数,详见“SDK手册 > Java-SDK > 初始化”,
// 链接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/init.html?spm=5176.docoss/sdk/java-sdk/get-start
OSS ossClient = new OSSClientBuilder().build(endpoint, accessKeyId, accessKeySecret);
try {
// 判断Bucket是否存在。详细请参看“SDK手册 > Java-SDK > 管理Bucket”。
// 链接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/manage_bucket.html?spm=5176.docoss/sdk/java-sdk/init
if (ossClient.doesBucketExist(bucketName)) {
log.debug("您已经创建Bucket:{}" , bucketName );
} else {
log.info("您的Bucket不存在,创建Bucket:{}" , bucketName );
// 创建Bucket。详细请参看“SDK手册 > Java-SDK > 管理Bucket”。
// 链接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/manage_bucket.html?spm=5176.docoss/sdk/java-sdk/init
ossClient.createBucket(bucketName);
}
//存入oss的url
String ossFileUrl = fileDir + fileName;
// 上传文件流。
ossClient.putObject(bucketName, ossFileUrl, file.getInputStream());
log.info("Object:" + fileName + "存入OSS成功。");
} catch (OSSException oe) {
log.error("OSS文件上传失败,error={}",oe.getMessage(),oe);
return "failure";
} catch (ClientException ce) {
log.error("OSS文件上传失败,error={}",ce.getMessage(),ce);
return "failure";
} catch (Exception e) {
log.error("OSS文件上传失败,error={}",e.getMessage(),e);
return "failure";
} finally {
ossClient.shutdown();
log.debug("============流关闭===============");
}
log.info("OSS文件上传成功,时间={}", DateUtil.getTime());
return "success";
}
/**
* 功能: 下载分词文件<br/>
*
* @author ql
* @create 2020/01/21 19:34
* @param response
* @param wordType 分词类型:MAIN -IK主分词词库;STOP-IK停用词库
* @return java.lang.String
**/
@GetMapping("/es/ikWords/download")
public String downloadIkDict2Oss(HttpServletResponse response, @RequestParam("wordType") String wordType) throws Exception{
log.info("OSS文件开始下载,时间={}", DateUtil.getTime());
String fileName = "";
switch (wordType){
case "MAIN" :
fileName = mainFileName;
break;
case "STOP" :
fileName = stopFileName;
break;
default:
throw new OaBusinessException("分词类型不对。");
}
// 生成OSSClient,您可以指定一些参数,详见“SDK手册 > Java-SDK > 初始化”,
// 链接地址是:https://help.aliyun.com/document_detail/oss/sdk/java-sdk/init.html?spm=5176.docoss/sdk/java-sdk/get-start
OSS ossClient = new OSSClientBuilder().build(endpoint, accessKeyId, accessKeySecret);
//oss的url
String ossFileUrl = fileDir + fileName;
OSSObject object = ossClient.getObject(new GetObjectRequest(bucketName, ossFileUrl));
response.setContentType("application/binary;charset=UTF-8");
response.setHeader("Content-disposition", "attachment; filename=" + fileName);
BufferedInputStream bis = null;
OutputStream out = null;
try {
// 实现文件下载
byte[] buffer = new byte[1024];
bis = new BufferedInputStream(object.getObjectContent());
out = response.getOutputStream();
int i = bis.read(buffer);
while (i != -1) {
out.write(buffer, 0, i);
i = bis.read(buffer);
}
log.info("Object:" + fileName + "下载成功。");
} catch (OSSException oe) {
log.error("OSS文件下载失败,error={}",oe.getMessage(),oe);
return "failure";
} catch (ClientException ce) {
log.error("OSS文件下载失败,error={}",ce.getMessage(),ce);
return "failure";
} catch (Exception e) {
log.error("OSS文件下载失败,error={}",e.getMessage(),e);
return "failure";
} finally {
ossClient.shutdown();
if(bis != null){
bis.close();
}
if(out != null){
out.close();
}
log.debug("============流关闭===============");
}
log.info("OSS文件下载成功,时间={}", DateUtil.getTime());
return "success";
}
/**
* 功能: 刷新es的字典<br/>
*
* @author ql
* @create 2020/01/21 19:09
* @return java.lang.String
**/
@GetMapping("/es/ikWords/refresh")
public String refreshIKDictFromOss() throws Exception{
//针对上海区域的阿里云ES实例
IClientProfile profile = DefaultProfile.getProfile(regionId,accessKeyId, accessKeySecret);
//添加自定义endpoint。
DefaultProfile.addEndpoint(regionId,"elasticsearch", esDomain);
DefaultAcsClient client = new DefaultAcsClient(profile);
// updating es hot ik dicts
log.info(" Start updating es hot ik dicts...");
UpdateHotIkDictsRequest updateHotIkDictsRequest = new UpdateHotIkDictsRequest();
updateHotIkDictsRequest.setAcceptFormat(FormatType.JSON);
updateHotIkDictsRequest.setInstanceId(esInstanceId);
JSONObject mainObject = this.buildJSONObject(mainFileName, "MAIN", bucketName, fileDir + mainFileName);
JSONObject stopObject = this.buildJSONObject(stopFileName, "STOP", bucketName, fileDir + stopFileName);
JSONArray jsonArray = new JSONArray();
jsonArray.add(mainObject);
jsonArray.add(stopObject);
String content = JSONArray.toJSONString(jsonArray);
log.info("刷新es的字典,请求内容:"+content);
updateHotIkDictsRequest.setHttpContent(content.getBytes(), "UTF-8", FormatType.JSON);
try {
HttpResponse response = client.doAction(updateHotIkDictsRequest);
log.info("刷新时间={},status={},result={}",DateUtil.getTime(),response.getStatus(),response.getHttpContentString());
} catch (ClientException e) {
log.error("======刷新es的字典失败============{}",e.getMessage(),e);
return "failure";
}
return "success";
}
/**
* 功能: 组装请求报文 <br/>
*
* @author ql
* @create 2020/01/21 19:33
* @param fileName 文件名称
* @param type 文件类型:IK主分词词库取值为MAIN,如果是作为IK停用词库取值为STOP
* @param bucketName 桶名称
* @param key oss中的对应IK热词典文件路径
* @return com.alibaba.fastjson.JSONObject
**/
private JSONObject buildJSONObject(String fileName, String type, String bucketName, String key) {
JSONObject object = new JSONObject();
//name参数表示上传对应文件的文件名。
object.put("name", fileName);
//type参数值,如果是作为IK主分词词库取值为MAIN,如果是作为IK停用词库取值为STOP。
object.put("type", type);
//sourceType参数表示数据源类型,例如此处取值为OSS。
object.put("sourceType", "OSS");
JSONObject ossObject = new JSONObject();
//bucketName参数表示OSS数据源中的project名称
ossObject.put("bucketName", bucketName);
//key参数值表示oss中的对应IK热词典文件路径
ossObject.put("key", key);
//etag是对这个文件中的内容的md5值,可以取MD5后的32位大写数值,用来比对是否和上次的内容有差异,如果有差异或做更新。
// ossObject.put("etag","35C44BC0C7AFF5AE6E73B195F933810E");
//OSS中的热词文件需要有公共读权限。
object.put("ossObject", ossObject);
return object;
}
}