springboot python整合_SpringBoot整合Elasticsearch7.4.1实现建议搜索(Python爬取数据并存es)-Go语言中文社区...

背景:

最近做的一个App需要用到搜索建议功能,效果类似于我们在使用百度搜索时看到的输入提示:

format,png

可以看到,每输入一个字符就会发送一个请求;

format,png

这个功能看似很高大上,其实做一个简单的实现还是很简单的。

原理:

这个技术,主要有以下的技术点:

Elasticsearch 7.4.1(以下称为ES)的搜索建议功能(Completion Suggester)

ik中文分词插件

首先需要将数据存放到ES中。存放时需要做一些简单的处理:对需要搜索的field做分词,把分词结果放在列表中,再一起存入ES。数据采集使用的是JD的商品数据,爬取部分采用Python实现,所以这里贴出的是Python代码。

# 由传过来的字符串生成suggest数组

def gen_suggest(index, info_tuple):
    """Build the ``suggest`` payload stored alongside a document.

    Every non-empty text is tokenized by calling the ``ik_max_word`` analyzer
    on the ``jd_product`` index. Tokens longer than one character that were
    not already emitted for an earlier text form one suggestion entry, so a
    token only ever appears under the first (text, weight) pair that
    produced it.

    Args:
        index: Not used by this function; kept so existing callers keep
            working. The analyze call always targets ``jd_product``.
        info_tuple: Iterable of ``(text, weight)`` pairs, processed in order.

    Returns:
        A list of ``{"input": [tokens], "weight": weight}`` dicts, one for
        each text that contributed at least one new token. Empty or ``None``
        texts contribute nothing.
    """
    used_word = set()
    suggest = []
    for text, wight in info_tuple:
        if not text:
            continue
        words = es.indices.analyze(
            index="jd_product",
            body={"analyzer": "ik_max_word", "text": "{0}".format(text)},
        )
        # Single-character tokens are dropped.
        analyzed_word = {r["token"] for r in words["tokens"] if len(r["token"]) > 1}
        naw_words = analyzed_word - used_word
        if naw_words:
            # Record emitted tokens so later texts do not repeat them; without
            # this the set difference above never filters anything.
            used_word |= naw_words
            suggest.append({"input": list(naw_words), "weight": wight})
    return suggest

def save_ES(result):
    """Copy one scraped record onto a new ``JD_Product`` and save it.

    Args:
        result: Mapping holding the scraped values under the hyphenated keys
            listed below; a missing key raises ``KeyError``.
    """
    # (attribute on the document, key in the scraped record), copied in order.
    attr_to_key = (
        ("by_self", "by-self"),
        ("comment_cnt", "comment-cnt"),
        ("title", "title"),
        ("pid", "pid"),
        ("image_data_lazy_img", "image-data-lazy-img"),
        ("image_src", "image-src"),
        ("price", "price"),
        ("shop_name", "shop-name"),
    )

    jd = JD_Product()
    for attr_name, source_key in attr_to_key:
        setattr(jd, attr_name, result[source_key])

    # Suggestions come from the title (weight 10) and the shop name (weight 7).
    weighted_texts = ((jd.title, 10), (jd.shop_name, 7))
    jd.suggest = gen_suggest(JD_Product, weighted_texts)

    jd.save()

此时存放在ES中的数据是这样的:

{

"_index": "jd_product",

"_type": "_doc",

"_id": "opSOOXEBy66jXuB0CIVc",

"_version": 1,

"_seq_no": 15536,

"_primary_term": 1,

"found": true,

"_source": {

"by_self": "",

"comment_cnt": "2200+",

"title": "京东国际贝玲妃(Benefit)妆前乳/打底霜 毛孔遮盖脸部底霜(反恐精英/反孔) 22ml 【部分满199减100】护肤彩妆一站购全,点击进入点击进入",

"pid": "25715154185",

"image_data_lazy_img": "//img10.360buyimg.com/n7/jfs/t1/105249/25/17056/74155/5e84303eE9afd3891/fd91ef9acfdff12f.jpg",

"image_src": "//img10.360buyimg.com/n7/jfs/t1/105249/25/17056/74155/5e84303eE9afd3891/fd91ef9acfdff12f.jpg",

"price": "¥198.00",

"shop_name": "星线美妆海外专营店",

"suggest": [

{

"input": [

"22",

"点击",

"22ml",

"精英",

"100",

"ml",

"反恐",

"进入",

"199",

"京东",

"遮盖",

"反恐精英",

"打底",

"国际",

"一站",

"部分",

"毛孔",

"护肤",

"脸部",

"benefit"

],

"weight": 10

},

{

"input": [

"专营店",

"海外",

"专营"

],

"weight": 7

}

]

}

}

可以看到title和shop_name都已经分词并存入suggest字段;这样就可以实现搜索建议的API了。

实现:

suggest的API采用SpringBoot实现,由于ES版本比较高(7.4.1),所以使用原生的RestHighLevelClient来进行操作。

添加相关依赖:

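<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>7.4.1</version>
</dependency>

<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.4.1</version>
</dependency>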

此外需要在pom.xml中添加以下配置,避免出错

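<properties>
    <java.version>1.8</java.version>
    <elasticsearch.version>7.4.1</elasticsearch.version>
</properties>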

ESClient工厂:

import org.apache.http.HttpHost;

import org.elasticsearch.client.RestClient;

import org.elasticsearch.client.RestClientBuilder;

import org.elasticsearch.client.RestHighLevelClient;

import java.io.IOException;

/**
 * Singleton holder for the Elasticsearch REST clients.
 *
 * <p>Connection settings are kept in static fields that the {@code build} methods overwrite;
 * {@link #init()} then creates the clients from those settings and {@link #close()} releases
 * them. Only one high-level client is created, and {@link #getClient()} returns the low-level
 * client that lives inside it, so both accessors share one underlying client.
 */
public class ESClientSpringFactory {

    public static int CONNECT_TIMEOUT_MILLIS = 1000;

    public static int SOCKET_TIMEOUT_MILLIS = 30000;

    public static int CONNECTION_REQUEST_TIMEOUT_MILLIS = 500;

    public static int MAX_CONN_PER_ROUTE = 10;

    public static int MAX_CONN_TOTAL = 30;

    private static HttpHost HTTP_HOST;

    private RestClientBuilder builder;

    private RestClient restClient;

    private RestHighLevelClient restHighLevelClient;

    private static final ESClientSpringFactory esClientSpringFactory = new ESClientSpringFactory();

    private ESClientSpringFactory() {}

    /**
     * Stores the target host and connection limits and returns the shared factory instance.
     *
     * @param httpHost           Elasticsearch node to connect to
     * @param maxConnectNum      maximum total connections
     * @param maxConnectPerRoute maximum connections per route
     * @return the singleton factory; call {@link #init()} before using its clients
     * @throws NullPointerException if a numeric argument is {@code null}
     */
    public static ESClientSpringFactory build(HttpHost httpHost,
            Integer maxConnectNum, Integer maxConnectPerRoute) {
        // Validate first so a bad argument leaves the static settings untouched.
        int total = requireValue(maxConnectNum, "maxConnectNum");
        int perRoute = requireValue(maxConnectPerRoute, "maxConnectPerRoute");

        HTTP_HOST = httpHost;
        MAX_CONN_TOTAL = total;
        MAX_CONN_PER_ROUTE = perRoute;
        return esClientSpringFactory;
    }

    /**
     * Stores the target host, timeouts and connection limits and returns the shared factory
     * instance.
     *
     * @param httpHost              Elasticsearch node to connect to
     * @param connectTimeOut        connect timeout in milliseconds
     * @param socketTimeOut         socket timeout in milliseconds
     * @param connectionRequestTime connection-request timeout in milliseconds
     * @param maxConnectNum         maximum total connections
     * @param maxConnectPerRoute    maximum connections per route
     * @return the singleton factory; call {@link #init()} before using its clients
     * @throws NullPointerException if a numeric argument is {@code null}
     */
    public static ESClientSpringFactory build(HttpHost httpHost, Integer connectTimeOut,
            Integer socketTimeOut, Integer connectionRequestTime, Integer maxConnectNum,
            Integer maxConnectPerRoute) {
        // Validate first so a bad argument leaves the static settings untouched.
        int connect = requireValue(connectTimeOut, "connectTimeOut");
        int socket = requireValue(socketTimeOut, "socketTimeOut");
        int request = requireValue(connectionRequestTime, "connectionRequestTime");
        int total = requireValue(maxConnectNum, "maxConnectNum");
        int perRoute = requireValue(maxConnectPerRoute, "maxConnectPerRoute");

        HTTP_HOST = httpHost;
        CONNECT_TIMEOUT_MILLIS = connect;
        SOCKET_TIMEOUT_MILLIS = socket;
        CONNECTION_REQUEST_TIMEOUT_MILLIS = request;
        MAX_CONN_TOTAL = total;
        MAX_CONN_PER_ROUTE = perRoute;
        return esClientSpringFactory;
    }

    /** Unboxes a required numeric setting, naming the argument when it is missing. */
    private static int requireValue(Integer value, String name) {
        if (value == null) {
            throw new NullPointerException(name + " must not be null");
        }
        return value;
    }

    /**
     * Creates the clients from the settings stored by {@code build}.
     *
     * <p>The builder is handed to the high-level client only, and the low-level client is
     * taken from it. Calling {@code builder.build()} as well would create a second,
     * independent client with its own connection pool.
     */
    public void init() {
        builder = RestClient.builder(HTTP_HOST);
        setConnectTimeOutConfig();
        setMutiConnectConfig();
        restHighLevelClient = new RestHighLevelClient(builder);
        restClient = restHighLevelClient.getLowLevelClient();
        System.out.println("init factory");
    }

    /** Applies the connect, socket and connection-request timeouts to the builder. */
    public void setConnectTimeOutConfig() {
        builder.setRequestConfigCallback(requestConfigBuilder -> {
            requestConfigBuilder.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            requestConfigBuilder.setSocketTimeout(SOCKET_TIMEOUT_MILLIS);
            requestConfigBuilder.setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MILLIS);
            return requestConfigBuilder;
        });
    }

    /** Applies the total and per-route connection limits to the async HTTP client. */
    public void setMutiConnectConfig() {
        builder.setHttpClientConfigCallback(httpClientBuilder -> {
            httpClientBuilder.setMaxConnTotal(MAX_CONN_TOTAL);
            httpClientBuilder.setMaxConnPerRoute(MAX_CONN_PER_ROUTE);
            return httpClientBuilder;
        });
    }

    /**
     * @return the low-level client owned by the high-level client, or {@code null} before
     *         {@link #init()} has run
     */
    public RestClient getClient() {
        return restClient;
    }

    /**
     * @return the high-level client, or {@code null} before {@link #init()} has run
     */
    public RestHighLevelClient getRhlClient() {
        return restHighLevelClient;
    }

    /**
     * Closes the high-level client, which also closes the low-level client it owns.
     * Failures are reported but not rethrown so that shutdown can continue.
     */
    public void close() {
        if (restHighLevelClient != null) {
            try {
                restHighLevelClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        System.out.println("close client");
    }
}

ESConfig:

import lombok.extern.slf4j.Slf4j;

import org.apache.http.HttpHost;

import org.elasticsearch.client.RestClient;

import org.elasticsearch.client.RestHighLevelClient;

import org.springframework.beans.factory.annotation.Value;

import org.springframework.context.annotation.Bean;

import org.springframework.context.annotation.ComponentScan;

import org.springframework.context.annotation.Configuration;

import org.springframework.context.annotation.Scope;

/**
 * Spring configuration that reads the {@code elasticSearch.*} properties and publishes the
 * Elasticsearch host, the client factory and both REST clients as beans.
 */
@Configuration
@Slf4j
@ComponentScan(basePackageClasses = ESClientSpringFactory.class)
public class ESConfig {

    /** Host name or IP of the Elasticsearch node. */
    @Value("${elasticSearch.host}")
    private String esHost;

    /** HTTP port of the Elasticsearch node. */
    @Value("${elasticSearch.port}")
    private int esPort;

    /** Passed to the factory as the maximum total connection count. */
    @Value("${elasticSearch.client.connectNum}")
    private Integer maxTotalConnections;

    /** Passed to the factory as the maximum connection count per route. */
    @Value("${elasticSearch.client.connectPerRoute}")
    private Integer maxConnectionsPerRoute;

    /**
     * @return the configured node address, always using the {@code http} scheme
     */
    @Bean
    public HttpHost httpHost() {
        final String scheme = "http";
        return new HttpHost(esHost, esPort, scheme);
    }

    /**
     * Publishes the client factory; the bean is declared with {@code init} as its init
     * method and {@code close} as its destroy method.
     *
     * @return the factory configured with the host and connection limits
     */
    @Bean(initMethod = "init", destroyMethod = "close")
    public ESClientSpringFactory getFactory() {
        HttpHost target = httpHost();
        return ESClientSpringFactory.build(target, maxTotalConnections, maxConnectionsPerRoute);
    }

    /**
     * @return the low-level REST client held by the factory
     */
    @Bean
    @Scope("singleton")
    public RestClient getRestClient() {
        ESClientSpringFactory factory = getFactory();
        return factory.getClient();
    }

    /**
     * @return the high-level REST client held by the factory
     */
    @Bean
    @Scope("singleton")
    public RestHighLevelClient getRHLClient() {
        ESClientSpringFactory factory = getFactory();
        return factory.getRhlClient();
    }
}

配置文件(application.properties)中的配置数据:

elasticSearch.host=ip地址

elasticSearch.port=9200

elasticSearch.client.connectNum=10

elasticSearch.client.connectPerRoute=50

编写一个测试:

@Qualifier("getRHLClient")

@Autowired

RestHighLevelClient restHighLevelClient;

/**
 * Sends a completion-suggest request for a fixed prefix to the {@code jd_product} index
 * and prints the suggestions from the response.
 *
 * @throws Exception if the search request fails
 */
@Test
public void getSuggest() throws Exception {
    String data = "手机";

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

    // Fuzzy-matching options for the prefix.
    // NOTE(review): a fuzziness of 0 looks like it disables typo tolerance; confirm intended.
    FuzzyOptions fuzzy = FuzzyOptions.builder()
            .setFuzzyPrefixLength(1)
            .setFuzziness(0)
            .setFuzzyMinLength(3)
            .build();

    // Completion suggestion on the "suggest" field, registered under the name "my-suggest".
    CompletionSuggestionBuilder completionSuggestion =
            SuggestBuilders.completionSuggestion("suggest").prefix(data, fuzzy);
    SuggestBuilder suggestBuilder = new SuggestBuilder();
    suggestBuilder.addSuggestion("my-suggest", completionSuggestion);
    searchSourceBuilder.suggest(suggestBuilder);

    // Only return these _source fields.
    String[] includes = {"shop_name", "title"};
    String[] excludes = {};
    searchSourceBuilder.fetchSource(includes, excludes);

    SearchRequest searchRequest = new SearchRequest("jd_product"); // target index
    searchRequest.source(searchSourceBuilder);

    SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    // The suggest section of the response, not the search hits.
    Suggest suggestions = response.getSuggest();
    System.out.println("suggestions = " + suggestions);
}

获取到返回的数据:

format,png

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值