ES 6.1.2 + LTR 常用 DSL
https://elasticsearch-learning-to-rank.readthedocs.io/
LTR 插件中使用的 DSL 是旧版本的写法,新版本中很多内容已经改变;新版本的特征集构建部分应改为以下形式(已在 ES 6.1.2 上测试成功)。
使用 function_score 将某一列(vote_average)的值加到 _score 上返回
# Match every movie and let script_score compute `_score + vote_average`;
# only original_title, id and vote_average are returned from _source.
GET tmdb/movie/_search
{
  "query": {
    "function_score": {
      "query": { "match_all": {} },
      "functions": [
        {
          "script_score": {
            "script": "_score +doc['vote_average'].value"
          }
        }
      ]
    }
  },
  "_source": ["original_title", "id", "vote_average"]
}
LTR构建特征集
# Create the "more_movie_features" feature set with two match-query features,
# both driven by the "keywords" parameter:
#   body_query  -> matches against the "overview" field
#   title_query -> matches against the "title" field
PUT _ltr/_featureset/more_movie_features
{
  "featureset": {
    "features": [
      {
        "name": "body_query",
        "params": ["keywords"],
        "template": {
          "match": { "overview": "{{keywords}}" }
        }
      },
      {
        "name": "title_query",
        "params": ["keywords"],
        "template": {
          "match": { "title": "{{keywords}}" }
        }
      }
    ]
  }
}
增加一个 function_score 类型的特征
# Append a parameterless "user_rating" feature to more_movie_features.
# Its template is a function_score over match_all whose script_score reads
# the document's vote_average value.
POST /_ltr/_featureset/more_movie_features/_addfeatures
{
  "features": [
    {
      "name": "user_rating",
      "params": [],
      "template_language": "mustache",
      "template": {
        "function_score": {
          "query": { "match_all": {} },
          "functions": [
            {
              "script_score": {
                "script": "doc['vote_average'].value"
              }
            }
          ]
        }
      }
    }
  ]
}
对以上特征进行查询验证
# Verify the feature set by logging its feature values for three documents
# (_id 7555, 1370, 1369) with keywords = "rambo".
# The sltr filter is named "logged_featureset"; the ltr_log extension refers
# to it through "named_query" and labels the output "log_entry1".
GET tmdb/movie/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "terms": {
            "_id": ["7555", "1370", "1369"]
          }
        },
        {
          "sltr": {
            "_name": "logged_featureset",
            "featureset": "more_movie_features",
            "params": { "keywords": "rambo" }
          }
        }
      ]
    }
  },
  "ext": {
    "ltr_log": {
      "log_specs": {
        "name": "log_entry1",
        "named_query": "logged_featureset"
      }
    }
  },
  "_source": ["original_title", "id", "vote_average"]
}
创建模型
# Create the model "my_ranklib_model" (type model/ranklib) from the
# more_movie_features feature set. The definition is a LambdaMART ensemble
# containing a single tree (weight 0.1) that splits on features 1 and 2 only.
# NOTE(review): feature numbers presumably refer to the 1-based position of
# each feature in the feature set (1 = body_query, 2 = title_query) - confirm.
# The triple-quoted string is sent as-is; do not re-indent its contents.
POST _ltr/_featureset/more_movie_features/_createmodel
{
"model": {
"name": "my_ranklib_model",
"model": {
"type": "model/ranklib",
"definition": """
## LambdaMART
## No. of trees = 1000
## No. of leaves = 10
## No. of threshold candidates = 256
## Learning rate = 0.1
## Stop early = 100
<ensemble>
<tree id="1" weight="0.1">
<split>
<feature> 1 </feature>
<threshold> 10.357836 </threshold>
<split pos="left">
<feature> 2 </feature>
<threshold> 11.950331 </threshold>
<split pos="left">
<feature> 1 </feature>
<threshold> 6.815881 </threshold>
<split pos="left">
<feature> 2 </feature>
<threshold> 8.725015 </threshold>
<split pos="left">
<feature> 1 </feature>
<threshold> 0.0 </threshold>
<split pos="left">
<output> -2.0 </output>
</split>
<split pos="right">
<output> -2.0 </output>
</split>
</split>
<split pos="right">
<output> -1.990678310394287 </output>
</split>
</split>
<split pos="right">
<feature> 2 </feature>
<threshold> 8.814796 </threshold>
<split pos="left">
<feature> 2 </feature>
<threshold> 7.3007236 </threshold>
<split pos="left">
<output> -1.9988385438919067 </output>
</split>
<split pos="right">
<output> -1.9554523229599 </output>
</split>
</split>
<split pos="right">
<feature> 1 </feature>
<threshold> 7.0104666 </threshold>
<split pos="left">
<output> -1.739653468132019 </output>
</split>
<split pos="right">
<output> -1.516905426979065 </output>
</split>
</split>
</split>
</split>
<split pos="right">
<output> 2.0 </output>
</split>
</split>
<split pos="right">
<feature> 1 </feature>
<threshold> 10.686367 </threshold>
<split pos="left">
<output> 2.0 </output>
</split>
<split pos="right">
<output> 2.0 </output>
</split>
</split>
</split>
</tree>
</ensemble>
"""
}
}
}
查看当前model
# Retrieve the stored model named my_ranklib_model.
GET _ltr/_model/my_ranklib_model
Search using the model
# Run a query_string search for "rambo", then rescore the hits with
# my_ranklib_model through an sltr rescore query (keywords = "rambo").
# No window_size is given, so Elasticsearch's default rescore window applies.
# ltr_log records feature values for the rescore query at index 0 under the
# name "log_entry1".
POST tmdb/_search
{
  "query": {
    "query_string": { "query": "rambo" }
  },
  "rescore": {
    "query": {
      "rescore_query": {
        "sltr": {
          "params": { "keywords": "rambo" },
          "model": "my_ranklib_model"
        }
      }
    }
  },
  "ext": {
    "ltr_log": {
      "log_specs": {
        "name": "log_entry1",
        "rescore_index": 0
      }
    }
  },
  "_source": ["original_title", "id", "vote_average"]
}
取列的值
# Fetch the document with id 71508 of type "movie" from the tmdb index.
GET tmdb/movie/71508
# Score documents from the "popularity" field via field_value_factor.
# "missing": 0 is the value used for documents that have no popularity field.
# No inner "query" is given, so function_score applies to all documents.
POST tmdb/_search
{
  "query": {
    "function_score": {
      "field_value_factor": {
        "field": "popularity",
        "missing": 0
      }
    }
  },
  "_source": ["original_title", "id", "vote_average", "popularity"]
}