IK分词器的分词
ik_max_word
由IK分词器提供,拆分的程度由插件自行判断,只是分得更细。
缺点:拆分过细,会产生多余的词条参与检索
# 方式一 ik_max_word 细粒度(最细粒度拆分)
GET /_analyze
{
"analyzer": "ik_max_word",
"text": "LOL全球总决赛No.1"
}
ik_smart
少量关键字,推荐使用这个。
# 方式二 ik_smart 粗粒度(最粗粒度拆分)
GET /_analyze
{
"analyzer": "ik_smart",
"text": "LOL全球总决赛No.1"
}
准备测试数据
# Create the index.
# "name" is a keyword (exact match only); "address" is text analyzed with ik_max_word.
# "age" is not mapped explicitly here, so its type is left to Elasticsearch's dynamic mapping.
PUT person2
{
  "mappings": {
    "properties": {
      "name": {
        "type": "keyword"
      },
      "address": {
        "type": "text",
        "analyzer": "ik_max_word"
      }
    }
  }
}

# Add documents (each request must start on its own line in the console)
POST /person2/_doc/1
{
  "name":"张三",
  "age":18,
  "address":"重庆市江北区"
}

POST /person2/_doc/2
{
  "name":"李四",
  "age":19,
  "address":"重庆市渝中区"
}

POST /person2/_doc/3
{
  "name":"王五",
  "age":20,
  "address":"重庆市南岸区"
}

POST /person2/_doc/4
{
  "name":"何六",
  "age":21,
  "address":"四川省高新区"
}

POST /person2/_doc/5
{
  "name":"田七",
  "age":22,
  "address":"四川省双流区"
}

# _id 6 is not created here; the _bulk example later in this file creates it.
POST /person2/_doc/7
{
  "name":"黄黄",
  "age":25,
  "address":"上海市长宁区"
}
单个词条检索 [ term ]
term:词条查询
# 单个词条检索
# ik分词器分后的词条进行检索
# keyword类型必须完全一致,才能查询出
# text类型会进行单个相匹配的词条查询
GET /person2/_search
{
"query": {
"term": {
"address": {
"value": "南岸"
}
}
}
}
多词条检索 [ terms ]
# 多词条检索
GET /person2/_search
{
"query": {
"terms": {
"address": ["重庆","区"]
}
}
}
模糊查询 [ fuzzy ]
# 模糊查询
# 允许关键字与词条之间存在一定的编辑距离(字符偏差),此处设置为2;不指定时默认为AUTO(按词条长度自动取0、1或2)
# fuzziness:设置偏差
GET /person2/_search
{
"query": {
"fuzzy": {
"address": {
"value": "南岸12",
"fuzziness":"2"
}
}
}
}
全文查询 [ match ]
# Match query: the search text is analyzed into tokens first, e.g. [重庆][南岸].
# By default the tokens are combined with OR (union): a document matches if it contains any of them.
GET /person2/_search
{
  "query": {
    "match": {
      "address":"重庆南岸"
    }
  }
}

# Intersection (AND): a document matches only if it contains every token.
GET /person2/_search
{
  "query": {
    "match": {
      "address": {
        "query": "重庆南岸",
        "operator": "and"
      }
    }
  }
}
并集(or)
交集(and)
全部( match_all)
# 查询全部文档
GET /person2/_search
{
"query": { "match_all": {} }
}
多字段检索 [ multi_match ]
# 多字段检索 [address] [name] 只要含有“重庆”两字关键字,查询出相应文档
GET /person2/_search
{
"query": {
"multi_match": {
"query": "重庆",
"fields": ["address","name"]
}
}
}
通配符检索 [ wildcard ]
# 通配符查询
# "重*":* 匹配零个或多个字符,即以“重”开头的所有词条
GET /person2/_search
{
"query": {
"wildcard": {
"address":"重*"
}
}
}
# 通配符查询
# "岸?":? 只匹配任意一个字符,即以“岸”开头且总长度为两个字符的词条
GET /person2/_search
{
"query": {
"wildcard": {
"address":"岸?"
}
}
}
正则查询 [ regexp ]
正则查询取决于正则表达式的效率
# 正则查询
# (.)*为任意字符
GET /person2/_search
{
"query": {
"regexp": {
"address":"(.)*"
}
}
}
前缀查询 [ prefix ]
# 前缀查询
# 末尾字查询结果为无
GET /person2/_search
{
"query": {
"prefix": {
"address":"上"
}
}
}
范围查询 [ range ]
# 范围查询
# gt大于 gte大于等于 lt小于 lte小于等于
GET /person2/_search
{
"query": {
"range": {
"age":{
"gte":18,
"lte":20
}
}
}
}
多条件查询 [ query_string ]
注意:看着下面两个查询,除了“重庆”和“上海”关键字不同外,再无区别。而添加的文档里面,两种类型的数据都是存在,并且分词器,也成功将“重庆市”和“上海市”分词,结果却大出意外。
# 多条件查询
# 识别单个词条 and 两个词条都要查询,得到交集
# or 查询出并集
GET /person2/_search
{
"query": {
"query_string": {
"default_field": "address",
"query": "重庆 AND 市 OR 区"
}
}
}
GET /person2/_search
{
"query": {
"query_string": {
"default_field": "address",
"query": "上海 AND 市 OR 区"
}
}
}
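上海的数据没有查询出来!!!
结论:这不一定是 query_string 遗漏数据。AND 要求文档中同时存在独立的“上海”和“市”两个词条,建议先用 _analyze 查看“上海市长宁区”的实际分词结果,确认是否分出了独立的“市”(例如可能被分成“市长”)。另外 query_string 中的 AND / OR 不遵循常规的运算符优先级,多个运算符混用时应使用括号明确分组,例如 "(上海 AND 市) OR 区"。也可以选择使用下面的方式补救。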
自动分词的连接符 [ default_operator ]
# 自动分词的连接符
GET /person2/_search
{
"query": {
"query_string": {
"default_field": "address",
"default_operator": "AND",
"query": "上海市"
}
}
}
多条件多词条查询 (or并集)
GET /person2/_search
{
"query": {
"query_string": {
"fields": ["address"],
"query": "上海 OR 市"
}
}
}
单例多条件查询 [ simple_query_string ]
# 不识别 AND、OR 关键字作为连接符(simple_query_string 使用 +、|、- 等符号表示逻辑),各词条默认以 or 连接查询
GET /person2/_search
{
"query": {
"simple_query_string": {
"query": "重庆 AND 市 OR 区",
"fields": ["address"]
}
}
}
布尔查询 [ bool ]
must
# 布尔查询
# must:条件必须成立
# must_not:条件必须不成立
# _score(得分):每条数据匹配度,匹配度越高,得分越高
GET /person2/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"address": {
"value": "重庆"
}
}
}
]
}
}
}
filter
# filter:过滤
# 不计算得分(_score 为 0),过滤结果可被缓存,性能更优
GET /person2/_search
{
"query": {
"bool": {
"filter": [
{
"term": {
"address": {
"value": "重庆"
}
}
}
]
}
}
}
should
# should:或许
GET /person2/_search
{
"query": {
"bool": {
"should": [
{
"terms": {
"address": [
"河南",
"河北",
"重庆"
]
}
}
]
}
}
}
组合使用
# 组合
GET /person2/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"address": {
"value": "重庆"
}
}
}
],
"filter": [
{
"term": {
"address": {
"value": "南岸"
}
}
}
]
}
}
}
倒序 [ sort ]
# id 倒序 asc / desc
GET /person2/_search
{
"query": { "match_all": {} },
"sort": [
{ "_id": "desc" }
]
}
关键字查询
# 指定返回字段(_source 过滤)
# 只返回 name 和“张三”两个字段,
# “张三”字段不存在,默认忽略
GET /person2/_search
{
"query": { "match_all": {} },
"_source": ["name","张三"]
}
排除关键字查询
# 排除关键字查询
GET /person2/_search
{
"_source": {
"excludes":"age"
},
"query": { "match_all": {} }
}
分页查询
# 分页查询
# from:起始文档的偏移量(从0开始),size:每页返回的文档数
GET /person2/_search
{
"query": { "match_all": {} },
"from": 3,
"size": 3,
"sort": { "age": "asc" }
}
桶聚合 [ aggs ]
# 桶聚合 分组
# 格式
# "aggs": {
# "自定义属性名": {
# "terms": {
# "field": "属性",
# "size": 10
# }
# }
# }
GET person2/_search
{
"size": 0,
"query": {
"match": {
"address": "重庆"
}
},
"aggs": {
"address_name": {
"terms": {
"field": "name",
"size": 10
}
}
}
}
指标聚合 [ aggs ]
max,avg,min,sum....
# 指标聚合 聚合函数
# 四川省内最大年纪的人
GET person2/_search
{
"query": {
"match": {
"address": "四川"
}
},
"aggs": {
"max_age": {
"max": {
"field": "age"
}
}
}
}
嵌套聚合(子聚合)
GET person2/_search
{
"size": 0,
"aggs": {
"my_name": {
"terms": {
"field": "name"
},
"aggs": {
"age_max":{
"max": {
"field": "age"
}
}
}
}
}
}
# 嵌套不能查询keyword类型
GET person2/_search
{
"size": 0,
"aggs": {
"age_max":{
"max": {
"field": "age"
}
},
"my_name":{
"terms": {
"field": "name"
}
}
}
}
阶梯分桶 [ histogram ]
# 阶梯分桶
# interval:阶梯值
# min_doc_count:最小文档数
GET /person2/_search
{
"size": 0,
"aggs": {
"age": {
"histogram": {
"field": "age",
"interval": 2,
"min_doc_count":1
}
}
}
}
高亮查询 [ highlight ]
# 高亮查询
GET person2/_search
{
"query": {
"match": {
"address": "四川"
}
},
"highlight": {
"fields": {
"address": {
"pre_tags": "<font color='red'>",
"post_tags": "</font>"
}
}
}
}
批量操作 [ _bulk ]
# 批量操作
# 删除索引为person2,id为5
# 新增索引为person2,id为6
# 内容:名字=徐八,年龄=18,住址=上海市黄浦区
# 修改索引为person2,id为2
# 内容:名字=刘九
POST _bulk
{"delete":{"_index":"person2","_id":"5"}}
{"create":{"_index":"person2","_id":"6"}}
{"name":"徐八","age":18,"address":"上海市黄浦区"}
{"update":{"_index":"person2","_id":"2"}}
{"doc":{"name":"刘九"}}
查询别名 [ _alias ]
# 查询别名
GET person2/_alias/
起别名
# 起别名
POST person2/_alias/person
复制索引 [ _reindex ]
# Create the source index
PUT person1
{
  "mappings": {
    "properties": {
      "name":{
        "type": "text"
      },
      "age":{
        "type": "integer"
      },
      "address":{
        "type": "keyword"
      }
    }
  }
}

# Add one document to person1
PUT person1/_doc/1
{
  "name":"唐一",
  "age":52,
  "address":"斗罗大陆"
}

# Check the contents of person1
GET person1/_search

# Copy person1 into person2.
# NOTE(review): _reindex keeps each document's _id, so this document (_id 1) is expected
# to replace the existing _id 1 document in person2 - confirm this is intended.
POST _reindex
{
  "source": {"index": "person1"},
  "dest": {"index": "person2"}
}

# Search through the alias "person" (added to person2 in the alias example above)
GET person/_search
结果
删除别名 [ actions ]
# 删除别名(该请求只移除 person2 上的别名 person,不会删除索引;如需删除索引,请另外执行 DELETE person2)
POST /_aliases
{
"actions": [
{
"remove": {
"index": "person2",
"alias": "person"
}
}
]
}