一、基本术语
二、基本操作
#以下操作基于kibana
获取集群/节点等基本信息
相关命令:
# Cluster health summary.
GET /_cluster/health
# Full cluster state.
GET /_cluster/state
# Per-node statistics.
GET _nodes/stats
# Mappings of all indices.
GET _mapping
等等
添加文档
# Index three sample employee documents; POST without an id lets
# Elasticsearch generate the _id.
POST /megacorp/employee
{
  "first_name": "John",
  "last_name": "Smith",
  "age": 25,
  "about": "I love to go rock climbing",
  "interests": ["sports", "music"]
}

POST /megacorp/employee
{
  "first_name": "Jane",
  "last_name": "Smith",
  "age": 32,
  "about": "I like to collect rock albums",
  "interests": ["music"]
}

POST /megacorp/employee
{
  "first_name": "Douglas",
  "last_name": "Fir",
  "age": 35,
  "about": "I like to build cabinets",
  "interests": ["forestry"]
}
查看文档是否存在
HEAD /megacorp/employee/1
删除文档
DELETE /megacorp/employee/1
查询某一条数据
GET megacorp/employee/AWV_TVH2xpSIouFg4Tco
index/type/_id为查询依据
相关命令:
只获取文档的_source内容(不含_index、_id等元数据)
GET megacorp/employee/1/_source
查询所有数据(_search)
# Match every document of megacorp/employee.
GET megacorp/employee/_search
{
  "query": { "match_all": {} }
}
_search命令代替了_id的位置
返回所有该index中的数据
指定条件详细查找
# Full-text match on `url`, returning only three _source fields and at most
# 500 hits.
GET topic/_search
{
  "_source": [
    "commented_times",
    "clicks_times",
    "url"
  ],
  "query": {
    "match": { "url": "http://cryptbb2gezhohku.onion/showthread.php?" }
  },
  "size": 500
}
查看数据量(_count)
GET megacorp/employee/_count
_count命令代替了_id的位置
控制返回数据量大小(from、size)
# Paging: skip `from` hits and return at most `size` hits.
GET /megacorp/employee/_search
{
  "from": 0,
  "size": 20
}
某一字段匹配(match)
# Full-text match of "rock" against the `about` field.
GET megacorp/employee/_search
{
  "query": {
    "match": { "about": "rock" }
  }
}
多字段匹配(multi_match)
# Match "rock" against both `about` and `interests`.
GET /megacorp/employee/_search
{
  "query": {
    "multi_match": {
      "query": "rock",
      "fields": [
        "about",
        "interests"
      ]
    }
  }
}
还可以提高某一字段的权重值
# Same multi_match, with `interests` boosted by a factor of 3 (the ^3 suffix).
GET /megacorp/employee/_search
{
  "query": {
    "multi_match": {
      "query": "rock",
      "fields": [
        "about",
        "interests^3"
      ]
    }
  }
}
添加高亮显示(highlight)
# Match "rock" in `about` and highlight the matches in that field using the
# default highlight tags.
GET /megacorp/employee/_search
{
  "query": {
    "match": { "about": "rock" }
  },
  "highlight": {
    "fields": { "about": {} }
  }
}
指定条件高亮
# Phrase match on `details`, restricted to one exact `domain`, with matches
# in `details` wrapped in red <font> tags.
# NOTE(review): the original snippet had no request line, so it could not be
# sent from Console. The index name `extensive` is an assumption — confirm.
GET extensive/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match_phrase": { "details": "毒品" }
        },
        {
          "term": {
            "domain": {
              "value": "7zj4oshsyhokgus6fyk7pmdiubu4mkjpjjprjkvopnhnwylr522tymqd.onion"
            }
          }
        }
      ]
    }
  },
  "highlight": {
    "pre_tags": "<font color='red'>",
    "post_tags": "</font>",
    "fields": { "details": {} }
  }
}
bool查询(bool)
包括must、must_not、should、filter等逻辑
# should-only bool query: at least one of the two `about` matches must hit.
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "about": "rock" } },
        { "match": { "about": "climbing" } }
      ],
      "minimum_should_match": 1
    }
  }
}

# Combined bool query: both `must` clauses are required, the `should` clause
# is optional, and `filter` keeps only documents with age > 20.
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "about": "rock" } },
        { "match": { "interests": "music" } }
      ],
      "should": [
        { "match": { "first_name": "Jane" } }
      ],
      "filter": {
        "range": {
          "age": { "gt": 20 }
        }
      }
    }
  }
}
模糊搜索(fuzziness)
# Match query on `about` that tolerates an edit distance of 1 per term.
GET /megacorp/employee/_search
{
  "query": {
    "match": {
      "about": {
        "query": "rockyy climbinga",
        "fuzziness": 1
      }
    }
  }
}
通配符搜索(wildcard)
# Wildcard query: terms in `about` that start with "roc".
GET /megacorp/employee/_search
{
  "query": {
    "wildcard": {
      "about": { "value": "roc*" }
    }
  }
}
使用script更新数据
# Scripted partial update: add 5 to the `age` of a single document.
POST /megacorp/employee/lvP3kWMBIzE12K7Jp7xQ/_update
{
  "script": "ctx._source.age += 5"
}
修改索引settings:将translog设置为异步写入,并调整刷新间隔与flush阈值
# Update settings of the `extensive` index: no replicas, a larger result
# window, a 120s refresh interval and an asynchronous translog.
# NOTE(review): 147483647 may be a truncated 2147483647 — confirm the intended
# max_result_window value.
PUT extensive/_settings
{
  "index": {
    "number_of_replicas": 0,
    "max_result_window": "147483647",
    "refresh_interval": "120s",
    "translog": {
      "flush_threshold_size": "512mb",
      "sync_interval": "60s",
      "durability": "async"
    }
  }
}
分词器
# Run the `standard` analyzer over a sample text and return the tokens.
GET /_analyze
{
  "analyzer": "standard",
  "text": "Text to analyze"
}
phrase查询
# Phrase match: the terms must appear in `about` in this order.
GET /megacorp/employee/_search
{
  "query": {
    "match_phrase": { "about": "rock climbing" }
  }
}
term查询
# Exact-value (term) query on `age`.
GET /megacorp/employee/_search
{
  "query": {
    "term": {
      "age": { "value": 25 }
    }
  }
}
按ID查询
# Look up a document through a term query on the `_id` metadata field.
POST hiddendata/_search
{
  "query": {
    "term": {
      "_id": {
        "value": "10976837496098520137",
        "boost": 1
      }
    }
  }
}
multi_match查询
# multi_match over `title` and `content`, newest crawl_time first, up to 2500
# hits.
GET extensive/_search
{
  "query": {
    "multi_match": {
      "query": "毒品",
      "fields": ["title", "content"]
    }
  },
  "sort": {
    "crawl_time": { "order": "desc" }
  },
  "size": 2500
}
must和should条件过滤
# Documents of one exact domain that carry at least one of the listed contact
# fields; `html` and `content` are left out of the returned _source.
GET extensive/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "domain": "7zj4oshsyhokgus6fyk7pmdiubu4mkjpjjprjkvopnhnwylr522tymqd.onion"
          }
        }
      ],
      "should": [
        { "exists": { "field": "phone_number" } },
        { "exists": { "field": "emails" } },
        { "exists": { "field": "wechat_id" } },
        { "exists": { "field": "telegram_id" } },
        { "exists": { "field": "alipay_id" } },
        { "exists": { "field": "bitcoin_addresses" } }
      ],
      "minimum_should_match": 1
    }
  },
  "_source": {
    "excludes": ["html", "content"]
  }
}
指定时间范围查询
# Domains ending in ".onion" whose crawl_time lies inside the given inclusive
# range.
GET extensive/_search
{
  "query": {
    "bool": {
      "must": [
        { "wildcard": { "domain": "*.onion" } },
        {
          "range": {
            "crawl_time": {
              "gte": "2020-03-01T00:00:00",
              "lte": "2021-07-10T00:00:00"
            }
          }
        }
      ]
    }
  }
}
统计某一个字段不为空的数量
# Count the documents that have a value for `pgp`.
GET intelligence_cloud/_count
{
  "query": {
    "constant_score": {
      "filter": {
        "exists": { "field": "pgp" }
      }
    }
  }
}
类型为keyword的某个字段的值按字符长度筛选
# Keep documents whose keyword field `qq` is 4 to 7 characters long.
# doc['qq'].toString() renders the doc-values list as "[value]", so its length
# counts the two brackets as well; .value is the actual string. The size()
# guard skips documents that have no value for the field.
# NOTE(review): "size": 20000 is above the default index.max_result_window
# (10000) — confirm the limit has been raised on this index.
GET intelligence_cloud/_search
{
  "_source": "qq",
  "size": 20000,
  "query": {
    "bool": {
      "filter": {
        "script": {
          "script": {
            "source": "doc['qq'].size() > 0 && doc['qq'].value.length() > 3 && doc['qq'].value.length() < 8",
            "lang": "painless"
          }
        }
      }
    }
  }
}
遍历索引下全部_id
# match_all with an empty stored_fields list, so hits carry only metadata
# such as _id and no _source.
GET darknetdata_customize/_search
{
  "query": { "match_all": {} },
  "stored_fields": []
}
排除某个字段查询
# Return every _source field except `html`, for up to 200 documents.
# Uses the plural `excludes` key; the singular `include`/`exclude` spellings
# are deprecated. An empty include list means "no include filter", so it is
# simply omitted.
GET hiddendata/_search
{
  "_source": {
    "excludes": ["html"]
  },
  "query": { "match_all": {} },
  "size": 200
}
统计语言占比top10
# Terms aggregation: the 10 most frequent `language` values. "size": 0
# suppresses the hits so only the aggregation is returned.
GET hiddendata/_search
{
  "size": 0,
  "aggs": {
    "record": {
      "terms": {
        "field": "language",
        "size": 10
      }
    }
  }
}
kibana批量更新
更新某个字段的值
# Set `language` to "zh_cn" on every document whose language matches "中文".
# The target field and the new value are read from `params`; the original
# declared these params but hard-coded the assignment, leaving them unused.
POST hiddendata/_update_by_query
{
  "script": {
    "lang": "painless",
    "source": "ctx._source[params.field] = params.new_value",
    "params": {
      "field": "language",
      "new_value": "zh_cn"
    }
  },
  "query": {
    "match": { "language": "中文" }
  }
}
更新tags字段为空或者为null的值为[]
# Normalize `tags` to an empty list wherever it is null/missing or empty.
# The query is match_all, so the script runs on every document; documents
# that need no change are marked as noop so they are not re-indexed.
POST /hiddendata/_update_by_query
{
  "script": {
    "source": """
      if (ctx._source.tags == null || ctx._source.tags.isEmpty()) {
        ctx._source.tags = [];
      } else {
        ctx.op = 'noop';
      }
    """,
    "lang": "painless"
  },
  "query": {
    "match_all": {}
  }
}
遍历更新tags列表中某个值
# Replace the tag 'datadata' with 'data' in the `tags` list of every document
# of the given domain.
# The instanceof check skips documents where `tags` is missing or not a list
# (the bare loop would throw on them), and documents with nothing to rename
# are marked as noop so they are not re-indexed.
POST /hiddendata/_update_by_query
{
  "script": {
    "source": """
      boolean changed = false;
      if (ctx._source.tags instanceof List) {
        for (int i = 0; i < ctx._source.tags.size(); i++) {
          if (ctx._source.tags[i] == 'datadata') {
            ctx._source.tags[i] = 'data';
            changed = true;
          }
        }
      }
      if (!changed) {
        ctx.op = 'noop';
      }
    """,
    "lang": "painless"
  },
  "query": {
    "match_phrase": {
      "domain": "jbeg2dct2zhku6c2vwnpxtm2psnjo2xnqvvpoiiwr5hxnc6wrp3uhnad.onion"
    }
  }
}
使用reindex API重建(复制)索引
# Copy all documents from `hiddendata` into `hiddendata2`.
POST _reindex
{
  "source": { "index": "hiddendata" },
  "dest": { "index": "hiddendata2" }
}
为指定索引添加一个字段(更新mapping)
# Add a keyword field `company_domain` to the mapping of `hiddendata`.
PUT /hiddendata/_mapping
{
  "properties": {
    "company_domain": { "type": "keyword" }
  }
}