Elasticsearch6.5.4+Logstash6.5.4的基本应用

Elasticsearch跨域访问

修改elasticsearch.yml中的参数:

http.cors.enabled: true                                                 # 是否支持跨域访问,默认false
http.cors.allow-origin: "*"                                             # 访问的网段,可以对网段进行限制
http.cors.allow-methods: OPTIONS,HEAD,GET,POST,PUT,DELETE               # 跨域请求的方式
http.cors.allow-headers: X-Requested-With,Content-Type,Content-Length   # 跨域请求头的信息

创建mapping

创建动态mapping,默认使用ik分词器。

PUT _template/sc_template
{
  "index_patterns":"*",
  "order":0,
  "settings":{
    "number_of_shards": 3,
    "number_of_replicas": 0    
  },
  "mappings":{
    "doc":{
      "dynamic_templates":[{
         "text_use_ik":{
           "match_mapping_type":"text",
           "mapping":{
             "type":"text",
             "analyzer":"ik_max_word",
             "search_analyzer":"ik_smart"
           }
         } 
        }]
    }
  }
}

在kibana中创建omega的mapping,应用ik分词器对中文字段进行分词。

PUT /omega/
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 0
  },
  "mappings": {
    "logs":{
    "properties": {
      "receiver":{
        "type":"text"
      },
      "date":{
        "type":"date"
      },
      "value":{
        "type":"long"
      },
      "xingming":{
        "type":"text",
        "analyzer": "ik_max_word",
        "fields": {
          "keyword":{
            "type":"keyword"
          }
        }
      },
      "bumen":{
        "type":"text",
        "analyzer": "ik_smart",
        "fields": {
          "keyword":{
            "type":"keyword"
          }
        }
      }
    }
  }
  }
}

查看mapping

GET /omega/_mapping

数据同步

应用 Logstash 将数据库中的数据同步至 Elasticsearch 中。
同步一个数据库中的表,创建 postgres.conf 文件:

input {
    jdbc {	
      jdbc_connection_string => "jdbc:postgresql://192.168.108.126:5432/omega"
      jdbc_driver_library => "/usr/local/logstash-6.4.1/test/postgresql-9.4.1212.jar"
      jdbc_driver_class => "org.postgresql.Driver"
      jdbc_user => "postgres"
      jdbc_password => "passwd"
      jdbc_paging_enabled => "true"
      jdbc_page_size => "50000"
      statement => "SELECT a.receiver,a.date,a.value,b.xingming,b.bumen FROM renyuan b JOIN \"result\" a ON a.receiver=b.receiver ORDER BY b.bumen,a.date" 
      type => "jdbc"
}
}
filter {
   json {
     source => "message"
     remove_field => "[message]"
}
  mutate {
   remove_field => ["@version","@timestamp"]
  }
}
output {
  elasticsearch {
    hosts => "192.168.108.126:9200"
    index => "omega"
    document_type => "logs"
  }
  stdout {
    codec => json_lines
}
}

运行logstash程序:bin/logstash -f test/postgres.conf
同时同步两个表格到同一个index里面:

input {
    jdbc {	
      jdbc_connection_string => "jdbc:postgresql://192.168.108.126:5432/omega"
      jdbc_driver_library => "/usr/local/logstash-6.4.1/test/postgresql-9.4.1212.jar"
      jdbc_driver_class => "org.postgresql.Driver"
      jdbc_user => "postgres"
      jdbc_password => "passwd"
      jdbc_paging_enabled => "true"
      jdbc_page_size => "50000"
      statement => "select * from omega.renyuan limit 5" 
      add_field => {"table_name" => "renyuan"}
}
    jdbc {      
      jdbc_connection_string => "jdbc:postgresql://192.168.108.126:5432/omega"
      jdbc_driver_library => "/usr/local/logstash-6.4.1/test/postgresql-9.4.1212.jar"
      jdbc_driver_class => "org.postgresql.Driver"
      jdbc_user => "postgres"
      jdbc_password => "passwd"
      jdbc_paging_enabled => "true"
      jdbc_page_size => "50000"
      statement => "select * from omega.result limit 5"
      add_field => {"table_name" => "result"}
}
}
filter {
  mutate {
    remove_field => ["@version","@timestamp"]
  }
}
output {
  elasticsearch {
    hosts => "192.168.108.126:9200"
    index => "omega2"
    document_type => "logs"
  }
  stdout {     # 这是在交互式界面进行展示
    codec => json_lines
}
}

查询

multi_match 多字段查询:

GET /omega2/_search?scroll=1m
{
  "size": 5,
  "query": {
    "multi_match": {
      "query": "秦**",                                         //查询参数
      "fields": ["bumen","xingming"]                           //被查询字段
    }
  }
}

高亮显示

highlight 标签可以对检索的字段进行高亮显示:

GET zhaobiao/_search
{
  "from": 0, 
  "size": 20, 
  "query": {
    "multi_match": {
      "query": "商务科研服务",
      "fields": ["bid_area","bid_title","bid_industry"]
    }
  },
  "highlight": {
    "fields": {
      "bid_area":{},
      "bid_title":{},
      "bid_industry":{}
    }
  }
}

高亮显示的标签默认是 <em></em>,可以对此标签进行修改:

GET zhaobiao/_search
{
  "from": 0, 
  "size": 20, 
  "query": {
    "multi_match": {
      "query": "商务科研服务",
      "fields": ["bid_area","bid_title","bid_industry"]
    }
  },
  "highlight": {
    "fields": {
      "bid_title":{
        "pre_tags": ["<mark>"],
        "post_tags": ["</mark>"]
      }
    }
  }
}

Elasticsearch 支持三种高亮器:unified、plain、fvh

  • 1)Unified高亮器
    unified高亮器使用Lucene统一高亮器。 这个高亮器将文本分解为句子,并使用BM25算法对单个句子进行评分,就好像它们是文集中的文档一样。 它还支持准确的短语和多项(模糊,前缀,正则表达式)突出显示。 这是默认的高亮器。
  • 2)Plain高亮器
    plain高亮器使用标准的Lucene高亮器。 它试图在短语查询中理解单词重要性和任何单词定位标准来反映查询匹配逻辑。
  • 3)fvh高亮器
    fvh高亮器使用Lucene Fast Vector高亮器。此高亮器可用于在映射中将term_vector设置为with_positions_offsets的字段

可对高亮器进行选择:

GET zhaobiao/_search
{
  "from": 0, 
  "size": 20, 
  "query": {
    "multi_match": {
      "query": "商务科研服务",
      "fields": ["bid_area","bid_title","bid_industry"]
    }
  },
  "highlight": {
    "fields": {
      "bid_title":{
		"type":"plain"
      }
    }
  }
}

性能调优

scroll 游标深度查询

对于数据量较大的深度查询,建议使用scroll参数进行分页查询,提升查询速率。
查询语句:

GET /omega2/_search?scroll=1m                                 
{
  "size": 5,
  "query": {
    "multi_match": {
      "query": "秦**",                                         //查询参数
      "fields": ["bumen","xingming"]                           //被查询字段
    }
  }
}

返回结果

{
  "_scroll_id": "DnF1ZXJ5VGhlbkZldGNoAwAAAAAAAXfVFnlpSFpmX1cyU1R5UkhEY1RpLURCdGcAAAAAAAF31BZ5aUhaZl9XMlNUeVJIRGNUaS1EQnRnAAAAAAABd9YWeWlIWmZfVzJTVHlSSERjVGktREJ0Zw==",
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 2.9424875,
    "hits": [
      {
        "_index": "omega2",
        "_type": "logs",
        "_id": "Wx2lHGgBZ5G2BooG78en",
        "_score": 2.9424875,
        "_source": {
			......
        }
      }
    ]
  }
}

第二次查询时,直接带入scroll_id参数进行查询:

GET /_search/scroll
{
  "scroll":"1m",
  "scroll_id":"DnF1ZXJ5VGhlbkZldGNoAwAAAAAAAXguFnlpSFpmX1cyU1R5UkhEY1RpLURCdGcAAAAAAAF4LxZ5aUhaZl9XMlNUeVJIRGNUaS1EQnRnAAAAAAABeDAWeWlIWmZfVzJTVHlSSERjVGktREJ0Zw=="
}

**优缺点:**解决深度分页查询时,使用from,size带来的冗余问题和查询效率慢的问题,但需要使用两个接口进行转换,且只能一页一页的查询,无法从第1页跳到第5页进行查询。

copy_to

对于多个文本字段进行检索时,使用copy_to对检索的所有字段进行拼接,组合成新的字段,最后在检索时对新的字段进行检索,从而提升检索速率。
创建索引 mapping

PUT /zhaobiao2
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 0
  },
  "mappings": {
    "doc":{
    "properties": {
      "bid_title":{
        "type": "text",
        "analyzer": "ik_max_word",
        "copy_to":"all_text"
      },
      "release_time":{
        "type": "text",
        "analyzer": "ik_smart",
        "copy_to":"all_text"
      },
      "bid_industry":{
        "type": "text",
        "analyzer": "ik_smart",
        "copy_to":"all_text"
      },
      "bid_area":{
        "type": "text",
        "analyzer": "ik_smart",
        "copy_to":"all_text"
      },
      "bid_url":{
        "type": "text"
      },
      "bid_text":{
        "type": "text"
      },
      "all_text":{
        "type":"text",
        "analyzer":"ik_max_word"
      }
    }
  }
  }
}

检索数据

GET zhaobiao2/_search
{
  "query": {
    "multi_match": {
      "query": "商务科研服务",
      "fields": ["all_text"]
    }
  }
}

检索结果

{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  ......
}

此次检索大约70W+的数据,未使用copy_to字段进行检索时所消耗的时长是使用copy_to字段所耗时长的5倍以上,大大提升了检索速率。
注意:copy_to 只能对 text 文本字段进行使用,对 date、long(无法分词)字段无法使用。

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值