Elasticsearch跨域访问
修改elasticsearch.yml中的参数:
http.cors.enabled: true    # 是否支持跨域访问,默认false
http.cors.allow-origin: "*"    # 访问的网段,可以对网段进行限制
http.cors.allow-methods: OPTIONS,HEAD,GET,POST,PUT,DELETE    # 跨域请求的方式
http.cors.allow-headers: X-Requested-With,Content-Type,Content-Length    # 跨域请求头的信息
创建mapping
创建动态mapping,默认使用ik分词器。
PUT _template/sc_template
{
"index_patterns":"*",
"order":0,
"settings":{
"number_of_shards": 3,
"number_of_replicas": 0
},
"mappings":{
"doc":{
"dynamic_templates":[{
"text_use_ik":{
"match_mapping_type":"text",
"mapping":{
"type":"text",
"analyzer":"ik_max_word",
"search_analyzer":"ik_smart"
}
}
}]
}
}
}
在kibana中创建omega的mapping,应用ik分词器对中文字段进行分词。
PUT /omega/
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 0
},
"mappings": {
"logs":{
"properties": {
"receiver":{
"type":"text"
},
"date":{
"type":"date"
},
"value":{
"type":"long"
},
"xingming":{
"type":"text",
"analyzer": "ik_max_word",
"fields": {
"keyword":{
"type":"keyword"
}
}
},
"bumen":{
"type":"text",
"analyzer": "ik_smart",
"fields": {
"keyword":{
"type":"keyword"
}
}
}
}
}
}
}
查看mapping
GET /omega/_mapping
数据同步
应用 Logstash 将数据库中的数据同步至 Elasticsearch 中。
同步一个数据库中的表,创建 postgres.conf 文件:
input {
jdbc {
jdbc_connection_string => "jdbc:postgresql://192.168.108.126:5432/omega"
jdbc_driver_library => "/usr/local/logstash-6.4.1/test/postgresql-9.4.1212.jar"
jdbc_driver_class => "org.postgresql.Driver"
jdbc_user => "postgres"
jdbc_password => "passwd"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
statement => 'SELECT a.receiver,a.date,a.value,b.xingming,b.bumen FROM renyuan b JOIN "result" a ON a.receiver=b.receiver ORDER BY b.bumen,a.date'
type => "jdbc"
}
}
filter {
json {
source => "message"
remove_field => "[message]"
}
mutate {
remove_field => ["@version","@timestamp"]
}
}
output {
elasticsearch {
hosts => "192.168.108.126:9200"
index => "omega"
document_type => "logs"
}
stdout {
codec => json_lines
}
}
运行logstash程序:bin/logstash -f test/postgres.conf
同时同步两个表格到同一个index里面:
input {
jdbc {
jdbc_connection_string => "jdbc:postgresql://192.168.108.126:5432/omega"
jdbc_driver_library => "/usr/local/logstash-6.4.1/test/postgresql-9.4.1212.jar"
jdbc_driver_class => "org.postgresql.Driver"
jdbc_user => "postgres"
jdbc_password => "passwd"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
statement => "select * from omega.renyuan limit 5"
add_field => {"table_name" => "renyuan"}
}
jdbc {
jdbc_connection_string => "jdbc:postgresql://192.168.108.126:5432/omega"
jdbc_driver_library => "/usr/local/logstash-6.4.1/test/postgresql-9.4.1212.jar"
jdbc_driver_class => "org.postgresql.Driver"
jdbc_user => "postgres"
jdbc_password => "passwd"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
statement => "select * from omega.result limit 5"
add_field => {"table_name" => "result"}
}
}
filter {
mutate {
remove_field => ["@version","@timestamp"]
}
}
output {
elasticsearch {
hosts => "192.168.108.126:9200"
index => "omega2"
document_type => "logs"
}
stdout { # 这是在交互式界面进行展示
codec => json_lines
}
}
查询
multi_match 多字段查询:
GET /omega2/_search?scroll=1m
{
"size": 5,
"query": {
"multi_match": {
"query": "秦**", //查询参数
"fields": ["bumen","xingming"] //被查询字段
}
}
}
高亮显示
highlight 标签可以对检索的字段进行高亮显示:
GET zhaobiao/_search
{
"from": 0,
"size": 20,
"query": {
"multi_match": {
"query": "商务科研服务",
"fields": ["bid_area","bid_title","bid_industry"]
}
},
"highlight": {
"fields": {
"bid_area":{},
"bid_title":{},
"bid_industry":{}
}
}
}
高亮显示的标签默认是 <em></em>,可以对此标签进行修改:
GET zhaobiao/_search
{
"from": 0,
"size": 20,
"query": {
"multi_match": {
"query": "商务科研服务",
"fields": ["bid_area","bid_title","bid_industry"]
}
},
"highlight": {
"fields": {
"bid_title":{
"pre_tags": ["<mark>"],
"post_tags": ["</mark>"]
}
}
}
}
Elasticsearch 支持三种高亮器:unified、plain 和 fvh。
- 1)unified 高亮器:使用 Lucene 统一高亮器。这个高亮器将文本分解为句子,并使用 BM25 算法对单个句子进行评分,就好像它们是文集中的文档一样。它还支持准确的短语和多项(模糊、前缀、正则表达式)高亮显示。这是默认的高亮器。
- 2)plain 高亮器:使用标准的 Lucene 高亮器。它试图在短语查询中理解单词重要性和任何单词定位标准来反映查询匹配逻辑。
- 3)fvh 高亮器:使用 Lucene Fast Vector 高亮器。此高亮器可用于在映射中将 term_vector 设置为 with_positions_offsets 的字段。
可对高亮器进行选择:
GET zhaobiao/_search
{
"from": 0,
"size": 20,
"query": {
"multi_match": {
"query": "商务科研服务",
"fields": ["bid_area","bid_title","bid_industry"]
}
},
"highlight": {
"fields": {
"bid_title":{
"type":"plain"
}
}
}
}
性能调优
scroll 游标深度查询
对于数据量较大的深度查询,建议使用scroll参数进行分页查询,提升查询速率。
查询语句:
GET /omega2/_search?scroll=1m
{
"size": 5,
"query": {
"multi_match": {
"query": "秦**", //查询参数
"fields": ["bumen","xingming"] //被查询字段
}
}
}
返回结果
{
"_scroll_id": "DnF1ZXJ5VGhlbkZldGNoAwAAAAAAAXfVFnlpSFpmX1cyU1R5UkhEY1RpLURCdGcAAAAAAAF31BZ5aUhaZl9XMlNUeVJIRGNUaS1EQnRnAAAAAAABd9YWeWlIWmZfVzJTVHlSSERjVGktREJ0Zw==",
"took": 1,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 2.9424875,
"hits": [
{
"_index": "omega2",
"_type": "logs",
"_id": "Wx2lHGgBZ5G2BooG78en",
"_score": 2.9424875,
"_source": {
......
}
}
]
}
}
第二次查询时,直接带入scroll_id参数进行查询:
GET /_search/scroll
{
"scroll":"1m",
"scroll_id":"DnF1ZXJ5VGhlbkZldGNoAwAAAAAAAXguFnlpSFpmX1cyU1R5UkhEY1RpLURCdGcAAAAAAAF4LxZ5aUhaZl9XMlNUeVJIRGNUaS1EQnRnAAAAAAABeDAWeWlIWmZfVzJTVHlSSERjVGktREJ0Zw=="
}
**优缺点:**解决深度分页查询时,使用 from、size 带来的冗余问题和查询效率慢的问题,但需要使用两个接口进行转换,且只能一页一页地查询,无法从第1页跳到第5页进行查询。
copy_to
对于多个文本字段进行检索时,使用 copy_to 对检索的所有字段进行拼接,组合成新的字段,最后在检索时对新的字段进行检索,从而提升检索速率。
创建索引 mapping
PUT /zhaobiao2
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 0
},
"mappings": {
"doc":{
"properties": {
"bid_title":{
"type": "text",
"analyzer": "ik_max_word",
"copy_to":"all_text"
},
"release_time":{
"type": "text",
"analyzer": "ik_smart",
"copy_to":"all_text"
},
"bid_industry":{
"type": "text",
"analyzer": "ik_smart",
"copy_to":"all_text"
},
"bid_area":{
"type": "text",
"analyzer": "ik_smart",
"copy_to":"all_text"
},
"bid_url":{
"type": "text"
},
"bid_text":{
"type": "text"
},
"all_text":{
"type":"text",
"analyzer":"ik_max_word"
}
}
}
}
}
检索数据
GET zhaobiao2/_search
{
"query": {
"multi_match": {
"query": "商务科研服务",
"fields": ["all_text"]
}
}
}
检索结果
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"skipped": 0,
"failed": 0
},
......
}
此次检索大约70W+的数据,未使用 copy_to 字段进行检索时所消耗的时长是使用 copy_to 字段所耗时长的5倍以上,大大提升了检索速率。
注意:copy_to 只能对 text 文本字段进行使用,对 date 和 long(无法分词)字段无法使用。