Using elasticsearch-head to operate Elasticsearch 7.8 / 7.10
1. Filebeat (collect data) + Elasticsearch (build indexes) + Kibana (display)
2. Filebeat (collect data) + Logstash (filter) + Elasticsearch (build indexes) + Kibana (display) ##################this is the setup we use
3. Filebeat (collect data) + Kafka/Redis/File/Console (transport) + application (process, store, display)
4. Filebeat (collect data) + Logstash (filter) + Kafka/Redis/File/Console (transport) + application (process, store, display)
For the overall ELK setup see the link below. It covers the ES cross-origin (CORS) configuration, and Kibana's ES timeout setting elasticsearch.requestTimeout: 120000 (default 30000); if this is too short, Kibana reports [elasticsearch] [TimeoutError]: Request timed out.
https://www.cnblogs.com/taoweizhong/p/10462194.html
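A minimal sketch of the two settings just mentioned (standard ES/Kibana options; adjust values to your environment):

# elasticsearch.yml -- allow cross-origin requests so es-head can connect
http.cors.enabled: true
http.cors.allow-origin: "*"

# kibana.yml -- raise the ES request timeout (milliseconds; default 30000)
elasticsearch.requestTimeout: 120000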
----------------------------------------
IK Chinese word segmentation
https://github.com/medcl/elasticsearch-analysis-ik
#First use the "Index" tab to create the index "test"; this takes two steps: mappings (type structure and field types) and settings (analyzer)
#Then perform the following in the "Composite Query" tab
#With name's "type": "keyword", the name field is not tokenized
#With name's "type": "text", the name field is tokenized
1. Configure the mappings (include_type_name=true lets address be auto-created as a type; the contents of properties are applied as the type's mapping)
Fill in the three input fields as follows:
http://localhost:9200/test/address/
_mapping?include_type_name=true POST
gns mapping template
{
"properties": {
"name": {
"type": "text" , #################text,string 类型进行分词 唯独keyword不进行分词
#"analyzer": "ik_max_word" 或 ik_smart #################字段上设置分词(还可以在索引整体上设置 无需字段上设置)
},
"location_point": {
"type": "geo_shape"
},
"point": {
"type": "geo_point"
}
}
}
mapping for the geonames data
geonames mapping
{
"properties": {
"name": {
"type": "keyword"
},
"alternatenames": {
"type": "text"
},
"lon": {
"type": "float"
},
"lat": {
"type": "float"
},
"location": {
"type": "geo_point"
}
}
}
ik_max_word: splits the text at the finest granularity; e.g. "中华人民共和国国歌" is split into
"中华人民共和国, 中华人民, 中华, 华人, 人民共和国, 人民, 人, 民, 共和国, 共和, 和, 国国, 国歌", exhausting every possible combination.
ik_smart: splits at the coarsest granularity; e.g. "中华人民共和国国歌" is split into "中华人民共和国, 国歌".
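To compare the two yourself, call the _analyze API (shown in the same es-head composite-query style used throughout these notes); swap in ik_max_word to see the fine-grained output:
http://localhost:9200/
_analyze POST
{
  "analyzer": "ik_smart",
  "text": "中华人民共和国国歌"
}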
2. Configure the settings (if omitted, the default standard analyzer is used) -- sets the IK analyzer globally on the index
Close the index test first: POST http://localhost:9200/test/_close/
http://localhost:9200/test
_settings PUT
{
"analysis": {
"analyzer": {
"my_ik_analyzer": {
"tokenizer": "ik_max_word"
}
}
}
}
Reopen the index: POST http://localhost:9200/test/_open/
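Once my_ik_analyzer is defined in the index settings, a text field can reference it by name in the mapping -- a sketch:
http://localhost:9200/test/address/
_mapping?include_type_name=true POST
{
  "properties": {
    "name": {
      "type": "text",
      "analyzer": "my_ik_analyzer"
    }
  }
}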
#Adding data
http://localhost:9200/test/address/ POST
{
"name": "Chipotle Mexican Grill",
"location_point": {
"type": "point",
"coordinates": [
55.75,
37.616667
]
},
"point": {
"lat": 40.722,
"lon": -73.989
}
}
#Updating data: append the document's id to the URL and submit the new document directly
http://localhost:9200/test/address/id POST
{
"name": "中华人民共和国国歌",
"location_point": {
"type": "point",
"coordinates": [
55.75,
37.616667
]
},
"point": {
"lat": 40.722,
"lon": -73.989
}
}
#Querying data / verifying the analyzer
http://localhost:9200/test/address/
_search POST
{
"query": {
"match": {
"name": "篮球"
}
}
}
http://localhost:9200/
_analyze POST
{
"analyzer": "ik_max_word",
"text": "乔丹是篮球之神。"
}
# Deleting data
POST http://localhost:9200/gns/gnsaddress/_delete_by_query
{
"query": {
"match_all": {
}
}
}
#Online regex tool: https://tool.oschina.net/regex/#
#Regex basics: https://www.jianshu.com/p/e25c4cfc8f4e
#地址.txt
#[\u4e00-\u9fa5]+\,[0-9]{1,3}\.[0-9]{1,20}\,[0-9]{1,3}\.[0-9]{1,20}\,
#(?<a>[\u4e00-\u9fa5]+\,[0-9]{1,3}\.[0-9]{1,20}\,[0-9]{1,3}\.[0-9]{1,20}\,)
#Setting up the logstash startup command
-t validates that logstash-geoaddress.conf contains no format errors
logstash.bat -f ../config/logstash-geoaddress.conf -t
If the format is valid, the output looks like this:
[2020-09-08T09:56:12,864][INFO ][logstash.runner ] Using config.test_and_exit mode. Config Validation Result: OK. Exiting Logstash
If there are no errors, start it with:
logstash.bat -f ../config/logstash-geoaddress.conf
# filebeat+logstash+es: importing a CSV into ES
#Start es and es-head, and use es-head to create the index/type/mapping matching the CSV data format in ES beforehand
#*****#For csv/txt files, delete the header row (the first line of field names) so the file starts directly with the data#########################################important
17.05,-61.8,PRESIDENCYOFANTIGUA,Presidency of Antigua
as opposed to
17.05,-61.8,PRESIDENCYOFANTIGUA,Presidency of Antigua,
The trailing comma produces one extra field at mapping time; remember to handle it yourself (delete it or deal with it some other way).
#index (database), type (table), mapping (fields and types)
First create the index gns in es-head
http://localhost:9200/gns/gnsaddress
_mapping?include_type_name=true POST
{
"properties": {
"SORT_NAME_RO": {
"type": "keyword", ####################################注意 keyword类型不分词
#"analyzer": "ik_max_word" ####################################注意 如果不指定分词方式 则默认 “standard”
},
"FULL_NAME_RO": {
"type": "text" ####################################注意 text类型分词
},
"lon": {
"type": "float"
},
"lat": {
"type": "float"
},
"location": {
"type": "geo_point"
}
}
}
#filebeat: configure the path and encoding
*The source file itself must also be UTF-8 encoded -- check this yourself, or the imported data will be garbled
*Remove the header row
*Prefer pure-English source file paths and avoid Chinese (not a hard requirement)
type: log
# Change to true to enable this input configuration.
enabled: true
encoding: UTF-8
# Paths that should be crawled and fetched. Glob based paths.
paths:
#- /var/log/*.log
- E:\fgq\ELK\dataofgeo\gns.csv or E:\fgq\ELK\dataofgeo\gns.txt
Output configuration
Choose either logstash or elasticsearch and comment out the other; logstash is the usual choice -- a filebeat.yml output sketch follows.
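A minimal filebeat.yml output sketch under those assumptions (default ports; keep exactly one block uncommented):
output.logstash:
  hosts: ["localhost:5044"]
#output.elasticsearch:
#  hosts: ["localhost:9200"]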
#logstash configuration -- note: a field used as geo_point must be structured as lon,lat; longitude,latitude will not work ################################important
For the accepted lat/lon coordinate formats, see the official docs: https://www.elastic.co/guide/cn/elasticsearch/guide/current/lat-lon-formats.html
logstash-gns.conf
input {
beats {
port => 5044
}
}
filter {
csv {
separator => ","
columns => ["lat","lon","SORT_NAME_RO","FULL_NAME_RO"] #############field names must match those in the ES mapping; the field count does not have to match
add_field => ["[location][lon]","%{lon}"]
add_field => ["[location][lat]","%{lat}"]
remove_field => ["message","headers","@version","version","ecs","@timestamp","tags","agent","input","host","log","offset"]
}
mutate {
convert => { ####################################important: the conversion types must be declared and must match the ES mapping, otherwise a type-conversion error is thrown
# type conversion
"SORT_NAME_RO" => "string"
"FULL_NAME_RO" => "string"
"lon" => "float"
"lat" => "float"
"[location][lon]" => "float"
"[location][lat]" => "float"
}
}
}
output {
#stdout {
# codec => rubydebug
# codec => json_lines
#}
elasticsearch {
hosts => ["127.0.0.1:9200"]
index => "gns"
document_type => "gnsaddress"
}
}
logstash-geonames.conf
# Sample Logstash configuration for creating a simple
# Beats -> Logstash -> Elasticsearch pipeline.
input {
beats {
port => 5044
}
}
filter {
mutate {
gsub => ["message",'"',"'"]
}
csv {
separator => " "
columns => ["geonameid","name","asciiname","alternatenames","lat","lon"]
add_field => ["[location][lon]","%{lon}"]
add_field => ["[location][lat]","%{lat}"]
remove_field => ["message","headers","@version","version","ecs","@timestamp","tags","agent","input","host","log","offset","geonameid","asciiname","column7","column8","column9","column10","column11","column12","column13","column14","column15","column16","column17","column18","column19"]
}
ruby {
code => '
hash = event.to_hash
hash.each do |k,v|
if v == nil and k=="alternatenames"
event.set(k,event.get("name"))
end
end
'
}
mutate {
convert => {
# type conversion
"name" => "string"
"alternatenames" => "string"
"lon" => "float"
"lat" => "float"
"[location][lon]" => "float"
"[location][lat]" => "float"
}
}
}
output {
stdout {
codec => rubydebug
}
#or
elasticsearch {
hosts => ["127.0.0.1:9200"]
index => "geonames"
document_type => "address"
}
}
Handling null (nil) values among the delimiter-split fields
You can add the following to the filter to remove the affected fields:
ruby {
code => "
hash = event.to_hash
hash.each do |k,v|
if v == nil
event.remove(k)
end
end
"
}
Or reassign a value instead:
ruby {
code => '
hash = event.to_hash
hash.each do |k,v|
if v == nil
event.set("field1", event.get("fieldx"))  # placeholder field names
end
end
'
}
Handling double quotes inside the collected, delimiter-split field values
Chinese quotes (“ ” ‘ ’) can be left alone; ASCII double quotes (") must be replaced; ASCII single quotes ('') can be left alone.
mutate {
gsub => [ "message",'"',"'" ]
}
#Start logstash
logstash.bat -f ../config/logstash-gns.conf
#Start filebeat
filebeat startup command: filebeat.exe -e -c filebeat.yml
#Import progress and data distribution can be checked in Kibana
# filebeat+logstash+es: importing a txt into ES
# If nothing happens, change the data file contents to trigger a new read
#Garbled Chinese in the Elasticsearch console: -Dfile.encoding=GBK ######################################important
ES java.io.IOException: 杩滅▼涓绘満寮鸿揩鍏抽棴浜嗕竴涓幇鏈夌殑杩炴帴銆? (mojibake of 远程主机强迫关闭了一个现有的连接。-- "An existing connection was forcibly closed by the remote host.")
The fix is to change "-Dfile.encoding=UTF-8" to "-Dfile.encoding=GBK" in Elasticsearch's config/jvm.options file,
then restart Elasticsearch.
If the line is absent, simply add -Dfile.encoding=GBK as a new line.
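The resulting jvm.options entry (GBK suits a Chinese Windows console; pick the encoding matching your console's code page):
# config/jvm.options
-Dfile.encoding=GBK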
#Garbled Chinese when Filebeat+Logstash push data into ES: the source data file must itself be UTF-8 encoded (if not, open it in Notepad and Save As with UTF-8 encoding, or preferably convert it to UTF-8 with Notepad++); otherwise the data lands in ES garbled even if the Filebeat config is changed.
Add encoding: UTF-8:
type: log
# Change to true to enable this input configuration.
enabled: true
encoding: UTF-8
# Paths that should be crawled and fetched. Glob based paths.
paths:
#- /var/log/*.log
- E:\fgq\ELK\dataofgeo\gns.csv
If the data looks garbled in the cmd window, ignore it.
# filebeat ------> logstash ------> elasticsearch ------> kibana
Filebeat (collect data) + Logstash (filter) + Elasticsearch (build indexes) + Kibana (display)
Filebeat sends data to Logstash, but if an earlier parse was wrong and you want to re-parse and re-read the log, you will find that logs already shipped are not re-sent: even deleting the index in Elasticsearch and trying to write it again changes nothing. To clear the shipped state and re-read from the beginning:
stop filebeat --> delete the registry file --> start filebeat
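A Windows sketch of those three steps; the registry location is an assumption (Filebeat 7.x typically keeps a data\registry directory next to filebeat.exe, while older versions use a single data\registry file -- verify for your version):
:: 1) stop filebeat (Ctrl+C in its console window)
:: 2) delete the registry (path assumed)
rd /s /q data\registry
:: 3) start filebeat again
filebeat.exe -e -c filebeat.yml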
# Installing the IK analyzer for ES
Download the matching IK version from https://github.com/medcl/elasticsearch-analysis-ik (download a release build directly to avoid packaging with Maven!!! If no release matches your version, enter the unpacked source directory, run mvn package, and the zip is generated under the target->releases directory).
The IK analyzer version must match the Elasticsearch version exactly.
Create an ik directory under the ES installation's plugins directory and unpack the zip contents into it.
Test it in the header plugin.
#es-head: inspecting how the analyzer tokenizes text
http://localhost:9200/
_analyze POST
{
"analyzer": "ik_max_word",
"text": "乔丹是篮球之神。"
}
# ES geo queries (spatial queries); see the official docs:
https://www.elastic.co/guide/cn/elasticsearch/guide/current/filter-by-geopoint.html
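For instance, a geo_distance filter against the location field defined in the gns mapping above (a sketch; the 10km radius and the center point are arbitrary placeholders):
http://localhost:9200/gns/gnsaddress/
_search POST
{
  "query": {
    "bool": {
      "filter": {
        "geo_distance": {
          "distance": "10km",
          "location": {
            "lat": 40.722,
            "lon": -73.989
          }
        }
      }
    }
  }
}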
# ES tokenization strategy
The default is standard, which splits Chinese text into individual characters.
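You can confirm this with _analyze (same sample sentence as before); standard emits one token per Chinese character: 乔 / 丹 / 是 / 篮 / 球 / 之 / 神:
http://localhost:9200/
_analyze POST
{
  "analyzer": "standard",
  "text": "乔丹是篮球之神"
}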
# es-head: querying a field whose type is keyword
http://localhost:9200/gns_up/gnsaddress_up/ ############################remember this is POST, not GET; GET requires the ?a=&b=&c= query-string style
_search POST
Equality query (exact match):
{
"query": {
"match": {
"FULL_NAME_RO": "三里畈"
}
}
}
Fuzzy equality query (tolerates misspellings within the edit distance given by fuzziness):
{
"query": {
"fuzzy": {
"text": {
"value": "surprize",
"fuzziness": 1
}
}
}
}
{
"query": {
"match": {
"text": {
"query": "SURPRIZE ME!",
"fuzziness": "AUTO",
"operator": "and"
}
}
}
}
LIKE-style (wildcard) queries:
{
"query": {
"wildcard": {
"FULL_NAME_RO": "*zhou*"
}
}
}
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"FULL_NAME_RO": "*zhou*"
}
}
],
"must_not": [],
"should": []
}
},
"from": 0,
"size": 5,
"sort": [],
"aggs": {}
}
{
"from": 0,
"size": 10,
"query": {
"query_string": {
"query": "FULL_NAME_RO:(*三里畈* NOT *美国* OR *VIP* OR *经济* OR *金融*)",
"default_operator": "OR"
}
}
}
#A summary of Elasticsearch's built-in analyzers:
Analyzer     Purpose
Standard     ES default; splits on word boundaries and lowercases
Simple       Splits on non-letters, drops the non-letters, lowercases
Stop         Filters out stop words (the, a, is, ...) and lowercases
Whitespace   Splits on whitespace
Language     Reportedly provides analyzers for 30+ common languages
Pattern      Splits on a regular expression; the default \W+ matches non-word characters
Keyword      No tokenization; emits the whole input as a single token
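A quick check of one of these with the same _analyze call as before (the sample sentence is arbitrary; simple should yield the, quick, brown, foxes -- digits dropped, everything lowercased):
http://localhost:9200/
_analyze POST
{
  "analyzer": "simple",
  "text": "The 3 QUICK Brown-Foxes"
}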
--------------------------------------------scratch pad
enname,cnname,country,lat,lon
{
"properties": {
"enname": {
"type": "text"
},
"cnname": {
"type": "keyword" ####################################注意 text类型分词
},
"cnname": {
"type": "keyword" ####################################注意 text类型分词
},
"lon": {
"type": "float"
},
"lat": {
"type": "float"
},
"location": {
"type": "geo_point"
}
}
}
The CSV::MalformedCSVError below ("Illegal quoting") is the failure that the gsub double-quote replacement above works around:
[2020-09-11T13:11:41,366][WARN ][logstash.filters.csv ][main][194d98404188c35548291338272023bdde014fbd6116d23ab70557e6c8dc7963] Error parsing csv {:field=>"message", :source=>"11979557\tBoya Norte \"Brasileira M\"\tBoya Norte \"Brasileira M\"\t\t-35.17333\t-56.61167\tS\tBCN\tAR\t\t\t\t\t\t0\t\t-9999\t\t2018-11-29", :exception=>#<CSV::MalformedCSVError: Illegal quoting in line 1.>}