elasticsearch应用(第三篇)
elasticsearch应用
1、定义分词器与使用常用过滤器
2、定义字段映射
不需要分词的定义为:type:keyword
"size" : {
  "type" : "keyword"
},
不能被搜索的定义为:index:false 比如身份证号不希望被当做查询字段使用 “index” : false。
"zoneid" : {
  "type" : "short",
  "index" : false
}
使用分词器并需要把全文本建立索引的指定多字段:
"country" : {
  "type" : "text",
  "analyzer" : "ik_syno",
  "fields" : {
    "keyword" : { // un-analyzed sub-field: queries on country.keyword must match the whole text
      "type" : "keyword"
    }
  }
},
需要被分词并指定分词器,否则系统使用默认分词器 “analyzer” : “ik_syno”
"gcontent" : {
  "type" : "text",
  "analyzer" : "ik_syno"
},
上面的 country.keyword 会把 country 的整行文本建立索引,这种用法建议用在短文本上,比如姓名。
不希望es自动建立字段 “dynamic”: false
默认情况下,es 在添加数据时,如果添加的数据没有对应的字段,es 会自动建立该字段。一般为使 es 字段保持完整性,建议关闭此功能。
"mappings" : {
  "dynamic" : false, // do not let ES create fields automatically
  "properties" : {
    "catestr" : {
      "type" : "text",
      "store" : true,
      "fields" : {
        "keyword" : {
          "type" : "keyword"
        }
      }
    },
3、测试映射字段类型
注意:关闭自动添加字段功能:"dynamic": false
比如添加文档时,如果 sipa_photo 中没有对应的字段,
es 会自动添加一个新的字段,所以一般情况下关闭这个功能。
type : keyword 指字段不分词,直接存入索引
store:true/false 指是否在 _source 之外单独存储该字段的原始值(默认 false,此时字段值仍可从 _source 中取得)
测试1: type:keyword,store:true/false 有什么不同
结论:
index:false 是不能搜索的。
测试2:type:date,store:true/false 有什么不同
"photoid": {
  "type": "keyword",
  "store": true
},
"renewtime": {
  "type": "date",
  "store": false
},
测试的mapping
PUT /sipa_photo_t/_mapping
{
  "dynamic": false,
  "properties": {
    "photoid": {
      "type": "keyword",
      "store": true
    },
    "renewtime": {
      "type": "date",
      "store": false
    },
    "moddate": {
      "type": "date",
      "store": true
    },
    "uploaddate": {
      "type": "date",
      "store": true
    },
    "ifmod": {
      "type": "keyword",
      "store": true
    },
    "groupid": {
      "type": "keyword",
      "store": true
    },
    "price": {
      "type": "keyword",
      "store": true
    },
    "syspid": {
      "type": "long",
      "store": false
    },
    "ifstock": {
      "type": "keyword",
      "store": false
    },
    "title": {
      "type": "text",
      "analyzer": "ik_syno",
      "search_analyzer": "ik_syno"
    },
    "gtitle": {
      "type": "text",
      "analyzer": "ik_syno",
      "search_analyzer": "ik_syno"
    },
    "keyword": {
      "type": "text",
      "analyzer": "ik_syno",
      "search_analyzer": "ik_syno"
    },
    "gcontent": {
      "type": "text",
      "analyzer": "ik_syno",
      "search_analyzer": "ik_syno"
    },
    "pindex": {
      "type": "keyword",
      "store": true
    },
    "upuid": {
      "type": "keyword",
      "store": true
    },
    "ifedit": {
      "type": "keyword",
      "store": true
    },
    "upway": {
      "type": "keyword",
      "store": true
    },
    "size": {
      "type": "keyword",
      "store": false
    },
    "width": {
      "type": "keyword",
      "store": false
    },
    "height": {
      "type": "keyword",
      "store": false
    },
    "upip": {
      "type": "keyword",
      "store": true
    },
    "penname": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "country": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "city": {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "pdate": {
      "type": "date"
    },
    "plevel": {
      "type": "keyword",
      "store": false,
      "index": false
    },
    "psname": {
      "type": "keyword",
      "store": false,
      "index": false
    },
    "pwebsid": {
      "type": "keyword",
      "store": false,
      "index": false
    },
    "zoneid": {
      "type": "short",
      "store": false,
      "index": false
    },
    "pclick": {
      "type": "integer",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "pfcate": {
      "type": "keyword",
      "store": true
    },
    "catestr": {
      "type": "text",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "picori": {
      "type": "keyword",
      "store": true
    },
    "porder": {
      "type": "keyword",
      "store": true
    }
  }
}
字段映射 实例
1.安装 IK 中文分词器
获取官网 IK 地址
https://github.com/medcl/elasticsearch-analysis-ik
https://github.com/medcl/elasticsearch-analysis-ik/releases 安装包
选择安装es对应的版本:
我们安装的是 7.3.0所以 复制 7.3.0 ik 地址
点击7.3.0进入复制zip包地址
https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.3.0/elasticsearch-analysis-ik-7.3.0.zip
使用 es插件命令安装ik插件
yum 安装的es源码在/usr/share/elasticsearch
# cd /usr/share/elasticsearch/
# ./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.3.0/elasticsearch-analysis-ik-7.3.0.zip
查看是否已安装好:可以查看 /etc/elasticsearch/ 目录,可以看到多了 ik 相关的文件夹 analysis-ik
配置IK分词:本地过滤词典、远程过滤词典,本地同义词词典,远程同义词词典,自定义过滤器:集合英文、大写转小写、中文分词等常用过滤器。
打开 analysis-ik 目录
里面有自带的常用词典
其中 IKAnalyzer.cfg.xml 是 ik 分词器的配置文档,先备份:
cp IKAnalyzer.cfg.xml IKAnalyzer.cfg.xml.bak
1、新建一个同义词词典: echo "" > /etc/elasticsearch/analysis-ik/synonym.dic
2、新建一个sipa-es专用词典: echo "" > /etc/elasticsearch/analysis-ik/sipa_main.dic
3、配置nginx 可以访问 远程过滤词词典、远程专用词典。
配置远程词典的作用是:更新远程词典时可以不用重启es
echo "" > /www/es_sipa_dict/sipaStopDict.txt  # remote stop-word dictionary
echo "" > /www/es_sipa_dict/sipaDict.txt      # remote custom dictionary
访问:
http://152.136.108.221/es_sipa_dict/sipaStopDict.txt
http://152.136.108.221/es_sipa_dict/sipaDict.txt
配置
多个词典用分号隔开
vim /etc/elasticsearch/analysis-ik/IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
  <comment>IK Analyzer 扩展配置</comment>
  <!-- Local extension dictionaries; separate multiple files with semicolons -->
  <entry key="ext_dict">main.dic;extra_main.dic;sipa_main.dic</entry>
  <!-- Local extension stop-word dictionaries -->
  <entry key="ext_stopwords">stopword.dic;extra_stopword.dic;sipa_stopword.dic</entry>
  <!-- Remote extension dictionary (served over HTTP) -->
  <entry key="remote_ext_dict">http://152.136.108.221/es_sipa_dict/sipaDict.txt</entry>
  <!-- Remote extension stop-word dictionary (served over HTTP) -->
  <entry key="remote_ext_stopwords">http://152.136.108.221/es_sipa_dict/sipaStopDict.txt</entry>
</properties>
安装好IK分词器后重启 es
systemctl restart elasticsearch
2.自定义分词器
GET /_cat/indices
#1:定义sipa_photo用的分词器
#2:使用lowercase,stemmer过滤器
#3:自定义指定同义词词典路径
(/etc/elasticsearch/analysis-ik/synonym.dic) analysis-ik/synonym.dic
synonym.dic 同义词写法
西红柿,番茄
黑龙江,黑龙江省
F1,一级方程式
PUT sipa_photo_t
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik_syno": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "stemmer", "my_synonym_filter"]
        },
        "ik_syno_smart": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["lowercase", "stemmer", "my_synonym_filter"]
        }
      },
      "filter": {
        "my_synonym_filter": {
          "type": "synonym",
          "synonyms_path": "analysis-ik/synonym.dic"
        }
      }
    }
  }
}
注意:关闭自动添加字段功能:"dynamic": false
比如添加文档时,如果 sipa_photo 中没有对应的字段,
es 会自动添加一个新的字段,所以一般情况下关闭这个功能。
3.定义字段
PUT /sipa_photo_t/_mapping
{
  "dynamic": false,
  "properties": {
    "catestr": {
      "type": "text",
      "analyzer": "ik_syno",
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "city": {
      "type": "text",
      "analyzer": "ik_syno",
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "country": {
      "type": "text",
      "analyzer": "ik_syno",
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "gcontent": {
      "type": "text",
      "analyzer": "ik_syno"
    },
    "groupid": {
      "type": "long",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "gtitle": {
      "type": "text",
      "analyzer": "ik_syno"
    },
    "height": {
      "type": "integer",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "ifedit": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "ifmod": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "ifstock": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "keyword": {
      "type": "text",
      "analyzer": "ik_syno"
    },
    "moddate": {
      "type": "date",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "pclick": {
      "type": "integer",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "pdate": {
      "type": "date",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "penname": {
      "type": "keyword",
      "store": true
    },
    "pfcate": {
      "type": "text",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "photoid": {
      "type": "long",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "picori": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "pindex": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "plevel": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "porder": {
      "type": "integer",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "price": {
      "type": "float",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "psname": {
      "type": "text",
      "analyzer": "ik_syno"
    },
    "pwebsid": {
      "type": "keyword",
      "store": true
    },
    "renewtime": {
      "type": "date",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "size": {
      "type": "integer",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "syspid": {
      "type": "long",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "title": {
      "type": "text",
      "analyzer": "ik_syno"
    },
    "upip": {
      "type": "keyword",
      "store": true
    },
    "uploaddate": {
      "type": "date",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "upuid": {
      "type": "long",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "upway": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "width": {
      "type": "integer",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    },
    "zoneid": {
      "type": "short",
      "store": true,
      "fields": {
        "keyword": {
          "type": "keyword"
        }
      }
    }
  }
}
# 查看字段
GET /sipa_photo_t/_mapping
测试自定义的IK分词器,敏感词典、字典过滤器等
#新建一个sipa_photo_t库并自定义IK分词器
PUT sipa_photo_t
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik_syno": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "stemmer", "my_synonym_filter"]
        },
        "ik_syno_smart": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["lowercase", "stemmer", "my_synonym_filter"]
        }
      },
      "filter": {
        "my_synonym_filter": {
          "type": "synonym",
          "synonyms_path": "analysis-ik/synonym.dic"
        }
      }
    }
  }
}
测试中文分词器:
#测试 IK 分词器
GET /sipa_photo_t/_analyze
{
  "analyzer": "ik_syno_smart",
  "text": "万圣节F1"
}
查看到F1可以分词为 “f1”,“一级方程式”,说明synonym.dic同义词字典已生效。
#测试 IK 分词器-2
GET /sipa_photo_t/_analyze
{
  "analyzer": "ik_syno_smart",
  "text": "万圣节色情"
}
如图,可以把“色情”过滤,说明远程过滤词典已生效
未加过滤词
#测试 IK 分词器-2
GET /sipa_photo_t/_analyze
{
  "analyzer": "ik_syno_smart",
  "text": "万圣节色情 裸体"
}
添加“裸体”过滤词
echo 裸体>>sipaStopDict.txt
测试分词:
#测试 IK 分词器-2
GET /sipa_photo_t/_analyze
{
  "analyzer": "ik_syno_smart",
  "text": "万圣节色情 裸体"
}
结果查看已过滤