安装
解压安装包
# Create the target directory first: tar -C fails if it does not exist.
mkdir -p /opt/module/
# Unpack the Elasticsearch 7.6.2 archive into /opt/module/.
tar -zxvf elasticsearch-7.6.2-linux-x86_64.tar.gz -C /opt/module/
后台启动(Elasticsearch 不允许以 root 用户运行,请先切换到普通用户)
/opt/module/elasticsearch-7.6.2/bin/elasticsearch -d
创建补贴库
# Create the subsidy index "dpxdata-njdata1" with an explicit mapping.
# Free-text fields are mapped as "text" with a "keyword" sub-field (ignore_above 256).
# NOTE(review): "subsiduzePro" is spelled differently from the other subsidize* fields;
# confirm against the code that writes documents before renaming it.
PUT /dpxdata-njdata1
{
  "mappings": {
    "properties": {
      "id": { "type": "long" },
      "pSerialNumber": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "serialNumber": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "province": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "city": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "county": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "town": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "village": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "name": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "machinesTool": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "factory": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "productName": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "type": { "type": "keyword" },
      "total": { "type": "integer" },
      "distributor": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      },
      "buyDate": { "type": "date" },
      "price": { "type": "long" },
      "subsidize": { "type": "long" },
      "subsidizeTotal": { "type": "long" },
      "number": { "type": "keyword" },
      "state": { "type": "integer" },
      "createTime": { "type": "date" },
      "year": { "type": "keyword" },
      "state2021": { "type": "keyword" },
      "subsiduzePro": { "type": "long" },
      "subsidizeCity": { "type": "long" },
      "subsidizeCounty": { "type": "long" },
      "subsidizeTiexi": { "type": "long" }
    }
  }
}
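设置最大返回窗口(index.max_result_window),避免查询的 from + size 超过默认上限 10000 时报错
补贴数据目前有六百多万条,并且一直在增加,所以这里设置为一千万,可以根据自己的数据量调整。注意:窗口越大,深分页消耗的堆内存和时间越多;需要遍历全量数据时建议改用 scroll 或 search_after。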
# Raise index.max_result_window on the subsidy index to 10,000,000.
# The HTTP verb is upper-case and the path has a leading slash, matching the other requests in this file.
PUT /dpxdata-njdata1/_settings
{
  "index.max_result_window": 10000000
}
ik 分词器和拼音分词器的使用(需先安装与 ES 版本一致的 elasticsearch-analysis-ik 和 elasticsearch-analysis-pinyin 插件,并重启 ES)
## 添加索引
# Create the test index "pinyin_test".
# "name" is a text field with two sub-fields: name.ik (ik_max_word analyzer)
# and name.pinyin (pinyin analyzer).
PUT /pinyin_test
{
  "settings": {
    "index": {
      "number_of_shards": 3,
      "number_of_replicas": 0,
      "refresh_interval": "1s",
      "max_result_window": "10000000"
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "integer" },
      "name": {
        "type": "text",
        "fields": {
          "ik": { "type": "text", "analyzer": "ik_max_word" },
          "pinyin": { "type": "text", "analyzer": "pinyin" }
        }
      }
    }
  }
}
## 批量插入测试数据
# Bulk-index six sample documents into pinyin_test.
# The body is NDJSON: every "index" action line must be followed by exactly one
# document source line, so the body must not end on an action line.
POST _bulk
{"index":{"_index":"pinyin_test"}}
{"name":"啤酒"}
{"index":{"_index":"pinyin_test"}}
{"name":"壁虎"}
{"index":{"_index":"pinyin_test"}}
{"name":"闭户"}
{"index":{"_index":"pinyin_test"}}
{"name":"币户"}
{"index":{"_index":"pinyin_test"}}
{"name":"啤酒杯"}
{"index":{"_index":"pinyin_test"}}
{"name":"喝花酒"}
## 检索
# Run a match query for the text "bh" against the name.pinyin sub-field.
GET pinyin_test/_search
{
  "query": {
    "match": {
      "name.pinyin": "bh"
    }
  }
}
数据清洗使用
创建索引
# Create the "equipment_brand" index.
# "brand" is analysed with the custom analyzer eq_analyzer (ik_max_word tokenizer
# followed by the pinyin token filter eq_filter) and also has a keyword sub-field.
PUT /equipment_brand/
{
  "settings": {
    "analysis": {
      "filter": {
        "eq_filter": {
          "type": "pinyin",
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "lowercase": true,
          "remove_duplicated_term": true
        }
      },
      "analyzer": {
        "eq_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": "eq_filter"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "integer" },
      "brand": {
        "type": "text",
        "analyzer": "eq_analyzer",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        }
      }
    }
  }
}