Elasticsearch 创建索引并导入数据

最新推荐文章于 2024-07-01 10:11:57 发布

赵起明

最新推荐文章于 2024-07-01 10:11:57 发布

阅读量309

点赞数

分类专栏： elasticsearch 文章标签： elasticsearch

本文链接：https://blog.csdn.net/lh_zqm/article/details/107491938

版权

elasticsearch 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

import pandas as pd
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch

# Client handle used by the index-management and bulk calls further down.
# NOTE(review): "ip:host" looks like a placeholder (presumably meant as
# "host:port") -- replace it with the real cluster address before running.
es = Elasticsearch("ip:host")

# Index definition: one primary shard and no replicas (the original author
# notes this keeps the cluster health green), plus a custom normalizer that
# lowercases the values of the two keyword fields.
_LOWERCASE_NORMALIZER = "my_lowercase"
_KEYWORD_FIELDS = ("cid", "mol_name")

mapping = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
        "analysis": {
            "normalizer": {
                _LOWERCASE_NORMALIZER: {"type": "custom", "filter": ["lowercase"]},
            },
        },
    },
    "mappings": {
        "doc": {
            "properties": {
                # A fresh dict per field, so the two definitions never alias.
                field: {"type": "keyword", "normalizer": _LOWERCASE_NORMALIZER}
                for field in _KEYWORD_FIELDS
            },
        },
    },
}


# Index name; Elasticsearch requires index names to be lowercase.
index_name = "0508_compound_names"

# Drop any previous copy so the index is rebuilt from scratch. Only a 404
# ("no such index") is tolerated, which also makes a separate exists() round
# trip unnecessary; any other failure should stop the script.
es.indices.delete(index=index_name, ignore=404)

# Create the index with the explicit settings/mapping. A 400 is deliberately
# NOT ignored here: the index was just removed, so a 400 most likely means the
# request body was rejected, and carrying on could let the later bulk load
# auto-create the index without the lowercase normalizer.
es.indices.create(index=index_name, body=mapping)

# Load the tab-separated source table; the first line holds the column names.
table = pd.read_csv("compound_properties.txt", sep="\t", index_col=None, header=0)

# Each bulk action carries its own metadata: target index, document type and
# a 1-based sequential document id (the original author notes that leaving
# out _index/_type makes the insert fail).
table["_index"] = index_name
table["_type"] = "doc"
table["_id"] = range(1, len(table) + 1)

# Replace missing values with the literal string "None", then keep only the
# indexed columns plus the bulk metadata.
data = table.fillna("None")[["cid", "mol_name", "_index", "_type", "_id"]]

# Keep a copy on disk so the prepared rows can be inspected.
data.to_csv("compound_names_index.txt", sep="\t", index=None)

# One dict per row, sent to Elasticsearch through the bulk helper.
bulk(es, data.to_dict("records"))

print("ok!")

示例数据为下表格式，文件中各列应以 Tab 键分隔。

id	cid	status	mol_name
1	CMNPD1	synonym	1,2,4-Trithiolane
2	CMNPD1	synonym	1,2,4-Trithiolan
3	CMNPD1	synonym	1,3,5-Trithiolan
4	CMNPD1	synonym	1.2.4-Trithiolan
5	CMNPD1	synonym	1.3.5-Trithiolan
6	CMNPD1	synonym	[1,2,4]trithiolane
7	CMNPD2	synonym	1,2,4-Trithiolane 4-oxide
8	CMNPD2	synonym	1,2,4-Trithiolan-4-oxid
9	CMNPD2	synonym	1,2,4-trithiolane 4-S-oxide
10	CMNPD2	synonym	1.2.4-Trithiolan-4-oxid