【Es】增删改查

最新推荐文章于 2024-09-04 19:00:19 发布
小毛驴吃梨子
最新推荐文章于 2024-09-04 19:00:19 发布
阅读量167
点赞数 1
文章标签： elasticsearch 大数据搜索引擎
本文链接：https://blog.csdn.net/qq_39057568/article/details/141720948
版权
import random
import time

from elasticsearch import Elasticsearch

# Connection settings for the Elasticsearch instance. The example assumes
# Elasticsearch is running on this machine on the default port 9200.
ES_CONFIG = dict(
    hosts=["127.0.0.1:9200"],
    http_auth="007er:XXXXXXXXXXXXXXXXXXXXXXXXX",
)

# Build the shared client: host/credentials come from ES_CONFIG, the
# remaining keyword arguments are the client tuning options.
es = Elasticsearch(
    **ES_CONFIG,
    maxsize=60,
    timeout=30,
    max_retries=3,
    retry_on_timeout=True,
)

# Document to write.
data = {
    "ip_addr": "1.1.1.4",
    "domain": "hao123.com",
    "geographic_location": 0,
}

# Index name used for the existence check below.
index_name = "assets_distinguish"

# Create the index (without specifying a mapping) if it does not exist yet.
if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name)

# NOTE(review): the document is written to 'device_msg', not to the index
# checked/created above -- confirm that both index names are intended.
index_name = 'device_msg'
index_type = 'DeviceInformation'

# Fix: the client method for writing a document is `index`; the original
# `es.index_name(...)` is not a client attribute and raised AttributeError.
res = es.index(index=index_name, doc_type=index_type, body=data)

# Print the outcome of the write.
print(res['result'])  # e.g. 'created' or 'updated', depending on whether the document already existed
print(res['_id'])  # the ID Elasticsearch assigned to this document


# Insert 100 documents filled with random sample data.
for num in range(100):
    # Random timestamp in 2024; days are capped at 28 so every month is valid.
    discovery_time = "2024-{}-{} {}:{}:{}".format(
        random.randint(1, 12), random.randint(1, 28),
        random.randint(0, 23), random.randint(0, 59), random.randint(0, 59),
    )
    # Parse the string into a struct_time, then convert it to a Unix timestamp.
    discoveryTimeStamp = time.mktime(time.strptime(discovery_time, "%Y-%m-%d %H:%M:%S"))

    item = {
        "ip_addr": "{}.{}.{}.{}".format(
            random.randint(0, 255), random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)),
        "domain": "hao{}.cn".format(random.randint(0, 1000)),
        # Fix: random.choice covers every entry; the original hard-coded
        # randint(0, 5) could never select the last element of the list.
        "geographic_location": random.choice(
            ["北京", "hhhhhhh", "湖北", "京都", "男北京", "西北京", "西北"]),
        "discovery_time": discoveryTimeStamp,
    }

    print(item)

    # Fix: call `es.index` (not the non-existent `es.index_name`) and target
    # `index_name`; the name `index` is never assigned in this file.
    print("插入数据：", es.index(index=index_name, body=item))



# Build the list of filter clauses; a clause is added only when its inputs
# are set.
# NOTE(review): discovery_time_start, discovery_time_end, id_start, id_end,
# ip_addr, page and limit are not assigned before this point in the file;
# they must be provided before this snippet runs.
must = []
# Fix: require BOTH bounds and use the end bound for "lte" (the original
# tested and used discovery_time_start twice, collapsing the range).
if discovery_time_start and discovery_time_end:
    must.append({'range': {'discovery_time': {"gte": discovery_time_start, "lte": discovery_time_end}}})
if id_start and id_end:
    must.append({'range': {'id': {"gte": id_start, "lte": id_end}}})
if ip_addr:
    must.append({"terms": {"ip_addr.keyword": [ip_addr]}})

# Search request: all clauses in `must` have to match.
body = {
    "query": {
        "bool": {
            'must': must
        }
    },
    'from': page * limit,  # offset of the first hit; page numbering starts at 0 here
    'size': limit  # may also be passed to es.search directly; Elasticsearch defaults to 10
}

# Fix: search `index_name`; the name `index` is never assigned in this file.
res = es.search(index=index_name, body=body)

# NOTE(review): "all_count" is read from the `_shards` section of the
# response and "cur_count" from `hits.total` -- confirm these are the
# intended counters.
data = {
    "all_count": res["_shards"]["total"],
    "cur_count": res["hits"]["total"],
    "data": [x["_source"] for x in res["hits"]["hits"]]
}

# Print each returned document.
for x in data["data"]:
    print(x)


# Variant of the filtered search that uses only the id-range and ip filters
# (the discovery_time range clause is deliberately not applied here).
# NOTE(review): id_start, id_end, ip_addr, page and limit are not assigned
# before this point in the file; they must be provided before this runs.
must = []
if id_start and id_end:
    must.append({'range': {'id': {"gte": id_start, "lte": id_end}}})
if ip_addr:
    must.append({"terms": {"ip_addr.keyword": [ip_addr]}})

# Search request. Instead of the "bool"/"must" combination, "query" could
# hold a single clause, for example
#   {"terms": {"ip_addr.keyword": [ip_addr]}}   or
#   {"range": {"id": {"gte": 2, "lte": 3}}}
body = {
    "query": {
        "bool": {
            'must': must
        }
    },
    'from': page * limit,  # offset of the first hit; page numbering starts at 0 here
    'size': limit  # may also be passed to es.search directly; Elasticsearch defaults to 10
}

# Fix: search `index_name`; the name `index` is never assigned in this file.
res = es.search(index=index_name, body=body)

# NOTE(review): "all_count" is read from the `_shards` section of the
# response and "cur_count" from `hits.total` -- confirm these are the
# intended counters.
data = {
    "all_count": res["_shards"]["total"],
    "cur_count": res["hits"]["total"],
    "data": [x["_source"] for x in res["hits"]["hits"]]
}

# Print each returned document.
for x in data["data"]:
    print(x)

# print(data)

# Request parameters for a paged, time-filtered search (pages start at 1).
# NOTE(review): page 6300 with 100 hits per page gives an offset of 629900;
# Elasticsearch rejects from + size above the index's max_result_window
# unless that setting has been raised -- confirm the target index allows it.
params = {
    "page": 6300, "page_size": 100, "discovery_time_start": "2024-08-20 00:00:00",
    "discovery_time_end": "2024-08-20 23:59:59"
}
ip_addr = params.get("ip_addr", [])
page = params.get("page", 1)
# Fix: honour "page_size" (the key actually present in params) while still
# accepting the "limit" key the original looked up.
limit = params.get("limit", params.get("page_size", 20))
discovery_time_start = params.get("discovery_time_start", "")
discovery_time_end = params.get("discovery_time_end", "")

must = []
# Fix: the range filter is added only when BOTH bounds are supplied (the
# original re-tested discovery_time_start instead of discovery_time_end).
if discovery_time_start and discovery_time_end:
    # Convert the "YYYY-mm-dd HH:MM:SS" strings to Unix timestamps (local
    # time), matching how discovery_time is stored by the insert snippet.
    discovery_time_start = time.mktime(time.strptime(discovery_time_start, "%Y-%m-%d %H:%M:%S"))
    discovery_time_end = time.mktime(time.strptime(discovery_time_end, "%Y-%m-%d %H:%M:%S"))
    must.append({'range': {'discovery_time': {"gte": discovery_time_start, "lte": discovery_time_end}}})
if ip_addr:
    # Fix: accept either a single address or a list of addresses; the
    # original wrapped an already-list value into a nested list.
    ip_values = [ip_addr] if isinstance(ip_addr, str) else list(ip_addr)
    must.append({"terms": {"ip_addr.keyword": ip_values}})

# Fix: the original issued an extra es.search here, before `body` was
# rebuilt and against the undefined name `index`; its result was discarded,
# so the call is removed.

# Search request: newest documents first, one page of results.
body = {
    "query": {
        "bool": {
            'must': must
        }
    },
    "sort": {
        "discovery_time": {"order": "desc"}
    },
    'from': (page - 1) * limit,
    'size': limit
}

res = es.search(index=index_name, body=body)

# Flatten the hits: expose the document id and render the stored timestamp
# as a local-time string.
data = []
for item in res["hits"]["hits"]:
    item["_source"]["id"] = item["_id"]
    item["_source"]["discovery_time"] = time.strftime("%Y-%m-%d %H:%M:%S",
                                                      time.localtime(item["_source"]["discovery_time"]))
    data.append(item["_source"])
print(res)
# Index definition: field mappings plus index-level settings.
body = {
    "mappings": {
        "properties": {
            "geographic_location": {"type": "text"},  # change to "geo_point" if needed
            "discovery_time": {"type": "long"},
            "domain": {"type": "text"},
            "ip_addr": {"type": "text"}  # the "ip" type can be used if the Elasticsearch version supports it
        }
    },
    "settings": {
        "index": {
            "max_result_window": 50000000  # result-window limit; choose your own value
        }
    }
}

# Create the index with the mapping above unless it already exists.
if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name, body=body)
else:
    print(f"Index {index_name} already exists.")

# Delete the index.
# Fix: use `index_name`; the name `index` is never assigned in this file.
res = es.indices.delete(index=index_name)