#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
/**************************************************************
* Copyright (c) 2021 Baidu.com, Inc. All Rights Reserved
* @author:
* @email:
* @dept: KG
* @file: index_data.py
* @time: 2021/04/01
**************************************************************/
创建索引数据
"""
import os
import sys
import json
import requests
import random
from elasticsearch import Elasticsearch
from elasticsearch import helpers
# Python 2 only: force the process default encoding to UTF-8 so implicit
# str<->unicode conversions of the (non-ASCII) document data do not raise.
# reload()/sys.setdefaultencoding() do not exist on Python 3, where they
# are also unnecessary -- guard so the module at least imports there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding("utf-8")

# Elasticsearch connection settings (local single-node cluster).
ip = "127.0.0.1"
port = 9200
es = Elasticsearch(hosts=ip, port=port, timeout=20)
def index_data_bulk(data_file):
    """Bulk-index a file of newline-delimited JSON documents.

    The target index name is derived from the file name: the first two
    underscore-separated parts of the basename, extension stripped
    (e.g. "person_base_20210401.json" -> index "person_base").
    Documents are sent to Elasticsearch in batches of 1000.

    :param data_file: path to a file containing one JSON object per
                      line; each object must carry an "@id" field,
                      which is used as the document _id.
    """
    batch_size = 1000
    index = "_".join(os.path.basename(data_file).split(".")[0].split("_")[0:2])
    actions = []
    with open(data_file, "r") as lines:
        for line in lines:
            doc = json.loads(line)
            actions.append({
                "_op_type": "index",  # one of: index / update / create / delete
                "_index": index,
                "_type": "_doc",  # mapping type; fixed to _doc (ES >= 6)
                "_id": doc["@id"],
                "_source": doc,
            })
            # Flush a full batch as soon as it is complete (the original
            # counter-based logic only flushed on the 1001st document,
            # holding the batch one iteration longer than necessary).
            if len(actions) >= batch_size:
                index_bulk(actions)
                actions = []
    # Flush the final partial batch, if any.
    if actions:
        index_bulk(actions)
def index_bulk(actions):
    """Send one batch of bulk actions to the module-level ES client.

    :param actions: list of bulk action dicts (as built by the caller).
    """
    # helpers.bulk drives the whole batch in one call and raises on errors.
    # Alternatives: helpers.streaming_bulk / helpers.parallel_bulk are lazy
    # generators (they only run when iterated), support a per-chunk size,
    # and parallel_bulk additionally takes a thread count, e.g.:
    # for ok, response in helpers.streaming_bulk(es, actions):
    #     if not ok:
    #         print(response)
    helpers.bulk(es, actions)
def main():
    """Entry point: bulk-index the file named by the first CLI argument."""
    index_data_bulk(sys.argv[1])
if __name__ == '__main__':
    main()

# NOTE(review): the lines below were pasted as bare text after the entry
# guard, which made the whole file a SyntaxError; preserved here as comments.
# ip = "127.0.0.1"  # IP of the elasticsearch-master service (svc)
# port = 9200
# To restore, run:
#   ls | awk '{print $1}' | xargs -i python ../restore_indices.py {}