用Python将XML转换为JSON格式,调用bulk接口写入ES
废话不多说,直接上代码
import xml.etree.ElementTree as ET
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
def packNodes(nodes, m):
    """Recursively pack an XML element into nested dictionaries.

    The element's tag becomes a key of ``m``.  Its value is:

    * the element's raw (unstripped) text, when the element has non-blank
      text and no child elements -- attributes are dropped in that case;
    * otherwise a dict holding the element's attributes plus one entry per
      child element (packed recursively).  If such an element also carries
      non-blank text, the text is kept under the ``"#text"`` key.

    Sibling elements that share a tag overwrite each other, because the tag
    is used directly as the dictionary key.

    Args:
        nodes: an ``xml.etree.ElementTree.Element`` to convert.
        m: the dictionary that receives the ``tag -> value`` entry.

    Returns:
        The same dictionary ``m``, after it has been updated in place.
    """
    tag = nodes.tag
    text = nodes.text
    has_text = text is not None and text.strip() != ""

    if has_text and len(nodes) == 0:
        # Text-only leaf: the value is the text itself.
        m[tag] = text
        return m

    packed = dict(nodes.attrib)
    if has_text:
        # Mixed content: keep a dict so the children can still be attached.
        # Assigning the bare string here would make the recursion below fail
        # with "TypeError: 'str' object does not support item assignment".
        packed["#text"] = text
    m[tag] = packed

    for node in nodes:
        packNodes(node, packed)
    return m
# XML namespace that ElementTree prefixes to every Windows event tag.
EVENT_NS = "{http://schemas.microsoft.com/win/2004/08/events/event}"


def clean_event(value):
    """Return a cleaned copy of a structure produced by ``packNodes``.

    * the Windows event namespace prefix is removed from keys and strings;
    * tab characters are removed from string values;
    * empty dictionaries become ``None``.

    This is done structurally instead of the former ``eval(str(d))`` round
    trip, which executed file content as code and also rewrote any text that
    happened to contain ``{}``.
    """
    if isinstance(value, dict):
        if not value:
            return None
        return {
            key.replace(EVENT_NS, ""): clean_event(child)
            for key, child in value.items()
        }
    if isinstance(value, str):
        return value.replace("\t", "").replace(EVENT_NS, "")
    return value


def iter_bulk_actions(path, index):
    """Yield one bulk "index" action per event found below ``path``.

    ``path`` is expected to contain one sub-directory per host, each holding
    XML files whose root element contains the event elements.  A new action
    dict is created for every event, so queued actions never share (and
    overwrite) each other's ``_source``.

    Raises:
        KeyError: if a child of an XML root is not an ``Event`` element.
    """
    for host_dir in os.listdir(path):
        host_path = os.path.join(path, host_dir)
        if not os.path.isdir(host_path):
            continue  # stray file next to the host directories
        for file_name in os.listdir(host_path):
            xml_path = os.path.join(host_path, file_name)
            # Test the full path; the bare name would be resolved against
            # the current working directory.
            if os.path.isdir(xml_path):
                continue
            root = ET.parse(xml_path).getroot()
            for node in root:
                packed = {}
                packNodes(node, packed)
                cleaned = clean_event(packed)
                yield {
                    '_op_type': 'index',
                    '_index': index,
                    '_type': 'doc',
                    '_source': cleaned['Event'],  # drop the Event wrapper
                }


if __name__ == '__main__':
    es = Elasticsearch(hosts='http://192.168.xx.xx', port=9200)  # host and port
    # Start from a clean index: delete it if it already exists.
    if es.indices.exists(index='hz_windows-event'):
        es.indices.delete(index='hz_windows-event')
    path = '/home/windows_xml'  # root directory holding one folder per host
    # helpers.bulk consumes the generator lazily and sends it in batches of
    # 2000, so nothing is accumulated in memory or sent twice.
    helpers.bulk(
        client=es,
        actions=iter_bulk_actions(path, 'hz_windows-event'),
        chunk_size=2000,
    )
    print('all down!')
我也是一名Python小白,短短的代码里也踩了不少坑,但结果还是好的。发出来希望可以帮助大家,更希望可以和大家一起讨论研究。
q:913793582
有不对的地方希望大神斧正!!!