Elasticsearch 官方和社区提供了各种各样的客户端库，在之前的博客中，我陆陆续续提到和演示过 Perl 的，JavaScript 的，Ruby 的。上周写了一版 Python 的，考虑到好像很难找到现成的示例，如何用 Python 批量写数据进 Elasticsearch，今天一并贴上来。
#!/usr/bin/env pypy
#coding:utf-8
import re
import sys
import time
import datetime
import logging
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from elasticsearch import ConnectionTimeout
# Module-level Elasticsearch client shared by the rest of the script.
# The two addresses are the initial hosts handed to the client; sniffing is
# enabled both at start-up and after a connection failure, and a request is
# retried up to 3 times, including when it times out.
es = Elasticsearch(
    ['192.168.0.2', '192.168.0.3'],
    sniff_on_start=True,
    sniff_on_connection_fail=True,
    max_retries=3,
    retry_on_timeout=True,
)

# Install the default root handler, then raise the threshold of the two
# chatty library loggers so only warnings and errors are emitted.
# logging.WARNING is the documented name for the level formerly spelled WARN.
logging.basicConfig()
logging.getLogger('elasticsearch').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
def parse_www(logline):
try:
time_local,
request,
http_user_agent,
staTus,
remote_addr,
http_referer,
request_time,
body_bytes_sent,
http_x_forwarded_proto,
http_x_forwarded_for,
http_host,
http_cookie,
upstream_response_time = logline.split('`')
try:
upstream_response_time = float(upstream_response_time)
except:
upstream_response_time = None
method, uri, verb = request.split(' ')
arg = {}
try:
url_path, url_args = uri.split('?')
for args in url_args.split('&'):
k, v = args.split('=')
arg[k] = v
except:
url_path = uri
# Why %z do not implement?
date = datetime.datetime.strptime(time_local, '[%d/%b/%Y:%H:%M:%S +0800]')
ret = {
"@timestamp": date.strftime('%FT%T+0800'),
"host": "127.0.0.1",
"method": method.lstrip('"'),