日志抓取的python脚本
#! /usr/bin/env python
import getopt, sys, os, re
from stat import *
from datetime import datetime
from datetime import timedelta
# Recognised log levels, listed from lowest to highest. get_levels() returns
# the tail of this list starting at a given level, so the order is significant.
LOG_LEVELS = ['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL']
def usage():
    """Print the command-line help text to standard output.

    The program name shown on the first line is taken from ``sys.argv[0]``.
    """
    # A parenthesised single-argument print is valid both as the Python 2
    # print statement and as the Python 3 print function.
    # "%%" yields a literal percent sign; only "%s" is substituted.
    print("""Usage: %s [OPTIONS...] grab logs in specified duration
Options:
-d, --log_dir=<log_dir> directory of log files
-b, --base_name=<base_name> log file base name
-s, --start_time=<start_time> start time, mandatory
-e, --end_time=<end_time> end time, optional, default: now
-l, --level=<level> lowest log level, optional, options: DEBUG, INFO, WARN, ERROR, FATAL, default: ERROR
-k, --keyword=<keyword> keyword to grab
-h, --help display this help
Note: time format: %%Y-%%m-%%d %%H:%%M, e.g. 2012-07-07 10:00
""" % (sys.argv[0]))
def parse_datetime(arg):
    """Parse a ``YYYY-MM-DD HH:MM`` command-line value into a datetime.

    Args:
        arg: the text supplied on the command line.

    Returns:
        The parsed ``datetime`` (seconds and microseconds are zero).

    If ``arg`` does not match the expected format, an error message and the
    usage text are printed and the process exits with status 1.
    """
    try:
        return datetime.strptime(arg, '%Y-%m-%d %H:%M')
    except ValueError:
        print('ERROR: datetime format error\n')
        usage()
        sys.exit(1)
# Matches the start of a log record such as
#   2012-07-07 10:00:01,123 [main] ERROR message
# Group 1 is the timestamp down to the second, group 2 the level word that
# follows the bracketed field.
_LOG_LINE_RE = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+\s\[.+\] (\w+) .+')


def print_match_logs(log_file, start_time, end_time, levels, keyword):
    """Print the lines of ``log_file`` that match the given filters.

    A line is printed when it matches the log-record pattern, its timestamp
    lies within ``[start_time, end_time]`` (inclusive), its level is in
    ``levels`` and, if ``keyword`` is non-empty, the line contains it.
    Lines that do not match the record pattern are skipped.

    Args:
        log_file: path of the file to scan.
        start_time: earliest ``datetime`` to include.
        end_time: latest ``datetime`` to include.
        levels: collection of accepted level names.
        keyword: substring that must occur in the line, or a falsy value to
            disable keyword filtering.

    Note:
        Each line is passed to ``print`` with its own line ending intact, so
        the output has an empty line after every matching record.
    """
    # The context manager closes the file even if an exception escapes.
    with open(log_file) as f:
        for line in f:
            m = _LOG_LINE_RE.match(line)
            if not m:
                continue
            try:
                cur_time = datetime.strptime(m.group(1), '%Y-%m-%d %H:%M:%S')
            except ValueError:
                # Digits fit the pattern but do not form a valid date/time.
                continue
            if not (start_time <= cur_time <= end_time):
                continue
            if m.group(2) not in levels:
                continue
            if keyword and keyword not in line:
                continue
            print(line)
def log_extract(log_dir, log_file_base_name, start_time, end_time, levels,
                keyword):
    """Scan one log file per calendar day between two times.

    For every day from ``start_time``'s date through ``end_time``'s date the
    file for that day is looked up in ``log_dir``: the current day's file is
    ``<log_file_base_name>``, any other day's file is
    ``<log_file_base_name>.YYYY-MM-DD``. Existing files are handed to
    ``print_match_logs``; for a missing file a warning is printed.

    Args:
        log_dir: directory holding the log files (with or without a trailing
            path separator).
        log_file_base_name: name of the current day's log file.
        start_time: earliest ``datetime`` to include.
        end_time: latest ``datetime`` to include.
        levels: collection of accepted level names.
        keyword: substring filter, or a falsy value for none.
    """
    def midnight(moment):
        # Truncate to the start of the day so days compare by date only.
        return moment.replace(hour=0, minute=0, second=0, microsecond=0)

    day = midnight(start_time)
    last_day = midnight(end_time)
    today = midnight(datetime.today())
    while day <= last_day:
        file_name = log_file_base_name
        if day != today:
            file_name += '.' + day.strftime('%Y-%m-%d')
        # os.path.join copes with log_dir given with or without a trailing
        # separator, unlike plain string concatenation.
        log_file = os.path.join(log_dir, file_name)
        if os.path.isfile(log_file):
            print_match_logs(log_file, start_time, end_time, levels, keyword)
        else:
            print('WARN: log file %s does not exist' % log_file)
        day += timedelta(days=1)
def get_levels(level):
    """Return the levels in LOG_LEVELS from ``level`` to the end of the list.

    Returns None when ``level`` is not one of LOG_LEVELS.
    """
    if level not in LOG_LEVELS:
        return None
    first = LOG_LEVELS.index(level)
    return LOG_LEVELS[first:]
def main():
    """Parse the command line, validate it and run the log extraction.

    Exits with status 0 after printing help (``-h``/``--help``) and with
    status 1 on an unknown option, a missing mandatory option (log
    directory, base name, start time), a start or end time in the future,
    a start time later than the end time, or an unknown level.
    """
    try:
        # 'base_name=' matches the help text and the handler below;
        # 'base_dir=' is also registered so that command lines using that
        # spelling keep being accepted, and it is treated as an alias.
        opts, _args = getopt.getopt(
            sys.argv[1:], 'd:b:s:e:l:k:h',
            ['log_dir=', 'base_name=', 'base_dir=', 'start_time=',
             'end_time=', 'level=', 'keyword=', 'help'])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    now = datetime.now()
    log_dir = None
    log_file_base_name = None
    start_time = None
    end_time = now  # default when -e/--end_time is not given
    level = 'ERROR'
    keyword = None
    for opt, value in opts:
        if opt in ('-d', '--log_dir'):
            log_dir = value
        elif opt in ('-b', '--base_name', '--base_dir'):
            log_file_base_name = value
        elif opt in ('-s', '--start_time'):
            start_time = parse_datetime(value)
        elif opt in ('-e', '--end_time'):
            end_time = parse_datetime(value)
        elif opt in ('-l', '--level'):
            level = value
        elif opt in ('-k', '--keyword'):
            keyword = value
        elif opt in ('-h', '--help'):
            usage()
            sys.exit(0)

    if not log_dir:
        print('ERROR: log_dir is mandatory')
        sys.exit(1)
    if not log_dir.endswith('/'):
        log_dir += '/'
    if not log_file_base_name:
        print('ERROR: log_file_base_name is mandatory')
        sys.exit(1)
    if not start_time:
        print('ERROR: start_time is mandatory')
        sys.exit(1)
    if start_time > now or end_time > now:
        print('ERROR: start_time or end_time is over now')
        sys.exit(1)
    if start_time > end_time:
        print('ERROR: start_time is over end_time')
        sys.exit(1)
    if level not in LOG_LEVELS:
        print('ERROR: invalid level')
        sys.exit(1)

    log_extract(log_dir, log_file_base_name, start_time, end_time,
                get_levels(level), keyword)
if __name__ == '__main__' :
main()
集群抓取日志的bash脚本
loggrab.sh
#!/bin/bash
# Run the log-grabbing Python script on every host listed in $HOSTS for a
# given time window, fetch each host's result and merge them into one local
# file, which is then opened in less.
# LOG_DIR, BASE_NAME, HOSTS, LOG_GRAB_PY and TMP_LOG_FILE_PATH are read from
# conf.sh, located in the same directory as this script.

LOG_LEVELS=('DEBUG' 'INFO' 'WARN' 'ERROR' 'FATAL')

# in_array NEEDLE ITEM...: succeed if NEEDLE equals one of the ITEMs.
function in_array() {
    local findee=$1
    shift
    local elem
    for elem in "$@"; do
        [ "$elem" == "$findee" ] && return 0
    done
    return 1
}

# Print the help text.
function usage() {
    cat <<EOD
Usage: $(basename "$0") [OPTIONS...] grab logs in specified duration
Options:
-s start time, mandatory
-e end time, optional, default: now
-l lowest log level, optional, options: DEBUG, INFO, WARN, ERROR, FATAL, default: ERROR
-k grab keyword
-h display this help
EOD
}

# Parse the command line into START_TIME, END_TIME, LOG_LEVEL and KEYWORD
# (globals) and validate them; exits on any error.
function parse_args() {
    START_TIME=
    END_TIME=$(date "+%Y-%m-%d %H:%M")  # default: now
    LOG_LEVEL=ERROR
    KEYWORD=
    # Leading ':' selects silent error reporting, handled in the case below.
    # -h takes no argument, so it has no trailing ':'.
    OPTIONS=':s:e:l:k:h'
    local OP
    while getopts "$OPTIONS" OP; do
        case $OP in
            s) START_TIME=$OPTARG ;;
            e) END_TIME=$OPTARG ;;
            l) LOG_LEVEL=$OPTARG ;;
            k) KEYWORD=$OPTARG ;;
            h) usage; exit 0 ;;
            :) echo "ERROR: option -$OPTARG requires an argument" >&2
               usage; exit 1 ;;
            \?) echo "ERROR: unknown option -$OPTARG" >&2
                usage; exit 1 ;;
        esac
    done

    if [ -n "$START_TIME" ]; then
        # Quoted: the value contains a space ("2012-07-07 10:00").
        START_TIME=$(date -d "$START_TIME" "+%Y-%m-%d %H:%M") || exit 1
    else
        echo "ERROR: start time is mandatory" >&2
        exit 1
    fi
    if [ -n "$END_TIME" ]; then
        END_TIME=$(date -d "$END_TIME" "+%Y-%m-%d %H:%M") || exit 1
    fi
    if [ -n "$KEYWORD" ]; then
        # The embedded quotes are interpreted by the remote shell, because
        # $KEYWORD is pasted into the command string given to pssh.
        KEYWORD="-k \"$KEYWORD\""
    fi
    if ! in_array "$LOG_LEVEL" "${LOG_LEVELS[@]}"; then
        echo "ERROR: unknown log level: $LOG_LEVEL" >&2
        exit 1
    fi
}

function main() {
    STARTUP_DIR=$(pwd)
    # Resolve the script's own directory so conf.sh is found whether the
    # script was started through a relative or an absolute path.
    BASE_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
    . "$BASE_DIR/conf.sh" || exit 1

    # "$@" keeps arguments containing spaces (the time values) intact.
    parse_args "$@"

    # $HOSTS is passed as ONE host string; unquoted, every host after the
    # first would be taken as part of the command.
    pssh -i -H "$HOSTS" "python $LOG_GRAB_PY -d \"$LOG_DIR\" -b \"$BASE_NAME\" -s \"$START_TIME\" -e \"$END_TIME\" -l $LOG_LEVEL $KEYWORD > $TMP_LOG_FILE_PATH" || exit 1

    grab_log=grab.log.$(date "+%s")
    # Unquoted on purpose: $HOSTS is a space-separated list.
    for host in $HOSTS; do
        tmp_log=/tmp/$host.log
        # Skip the host if the copy fails, so a stale file left by an
        # earlier run is never appended.
        if ! scp "$host:$TMP_LOG_FILE_PATH" "$tmp_log"; then
            echo "WARN: could not fetch log from $host" >&2
            continue
        fi
        cat "$tmp_log" >> "$grab_log"
    done
    less "$grab_log"
}

main "$@"
conf.sh
#!/bin/bash
# Settings sourced by loggrab.sh.

# Directory on each host that holds the log files.
LOG_DIR="/tomcat6/logs/"
# Name of the current log file inside LOG_DIR.
BASE_NAME="demo.log"
# Space-separated host list. The quotes are required: without them the shell
# would run the second address as a command with HOSTS set only for it.
HOSTS="10.249.213.154 10.249.213.155"
# Path of the log-grabbing Python script on each host.
LOG_GRAB_PY=/loggrab.py
# Remote file that receives the script's output; the epoch-seconds suffix
# gives each run its own file name.
TMP_LOG_FILE_PATH=/tmp/log.$(date "+%s")