#!coding=utf-8
import re,time,struct,base64,linecache,glob,requests,json,pymysql,linecache
db = pymysql.connect("localhost","root","root","demo",charset="utf8")
cursor = db.cursor()
def parsetime(date, month, year, log_time):
time_str = '%s%s%s %s' %(year, month, date, log_time)
return time.strptime(time_str, '%Y%b%d %H:%M:%S')
def parserequest(rqst):
param = r"?P.*"
p = re.compile(r"/report\?(%s)" %param, re.VERBOSE)
return re.findall(p, rqst)
def geturlapi(apiurl,postvalue):
data = {'ua': postvalue}
r = requests.post(apiurl, data)
return r
def readline(path):
return linecache.getlines(path)
def phonedata(UserAgent):
if UserAgent['status'] == 0 and len(UserAgent['data'])>0:
if 'device' in UserAgent['data']:
phone_model = UserAgent['data']['device'] # 手机型号
else:
phone_model =""
if 'os' in UserAgent['data']:
phone_os= UserAgent['data']['os'] # 手机系统
else:
phone_os =""
if 'os_version' in UserAgent['data']:
os_version = UserAgent['data']['os_version'] # 手机系统版本
else:
os_version =""
if 'browser' in UserAgent['data']:
browser = UserAgent['data']['browser'] # 手机浏览器
else:
browser =""
if 'browser_version' in UserAgent['data']:
browser_version = UserAgent['data']['browser_version'] # 手机浏览器版本
else:
browser_version =""
arr={'phone_model':phone_model,'phone_os':phone_os+' '+os_version,'browser':browser+' '+browser_version}
else:
arr = {'phone_model': '', 'phone_os': '','browser': ''}
return arr
def getLogin(logstr):
ip = r"?P[\d.]*"
date = r"?P\d+"
month = r"?P\w+"
year = r"?P\d+"
log_time = r"?P\S+"
method = r"?P\S+"
request = r"?P\S+"
status = r"?P\d+"
bodyBytesSent = r"?P\d+"
refer = r"""?P
[^\"]*
"""
userAgent = r"""?P
.*
"""
reObject = re.compile(
r"(%s)\ -\ -\ \[(%s)/(%s)/(%s)\:(%s)\ [\S]+\]\ \"(%s)?[\s]?(%s)?.*?\"\ (%s)\ (%s)\ \"(%s)\"\ \"(%s).*?\"" % (
ip, date, month, year, log_time, method, request, status, bodyBytesSent, refer, userAgent), re.VERBOSE)
p1 = reObject.findall(logstr)
x=p1[0]
ip = x[0] # ip地址
dataarray = parsetime(x[1], x[2], x[3], x[4])
timestamp = int(time.mktime(dataarray)) # 时间戳
year = x[3] # 年
month = x[2] # 月
day = x[1] # 日
methon = x[5]
urlarray = x[6].split('?')
url = urlarray[0]
url_parameter = ''
if len(urlarray)>1:
url_parameter = urlarray[1]
response_code = x[7]
response_size = x[8]
UserAgent = geturlapi('http://www.demo.com/xx/xx', x[10])#自己找User-Agent库或者自己分析,
UserAgent = UserAgent.json()
UserAgentData=phonedata(UserAgent)
phone_model = UserAgentData['phone_model'] # 手机型号
phone_os = UserAgentData['phone_os'] # 手机系统
browser = UserAgentData['browser'] # 浏览器
sql = "INSERT INTO `nginxlog` (`logid`, `ip`, `timestamp`, `year`, `month`, `day`, `url`, `url_parameter`, `response_code`, `response_size`, `methon`, `phone_model`, `phone_os`, `browser`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')" % (
ip, timestamp, year, month, day, url, url_parameter, response_code, response_size, x[5], phone_model, phone_os,
browser)
cursor.execute(sql)
#if __init__=='__main__':
i=9
while i<100000:
w = linecache.getline('access.log', i)
getLogin(w)
if(i%100==0):
linecache.clearcache()#每读取100条数据清除缓存空下内存,
i+=1
print(i)
db.close()
写点有点简陋,自己可以优化下。