Python 分析 IIS 服务器系统日志

# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime

# URI endings that mark a request as excluded from the analysis.
_EXCLUDED_URI_SUFFIXES = (
    '.gif', '.mp4', '.png', '.js', '.css', '.jpg', '.jpeg', '.rar', '.txt',
    '.ico', '.svg', '.gz', '.7z', 'eval($_POST[c]))', '.jsp',
)
# URI beginnings that mark a request as excluded from the analysis.
_EXCLUDED_URI_PREFIXES = ('/wap', '/plus')


def _is_excluded_uri(uri):
    """Return True when *uri* is a string with an excluded prefix or suffix."""
    return isinstance(uri, str) and (
        uri.endswith(_EXCLUDED_URI_SUFFIXES)
        or uri.startswith(_EXCLUDED_URI_PREFIXES)
    )


def getData(log_dir=r'D:\python\python3\sort\logdata'):
    """Load every log file in *log_dir* into one filtered DataFrame.

    Each file is parsed as a space-separated table whose first line holds
    the column names; malformed lines are skipped.  A boolean ``ifhtml``
    column is added that is True for rows whose ``cs-uri-stem`` matches one
    of the excluded prefixes/suffixes, and only rows where it is False are
    returned.

    Args:
        log_dir: Directory containing the log files.  Defaults to the
            location the script was originally written for.

    Returns:
        The concatenated, filtered rows of all files (including the
        ``ifhtml`` column), or an empty DataFrame when the directory holds
        no files.

    Raises:
        KeyError: If a file has no ``cs-uri-stem`` column.
    """
    frames = []
    # Sorted so the row order does not depend on the OS listing order.
    for name in sorted(os.listdir(log_dir)):
        path = os.path.join(log_dir, name)
        if not os.path.isfile(path):
            continue
        # on_bad_lines='skip' replaces error_bad_lines=False, which was
        # removed in pandas 2.0.
        frame = pd.read_table(path, sep=' ', header='infer', on_bad_lines='skip')
        frame['ifhtml'] = [_is_excluded_uri(uri) for uri in frame['cs-uri-stem']]
        frames.append(frame)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    data = pd.concat(frames, axis=0)
    # astype(bool) guards against an object-dtype column, which a
    # header-only file can introduce.
    return data[~data['ifhtml'].astype(bool)]
def countByDate(data, date):
    """Plot the number of requests per hour for one day.

    Prints a summary (``info()``) and the first rows of the selected day,
    then draws a horizontal bar chart of hourly request counts.

    Args:
        data: Log frame with ``date`` and ``time`` string columns.
        date: Value compared against the ``date`` column to pick the day.
    """
    # Work on a copy so adding a column does not write into a slice of
    # the caller's frame (avoids SettingWithCopyWarning).
    day = data[data['date'] == date].copy()
    day['datetime'] = day['date'] + " " + day['time']
    day.index = pd.to_datetime(day['datetime'])
    # info() prints by itself and returns None, so it is not wrapped in print().
    day.info()
    print(day.head())
    # One-hour bins; a Timedelta is used because the '1H' string alias is
    # deprecated in recent pandas releases.
    hourly = day.resample(pd.Timedelta(hours=1), closed='left')['datetime'].count()
    hourly.plot(kind='barh')

def countByIp(data, date):
    """Print and plot the ten client IPs with the most requests on *date*.

    Args:
        data: Log frame with ``date`` and ``c-ip`` columns.
        date: Value compared against the ``date`` column to pick the day.
    """
    on_that_day = data['date'] == date
    hits_per_ip = (
        data[on_that_day]
        .groupby('c-ip')['c-ip']
        .count()
        .sort_values()
    )
    # Ascending sort, so the last ten entries are the busiest addresses.
    busiest = hits_per_ip[-10:]
    print(busiest)
    busiest.plot(kind='barh')
    
# Load the filtered log frame; this runs whenever the module is executed or imported.
data = getData()

# Print a visual separator line to the console.
print('0-'*50)
#print(data.head())

 #data.groupby(['date'])['date'].count().plot(kind='barh')

def getTopPerson(data):
    """Print and plot the 20 busiest (date, client IP) pairs.

    Args:
        data: Log frame with ``date`` and ``c-ip`` columns.
    """
    # Aggregate once and reuse the result for both the printout and the chart.
    top_pairs = data.groupby(['date', 'c-ip'])['c-ip'].count().sort_values()[-20:]
    print(top_pairs)
    top_pairs.plot(kind='barh')

def countStatus(data):
    """Print status-code counts per date and plot the accepted ones.

    First prints how often each ``sc-status`` value occurs on each date.
    Then keeps only rows whose status passes ``checkStatus`` and draws a
    horizontal bar chart of their counts per status code.

    Args:
        data: Log frame with ``date`` and ``sc-status`` columns.
    """
    # Overall status-code distribution, per date.
    statuses_by_date = data.groupby('date')['sc-status']
    print(statuses_by_date.value_counts())

    # Restrict to the requests that count as valid visits.
    is_valid = data['sc-status'].apply(checkStatus)
    valid = data[is_valid]
    per_status = valid.groupby(['sc-status'])['sc-status'].count()
    per_status.plot(kind='barh')
    
def checkStatus(x):
    """Return True when *x* equals 200, 301, 206, 302 or 304."""
    accepted = (200, 301, 206, 302, 304)
    return x in accepted

def unStatus(data):
    """Plot, per date, how many requests returned 404, 500 or 401.

    Args:
        data: Log frame with ``date`` and ``sc-status`` columns.
    """
    failure_codes = [404, 500, 401]

    def is_failure(status):
        return status in failure_codes

    failed = data[data['sc-status'].apply(is_failure)]
    counts = failed.groupby('date')['sc-status'].value_counts()
    # Unstacking moves the status codes into columns, one row per date.
    table = counts.to_frame().unstack()
    table.plot.bar()

def useStatus(data):
    """Plot valid requests per day and print their daily average.

    Rows are kept when their ``sc-status`` passes ``checkStatus``.  The
    per-day counts are drawn as a horizontal bar chart, then the mean of
    those counts is printed.

    Args:
        data: Log frame with ``date`` and ``sc-status`` columns.
    """
    valid = data[data['sc-status'].apply(checkStatus)]
    # Aggregate once; the same counts feed both the chart and the average.
    daily_hits = valid.groupby('date')['date'].count()
    daily_hits.sort_index().plot(kind='barh')
    print('日均访问量:')
    print(daily_hits.mean())

def urlcate(data):
    """Return the distinct requested URIs as a one-column DataFrame.

    The previous version built this frame and then discarded it, so the
    function had no observable effect.  The result is now returned, with
    the URIs in order of first appearance.

    Args:
        data: Log frame with a ``cs-uri-stem`` column.

    Returns:
        A DataFrame with a single column ``0`` holding each distinct
        ``cs-uri-stem`` value once.
    """
    return pd.DataFrame(data['cs-uri-stem'].unique())
    
def countUrl(data):
    """Plot request counts for the most frequently requested URIs.

    Args:
        data: Log frame with a ``cs-uri-stem`` column.
    """
    hits_per_uri = data.groupby(['cs-uri-stem'])["cs-uri-stem"].count()
    ranked = hits_per_uri.sort_values()
    # NOTE(review): the slice stops at -1, so the single most-requested URI
    # is left out and at most 19 bars are drawn -- confirm this is intended.
    ranked[-20:-1].plot(kind='barh')
    
def countMethd(data):
    """Plot counts of non-GET request methods per date.

    Only rows whose ``sc-status`` passes ``checkStatus`` and whose
    ``cs-method`` is not ``'GET'`` are counted.

    Args:
        data: Log frame with ``date``, ``sc-status`` and ``cs-method`` columns.
    """
    accepted = data[data['sc-status'].apply(checkStatus)]
    non_get = accepted[accepted['cs-method'] != 'GET']
    per_day_method = non_get.groupby(['date', 'cs-method'])['cs-method'].count()
    # NOTE(review): the slice stops at -1, so the largest (date, method)
    # group is left out and at most 9 bars are drawn -- confirm this is intended.
    per_day_method.sort_values()[-10:-1].plot(kind='barh')
if __name__ == '__main__':
    # `data` is already loaded by the module-level getData() call earlier
    # in this file, so it is reused instead of reading every log a second time.
    countMethd(data)
    # Without an explicit show() the figure never appears when this file
    # is run as a plain script outside an interactive plotting backend.
    plt.show()
    

 

转载于:https://my.oschina.net/tianhuahua/blog/3071136

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值