前面是日记,或者是牢骚吧
就感觉挺可惜的:一月份的时候因为有分析日志的需求,写过一份几十行的代码,能够做ArcGIS站点分析并录入到Excel,但根据ArcGIS站点信息进行日志分析的部分还没写。这次需要用的时候却找不到了,于是重新写了代码,实现了 ArcGIS站点分析+Nginx日志分析+录入到Excel 的功能。结果写完没几天,又找到了旧代码。
颇有“知不可乎骤得,托遗响于悲风”的感伤
所以这一次先把代码脱敏后紧急贴出来,周末会花两三天时间完善这篇博客——所以现在它还只是一个半成品。
旧代码记录
from asyncio.windows_events import NULL
import json
import os
import urllib.request
import tablib
import pandas
#import jieba
# Names of the ArcGIS sites to crawl. The main script turns each name into
# the path '/<name>/arcgis/rest/services'.
# NOTE: this is a set, so the order in which sites are visited is not fixed.
arc_srvs = {
    'a_map_1', 'a_map_2', 'a_map_3', 'a_map_4', 'a_map_5', 'a_map_6',
    # Commented out (not crawled):
    # 'e_map_1',
    # 'f_map_1',
    'b_map_1', 'b_map_2',
    'c_map_1',
    'd_map_1',
    'a_map_xzq',
    'g_map_1', 'g_map_2', 'g_map_3',
    'b_map_dghy',
}

# Rows collected for the spreadsheet; get_services() appends one
# [server, name, url, count] list per discovered service.
xls_data = []

# Column headers of the exported sheet, in the same order as the row fields.
header = ('server', 'name', 'url', 'count')
def get_record(url):
    """Fetch ``url`` and return its body parsed as JSON.

    Args:
        url: Address passed to ``urllib.request.urlopen``.

    Returns:
        The decoded JSON value on success. When the request itself fails,
        ``"FAIL: "`` and the URL are printed and the imported ``NULL``
        sentinel is returned; callers compare the result with ``== NULL``.

    Raises:
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass). Only
            the request is guarded, exactly as before.
    """
    try:
        resp = urllib.request.urlopen(url)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit are no longer swallowed.
        print("FAIL: ", url)
        return NULL
    # Close the response deterministically instead of leaking the handle.
    with resp:
        return json.loads(resp.read())
def get_services(arc_services, arc_url, arc_srv):
    """Append one spreadsheet row per service to the global ``xls_data``.

    Args:
        arc_services: Iterable of mappings that each have a ``'name'`` key.
        arc_url: Base path of the site; the service name is appended to it
            after a ``'/'`` to form the row's url field.
        arc_srv: Site name stored in the row's first column.

    Each row is ``[arc_srv, name, arc_url + '/' + name, 0]``; the trailing
    0 is the hit counter that the log analysis increments later.
    """
    for entry in arc_services:
        row = [
            arc_srv,
            entry['name'],
            arc_url + '/' + entry['name'],
            0,
        ]
        xls_data.append(row)
        # for log analyse
        # jieba.add_word(arc_url+'/'+arc_service['name'])
def get_folder_services(arc_folders, arc_url, arc_srv):
    """Record the services found inside each folder of a site.

    Args:
        arc_folders: Iterable of folder names to query.
        arc_url: Base path of the site, used for every request.
        arc_srv: Site name forwarded to ``get_services``.

    For every folder the listing is fetched with ``get_record``. A failed
    fetch prints ``"INFO: FD NULL"`` and the folder is skipped. Otherwise
    any sub-folders are processed recursively first, then the folder's own
    services are recorded.
    """
    for folder_name in arc_folders:
        listing_url = 'http://此处已省去敏感url信息' + arc_url + '/' + folder_name + '此处已省去敏感后缀'
        listing = get_record(listing_url)
        if listing == NULL:
            print("INFO: FD NULL")
            continue
        folder_services, sub_folders = listing['services'], listing['folders']
        # NOTE(review): sub-folders are requested relative to the same
        # arc_url, not to the current folder - confirm this is intended.
        if sub_folders != []:
            get_folder_services(sub_folders, arc_url, arc_srv)
        get_services(folder_services, arc_url, arc_srv)
if __name__ == '__main__':
    # Step 1: analyse arcgis services.
    # Site URL format: http://<entry host>/<site name>/arcgis/rest/services
    for arc_srv in arc_srvs:
        arc_url = '/' + arc_srv + '/arcgis/rest/services'
        arc_json = get_record('http://此处已省去敏感url信息' + arc_url + '此处已省去敏感后缀')
        if arc_json == NULL:
            # The request failed; get_record already printed the URL.
            continue
        get_services(arc_json['services'], arc_url, arc_srv)
        get_folder_services(arc_json['folders'], arc_url, arc_srv)

    # Step 2: analyse nginx log files.
    # Undecodable bytes are replaced rather than aborting the whole run;
    # the matching below only looks at the request-path field.
    with open("F:\\nginx-log\\access.log", encoding='utf-8', errors='replace') as logfin:
        for line in logfin:
            arr = line.split(' ')
            if len(arr) <= 6:
                # Short or malformed line: it has no seventh field, and
                # arr[6] would raise IndexError.
                continue
            request_path = arr[6]
            for xlsd in xls_data:
                # xlsd is [server, name, url, count]; count a hit when the
                # service url occurs inside the requested path.
                if xlsd[2] in request_path:
                    xlsd[3] += 1

    # Step 3: save data to xls file.
    # A separate name is used so that xls_data stays a plain list of rows.
    dataset = tablib.Dataset(*xls_data, headers=header)
    with open('data.xls', 'wb') as fout:
        fout.write(dataset.xls)
相关的敏感信息已经经过了脱敏处理。
仅仅做记录,逻辑还是很清楚的,我的缩进和命名也很规范,应该一看就明白。
新的代码思路
新的代码是按照面向对象的思想写的,目录结构如下:
│ config.py
│ run.py
│
├─app
│ analyse_log.py
│ check_exists.py
│ get_sites.py
│ save_to_excel.py
│
├─db
│ site.csv
│
├─excel
│ 站点信息.xlsx
│
├─test_log
host.access.log
config.py为全局配置
from pathlib import Path
# Directory that contains this config module (the project root).
BASE_PATH = Path(__file__).parent

# Excel output: file name, target directory and the combined full path.
EXCEL_NAME = "站点信息.xlsx"
EXCEL_PATH = BASE_PATH / "excel"
MY_EXCEL_PATH = EXCEL_PATH / EXCEL_NAME

# CSV file holding the site list.
SITE_PATH = BASE_PATH / "db" / "site.csv"

# Nginx access log to analyse.
# NGINX_LOG_PATH = "/home/nginx/logs/host.access.log"
NGINX_LOG_PATH = BASE_PATH / "test_log" / "host.access.log"

# Make sure the output directory exists as soon as the config is imported.
EXCEL_PATH.mkdir(parents=True, exist_ok=True)
run.py为项目入口
from config import NGINX_LOG_PATH, SITE_PATH
from app.get_sites import Sitemsg
from app.analyse_log import AnalyseLog, ReadLog
from app.check_exists import check_exists
from app.save_to_excel import SaveFile
from pathlib import Path
if __name__ == '__main__':
    # check_exists is a function and must be *called*. Testing the bare
    # function object is always truthy, so the guard could never trigger.
    if not check_exists():
        raise SystemExit(-1)

    # Load the site list and query every site for its services.
    stmsg = Sitemsg(SITE_PATH)
    stmsg.get_site_service()

    # Count service hits in the nginx log (runs inside the constructor).
    AnalyseLog(NGINX_LOG_PATH, stmsg)
    # stmsg.print_site_service()

    # Write the result workbook (also runs inside the constructor).
    SaveFile(stmsg)

    # Print the beginning of the log.
    ReadLog(NGINX_LOG_PATH, 10).show()
analyse_log.py
from app.get_sites import Sitemsg
class AnalyseLog(object):
    """Count how often each site's services occur in a log file.

    The analysis runs immediately from the constructor.

    The ``sitemsg_obj`` passed in must expose ``siteobjs``, an iterable of
    site objects. Each site needs a ``name``, an iterable ``services`` of
    service names, and an ``add_services_count(service)`` method.
    """

    def __init__(self, log_filepath, sitemsg_obj):
        self.filepath = log_filepath
        self.sitemsg = sitemsg_obj
        self.analyse()

    def analyse(self):
        """Scan the log line by line and bump the counters of matching services."""
        with open(self.filepath, mode='r', encoding='utf-8') as log_file_obj:
            for log_line in log_file_obj:
                for site in self.sitemsg.siteobjs:
                    site_name, site_services = site.name, site.services
                    # Only look at services when the site name itself
                    # appears somewhere in the line.
                    if site_name in log_line:
                        self._tally(site, site_services, log_line)

    @staticmethod
    def _tally(site, site_services, log_line):
        """Add one hit for every service whose name is a substring of the line."""
        for service in site_services:
            if service in log_line:
                site.add_services_count(service)
class ReadLog(object):
    """Print the first ``count`` lines of a log file.

    Args:
        log_filepath: Path of the log file to read (opened as UTF-8).
        count: Number of lines to print. Zero or a negative value prints
            nothing.
    """

    def __init__(self, log_filepath, count):
        self.line = count
        self.filepath = log_filepath

    def show(self):
        """Print at most ``self.line`` lines from the start of the file.

        The previous version printed the line *before* testing the counter,
        so it emitted ``count + 1`` lines. It also decremented
        ``self.line``, which made a second call print a different number of
        lines. The limit is now checked first and never mutated.
        """
        limit = self.line
        with open(self.filepath, mode='r', encoding='utf-8') as log_file_obj:
            for index, line in enumerate(log_file_obj):
                if index >= limit:
                    break
                print(line)
check_exists.py
from config import NGINX_LOG_PATH, SITE_PATH
from pathlib import Path
def check_exists():
    """Report whether both required input files are present.

    Returns:
        int: 1 when ``SITE_PATH`` and ``NGINX_LOG_PATH`` both exist,
        otherwise 0. ``SITE_PATH`` is checked first.
    """
    for required in (SITE_PATH, NGINX_LOG_PATH):
        if not Path(required).exists():
            return 0
    return 1
get_sites.py
from config import SITE_PATH
import requests
import json
class Sites(object):
    """One site together with the services registered under it.

    Args:
        name: Site name; stored as ``parent`` in every service record.
        url: Address of the site.
        isnetsite: Keyword-only flag stored as given (default ``False``).
    """

    def __init__(self, name, url, *, isnetsite=False):
        self.name, self.url = name, url
        self.isnetsite = isnetsite
        # Maps service name -> {"parent": <site name>, "count": <hits>}.
        self.services = {}

    def append_services(self, service, *, count=0):
        """Register ``service``, replacing any existing record of that name."""
        record = {"parent": self.name, "count": count}
        self.services[service] = record

    def add_services_count(self, service, *, count=1):
        """Increase the hit counter of an already registered service.

        Raises:
            KeyError: If ``service`` was never registered.
        """
        record = self.services[service]
        record["count"] += count

    def print_services(self):
        """Print every service name followed by its record."""
        for service_name, record in self.services.items():
            print(service_name, record)
class Sitemsg(object):
    """The list of sites loaded from a CSV file.

    Args:
        filepath: Object with an ``open(mode=..., encoding=...)`` method
            (for example a ``pathlib.Path``) pointing at the site CSV. The
            first line is the header; every following line is
            ``name,network,url``.

    Attributes set by the constructor:
        title: Header fields of the CSV as a list of strings.
        siteobjs: One ``Sites`` object per data row, in file order.
    """

    def __init__(self, filepath):
        self.filepath = filepath
        self.title = None
        self.siteobjs = []
        self.create()

    def create(self):
        """Read the site list file and build ``self.siteobjs``.

        Blank lines are skipped; previously they made the three-way unpack
        raise. The row is split at most twice, so a comma inside the URL
        stays part of the URL.

        Raises:
            ValueError: If a non-blank row has fewer than three fields.
        """
        with self.filepath.open(mode='r', encoding="utf-8") as site_file_obj:
            self.title = site_file_obj.readline().strip().split(',')
            for line in site_file_obj:
                row = line.strip()
                if not row:
                    continue
                name, isnetsite, url = row.split(',', 2)
                # The network column marks internet-facing sites with the
                # literal "互联网"; any other value is treated as False.
                site = Sites(name, url, isnetsite=(isnetsite == "互联网"))
                self.siteobjs.append(site)

    def get_site_service(self):
        """Query every loaded site for its services via ``ReqGetService``."""
        # TODO: fetch the services of each site
        for site in self.siteobjs:
            ReqGetService.get_service(site)

    def print_site_service(self):
        """Print the services of every loaded site."""
        for site in self.siteobjs:
            site.print_services()

    def join(self):
        """Placeholder; not implemented yet."""
        # TODO: build the site request URL
        pass
class ReqGetService(object):
    """Collect service names from a site's REST listing into a ``Sites`` object.

    A listing response is expected to look like this (taken from a debug
    print of a real response)::

        {
          "currentVersion": 10.61,
          "folders": ["FOLDER1", "FOLDER2", "FOLDER3"],
          "services": [{"name": "FOLDER1/SERVICE2020", "type": "MapServer"}]
        }
    """

    # Query-string suffix appended to every request URL.
    __url_tail = "?f=pjson"
    # Seconds to wait for the server. Without a timeout ``requests.get``
    # can block forever on an unresponsive site. The value is a judgement
    # call; adjust it to the environment.
    __timeout = 10

    @classmethod
    def _fetch_listing(cls, url):
        """Request ``url`` plus the suffix and return the decoded JSON dict."""
        res = requests.get(url + cls.__url_tail, timeout=cls.__timeout)
        return json.loads(res.text)

    @classmethod
    def get_service(cls, site):
        """Register all services of ``site``, including those inside folders.

        Args:
            site: Object with a ``url`` attribute and an
                ``append_services(name)`` method.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
            KeyError: If the response lacks ``folders`` or ``services``.
        """
        # TODO: request logic for internet-facing sites
        data_dict = cls._fetch_listing(site.url)
        for folder in data_dict['folders']:
            cls.get_folder_service(site, site.url, folder)
        for service in data_dict['services']:
            site.append_services(service['name'])

    @classmethod
    def get_folder_service(cls, site, url, folder):
        """Register the services listed under ``url + '/' + folder``.

        Args:
            site: Object receiving the service names.
            url: Base URL of the site.
            folder: Folder name appended to ``url``.
        """
        data_dict = cls._fetch_listing(url + '/' + folder)
        for sub_folder in data_dict['folders']:
            print("folder again!")
            # NOTE(review): the nested folder is requested relative to the
            # same base url, not to the current folder - confirm intended.
            cls.get_folder_service(site, url, sub_folder)
        for service in data_dict['services']:
            site.append_services(service['name'])
save_to_excel.py
from openpyxl import workbook
from config import MY_EXCEL_PATH
from app.get_sites import Sitemsg
class SaveFile(object):
    """Write the per-service hit counts to the Excel file at ``MY_EXCEL_PATH``.

    Constructing the object creates a workbook, fills it and saves it.

    Args:
        sitemsg_opt: Object exposing ``siteobjs``; each site has a
            ``services`` mapping of service name to a record with the
            ``"parent"`` and ``"count"`` keys.
    """

    def __init__(self, sitemsg_opt):
        self.wb = workbook.Workbook()
        # del self.wb['Sheet']
        self.save(sitemsg_opt)
        self.wb.save(MY_EXCEL_PATH)

    def save(self, sitemsg_opt):
        """Fill the sheet named 'Sheet' with a header row and one row per service."""
        sheet = self.wb['Sheet']

        # Header row.
        for column, caption in enumerate(("Site", "Service", "Count"), start=1):
            sheet.cell(1, column).value = caption

        # Data rows start directly below the header.
        row_index = 2
        for site in sitemsg_opt.siteobjs:
            for service_name, record in site.services.items():
                sheet.cell(row_index, 1).value = record["parent"]
                sheet.cell(row_index, 2).value = service_name
                sheet.cell(row_index, 3).value = record["count"]
                row_index += 1
site.csv
站点名,网络环境,访问地址
a_map_1,局域网,http://URL地址入口/a_map_1/arcgis/rest/services
a_map_2,局域网,http://URL地址入口/a_map_2/arcgis/rest/services
a_map_3,局域网,http://URL地址入口/a_map_3/arcgis/rest/services