一、需求
1、将多台远端服务器上已归档的log日志文件拉取到本地服务器上;
2、日志文件名有指定前缀及日期;
3、每台远端服务器可指定空闲时段,脚本仅在该时段内对其执行拉取;
4、下载后的日志文件有特殊命名要求;
5、本地日志文件有保留期限,需定期自动清理;
*6、下载指定路径下以日期yyyymmdd格式命名的文件夹,有多重子目录及文件;
二、解决方案
1、json配置文件:将多台远端服务器的信息汇总在json文件中,如server_ip,server_dir,server_port,username,password,prefix,freetime,使用脚本循环读取执行复制下载;
2、远端拷贝:效果类似linux的scp命令(可远端拷贝文件或文件夹),脚本中使用python的paramiko库通过SSH/SFTP实现;
3、re模块正则匹配:筛选保留指定文件、定期清理本地文件;
*4、生成器(yield from递归):递归遍历远端目标路径,逐个产出该路径下所有文件及各级子目录中文件的完整路径;
*5、shutil库:shutil.rmtree删除非空文件夹;
#第一版:机器日志审计,满足需求1-5#
import json #处理json文件
import paramiko #链接服务器
import os
import re #处理正则表达式
import datetime
json_dir = "/opt/jsonfile/test" # path of the JSON server-list file loaded by read_json()
local_dir = "/home/root/" # local directory that receives the downloaded files
input_date ="" # target date as yyyy-mm-dd; empty string means "yesterday"
days=15 # retention period of local files, in days
# Task start time, compared against each server's free-time window
n_time = datetime.datetime.now()
def connect(host, port=22, username='root', password='8888888'):
    """Open an SSH connection to *host* using password authentication.

    Args:
        host: Address of the remote server.
        port: SSH port.
        username: Login name.
        password: Login password.

    Returns:
        The connected ``paramiko.SSHClient``, or ``None`` when the connection
        attempt fails (the error is printed, not raised).
    """
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy trusts unknown host keys without verifying
    # them, and the default password is hard-coded; confirm both are intended.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(host, port, username, password)
    except Exception as e:
        print('connect erro', e)
        # Release whatever the failed attempt left open before giving up.
        ssh.close()
        return None
    print('connect success')
    return ssh
def command(args, outpath):
    """Build a command line: *outpath* first, then *args*, separated by one space."""
    return '{} {}'.format(outpath, args)
def exec_commands(conn, cmd):
    """Run *cmd* on the connection and return everything read from its stdout."""
    _, out_stream, _ = conn.exec_command(cmd)
    return out_stream.read()
def excutor(host, port, username, password, outpath, args):
    """Connect to a server, run one command and return its output.

    The command line is built by ``command(args, outpath)``, i.e. *outpath*
    followed by *args*.

    Args:
        host, port, username, password: Passed through to ``connect``.
        outpath: First part of the command line.
        args: Second part of the command line.

    Returns:
        ``None`` when the connection could not be established; otherwise the
        command's stdout decoded as UTF-8 and serialised with ``json.dumps``
        (so the text is wrapped in double quotes and newlines appear as the
        two characters ``\\n``).
    """
    conn = connect(host, port, username, password)
    if not conn:
        return None
    try:
        raw = exec_commands(conn, command(args, outpath))
    finally:
        # The connection is only needed for this one command; always release it.
        conn.close()
    return json.dumps(raw.decode(encoding="utf-8"),
                      indent=4,
                      ensure_ascii=False)
def copy_module(conn, inpath, outpath):
    """Download one remote file over SFTP.

    Args:
        conn: Connected SSH client providing ``open_sftp()``.
        inpath: Path of the file on the remote server.
        outpath: Destination path on the local machine.

    Returns:
        *outpath*, after the transfer has completed.

    Raises:
        Whatever ``open_sftp()`` or ``get()`` raises; the SFTP channel is
        closed before the exception propagates.
    """
    ftp = conn.open_sftp()
    try:
        ftp.get(inpath, outpath)  # remote -> local
    finally:
        # Close the channel even when the transfer fails.
        ftp.close()
    return outpath
def read_json():
    """Load and return the parsed content of the JSON file at ``json_dir``."""
    # Read-only mode: the file is never written, so 'r+' would only add a
    # needless write-permission requirement.
    with open(json_dir, 'r', encoding='utf-8') as f:
        return json.load(f)
def scan_local_file(days, local_dir):
    """Delete dated files in *local_dir* that are older than *days* days.

    A file is dated when its name contains ``yyyy-m-d`` (checked first) or an
    8-digit ``yyyymmdd`` run; the first occurrence of each pattern is used.
    Digit runs that are not valid calendar dates are ignored, and entries
    that are not regular files are left untouched.

    Args:
        days: Retention period; files whose date is more than this many days
            in the past are removed.
        local_dir: Directory to scan (with or without a trailing separator).
    """
    now = datetime.datetime.now()
    date_patterns = (
        (re.compile(r'\d{4}-\d{1,2}-\d{1,2}'), '%Y-%m-%d'),
        (re.compile(r'\d{8}'), '%Y%m%d'),
    )
    for name in os.listdir(local_dir):
        for pattern, fmt in date_patterns:
            found = pattern.search(name)
            if found is None:
                continue
            try:
                stamp = datetime.datetime.strptime(found.group(0), fmt)
            except ValueError:
                # Digits that look like a date but are not one (e.g. 99999999).
                continue
            if (now - stamp).days > days:
                path = os.path.join(local_dir, name)
                if os.path.isfile(path):
                    os.remove(path)
                break
if __name__ == '__main__':
    # Resolve the target date in the two spellings used by log file names:
    # yyyy-mm-dd (getday) and yyyymmdd (getday2). Default is yesterday.
    if input_date == "":
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        getday = yesterday.strftime("%Y-%m-%d")
        getday2 = yesterday.strftime("%Y%m%d")
    else:
        getday = input_date
        getday2 = datetime.datetime.strptime(input_date, "%Y-%m-%d").strftime("%Y%m%d")

    load_dict = read_json()
    for i in load_dict:  # one entry per remote server
        prefix = i['prefix']
        server_ip = i['server_ip']
        server_dir = i["server_dir"]
        server_port = i["server_port"]
        username = i['username']
        password = i['password']
        freetime = i['freetime']  # "" (no restriction) or "HH:MM-HH:MM", e.g. "15:00-21:00"

        if freetime != '':
            freet_s = freetime.split('-')[0]
            freet_e = freetime.split('-')[1]
            today = str(datetime.datetime.now().date())
            d_time = datetime.datetime.strptime(today + freet_s, '%Y-%m-%d%H:%M')
            d_time1 = datetime.datetime.strptime(today + freet_e, '%Y-%m-%d%H:%M')
            # Only pull from this server when the task started strictly inside
            # its free-time window.
            # NOTE(review): a window that crosses midnight (start > end) never
            # matches with this comparison.
            if not (d_time < n_time < d_time1):
                continue

        # One connection per server, used for both the listing and the
        # downloads, and always closed afterwards.
        conn = connect(server_ip, server_port, username, password)
        if conn is None:
            # connect() has already printed the reason; try the next server.
            continue
        try:
            listing = exec_commands(conn, command(server_dir, 'ls '))
            result_lis = listing.decode(encoding="utf-8").splitlines()
            for j in result_lis:
                if prefix not in j:
                    continue
                if getday in j:
                    # yyyy-mm-dd names: the local name is ip + file name, no separator.
                    copy_module(conn, server_dir + j, local_dir + server_ip + j)
                if getday2 in j:
                    # yyyymmdd names: the local name is ip + '-' + file name.
                    copy_module(conn, server_dir + j, local_dir + server_ip + '-' + j)
        finally:
            conn.close()

    # Purge local files that are past the retention period.
    scan_local_file(days, local_dir)
    print('success')
#第二版:收集sql执行日志,满足需求1-6
# -*- coding:utf-8 -*-
import paramiko as pm
import datetime as dt
import os
import stat
import re
import json
import shutil
input_date='' # target date in yyyymmdd format; empty string means "yesterday"
json_dir='/opt/jsonfile/server_info_new' # path of the JSON server-list file loaded by read_json()
local_dir='/opt/newtest/' # local base directory; one sub-folder is created per server and date
days=15 # retention period in days, passed to scan_local_date
# Task start time, compared against each server's free-time window
n_time = dt.datetime.now()
def read_json():
    """Load and return the parsed content of the JSON file at ``json_dir``."""
    # Read-only mode: the file is never written, so 'r+' would only add a
    # needless write-permission requirement.
    with open(json_dir, 'r', encoding='utf-8') as f:
        return json.load(f)
def getRemoteFiles(remoteDir):
    """Yield the remote path of every non-directory entry below *remoteDir*.

    Generator that walks the tree through the module-level ``sftp`` client,
    recursing into sub-directories. Paths are built as
    ``remoteDir + '/' + name``. An exception raised while walking the entries
    (including inside a sub-directory) is printed and ends this level's
    iteration; a failure to list *remoteDir* itself propagates to the caller
    on the first iteration.
    """
    entries = sftp.listdir_attr(remoteDir)
    try:
        for entry in entries:
            is_folder = stat.S_ISDIR(entry.st_mode)
            child_path = remoteDir + '/' + entry.filename
            if is_folder:
                # Descend and forward everything found underneath.
                yield from getRemoteFiles(child_path)
            else:
                yield child_path
    except Exception as e:
        print('getAllFilePath exception:', e)
# 远程目录remoteDir文件下载保存到本地目录localDir
def download_file(localDir, remoteDir):
    """Download every file below *remoteDir* into *localDir*.

    Uses the module-level ``sftp`` client and closes it exactly once before
    returning. Files from all sub-directories are saved directly in
    *localDir* under their base name, so an existing file with the same name
    is overwritten. A failed download is reported and the remaining files are
    still attempted.

    Args:
        localDir: Local target directory; created when it does not exist and
            the remote directory does.
        remoteDir: Remote directory to download recursively.
    """
    try:
        files = getRemoteFiles(remoteDir)
        # sftp.stat raises when the remote directory is missing, which skips
        # the local directory creation and lands in the handler below.
        if not os.path.exists(localDir) and sftp.stat(remoteDir):
            os.makedirs(localDir)
        for remoteFileName in files:
            localFileName = os.path.join(localDir, remoteFileName.split('/')[-1])
            try:
                sftp.get(remoteFileName, localFileName)
            except Exception as e:
                # Keep the connection open so the other files can still be fetched.
                print('%s下载出错!:\n' % (remoteFileName), e)
                continue
            print('sftp服务器文件 {} 下载成功!\n该文件保存本地位置是 {} !\n'.format(
                remoteFileName, localFileName))
    except Exception as e:
        # Typically the dated remote directory does not exist; include the
        # underlying error so other causes are not hidden.
        print('该日期文件不存在', e)
    finally:
        sftp.close()
# 过滤非T3.log文件
def scan_local_file(local_dir):
    """Delete every file in *local_dir* whose name does not match ``^T3.*log$``.

    Entries that are not regular files are left alone. A file that cannot be
    removed is reported and the scan continues with the next one.

    Args:
        local_dir: Directory to clean (with or without a trailing separator).
            When it cannot be listed, a message is printed and nothing else
            happens.
    """
    try:
        names = os.listdir(local_dir)
    except OSError:
        print('不存在T3.log文件')
        return
    for name in names:
        if re.search(r'^T3.*log$', name):
            continue
        path = os.path.join(local_dir, name)
        if not os.path.isfile(path):
            continue
        try:
            os.remove(path)
        except OSError as e:
            print('remove failed:', path, e)
# 定期清理日志文件
def scan_local_date(days, local_dir):
    """Remove dated entries in *local_dir* that are older than *days* days.

    An entry is dated when its name contains an 8-digit run that parses as
    ``yyyymmdd`` (the first such run is used). Expired directories are removed
    with their whole content; expired plain files are removed as well. Names
    whose digit run is not a valid date are ignored.

    Args:
        days: Retention period; entries dated more than this many days in the
            past are removed.
        local_dir: Directory to scan (with or without a trailing separator).
    """
    now = dt.datetime.now()
    for name in os.listdir(local_dir):
        found = re.search(r'\d{8}', name)
        if found is None:
            continue
        try:
            stamp = dt.datetime.strptime(found.group(0), '%Y%m%d')
        except ValueError:
            # Eight digits that are not a calendar date (e.g. 99999999).
            continue
        if (now - stamp).days <= days:
            continue
        path = os.path.join(local_dir, name)
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)  # removes non-empty folders too
        else:
            os.remove(path)
if __name__ == '__main__':
    # Target date as yyyymmdd; defaults to yesterday.
    if input_date == '':
        date = (dt.datetime.now() - dt.timedelta(days=1)).strftime("%Y%m%d")
    else:
        date = input_date

    load_dict = read_json()
    for i in load_dict:  # one entry per remote server
        server_ip = i['server_ip']
        server_port = i["server_port"]
        username = i['username']
        password = i['password']
        remoteDir = i["server_dir"] + date + '/'
        freetime = i['freetime']  # "" (no restriction) or "HH:MM-HH:MM", e.g. "15:00-21:00"

        if freetime != '':
            freet_s = freetime.split('-')[0]
            freet_e = freetime.split('-')[1]
            today = str(dt.datetime.now().date())
            d_time = dt.datetime.strptime(today + freet_s, '%Y-%m-%d%H:%M')
            d_time1 = dt.datetime.strptime(today + freet_e, '%Y-%m-%d%H:%M')
            # Only pull from this server when the task started strictly inside
            # its free-time window.
            # NOTE(review): a window that crosses midnight (start > end) never
            # matches with this comparison.
            if not (d_time < n_time < d_time1):
                continue

        # Local folder that receives this server's files for the date.
        localDir = local_dir + server_ip + '-' + 'sql_log' + date + '/'

        tran = None
        try:
            tran = pm.Transport((server_ip, server_port))
            tran.connect(username=username, password=password)
            # `sftp` must stay a module-level name: download_file and
            # getRemoteFiles read it as a global.
            sftp = pm.SFTPClient.from_transport(tran)
            download_file(localDir, remoteDir)
        except Exception as e:
            # An unreachable server must not stop the remaining ones.
            print('connect error:', server_ip, e)
            continue
        finally:
            if tran is not None:
                tran.close()

        # Keep only the T3*.log files of this download.
        scan_local_file(localDir)

    # Purge local folders that are past the retention period.
    scan_local_date(days, local_dir)