What this script does:
- Backs up all of a project's log files to Alibaba Cloud OSS.
- Paired with a crontab schedule and an OSS lifecycle rule, it keeps the log-archiving policy under control (see the lifecycle sketch after this list).
- Truncates each log once its backup completes, so the full logs never pile up on local disk.
- Detects large log files and uploads them via multipart upload.
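For the lifecycle half of that pairing, a rule like the one below expires backed-up objects after a retention window. This is a minimal sketch against the oss2 lifecycle API; the rule id 'logs-expire', the 'jjj/' prefix, and the 90-day window are illustrative assumptions, not part of the original script.

import oss2
from oss2.models import BucketLifecycle, LifecycleRule, LifecycleExpiration

# Assumed placeholders; reuse the same credentials as the backup script.
auth = oss2.Auth(access_key_id, access_key_secret)
bucket = oss2.Bucket(auth, endpoint, bucket_name)

# Expire everything under the project prefix 90 days after upload
# (the prefix and the 90-day window are assumptions for illustration).
rule = LifecycleRule('logs-expire', 'jjj/',
                     status=LifecycleRule.ENABLED,
                     expiration=LifecycleExpiration(days=90))
bucket.put_bucket_lifecycle(BucketLifecycle([rule]))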
(Screenshot: the backed-up log files as they appear in the OSS console.)
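If you prefer verifying from a shell instead of the console, listing the bucket under the project prefix works too. A small sketch, assuming the same credential variables and project prefix as the script below:

import oss2

bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
# Print the key and size of every object backed up under the project prefix.
for obj in oss2.ObjectIterator(bucket, prefix=project):
    print(obj.key, obj.size)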
The script:
"""
# Install:
# step 1 : yum install python-devel / apt-get install python-dev
# step 2 : pip install oss2
# Modify:
# access_key_id, access_key_secret, bucket_name, endpoint, project, logs_list ( allow /x/y/z/*.xxx )
# Use:
# step 1 : upload this script into server's path : /usr/local/seektruth/logs_backup_oss
# step 2 : exec command 'crontab -e' . put next shell into text
0 2 */7 * * nohup python -u /usr/local/seektruth/logs_backup_oss/logs_backup_oss.py >> /tmp/oss_backup_oss.log 2>&1 &
"""
import os
import sys
import time

try:
    from urllib2 import urlopen           # Python 2
except ImportError:
    from urllib.request import urlopen    # Python 3

import oss2
from oss2 import SizedFileAdapter, determine_part_size
from oss2.models import PartInfo
# Placeholders: fill in your own credentials, bucket name, and endpoint.
access_key_id = 'aaa'
access_key_secret = 'bbb'
bucket_name = 'fff'
endpoint = 'https://ggg'
def clear(log_abs_path):
    """Truncate a log file in place once it has been backed up."""
    # Opening in 'w' mode already truncates the file, so nothing else is needed.
    with open(log_abs_path, 'w'):
        pass
def multi_upload(objectName, localfile):
    """
    :param objectName: object key under which the file is stored in OSS
    :param localfile: absolute path of the local log file
    """
    key = objectName
    filename = localfile
    total_size = os.path.getsize(filename)
    # determine_part_size picks a part size >= preferred_size that keeps
    # the part count within OSS limits.
    part_size = determine_part_size(total_size, preferred_size=100 * 1024)
    upload_id = bucket.init_multipart_upload(key).upload_id
    parts = []
    with open(filename, 'rb') as fileobj:
        part_number = 1
        offset = 0
        while offset < total_size:
            num_to_upload = min(part_size, total_size - offset)
            # SizedFileAdapter caps the read at the current part's byte count.
            result = bucket.upload_part(key, upload_id, part_number,
                                        SizedFileAdapter(fileobj, num_to_upload))
            parts.append(PartInfo(part_number, result.etag))
            offset += num_to_upload
            part_number += 1
    bucket.complete_multipart_upload(key, upload_id, parts)
def get_multi_files(abs_parent_dir, suffix):
    """
    :param abs_parent_dir: absolute path of the directory holding the log files that end in suffix
    :param suffix: file extension to match, e.g. 'log'
    :return: list of absolute paths of all matching log files
    """
    new_compose_file_list = list()
    for name in os.listdir(abs_parent_dir):
        full_path = os.path.join(abs_parent_dir, name)
        # Skip subdirectories; only plain files with the matching extension qualify.
        if os.path.isdir(full_path):
            continue
        if name.endswith('.' + suffix):
            new_compose_file_list.append(full_path)
    return new_compose_file_list
def backup(project_arg, backup_date_arg, logs_list_arg):
    try:
        # Work on a copy so the wildcard expansion below does not mutate
        # the list while it is being iterated.
        list_sample = list(logs_list_arg)
        # decode() handles the bytes returned on Python 3.
        public_ip = urlopen('http://ip.42.pl/raw').read().decode().strip()
        for file_pattern in logs_list_arg:
            # Expand entries like /x/y/z/*.log into the actual files.
            if file_pattern.split('/')[-1].split('.')[0] == '*':
                abs_parent_dir = os.path.dirname(file_pattern)
                suffix = file_pattern.split('/')[-1].split('.')[-1]
                new_compose_file_list = get_multi_files(abs_parent_dir, suffix)
                list_sample.extend(new_compose_file_list)
                list_sample.remove(file_pattern)
        for logfile_abs_path in list_sample:
            objectName = os.path.join(project_arg, public_ip, backup_date_arg,
                                      logfile_abs_path.lstrip('/'))
            localFile = logfile_abs_path
            # Files larger than 2 GiB go through multipart upload.
            if os.path.getsize(logfile_abs_path) > 1073741824 * 2:
                multi_upload(objectName, localFile)
            else:
                bucket.put_object_from_file(objectName, localFile)
            clear(logfile_abs_path)
    except Exception as e:
        # print() works on both Python 2 and 3, so no version check is needed.
        print(backup_date_arg)
        print(str(e))
if __name__ == '__main__':
    assert ('linux' in sys.platform), "This script currently runs on Linux only"
    bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
    project = 'jjj'
    backup_date = time.strftime('%Y-%m-%d %H-%M')
    logs_list = [
        '/a/b/c/d.file',
        '/a/bc/c/*.log',
        '/d/e/f/2.txt'
    ]
    backup(project_arg=project, backup_date_arg=backup_date, logs_list_arg=logs_list)
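As an aside: recent versions of the oss2 SDK also provide oss2.resumable_upload, which chooses between simple and multipart upload on its own and tracks the parts internally, so the manual size branch in backup() could be replaced. This is an alternative sketch, not what the script above uses; bucket, objectName, and localFile are the same names the script builds, and the 2 GiB threshold simply mirrors its branch:

import oss2

# Let the SDK pick simple vs. multipart upload based on the threshold.
oss2.resumable_upload(bucket, objectName, localFile,
                      multipart_threshold=2 * 1024 * 1024 * 1024,
                      part_size=100 * 1024)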