import boto3
from datetime import datetime, timedelta
import pytz as pytz
def split_list(lst, batch_size):
    """Yield consecutive slices of ``lst`` holding at most ``batch_size`` items.

    Slices are produced lazily, in order. The last slice is shorter when
    ``len(lst)`` is not a multiple of ``batch_size``; an empty ``lst``
    yields nothing.
    """
    offsets = range(0, len(lst), batch_size)
    yield from (lst[offset:offset + batch_size] for offset in offsets)
def list_versions_with_time_range(bucket_name, prefix, start_time, end_time):
    """List object versions under a prefix that were modified within a time range.

    Args:
        bucket_name: Name of the bucket to scan.
        prefix: Key prefix; passed to the listing call as ``Prefix``.
        start_time: Inclusive lower bound, compared with each version's
            ``LastModified`` value.
        end_time: Inclusive upper bound, compared the same way.

    Returns:
        A list of ``{'Key': ..., 'VersionId': ...}`` dicts, one for every
        matching entry in the ``Versions`` part of the listing, in listing
        order. Entries under ``DeleteMarkers`` are not collected.

    NOTE(review): ``LastModified`` is presumably a timezone-aware datetime,
    so ``start_time`` / ``end_time`` should be timezone-aware as well --
    confirm against the endpoint in use.
    """
    # Credentials, endpoint and region are placeholders to be filled in.
    s3 = boto3.client(
        's3',
        aws_access_key_id='XXX',
        aws_secret_access_key='XX',
        endpoint_url='http://s3.XXX',
        region_name='XXXXXXX',
        use_ssl=False,
    )

    # Let boto3 drive the pagination: it follows the key/version-id markers
    # for as long as the listing reports more results, so every page goes
    # through one and the same filter loop below.
    paginator = s3.get_paginator('list_object_versions')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)

    versions = []
    for page in pages:
        for version in page.get('Versions', []):
            if start_time <= version['LastModified'] <= end_time:
                versions.append(
                    {'Key': version['Key'], 'VersionId': version['VersionId']}
                )
    return versions
def delete_objects(bucket_name, objects):
    """Delete the given object versions from a bucket, one batch per request.

    Args:
        bucket_name: Name of the bucket holding the objects.
        objects: Sequence of dicts that each carry a ``'Key'`` and a
            ``'VersionId'`` entry identifying one object version.

    For every batch, the ``Deleted`` and ``Errors`` parts of the response
    are printed; nothing is returned.
    """
    # Credentials, endpoint and region are placeholders to be filled in.
    client = boto3.client(
        's3',
        aws_access_key_id='XXXXXX',
        aws_secret_access_key='XXX',
        endpoint_url='http://s3.',
        region_name='XX',
        use_ssl=False,
    )

    # Number of object versions sent in a single delete request.
    chunk_size = 1000

    for chunk in split_list(objects, chunk_size):
        # Build the request payload from just the two identifying fields.
        targets = []
        for entry in chunk:
            targets.append({'Key': entry['Key'], 'VersionId': entry['VersionId']})

        result = client.delete_objects(
            Bucket=bucket_name,
            Delete={'Objects': targets},
        )

        # Report what the service accepted and what it rejected.
        print("Deleted objects:", result.get('Deleted', []))
        print("Errors:", result.get('Errors', []))
# Replace these with your bucket name and the directory path (key prefix).
bucket_name = 'XXX'
directory_path = 'XXXX'

# Time window, expressed as timezone-aware UTC datetimes.
utc = pytz.utc
start_time = utc.localize(datetime(2023, 1, 1))  # start of the window
end_time = utc.localize(datetime(2024, 2, 12))  # end of the window

# Collect the object versions whose modification time is inside the window.
versions_to_delete = list_versions_with_time_range(
    bucket_name=bucket_name,
    prefix=directory_path,
    start_time=start_time,
    end_time=end_time,
)
print(versions_to_delete)

# Delete the collected object versions in batches.
delete_objects(bucket_name=bucket_name, objects=versions_to_delete)
# Python 脚本:分批次删除指定时间范围内、指定路径下 S3 对象的历史版本(version)
# 首次发布于 2024-03-28 11:15:27