# Query Alibaba Cloud OSS bucket log data (查询oss阿里云bucket日志数据)
import oss2
import pandas as pd
# OSS connection settings -- replace the placeholders with real values.
access_key_id = 'xxxxxxxxxxx'
access_key_secret = 'xxxxxxxxxxx'
endpoint = 'xxxxxxxxxxx'
# Fixed typo: was 'bucker_name', but the __main__ block reads 'bucket_name',
# so the script crashed with NameError before this fix.
bucket_name = 'xxxxxxxxx'
def bucketInfo(bucket):
    """Print basic metadata and storage statistics for *bucket*.

    Expects an ``oss2.Bucket``-like object exposing ``get_bucket_info()``
    and ``get_bucket_stat()``.
    """
    print("========= 获取bucket_info ==========")
    info = bucket.get_bucket_info()
    print(' bucket_info:')
    print(' bucket name: ' + info.name)
    print(' bucket storage class: ' + info.storage_class)
    print(' bucket creation date: ' + info.creation_date)
    print("bucket_stat:")
    stat = bucket.get_bucket_stat()
    # Stat fields are numeric; convert before concatenating.
    for label, value in ((' bucket storage: ', stat.storage_size_in_bytes),
                         (' bucket object count: ', stat.object_count),
                         (' bucket multi part upload count: ',
                          stat.multi_part_upload_count)):
        print(label + str(value))
def fileInfo(bucket, path):
    """List bucket entries, recursing into directory-like prefixes.

    Plain objects are printed and their sizes recorded in the
    module-level ``Data`` dict; prefixes are walked via ``dicInfo``.

    NOTE(review): *path* is passed as the ObjectIterator ``delimiter``
    (not as a prefix) -- confirm this matches the intended listing.
    """
    for entry in oss2.ObjectIterator(bucket, delimiter=path):
        if not entry.is_prefix():
            print('file:' + entry.key + ' length:' + str(entry.size) + "Byte.")
            Data[entry.key] = entry.size
        else:
            dicInfo(bucket, entry.key)
def dicInfo(bucket, folder):
    """Record every object under *folder* into the module-level ``Data``.

    NOTE(review): only ``prefix`` is set (no delimiter), so the iterator
    is expected to yield plain objects; the ``is_prefix()`` recursion
    branch looks unlikely to fire -- confirm against oss2 semantics.
    """
    for entry in oss2.ObjectIterator(bucket, prefix=folder):
        if entry.is_prefix():
            dicInfo(bucket, entry.key)
            continue
        print('file:' + entry.key + ' length:' + str(entry.size) + "Byte.")
        Data[entry.key] = entry.size
def changeSize(size):
    '''
    Convert a byte count into a human-readable string using the largest
    fitting unit, with three decimal places, e.g. 1536 -> '1.500 KB'.

    Bug fix: the original printed the raw ``size % 1024`` remainder
    (0..1023) as the fractional part, so 1536 bytes rendered as
    '1.512 KB' and remainders above 999 produced a 4-digit "fraction".
    The remainder is now scaled to true thousandths of the unit.
    '''
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    integer, remainder, level = size, 0, 0
    # Divide down by 1024 per unit step, remembering the last remainder
    # so the fractional part can be reconstructed afterwards.  Stop at
    # the largest known unit instead of running past the table.
    while integer >= 1024 and level < len(units) - 1:
        integer, remainder = divmod(integer, 1024)
        level += 1
    # Scale the 0..1023 remainder to thousandths (0..999) of the unit.
    thousandths = remainder * 1000 // 1024
    return '{}.{:>03d} {}'.format(integer, thousandths, units[level])
def strofsize(integer, remainder, level):
    """Reduce *integer* by factors of 1024 until it is below 1024.

    Each division step records the last remainder and bumps *level* by
    one; the initial *remainder* is kept untouched when no division
    happens.  Returns the final ``(integer, remainder, level)`` triple.
    """
    while integer >= 1024:
        remainder = integer % 1024
        integer //= 1024
        level += 1
    return integer, remainder, level
def delBucketFile(bucket, files):
    """Delete *files* (a list of object keys) from *bucket*.

    Deletions are issued in slices of 1000 keys per call --
    presumably the batch-delete API limit (TODO confirm).  The keys
    reported deleted by each call are printed one per line.
    """
    batch = 1000
    for start in range(0, len(files), batch):
        result = bucket.batch_delete_objects(files[start:start + batch])
        print('\n'.join(result.deleted_keys))
if __name__ == "__main__":
    # Authenticate and open the target bucket.
    auth = oss2.Auth(access_key_id, access_key_secret)
    bucket = oss2.Bucket(auth, endpoint, bucket_name)
    # key -> size in bytes; filled as a side effect of fileInfo/dicInfo.
    Data = {}
    bucketInfo(bucket)
    fileInfo(bucket, '/')
    if Data:
        # Largest files first, then export path/size pairs to CSV.
        ranked = sorted(Data.items(), key=lambda item: item[1], reverse=True)
        names = [key for key, _ in ranked]
        datas = [changeSize(size) for _, size in ranked]
        dataframe = pd.DataFrame({'文件路径': names, '文件大小': datas})
        dataframe.to_csv("bucket.csv", index=False, sep=',')