Environment: CentOS 7.6, Python 2.7.5, Ceph Luminous
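Before running either script, it is worth a quick sanity check that the RGW endpoint and the S3 keys actually work; a minimal sketch, assuming the same endpoint and credentials used in the scripts below:

import boto3

s3 = boto3.client("s3",
                  aws_access_key_id="xxx",
                  aws_secret_access_key="xxx",
                  endpoint_url="http://192.168.1.10:6780")
# should print the existing bucket names without raising
print([b["Name"] for b in s3.list_buckets()["Buckets"]])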
1. Multiprocess upload script
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import os
from multiprocessing import Pool

import boto3

endpoint_url = "http://192.168.1.10:6780"
access_key = "xxx"
secret_key = "xxx"
# bucket name in oss
default_bucketname = "lalala"

def walkFile(dir_path):
    # Collect every file under dir_path; the file path doubles as the object key.
    obj_list = []
    for root, dirs, files in os.walk(dir_path):
        for name in files:
            obj_list.append(os.path.join(root, name))
    return obj_list

def put_files(args):
    # Worker for Pool.map. It must be a module-level function so Python 2 can
    # pickle it, and each child process opens its own client (boto3 clients
    # cannot be shared across processes).
    bucketname, filepath_list = args
    conn = boto3.client("s3",
                        aws_access_key_id=access_key,
                        aws_secret_access_key=secret_key,
                        endpoint_url=endpoint_url)
    for filepath in filepath_list:
        try:
            conn.upload_file(filepath, bucketname, filepath)
        except Exception as e:
            print("Upload {filepath} failed !!".format(filepath=filepath))
            print(e)
            continue
        print("Put file {filepath} complete !".format(filepath=filepath))

class S3Operate(object):
    def __init__(self,
                 endpoint_url=endpoint_url,
                 access_key=access_key,
                 secret_key=secret_key):
        self.endpoint_url = endpoint_url
        self.access_key = access_key
        self.secret_key = secret_key
        # open the connection
        self.conn = boto3.client("s3",
                                 aws_access_key_id=access_key,
                                 aws_secret_access_key=secret_key,
                                 endpoint_url=endpoint_url)

    def put_file(self, filepath, key, bucketname=default_bucketname):
        # upload a single file to the bucket
        try:
            self.conn.upload_file(filepath, bucketname, key)
        except Exception as e:
            print("Upload {filepath} failed !!".format(filepath=filepath))
            print(e)
            return
        print("Put file {filepath} complete !".format(filepath=filepath))

    def pool_put_dir_files(self, dir_path, workers=1, bucketname=default_bucketname):
        # upload everything under a directory to the bucket
        obj_list = walkFile(dir_path)
        list_len = len(obj_list)
        worker_num = min(workers, 512)
        worker_num = 1 if list_len < workers else worker_num
        print("Workers: %d" % worker_num)
        po = Pool(worker_num)
        # ceiling division, otherwise the trailing remainder of obj_list is dropped
        batch_num = (list_len + worker_num - 1) // worker_num
        divide_list = [(bucketname,
                        obj_list[i * batch_num:min((i + 1) * batch_num, list_len)])
                       for i in range(worker_num)]
        print("params: %d" % len(divide_list))
        po.map(put_files, divide_list)
        po.close()
        po.join()
        print("Finish uploading !!")

s3 = S3Operate()
s3.pool_put_dir_files("29", 100, "Long")
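For large individual files, boto3's upload_file already switches to multipart upload once the file crosses a size threshold, and those knobs can be tuned with TransferConfig. A minimal sketch; the threshold and concurrency numbers here are illustrative assumptions, not measured values:

from boto3.s3.transfer import TransferConfig

# 64 MB threshold/chunk size and 10 threads per file are assumptions to tune
config = TransferConfig(multipart_threshold=64 * 1024 * 1024,
                        multipart_chunksize=64 * 1024 * 1024,
                        max_concurrency=10)
s3.conn.upload_file("bigfile.bin", "Long", "bigfile.bin", Config=config)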
2. Download script
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import os
import time
from multiprocessing import Pool

import boto
import boto.s3.connection
import boto3
def download_obj(objname_list):
    '''
    @Function: download a batch of objects to local disk
    '''
    # each worker process opens its own boto3 client
    conn3 = boto3.client("s3",
                         aws_access_key_id=access_key,
                         aws_secret_access_key=secret_key,
                         endpoint_url=endpoint_url)
    for m in objname_list:
        savepath = bucketname + '/' + m
        split_seg = savepath.split('/')
        path_dir = '/'.join(split_seg[0:-1])
        if not os.path.isdir(path_dir):
            try:
                os.makedirs(path_dir)
                print("Just mkdir")
            except OSError:
                # another worker may have created the directory first
                pass
        with open(savepath, "wb") as f:
            conn3.download_fileobj(bucketname, m, f)
    print("---------- One batch download complete ! ----------")
access_key = 'xxx'
secret_key = 'xxx'
endpoint_url = "http://192.168.1.10:6780"
bucketname = 'Long'

conn = boto.connect_s3(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    host='192.168.1.10', port=6780,
    is_secure=False, calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
bucket = conn.get_bucket(bucketname)
def multi_download(workers=1):
    list_len = len(obj_list)
    worker_num = min(workers, 512)
    worker_num = 1 if list_len < workers else worker_num
    print("Workers: %d" % worker_num)
    po = Pool(worker_num)
    # ceiling division, otherwise the trailing remainder of obj_list is dropped
    batch_num = (list_len + worker_num - 1) // worker_num
    divide_list = [obj_list[i * batch_num:min((i + 1) * batch_num, list_len)]
                   for i in range(worker_num)]
    print("params: %d" % len(divide_list))
    # collect the async results first; calling get() inside the submit loop
    # would block on each batch in turn and serialize the downloads
    results = [po.apply_async(download_obj, args=(i,)) for i in divide_list]
    po.close()
    po.join()
    for result in results:
        result.get()  # re-raise any exception from the worker processes
    print("============== Download %s complete ! ==============" % bucketname)

start_time = time.time()
start_timeh = time.strftime("%Y-%m-%d %H:%M", time.localtime(start_time))
start_indicator = """
***************************************************************************
Start download at %s
***************************************************************************
""" % start_timeh
print(start_indicator)

time1 = time.time()
obj_list = []
for i in bucket.list():
    # bucket.list() iterates over every key; bucket.get_all_keys() and similar
    # calls return at most 1000 entries per call (see the paginator sketch below)
    obj_list.append(i.name)
time2 = time.time()
print("Get obj_list cost %0.2fs" % (time2 - time1))

multi_download(workers=50)

stop_time = time.time()
print("Download cost %0.2fs" % (stop_time - start_time))
stop_timeh = time.strftime("%Y-%m-%d %H:%M", time.localtime(stop_time))
stop_indicator = """
***************************************************************************
Finish download at %s
***************************************************************************
""" % stop_timeh
print(stop_indicator)
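The 1000-key cap also applies to boto3's plain list_objects call, so instead of listing with legacy boto, the same obj_list can be built with boto3's paginator, which follows the continuation markers automatically. A sketch, assuming the same access_key / secret_key / endpoint_url / bucketname globals as the script above:

import boto3

s3 = boto3.client("s3",
                  aws_access_key_id=access_key,
                  aws_secret_access_key=secret_key,
                  endpoint_url=endpoint_url)
obj_list = []
# each page carries at most 1000 keys; the paginator requests the next page for us
for page in s3.get_paginator("list_objects").paginate(Bucket=bucketname):
    for obj in page.get("Contents", []):
        obj_list.append(obj["Key"])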