Python multi-process upload and download with Ceph object storage


Environment: CentOS 7.6, Python 2.7.5, Ceph Luminous
1. Multi-process upload script

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import boto3
from multiprocessing import Pool

endpoint_url = "http://192.168.1.10:6780"
access_key = "xxx"
secret_key = "xxx"
# Default bucket name in the object store
default_bucketname = "lalala"


def walk_files(dirpath):
    # Collect every file path under dirpath; the path is reused as the object key
    file_list = []
    for root, dirs, files in os.walk(dirpath):
        for name in files:
            file_list.append(os.path.join(root, name))
    return file_list


def upload_batch(args):
    # Worker run in a child process. It must be a module-level function so the
    # Pool can pickle it, and it builds its own boto3 client because a client
    # (or a bound method holding one) cannot be shared across processes.
    filepath_list, bucketname = args
    conn = boto3.client("s3",
                        aws_access_key_id=access_key,
                        aws_secret_access_key=secret_key,
                        endpoint_url=endpoint_url)
    for filepath in filepath_list:
        try:
            conn.upload_file(filepath, bucketname, filepath)
        except Exception as e:
            print("Upload {filepath} failed !!".format(filepath=filepath))
            print(e)
            continue
        print("Put file {filepath} complete !".format(filepath=filepath))
    print("---------- One batch upload complete ! ----------")


class S3Operate(object):
    def __init__(self,
                 endpoint_url=endpoint_url,
                 access_key=access_key,
                 secret_key=secret_key):
        self.endpoint_url = endpoint_url
        self.access_key = access_key
        self.secret_key = secret_key
        # Establish the connection
        self.conn = boto3.client("s3",
                                 aws_access_key_id=access_key,
                                 aws_secret_access_key=secret_key,
                                 endpoint_url=endpoint_url)

    def put_file(self, filepath, key, bucketname=default_bucketname):
        # Upload a single file to the bucket
        try:
            self.conn.upload_file(filepath, bucketname, key)
        except Exception as e:
            print("Upload {filepath} failed !!".format(filepath=filepath))
            print(e)
            return
        print("Put file {filepath} complete !".format(filepath=filepath))

    def pool_put_dir_files(self, dirpath, workers=1, bucketname=default_bucketname):
        # Upload everything under a directory with a pool of worker processes
        obj_list = walk_files(dirpath)
        list_len = len(obj_list)
        worker_num = min(workers, 512)
        worker_num = 1 if list_len < workers else worker_num
        print("Workers: %d" % worker_num)
        po = Pool(worker_num)
        batch_num = list_len // worker_num
        # Give the last worker the remainder so no file is dropped
        divide_list = [obj_list[(i * batch_num):((i + 1) * batch_num if i < worker_num - 1 else list_len)]
                       for i in range(0, worker_num)]
        print("Batches: %d" % len(divide_list))
        po.map(upload_batch, [(batch, bucketname) for batch in divide_list])
        po.close()
        po.join()
        print("Finish uploading !!")

s3 = S3Operate()

s3.pool_put_dir_files("29", 100, "Long")
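Each pool worker uploads its batch of files one at a time, which is fine for many small objects. For a handful of very large files, boto3 can additionally split a single upload into concurrent multipart parts through TransferConfig. Below is a minimal sketch, reusing the endpoint_url/access_key/secret_key defined above; the file name, bucket, threshold and chunk size are illustrative values, not tuned recommendations.

import boto3
from boto3.s3.transfer import TransferConfig

# Illustrative values: objects above 64 MB are sent as 16 MB multipart parts,
# with up to 10 upload threads per file inside one worker process
transfer_config = TransferConfig(multipart_threshold=64 * 1024 * 1024,
                                 multipart_chunksize=16 * 1024 * 1024,
                                 max_concurrency=10,
                                 use_threads=True)

client = boto3.client("s3",
                      aws_access_key_id=access_key,
                      aws_secret_access_key=secret_key,
                      endpoint_url=endpoint_url)
# "29/bigfile.dat" is a hypothetical local file / object key
client.upload_file("29/bigfile.dat", "Long", "29/bigfile.dat",
                   Config=transfer_config)

Objects below multipart_threshold are still uploaded with a single PUT, so the same call works for small files too.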

2. Download script

#!/usr/bin/python
#-*- coding:utf-8 -*-
import boto.s3.connection
import time
import boto3
import os
from multiprocessing import Pool

def download_obj(objname_list):
    '''
    @Function: download a batch of objects to local files
    '''
    # Each worker process builds its own boto3 client (clients cannot be
    # shared across processes)
    conn3 = boto3.client("s3",
                         aws_access_key_id=access_key,
                         aws_secret_access_key=secret_key,
                         endpoint_url=endpoint_url)
    for m in objname_list:
        savepath = bucketname + '/' + m
        split_seg = savepath.split('/')
        path_dir = '/'.join(split_seg[0:-1])
        # Create the local directory tree that mirrors the object key
        if not os.path.isdir(path_dir):
            try:
                os.makedirs(path_dir)
            except OSError:
                # Another worker may have created the directory first
                pass
        # print("Downloading %s" % savepath)
        with open(savepath, "wb") as f:
            conn3.download_fileobj(bucketname, m, f)
    print("---------- One batch download complete ! ----------")


access_key = 'xxx'
secret_key = 'xxx'
endpoint_url = "http://192.168.1.10:6780"
bucketname = 'Long'


def multi_download(workers=1):
    list_len = len(obj_list)
    worker_num = min(workers, 512)
    worker_num = 1 if list_len < workers else worker_num
    print("Workers: %d" % worker_num)
    po = Pool(worker_num)
    batch_num = list_len // worker_num
    # Give the last worker the remainder so no object is dropped
    divide_list = [obj_list[(i * batch_num):((i + 1) * batch_num if i < worker_num - 1 else list_len)]
                   for i in range(0, worker_num)]
    print("Batches: %d" % len(divide_list))
    results = [po.apply_async(download_obj, args=(batch,)) for batch in divide_list]
    po.close()
    po.join()
    # Calling get() only after join() re-raises any worker exception
    # without serializing the downloads
    for r in results:
        r.get()
    print("============== Download %s complete ! ==============" % bucketname)


start_time = time.time()
start_timeh = time.strftime("%Y-%m-%d %H:%M", time.localtime(start_time))
start_indicator = """
***************************************************************************

                 Start download at %s

***************************************************************************
""" % start_timeh
print(start_indicator)

# List every object in the bucket with boto (v2): bucket.list() iterates over
# the full listing, while methods such as bucket.get_all_keys() return at most
# 1000 keys per call
conn = boto.connect_s3(
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    host='192.168.1.10', port=6780,
    is_secure=False, calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)
bucket = conn.get_bucket(bucketname)
time1 = time.time()
obj_list = []
for i in bucket.list():
    obj_list.append(i.name)
time2 = time.time()
print("Get obj_list cost %0.2fs" % (time2 - time1))

multi_download(workers=50)

stop_time = time.time()
print("Download cost %0.2fs" % (stop_time - start_time))
stop_timeh = time.strftime("%Y-%m-%d %H:%M", time.localtime(stop_time))
stop_indicator = """
***************************************************************************

                 Finish download at %s

***************************************************************************
""" % stop_timeh
print(stop_indicator)
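The script above lists the bucket with boto (v2) only because bucket.list() transparently iterates past the 1000-keys-per-request limit, then downloads with boto3. If a single library is preferred, the same complete listing can be obtained in boto3 with a paginator. A minimal sketch, reusing the access_key/secret_key/endpoint_url defined above; list_all_objects is a hypothetical helper name.

def list_all_objects(bucketname):
    # Paginate through list_objects so buckets with more than 1000 keys
    # are listed completely
    client = boto3.client("s3",
                          aws_access_key_id=access_key,
                          aws_secret_access_key=secret_key,
                          endpoint_url=endpoint_url)
    keys = []
    paginator = client.get_paginator("list_objects")
    for page in paginator.paginate(Bucket=bucketname):
        for obj in page.get("Contents", []):
            keys.append(obj["Key"])
    return keys

# Drop-in replacement for the boto2 connection and listing loop:
# obj_list = list_all_objects(bucketname)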
