MinIO 分布式环境并行（上传、下载、删除）测试

Carry_NJ

已于 2022-11-28 14:42:15 修改

阅读量785

点赞数 1

分类专栏： # minio 文章标签：分布式 python

于 2022-09-13 16:19:56 首次发布

本文链接：https://blog.csdn.net/xkx_07_10/article/details/126835926

版权

minio 专栏收录该内容

2 篇文章 1 订阅

订阅专栏

import concurrent.futures
import functools
import glob
import os
import time
from datetime import timedelta
from urllib.request import urlopen

import requests
from minio import Minio
from minio.commonconfig import CopySource
from minio.deleteobjects import DeleteObject
from minio.error import S3Error


def call_time(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped callable keeps its name, docstring and other metadata, and
    its return value is passed through unchanged.  Nothing is printed when
    ``func`` raises; the exception propagates to the caller.

    :param func: callable to time
    :return: wrapper with the same call signature as ``func``
    """
    # Resolve the label once, from the callable's own metadata.  Parsing
    # ``str(func)`` mislabels builtins ("function") and bound methods
    # ("method"); fall back to repr() for callables without a name.
    func_name = (
        getattr(func, "__qualname__", None)
        or getattr(func, "__name__", None)
        or repr(func)
    )

    @functools.wraps(func)
    def inner(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        print('{} use time: {}s'.format(func_name, time.perf_counter() - started))
        return result

    return inner


class Bucket(object):
    """Singleton convenience wrapper around a ``Minio`` client.

    Every ``Bucket(...)`` call returns the same object; calling the
    constructor again re-runs ``__init__`` and therefore re-points the shared
    instance at the newly given service and credentials.
    """

    # Class-level default; ``__init__`` replaces it on the instance with the
    # configured Minio client.
    client = None
    # The single shared instance handed out by ``__new__``.  Kept separate
    # from ``client`` so the two roles are not stored under one name.
    _instance = None
    # Read-only policy template; both ``%s`` slots receive the bucket name.
    policy = '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"AWS":["*"]},"Action":["s3:GetBucketLocation","s3:ListBucket"],"Resource":["arn:aws:s3:::%s"]},{"Effect":"Allow","Principal":{"AWS":["*"]},"Action":["s3:GetObject"],"Resource":["arn:aws:s3:::%s/*"]}]}'

    def __new__(cls, *args, **kwargs):
        """Create the shared instance on first use and return it afterwards."""
        if cls._instance is None:
            cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self, service, access_key, secret_key, secure=False):
        """
        Connect the wrapper to a MinIO service.

        :param service: endpoint passed to ``Minio`` (e.g. "host:port")
        :param access_key: access key
        :param secret_key: secret key
        :param secure: passed through to ``Minio`` as ``secure``
        """
        self.service = service
        self.client = Minio(service, access_key=access_key, secret_key=secret_key, secure=secure)

    def exists_bucket(self, bucket_name):
        """
        Check whether a bucket exists.

        :param bucket_name: bucket name
        :return: result of ``Minio.bucket_exists``
        """
        return self.client.bucket_exists(bucket_name=bucket_name)

    def create_bucket(self, bucket_name: str, is_policy: bool = True):
        """
        Create a bucket and optionally attach the class-level policy.

        :param bucket_name: bucket name
        :param is_policy: when true, apply ``Bucket.policy`` to the new bucket
        :return: False if the bucket already exists, True once it is created
        """
        if self.exists_bucket(bucket_name=bucket_name):
            return False
        self.client.make_bucket(bucket_name=bucket_name)
        if is_policy:
            policy = self.policy % (bucket_name, bucket_name)
            self.client.set_bucket_policy(bucket_name=bucket_name, policy=policy)
        return True

    def get_bucket_list(self):
        """
        List all buckets.

        :return: list of ``{"bucket_name": ..., "create_time": ...}`` dicts
        """
        return [
            {"bucket_name": bucket.name, "create_time": bucket.creation_date}
            for bucket in self.client.list_buckets()
        ]

    def remove_bucket(self, bucket_name):
        """
        Delete a bucket.

        :param bucket_name: bucket name
        :return: True on success, False if the server reported an S3 error
        """
        try:
            self.client.remove_bucket(bucket_name=bucket_name)
        except S3Error as e:
            print("[error]:", e)
            return False
        return True

    def bucket_list_files(self, bucket_name, prefix=None):
        """
        List every object in a bucket (recursively).

        :param bucket_name: bucket name
        :param prefix: only list objects whose name starts with this prefix
        :return: the iterable returned by ``Minio.list_objects``, or None if
            the call itself raised ``S3Error``
        """
        # NOTE(review): list_objects is expected to return a lazy iterator, so
        # request errors would surface while the caller iterates rather than
        # here - confirm against the installed minio version.
        try:
            return self.client.list_objects(bucket_name=bucket_name, prefix=prefix, recursive=True)
        except S3Error as e:
            print("[error]:", e)
            return None

    def bucket_policy(self, bucket_name):
        """
        Fetch the policy attached to a bucket.

        :param bucket_name: bucket name
        :return: the policy, or None if the server reported an S3 error
        """
        try:
            policy = self.client.get_bucket_policy(bucket_name)
        except S3Error as e:
            print("[error]:", e)
            return None
        return policy

    def download_file(self, bucket_name, file, file_path, stream=1024 * 32):
        """
        Download an object and write it to a local file in chunks.

        :param bucket_name: bucket name
        :param file: object name inside the bucket
        :param file_path: local path to write to
        :param stream: chunk size in bytes used when reading the response
        :return: None; S3 errors are printed, not raised
        """
        response = None
        try:
            response = self.client.get_object(bucket_name, file)
            with open(file_path, "wb") as fp:
                for chunk in response.stream(stream):
                    fp.write(chunk)
        except S3Error as e:
            print("[error]:", e)
        finally:
            # The response holds a pooled HTTP connection; it must be closed
            # and released even when writing fails, or the connection leaks.
            if response is not None:
                response.close()
                response.release_conn()

    def fget_file(self, bucket_name, file, file_path):
        """
        Download an object straight to a local file.

        :param bucket_name: bucket name
        :param file: object name inside the bucket
        :param file_path: local path to save to
        :return: None
        """
        self.client.fget_object(bucket_name, file, file_path)

    def copy_file(self, bucket_name, file, file_path):
        """
        Server-side copy of an object (up to 5GB).

        :param bucket_name: destination bucket name
        :param file: destination object name
        :param file_path: source object, either a ``CopySource`` or a
            "source-bucket/object-name" string (a leading "/" is ignored)
        :return: None
        """
        source = file_path
        if isinstance(source, str):
            # minio 7.x only accepts a CopySource, so build one from the
            # "bucket/object" string form.
            src_bucket, _, src_object = source.lstrip("/").partition("/")
            source = CopySource(src_bucket, src_object)
        self.client.copy_object(bucket_name, file, source)

    def upload_file(self, bucket_name, file, file_path, content_type):
        """
        Upload a local file through an open file handle.

        :param bucket_name: bucket name
        :param file: object name to create
        :param file_path: local file path
        :param content_type: content type stored with the object
        :return: None; S3 errors are printed, not raised
        """
        try:
            with open(file_path, "rb") as file_data:
                file_size = os.stat(file_path).st_size
                self.client.put_object(bucket_name, file, file_data, file_size, content_type=content_type)
        except S3Error as e:
            print("[error]:", e)

    def fput_file(self, bucket_name, file, file_path):
        """
        Upload a local file by path.

        :param bucket_name: bucket name
        :param file: object name to create
        :param file_path: local file path
        :return: None; S3 errors are printed, not raised
        """
        try:
            self.client.fput_object(bucket_name, file, file_path)
        except S3Error as e:
            print("[error]:", e)

    def put_file(self, bucket_name, object_name, file_url):
        """
        Upload the content behind a URL as an object.

        :param bucket_name: bucket name
        :param object_name: object name to create
        :param file_url: URL whose response body is uploaded
        :return: None
        """
        # length=-1 with an explicit part_size streams a body of unknown size;
        # the context manager closes the HTTP response when the upload ends.
        with urlopen(file_url) as data:
            self.client.put_object(bucket_name, object_name, data, length=-1, part_size=10 * 1024 * 1024)

    def stat_object(self, bucket_name, file):
        """
        Print and return an object's metadata.

        :param bucket_name: bucket name
        :param file: object name inside the bucket
        :return: the object returned by ``Minio.stat_object``, or None if the
            server reported an S3 error
        """
        try:
            data = self.client.stat_object(bucket_name, file)
            print(data.bucket_name)
            print(data.object_name)
            print(data.last_modified)
            print(data.etag)
            print(data.size)
            print(data.metadata)
            print(data.content_type)
        except S3Error as e:
            print("[error]:", e)
            return None
        return data

    def remove_file(self, bucket_name, file):
        """
        Delete a single object.

        :param bucket_name: bucket name
        :param file: object name inside the bucket
        :return: None
        """
        self.client.remove_object(bucket_name, file)

    def remove_files(self, bucket_name, file_list):
        """
        Delete several objects in one request batch.

        :param bucket_name: bucket name
        :param file_list: iterable of object names
        :return: None; per-object deletion errors are printed
        """
        delete_object_list = [DeleteObject(file) for file in file_list]
        # The returned error iterable must be consumed here; each item it
        # yields is one failed deletion.
        for del_err in self.client.remove_objects(bucket_name, delete_object_list):
            print("del_err", del_err)

    def presigned_get_file(self, bucket_name, file, days=7):
        """
        Build a presigned HTTP GET URL for an object.

        :param bucket_name: bucket name
        :param file: object name inside the bucket
        :param days: validity of the URL in days
        :return: result of ``Minio.presigned_get_object``
        """
        return self.client.presigned_get_object(bucket_name, file, expires=timedelta(days=days))


def upload_minio(file_path):
    """Upload one local file to the bucket under a ``test_``-prefixed name.

    Relies on the module-level ``minio_obj`` and ``bucket_name``.

    Args:
        file_path: local path of the file to upload, e.g. /mnt/d/xxx

    Returns:
        None
    """
    # Object name = "test_" + the part of the path after the last "/".
    base_name = file_path.rsplit("/", 1)[-1]
    minio_obj.fput_file(bucket_name, f"test_{base_name}", file_path)


def download_minio(object_name, download_dir="/mnt/e/download_dataset/"):
    """Download one object from the bucket into a local directory.

    Relies on the module-level ``minio_obj`` and ``bucket_name``.

    Args:
        object_name: name of the object inside the bucket
        download_dir: local directory the object is saved into; the file is
            stored as ``download_dir/object_name``

    Returns:
        None
    """
    # os.path.join tolerates a directory given with or without a trailing
    # separator, unlike plain string concatenation.
    file_path = os.path.join(download_dir, object_name)
    minio_obj.fget_file(bucket_name, object_name, file_path)


def del_bucket_list_files(object_name):
    """Remove a single object from the bucket.

    Relies on the module-level ``minio_obj`` and ``bucket_name``.

    Args:
        object_name: name of the object to delete

    Returns:
        None
    """
    minio_obj.remove_file(bucket_name=bucket_name, file=object_name)


@call_time
def main(files_list, max_workers=32, action="delete"):
    """Run one bulk operation against the bucket on a thread pool.

    Relies on the module-level ``minio_obj`` and ``bucket_name``.

    Args:
        files_list: local file paths; used only when ``action`` is "upload"
        max_workers: number of worker threads
        action: "upload" (send every path in ``files_list``), "download"
            (fetch every object in the bucket) or "delete" (remove every
            object in the bucket, the default)

    Returns:
        None

    Raises:
        ValueError: if ``action`` is not one of the three supported values
    """
    if action == "upload":
        worker, items = upload_minio, list(files_list)
    elif action in ("download", "delete"):
        worker = download_minio if action == "download" else del_bucket_list_files
        # Collect every object name currently in the bucket.
        bucket_list_files = minio_obj.bucket_list_files(bucket_name)
        s01 = time.time()
        # bucket_list_files returns None when listing failed; treat that as
        # an empty bucket instead of crashing on iteration.
        items = [obj.object_name for obj in bucket_list_files or []]
        print(f'获取桶里所有文件名 call time:{time.time() - s01}')
    else:
        raise ValueError(f"unsupported action: {action!r}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_item = {executor.submit(worker, item): item for item in items}
        for i, future in enumerate(concurrent.futures.as_completed(future_to_item)):
            item = future_to_item[future]
            try:
                # result() re-raises whatever the worker raised.
                future.result()
            except Exception as exc:
                # Best-effort batch: report the failure and keep going.
                print('%r generated an exception: %s' % (item, exc))
            else:
                print(f"done {i},{int(round(time.time()))}")


if __name__ == "__main__":
    # ``bucket_name`` and ``minio_obj`` are module-level names read by the
    # helper functions above, so they must be bound before main() runs.
    bucket_name = "bucket-test"
    minio_obj = Bucket(
        service="192.168.3.201:7777",
        access_key="admin",
        secret_key="password",
    )
    # Local files handed to main().
    files_list = glob.glob("/mnt/e/dataset/*.mp4")
    main(files_list)