删除 Azure Data Lake Storage Gen2 中的 Blob 对象

删除 Azure Data Lake Storage Gen2 中的 Blob 对象

# -*- encoding: utf-8 -*-


import os
import time

from retrying import retry
from azure.storage.blob import BlobServiceClient


class DirectoryClient:
    """Directory-style helper around one Azure Blob Storage container.

    Blob names are treated as '/'-separated paths so that a name prefix can
    be listed or deleted as if it were a directory.
    """

    # Upper bound on blobs sent in one ``delete_blobs`` call. The Blob Batch
    # service documents a maximum of 256 sub-requests per batch.
    _DELETE_BATCH_SIZE = 256

    def __init__(self, connection_string, container_name):
        """Connect to ``container_name`` using a storage connection string."""
        service_client = BlobServiceClient.from_connection_string(connection_string)
        self.client = service_client.get_container_client(container_name)

    @staticmethod
    def _as_prefix(path):
        """Return ``path`` with a trailing '/' unless it is empty or has one."""
        if path and not path.endswith('/'):
            return path + '/'
        return path

    def _relative_names(self, prefix):
        """Yield each blob name under ``prefix`` with the prefix stripped.

        The prefix is removed by slicing rather than ``os.path.relpath``:
        blob names always use '/', and relpath would normalise the name
        (and use the OS separator), so the result could no longer be mapped
        back to the real blob.
        """
        for blob in self.client.list_blobs(name_starts_with=prefix):
            yield blob.name[len(prefix):]

    def ls_files(self, path, recursive=False):
        """List the files under ``path``.

        Args:
            path: Directory-like prefix; '' means the container root.
            recursive: When true, include files in nested sub-directories
                (returned with their relative sub-path).

        Returns:
            A list of names relative to ``path``.
        """
        prefix = self._as_prefix(path)
        return [
            name
            for name in self._relative_names(prefix)
            # An empty name would be a blob equal to the prefix itself.
            if name and (recursive or '/' not in name)
        ]

    def ls_dirs(self, path, recursive=False):
        """List the sub-directories under ``path``.

        Args:
            path: Directory-like prefix; '' means the container root.
            recursive: When true, return every nested directory; otherwise
                only the immediate children.

        Returns:
            A list of directory paths relative to ``path``, without
            duplicates, in first-seen order. Directories that contain only
            other directories are included.
        """
        prefix = self._as_prefix(path)
        seen = set()
        dirs = []
        for name in self._relative_names(prefix):
            parts = name.split('/')[:-1]  # drop the file component
            if not parts:
                continue
            if recursive:
                candidates = ['/'.join(parts[:depth]) for depth in range(1, len(parts) + 1)]
            else:
                candidates = [parts[0]]
            for candidate in candidates:
                if candidate and candidate not in seen:
                    seen.add(candidate)
                    dirs.append(candidate)
        return dirs

    def rm(self, path, recursive=False):
        """Delete the blob at ``path``, or everything under it if ``recursive``."""
        if recursive:
            self.rmdir(path)
        else:
            print(f'Deleting {path}')
            self.client.delete_blob(path)

    def rmdir(self, path):
        """Delete every blob whose name starts with ``path`` + '/'.

        Blobs are deleted in consecutive, non-overlapping batches so that
        each blob is sent exactly once and no batch exceeds
        ``_DELETE_BATCH_SIZE``. Nothing is printed or deleted when no blob
        matches.
        """
        prefix = self._as_prefix(path)
        # Use the full names returned by the service so they match exactly.
        names = [blob.name for blob in self.client.list_blobs(name_starts_with=prefix)]
        if not names:
            return

        size = self._DELETE_BATCH_SIZE
        for start in range(0, len(names), size):
            self.client.delete_blobs(*names[start:start + size])
        print(prefix + ':blob 删除完成')

    def droptable(self, dbName, tableName):
        """Drop ``dbName.tableName`` through Spark SQL if it exists.

        NOTE(review): ``spark`` is not defined in this class; presumably it
        is a session object injected by the notebook/runtime — confirm.
        The names are interpolated into the SQL text unescaped, so they must
        come from a trusted source.
        """
        spark.sql(f'DROP TABLE IF EXISTS {dbName}.{tableName}')
        print('{0}.{1} 删除完成'.format(dbName, tableName))


@retry(stop_max_attempt_number=20, wait_incrementing_increment=200)
def main():
    """Remove the storage blobs and the Spark table for each listed table.

    For every table name, all blobs under the same-named prefix in the
    container are deleted, then the table is dropped from the database.
    The ``retry`` decorator re-runs the whole function when it raises, up to
    the configured number of attempts.
    """
    # NOTE(review): the connection string (including the account key) is
    # hard-coded here; consider loading it from a secret store instead.
    blob_connect_string = 'DefaultEndpointsProtocol=https;AccountName=aalsabddev01e2;AccountKey=xxxxxxxxxxxxxxxxxxx==;EndpointSuffix=core.chinacloudapi.cn'

    target_container = 'dwm-storage'
    database = 'dwm_dev'
    tables_to_remove = (
        'm02_iap_track_logon_evt',
        'm02_iap_track_vist_evt',
        'm02_iap_track_page_clos_evt',
    )

    directory_client = DirectoryClient(
        connection_string=blob_connect_string,
        container_name=target_container,
    )

    for table_name in tables_to_remove:
        # Clear the table's files first, then drop its metadata.
        directory_client.rmdir(table_name)
        directory_client.droptable(database, table_name)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值