# 1. Import packages and load configuration
import os, sys
import argparse
import pandas as pd
from azure.storage.blob import BlockBlobService
import time
from tabulate import tabulate
import datetime as dt
# Path layout: FilePath is this script, CurrentPath its directory, and
# FatherPath the project root one level up (holds the log/ and doc/ folders).
FilePath = os.path.abspath(__file__)
CurrentPath = os.path.dirname(FilePath)
FatherPath = os.path.dirname(CurrentPath)
import logo
# Per-day log file named <yyyymmdd>.log under <FatherPath>/log, tagged with
# this process id (logo is a project-local logging helper).
today = time.strftime('%Y%m%d',time.localtime(time.time()))
logger = logo.create_logo(os.getpid(), __name__, os.path.join(FatherPath, F'log/{today}.log'))
# filename_dict maps a filename prefix (the name with its trailing
# '_<token>' segment stripped) to the sheet's second column — presumably
# the target blob folder; TODO confirm against doc/filename_dict.xlsx.
df = pd.read_excel('/'.join([f'{FatherPath}/doc', 'filename_dict.xlsx']))
df['filename'] = df['filename'].map(lambda x: '_'.join(x.split('_')[:-1]))
filename_dict = dict(df.values)
# Azure storage credentials come from the [blob-config] section of
# doc/config.ini: account_name, account_key, endpoint.
import configparser
cf = configparser.ConfigParser()
cf.read(os.path.join(FatherPath, 'doc/config.ini'))
blob_dict = dict(cf.items('blob-config'))
account_name = blob_dict['account_name']
account_key = blob_dict['account_key']
endpoint = blob_dict['endpoint']
# 2. Create the BlockBlob service client
# Shared client used by every blob operation in this module. NOTE(review):
# BlockBlobService is the legacy azure-storage-blob (<= 2.x) API; the 'endpoint'
# config value is passed as the endpoint_suffix (e.g. 'core.windows.net').
blockblobservice = BlockBlobService(account_name=account_name, account_key=account_key, endpoint_suffix=endpoint)
# 3. File upload
def upload_localfile2blob(localfile, blob_path):
    """Upload one local file to Azure blob storage.

    *blob_path* has the form '<container>/<folder>/.../<filename>': the first
    segment is the container name and everything after the first '/' is the
    blob name inside that container.
    """
    container_name, _, blob_name = blob_path.partition('/')
    blockblobservice.create_blob_from_path(container_name=container_name,
                                           blob_name=blob_name,
                                           file_path=localfile)
def load_localfile(local_path, date='all'):
    """List the files directly under *local_path*, smallest first.

    When *date* is anything other than the string 'all' (case-insensitive),
    keep only files whose name carries that yyyymmdd stamp: the stamp is the
    first 8 characters of the last '_'-separated token before the extension
    (e.g. 'report_xyz_20200114.csv' -> '20200114').
    Returns '/'-joined paths sorted by file size, ascending.
    """
    candidates = ['/'.join([local_path, name]) for name in os.listdir(local_path)]
    if str(date).upper() != 'ALL':
        wanted = str(date)
        candidates = [
            path for path in candidates
            if os.path.basename(path).split('.')[0].split('_')[-1][:8] == wanted
        ]
    return sorted(candidates, key=os.path.getsize)
def Upload2Blob_main(local_father_path, container_name, date='all'):
    """Upload the files under *local_father_path* to Azure blob storage.

    A file is uploaded only when its name prefix (name with the trailing
    '_<token>' segment stripped) appears in the module-level filename_dict,
    which maps the prefix to a blob folder inside *container_name*. Target
    blob folders are emptied before uploading, and a per-file success/failure
    report is written to the log.

    Parameters
    ----------
    local_father_path : str
        Directory containing the candidate files.
    container_name : str
        Target blob container; also the first segment of each blob path.
    date : str, optional
        'all' to take every file, or a yyyymmdd stamp to take only files
        dated accordingly (see load_localfile). Default 'all'.
    """
    # Materialize the listing: the legacy SDK's list_blobs returns a one-shot
    # paged generator, and we need to scan it once per target path below.
    blobs = list(blockblobservice.list_blobs(container_name))
    localfiles = load_localfile(local_father_path, date)

    # Pair up each uploadable local file with its destination blob path.
    local_files = []
    blob_paths = []
    for localfile in localfiles:
        filename = os.path.basename(localfile)
        filename_key = '_'.join(filename.split('_')[:-1])
        if filename_key in filename_dict:
            blob_fold = filename_dict[filename_key]
            local_files.append(localfile)
            blob_paths.append('/'.join([container_name, blob_fold, filename]))
        elif not (filename.endswith('.zip') or 'File_End' in filename):
            # .zip archives and File_End marker files are intentionally
            # skipped without a warning; anything else unmapped is logged.
            logger.info(F'file {filename} not in filename_dict.xlsx')

    logger.info('-' * 20 + '开始清空要上传的blob文件夹' + '-' * 20)
    # Delete every existing blob that lives in one of the target folders.
    del_blobs = []
    for blob_path in blob_paths:
        # Folder segments of the target path (container and filename dropped).
        blob_path_name = blob_path.split('/')[1:-1]
        for blob in blobs:
            blob_name = blob.name.split('/')[:-1]
            if blob_name == blob_path_name and blob.name not in del_blobs:
                blockblobservice.delete_blob(container_name, blob_name=blob.name)
                del_blobs.append(blob.name)
    logger.info('共清空了{}个blob文件,分别为:\n{}'.format(len(del_blobs), '\n'.join(del_blobs)))

    # Upload each file, recording a per-file status row for the report.
    rows = []
    for index, (localfile, blob_path) in enumerate(zip(local_files, blob_paths)):
        try:
            upload_localfile2blob(localfile, blob_path)
            status = 'success'
        except Exception:
            # Best-effort: log the failure with traceback and keep going.
            logger.exception(F'upload failed for {localfile}')
            status = 'failed'
        rows.append({'index': f'{index + 1}-{len(local_files)}',
                     'local_file': localfile, 'blob_path': blob_path,
                     'status': status})
    # Build the report DataFrame once; per-row DataFrame.append was
    # deprecated and removed in pandas 2.0.
    df = pd.DataFrame(rows)
    logger.info('\n{}'.format(tabulate(df, headers=df.columns, showindex=False)))
# 4. Ad-hoc unit test entry point
if __name__ == '__main__':
    # Ad-hoc smoke test: push one local backup folder into the 'uaupload'
    # container, taking every file regardless of date stamp.
    Upload2Blob_main(
        local_father_path='F:/PycharmProjects/upload2blob/data_backup/20200114BACK',
        container_name='uaupload',
        date='all',
    )