# 1. Import packages and load configuration
import os, sys
import argparse
import pandas as pd
from azure.storage.blob import BlockBlobService
import time
from tabulate import tabulate
import datetime as dt
# Path layout: FilePath is this script, CurrentPath its directory, and
# FatherPath the project root one level up (holds the log/ and doc/ folders).
FilePath = os.path.abspath(__file__)
CurrentPath = os.path.dirname(FilePath)
FatherPath = os.path.dirname(CurrentPath)
import logo
# Per-day log file named <yyyymmdd>.log under <FatherPath>/log, tagged with
# this process id (logo is a project-local logging helper).
today = time.strftime('%Y%m%d',time.localtime(time.time()))
logger = logo.create_logo(os.getpid(), __name__, os.path.join(FatherPath, F'log/{today}.log'))
# filename_dict maps a filename prefix (the name with its trailing
# '_<token>' segment stripped) to the sheet's second column — presumably
# the target blob folder; TODO confirm against doc/filename_dict.xlsx.
df = pd.read_excel('/'.join([f'{FatherPath}/doc', 'filename_dict.xlsx']))
df['filename'] = df['filename'].map(lambda x: '_'.join(x.split('_')[:-1]))
filename_dict = dict(df.values)
# Azure storage credentials come from the [blob-config] section of
# doc/config.ini: account_name, account_key, endpoint.
import configparser
cf = configparser.ConfigParser()
cf.read(os.path.join(FatherPath, 'doc/config.ini'))
blob_dict = dict(cf.items('blob-config'))
account_name = blob_dict['account_name']
account_key = blob_dict['account_key']
endpoint = blob_dict['endpoint']
# 2. Create the BlockBlob service client
# Shared client used by every blob operation in this module. NOTE(review):
# BlockBlobService is the legacy azure-storage-blob (<= 2.x) API; the 'endpoint'
# config value is passed as the endpoint_suffix (e.g. 'core.windows.net').
blockblobservice = BlockBlobService(account_name=account_name, account_key=account_key, endpoint_suffix=endpoint)
# 3. File upload
def upload_localfile2blob(localfile, blob_path):
    """Upload one local file to Azure blob storage.

    *blob_path* has the form '<container>/<folder>/.../<filename>': the first
    segment is the container name and everything after the first '/' is the
    blob name inside that container.
    """
    container_name, _, blob_name = blob_path.partition('/')
    blockblobservice.create_blob_from_path(container_name=container_name,
                                           blob_name=blob_name,
                                           file_path=localfile)
def load_localfile(local_path, date='all'):
    """List the files directly under *local_path*, smallest first.

    When *date* is anything other than the string 'all' (case-insensitive),
    keep only files whose name carries that yyyymmdd stamp: the stamp is the
    first 8 characters of the last '_'-separated token before the extension
    (e.g. 'report_xyz_20200114.csv' -> '20200114').
    Returns '/'-joined paths sorted by file size, ascending.
    """
    candidates = ['/'.join([local_path, name]) for name in os.listdir(local_path)]
    if str(date).upper() != 'ALL':
        wanted = str(date)
        candidates = [
            path for path in candidates
            if os.path.basename(path).split('.')[0].split('_')[-1][:8] == wanted
        ]
    return sorted(candidates, key=os.path.getsize)
def Upload2Blob_main(local_father_path, container_name, date='all'):
    """Upload the files under *local_father_path* to Azure blob storage.

    A file is uploaded only when its name prefix (name with the trailing
    '_<token>' segment stripped) appears in the module-level filename_dict,
    which maps the prefix to a blob folder inside *container_name*. Target
    blob folders are emptied before uploading, and a per-file success/failure
    report is written to the log.

    Parameters
    ----------
    local_father_path : str
        Directory containing the candidate files.
    container_name : str
        Target blob container; also the first segment of each blob path.
    date : str, optional
        'all' to take every file, or a yyyymmdd stamp to take only files
        dated accordingly (see load_localfile). Default 'all'.
    """
    # Materialize the listing: the legacy SDK's list_blobs returns a one-shot
    # paged generator, and we need to scan it once per target path below.
    blobs = list(blockblobservice.list_blobs(container_name))
    localfiles = load_localfile(local_father_path, date)

    # Pair up each uploadable local file with its destination blob path.
    local_files = []
    blob_paths = []
    for localfile in localfiles:
        filename = os.path.basename(localfile)
        filename_key = '_'.join(filename.split('_')[:-1])
        if filename_key in filename_dict:
            blob_fold = filename_dict[filename_key]
            local_files.append(localfile)
            blob_paths.append('/'.join([container_name, blob_fold, filename]))
        elif not (filename.endswith('.zip') or 'File_End' in filename):
            # .zip archives and File_End marker files are intentionally
            # skipped without a warning; anything else unmapped is logged.
            logger.info(F'file {filename} not in filename_dict.xlsx')

    logger.info('-' * 20 + '开始清空要上传的blob文件夹' + '-' * 20)
    # Delete every existing blob that lives in one of the target folders.
    del_blobs = []
    for blob_path in blob_paths:
        # Folder segments of the target path (container and filename dropped).
        blob_path_name = blob_path.split('/')[1:-1]
        for blob in blobs:
            blob_name = blob.name.split('/')[:-1]
            if blob_name == blob_path_name and blob.name not in del_blobs:
                blockblobservice.delete_blob(container_name, blob_name=blob.name)
                del_blobs.append(blob.name)
    logger.info('共清空了{}个blob文件,分别为:\n{}'.format(len(del_blobs), '\n'.join(del_blobs)))

    # Upload each file, recording a per-file status row for the report.
    rows = []
    for index, (localfile, blob_path) in enumerate(zip(local_files, blob_paths)):
        try:
            upload_localfile2blob(localfile, blob_path)
            status = 'success'
        except Exception:
            # Best-effort: log the failure with traceback and keep going.
            logger.exception(F'upload failed for {localfile}')
            status = 'failed'
        rows.append({'index': f'{index + 1}-{len(local_files)}',
                     'local_file': localfile, 'blob_path': blob_path,
                     'status': status})
    # Build the report DataFrame once; per-row DataFrame.append was
    # deprecated and removed in pandas 2.0.
    df = pd.DataFrame(rows)
    logger.info('\n{}'.format(tabulate(df, headers=df.columns, showindex=False)))
# 4. Ad-hoc unit test entry point
if __name__ == '__main__':
    # Ad-hoc smoke test: push one local backup folder into the 'uaupload'
    # container, taking every file regardless of date stamp.
    Upload2Blob_main(
        local_father_path='F:/PycharmProjects/upload2blob/data_backup/20200114BACK',
        container_name='uaupload',
        date='all',
    )