如何用Python批量下载数据_【Python】数据库查询 & 批量下载文件

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Katrin
# Create Time: 2020/04/22

import getpass
import os
import sys
import time

import paramiko
from bson import ObjectId
from pymongo import MongoClient


def get_group():
    """Return the group ids to query, converted to ``ObjectId`` instances."""
    # Placeholder group ids; replace them with real ids accepted by ObjectId.
    group = ['group id 1', 'group id 2']
    return [ObjectId(i) for i in group]


def get_queries(group_list):
    """Build one MongoDB filter per group.

    Args:
        group_list: iterable of group ids (as produced by ``get_group``).

    Returns:
        A list of dicts, one per group, each matching documents whose
        ``group`` equals that id and whose ``ranked`` field is ``True``.
    """
    return [{'group': group, 'ranked': True} for group in group_list]


def get_ssh():
    """Open an SSH connection and return the connected ``paramiko.SSHClient``.

    Raises:
        ConnectionError: if the transport is missing or inactive after
            ``connect`` returns.
    """
    # Placeholder connection settings; fill them in before running.
    user = 'user'
    ip = 'ip'
    passwd = 'passwd'

    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy trusts unknown host keys automatically;
    # confirm this is acceptable for the target environment.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(ip, port=22, username=user, password=passwd)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    transport = ssh.get_transport()
    if transport is None or not transport.is_active():
        ssh.close()
        raise ConnectionError('SSH failed')
    return ssh


def get_sftp(ssh):
    """Return an SFTP client that runs over the transport of ``ssh``."""
    # Equivalent alternative: sftp = ssh.open_sftp()
    sftp = paramiko.SFTPClient.from_transport(ssh.get_transport())
    return sftp


# NOTE: SSHClient.exec_command is single-session; the session state is reset
# on the next call.

def simple_pyshell(ssh, cmd):
    """Run ``cmd`` followed by ``ls`` on the remote host and collect the output.

    Args:
        ssh: object exposing ``exec_command(cmd)`` that returns a
            ``(stdin, stdout, stderr)`` triple (e.g. a paramiko SSHClient).
        cmd: shell command to run; ``';ls'`` is appended to it.

    Returns:
        A ``(stdout_lines, stderr_lines)`` tuple of the lines read from the
        command's standard output and standard error.
    """
    _stdin, stdout, stderr = ssh.exec_command(cmd + ';ls')
    stdout_lines = stdout.readlines()
    stderr_lines = stderr.readlines()
    return stdout_lines, stderr_lines


def mkdir_valid(path):
    """Ensure the directory part of ``path`` exists and return ``path`` unchanged.

    A path ending in a separator (``'codes/'``) creates that directory; a path
    to a file creates only its parent directory. A bare name with no directory
    component needs nothing created and is returned as is.
    """
    directory = os.path.dirname(path)
    # os.makedirs('') raises FileNotFoundError, so skip it for bare names.
    if directory:
        os.makedirs(directory, exist_ok=True)
    return path


# If the file list is already known, the database query can be skipped and
# the files downloaded in bulk directly.

def download_file(ssh, sftp, download_path, local_path, obj_file, group_id=None):
    """Download the remote ``.cpp`` file whose name contains ``obj_file``.

    The remote directory is ``download_path`` followed by the last four
    characters of ``obj_file`` in reverse order, each as its own path level.

    Args:
        ssh: connected SSH client, used to list the remote directory.
        sftp: SFTP client used for the transfer.
        download_path: remote base path that the per-file levels are appended to.
        local_path: local directory prefix the file is saved under.
        obj_file: file id string; it must appear in the remote file name.
        group_id: optional group identifier, used only in the "not exist"
            message (replaces the former reliance on a global ``query``).

    Returns:
        True if a matching file was found and fetched, False otherwise.
    """
    remote_path = download_path + ''.join(obj_file[-i] + '/' for i in range(1, 5))

    # The space after `cd` is required; simple_pyshell appends ';ls', so the
    # output is a listing produced after the `cd`.
    cmd = 'cd ' + remote_path
    stdout_lines, _ = simple_pyshell(ssh, cmd)

    matches = [name for name in stdout_lines
               if '.cpp' in name and obj_file in name]
    if not matches:
        print('obj_file {fileid} not exist for group {groupid}'.format(
            fileid=obj_file, groupid=str(group_id)))
        return False

    # Lines from readlines() keep their trailing newline; strip it.
    filename = matches[0].replace('\n', '')
    sftp.get(remote_path + filename, local_path + filename)
    return True


def _download_groups(db, ssh, sftp, queries, download_path, save_path):
    """Run each query against ``db.files`` and download the files it yields."""
    for query in queries:
        group_id = str(query['group'])
        # The projection {'versions': 1} limits returned documents to the
        # '_id' and 'versions' fields.
        res = db.files.find(query, {'versions': 1})
        # Use the last entry of each 'versions' list; skip documents where
        # the list is missing or empty.
        files = [str(doc['versions'][-1]) for doc in res if doc.get('versions')]
        local_path = mkdir_valid(save_path + group_id + '/')
        for obj_file in files:
            download_file(ssh, sftp, download_path, local_path, obj_file,
                          group_id=group_id)


def main():
    """Query MongoDB for each group's files and fetch them over SFTP."""
    queries = get_queries(get_group())
    download_path = 'path'
    save_path = mkdir_valid('codes/')

    client = MongoClient('mongodb://user:passwd@ip:port/')
    try:
        ssh = get_ssh()
        try:
            sftp = get_sftp(ssh)
            try:
                _download_groups(client.prod, ssh, sftp, queries,
                                 download_path, save_path)
            finally:
                sftp.close()
        finally:
            # The original never closed the SSH client.
            ssh.close()
    finally:
        client.close()


if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值