本文介绍如何用 Python 登录网站并自动下载文件：从网页批量提取文件链接、按目录结构建立本地文件夹，再用多进程并发下载，希望对各位有所帮助。
自动下载服务器各目录文件
import threading
from multiprocessing import Pool,Process
import requests
import re,os
# new_content = r.content.decode('utf-8')
# First pass: fetch a directory-listing page and build the entry URLs.
def request_dirname(url, pattern):
    """GET *url*, extract entry names with *pattern*, and return them
    joined onto *url* as a list of full URLs.

    Fix: the original matched against ``str(r.content)``, i.e. the
    ``repr`` of the bytes ("b'...'" with escape sequences mixed in).
    ``r.text`` decodes the body with the response's charset, so the
    regex sees the real page text.
    """
    r = requests.get(url)
    names = re.findall(pattern, r.text)
    # NOTE(review): os.path.join is used for URL joining as in the
    # original; works here because *url* ends with '/'.
    return [os.path.join(url, name) for name in names]
# Second pass: for each sub-directory, list the file URLs inside it.
def request_filename(url, pattern):
    """Return a list of per-directory file-URL lists under *url*.

    Each sub-directory found on the top page is fetched in turn; the
    first entry of every listing is dropped (``[1:]``) — presumably a
    parent-directory/header link, TODO confirm against the server page.
    (Dead commented-out numbering code from the original removed.)
    """
    return [request_dirname(item, pattern)[1:]
            for item in request_dirname(url, pattern)]
# Third pass: flatten the file URLs and persist them to a text file.
def filename_urls(url, pattern):
    """Flatten the per-directory URL lists, write them one-per-line to
    'filename_url.txt', and return the flat list.

    Fix: the original truncated the file with one ``open('w')`` and then
    re-opened it in append mode for every single URL — a single 'w'
    handle does both jobs in one pass.
    """
    file_compare_list = request_filename(url, pattern)
    f_urls = [file_url for sub in file_compare_list for file_url in sub]
    with open('filename_url.txt', 'w') as f_txt:
        for file_url in f_urls:
            f_txt.write(file_url + '\n')
    return f_urls
# Ensure the target folder exists, then download the file into it.
def build_dir(url, ICBC, current_path):
    """Create <current_path>/<ICBC>/<subdir> if needed and download *url*.

    The URL's second-to-last path component is the sub-directory name.
    Fix: the original's exists()/makedirs() pair races when several pool
    workers hit the same directory at once (FileExistsError) and used
    recursion just to reach the download — ``exist_ok=True`` plus a
    direct call does both safely. Unused ``filename`` local removed.
    """
    path = url.split('/')[-2]  # sub-directory name
    dir_path = os.path.join(current_path, ICBC, path)
    created = not os.path.exists(dir_path)
    os.makedirs(dir_path, exist_ok=True)
    if created:
        print('%s已创建' % path)
    download_files(url, ICBC, current_path)
# Download one file into its target directory.
def download_files(url, ICBC, current_path):
    """GET *url* and save it as <current_path>/<ICBC>/<subdir>/<filename>.

    Fix: the original ``os.chdir`` mutated the whole worker process's
    current directory — fragile when one pooled process handles many
    directories. Writing to the full target path needs no chdir; the
    final listing prints the same directory contents as before.
    """
    filename = url.split('/')[-1]
    path = url.split('/')[-2]
    work_path = os.path.join(current_path, ICBC, path)
    print(work_path)
    r = requests.get(url)
    with open(os.path.join(work_path, filename), 'wb') as f:
        f.write(r.content)
    print(os.listdir(work_path))
def run(ICBC, urls_results, current_path):
    """Worker entry point: create folders for and download every URL
    in *urls_results* under the *ICBC* folder."""
    for file_url in urls_results:
        build_dir(file_url, ICBC, current_path)
if __name__ == '__main__':
    current_path = os.getcwd()
    pattern1 = r''  # regex that extracts entry names from the listing page
    url1 = 'http://172.17.3.162:8081/'
    all_file_urls = filename_urls(url1, pattern1)
    # Drop the first three entries (non-file links on the top page).
    urls_results = all_file_urls[3:]
    # Fan the same URL list out to 20 pool workers, each writing its
    # copy under its own ICBC<i>/ folder.
    pool = Pool(processes=20)
    for worker_id in range(20):
        msg = 'ICBC%d' % worker_id
        print(msg)
        pool.apply_async(run, (msg, urls_results, current_path))
    pool.close()   # no more tasks; let queued ones finish
    pool.join()
    print('done')
结论: