# 一般情况下是不需要 magical(代理)的,但是现在不是一般情况!
import random
import requests
import socket
import time
import os
# Make sure the output directory exists before any image is written.
# exist_ok=True avoids the check-then-create race of testing
# os.path.exists() first and calling os.mkdir() afterwards.
os.makedirs('./data', exist_ok=True)
# Download pages 014..016 through the local SOCKS5 proxy into ./data/.

# Request settings do not change between pages, so build them once.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400'}
# requests selects a proxy by URL scheme. Picking a dict that holds only
# 'http' or only 'https' at random means an https:// URL is sometimes sent
# without any proxy, so both schemes are mapped to the proxy here.
proxies = {
    'http': 'socks5://127.0.0.1:1080',
    'https': 'socks5://127.0.0.1:1080',
}

timeout = 20
# Default timeout for the whole socket layer; later socket use in this
# process does not need to set it again.
socket.setdefaulttimeout(timeout)
sleep_download_time = 10  # pause between pages, in seconds; tune as needed

for i in range(14, 17):
    url = 'https://baidu/72/{:03d}.jpg'.format(i)
    path = './data/{:03d}.jpg'.format(i)  # where the image is stored
    try:
        # requests only times out when timeout= is passed explicitly.
        # The with-block closes the response even if writing fails.
        with requests.get(url, proxies=proxies, headers=headers,
                          timeout=timeout) as response:
            if response.status_code == 200:
                # 'wb': the image is written as raw bytes.
                with open(path, 'wb') as f:
                    f.write(response.content)
                print(f'正在下载: {url}')
            else:
                # Do not save an error page under a .jpg name.
                print(f'下载失败,状态码 {response.status_code}: {url}')
    except requests.RequestException as e:
        # Covers connection errors as well as the timeouts enabled above.
        print(e.args)
    time.sleep(sleep_download_time)
# 优化版:
# 开发时间: 2022/10/22 22:40
# 功能作用: 未知
import random
import requests
import socket
import time
import os
dataname = 51  # which album to download; change this per album
star = 1  # page number to start downloading from

# Images are stored in a directory named after the album. exist_ok=True
# avoids the check-then-create race of testing os.path.exists() first.
os.makedirs(f'./{dataname}', exist_ok=True)
def delay(sleep_download_time=5, timeout=20):
    """Set the socket default timeout, then pause before the next request.

    Args:
        sleep_download_time: Seconds to sleep; tune to taste.
        timeout: Default timeout, in seconds, applied to the whole socket
            layer via ``socket.setdefaulttimeout``. Later socket use in
            this process does not need to set it again.
    """
    socket.setdefaulttimeout(timeout)
    time.sleep(sleep_download_time)
# Download pages star..998 of album `dataname` through the local SOCKS5
# proxy, retrying each page up to 20 times on network errors.

# Request settings do not change between pages, so build them once.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400'}
# requests selects a proxy by URL scheme. Picking a dict that holds only
# 'http' or only 'https' at random means an https:// URL is sometimes sent
# without any proxy, so both schemes are mapped to the proxy here.
proxies = {
    'http': 'socks5://127.0.0.1:1080',
    'https': 'socks5://127.0.0.1:1080',
}

for i in range(star, 999):
    # Change this to your own URL pattern.
    url = 'https://baidu/data/1602/{}/{:03d}.jpg'.format(dataname, i)
    path = './{}/{:03d}.jpg'.format(dataname, i)  # where the image is stored
    for num in range(1, 21):
        try:
            # requests only times out when timeout= is passed explicitly.
            # The with-block closes the response even if writing fails.
            with requests.get(url, proxies=proxies, headers=headers,
                              timeout=20) as response:
                if response.status_code == 200:
                    # 'wb': the image is written as raw bytes.
                    with open(path, 'wb') as f:
                        f.write(response.content)
                    print(f'正在下载: {url}')
                else:
                    # Do not save an error page under a .jpg name.
                    # NOTE(review): the loop still walks on to page 998
                    # after the album ends; consider stopping on 404 if
                    # pages are known to be contiguous.
                    print(f'下载失败,状态码 {response.status_code}: {url}')
            # The server answered, so retrying will not help.
            break
        except requests.RequestException as e:
            # Covers connection errors as well as the timeouts enabled above.
            print(e.args)
            print(f'Error========>>:下载失败,接着重试第{num}/20次: <<=================== {url}')
            delay()
    # Pause between pages, whether or not this one succeeded.
    delay()