import multiprocessing
import os
import re

import requests
def _safe_filename(name):
    """Return ``name`` with characters Windows forbids in file names replaced.

    Path separators, ``: * ? " < > |`` and control characters become ``_``;
    surrounding spaces and dots are stripped. An empty result falls back to
    ``'unnamed'`` so a file can always be created.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name).strip(' .')
    return cleaned or 'unnamed'


def main(url, save_dir=r'E:\demo', timeout=10):
    """Download every image referenced on one list page.

    The page at ``url`` is fetched and each ``<img>`` tag that carries both
    an ``alt`` and a ``data-original`` attribute is downloaded to
    ``<save_dir>/<alt text>.jpg``.

    Args:
        url: Address of the list page to scrape.
        save_dir: Directory the images are written to; created if missing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Progress and failures are reported with ``print``. A failed
        page or image request is skipped rather than raised, so one bad
        download does not abort the other pages handled by the pool.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('failed to fetch page: ' + url + ' (' + str(exc) + ')')
        return
    html = response.text

    os.makedirs(save_dir, exist_ok=True)

    # Read the name and the URL from the SAME tag. Collecting the two
    # attributes with independent findall() calls and zipping them pairs
    # names with the wrong URLs as soon as one tag lacks either attribute.
    for tag in re.findall(r'<img\b[^>]*>', html):
        name_match = re.search(r'alt="(.*?)"', tag, re.S)  # image name
        url_match = re.search(r'data-original="(.*?)"', tag, re.S)  # image URL
        if name_match is None or url_match is None:
            continue
        img_name = name_match.group(1)
        img_url = url_match.group(1)

        print('正在抓取: ' + img_url)
        try:
            img_response = requests.get(img_url, timeout=timeout)
            # Without this check an HTTP error page would be saved as a .jpg.
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print('failed: ' + img_url + ' (' + str(exc) + ')')
            continue

        # The alt text comes from the page, so it must be sanitised before
        # it is used as a file name.
        target = os.path.join(save_dir, _safe_filename(img_name) + '.jpg')
        with open(target, 'wb') as f:
            f.write(img_response.content)
        print('success')
if __name__ == '__main__':
    # Base address of the list pages; the page number is appended to it.
    url = 'http://www.doutula.com/photo/list/?page='
    page_urls = [url + str(i) for i in range(1, 30)]  # pages 1-29

    # Use four worker processes. The context manager releases the pool's
    # worker processes once map() has returned, instead of leaving the pool
    # open until interpreter exit.
    with multiprocessing.Pool(4) as pool:
        pool.map(main, page_urls)
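# Scraping images from Doutula (斗图网的图片抓取)
# Page footer from the source article: "最新推荐文章于 2020-12-02 20:47:26 发布"
# (latest recommended article published 2020-12-02 20:47:26)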