咳咳,不敢多说,不然这篇又白写了,两篇审核都因为版权问题没过了@!!
这篇文我上传修改了四次审核才过,结果一开始发的几张图片又被判低俗,不给过,呜呜呜
我太难了,呜呜呜~
有什么问题私聊,必回!!!
上代码:
import requests
from bs4 import BeautifulSoup
import re
import time
"""Scrape cartoon-image listing pages 53-376 from umei.cc and save every
image to disk under a sequential counter.

Fixes over the original script:
- removed the dead ``if i == 1`` branch (``range(53, 377)`` never yields 1)
  and the ~25 duplicated lines it carried;
- parses href/src attributes directly from BeautifulSoup tags instead of
  stringifying tags and re-matching them with regexes;
- guards against ``find(...)`` returning ``None`` on an unexpected page
  layout instead of crashing with ``AttributeError``;
- the Windows output path is written with escaped backslashes so it no
  longer relies on ``\p``/``\i`` being invalid-but-ignored escapes.
"""

BASE_URL = "https://www.umei.cc"

HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/96.0.4664.110 Safari/537.36 "
}

# Byte-identical to the effective (live-branch) output prefix of the original:
# "D:\python_write_file/number_one\image\爬取" + f'{w}' + '.jpg'
SAVE_PREFIX = "D:\\python_write_file/number_one\\image\\爬取"


def _list_page_url(page):
    """Return the listing-page URL for *page*; page 1 has no numeric suffix."""
    if page == 1:
        return f"{BASE_URL}/katongdongman/index.htm"
    return f"{BASE_URL}/katongdongman/index_{page}.htm"


def _detail_urls(list_html):
    """Yield absolute detail-page URLs from the TypeList block of a listing page."""
    soup = BeautifulSoup(list_html, "html.parser")
    container = soup.find("div", class_="TypeList")
    if container is None:  # layout changed or request was blocked; skip page
        return
    # The original regex matched 'href="..." target=', i.e. anchors that carry
    # a target attribute; filter the same way here.
    for anchor in container.find_all("a", target=True):
        href = anchor.get("href")
        if href:
            # Original code concatenated with a trailing slash; keep that.
            yield BASE_URL + "/" + href


def _image_urls(detail_html):
    """Yield image src URLs from the ImageBody block of a detail page."""
    soup = BeautifulSoup(detail_html, "html.parser")
    body = soup.find("div", class_="ImageBody")
    if body is None:  # no image container on this page; skip it
        return
    for img in body.find_all("img"):
        src = img.get("src")
        if src:
            yield src


w = 1572  # running image counter — continues numbering from a previous run
for i in range(53, 377):
    resp = requests.get(url=_list_page_url(i), headers=HEADERS)
    resp.encoding = "utf-8"
    for whole_url in _detail_urls(resp.text):
        resp2 = requests.get(url=whole_url, headers=HEADERS)
        resp2.encoding = "utf-8"
        for download_url in _image_urls(resp2.text):
            resp3 = requests.get(url=download_url, headers=HEADERS)
            time.sleep(1)  # throttle: be polite to the image host
            with open(SAVE_PREFIX + f"{w}.jpg", "wb") as img_file:
                img_file.write(resp3.content)
            print(f"第{w}个")
            w = w + 1
print("\n\n\t全部完毕")