import requests, os, bs4
from urllib.parse import urljoin
# Download every xkcd comic image, starting from the front page and
# following each page's "prev" link, saving the images into ./xkcd.
url = 'https://xkcd.com'
os.makedirs('xkcd', exist_ok=True)

# The loop stops once the next URL ends with '#', i.e. when a page's
# "prev" link has the href '#'.
while not url.endswith('#'):
    # Download the page.
    print('downloading the %s...' % (url))  # %s is replaced by the URL
    res = requests.get(url)
    res.raise_for_status()  # raise an exception on an HTTP error status
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    # Find the URL of the comic image.
    comicele = soup.select('#comic img')
    print(comicele)
    src = comicele[0].get('src') if comicele else None
    if not src:
        print('could not find comic image')
    else:
        # The src attribute may be relative or scheme-less (e.g.
        # "//host/path"), which requests cannot fetch directly; resolve it
        # against the page URL. Absolute URLs pass through unchanged.
        comicurl = urljoin(url, src)
        print(comicurl)

        # Download the image.
        res = requests.get(comicurl)
        res.raise_for_status()

        # Save the image to ./xkcd; the context manager closes the file
        # even if a write fails part-way through.
        imagepath = os.path.join('xkcd', os.path.basename(comicurl))
        with open(imagepath, 'wb') as imagefile:
            for chunk in res.iter_content(100000):
                imagefile.write(chunk)

    # Get the prev button's URL. This runs whether or not an image was
    # found, so a page without an image does not stall the crawl.
    prevlinks = soup.select('a[rel="prev"]')
    if not prevlinks:
        # Without a prev link there is nowhere left to go; stop instead of
        # failing with an IndexError.
        print('could not find prev link')
        break
    # Plain concatenation is deliberate: an href of '#' must stay visible
    # at the end of the URL so the loop condition can detect it.
    url = 'https://xkcd.com' + prevlinks[0].get('href')

print('DONE')
# Batch-scrape XKCD comic images with Python and save them in bulk.
# (Source page footer: "latest recommended article published 2022-05-13 11:05:15".)