3.10爬取网页数据示例（二）

weixin_30384031

于 2019-03-10 14:25:00 发布

阅读量112

点赞数

原文链接：http://www.cnblogs.com/cqkangle/p/10505248.html

版权

import requests
import os
import bs4
from urllib.parse import urljoin

# Start page of the crawl; the script walks backwards through the "prev" links.
url = 'http://xkcd.com'
# Raw string: '\A' is not a valid escape sequence, so the non-raw literal only
# produced this value by accident and warns on recent Python versions.
ml = r'F:\ABD'


def resolve_image_url(page_url, src):
    """Return an absolute URL for the image reference *src* found on *page_url*.

    The ``src`` attribute may be protocol-relative (``//host/path``), which
    ``requests.get`` rejects because it has no scheme; joining it against the
    page URL supplies the missing parts.  Absolute URLs are returned unchanged.
    """
    return urljoin(page_url, src)


def save_image(res, directory, image_url):
    """Write the body of response *res* to *directory* and return the file path.

    The file name is the last path component of *image_url*.  The body is
    streamed in chunks via ``res.iter_content`` so the whole image is never
    held in memory, and the file is closed even if a write fails.
    """
    target = os.path.join(directory, os.path.basename(image_url))
    with open(target, 'wb') as imageFile:
        for chunk in res.iter_content(100000):
            imageFile.write(chunk)
    return target


def main(start_url, download_dir):
    """Download every comic image reachable from *start_url* via "prev" links.

    Images are stored in *download_dir*, which is created if missing.  The
    crawl stops when the "prev" link is ``'#'`` (the resulting URL then ends
    with ``'#'``) or when a page has no "prev" link at all.  HTTP errors are
    raised by ``raise_for_status``.
    """
    os.makedirs(download_dir, exist_ok=True)
    pageUrl = start_url
    while not pageUrl.endswith('#'):
        print('Download page %s...' % pageUrl)
        res = requests.get(pageUrl)
        res.raise_for_status()
        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = bs4.BeautifulSoup(res.text, 'html.parser')
        print('Done.')

        comicElem = soup.select('#comic img')
        src = comicElem[0].get('src') if comicElem else None
        if not src:
            print('Could not find comic image.')
        else:
            comicUrl = resolve_image_url(pageUrl, src)
            print('Downloading image %s...' % (comicUrl))
            res = requests.get(comicUrl)
            res.raise_for_status()
            print('Done.')
            save_image(res, download_dir, comicUrl)

        prevLinks = soup.select('a[rel="prev"]')
        if not prevLinks:
            print('Could not find previous-page link.')
            break
        # Plain concatenation (not urljoin) on purpose: a href of '#' must
        # survive in the URL, because that is what ends the loop.
        pageUrl = 'http://xkcd.com' + prevLinks[0].get('href')
    print('Done.')


if __name__ == '__main__':
    main(url, ml)