爬取糗事百科网页图片数据: 利用正则表达式匹配图片 URL
```html
<div class="thumb">
<a href="/article/123230554" target="_blank">
<img src="//pic.qiushibaike.com/system/pictures/12323/123230554/medium/JVGP2HUEURQH8WJB.jpg" alt="糗事#123230554" class="illustration" width="100%" height="auto">
</a>
</div>
```
> 匹配规则如下:

```
<div class="thumb">.*?<img src="(.*?)" alt.*?</div>
```
import requests
import re
# Fetch the Qiushibaike image-ranking page and list the image URLs it embeds.
url = 'https://www.qiushibaike.com/imgrank/'
# Send a desktop Chrome User-Agent string with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}
# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
r = requests.get(url, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx reply instead of reporting zero matches for an
# error page.
r.raise_for_status()
# Capture the src attribute of the <img> inside each <div class="thumb">.
# re.S lets '.' match newlines, since the tags are spread over several lines.
url_list = re.findall(
    r'<div class="thumb">.*?<img src="(.*?)" alt.*?</div>',
    r.text,
    re.S,
)
print(url_list)
print(len(url_list))
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/af4342c2d0d84a86f307a3e6fc99daeb.png)