#爬取图片
from urllib import request, parse
import re
import json
# Paged forum listing: each request appends a "pn" offset to this base URL.
url = 'https://tieba.baidu.com/f?kw=csgo&ie=utf-8'
# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0"
}
# The "pn" query parameter advances by this many entries per listing page.
POSTS_PER_PAGE = 50
# Absolute http/https URL ending in ".jpg". The character class stops the
# match at whitespace, quotes and angle brackets so it cannot run across
# HTML attributes or tags into unrelated text.
IMAGE_URL_PATTERN = re.compile(r"""https?://[^\s"'<>]+?\.jpg""")


def build_page_url(page):
    """Return the listing URL for the zero-based page index *page*."""
    return "%s&pn=%d" % (url, page * POSTS_PER_PAGE)


def extract_image_links(html):
    """Return every .jpg image URL found in *html*, in document order."""
    return IMAGE_URL_PATTERN.findall(html)


def fetch_page(page_url):
    """Download *page_url* and return its body decoded as UTF-8 text."""
    req = request.Request(url=page_url, headers=headers)
    # The context manager closes the connection even if read() raises.
    with request.urlopen(req) as response:
        # A stray invalid byte should not abort the crawl; the links we
        # look for are plain ASCII, so replacement characters are harmless.
        return response.read().decode("utf-8", errors="replace")


def main():
    """Ask for a page count, then print the image links found on each page."""
    page_count = int(input("请输入要访问的页数"))
    for page in range(page_count):
        html = fetch_page(build_page_url(page))
        for link in extract_image_links(html):
            print("开始爬取:%s" % link)
            # NOTE: only the link is reported. Saving the file (e.g. with
            # request.urlretrieve into ../images/) is intentionally left
            # disabled, as it was in the script this replaces.


if __name__ == "__main__":
    main()
# 2020/11/11: Web scraping basics (part 2) - scraping images
# (Blog page residue: "latest recommended article published 2023-10-22 12:00:00")