import os
from urllib.parse import urljoin

import parsel
import requests
# Headers sent with every HTTP request; the User-Agent is a browser-style string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}

# Prefix used to turn the hrefs found on a listing page into full thread URLs.
BASE_URL = 'https://tieba.baidu.com'
# Listing URL; ``{}`` receives ``page * POSTS_PER_PAGE`` as the ``pn`` parameter
# (presumably the listing offset -- confirm against the site).
LIST_URL_TEMPLATE = "https://tieba.baidu.com/f?kw=%E7%BE%8E%E5%A5%B3&ie=utf-8&pn={}"
POSTS_PER_PAGE = 50
# Number of listing pages crawled by default.
PAGE_COUNT = 4
# Destination file pattern; ``{}`` receives the running image number.
SAVE_PATH_TEMPLATE = r'C:\Users\DELL\Desktop\python_wd\mig\百度贴吧\{}.jpg'
# Seconds to wait on any single request before giving up on it.
REQUEST_TIMEOUT = 10


def list_page_url(page):
    """Return the listing URL for the zero-based *page* index."""
    return LIST_URL_TEMPLATE.format(page * POSTS_PER_PAGE)


def thread_url(href):
    """Return the absolute thread URL for an *href* taken from a listing page.

    ``urljoin`` is used instead of plain concatenation so that an href which
    is already absolute is passed through unchanged.
    """
    return urljoin(BASE_URL, href)


def fetch(url):
    """Return the response for *url*, or ``None`` when the request fails.

    A timeout is always supplied so a stalled connection cannot hang the
    crawl, and HTTP error statuses count as failures so that an error page
    is never parsed or saved as an image. Failures are reported and skipped
    rather than aborting the whole run.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print("\t请求失败，已跳过: {} ({})".format(url, exc))
        return None
    return response


def save_image(content, number):
    """Write the bytes *content* to the file numbered *number*.

    The target directory is created first if the path has one and it does
    not exist yet.
    """
    path = SAVE_PATH_TEMPLATE.format(number)
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, 'wb') as f:
        f.write(content)


def main(page_count=PAGE_COUNT):
    """Crawl *page_count* listing pages and save every post image found.

    For each listing page the thread links are extracted, each thread page
    is fetched, and every ``BDE_Image`` image in it is downloaded. Images
    are numbered consecutively starting from 1 across the whole run.
    """
    image_number = 1
    for page in range(page_count):
        print("\n正在获取第{}页数据".format(page + 1))
        listing = fetch(list_page_url(page))
        if listing is None:
            continue
        hrefs = parsel.Selector(listing.text).xpath(
            '//div[@class="threadlist_lz clearfix"]/div/a/@href').extract()

        for href in hrefs:
            thread = fetch(thread_url(href))
            if thread is None:
                continue
            image_urls = parsel.Selector(thread.text).xpath(
                '//cc/div/img[@class="BDE_Image"]/@src').extract()

            for image_url in image_urls:
                image = fetch(image_url)
                if image is None:
                    continue
                print("\t正在保存第{}张图片".format(image_number))
                save_image(image.content, image_number)
                # Advance only after a successful save so numbering has no gaps.
                image_number += 1


if __name__ == "__main__":
    main()
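# 贴吧美女图片获取 (article title: fetching images from a Baidu Tieba forum)
# 最新推荐文章于 2021-12-20 18:00:51 发布 (page footer: latest recommended article published 2021-12-20 18:00:51)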