# Download images with a simple web crawler.
# I have only been learning web scraping for a few days — feedback is welcome!
# This crawls a Baidu Tieba forum thread; set the URL yourself.
import requests
from lxml import etree
"""
获取详情页的url
获取图片url,下载保存
构造下一页详情页url
循坏
"""
def url_names(n):
    """Build a Tieba detail-page URL by appending *n* to the site root.

    *n* may be an int page counter or a path string; it is stringified
    and concatenated directly onto the base URL.
    """
    return f"https://tieba.baidu.com{n}"
def next_url(next_url_if, n):
    """Advance the page counter, or signal completion.

    *next_url_if* is the list of pager-link texts scraped from the page.
    Returns ``n + 1`` when an exact "下一页" (next page) entry is present,
    otherwise the sentinel string ``"end"`` that stops the main loop.
    """
    if "下一页" not in next_url_if:
        return "end"
    return n + 1
def img_url_name(url_name, img_list):
    """Fetch one detail page and collect its image URLs.

    Args:
        url_name: full URL of the thread detail page.
        img_list: accumulator list; image ``src`` URLs found on this page
            are appended to it in place (parameter renamed from ``list``,
            which shadowed the builtin).

    Returns:
        A tuple ``(pager_texts, img_list)`` where ``pager_texts`` is the
        list of pager-link texts (used by the caller to detect a
        "下一页" / next-page link).

    Raises:
        requests.HTTPError: if the server returns an error status.
        requests.Timeout: if the request exceeds the timeout.
    """
    # timeout so a stalled connection cannot hang the crawl forever
    resp = requests.get(url_name, timeout=10)
    resp.raise_for_status()  # fail loudly instead of parsing an error page
    html = etree.HTML(resp.text)
    # extend() replaces the original per-item append loop
    img_list.extend(html.xpath("//div//img[@class='BDE_Image']/@src"))
    next_if_else = html.xpath("//li[@class='l_pager pager_theme_4 pb_list_pager']/a[@href]/text()")
    return next_if_else, img_list
def downloader(img_path):
    """Download every image URL in *img_path* into the working directory.

    Files are named by their position in the list: ``0.jpg``, ``1.jpg``, …
    Prints a confirmation message after each successful save.

    Args:
        img_path: iterable of image URLs.

    Raises:
        requests.HTTPError: if a download returns an error status.
        requests.Timeout: if a request exceeds the timeout.
    """
    for index, img_data_url in enumerate(img_path):
        # timeout prevents one dead image host from stalling the whole run
        req = requests.get(img_data_url, timeout=10)
        req.raise_for_status()  # avoid saving an HTML error page as a .jpg
        with open(f"{index}.jpg", "wb") as f:
            f.write(req.content)
        print("保存成功")
def main():
    """Crawl the thread page by page, then download all collected images.

    Starts at page marker 1; ``next_url`` returns the sentinel ``"end"``
    when the page has no "下一页" link, which terminates the loop.
    """
    n = 1
    img_urls = []  # renamed from `list`, which shadowed the builtin
    while n != "end":
        page_url = url_names(n)
        pager_texts, img_urls = img_url_name(page_url, img_urls)
        n = next_url(pager_texts, n)
    downloader(img_urls)
    print("下载完成!")
if __name__ == "__main__":
main()