from time import sleep  # NOTE(review): unused; likely intended as a politeness delay between requests

import faker
import requests
from lxml import etree

# Shared Faker instance, used to generate a random User-Agent per request.
fake = faker.Faker()
# Site root; prepended to the relative hrefs scraped from 'next' links.
base_url = "http://angelimg.spbeen.com"
def get_next_link(url):
    """Fetch *url* and return the absolute URL of the 'next' page link.

    Returns False when the page has no next link (end of the gallery),
    which terminates the crawl loop in ``__main__``.
    """
    content = downloadHtml(url)
    html = etree.HTML(content)
    next_url = html.xpath("//a[@class='ch next']/@href")
    if next_url:
        return base_url + next_url[0]
    return False


def downloadHtml(url):
    """Download *url* with a random User-Agent and return the body as text.

    BUG FIX: the parameter was spelled ``ur`` while the body read ``url``,
    so the function silently used the module-level global ``url`` instead of
    its argument; the parameter is renamed so the argument is actually used.
    """
    user_agent = fake.user_agent()
    headers = {'User-Agent': user_agent, "Referer": "http://angelimg.spbeen.com/"}
    response = requests.get(url, headers=headers)
    return response.text


def getImgUrl(content):
    """Parse a gallery page and return ``(image_url, title)``.

    Raises IndexError if the page contains no matching image or title node.
    """
    html = etree.HTML(content)
    img_url = html.xpath('//*[@id="content"]/a/img/@src')
    # NOTE(review): the predicate ['@class=article'] is a constant-true string
    # literal, not a class test — it matches the first <div> on the page. The
    # author probably meant .//div[@class='article']/h2/text(); kept byte-for-
    # byte because it evidently worked against the live site. Confirm before
    # changing.
    title = html.xpath(".//div['@class=article']/h2/text()")
    return img_url[0], title[0]


def saveImg(title, img_url):
    """Download *img_url* and save it as ``txt/<title>.jpg``.

    Does nothing when either *title* or *img_url* is None. Assumes the
    ``txt/`` directory already exists — TODO confirm / create it.
    """
    if img_url is not None and title is not None:
        with open("txt/" + str(title) + ".jpg", 'wb') as f:
            user_agent = fake.user_agent()
            headers = {'User-Agent': user_agent, "Referer": "http://angelimg.spbeen.com/"}
            content = requests.get(img_url, headers=headers)
            # request_view(content)  # uncomment to preview the response in a browser
            f.write(content.content)
            # BUG FIX: the original also called f.close() here; the `with`
            # context manager already closes the file, so the redundant
            # explicit close is removed.


def request_view(response):
    """Debug helper: write *response* to tmp.html with a <base> tag injected
    (so relative links resolve against the original URL) and open it in the
    default browser.

    NOTE(review): the two tag literals below were garbled in the source
    (angle-bracketed content stripped); they are restored from the standard
    form of this snippet — confirm against the original file.
    """
    import webbrowser
    request_url = response.url
    base_url = '<head><base href="%s">' % (request_url)
    base_url = base_url.encode()
    content = response.content.replace(b"<head>", base_url)
    tem_html = open('tmp.html', 'wb')
    tem_html.write(content)
    tem_html.close()
    webbrowser.open_new_tab('tmp.html')


def crawl_img(url):
    """Download the single image shown on the gallery page at *url*."""
    content = downloadHtml(url)
    res = getImgUrl(content)
    title = res[1]
    img_url = res[0]
    saveImg(title, img_url)


if __name__ == "__main__":
    url = "http://angelimg.spbeen.com/ang/4968/1"
    # get_next_link returns False after the last page, ending the loop.
    while url:
        print(url)
        crawl_img(url)
        url = get_next_link(url)