# 学习爬虫不知道第多少天了,今天用爬虫爬一些你懂的网站,冗余代码太多,仅供相互交流
import requests
from lxml import etree
class Sprider():
""" 设置爬取,返回一个html传递给解析函数"""
def __init__(self,url):
self.url=url
def sprider(self):
    """Download the page at ``self.url`` and return its HTML text.

    Returns:
        str: The response body as decoded text (``Response.text``), ready
        to be passed to ``resolving``.

    Raises:
        Any exception raised by ``requests.get`` propagates to the caller;
        nothing is caught here.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
    }
    # Use the URL stored by __init__; the previous code read a bare ``url``
    # name that is not defined anywhere in this method.
    # NOTE(review): verify=False turns off TLS certificate verification. It
    # is kept to preserve existing behaviour -- confirm it is really needed.
    response = requests.get(self.url, headers=headers, verify=False)
    # Return the text itself. A stray trailing comma used to wrap it in a
    # 1-tuple, which is not the string resolving() hands to etree.HTML().
    return response.text
def resolving(self, html):
    """Parse an HTML document and collect image sources and alt texts.

    Args:
        html: HTML markup to be parsed with ``etree.HTML``.

    Returns:
        tuple: ``(content, name)`` where ``content`` is the list of
        ``data-original`` attribute values and ``name`` is the list of
        ``alt`` attribute values of ``<img>`` elements that are direct
        children of an ``<a>`` located anywhere inside a ``<div>``.
    """
    tree = etree.HTML(html)
    # NOTE: the two lists come from independent XPath queries, so they only
    # line up index-by-index if every matched <img> carries both attributes.
    image_sources = tree.xpath("//div//a/img/@data-original")
    image_names = tree.xpath("//div//a/img/@alt")
    return image_sources, image_names
def spriderPicture(self,url,name):
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, li