# 导包 (import packages)
import requests
import pandas as pd
from lxml import etree
# 目标网站 (target website)
# URL of the photo listing page that gets downloaded and parsed.
target = 'https://www.veer.com/photo/'
# 请求头 (request headers)
# Desktop-Chrome User-Agent string sent with the page request.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'

# Extra HTTP headers passed to requests.get().
headers = {'User-Agent': _USER_AGENT}
# 获取响应 (get the response)
# Download the listing page.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang the script forever.
# - raise_for_status(): fail loudly on a 4xx/5xx answer instead of parsing an
#   error page as if it were the listing.
html = requests.get(target, headers=headers, timeout=10)
html.raise_for_status()
# xpath解析 (parse with XPath)
# Parse the downloaded HTML text into an lxml element tree.
req = etree.HTML(html.text)
# `src` attribute of every <img> found under
# div.site_width/section/div/article/a/figure.
src = req.xpath('//div[@class="site_width"]/section/div/article/a/figure/img/@src')
# Text nodes of the <figcaption> elements in those same figures; they are used
# later as the file names of the saved images.
# NOTE(review): the two queries run independently, so `src` and `name` only
# line up if every figure yields exactly one caption text node — confirm.
name = req.xpath('//div[@class="site_width"]/section/div/article/a/figure/figcaption/text()')
# 通过pandas获取图片的真实地址 (build the real image URLs with pandas)
# Collect the scraped values in a two-column table: 'src' and 'name'.
data = pd.DataFrame()
data['src'] = src
# Prefix every value with 'http:' — presumably the page emits protocol-relative
# image URLs ("//host/path"); TODO confirm against the live markup.
data['src'] = 'http:'+data['src'].astype('str')
# NOTE(review): pandas raises ValueError here when `name` and `src` have
# different lengths.
data['name'] = name
# 写入本地文件 (write to a local file)
def save(name, content):
    """Write downloaded bytes to ``images/<name>.jpg``.

    The ``images`` directory is created on first use. Path separators and
    the characters Windows forbids in file names (backslash, slash, colon,
    asterisk, question mark, double quote, angle brackets, pipe) are
    replaced with ``_`` so that an arbitrary scraped caption cannot produce
    an invalid path or escape the ``images`` directory via a separator.

    Args:
        name: Base file name without extension; converted with ``str()``.
        content: Raw bytes to write.

    Raises:
        OSError: If the directory or the file cannot be written.
    """
    # Local import keeps this function self-contained; the file's import
    # block does not bring in ``os``.
    import os

    os.makedirs('images', exist_ok=True)
    unsafe = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    safe_name = str(name).translate(unsafe)
    with open('images/' + safe_name + '.jpg', 'wb') as f:
        f.write(content)
# 遍历图片地址,下载 (iterate over the image URLs and download each one)
if __name__ == '__main__':
    # Download every scraped image and store it under its caption.
    # The loop variables are named `url`/`caption` so the module-level `name`
    # list is not overwritten while iterating.
    for url, caption in zip(data['src'], data['name']):
        print(url)
        try:
            # Send the same headers as the page request and bound the wait so
            # a single stalled download cannot hang the whole run.
            resp = requests.get(url, headers=headers, timeout=10)
            # Do not save an HTTP error page as a .jpg file.
            resp.raise_for_status()
        except requests.RequestException as exc:
            # One broken link should not abort the remaining downloads.
            print('download failed:', url, exc)
            continue
        save(caption, resp.content)
# 完整代码 (complete code)
import requests
import pandas as pd
from lxml import etree
# URL of the photo listing page that gets downloaded and parsed.
target = 'https://www.veer.com/photo/'

# Extra HTTP headers for the request: a desktop-Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
}

# Download the listing page.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang the script forever.
# - raise_for_status(): fail loudly on a 4xx/5xx answer instead of parsing an
#   error page as if it were the listing.
html = requests.get(target, headers=headers, timeout=10)
html.raise_for_status()

# Parse the HTML and pull out the image `src` attributes and the <figcaption>
# text nodes of the figures inside div.site_width.
# NOTE(review): the two queries run independently, so `src` and `name` only
# line up if every figure yields exactly one caption text node — confirm.
req = etree.HTML(html.text)
src = req.xpath('//div[@class="site_width"]/section/div/article/a/figure/img/@src')
name = req.xpath('//div[@class="site_width"]/section/div/article/a/figure/figcaption/text()')

# Collect the scraped values in a two-column table: 'src' and 'name'.
data = pd.DataFrame()
data['src'] = src
# Prefix every value with 'http:' — presumably the page emits protocol-relative
# image URLs ("//host/path"); TODO confirm against the live markup.
data['src'] = 'http:' + data['src'].astype('str')
# NOTE(review): pandas raises ValueError here when `name` and `src` have
# different lengths.
data['name'] = name
def save(name, content):
    """Write downloaded bytes to ``images/<name>.jpg``.

    The ``images`` directory is created on first use. Path separators and
    the characters Windows forbids in file names (backslash, slash, colon,
    asterisk, question mark, double quote, angle brackets, pipe) are
    replaced with ``_`` so that an arbitrary scraped caption cannot produce
    an invalid path or escape the ``images`` directory via a separator.

    Args:
        name: Base file name without extension; converted with ``str()``.
        content: Raw bytes to write.

    Raises:
        OSError: If the directory or the file cannot be written.
    """
    # Local import keeps this function self-contained; the file's import
    # block does not bring in ``os``.
    import os

    os.makedirs('images', exist_ok=True)
    unsafe = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    safe_name = str(name).translate(unsafe)
    with open('images/' + safe_name + '.jpg', 'wb') as f:
        f.write(content)
if __name__ == '__main__':
    # Download every scraped image and store it under its caption.
    # The loop variables are named `url`/`caption` so the module-level `name`
    # list is not overwritten while iterating.
    for url, caption in zip(data['src'], data['name']):
        print(url)
        try:
            # Send the same headers as the page request and bound the wait so
            # a single stalled download cannot hang the whole run.
            resp = requests.get(url, headers=headers, timeout=10)
            # Do not save an HTTP error page as a .jpg file.
            resp.raise_for_status()
        except requests.RequestException as exc:
            # One broken link should not abort the remaining downloads.
            print('download failed:', url, exc)
            continue
        save(caption, resp.content)