While browsing the site I really, really liked one of the wallpapers, but the file from the once-a-day free download turned out to be a plain bitmap, so I tried crawling the pages myself to see what format the originals actually come in.
import os
import re

import requests
from bs4 import BeautifulSoup as bs


def url_open(url):
    # Fetch a page with a browser-like User-Agent so the site does not reject the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER'}
    res = requests.get(url, headers=headers)
    return res


def findlinks(res):
    # Pull the detail-page links (.../xxxx.html) out of the thumbnail list on the index page.
    soup = bs(res.text, 'lxml')
    target = soup.find_all('ul', class_='clearfix')
    links = re.findall(r'href="(.*?\.html)"', str(target))
    return links


def find_img(links):
    # Turn the relative detail-page paths into absolute URLs.
    urlhead = 'http://pic.netbian.com'
    return [urlhead + each for each in links]


def save_img(pages):
    # Open each detail page, find the .jpg it embeds, and save it to the current directory.
    urlhead = 'http://pic.netbian.com'
    for each in pages:
        res = url_open(each)
        soup = bs(res.content, 'lxml')
        link = re.findall(r'src="(/.*\.jpg)"', str(soup))
        if not link:  # skip pages where no .jpg was matched
            continue
        url = urlhead + link[0]
        filename = url.split('/')[-1]
        img = url_open(url)
        with open(filename, 'wb') as f:
            f.write(img.content)


if __name__ == '__main__':
    os.makedirs('彼岸图', exist_ok=True)  # create the download folder if it does not exist yet
    os.chdir('彼岸图')
    url = 'http://pic.netbian.com/4kdongman/index.html'
    res = url_open(url)
    links = findlinks(res)
    pages = find_img(links)
    save_img(pages)
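Since the whole point was to find out what format the downloaded files really are, here is a minimal follow-up sketch for checking that. It assumes the Pillow package is installed (it is not used by the crawler above) and that the script has already filled the 彼岸图 folder; it opens every file there and prints the format Pillow actually detects, regardless of the file extension:

import os
from PIL import Image  # assumption: Pillow is installed (pip install pillow)

folder = '彼岸图'  # the same download folder the crawler uses
for name in os.listdir(folder):
    path = os.path.join(folder, name)
    try:
        with Image.open(path) as im:
            # im.format is the real on-disk format, e.g. JPEG or PNG
            print(name, im.format, im.size)
    except OSError:
        print(name, 'is not a recognizable image file')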
Learned from and referenced @猫先生的早茶 — the write-up there explains everything in great detail,
as well as the thread "[Solved] Is it possible to crawl HD wallpapers from the 彼岸图网 site???"
https://fishc.com.cn/thread-118301-1-1.html
(Source: FishC Forum)