# Practice project. If you know of any ways to optimize it, please leave a comment.
import requests
import lxml
from bs4 import BeautifulSoup
import time
import os
# Browser-like request headers (desktop Chrome User-Agent); referenced by win().
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/81.0.4044.129 Safari/537.36"
    ),
}
def pub(url, path):
    """Fetch a page and return the elements matching a CSS selector.

    Args:
        url: Address of the page to download.
        path: CSS selector handed to BeautifulSoup's ``select``.

    Returns:
        The matched elements with the first match dropped.
    """
    response = requests.get(url)
    html = response.content.decode('utf8')
    soup = BeautifulSoup(html, 'lxml')
    matches = soup.select(path)
    # The first match is deliberately skipped by the slice.
    return matches[1:]
def index(url):
    """Follow the links under the first child div of ``div.product_query``.

    Every link returned by ``pub`` for that selector is passed, by its
    ``href``, to ``index_0``.

    Args:
        url: Address of the page to start from.
    """
    selector = 'div.product_query > div:nth-child(1) > div > div > a'
    for anchor in pub(url, selector):
        index_0(anchor['href'])
def index_0(urls):
    """Follow the links under the second child div of ``div.product_query``.

    Every link returned by ``pub`` for that selector is passed, by its
    ``href``, to ``index_1``.

    Args:
        urls: Address of the page to scan (a single URL despite the name).
    """
    selector = 'div.product_query > div:nth-child(2) > div > div > a'
    for anchor in pub(urls, selector):
        index_1(anchor['href'])
def index_1(ur):
    """Recurse through ``.next`` links, then process the page itself.

    Args:
        ur: Address of the page to download.
    """
    html = requests.get(ur).content.decode('utf8')
    # The first character of the decoded text is dropped before parsing.
    soup = BeautifulSoup(html[1:], 'lxml')
    # Pages reached through ".next" links are handled before the current one;
    # looping over an empty result is a no-op, so no explicit guard is needed.
    for next_link in soup.select('.next'):
        index_1(next_link['href'])
    index_2(ur)
def index_2(ur):
    """Hand every ``.clearfix > li > a`` link on a page to ``win``.

    Args:
        ur: Address of the page to download.
    """
    html = requests.get(ur).content.decode('utf8')
    # The first character of the decoded text is dropped before parsing.
    soup = BeautifulSoup(html[1:], 'lxml')
    for anchor in soup.select('.clearfix > li > a'):
        win(anchor['href'])
def win(url):
    """Download the ``.pic-large`` image(s) of one page into a named folder.

    The folder is created in the current working directory and named after
    the concatenated text of the breadcrumb anchors at child positions 3 and
    4. Each image is saved as ``<title>.jpg`` using the image element's
    ``title`` attribute, followed by a 2-second pause.

    Args:
        url: Address of the page to download. Values that do not contain
            ``//`` are ignored.
    """
    if '//' not in url:
        return

    # Pass the headers by keyword: the second positional argument of
    # requests.get() is ``params``, which would put the User-Agent into the
    # query string instead of the request headers.
    page = requests.get(url, headers=header).content.decode('utf8')
    soup = BeautifulSoup(page, 'lxml')
    pictures = soup.select('.pic-large')
    first_crumbs = soup.select('.breadcrumbs > a:nth-child(3)')
    second_crumbs = soup.select('.breadcrumbs > a:nth-child(4)')

    # zip() yields nothing when any of the three selections is empty.
    for picture, crumb_a, crumb_b in zip(pictures, first_crumbs, second_crumbs):
        # os.path.join keeps the path valid on every platform, not only Windows.
        folder = os.path.join(os.getcwd(), crumb_a.text + crumb_b.text)
        os.makedirs(folder, exist_ok=True)
        target = os.path.join(folder, '{}.jpg'.format(picture['title']))

        # Download before opening the file so a failed request does not
        # leave an empty .jpg behind.
        image = requests.get(picture['src']).content
        with open(target, 'wb') as file:
            file.write(image)

        # Pause between downloads.
        time.sleep(2)
if __name__ == '__main__':
    # Crawl both start pages in order.
    for url in (
        'http://www.win4000.com/wallpaper.html',
        'http://www.win4000.com/mobile.html',
    ):
        index(url)