# 方法 (Method)
# 代码 (Code)
import os
import threading
from queue import Empty
from queue import Queue

import requests
from bs4 import BeautifulSoup
# Producer/consumer handshake: True while producer threads may still be
# queueing image URLs; cleared by get_source_pic when production ends.
flag = True
# 1-based running count of saved images, shared by consumer threads and
# guarded by the lock passed to savepic.
index = 1
# Default HTTP headers: a desktop-browser User-Agent to avoid trivial
# bot blocking by the target site.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0'
}
def get_url_lists(start=6, end=9):
    """Collect detail-page URLs from the gallery's list pages.

    Fetches list pages ``start`` .. ``end - 1`` of the qingchunmeinv
    category and extracts the link of every picture entry.

    Args:
        start: first page number to fetch, inclusive (default 6 — the
            range the original script was hard-coded to).
        end: page number to stop before, exclusive (default 9).

    Returns:
        list[str]: detail-page URLs in page order.
    """
    url_lists = []
    for page in range(start, end):
        print( '正在获取第{}页所有URL'.format( page ) )  # "fetching all URLs on page N"
        list_url = "https://mm.enterdesk.com/qingchunmeinv/{}.html".format(page)
        html = requests.get(list_url, headers=headers).text
        soup = BeautifulSoup(html, 'lxml')
        # Each gallery entry is an <a> under dl.egeli_pic_dl > dd.
        url_lists.extend(a['href'] for a in soup.select('dl.egeli_pic_dl>dd>a'))
    return url_lists
def get_source_pic(url, q):
    """Producer: scrape full-size image URLs from one detail page into ``q``.

    Args:
        url: detail-page URL (an element of get_url_lists()'s result).
        q: queue.Queue that the consumer (savepic) drains.

    Side effects:
        Clears the module-level ``flag`` when this producer finishes.
        NOTE(review): with one producer thread per page, the *first* thread
        to finish clears the flag while siblings may still be queueing, so
        the consumer can observe an empty queue and exit early. A proper fix
        needs cross-thread coordination (e.g. an outstanding-producer count);
        main() mitigates it by clearing the flag again after joining.
    """
    global flag
    # The module-level ``headers`` is reused; the original defined an
    # identical local copy, which was redundant.
    html = requests.get(url, headers=headers).text
    soup = BeautifulSoup(html, 'lxml')
    for node in soup.select('a.pics_pics'):
        # Swap the thumbnail path segment for the full-resolution one.
        q.put(node['src'].replace('edpic', 'edpic_source'))
    flag = False
def savepic(q, i_lock):
    """Consumer: pull image URLs off ``q`` and save each image to disk.

    Loops until producers have finished (module-level ``flag`` is False)
    and the queue is drained. Files that already exist are skipped.

    Args:
        q: queue.Queue of full-size image URLs.
        i_lock: threading.Lock guarding the shared ``index`` counter.
    """
    global index
    while flag or q.qsize() > 0:
        try:
            # Bounded wait so we re-check the exit condition instead of
            # blocking forever on a queue that emptied between the
            # qsize() check and this get() (the original deadlocked here
            # while still holding the lock).
            url = q.get(timeout=1)
        except Empty:
            continue
        with i_lock:  # released even if print/arithmetic raises
            print("保存第%d张" % index)  # "saving image #N"
            index += 1
        # The last 12 characters of the URL double as the file name.
        path = r'F:\\python文档\\wallpaper\pic4\\' + url[-12:]
        if not os.path.exists(path):
            res = requests.get(url, headers=headers)
            with open(path, 'wb') as f:
                f.write(res.content)
def main():
    """Entry point: fan out one producer thread per detail page and one
    consumer thread that downloads the queued images.

    Fixes over the original: producer threads are actually collected and
    joined (the original built an empty, unused ``thread_list``), the
    stop flag is re-cleared only after all producers are done, and the
    consumer is joined so main() returns only when saving has finished.
    """
    global flag
    q = Queue()
    i_lock = threading.Lock()

    producers = []
    for url in get_url_lists():
        t = threading.Thread(target=get_source_pic, args=(url, q))
        t.start()
        producers.append(t)

    consumer = threading.Thread(target=savepic, args=(q, i_lock))
    consumer.start()

    # Wait for every producer, then signal end-of-production. Producers
    # also clear the flag individually (racy — see get_source_pic), so
    # this is the authoritative, safe signal.
    for t in producers:
        t.join()
    flag = False
    consumer.join()
# Run the scraper only when executed as a script, not on import.
if __name__ == '__main__':
    main()