import requests
from lxml import etree
import threading
import queue
import time
request_url=queue.Queue()
url_queue=queue.Queue()
last_queue=queue.Queue()
mutex=threading.Lock()
url_lock=threading.Lock()
pic_downlode=threading.Lock()
for i in range(40):
url=f'https://www.fabiaoqing.com/bqb/lists/type/liaomei/page/{i}.html'
request_url.put(url)
#访问网站并拿到套图的url
class 请求(threading.Thread):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}
def __init__(self,request_url,url_queue,loc):
super(请求, self).__init__()
self.q=request_url
self.queue=url_queue
self.loc=loc
def run(self):
while 1:
self.loc.acquire()
urls=self.q.get()
self.loc.release()
reponse = requests.get(url=urls,headers=self.headers).text
self.parse(reponse)
if request_url.empty():
break
def parse(self,response):
tree =etree.HTML(response)
a =tree.xpath('//div//a[@class="bqba"]/@href')
for i in a:
url2='https://www.fabiaoqing.com'+i
self.loc.acquire()
self.queue.put(url2)
self.loc.release()
time.sleep(0.2)
#访问套图的url拿到表情包的链接
class pic_url(threading.Thread):
def __init__(self,queue,last_queue,loc):
super(pic_url, self).__init__()
self.queue=queue
self.last_queue=last_queue
self.loc=loc
def run(self):
while 1:
self.loc.acquire()
pic =self.queue.get()
self.loc.release()
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}
reponse =requests.get(url=pic,headers=headers).text
tree_= etree.HTML(reponse)
tree_ =tree_.xpath('//img[@class="bqbppdetail lazy"]/@data-original')
for x in tree_:
last_queue.put(x)
time.sleep(0.5)
if self.queue.empty():
break
#下载表情包
class downlode(threading.Thread):
def __init__(self,que,loc):
super(downlode, self).__init__()
self.que=que
self.loc=loc
def run(self):
while 1:
self.loc.acquire()
a =self.que.get()
self.loc.release()
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}
filename='E:\\biaoqing\\' +a[-36:]
s =requests.get(url=a,headers=headers)
time.sleep(0.1)
with open(filename,'wb') as a:
a.write(s.content)
if self.que.empty():
break
if __name__ == '__main__':
time.sleep(0.1)
for mount in range(3):
t=请求(request_url,url_queue,mutex)
t.start()
time.sleep(3)
for xd in range(5):
cd =pic_url(url_queue,last_queue,url_lock)
cd.start()
for sd in range(10):
asd=downlode(last_queue,pic_downlode)
asd.start()
这个网站有些问题,当请求次数过于频繁时,第二步获取表情包链接会获取空列表,所以需要用time.sleep