# Multithreaded crawler for Honor of Kings (王者荣耀) HD wallpapers.
import threading
import requests
import random
from urllib import request
from urllib import parse
import os
import queue
# Candidate browser identification strings. Exactly one of them is drawn when
# the module is imported and then reused for the lifetime of the process.
_USER_AGENT_POOL = (
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
    'Mozilla / 5.0(Windows NT 10.0;Win64;x64;rv: 78.0) Gecko / 20100101Firefox / 78.0',
    'Mozilla / 4.0(compatible;MSIE7.0;WindowsNT6.1;WOW64;Trident / 5.0;SLCC2;.NETCLR2.0.50727;.NETCLR3.5.30729;.NETCLR3.0.30729;MediaCenterPC6.0;InfoPath.3;.NET4.0C;.NET4.0E) QQBrowser / 6.9.11079.201',
    'Mozilla / 5.0(Macintosh;U;IntelMacOSX10_6_8;en - us) AppleWebKit / 534.50(KHTML, likeGecko) Version / 5.1Safari / 534.50',
    'Mozilla / 5.0(iPhone;U;CPUiPhoneOS4_3_3likeMacOSX;en - us) AppleWebKit / 533.17.9(KHTML, likeGecko) Version / 5.0.2Mobile / 8J2Safari / 6533.18.5',
    'Mozilla / 5.0(Windows NT 6.1)AppleWebKit / 535.1(KHTML, likeGecko) Chrome / 13.0.782.41Safari / 535.1QQBrowser / 6.9.11079.201',
    'Mozilla / 5.0(compatible;MSIE9.0;WindowsNT6.1;WOW64;Trident / 5.0;SLCC2;.NETCLR2.0.50727;.NETCLR3.5.30729;.NETCLR3.0.30729;MediaCenterPC6.0;InfoPath.3;.NET4.0C;.NET4.0E)',
    'Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
    'Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0',
    'Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999',
    'Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
)
UserAgent = random.choice(_USER_AGENT_POOL)

# HTTP headers shared by the listing requests: the user agent drawn above plus
# a fixed referer pointing at the wallpaper page.
headers = {
    'user-agent': UserAgent,
    'referer': 'https://pvp.qq.com/web201605/wallpaper.shtml',
}
class Producer(threading.Thread):
    """Worker thread that turns listing-page URLs into image download jobs.

    Each URL taken from ``page_queue`` is fetched and parsed as JSON. For
    every entry of the response's ``'List'`` array a directory
    ``image/<name>`` is created and one job dict per image
    (``{'image_url': ..., 'image_path': ...}``) is put on ``image_queue``.
    """

    def __init__(self, page_queue, image_queue, *args, **kwargs):
        """Store the two queues; extra arguments go to ``threading.Thread``.

        Args:
            page_queue: queue of listing-page URLs to fetch.
            image_queue: queue that receives the image download jobs.
        """
        super(Producer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.image_queue = image_queue

    def run(self) -> None:
        """Drain ``page_queue`` and enqueue one job per wallpaper image."""
        while True:
            # get_nowait() instead of "if not empty(): get()": with several
            # producers, another thread may take the last URL between the
            # check and the blocking get(), leaving this thread stuck forever.
            try:
                page_url = self.page_queue.get_nowait()
            except queue.Empty:
                return

            try:
                # A timeout keeps a stalled connection from hanging the thread.
                resp = requests.get(page_url, headers=headers, timeout=10)
                datas = resp.json()['List']
            except (requests.RequestException, ValueError, KeyError) as exc:
                # One bad page should not take the whole worker down.
                print(page_url, '下载失败', exc)
                continue

            for data in datas:
                image_urls = extract_images(data)
                name = parse.unquote(data['sProdName']).replace('1:1', '').strip()
                dir_path = os.path.join('image', name)
                # makedirs(exist_ok=True) also creates the 'image' parent and
                # is safe when two threads create the same directory at once.
                os.makedirs(dir_path, exist_ok=True)
                for index, image_url in enumerate(image_urls, start=1):
                    self.image_queue.put({
                        'image_url': image_url,
                        'image_path': os.path.join(dir_path, '%d.png' % index),
                    })
class Consumer(threading.Thread):
    """Worker thread that downloads the images described on ``image_queue``.

    Each job is a dict with the keys ``'image_url'`` and ``'image_path'``.
    The thread ends once no job has arrived for ``idle_timeout`` seconds.
    """

    def __init__(self, image_queue, *args, idle_timeout=10, **kwargs):
        """Store the job queue; extra arguments go to ``threading.Thread``.

        Args:
            image_queue: queue of download jobs to consume.
            idle_timeout: seconds to wait for a new job before the thread
                stops (keyword-only, defaults to the previous fixed value).
        """
        super(Consumer, self).__init__(*args, **kwargs)
        self.image_queue = image_queue
        self.idle_timeout = idle_timeout

    def run(self) -> None:
        """Download queued images until the queue stays empty too long."""
        while True:
            # Only an empty queue ends the loop; any other error is a real
            # bug and must not be mistaken for "no more work".
            try:
                image_obj = self.image_queue.get(timeout=self.idle_timeout)
            except queue.Empty:
                break

            image_url = image_obj.get('image_url')
            image_path = image_obj.get('image_path')
            try:
                request.urlretrieve(image_url, image_path)
            except Exception as exc:
                # Best effort: report the failed image with its cause and
                # keep going with the next job.
                print(image_path, '下载失败', exc)
            else:
                print(image_path, '下载成功')
def extract_images(data):
    """Return the eight image URLs of one listing entry.

    The URLs are read from the keys ``'sProdImgNo_1'`` .. ``'sProdImgNo_8'``
    of ``data`` and percent-decoded. In each URL only the LAST occurrence of
    ``'200'`` is rewritten to ``'0'``, so a ``'200'`` that appears earlier
    (for example inside a numeric id) is left intact.
    NOTE(review): presumably the trailing ``200`` is a thumbnail-size marker
    and ``0`` requests the full-size image -- confirm against the API.

    Args:
        data: mapping holding the percent-encoded ``sProdImgNo_<n>`` values.

    Returns:
        A list of eight decoded, whitespace-stripped URLs.

    Raises:
        KeyError: if any of the eight keys is missing from ``data``.
    """
    image_urls = []
    for i in range(1, 9):
        thumb_url = parse.unquote(data['sProdImgNo_%d' % i]).strip()
        # rpartition splits on the last '200'; when there is none the marker
        # is empty and the URL is kept unchanged.
        head, marker, tail = thumb_url.rpartition('200')
        image_urls.append(head + '0' + tail if marker else thumb_url)
    return image_urls
def main():
    """Ask for a page range, then start the producer and consumer threads.

    The user enters a first and a last page (inclusive). One listing URL per
    page is queued, then 6 ``Producer`` and 15 ``Consumer`` threads are
    started to fetch the listings and download the images.
    """
    page_queue = queue.Queue()
    image_queue = queue.Queue()
    start_page = int(input('起始页:'))
    end_page = int(input('结束页:'))

    # Every entry gets its own sub-directory below this root, so make sure
    # the root exists before any worker starts.
    os.makedirs('image', exist_ok=True)

    # The pages entered are inclusive; the `page` query parameter is treated
    # as 0-based (NOTE(review): inferred from the original start - 1 -- confirm
    # against the API). range() already excludes its upper bound, so the
    # last index is end_page - 1 and the bound is simply end_page.
    first_index = max(start_page - 1, 0)
    for i in range(first_index, end_page):
        page_url = 'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page={page}&iOrder=0&iSortNumClose=1&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=1597591362368'.format(page=i)
        page_queue.put(page_url)

    for _ in range(6):
        producer = Producer(page_queue, image_queue)
        producer.start()
    for _ in range(15):
        consumer = Consumer(image_queue)
        consumer.start()
# Start the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()