# Crawl car detail pages using multiple threads (producer/consumer)
import requests
from lxml import etree
from queue import Queue
import threading
import json
# Running count of parsed cars; xiaofeizhe.get_data increments it and stores
# the new value under the "num" key of each record.
num = 0
# Producer thread: downloads the page behind every queued detail-page link
class shengchanshang(threading.Thread):
    """Producer thread: downloads detail pages and hands the HTML to consumers.

    Each instance repeatedly takes a URL from its ``car_queue``, fetches it
    and puts the decoded page text on the module-level ``url_queue``.
    """

    # Seconds to wait on the server before giving up on a single page.
    request_timeout = 10

    def __init__(self, car_queue):
        """Remember the queue of detail-page URLs this thread will drain.

        Args:
            car_queue: queue of URL strings, shared by all producer threads.
        """
        threading.Thread.__init__(self)
        self.car_queue = car_queue

    def run(self):
        """Fetch URLs from ``self.car_queue`` until the queue is empty."""
        # Imported here because the file only imports ``Queue`` from ``queue``.
        from queue import Empty

        while True:
            try:
                # Non-blocking get: with several producers another thread can
                # take the last URL between an ``empty()`` check and a
                # blocking ``get()``, which would then wait forever.
                url = self.car_queue.get_nowait()
            except Empty:
                break
            self.get_response(url)

    def get_response(self, url):
        """Download ``url`` and put its UTF-8 decoded body on ``url_queue``.

        A page that cannot be fetched or decoded is reported with ``print``
        and skipped, so one bad URL does not end the whole thread.

        Args:
            url: address of the detail page to download.
        """
        headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
        }
        try:
            # The timeout keeps a stalled server from blocking this thread
            # (and therefore the join in the main script) indefinitely.
            reply = requests.get(url, headers=headers, timeout=self.request_timeout)
            response = reply.content.decode('utf-8')
        except (requests.RequestException, UnicodeDecodeError) as exc:
            print('skipping', url, exc)
            return
        url_queue.put(response)
class xiaofeizhe(threading.Thread):
    """Consumer thread: parses downloaded detail pages and records each car.

    Pages are taken from the module-level ``url_queue``. The thread stops
    once the module-level ``flag`` is true and no page arrives within
    ``poll_timeout`` seconds.
    """

    # Seconds to wait for a page before re-checking ``flag``.
    poll_timeout = 4
    # Shared by every consumer: guards the global ``num`` counter and the
    # output file so concurrent threads cannot interleave updates.
    _output_lock = threading.Lock()
    # False until the first record is written; the first write truncates
    # 'che.txt', later writes append to it.
    _file_started = False

    def run(self):
        """Parse pages from ``url_queue`` until the producers are finished."""
        # Imported here because the file only imports ``Queue`` from ``queue``.
        from queue import Empty

        while True:
            try:
                response = url_queue.get(timeout=self.poll_timeout)
            except Empty:
                # Only stop when ``flag`` says no more pages will arrive;
                # otherwise the producers are merely slow, so keep waiting.
                if flag:
                    break
                continue
            try:
                self.get_data(response)
            except Exception as exc:
                # Per-page boundary: report the failure and keep consuming
                # instead of silently ending the thread.
                print('failed to parse page:', exc)

    @staticmethod
    def _first(values):
        """Return the first item of ``values``, or None when it is empty."""
        return values[0] if values else None

    def get_data(self, response):
        """Extract one car record from ``response`` and store it.

        The record is printed and written to 'che.txt' as one JSON document
        per line. Fields missing from the page are stored as None.

        Args:
            response: HTML text of a detail page.
        """
        global num
        tree = etree.HTML(response)
        photo = tree.xpath('//img[@class="lazy-slider"]/@src')
        title = tree.xpath('//h2[@class="banner-tit"]/text()')
        price = tree.xpath('//p[@class="price "]/text()')
        # Vehicle configuration: item names and their descriptions.
        peizhi_name = tree.xpath('//p[@class="config-tit-top"]/text()')
        peizhi_jieshao = tree.xpath('//p[@class="config-tit-foot"]/text()')
        # zip stops at the shorter list, so a name without a description
        # cannot raise IndexError.
        peizhi = [{name: jieshao} for name, jieshao in zip(peizhi_name, peizhi_jieshao)]
        with self._output_lock:
            num += 1
            data = {
                "num": num,
                "车名": self._first(title),
                "图片": self._first(photo),
                "价格": self._first(price),
                "车辆配置": peizhi,
            }
            print(data)
            # Reopening with 'w' for every record would keep only the last
            # one, so only the first write of a run truncates the file.
            mode = 'a' if xiaofeizhe._file_started else 'w'
            with open('che.txt', mode, encoding='utf-8') as fp:
                fp.write(json.dumps(data) + '\n')
            xiaofeizhe._file_started = True
#
# Helper that extracts all the detail-page links from one listing page
def biaoqian(a):
    """Return the detail-page links found on one listing page.

    Args:
        a: URL of the listing page to download.

    Returns:
        The ``href`` values of every ``<a>`` directly inside a ``<div>``
        whose class attribute is exactly "list-wrap clearfix".

    Raises:
        requests.RequestException: if the page cannot be downloaded,
            including when the server does not answer within the timeout.
    """
    # Without a timeout requests waits indefinitely on a stalled server,
    # which would hang the script before any worker thread is started.
    response = requests.get(a, timeout=10).content.decode('utf-8')
    tree = etree.HTML(response)
    return tree.xpath('//div[@class="list-wrap clearfix"]/a/@href')
# Set to True by the main script once every producer thread has been joined;
# consumer threads read it to decide when they may stop.
flag = False
# Hand-off queue: producers put downloaded page text, consumers take it.
url_queue = Queue()
if __name__ == '__main__':
    # Fill the work queue with the detail-page links of listing pages 1-10.
    car_queue = Queue()
    wangye = ['https://www.maodou.com/car/list/all/pg' + str(page) for page in range(1, 11)]
    for listing_url in wangye:
        for detail_url in biaoqian(listing_url):
            car_queue.put(detail_url)

    # Three producer threads download the queued detail pages.
    shengchan = [shengchanshang(car_queue) for _ in range(3)]
    for producer in shengchan:
        producer.start()

    # Three consumer threads parse whatever the producers deliver.
    xiaofei = [xiaofeizhe() for _ in range(3)]
    for consumer in xiaofei:
        consumer.start()

    # Wait for the downloads to finish, then signal the consumers that no
    # further pages will arrive before waiting for them as well.
    for producer in shengchan:
        producer.join()
    flag = True
    for consumer in xiaofei:
        consumer.join()