# demo.py (multi-threaded crawler)
# coding=utf-8
import requests
from lxml import etree
import threading
from queue import Queue
class QiubaiSpdier:
def __init__(self):
self.url_temp = "https://www.qiushibaike.com/8hr/page/{}/"
self.headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"}
self.url_queue = Queue()
self.html_queue = Queue()
self.content_queue = Queue()
def get_url_list(self):
for i in range(1,14):
self.url_queue.put(self.url_temp.format(i))
def parse_url(self):
while True:
url = self.url_queue.