from threading import Thread
from queue import Queue
import requests
from lxml import etree
import os
import time
class CrawlInfo(Thread):
def __init__(self,url_queue,html_queue):
Thread.__init__(self)
self.url_queue = url_queue
self.html_queue = html_queue
def run(self):
headers = {
'Connection': 'close',
'referer': 'https://www.xiurenji.com/XiuRen/',
'User-Agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64)AppleWebKit/537.36(KHTML,like Gecko)Chrome/86.0.4240.183 Safari/537.36'
}
while self.url_queue.empty() == False:
url = self.url_queue.get()
reponse = requests.get(url=url,headers=headers)
page_text = reponse.text.encode('ISO-8859-1').decode('GB18030')
if reponse.status_code ==