from threading import Thread
from queue import Queue
import requests
from lxml import etree
import os
class CrawlInfo(Thread):
def __init__(self,url_queue,html_queue):
Thread.__init__(self)
self.url_queue = url_queue
self.html_queue = html_queue
def run(self):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36'
}
while self.url_queue.empty() == False:
url = self.url_queue.get()
reponse = requests.get(url=url,headers=headers)