多线程的使用(中)
def run(self):
    """Fetch every URL waiting in ``self.url_queue`` and print each response body.

    One ``User-Agent`` value, taken from ``UserAgent().random``, is built
    before the loop and sent with every request made by this call. The loop
    ends as soon as the queue has no URL to hand out.
    """
    # Local import: the exception type is only needed by the drain loop below.
    from queue import Empty

    headers = {
        "User-Agent": UserAgent().random,
    }
    while True:
        # When several threads drain the same queue, an empty()-then-get()
        # pair can block forever if another thread takes the last URL in
        # between; a non-blocking get avoids that.
        try:
            url = self.url_queue.get_nowait()
        except Empty:
            break
        # NOTE(review): the original spelled this `request.get`; the
        # headers=/.text usage matches the `requests` library, which is
        # assumed to be imported at module level -- confirm.
        response = requests.get(url, headers=headers)
        print(response.text)
#解析类
class ParseInfo(Thread):
    """Thread that parses queued HTML documents and prints the extracted text."""

    def __init__(self, html_queue):
        """Remember the queue this thread will consume.

        Args:
            html_queue: Queue-like object providing ``empty()`` and ``get()``;
                each item taken from it is handed to ``etree.HTML``.
        """
        super().__init__()
        self.html_queue = html_queue

    def run(self):
        """Parse documents until ``html_queue`` is empty, printing each match.

        For every document, the first ``<span>`` child of each
        ``<div class="content">`` is selected and its text is printed.
        """
        while not self.html_queue.empty():
            e = etree.HTML(self.html_queue.get())
            # `@class` tests the attribute; a bare `class` would look for a
            # child element named "class" and match nothing useful.
            span_contents = e.xpath('//div[@class="content"]/span[1]')
            for span in span_contents:
                # string(.) yields the span's full text, nested tags included.
                info = span.xpath("string(.)")
                print(info)
创建一个爬虫
# Start three crawler threads; each one is given the same url_queue.
for i in range(3):
    crawl1 = CrawlInfo(url_queue)
    crawl1.start()

# NOTE(review): the parser is started without waiting for the crawlers. If
# the crawlers are expected to fill html_queue first, join them before this
# point -- confirm the intended ordering.
parse = ParseInfo(html_queue)
parse.start()