# 多线程的使用(下)
def run(self):
    """Drain ``self.html_queue`` and append the extracted text to ``duanzi.text``.

    Each queued item is parsed as an HTML document. For every
    ``<div class="content">`` the first ``<span>`` child is selected, and
    its string value is appended to the output file as one line.

    The method returns once the queue has no more items.
    """
    # Function-scope import so the method does not depend on how the rest
    # of the file imports the queue module.
    # NOTE(review): assumes html_queue is a queue.Queue (or compatible) that
    # raises queue.Empty from get_nowait() -- confirm against the caller.
    from queue import Empty

    while True:
        # Checking empty() and then calling a blocking get() is racy when
        # several threads consume the same queue: another thread can take
        # the last item between the two calls and leave this one blocked
        # forever. A non-blocking get makes "take or stop" a single step.
        try:
            html = self.html_queue.get_nowait()
        except Empty:
            break

        tree = etree.HTML(html)
        # First <span> child of every <div class="content">.
        span_contents = tree.xpath('//div[@class = "content"]/span[1]')

        with open("duanzi.text", "a", encoding="utf-8") as f:
            for span in span_contents:
                # string(.) yields the text of the span and its descendants.
                info = span.xpath("string(.)")
                f.write(info + "\n")
# 创建一个爬虫
# Stage 1: create and start three crawler workers.
# NOTE(review): CrawlInfo is assumed to be a threading.Thread subclass,
# since the original code calls join() on it -- confirm against its definition.
crawl_list = []
for _ in range(3):
    crawl = CrawlInfo(url_queue, html_queue)
    crawl_list.append(crawl)
    # A worker must be started before it can be joined; the original never
    # started the crawlers.
    crawl.start()

# Wait for every crawler to finish before any parser is created.
for crawl in crawl_list:
    crawl.join()

# Stage 2: create and start three parser workers on the same html_queue.
parse_list = []
for _ in range(3):
    parse = ParseInfo(html_queue)
    parse_list.append(parse)
    parse.start()

# Block until all parsers are done.
for parse in parse_list:
    parse.join()