#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020/12/18 19:30
# @Author : huni
# @File : 图图岛多线程.py
# @Software: PyCharm
import requests
from lxml import etree
import os
from threading import Thread
from queue import Queue
from urllib import parse
class CrawlInfo(Thread):
def __init__(self, url_queue, html_queue):
    """Set up the worker thread and keep references to its two queues.

    Args:
        url_queue: Queue of URLs; ``run`` polls it with ``empty()`` and
            takes items from it with ``get()``.
        html_queue: Stored on the instance as ``html_queue``.
            NOTE(review): presumably receives the fetched page text --
            confirm against the full ``run`` implementation.
    """
    # The base initialiser has to run before the thread can be started.
    # It is called explicitly on Thread (not via zero-argument super())
    # so it does not depend on this def being lexically nested in a class.
    Thread.__init__(self)
    self.url_queue = url_queue
    self.html_queue = html_queue
def run(self):
while self.url_queue.empty() == False:
url = self.url_queue.get()
resp1 = requests.get(url=url, headers=headers)
# 处理中文乱码问题
resp1_text = resp1.text.encode('ISO-8859-1').decode('utf-8')
if resp1.status_co
python 爬虫 图图岛多线程并发爬取搜索内容的全部数据(解决href关联问题)
最新推荐文章于 2022-08-18 12:48:53 发布