爬取链家项目需要用到代理池，代理池的具体设置方法可以翻看我之前的文章。
import hashlib
import requests
from lxml import etree
import pymongo
import time,re
import threading
from queue import Queue
class Lianjia(threading.Thread):
def __init__(self, url=None, q_area=None):
    """Initialise the worker thread, its proxy mapping and its MongoDB handles.

    Args:
        url: Value stored unchanged on ``self.url`` (defaults to ``None``).
        q_area: Value stored unchanged on ``self.q_area`` (defaults to
            ``None``); presumably a queue shared with other workers --
            confirm against the caller.
    """
    super().__init__()
    self.url, self.q_area = url, q_area
    # The proxy mapping is requested before the database client is created,
    # keeping the same order of side effects as before.
    self.proxies = self.get_proxies()
    # One client per instance, pointed at the local MongoDB server; the
    # 'lianjia' database handle is derived from that same client.
    mongo_client = pymongo.MongoClient(host='localhost', port=27017)
    self.client = mongo_client
    self.db = mongo_client['lianjia']
def get_proxies(self):
try:
response = requests.get('http://localhost:5000/get')
proxies = {
'http': 'http://' + response.text
}
return proxies
except Exception: