爬取链家项目需要用到代理池，代理池的具体设置方法可以参考我之前的文章。
import hashlib
import requests
from lxml import etree
import pymongo
import time,re
def get_proxies(timeout=5):
    """Fetch one proxy from the local proxy-pool service.

    Sends a GET request to ``http://localhost:5000/get`` and wraps the
    response body in a ``proxies`` mapping suitable for ``requests``.

    Args:
        timeout: Seconds to wait for the proxy-pool service before giving
            up. Without a timeout ``requests`` may block forever if the
            service accepts the connection but never answers.

    Returns:
        ``{'http': 'http://<body>'}`` on success, or ``None`` when the
        service is unreachable, answers with an HTTP error status, or
        returns an empty body.
    """
    try:
        response = requests.get('http://localhost:5000/get', timeout=timeout)
        # An error status means the body is an error page, not a proxy.
        response.raise_for_status()
    except requests.RequestException:
        return None

    # Presumably the body is a bare "host:port"; strip stray whitespace or
    # a trailing newline so the resulting proxy URL stays valid.
    address = response.text.strip()
    if not address:
        return None

    # NOTE(review): only the 'http' scheme is mapped, so requests made to
    # https:// URLs will not be routed through this proxy -- confirm that
    # this is intended.
    return {
        'http': 'http://' + address,
    }
def get_xpath_by_requests(url,proxies):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
'Referer': 'https://bj.lianjia.com/?utm_source=baidu&utm_medium=pinzhuan&utm_term=biaoti&utm_content=biaotimiaoshu&utm_campaign=sousuo&ljref=pc_sem_baidu_ppzq_x',
}
try:
response = requests.get(url