如何通过Python+Selenium+PhantomJS/Chrome获取HTTP状态和Response Headers

Selenium没有提供获取HTTP状态码的API,并且似乎以后也不准备提供该功能,还好有变通的方法。这里提供Python+Selenium+PhantomJS的实现供参考:

# Python 2.7
from selenium import webdriver  
import json
from collections import OrderedDict

def getResponseHeaders(browser):
    """Return the first HAR entry's response headers, sorted by header name.

    Reads the 'har' log from ``browser``, decodes the JSON held in the first
    log record's 'message' field, and collects the name/value pairs found
    under log -> entries[0] -> response -> headers.

    Returns an OrderedDict keyed by header name in ascending name order. When
    a name occurs more than once, the value listed last in the HAR is kept.
    """
    first_record = browser.get_log('har')[0]
    har = json.loads(first_record['message'])
    raw_headers = har['log']['entries'][0]['response']["headers"]
    pairs = [(item["name"], item["value"]) for item in raw_headers]
    # Sort on the name only; the sort is stable, so duplicates keep HAR order.
    pairs.sort(key=lambda pair: pair[0])
    return OrderedDict(pairs)

def getResponseStatus(browser):
    """Return ``(status, statusText)`` taken from the first HAR entry.

    Reads the 'har' log from ``browser``, decodes the JSON held in the first
    log record's 'message' field, and looks at log -> entries[0] -> response.
    The status is returned as stored in the HAR; the status text is passed
    through ``str()``.
    """
    raw_message = browser.get_log('har')[0]['message']
    response = json.loads(raw_message)['log']['entries'][0]['response']
    status_code = response["status"]
    status_text = str(response["statusText"])
    return (status_code, status_text)

browser = webdriver.PhantomJS()

# Simple test: request a page that does not exist and dump what came back.
# Every print uses the single-argument parenthesised form so the output is
# the same whether print is a statement (Python 2.7) or a function.
try:
    print(">>>>> 404")
    browser.get("http://www.questionfish.cn/notfound.html")
    print("status:  %s" % (getResponseStatus(browser),))
    headers = getResponseHeaders(browser)
    for key in headers:
        print("%s => %s" % (key, headers[key]))
    print("")
finally:
    # Always shut the PhantomJS process down, even if the request or the
    # log parsing fails; otherwise the browser process is left running.
    browser.quit()

Python+Selenium+ChromeDriver当然也有解决方法:

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import json

# Work on a copy: DesiredCapabilities.CHROME is a dict shared through the
# class, so editing it in place would leak the logging setting into every
# other user of DesiredCapabilities.CHROME in this process.
d = DesiredCapabilities.CHROME.copy()
# Ask ChromeDriver to record every entry of the 'performance' log.
d['loggingPrefs'] = { 'performance':'ALL' }
# NOTE(review): newer ChromeDriver releases (W3C mode) are documented to read
# the vendor-prefixed key instead of the legacy one; both are set so either
# generation picks the preference up. Confirm against the driver in use.
d['goog:loggingPrefs'] = { 'performance':'ALL' }

def getHttpStatus(browser):
    """Return ``(status, statusText)`` of the response for the current page.

    Scans the browser's 'performance' log for the first record whose decoded
    message carries a ``params.response`` object with a ``url`` equal to
    ``browser.current_url``.

    Records that are not valid JSON, or that lack any of the expected keys,
    are skipped.

    Returns:
        A ``(status, statusText)`` tuple, or ``None`` when no record matches.
    """
    log_entries = browser.get_log('performance')
    # Read the URL once instead of asking the browser for it on every record.
    current_url = browser.current_url
    for entry in log_entries:
        try:
            response = json.loads(entry[u'message'])[u'message'][u'params'][u'response']
            if response[u'url'] == current_url:
                return (response[u'status'], response[u'statusText'])
        except (KeyError, TypeError, ValueError):
            # Not a response record (or malformed JSON): ignore it. Anything
            # else, e.g. KeyboardInterrupt, is allowed to propagate.
            continue
    return None

def getHttpResponseHeader(browser):
    """Return the response headers recorded for the current page.

    Scans the browser's 'performance' log for the first record whose decoded
    message carries a ``params.response`` object with a ``url`` equal to
    ``browser.current_url``, and returns that object's ``headers`` value.

    Records that are not valid JSON, or that lack any of the expected keys,
    are skipped.

    Returns:
        The ``headers`` value of the matching response, or ``None`` when no
        record matches.
    """
    log_entries = browser.get_log('performance')
    # Read the URL once instead of asking the browser for it on every record.
    current_url = browser.current_url
    for entry in log_entries:
        try:
            response = json.loads(entry[u'message'])[u'message'][u'params'][u'response']
            if response[u'url'] == current_url:
                return response[u'headers']
        except (KeyError, TypeError, ValueError):
            # Not a response record (or malformed JSON): ignore it. Anything
            # else, e.g. KeyboardInterrupt, is allowed to propagate.
            continue
    return None

browser = webdriver.Chrome(desired_capabilities=d)
url = 'http://www.questionfish.cn/notfound.html'
try:
    browser.get(url)
    # Parenthesised single-argument print gives the same output whether
    # print is a statement (Python 2.7) or a function.
    print(getHttpStatus(browser))
    # get_log() clears the entries it has already returned, so the two
    # helpers must not both be called for the same page load.
    # print(getHttpResponseHeader(browser))
finally:
    # Close Chrome even if loading the page or reading the log fails.
    browser.quit()

REF: How to get status code by using selenium.py (python code) - Stack Overflow

  • 3
    点赞
  • 22
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值