requests获取所有状态码
requests默认会自动跟随重定向,因此response.status_code得到的是跳转后最终响应的状态码,而不是301/302(中间的跳转响应保存在response.history中)。设置allow_redirects=False可以禁止自动跳转,这样就能直接获取301/302在内的所有原始状态码了
import requests
# Demo script: send one GET request without following redirects and print the
# raw status code the server answered with.

# Target URL. Exactly one assignment is active; the commented-out alternatives
# record what the author observed for other kinds of responses.
# url = 'http://www.freebuf.com/news/157100.html'  # requested 200, got 200
# Requested page answers 302; with redirects followed the result is 200.
# To stay on the 302 instead of jumping, pass allow_redirects=False.
url = 'http://www.freebuf.com/fevents/133225.html'
# url = 'http://www.freebuf.com/articles/database/151839.html'  # requested 403, got 403
# url = 'http://www.freebuf.com/articles/database/1518391.html'  # missing page on an existing domain: requested 404, got 404
# url = 'http://www.freebudfsf.com/articles/database/1518391.html'  # non-existent domain: author noted the program crashes
# url = 'https://www.douban.com/group/topic/49606658/'  # existing domain blocked by the company network: raises an exception, same effect as a network outage
# url = 'http://10.1.75.241'  # bare IP address (the http:// scheme is mandatory, otherwise it crashes)

# Request headers: send a desktop-browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

# Upper bound (seconds) on connecting and on each read. Without a timeout
# requests waits indefinitely, so an unresponsive host would hang the script.
REQUEST_TIMEOUT_SECONDS = 10

try:
    # Send the request. allow_redirects=False keeps requests from following
    # 3xx responses, so the status code printed below is the original one.
    response = requests.get(
        url,
        headers=headers,
        allow_redirects=False,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
except requests.exceptions.RequestException as e:
    # Base class of the errors requests raises itself (connection failure,
    # timeout, invalid/missing URL scheme, ...): report it instead of crashing.
    print(e)
else:
    # Inspect the response.
    print(' give url:', url)
    print(' request.url:', response.request.url)
    print('response.url:', response.url)
    print(response.content)
    print(response.status_code)
封装一个获取所有状态码的函数,同时实现验证返回值的方法
import requests
def get_statecode_or_errinfo(url=''):
'''
获取响应状态码,或者未响应的错误信息
:param url: 请求的url
:return: 状态码,或者未响应的错误信息
'''
if url == '':
return '请输入一个url作为get_statecode_or_errinfo的参数'
# headers
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gec