1.urllib.request请求
①、自定义配置全局opener:主要对请求的一些配置,如cookie、proxy代理等等
使用urllib.request.build_opener(*handler)配置opener,返回值opener
使用urllib.request.install_opener(opener)安装全局(默认)opener
②使用urllib.request.urlopen()和使用opener.open()
前者使用install的全局opener进行请求
后者使用单独配置的opener进行请求
2.Handlers配置(urllib.request.BaseHandler)
①、HTTPCookieProcessor(cookiejar=None) 用于处理Cookie
②、HTTPRedirectHandler() 用于处理页面重定向
③、ProxyHandler(proxies=None) 用于设置代理
④、HTTPSHandler(context=)和HTTPHandler() 前者处理HTTPS证书验证
3.请求配置(zRequestHelp)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
__author__ = 'apple'
import urllib.request
import urllib.parse
from urllib.request import BaseHandler,HTTPHandler,HTTPSHandler,ProxyHandler,HTTPCookieProcessor
import http.cookiejar
import ssl
import socket
import os
import random
# 请求参数构造类
# Request configuration: bundles the target URL, POST data, headers and
# cookie management into one object and builds a pre-wired urllib opener.
class zRequestConfig:
    def __init__(self, url, param=None, cookie=False, headers=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, *handlers):
        """Build an opener configured with proxy/HTTPS/cookie handlers.

        url      -- target URL (str)
        param    -- POST parameters (dict / tuple of pairs / str / bytes),
                    or None for a GET request
        cookie   -- when True, persist cookies to a per-URL file on disk
        headers  -- custom request headers (dict or iterable of pairs);
                    None selects a default header set with a random UA
        timeout  -- socket timeout later passed to opener.open()
        handlers -- extra urllib BaseHandler instances to install
        """
        self.url = url
        print('<zRequestConfig>请求URL', url)
        self.param = param
        self.timeout = timeout
        # Cookie management: persist to disk only when cookie=True.
        self.cookie_help = zCookieHelp(url) if cookie else zCookieHelp()
        # BUG FIX: the original called all_handlers(handlers), wrapping the
        # tuple a second time, so caller-supplied extra handlers never
        # passed the isinstance(item, BaseHandler) check and were dropped.
        self.opener = urllib.request.build_opener(*self.all_handlers(*handlers))
        print('<zRequestConfig>请求Handlers:', self.opener.handlers)
        # Request headers applied to every request made by this opener.
        self.opener.addheaders = zRequestConfig.Headers(headers)
        print('<zRequestConfig>请求头:', self.opener.addheaders)
        # install opener
        # urllib.request.install_opener(self.opener)

    @property
    def Data(self):
        """Return the POST body as bytes (urlopen requires a bytes object),
        or None for a GET request."""
        data = None
        param = self.param
        if param is not None:
            if isinstance(param, (dict, tuple)):  # dict or sequence of pairs
                data = urllib.parse.urlencode(param).encode('utf-8')
            elif isinstance(param, str):
                data = param.encode('utf-8')
            elif isinstance(param, bytes):
                data = param
        print('request Param:', data)
        return data

    def all_handlers(self, *handlers):
        """Return the default handler tuple plus any extra BaseHandlers."""
        http_handler = urllib.request.HTTPHandler()
        proxy_handler = urllib.request.ProxyHandler(self.proxies)
        # SECURITY NOTE: certificate verification is disabled here
        # (_create_unverified_context) — fine for scraping demos, unsafe
        # for anything handling sensitive data.
        https_handler = urllib.request.HTTPSHandler(context=ssl._create_unverified_context())
        cookie_handler = urllib.request.HTTPCookieProcessor(cookiejar=self.cookie_help.cookie)
        all_handlers = (http_handler, proxy_handler, https_handler, cookie_handler)
        # Append only genuine handler objects; silently ignore anything else.
        for item in handlers:
            if isinstance(item, BaseHandler):
                all_handlers += (item,)
        return all_handlers

    @staticmethod
    def Headers(headers):
        """Normalize request headers for opener.addheaders.

        None  -> default header set with a randomly chosen User-Agent;
        dict  -> its .items() view;
        other -> returned unchanged (assumed iterable of pairs).
        """
        if headers is None:
            # Pool of User-Agent strings to rotate through.
            user_agents = ['Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
                'Opera/9.25 (Windows NT 5.1; U; en)',
                'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
                'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
                'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
                'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
                "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
                "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
                'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
                'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50',
                'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
                'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
                'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
                'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
                'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
                'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
                'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
                'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
                'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36',
                ]
            choose_agent = random.choice(user_agents)
            # Referer: pretend the request came from a search page.
            # NOTE(review): 'Accept-Encoding: UTF-8' is not a valid value for
            # that header (it expects gzip/deflate/...) — kept as-is to
            # preserve the exact bytes sent; confirm whether it is intended.
            return {"User-agent": choose_agent, "Accept": "*/*", 'Referer': 'http://www.baidu.com','Accept-Encoding':'UTF-8'}.items()
        else:
            if isinstance(headers, dict):
                return headers.items()
            else:
                return headers

    @property
    def proxies(self):
        """Return a random {scheme: url} mapping for ProxyHandler.

        NOTE(review): these URLs are ordinary websites, not real proxy
        servers, and the scheme key is the OPPOSITE of the request URL's
        scheme, so the proxy never actually applies to the request being
        made — looks like placeholder/demo behavior; confirm intent
        before relying on it.
        """
        proxies = ['http://www.baidu.com',
            'http://www.firefoxchina.cn',
            'http://www.sina.com.cn',
            'https://www.taobao.com',
            'http://www.youku.com',
            'https://jx.tmall.com']
        proxy_url = random.choice(proxies)
        # Deliberately(?) keys the proxy under the other scheme.
        proxy_pre = 'http' if self.url.startswith('https') else 'https'
        return {proxy_pre: proxy_url}
# Cookie管理
# Cookie persistence helper: owns a cookiejar and optionally loads/saves
# it from a per-name cookie file on disk.
class zCookieHelp:
    def __init__(self, cookie_file=None, cookie=None):
        """Create (or adopt) a cookiejar and load any saved cookies.

        cookie_file -- name used to build the on-disk cookie path;
                       None disables persistence entirely
        cookie      -- an existing cookiejar to reuse; a fresh
                       LWPCookieJar is created when omitted
        """
        self.cookie_file = cookie_file
        # BUG FIX: the original used a mutable default argument
        # (cookie=http.cookiejar.LWPCookieJar()), so every instance
        # created with the default silently shared ONE jar.
        self.cookie = http.cookiejar.LWPCookieJar() if cookie is None else cookie
        # Load previously saved cookies (no-op when cookie_file is None).
        self.load_save_cookie()
        # Show where cookies are persisted (None when persistence is off).
        print('<zCookieHelp>Cookie文件:', self.cookie_path)

    @property
    def cookie_path(self):
        """Absolute path of the cookie file, or None when persistence is off.

        Creates the containing directory on first use.
        """
        if self.cookie_file is None:
            return None
        cookie_path = os.path.abspath('..') + '/cookies/' + self.cookie_file + '/cookie.txt'
        # BUG FIX: str.replace returns a new string; the original discarded
        # the result, so the path was never actually normalized.
        cookie_path = cookie_path.replace('//', '/')
        cookie_path = cookie_path.replace('?', '/')
        # Create the directory if it does not exist yet.
        if not os.path.exists(os.path.dirname(cookie_path)):
            try:
                os.makedirs(os.path.dirname(cookie_path))
            except OSError as e:
                print('Cookies文件路径创建失败', e)
        return cookie_path

    def load_save_cookie(self, isRead=True):
        """Load (isRead=True) or save (isRead=False) the cookiejar file.

        Does nothing when no cookie file is configured.
        """
        full_path = self.cookie_path
        # Guard: nothing to do without a cookie file. (The original also
        # tested `self is None`, which can never be true — removed.)
        if full_path is None:
            return
        # Temporarily switch into the cookie directory so the jar is
        # loaded/saved by basename, as the original code did.
        root_dir = os.path.abspath('.')
        os.chdir(os.path.dirname(full_path))
        try:
            if isRead:
                if os.path.exists(full_path):
                    # Load cookies, keeping session/expired entries.
                    self.cookie.load(os.path.basename(full_path), ignore_discard=True, ignore_expires=True)
            else:
                # Persist cookies to disk.
                self.cookie.save(os.path.basename(full_path), ignore_discard=True, ignore_expires=True)
        finally:
            # BUG FIX: always restore the working directory, even when
            # load/save raises (the original could leave the process in
            # the cookie directory).
            os.chdir(root_dir)
4.请求(zRequest)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
__author__ = 'apple'
import http.client
import urllib.error
import urllib.request

from PythonWork.request.zRequestHelp import zRequestConfig
# HTTP request wrapper built on top of zRequestConfig's per-instance opener.
class zHttpRequest(object):
    def __init__(self, url_config=None, param=None, cookie=False):
        """Accept either a ready zRequestConfig or a plain URL string.

        url_config -- zRequestConfig instance, or a URL str
        param      -- POST parameters forwarded to zRequestConfig
        cookie     -- forwarded to zRequestConfig (enables cookie file)

        Raises ValueError when url_config is neither a zRequestConfig
        nor a str.
        """
        self.response = None  # http.client.HTTPResponse on success
        self.error = None     # exception captured during the request
        if issubclass(type(url_config), zRequestConfig):
            self.req_config = url_config
        else:
            if not isinstance(url_config, str):
                raise ValueError("param url must be str and can't be None!")
            self.req_config = zRequestConfig(url_config, param=param, cookie=cookie)

    @classmethod
    def urlopen_opener(cls, url, param=None, cookie=False, coding='utf-8'):
        """Perform the request with this instance's opener.

        Returns the zHttpRequest; inspect .response / .error for the
        outcome, then call result() to decode the body.

        NOTE: `coding` is kept for interface compatibility but is unused
        here — pass the encoding to result() instead.
        """
        request = cls(url_config=url, param=param, cookie=cookie)
        req_config = request.req_config
        try:
            request.response = req_config.opener.open(
                req_config.url, req_config.Data, timeout=req_config.timeout)
            print('<zHttpRequest>请求成功>:', request.response)
            # Persist cookies to file after a successful request.
            if cookie:
                req_config.cookie_help.load_save_cookie(isRead=False)
        # BUG FIX: the original caught only HTTPError, so URLError
        # (DNS failure, refused connection, timeout) propagated uncaught.
        # HTTPError subclasses URLError, so previous behavior is kept.
        except urllib.error.URLError as e:
            request.error = e
            # Only HTTPError carries .code; plain URLError does not.
            print('<zHttpRequest>请求错误码:', getattr(e, 'code', None))
            print('<zHttpRequest>错误原因:', e.reason)
        return request

    # For http/https, opener.open returns an http.client.HTTPResponse.
    # For ftp/file/data URLs, urllib returns a urllib.response.addinfourl
    # instead, which result() does not handle.
    def result(self, coding='utf-8'):
        """Decode and return the response body as text.

        Returns '' when decoding fails, and None when there is no
        HTTPResponse (request failed or not yet performed).
        """
        result = self.response
        if isinstance(result, http.client.HTTPResponse):
            try:
                print('<Result>请求code:%s 结果信息:%s' % (result.status, result.msg))
                print('<Result>请求报文:', result.info())
                return self.response.read().decode(coding)
            except Exception as e:
                # Best-effort: report decode/read problems, return empty text.
                print('<Result>请求结果解析错误:', e)
                return ''
5.使用
# Usage example: GET the page through a one-off opener, then decode the
# body as gb2312 (the site's encoding) and print it.
request = zRequest.zHttpRequest.urlopen_opener('http://qq.ip138.com/train/anhui/HeFei.htm')
results = request.result('gb2312')
print('请求结果:',results)
#<zRequestConfig>请求URL http://qq.ip138.com/train/anhui/HeFei.htm
#<zCookieHelp>Cookie文件: None
#<zRequestConfig>请求Handlers: [<urllib.request.ProxyHandler object at 0x101a44be0>, <urllib.request.UnknownHandler object at 0x10310dd30>, <urllib.request.HTTPDefaultErrorHandler object at 0x103116be0>, <urllib.request.HTTPRedirectHandler object at 0x103116588>, <urllib.request.FTPHandler object at 0x103116748>, <urllib.request.FileHandler object at 0x103116780>, <urllib.request.DataHandler object at 0x103116630>, <urllib.request.HTTPHandler object at 0x101a44ba8>, <urllib.request.HTTPSHandler object at 0x101a44c18>, <urllib.request.HTTPCookieProcessor object at 0x101a44e10>, <urllib.request.HTTPErrorProcessor object at 0x103116710>]
#<zRequestConfig>请求头: dict_items([('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'), ('Accept', '*/*'), ('Referer', 'http://www.baidu.com'), ('Accept-Encoding', 'UTF-8')])
#<zHttpRequest>请求成功>: <http.client.HTTPResponse object at 0x103116c18>
#<Result>请求code:200 结果信息:OK
#<Result>请求报文: Cache-Control: max-age=3600
#Content-Length: 75389
#Content-Type: text/html
#Last-Modified: Thu, 29 Jun 2017 04:09:19 GMT
#Accept-Ranges: bytes
#ETag: "a43e227b8df0d21:73c4"
#Server: Microsoft-IIS/6.0
#X-Powered-By: ASP.NET
#Date: Thu, 29 Jun 2017 06:56:16 GMT
#Connection: close