17-Python 3.x urllib.request Usage (1)

1. urllib.request requests

    ①、Configure a custom global opener: used to set request options such as cookies, a proxy, and so on

        Use urllib.request.build_opener(*handlers) to build an opener; the return value is the opener

        Use urllib.request.install_opener(opener) to install it as the global (default) opener

     ②、urllib.request.urlopen() vs. opener.open()

        The former sends the request through the installed global opener

        The latter sends the request through a separately configured opener (a minimal sketch follows)
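
A minimal sketch of the two calling styles (the test URL http://httpbin.org/get is only an illustration, not part of this article):

import urllib.request
import http.cookiejar

# Build an opener that carries a cookie jar
cookie_jar = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))

# Style 1: call the configured opener directly
with opener.open('http://httpbin.org/get', timeout=10) as resp:
    print(resp.status)

# Style 2: install it as the global (default) opener, then urlopen() uses it implicitly
urllib.request.install_opener(opener)
with urllib.request.urlopen('http://httpbin.org/get', timeout=10) as resp:
    print(resp.status)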

2. Handler configuration (urllib.request.BaseHandler)

    ①、HTTPCookieProcessor(cookiejar=None): handles cookies

    ②、HTTPRedirectHandler(): handles page redirects

    ③、ProxyHandler(proxies=None): configures a proxy

    ④、HTTPSHandler(context=...) and HTTPHandler(): the former handles HTTPS certificate verification (a sketch combining these handlers follows this list)
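
A minimal sketch of wiring these handlers into one opener; the proxy address 127.0.0.1:8080 and the unverified SSL context are illustrative assumptions:

import ssl
import urllib.request
import http.cookiejar

cookie_jar = http.cookiejar.LWPCookieJar()
handlers = (
    urllib.request.HTTPCookieProcessor(cookiejar=cookie_jar),                # cookie handling
    urllib.request.HTTPRedirectHandler(),                                    # follow redirects (also added by default)
    urllib.request.ProxyHandler({'http': 'http://127.0.0.1:8080'}),          # route http requests through a proxy
    urllib.request.HTTPSHandler(context=ssl._create_unverified_context()),   # skip HTTPS certificate verification
)
opener = urllib.request.build_opener(*handlers)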

3. Request configuration (zRequestHelp)

#!/usr/bin/python
# -*- coding: UTF-8 -*-

__author__ = 'apple'

import urllib.request
import urllib.parse
from urllib.request import BaseHandler,HTTPHandler,HTTPSHandler,ProxyHandler,HTTPCookieProcessor
import http.cookiejar

import ssl
import socket
import os
import random

# Request configuration builder class
class zRequestConfig:
    # Constructor
    def __init__(self,url,param=None,cookie=False,headers=None,timeout=socket._GLOBAL_DEFAULT_TIMEOUT,*handlers):
        # Basic parameters
        self.url = url
        print('<zRequestConfig> Request URL:', url)
        self.param = param
        self.timeout = timeout
        # Cookie management
        self.cookie_help = zCookieHelp(url) if cookie else zCookieHelp()
        # Request handlers (unpack the extra handlers passed by the caller)
        self.opener = urllib.request.build_opener(*self.all_handlers(*handlers))
        print('<zRequestConfig> Request handlers:', self.opener.handlers)
        # Request headers
        self.opener.addheaders = zRequestConfig.Headers(headers)
        print('<zRequestConfig> Request headers:', self.opener.addheaders)
        # install opener
        # urllib.request.install_opener(self.opener)

    # POST request body
    @property
    def Data(self):
        data = None
        param = self.param
        if param is not None:
            if isinstance(param, (dict, tuple)):  # dict or sequence of key/value pairs
                data = urllib.parse.urlencode(param).encode('utf-8')
            elif isinstance(param, str):  # plain string
                data = param.encode('utf-8')
            elif isinstance(param, bytes):  # already bytes
                data = param
            print('request Param:', data)
        # the data argument passed to open() must be a bytes object
        return data

    def all_handlers(self,*handlers):
        # Default handlers: plain HTTP, proxy, HTTPS without certificate verification, and cookie processing
        http_handler = urllib.request.HTTPHandler()
        proxy_handler = urllib.request.ProxyHandler(self.proxies)
        https_handler = urllib.request.HTTPSHandler(context=ssl._create_unverified_context())
        cookie_handler = urllib.request.HTTPCookieProcessor(cookiejar=self.cookie_help.cookie)
        all_handlers = (http_handler,proxy_handler,https_handler,cookie_handler)
        # Append any extra handlers supplied by the caller
        for item in handlers:
            if isinstance(item,BaseHandler):
                all_handlers += (item,)
        return all_handlers

    @staticmethod
    def Headers(headers):
        if headers is None:
            # Default request headers: pick a random User-Agent
            user_agents = ['Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
                       'Opera/9.25 (Windows NT 5.1; U; en)',
                       'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
                       'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
                       'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
                       'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
                       "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
                       "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
                       'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
                       'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
                       'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
                       'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
                       'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
                       'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
                       'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
                       'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
                       'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
                       'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
                       'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
                       'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36',
                       ]
            choose_agent = random.choice(user_agents)
            # Referer: the referring site (note: Accept-Encoding normally names a content coding such as gzip or identity)
            return {"User-agent": choose_agent, "Accept": "*/*", 'Referer': 'http://www.baidu.com','Accept-Encoding':'UTF-8'}.items()
        else:
            if isinstance(headers,dict):
                return headers.items()
            else:
                return headers

    @property
    def proxies(self):
        # Placeholder "proxy" list; these are ordinary websites, not real proxy servers
        proxies = ['http://www.baidu.com',
                   'http://www.firefoxchina.cn',
                   'http://www.sina.com.cn',
                   'https://www.taobao.com',
                   'http://www.youku.com',
                   'https://jx.tmall.com']
        proxy_url = random.choice(proxies)
        # Key the dict with the scheme the request does NOT use, so the proxy never actually applies
        proxy_pre = 'http' if self.url.startswith('https') else 'https'
        return {proxy_pre:proxy_url}


# Cookie management
class zCookieHelp:
    # Constructor
    def __init__(self,cookie_file=None,cookie=None):
        self.cookie_file = cookie_file
        # Avoid a mutable default argument: create a fresh cookie jar per instance
        self.cookie = cookie if cookie is not None else http.cookiejar.LWPCookieJar()
        # Load cookies from disk, if any
        self.load_save_cookie()
        # Print the cookie storage path
        print('<zCookieHelp> Cookie file:', self.cookie_path)

    @property
    def cookie_path(self):
        if self.cookie_file is None : return None
        cookie_path = os.path.abspath('..')+'/cookies/'+self.cookie_file+'/cookie.txt'
        # Normalize the path (str.replace returns a new string, so reassign it)
        cookie_path = cookie_path.replace('//','/').replace('?','/')
        # Create the directory if it does not exist yet
        if not os.path.exists(os.path.dirname(cookie_path)):
            try:
                os.makedirs(os.path.dirname(cookie_path))
            except Exception as e:
                print('Failed to create the cookies directory:', e)
        return cookie_path

    # Save / load cookies
    def load_save_cookie(self,isRead=True):
        full_path = self.cookie_path
        # Guard
        if full_path is None or self.cookie is None : return
        # Switch to the cookie file's directory
        root_dir = os.path.abspath('.')
        os.chdir(os.path.dirname(full_path))
        if isRead:
            if os.path.exists(full_path):
                # Load cookies
                self.cookie.load(os.path.basename(full_path),ignore_discard=True,ignore_expires=True)
        else:
            # Save cookies to disk
            self.cookie.save(os.path.basename(full_path),ignore_discard=True,ignore_expires=True)
        # Restore the working directory
        os.chdir(root_dir)
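
For reference, a standalone sketch of the save/load round trip that zCookieHelp wraps; the file name cookie.txt and the httpbin.org URL are illustrative assumptions:

import http.cookiejar
import urllib.request

jar = http.cookiejar.LWPCookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))

# Any Set-Cookie headers in the response end up in the jar
opener.open('http://httpbin.org/cookies/set?demo=1', timeout=10)

# Persist to disk and reload, keeping session-only and expired cookies
jar.save('cookie.txt', ignore_discard=True, ignore_expires=True)
jar.load('cookie.txt', ignore_discard=True, ignore_expires=True)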

4. Requests (zRequest)

#!/usr/bin/python
# -*- coding: UTF-8 -*-

__author__ = 'apple'

import urllib.request
import urllib.error
import http.client
from PythonWork.request.zRequestHelp import zRequestConfig

class zHttpRequest(object):
    # Constructor
    def __init__(self,url_config=None,param=None,cookie=False):
        # Response
        self.response = None
        # Request error
        self.error = None
        if isinstance(url_config,zRequestConfig):
            self.req_config = url_config
        else:
            # URL
            if not isinstance(url_config, str):
                raise ValueError("param url must be str and can't be None!")
            self.req_config = zRequestConfig(url_config,param=param,cookie=cookie)

    # Send the request through the configured opener
    @classmethod
    def urlopen_opener(cls,url,param=None,cookie=False,coding='utf-8'):
        # Build the request
        request = zHttpRequest(url_config=url,param=param,cookie=cookie)
        # Request configuration object
        req_config = request.req_config
        try:
            # Perform the request
            request.response = req_config.opener.open(req_config.url,req_config.Data,timeout=req_config.timeout)
            print('<zHttpRequest> Request succeeded:', request.response)
            # Persist cookies to file
            if cookie : req_config.cookie_help.load_save_cookie(isRead=False)
        except urllib.error.HTTPError as e:
            request.error = e
            print('<zHttpRequest> Error code:', e.code)
            print('<zHttpRequest> Reason:', e.reason)
        return request

    # For http and https URLs, open() returns an http.client.HTTPResponse object
    # For ftp, file and data URLs it returns a urllib.response.addinfourl object (handled by the legacy URLopener / FancyURLopener machinery); either object can be used as a context manager

    # Process the response
    def result(self,coding='utf-8'):
        result = self.response
        if isinstance(result,http.client.HTTPResponse):
            try:
                print('<Result> Status code: %s  Message: %s'%(result.status,result.msg))
                print('<Result> Response headers:', result.info())
                return self.response.read().decode(coding)
            except Exception as e:
                print('<Result> Failed to parse the response:', e)
        return ''

5. Usage

from PythonWork.request import zRequest

request = zRequest.zHttpRequest.urlopen_opener('http://qq.ip138.com/train/anhui/HeFei.htm')
results = request.result('gb2312')  # the target page is encoded in gb2312
print('Request result:', results)


#<zRequestConfig> Request URL: http://qq.ip138.com/train/anhui/HeFei.htm
#<zCookieHelp> Cookie file: None
#<zRequestConfig> Request handlers: [<urllib.request.ProxyHandler object at 0x101a44be0>, <urllib.request.UnknownHandler object at 0x10310dd30>, <urllib.request.HTTPDefaultErrorHandler object at 0x103116be0>, <urllib.request.HTTPRedirectHandler object at 0x103116588>, <urllib.request.FTPHandler object at 0x103116748>, <urllib.request.FileHandler object at 0x103116780>, <urllib.request.DataHandler object at 0x103116630>, <urllib.request.HTTPHandler object at 0x101a44ba8>, <urllib.request.HTTPSHandler object at 0x101a44c18>, <urllib.request.HTTPCookieProcessor object at 0x101a44e10>, <urllib.request.HTTPErrorProcessor object at 0x103116710>]
#<zRequestConfig> Request headers: dict_items([('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'), ('Accept', '*/*'), ('Referer', 'http://www.baidu.com'), ('Accept-Encoding', 'UTF-8')])
#<zHttpRequest> Request succeeded: <http.client.HTTPResponse object at 0x103116c18>
#<Result> Status code: 200  Message: OK
#<Result> Response headers: Cache-Control: max-age=3600
#Content-Length: 75389
#Content-Type: text/html
#Last-Modified: Thu, 29 Jun 2017 04:09:19 GMT
#Accept-Ranges: bytes
#ETag: "a43e227b8df0d21:73c4"
#Server: Microsoft-IIS/6.0
#X-Powered-By: ASP.NET
#Date: Thu, 29 Jun 2017 06:56:16 GMT
#Connection: close
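
The same entry point also covers POST requests: when param is passed, the Data property encodes it into the request body. A minimal sketch, assuming the illustrative endpoint http://httpbin.org/post:

post_request = zRequest.zHttpRequest.urlopen_opener(
    'http://httpbin.org/post',            # illustrative endpoint, not from the article
    param={'city': 'HeFei', 'page': 1},   # a dict is urlencoded and sent as the POST body
    cookie=False)
print('POST result:', post_request.result('utf-8'))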

 

Reposted from: https://my.oschina.net/CoderW/blog/1068748
