# Python request wrapper for web scraping (Python crawler).
# Request class
import urllib.request as req
import urllib.parse as parse
import urllib.error as uerr
import http.cookiejar as ckjar
import chardet
import requests as reqs
from urllib.parse import urlparse
class HttpResquestBase(object):
    """Small HTTP helper bound to a single URL.

    Configure a request by assigning the public attributes, then call one of
    the fetch methods (``Get``, ``urlopen`` or ``open``). Every fetch method
    returns the response body as ``str``.

    Attributes:
        url: Target URL given to the constructor.
        urlparse: Result of ``urllib.parse.urlparse(url)``.
        data: Optional mapping of form fields. For ``urlopen``/``open`` a
            non-``None`` value is URL-encoded and turns the request into a
            POST; ``Get`` forwards it unchanged to ``requests.get``.
        timeout: Timeout in seconds handed to the underlying library.
        cookies: ``CookieJar`` filled by the most recent ``open`` call,
            ``None`` until ``open`` has been called.
        headers: Optional mapping of extra request headers.
    """

    data = None
    timeout = 10
    cookies = None
    headers = None

    def __init__(self, url):
        """Store *url* and its parsed components."""
        self.url = url
        self.urlparse = urlparse(self.url)

    def Get(self):
        """Fetch the URL with ``requests.get`` and return the decoded text."""
        # requests applies no timeout by default, so forward the configured
        # one; otherwise a stalled server would block this call forever.
        r = reqs.get(
            self.url,
            data=self.data,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.text

    def urlopen(self):
        """Fetch the URL with ``urllib`` and return the body as text.

        Sends a GET request by default; when ``data`` is set the request
        becomes a POST carrying the URL-encoded form fields.
        """
        return self._fetch(req.urlopen)

    def open(self):
        """Like ``urlopen`` but also collect the cookies of the response.

        After the call, ``self.cookies`` holds a fresh ``CookieJar`` with the
        cookies received for this request.
        """
        self.cookies = ckjar.CookieJar()
        opener = req.build_opener(req.HTTPCookieProcessor(self.cookies))
        return self._fetch(opener.open)

    def _build_request(self):
        """Build the ``urllib`` request from ``url``, ``data`` and ``headers``."""
        body = None
        if self.data is not None:
            body = parse.urlencode(self.data).encode("utf-8")
        # Apply the configured headers here as well, so that the urllib based
        # methods behave consistently with ``Get``.
        return req.Request(self.url, data=body, headers=self.headers or {})

    def _fetch(self, opener_open):
        """Send the request through *opener_open* and return the decoded body."""
        # The context manager closes the response (and its socket) even when
        # reading fails.
        with opener_open(self._build_request(), timeout=self.timeout) as resp:
            content = resp.read()
            declared_charset = resp.headers.get_content_charset()
        return self._decode(content, declared_charset)

    @staticmethod
    def _decode(content, declared_charset=None):
        """Decode *content*, guessing the encoding with ``chardet``."""
        if not content:
            # Nothing to detect; chardet reports encoding=None for empty input.
            return ""
        detected = chardet.detect(content)["encoding"]
        if detected:
            return content.decode(detected)
        # chardet could not decide: fall back to the charset declared by the
        # response, then UTF-8, replacing undecodable bytes instead of failing.
        return content.decode(declared_charset or "utf-8", errors="replace")
# Usage
# Example: fetch the Baidu home page through the requests-based ``Get``
# method, sending browser-like headers, and print the returned HTML.
url = "http://www.baidu.com"
hrb = HttpResquestBase(url)

# Headers are plain attributes on the helper; assign them before fetching.
hrb.headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
    'Cookie': 'Cookie',
}

v = hrb.Get()
print(v)