from urllib.parse import urlencode
from urllib import request
from pyquery import PyQuery as pq
from fake_useragent import UserAgent
def requestPlus(**kwargs):
    """Send an HTTP GET/POST request through urllib and return the response.

    Keyword Args:
        url (str): Target address. Required.
        headers (dict): Extra request headers, merged over the defaults. A
            random User-Agent is generated only when the caller does not
            supply one (matched case-insensitively).
        method (str): ``'get'`` or ``'post'`` (case-insensitive). Any other
            value is ignored and ``'get'`` is used.
        query (dict): Parameters that are URL-encoded and appended to ``url``.
        data (dict): Form fields. A non-empty dict forces the method to POST
            and is sent as a URL-encoded UTF-8 body.
        proxy (dict): Scheme-to-proxy mapping passed to ``ProxyHandler``. It
            applies to this call only.

    Returns:
        The urllib response object, already read and closed, with two
        attributes set on it: ``read`` is rebound to the raw body bytes and
        ``text`` holds the decoded body (UTF-8, falling back to GBK).
        Returns ``None`` when the request fails; the failure is printed.

    Raises:
        ValueError: ``url`` is missing or empty.
        TypeError: ``url`` is not a string.
    """
    url = kwargs.get('url')
    if not url:
        raise ValueError('请求地址为空')
    if not isinstance(url, str):
        raise TypeError('请求地址非字符串')

    extra_headers = kwargs.get('headers')
    if not (extra_headers and isinstance(extra_headers, dict)):
        extra_headers = {}
    caller_sets_ua = any(
        isinstance(key, str) and key.lower() == 'user-agent'
        for key in extra_headers
    )
    # Building a UserAgent is only worthwhile when its value will be used.
    headers = {} if caller_sets_ua else {'User-Agent': UserAgent().random}
    headers.update(extra_headers)

    method = 'GET'
    requested_method = kwargs.get('method')
    if (requested_method and isinstance(requested_method, str)
            and requested_method.lower() in ('get', 'post')):
        method = requested_method.upper()

    query = kwargs.get('query')
    if query and isinstance(query, dict):
        # Do not emit a second '?' when the URL already has a query string.
        if '?' not in url:
            separator = '?'
        elif url.endswith(('?', '&')):
            separator = ''
        else:
            separator = '&'
        url += separator + urlencode(query)

    form = kwargs.get('data')
    if form and isinstance(form, dict):
        method = 'POST'
        body = urlencode(form).encode('utf-8')
    elif method == 'POST':
        # A POST without fields still sends an explicit empty body.
        body = b''
    else:
        # No body on GET, so urllib does not add Content-Type/Content-Length.
        body = None

    proxy = kwargs.get('proxy')
    if proxy and isinstance(proxy, dict):
        # Use a private opener rather than install_opener(): installing it
        # globally would route every later proxy-less call through this proxy.
        opener = request.build_opener(request.ProxyHandler(proxy))
        # All headers travel on the Request itself; drop the opener defaults.
        opener.addheaders = []
        send = opener.open
    else:
        send = request.urlopen

    try:
        req = request.Request(url, body, headers, method=method)
        res = send(req)
    except Exception as e:
        # Exception only, so KeyboardInterrupt/SystemExit still propagate.
        print(url, headers, method, e)
        return None

    try:
        raw = res.read()
    finally:
        res.close()

    # Kept for backward compatibility: callers read the bytes from `res.read`.
    res.read = raw
    try:
        res.text = raw.decode('utf-8')
    except UnicodeDecodeError:
        # Last resort: never fail on undecodable bytes, replace them instead.
        res.text = raw.decode('gbk', errors='replace')
    return res
# Demo run: fetch a handful of pages (plain GET, GET with a query string,
# form POST, and two requests through a local proxy), then print the title and
# paragraph text of every page that answered with HTTP 200.
respItems = []
respItems.append(requestPlus(url='https://www.baidu.com'))
respItems.append(requestPlus(url='https://wap.faloo.com'))
respItems.append(requestPlus(url='https://www.baidu.com/s', query={
    'wd': '历史上的今天'
}))
respItems.append(requestPlus(url='https://www.httpbin.org/post', data={
    'int': '1',
    'float': '1.1',
}))
respItems.append(requestPlus(url='https://2021.ip138.com', proxy={
    'https': '127.0.0.1:10809'
}))
respItems.append(requestPlus(url='https://www.google.com.hk/search', query={
    'q': 'facebook'
}, proxy={
    'https': '127.0.0.1:10809'
}))
for resp in respItems:
    # requestPlus returns None for a failed request; skip it, because
    # reading .code on None would abort the whole loop.
    if resp is None:
        continue
    if resp.code == 200:
        print(resp.url)
        doc = pq(resp.text)
        print(doc('title').text(), doc('p').text())