访问页面 (fetch a page with a basic GET request)
# Basic GET: wrap the URL in a Request object, then open it.
from urllib.request import Request, urlopen

target = 'http://www.xx.com'
page_request = Request(target)
resp = urlopen(page_request)  # http.client.HTTPResponse
返回数据 (read the response body)
# Pull the raw bytes off the response, then decode them (UTF-8 by default).
raw_bytes = resp.read()
html = raw_bytes.decode()
添加报头信息 (add request headers)
# Spoof a browser User-Agent so the request is less likely to be blocked.
from fake_useragent import UserAgent

url = 'http://www.xx.com'
header = {'User-Agent': UserAgent().chrome}  # random Chrome UA string
req = Request(url, headers=header)
https访问 (HTTPS access)
# HTTPS access with certificate verification disabled.
# Bug fixes: `ssl` was never imported (NameError), and the original opened
# `requset`, a misspelled name that is not defined until later in the notes —
# use the `req` built above instead.
import ssl

# NOTE(review): an unverified context is vulnerable to man-in-the-middle
# attacks; use only against test servers with self-signed certificates.
context = ssl._create_unverified_context()
response = urlopen(req, context=context)
POST请求 (POST request)
# POST request: form data must be urlencoded and then byte-encoded.
# Bug fixes: the original passed `headers=headers`, but the dict defined
# earlier in these notes is named `header` (NameError); `requset` typo fixed.
from urllib.parse import urlencode

my_data = {
    'usr': '123',
    'pwe': '123456',  # NOTE(review): key looks like a typo for 'pwd' — confirm the server's field name
}
# urlencode() yields 'usr=123&pwe=123456'; .encode() turns it into bytes.
form_data = urlencode(my_data).encode()
# Supplying data= makes urllib send a POST instead of a GET.
request = Request(url, data=form_data, headers=header)
Proxy代理 (requesting through a proxy)
# Route requests through an authenticated proxy
# (handy test endpoint: http://httpbin.org/get).
from urllib.request import ProxyHandler, build_opener

# Credentials and address in user:password@host:port form.
proxy_handler = ProxyHandler({'http': 'usr:pwd@ip:port'})
opener = build_opener(proxy_handler)
Cookie
# Method 1: attach the cookie by hand as a plain request header.
headers = {
    'User-Agent': UserAgent().chrome,  # random Chrome UA
    'Cookie': 'xxxx',                  # raw cookie string copied from a browser session
}
request = Request(url, headers=headers)
# Method 2: let a cookie jar capture and replay cookies automatically.
from urllib.request import build_opener, HTTPCookieProcessor
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar()
processor = HTTPCookieProcessor(jar)
opener = build_opener(processor)
response = opener.open(request)

# Persist cookies to disk; ignore_expires -> also save expired cookies,
# ignore_discard -> also save session cookies marked to be discarded.
jar.save('cookie.txt', ignore_expires=True, ignore_discard=True)

# Reload the saved cookies so later requests reuse the session.
jar.load('cookie.txt', ignore_expires=True, ignore_discard=True)
URLError
# Error handling. HTTPError (server replied with an error status) is a
# subclass of URLError (connection-level failure), so catch it first.
# Bug fix: the original distinguished the two cases by inspecting `e.args`,
# which relies on undocumented exception internals, and `e.args[0].errno`
# raises AttributeError when the reason is not an OSError.
from urllib.error import HTTPError, URLError

try:
    opener = build_opener()
    response = opener.open(request)
    info = response.read().decode()
except HTTPError as e:
    # The server answered, but with an error status (404, 500, ...).
    print(e.code)
except URLError as e:
    # The request never completed: DNS failure, refused connection, timeout, ...
    print(e.reason)