1、urllib 库
面对验证网页--HTTPBasicAuthHandler
有时候需要输入用户名和密码才能成功登录
借用 HTTPBasicAuthHandler 能够完成
from urllib.error import URLError
from urllib.request import (
    HTTPBasicAuthHandler,
    HTTPPassMgrWithDefaultRealm,
    build_opener,
)

# Fetch a page that is protected by HTTP Basic authentication.
username = 'username'
password = 'password'
url = 'http://localhost:8888/'  # address of the protected page

# Password manager holding the credentials; a realm of None makes them the
# catch-all credentials for this URL.
p = HTTPPassMgrWithDefaultRealm()
p.add_password(None, url, username, password)
auth_handler = HTTPBasicAuthHandler(p)  # handler that answers the auth challenge
opener = build_opener(auth_handler)  # opener that sends requests through the handler

try:
    # The context manager closes the connection once the body has been read.
    with opener.open(url) as result:
        html = result.read().decode('utf-8')
    print(html)
except URLError as e:
    print(e.reason)
代理
from urllib.error import URLError
from urllib.request import ProxyHandler, build_opener

# Send requests through a local proxy listening on port 9743.
proxy_handler = ProxyHandler({
    'http': 'http://127.0.0.1:9743',
    'https': 'https://127.0.0.1:9743',
})
# build_opener() itself returns the opener; the handler is passed to it directly.
opener = build_opener(proxy_handler)
try:
    # The context manager closes the connection once the body has been read.
    with opener.open('https://www.baidu.com') as response:
        print(response.read().decode('utf-8'))
except URLError as e:
    print(e.reason)
cookies
import http.cookiejar
import urllib.request

# Collect the cookies set by a site and print each one as name=value.
cookie = http.cookiejar.CookieJar()  # in-memory cookie store
handler = urllib.request.HTTPCookieProcessor(cookie)  # handler that fills the jar
opener = urllib.request.build_opener(handler)
# Only the cookies are needed, so the response is closed without reading it.
with opener.open('http://www.baidu.com') as response:
    pass
for item in cookie:
    # f-string formatting also copes with a cookie whose value is None.
    print(f'{item.name}={item.value}')
输出结果,输出每条Cookie的名称和值:
BAIDUID=6EEA0EF8461DCF999E68A114DB1AA4C8:FG=1
BIDUPSID=6EEA0EF8461DCF99E60A706E1EBF45A4
H_PS_PSSID=36559_36459_37352_37300_36569_36786_37259_26350
PSTM=1662865756
BDSVRTM=22
BD_HOME=1
以文本形式输出cookies 并保存
import http.cookiejar
import urllib.request

# Save the cookies set by a site to a text file in Mozilla/Netscape format.
filename = 'cookies.txt'
cookie = http.cookiejar.MozillaCookieJar(filename)  # file-backed cookie jar
handler = urllib.request.HTTPCookieProcessor(cookie)  # handler that fills the jar
opener = urllib.request.build_opener(handler)
# Only the cookies are needed, so the response is closed without reading it.
with opener.open('http://www.baidu.com') as response:
    pass
# Keep session (discard) cookies and already-expired cookies in the file too.
cookie.save(ignore_discard=True, ignore_expires=True)
运行结果
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
.baidu.com TRUE / FALSE 1694402216 BAIDUID 8EACF46325380EC4B7966E9819D326D7:FG=1
.baidu.com TRUE / FALSE 3810349863 BIDUPSID 8EACF46325380EC476BBCD9FEE52E012
.baidu.com TRUE / FALSE H_PS_PSSID 36550_36464_37358_37300_36885_34813_36570_37174_37260_26350_22158
.baidu.com TRUE / FALSE 3810349863 PSTM 1662866215
www.baidu.com FALSE / FALSE BDSVRTM 0
www.baidu.com FALSE / FALSE BD_HOME 1
LWPCookieJar 同样可以读取和保存 COOKIES
import http.cookiejar
import urllib.request

# Save the cookies set by a site to a file in libwww-perl (LWP) format.
filename = 'cookies2.LWP'
# cookie = http.cookiejar.MozillaCookieJar(filename)  # alternative: Mozilla text format
cookie = http.cookiejar.LWPCookieJar(filename)  # file-backed jar in LWP format
handler = urllib.request.HTTPCookieProcessor(cookie)  # handler that fills the jar
opener = urllib.request.build_opener(handler)
# Only the cookies are needed, so the response is closed without reading it.
with opener.open('http://www.baidu.com') as response:
    pass
# Keep session (discard) cookies and already-expired cookies in the file too.
cookie.save(ignore_discard=True, ignore_expires=True)
运行结果:
#LWP-Cookies-2.0
Set-Cookie3: BAIDUID="1E34DD6831E83D296871DB5075C448B9:FG=1"; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2023-09-11 03:29:01Z"; comment=bd; version=0
Set-Cookie3: BIDUPSID=1E34DD6831E83D29F53CBA16B6C159CB; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2090-09-29 06:43:08Z"; version=0
Set-Cookie3: H_PS_PSSID=36554_36462_37361_36884_34812_37274_36570_37071_37243_37259_26350; path="/"; domain=".baidu.com"; path_spec; domain_dot; discard; version=0
Set-Cookie3: PSTM=1662866940; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2090-09-29 06:43:08Z"; version=0
Set-Cookie3: BDSVRTM=0; path="/"; domain="www.baidu.com"; path_spec; discard; version=0
Set-Cookie3: BD_HOME=1; path="/"; domain="www.baidu.com"; path_spec; discard; version=0
import http.cookiejar
import urllib.request

# Reuse cookies from a previously saved LWP-format file for a new request.
cookie = http.cookiejar.LWPCookieJar()
# load() reads the local cookie file; session and expired cookies are kept too.
cookie.load('cookies2.LWP', ignore_discard=True, ignore_expires=True)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
# The context manager closes the connection once the body has been read.
with opener.open('http://www.baidu.com') as response:
    print(response.read().decode('utf-8'))  # prints the page's HTML source