# requests的 post 与 session.post
# 案例: 访问 https://www.xbiquge.la/modules/article/bookcase.php (新笔趣阁-我的书架)
# 此网址在未登录时候不能访问
import requests
from fake_useragent import UserAgent
from pyquery import PyQuery as pq
def resPrint(res):
    """Print a short summary of an HTTP response.

    Writes, in order: an 80-dash separator, the response URL, the status
    code, the Content-Type / Set-Cookie / Location headers (each prefixed
    with its zero-padded position among all response headers) and the body
    length in characters. For a non-empty body the page ``<title>`` text is
    printed, followed by the text of ``div.gridtop`` when it is non-empty.

    Args:
        res: A response-like object exposing ``url``, ``status_code``,
            ``headers`` (a mapping) and ``text``, e.g. ``requests.Response``.
    """
    # Header names are case-insensitive in HTTP, so the filter compares
    # lower-cased names; the name is still printed exactly as received.
    wanted = {'content-type', 'set-cookie', 'location'}

    print('-' * 80)
    print(res.url)
    print(res.status_code)
    for index, (name, value) in enumerate(res.headers.items()):
        if name.lower() in wanted:
            print(str(index).zfill(3), name, value)

    body = res.text
    print(len(body))
    if not body:
        # Nothing to parse (e.g. a redirect response without a body).
        return

    doc = pq(body)
    print(doc('title').text())
    gridtop = doc('div.gridtop').text()
    if gridtop:
        # Shown only when logged in and the bookcase page is accessible.
        print(gridtop)
# Request headers shared by every call below; the User-Agent value comes from
# fake_useragent's UserAgent().random and is evaluated once here.
headers = {
    'User-Agent': UserAgent().random,
}
host = 'https://www.xbiquge.la'
bookcaseUrl = host + '/modules/article/bookcase.php'
# Not logged in, redirects disabled: show the raw response of the bookcase
# page. The timeout keeps the script from hanging on an unresponsive server.
resPrint(
    requests.get(
        bookcaseUrl,
        headers=headers,
        allow_redirects=False,
        timeout=10,
    )
)
"""
allow_redirects = False 情况
https://www.xbiquge.la/modules/article/bookcase.php
302 !! Moved Temporarily (Found) 临时移动, 与301类似 但资源只是临时被移动。客户端应继续使用原有URI
002 Content-Type text/html; charset=UTF-8 !! 编码
006 Location http://www.xbiquge.la/login.php !! 这里说明了跳转
0 !! 说明没有任何文本内容响应
"""
# Same request with the default redirect handling (redirects are followed).
# The timeout keeps the script from hanging on an unresponsive server.
resPrint(requests.get(bookcaseUrl, headers=headers, timeout=10))
"""
allow_redirects = True 情况
https://www.xbiquge.la/login.php !! 说明了直接访问 (新笔趣阁-我的书架) 会跳转, 这一页是无法直接请求
200
002 Content-Type text/html; charset=UTF-8
4509
新笔趣阁_书友最值得收藏的网络小说阅读网
"""
# So how should this page be requested?
# In a browser the steps are: log in first, then visit the page. So POST the
# login form first.
loginUrl = host + '/login.php'
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a local config file that is not committed.
data = {
    'LoginForm[username]': 'CY3761',
    'LoginForm[password]': '123456789a',
}
# Redirects are disabled so the login response itself (status, Set-Cookie,
# Location) can be inspected. The timeout prevents an indefinite hang.
res = requests.post(
    loginUrl,
    data=data,
    headers=headers,
    allow_redirects=False,
    timeout=10,
)
resPrint(res)
"""
allow_redirects = False 情况
https://www.xbiquge.la/login.php
302
002 Content-Type text/html; charset=UTF-8
带有头信息 cookie的
006 Set-Cookie PHPSESSID=6s5o068e20stbsm7jukmtr8kf4; path=/; HttpOnly, username=User; expires=Tue, 22-Nov-2022 11:04:05 GMT; Max-Age=31104000; path=/, _identity-frontend=7843f318d3979e1f547b6ef6d104bcbb68b97c05fe7a9293a999f8a739321a11a%3A2%3A%7Bi%3A0%3Bs%3A18%3A%22_identity-frontend%22%3Bi%3A1%3Bs%3A19%3A%22%5B328622%2C%22%22%2C2592000%5D%22%3B%7D; expires=Mon, 27-Dec-2021 11:04:05 GMT; Max-Age=2592000; path=/; httponly
010 Location http://www.xbiquge.la/ !! 应该登录成功进行跳转
0
"""
# Keep the cookies carried by the login response.
cookie = res.cookies
# With those cookies attached, can the bookcase page be accessed now?
# The timeout keeps the script from hanging on an unresponsive server.
resPrint(
    requests.get(
        bookcaseUrl,
        headers=headers,
        allow_redirects=False,
        cookies=cookie,
        timeout=10,
    )
)
"""
200
002 Content-Type text/html; charset=UTF-8
009 Set-Cookie _identity-frontend=7843f318d3979e1f547b6ef6d104bcbb68b97c05fe7a9293a999f8a739321a11a%3A2%3A%7Bi%3A0%3Bs%3A18%3A%22_identity-frontend%22%3Bi%3A1%3Bs%3A19%3A%22%5B328622%2C%22%22%2C2592000%5D%22%3B%7D; expires=Mon, 27-Dec-2021 11:32:36 GMT; Max-Age=2592000; path=/; httponly
3232
新笔趣阁_书友最值得收藏的网络小说阅读网
您的书架可收藏 200 本,已收藏 0 本。(等级越高藏书量越大)
成功啦!!
"""
# Passing cookies by hand is a bit tedious; try session.post instead.
# The session is used as a context manager so that it is closed when the
# demo is done; `session` stays bound (closed) afterwards.
with requests.Session() as session:
    # Log in; the session keeps the cookies it receives for later calls.
    resPrint(session.post(loginUrl, data=data, headers=headers, timeout=10))
    # My bookcase: no explicit cookies argument is passed this time.
    resPrint(session.get(bookcaseUrl, headers=headers, timeout=10))
"""
https://www.xbiquge.la/
200
002 Content-Type text/html; charset=UTF-8
27340
【笔趣阁】_笔趣阁小说网_笔趣阁小说阅读网_新笔趣阁
--------------------------------------------------------------------------------
https://www.xbiquge.la/modules/article/bookcase.php
200
002 Content-Type text/html; charset=UTF-8
009 Set-Cookie _identity-frontend=7843f318d3979e1f547b6ef6d104bcbb68b97c05fe7a9293a999f8a739321a11a%3A2%3A%7Bi%3A0%3Bs%3A18%3A%22_identity-frontend%22%3Bi%3A1%3Bs%3A19%3A%22%5B328622%2C%22%22%2C2592000%5D%22%3B%7D; expires=Mon, 27-Dec-2021 11:37:42 GMT; Max-Age=2592000; path=/; httponly
3232
新笔趣阁_书友最值得收藏的网络小说阅读网
您的书架可收藏 200 本,已收藏 0 本。(等级越高藏书量越大)
同样可以, 而且更方便
"""
# Supplement: sending a request through a proxy.
# Original author's note: allow_redirects=False needs to be set here.
proxieUrl = 'https://2021.ip138.com/'
# Proxy URL to use for requests whose scheme is https.
proxies = {
    'https': 'http://127.0.0.1:10809',
}
# The timeout prevents an indefinite hang, e.g. when the proxy stalls.
res = requests.get(
    proxieUrl,
    headers=headers,
    proxies=proxies,
    allow_redirects=False,
    timeout=10,
)
resPrint(res)
# Same empty-body guard that resPrint applies before parsing: with redirects
# disabled the response may carry no body, and then there is nothing to parse.
if res.text:
    print(pq(res.text)('p[align="center"]').eq(0).text())