#!/usr/bin/env python
#-*-coding:utf-8-*-
'''
先发送post请求,获取cookie,带上cookie请求登陆之后的页面
'''
import requests
from lxml import etree
import random
import time
def login(timeout=10):
    """Log in to authserver.jit.edu.cn and return the cookies it sets.

    A POST carrying the credential form is sent to the login URL and the
    cookie jar of the response is converted into a plain dict.

    Args:
        timeout: Seconds to wait for the server. ``requests`` has no default
            timeout, so without one a stalled server would block forever.

    Returns:
        A dict mapping cookie names to values, or ``None`` when the request
        failed (the error is printed, not raised).
    """
    # login_url can be found with a packet-capture tool, or read from the
    # login form's action="" attribute.
    login_url = "http://authserver.jit.edu.cn/authserver/login?service=http%3A%2F%2Fehall.jit.edu.cn%2Flogin%3Fservice%3Dhttp%3A%2F%2Fehall.jit.edu.cn%2Fnew%2Findex.html"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36'
    }
    # Placeholder credentials; fill in real values before running.
    body = {
        "usercode": "###",
        "password": "###",
    }
    try:
        # Only the network call sits inside the try block so that unrelated
        # programming errors are not silently reported as a login failure.
        res = requests.post(
            url=login_url, headers=headers, data=body, timeout=timeout
        )
    except requests.RequestException as err:
        print('获取cookie失败:\n{0}'.format(err))
        return None
    # Convert the returned cookie jar into a plain dict.
    cookie = requests.utils.dict_from_cookiejar(res.cookies)
    print(cookie)
    return cookie
# Send the cookies obtained at login along with the page request
def get_page(url, timeout=10):
    """Fetch ``url`` with the login cookies attached and return the page text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server. ``requests`` has no default
            timeout, so without one a stalled server would block forever.

    Returns:
        The response body decoded as UTF-8.
    """
    # Cookies obtained by logging in. login() returns None on failure, in
    # which case the request below is simply sent without cookies.
    cookie = login()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36'
    }
    response = requests.get(
        url=url, headers=headers, cookies=cookie, timeout=timeout
    )
    # Force UTF-8 instead of relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    return response.text
def parse_html(html):
    """Pull the date parts and titles out of the page markup.

    Three XPath queries are run against the parsed document: the ``h1`` text
    and the ``h2`` text inside every ``span.time`` (treated as day and month
    by the rest of the script), and the ``title`` attribute selected by the
    ``following::a[1]`` step.

    Args:
        html: Page source as a string.

    Returns:
        A ``zip`` iterator of ``(day, month, title)`` tuples. It is lazy and
        can be consumed only once; it stops at the shortest result list.
    """
    tree = etree.HTML(html)
    queries = (
        '//span[@class="time"]/h1/text()',
        '//span[@class="time"]/h2/text()',
        '//span[@class="time"]/following::a[1]/@title',
    )
    # Every query is evaluated right here; only the pairing-up is lazy.
    columns = [tree.xpath(query) for query in queries]
    return zip(*columns)
def openfile():
    """Open ``banche02.txt`` for writing as UTF-8 text and return the handle.

    Any existing file of that name is truncated. Closing the returned file
    object is the caller's responsibility.
    """
    return open('banche02.txt', mode='w', encoding='utf-8')
def savefile(fd, data):
    """Write each record in ``data`` to ``fd`` as three labelled lines.

    Args:
        fd: Writable text stream.
        data: Iterable of indexable records; positions 0, 1 and 2 are written
            as the ``day``, ``month`` and ``title`` lines respectively.
    """
    labels = ('day', 'month', 'title')
    for record in data:
        # One write per field, in order, each value converted with str().
        for position, label in enumerate(labels):
            fd.write(f'{label}:{record[position]!s}\n')
#爬取数据
def getInfo():
    """Download the page, extract its entries, and save them to the output file.

    The page is fetched and parsed before the output file is opened, so a
    failed download does not truncate the results of an earlier run. The file
    is written inside a ``with`` block so it is always flushed and closed.
    """
    url = 'https://www.jit.edu.cn/xyzhfw/bcsk.htm'
    html = get_page(url)
    data = parse_html(html)
    with openfile() as fd:
        savefile(fd, data)
    # Short random pause (under one second) after the request.
    time.sleep(random.random())
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    getInfo()
# Python requests: handling cookies, method 2
# Latest recommended article published on 2024-05-14 13:53:37