Python requests 处理 cookie 方法 2

#!/usr/bin/env python
#-*-coding:utf-8-*-


'''
Send a POST login request to obtain cookies, then use those cookies to
request a page that is only available after logging in.
'''

import requests
from lxml import etree
import random
import time

def login(timeout=10):
    """Log in to the auth server and return the cookies it hands out.

    The credentials are sent as a form-encoded POST. The request goes
    through a ``requests.Session`` so that cookies set on intermediate
    redirect responses are collected too; ``Response.cookies`` alone only
    contains the cookies set by the final response of a redirect chain.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        dict | None: Mapping of cookie name to value gathered during the
        login exchange, or ``None`` when the request failed (the error is
        printed rather than raised, so this stays best-effort).
    """
    # The login URL can be captured with a packet sniffer, or read from the
    # login form's action="" attribute.
    login_url = "http://authserver.jit.edu.cn/authserver/login?service=http%3A%2F%2Fehall.jit.edu.cn%2Flogin%3Fservice%3Dhttp%3A%2F%2Fehall.jit.edu.cn%2Fnew%2Findex.html"
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36'
    }
    body = {
        "usercode": "###",
        "password": "###"
    }
    try:
        with requests.Session() as session:
            session.post(url=login_url, headers=headers, data=body, timeout=timeout)
            # Convert the session's cookie jar into a plain dict.
            cookie = requests.utils.dict_from_cookiejar(session.cookies)
    except requests.RequestException as err:
        print('获取cookie失败:\n{0}'.format(err))
        return None
    print(cookie)
    return cookie


def get_page(url, timeout=10):
    """Fetch ``url`` with the login cookies attached and return its text.

    The cookies returned by ``login()`` are passed through the ``cookies``
    argument of ``requests.get`` (they are not placed in the headers).
    ``login()`` is called on every invocation.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        str: The response body decoded as UTF-8.
    """
    # Obtain the cookies issued at login.
    # NOTE(review): login() appears to yield None on failure, in which case
    # the request below is sent without cookies - confirm this is intended.
    cookie = login()
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36'
    }
    response = requests.get(url=url, headers=headers, cookies=cookie, timeout=timeout)
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    response.encoding = 'utf-8'
    return response.text

def parse_html(html):
    """Pull (day, month, title) records out of the page markup.

    Three XPath queries are run against the parsed document:

    * the text of ``<h1>`` children of ``<span class="time">`` (day),
    * the text of ``<h2>`` children of ``<span class="time">`` (month),
    * the ``title`` attribute selected by ``following::a[1]`` from those
      spans (title).

    Args:
        html: Markup of the page as a string.

    Returns:
        A ``zip`` iterator of ``(day, month, title)`` tuples. It can be
        consumed only once and stops at the shortest of the three result
        lists.
    """
    tree = etree.HTML(html)
    queries = (
        '//span[@class="time"]/h1/text()',
        '//span[@class="time"]/h2/text()',
        '//span[@class="time"]/following::a[1]/@title',
    )
    columns = [tree.xpath(query) for query in queries]
    return zip(*columns)

def openfile():
    """Open ``banche02.txt`` for writing as UTF-8 text and return the handle.

    The path is relative, so the file is created (or truncated if it already
    exists) in the current working directory. Closing the handle is left to
    the caller.
    """
    return open('banche02.txt', mode='w', encoding='utf-8')

def savefile(fd,data):
    """Write each record in ``data`` to ``fd`` as three labelled lines.

    Args:
        fd: Writable text stream; only its ``write`` method is used. It is
            not closed here.
        data: Iterable of indexable records whose first three elements are
            the day, month and title, in that order.

    Every record produces ``day:<...>``, ``month:<...>`` and ``title:<...>``
    lines, each terminated by a newline. Values are converted with ``str``.
    """
    labels = ('day', 'month', 'title')
    for record in data:
        for position, label in enumerate(labels):
            fd.write("{0}:{1}\n".format(label, str(record[position])))

# Scrape the data.
def getInfo():
    """Download the target page, parse it and save the records to disk.

    The page is fetched and parsed *before* the output file is opened, so a
    failed download no longer leaves a freshly truncated, empty file behind.
    The file is written inside a ``with`` block so the handle is always
    closed, even if writing raises.
    """
    url = 'https://www.jit.edu.cn/xyzhfw/bcsk.htm'
    html = get_page(url)
    data = parse_html(html)
    with openfile() as fd:
        savefile(fd, data)
    # Random pause of up to one second, kept from the original
    # (presumably to throttle repeated runs - confirm it is still needed).
    time.sleep(random.random())
      

if __name__ == "__main__":
    # Run the scraper only when this file is executed as a script.
    getInfo()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值