Python requests 处理 cookie 方法 3

最新推荐文章于 2023-11-01 17:55:03 发布

芝识分享

最新推荐文章于 2023-11-01 17:55:03 发布

阅读量125

点赞数

分类专栏： python 文章标签： python xpath cookie

本文链接：https://blog.csdn.net/weixin_42037528/article/details/117855979

版权

python 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

#!/usr/bin/env python
#-*-coding:utf-8-*-


'''
requests 处理cookie的几种方法
'''

import requests
from lxml import etree
import random
import time


# Log in through a requests session so the login cookies are reused for the data request
def get_page(url, timeout=10):
    """Log in to the auth server, then fetch ``url`` with the same session.

    A POST with the credentials is sent to the login URL first; the session
    keeps whatever cookies that response sets and sends them along with the
    following GET for ``url``.  The HTTP status of either response is not
    checked, so an error page is returned as text just like a normal page.

    Args:
        url: Address of the page to download after logging in.
        timeout: Seconds to wait for each request before giving up.  Without
            a timeout a stalled server would block the script forever.

    Returns:
        The body of the GET response decoded as UTF-8 text.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            a request exceeds ``timeout``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36'
    }
    # Login URL
    login_url = "http://authserver.jit.edu.cn/authserver/login?service=http%3A%2F%2Fehall.jit.edu.cn%2Flogin%3Fservice%3Dhttp%3A%2F%2Fehall.jit.edu.cn%2Fnew%2Findex.html"
    # Placeholder credentials; "###" must be replaced with real values.
    post_data = {
        "usercode": "###",
        "password": "###"
    }
    # The with-block closes the session (and its pooled connections) even
    # when one of the requests raises.
    with requests.session() as session:
        # Send the login POST so the session picks up the cookies
        session.post(login_url, headers=headers, data=post_data, timeout=timeout)
        # Request the actual data page with the same session
        response = session.get(url=url, headers=headers, timeout=timeout)
        # Force the decoding used for response.text
        response.encoding = 'utf-8'
        html = response.text
    return html

def parse_html(html):
    """Extract ``(day, month, title)`` triples from the page markup.

    Every ``<span class="time">`` element produces exactly one triple:
    the text of its ``h1`` children (day), the text of its ``h2`` children
    (month) and the ``title`` attribute of the first ``<a>`` that follows
    the span in document order.  Each field is looked up relative to its
    own span, so an entry that lacks one of the pieces yields an empty
    string for that field instead of shifting the values of all later
    entries, which is what zipping three document-wide lists would do.

    Args:
        html: Page markup as a string.

    Returns:
        A ``zip`` iterator of ``(day, month, title)`` string tuples; it is
        empty when the markup contains no matching span.
    """
    html_elem = etree.HTML(html)
    if html_elem is None:
        # NOTE(review): lxml appears to return None for markup without any
        # parseable content (e.g. an empty string) - confirm for the
        # installed version.  Nothing can be extracted in that case.
        return zip()

    days, months, titles = [], [], []
    for span in html_elem.xpath('//span[@class="time"]'):
        # ''.join gives '' for a missing node and the full text otherwise.
        days.append(''.join(span.xpath('h1/text()')))
        months.append(''.join(span.xpath('h2/text()')))
        titles.append(''.join(span.xpath('following::a[1]/@title')))
    return zip(days, months, titles)

def openfile():
    """Open ``banche03.txt`` for writing as UTF-8 text and return the handle.

    The file is created if missing and truncated if it already exists.
    The caller owns the returned handle and is responsible for closing it.
    """
    return open('banche03.txt', mode='w', encoding='utf-8')

def savefile(fd,data):
    """Write each record of ``data`` to ``fd`` as three labelled lines.

    For every record the first three positions are written, in order, as
    ``day:<value>``, ``month:<value>`` and ``title:<value>``, each followed
    by a newline.  Values are converted with ``str``.

    Args:
        fd: Object with a ``write`` method accepting text.
        data: Iterable of indexable records with at least three items.
    """
    labels = ('day', 'month', 'title')
    for record in data:
        for position, label in enumerate(labels):
            fd.write(f"{label}:{record[position]!s}\n")
        

# Crawl the data
def getInfo():
    """Download the target page, parse it and write the result to disk.

    The page is fetched and parsed before the output file is opened, so a
    failed request does not truncate an output file left by an earlier run.
    The file handle is closed by the ``with`` block even if writing raises.
    """
    url = 'https://www.jit.edu.cn/xyzhfw/bcsk.htm'
    html = get_page(url)
    data = parse_html(html)
    with openfile() as fd:
        savefile(fd, data)
    # Random pause of less than one second, kept from the original flow.
    time.sleep(random.random())

# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    getInfo()

芝识分享

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python requests 处理 cookie 方法 3

#!/usr/bin/env python#-*-coding:utf-8-*-'''requests 处理cookie的几种方法'''import requestsfrom lxml import etreeimport randomimport time#将cookie值放在headers中def get_page(url): #headers headers = { 'User-Agent':'Mozilla/5.0 (Windows N
复制链接

扫一扫