某大学教务处(强智科技)成绩查询,课表爬取

一.登录

1.先要处理登录界面的cookie,多清空cookie刷新几次抓包就会发现这个sid为服务器返回的id

仅需带上13位的时间戳对该接口进行请求就能获得cookie

2.接下来要处理登录验证的第一个参数uid

可以发现这个接口返回的uid刚好就是验证所需的uid,仅需带上时间戳和前面获取的cookie就能请求到

这里的content就是验证码图片经过base64编码后的形式,我们需要将这个content保存下来,后面识别验证码时会用到

3.用户password加密也需要处理一下,在输入框中随便输入几个数,点击登录,即可找到ticket入口

service为进入系统后需要提供服务的链接,id就是前面提到的uid,code是验证码,password被加密成了长度为256的字符串,显然是由js代码加密而成

点击发起程序,找到加密入口S函数

这里的E就是我们需要的加密后的password

进入该函数内部

咱水平不够咱也看不懂写的啥,直接把代码照搬到本地的js,一点点运行一点点补环境,最后补完的js代码

// Shift amount used by y() to extract the carry out of a digit product.
var K=16
// Mask applied by y() to keep the low part of a digit product (65535 = 2^16 - 1).
var X=65535
// Template digit array: i() copies it to initialise every new big number.
var R = new Array(131).fill(0); // create the array and fill it with zeros
// Digit radix (65536 = 2^16) used by g() when wrapping borrows.
var j=65536
// Bits per digit, used by T() to split a shift into whole digits and leftover bits.
var B=16
// H[n] has the n lowest bits set; T() uses it to mask the bits moved between neighbouring digits.
var H=[0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535]
// Hexadecimal digit characters, indexed by nibble value in l().
var V=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
var encryptionConfig = {
    "e": {
        "digits": [
            1,
            1,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
        ],
        "isNeg": false
    },
    "d": {
        "digits": [
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
        ],
        "isNeg": false
    },
    "m": {
        "digits": [
            59313,
            4375,
            54507,
            5267,
            8345,
            43610,
            49971,
            28563,
            34983,
            36521,
            17297,
            62027,
            42744,
            32131,
            40043,
            48417,
            5636,
            46659,
            52373,
            20768,
            28635,
            46498,
            55076,
            13948,
            44453,
            44804,
            40613,
            1466,
            26896,
            54350,
            28506,
            28712,
            44726,
            4974,
            46852,
            32655,
            60720,
            2973,
            7722,
            43040,
            10398,
            28111,
            52739,
            6542,
            43865,
            20892,
            59308,
            8898,
            58877,
            36302,
            41921,
            27719,
            59291,
            10923,
            8559,
            53747,
            10707,
            59976,
            48415,
            32958,
            37390,
            57449,
            45414,
            46574,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
        ],
        "isNeg": false
    },
    "chunkSize": 126,
    "radix": 16,
    "barrett": {
        "modulus": {
            "digits": [
                59313,
                4375,
                54507,
                5267,
                8345,
                43610,
                49971,
                28563,
                34983,
                36521,
                17297,
                62027,
                42744,
                32131,
                40043,
                48417,
                5636,
                46659,
                52373,
                20768,
                28635,
                46498,
                55076,
                13948,
                44453,
                44804,
                40613,
                1466,
                26896,
                54350,
                28506,
                28712,
                44726,
                4974,
                46852,
                32655,
                60720,
                2973,
                7722,
                43040,
                10398,
                28111,
                52739,
                6542,
                43865,
                20892,
                59308,
                8898,
                58877,
                36302,
                41921,
                27719,
                59291,
                10923,
                8559,
                53747,
                10707,
                59976,
                48415,
                32958,
                37390,
                57449,
                45414,
                46574,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0
            ],
            "isNeg": false
        },
        "k": 64,
        "mu": {
            "digits": [
                7469,
                13822,
                15506,
                32982,
                50429,
                62979,
                64339,
                60597,
                40979,
                61913,
                20952,
                38396,
                43669,
                60926,
                10345,
                21166,
                11931,
                31731,
                17652,
                54018,
                14346,
                5098,
                29577,
                27601,
                14064,
                33529,
                25220,
                39088,
                8044,
                19738,
                21550,
                5198,
                30005,
                21337,
                934,
                14453,
                28049,
                17274,
                16321,
                32160,
                3193,
                55263,
                27029,
                41238,
                14094,
                25353,
                47316,
                6175,
                31709,
                27325,
                36707,
                32884,
                7478,
                49873,
                62514,
                44522,
                9303,
                45997,
                33566,
                1250,
                26644,
                59158,
                49823,
                26680,
                1,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0
            ],
            "isNeg": false
        },
        "bkplus1": {
            "digits": [
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                1,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0,
                0
            ],
            "isNeg": false
        }
    }
};

// Key object built from a plain configuration literal.
// It copies the e, d and m big-number records together with chunkSize and
// radix, and creates a `barrett` member (modulus, k, mu, bkplus1) that carries
// the modular arithmetic routines used by L().
function BigNumConfig(config) {
    // Duplicate a {digits, isNeg} record; slice() keeps the instance from
    // sharing digit arrays with the configuration object.
    function cloneBig(source) {
        return {
            digits: source.digits.slice(),
            isNeg: source.isNeg
        };
    }

    var reduction = config.barrett;

    this.e = cloneBig(config.e);
    this.d = cloneBig(config.d);
    this.m = cloneBig(config.m);
    this.chunkSize = config.chunkSize;
    this.radix = config.radix;
    this.barrett = {
        modulus: cloneBig(reduction.modulus),
        k: reduction.k,
        mu: cloneBig(reduction.mu),
        bkplus1: cloneBig(reduction.bkplus1)
    };

    // Square-and-multiply exponentiation: returns e raised to the power t,
    // with every product reduced through multiplyMod().
    this.barrett.powMod = function (e, t) {
        var result = new i;
        result.digits[0] = 1;
        var base = e;
        var exponent = t;
        while (true) {
            // Fold in the current power when the lowest exponent bit is set.
            if ((exponent.digits[0] & 1) != 0) {
                result = this.multiplyMod(result, base);
            }
            exponent = T(exponent, 1);
            // Finished once the shifted exponent has become zero.
            if (exponent.digits[0] == 0 && h(exponent) == 0) {
                break;
            }
            base = this.multiplyMod(base, base);
        }
        return result;
    };

    // Multiply two big numbers and reduce the product with modulo().
    this.barrett.multiplyMod = function (e, t) {
        var product = y(e, t);
        return this.modulo(product);
    };

    // Reduce e with respect to this.modulus using the precomputed mu.
    // NOTE(review): the steps look like Barrett reduction (as the field names
    // suggest) - confirm against the upstream library.
    // f is expected to be the big-integer addition helper at script scope.
    this.barrett.modulo = function (e) {
        var highPart = O(e, this.k - 1);
        var estimate = y(highPart, this.mu);
        var quotient = O(estimate, this.k + 1);
        var lowInput = x(e, this.k + 1);
        var scaled = y(quotient, this.modulus);
        var lowScaled = x(scaled, this.k + 1);
        var remainder = g(lowInput, lowScaled);
        if (remainder.isNeg) {
            remainder = f(remainder, this.bkplus1);
        }
        // Subtract the modulus until the remainder is smaller than it.
        while (k(remainder, this.modulus) >= 0) {
            remainder = g(remainder, this.modulus);
        }
        return remainder;
    };
}

// Create the BigNumConfig instance from the configuration above.
var e = new BigNumConfig(encryptionConfig);
// Return the characters of string e in reverse order.
function s(e) {
    var reversed = "";
    var idx = e.length;
    while (idx-- > 0) {
        reversed += e.charAt(idx);
    }
    return reversed;
}
// Format the low 16 bits of e as four hexadecimal characters,
// most significant nibble first.
function l(e) {
    var lowNibbleFirst = "";
    for (var nibble = 0; nibble < 4; nibble++) {
        lowNibbleFirst += V[e & 15];
        e >>>= 4;
    }
    // Nibbles were collected lowest first, so flip the string.
    return s(lowNibbleFirst);
}
// Return a new big number equal to e shifted right by t bits.
// The sign flag of e is carried over unchanged.
function T(e, t) {
    var wholeDigits = Math.floor(t / B);
    var bitShift = t % B;
    var carryShift = B - bitShift;
    var shifted = new i;

    // Drop the whole digits first, then shift the remaining bits digit by digit.
    b(e.digits, wholeDigits, shifted.digits, 0, e.digits.length - wholeDigits);

    var last = shifted.digits.length - 1;
    for (var pos = 0; pos < last; pos++) {
        // Low bits of the next digit move into the top of the current one.
        var carried = (shifted.digits[pos + 1] & H[bitShift]) << carryShift;
        shifted.digits[pos] = (shifted.digits[pos] >>> bitShift) | carried;
    }
    shifted.digits[last] >>>= bitShift;
    shifted.isNeg = e.isNeg;
    return shifted;
}
// Compare two big numbers: returns 1 when e > t, -1 when e < t, 0 when equal.
// Digits are compared from the highest index of e.digits downwards.
function k(e, t) {
    if (e.isNeg != t.isNeg) {
        // Different signs: the non-negative operand is the larger one.
        return 1 - 2 * Number(e.isNeg);
    }
    for (var pos = e.digits.length - 1; pos >= 0; pos--) {
        var left = e.digits[pos];
        var right = t.digits[pos];
        if (left == right) {
            continue;
        }
        // For negative operands the larger magnitude is the smaller value.
        var isSmaller = e.isNeg ? left > right : left < right;
        return 1 - 2 * Number(isSmaller);
    }
    return 0;
}
// Big-integer addition: returns e + t as a new big number.
// Operands with different signs are handled by temporarily flipping t's sign
// and delegating to g(); t.isNeg is restored before returning.
// This helper is required by g() and by the barrett modulo routine, which both
// call f().
function f(e, t) {
    var sum;
    if (e.isNeg != t.isNeg) {
        t.isNeg = !t.isNeg;
        sum = g(e, t);
        t.isNeg = !t.isNeg;
    } else {
        sum = new i;
        var carry = 0;
        var total;
        for (var pos = 0; pos < e.digits.length; ++pos) {
            total = e.digits[pos] + t.digits[pos] + carry;
            sum.digits[pos] = total % j;
            carry = Number(total >= j);
        }
        sum.isNeg = e.isNeg;
    }
    return sum;
}

// Big-integer subtraction: returns e - t as a new big number.
// Operands with different signs are handled by temporarily flipping t's sign
// and delegating to f(); t.isNeg is restored before returning.
function g(e, t) {
    var diff;
    if (e.isNeg != t.isNeg) {
        t.isNeg = !t.isNeg;
        diff = f(e, t);
        t.isNeg = !t.isNeg;
        return diff;
    }

    diff = new i;
    var borrow = 0;
    var raw;
    for (var pos = 0; pos < e.digits.length; ++pos) {
        raw = e.digits[pos] - t.digits[pos] + borrow;
        diff.digits[pos] = raw % j;
        // The % operator keeps the sign of the dividend, so wrap negatives manually.
        if (diff.digits[pos] < 0) {
            diff.digits[pos] += j;
        }
        borrow = 0 - Number(raw < 0);
    }

    if (borrow == -1) {
        // A borrow is left over: the magnitude of t exceeded that of e, so
        // negate the digits and flip the sign of the result.
        borrow = 0;
        for (var idx = 0; idx < e.digits.length; ++idx) {
            raw = 0 - diff.digits[idx] + borrow;
            diff.digits[idx] = raw % j;
            if (diff.digits[idx] < 0) {
                diff.digits[idx] += j;
            }
            borrow = 0 - Number(raw < 0);
        }
        diff.isNeg = !e.isNeg;
    } else {
        diff.isNeg = e.isNeg;
    }
    return diff;
}
// Return a new big number holding the first t digits of e
// (indices 0 .. t-1); the remaining digits keep their initial zeros.
function x(e, t) {
    var lowPart = new i;
    b(e.digits, 0, lowPart.digits, 0, t);
    return lowPart;
}
// Copy up to i elements from array e (starting at index t) into array n
// (starting at index a). Copying stops at the end of e.
function b(e, t, n, a, i) {
    var stop = Math.min(t + i, e.length);
    var src = t;
    var dst = a;
    while (src < stop) {
        n[dst] = e[src];
        src++;
        dst++;
    }
}
// Return a new big number made of the digits of e from index t upwards,
// moved down to start at index 0 (a right shift by t whole digits).
function O(e, t) {
    var highPart = new i;
    b(e.digits, t, highPart.digits, 0, highPart.digits.length - t);
    return highPart;
}
// Return the index of the highest non-zero digit of e.
// Index 0 is never skipped, so an all-zero number yields 0.
function h(e) {
    var top = e.digits.length - 1;
    while (top > 0 && e.digits[top] == 0) {
        top--;
    }
    return top;
}
// Schoolbook multiplication: returns e * t as a new big number.
// The result is negative exactly when the operands' signs differ.
function y(e, t) {
    var product = new i;
    var topE = h(e);
    var topT = h(t);

    for (var row = 0; row <= topT; row++) {
        var carry = 0;
        for (var col = 0; col <= topE; col++) {
            var cell = row + col;
            var acc = product.digits[cell] + e.digits[col] * t.digits[row] + carry;
            product.digits[cell] = acc & X;
            carry = acc >>> K;
        }
        // Store the carry left over from this row.
        product.digits[row + topE + 1] = carry;
    }

    product.isNeg = e.isNeg != t.isNeg;
    return product;
}
// Convert big number e to a string in radix t.
// NOTE(review): C (presumably divide-with-remainder), J (presumably a digit
// character table) and F (presumably the zero constant) are not defined in the
// visible script, so this path only works once they are supplied. L() only
// calls it when the configured radix is not 16.
function c(e, t) {
    var radixValue = new i;
    radixValue.digits[0] = t;

    var quotRem = C(e, radixValue);
    var lowDigitFirst = J[quotRem[1].digits[0]];
    // Keep dividing while the quotient compares greater than F.
    while (k(quotRem[0], F) == 1) {
        quotRem = C(quotRem[0], radixValue);
        // The original also assigned an undeclared, never-read variable here;
        // it leaked an implicit global and has been dropped.
        lowDigitFirst += J[quotRem[1].digits[0]];
    }
    return (e.isNeg ? "-" : "") + s(lowDigitFirst);
}
// Render big number e as a hexadecimal string, four characters per digit,
// starting from the highest non-zero digit.
function u(e) {
    var hex = "";
    for (var pos = h(e); pos > -1; pos--) {
        hex += l(e.digits[pos]);
    }
    return hex;
}
// Big-number constructor. Passing boolean true leaves digits as null;
// otherwise digits starts as a copy of the zero-filled template R.
function i(e) {
    if (typeof e == "boolean" && e == 1) {
        this.digits = null;
    } else {
        this.digits = R.slice(0);
    }
    this.isNeg = false;
}
// Encrypt string t with key object e.
// The character codes of t are zero-padded to a multiple of e.chunkSize, each
// chunk is packed two codes per digit, raised to e.e through e.barrett.powMod,
// and the per-chunk results are joined with single spaces.
function L(e, t) {
    var codes = [];
    for (var pos = 0; pos < t.length; pos++) {
        codes.push(t.charCodeAt(pos));
    }
    while (codes.length % e.chunkSize != 0) {
        codes.push(0);
    }

    var pieces = [];
    for (var start = 0; start < codes.length; start += e.chunkSize) {
        var block = new i;
        var end = start + e.chunkSize;
        // Two character codes per digit: the first in the low byte, the second shifted up by 8 bits.
        for (var src = start, dst = 0; src < end; src += 2, dst++) {
            block.digits[dst] = codes[src] + (codes[src + 1] << 8);
        }

        var cipher = e.barrett.powMod(block, e.e);
        pieces.push(e.radix == 16 ? u(cipher) : c(cipher, e.radix));
    }
    return pieces.join(" ");
}

// Entry point called from Python: encrypt `password` with the module-level key object e.
function main123(password){
    var encrypted = L(e, password);
    return encrypted;
}

结尾封装了一个main123函数,通过python程序调用这段js代码并传入原始的password就能完成加密

4.loginUserToken加密

这是我后来才发现的加密,因为这个参数在请求头中,太不起眼了所以没注意,但是很奇怪的是虽然我每次请求都带着一个错误的loginUserToken,服务器还是能返回相应的结果,哈哈哈系统bug得提醒他们修了

前面提到的所有请求几乎都有这个参数

直接全局搜索loginUserToken,便有了惊奇的发现

这个加密不就是和密码的加密一样吗哈哈哈哈,传入的参数是lyasp+时间戳,直接拿下好吧

5.验证码部分

我选择直接调用OCR识别图片文字,将前面保存的content经过base64解码就能获得图片,将图片导入模型中,就能读取验证码图片的内容

# -*- coding: utf-8 -*-
import pytesseract
from PIL import Image
pytesseract.pytesseract.tesseract_cmd = r'D:\Tesseract-OCR\\tesseract.exe'  # replace with your actual path

# Run Tesseract OCR on the captcha image; the with-block closes the file
# handle once the text has been extracted.
with Image.open('./img.png') as image:
    text = pytesseract.image_to_string(image, lang='eng')

# Drop every whitespace character so the expression reads like "2+3=".
cleaned_text = ''.join(text.split())
print(cleaned_text)

# Character 0 and 2 are the operands, character 1 is the operator.
try:
    left = int(cleaned_text[0])
    operator = cleaned_text[1]
    right = int(cleaned_text[2])
except (IndexError, ValueError) as err:
    # OCR returned too few characters or a non-digit operand.
    raise ValueError('unrecognised captcha text: {!r}'.format(cleaned_text)) from err

# Anything other than '+' is treated as multiplication, as before.
if operator == '+':
    print(left + right)
else:
    print(left * right)

这里其实有点小问题,若验证码中是乘法,例如2*0=,这个时候模型就识别不出来了,只能识别加法

后面有时间再让会训练模型的舍友训练一个模型来干这个事

6.登录验证整合

将以上所有逻辑全部整合,得到登录验证代码

import base64
import io
import sys
import time

import execjs
import pytesseract
import requests
from lxml import etree
from PIL import Image
# Re-wrap stdout so everything printed below is encoded as gb18030
# (presumably to suit a Chinese Windows console - confirm for your terminal).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
def yanzheng(content):
    """Solve the arithmetic captcha carried in *content* and return the answer.

    Args:
        content: Captcha image as a base64 string; anything up to and
            including a ``base64,`` marker is ignored.

    Returns:
        int: the sum of the two operands when the operator character is
        ``+``, otherwise their product.

    Raises:
        ValueError: if the OCR text has fewer than three characters or its
            first/third character is not a digit.
    """
    imgdata = base64.b64decode(content.split('base64,')[-1])
    # Save the decoded picture to a file so it can be opened for OCR.
    with open('./img.png', 'wb') as file:
        file.write(imgdata)

    pytesseract.pytesseract.tesseract_cmd = r'D:\Tesseract-OCR\\tesseract.exe'  # replace with your actual path

    # Run Tesseract OCR; the with-block closes the image file handle afterwards.
    with Image.open('./img.png') as image:
        text = pytesseract.image_to_string(image, lang='eng')
    cleaned_text = ''.join(text.split())

    # Character 0 and 2 are the operands, character 1 is the operator.
    try:
        left = int(cleaned_text[0])
        operator = cleaned_text[1]
        right = int(cleaned_text[2])
    except (IndexError, ValueError) as err:
        raise ValueError('unrecognised captcha text: {!r}'.format(cleaned_text)) from err

    # Anything other than '+' is treated as multiplication, as before.
    if operator == '+':
        return left + right
    return left * right
username=input('请输入用户名:')

password=input('请输入密码:')

kksj=input('请输入开课时间(例如2023-2024-2为2023到2024年的第二学期):')

# Compile the encryption script once and reuse it; the with-block closes the
# file handle instead of leaving it open after every inline open() call.
with open('password加密.js', 'r', encoding='gbk') as js_file:
    js_context = execjs.compile(js_file.read())


def _timestamp_ms():
    """Return the current Unix time in milliseconds as a string."""
    return str(int(time.time()*1000))


def _auth_headers(extra=None):
    """Build the headers sent to auth.wust.edu.cn with a fresh loginUserToken.

    Args:
        extra: optional dict of additional headers merged on top of the
            common set.
    """
    built = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Referer': 'https://auth.wust.edu.cn/lyuapServer/login?service=https://portal.wust.edu.cn/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        'X-Requested-With': 'XMLHttpRequest',
        # The token is "lyasp" + millisecond timestamp, encrypted like the password.
        'loginUserToken': js_context.call('main123', 'lyasp' + _timestamp_ms()),
        'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    if extra:
        built.update(extra)
    return built


# Step 1: request pageInfo with a timestamp to obtain the sid cookie.
headers = _auth_headers()

params = {
    '_t': _timestamp_ms(),
}

response = requests.get('https://auth.wust.edu.cn/api/uap/unauthorize/pageInfo', params=params, headers=headers)

cookies = {
    'sid': response.cookies['sid'],
}

# Step 2: fetch the captcha; the reply carries the uid and the base64 image.
headers = _auth_headers()

params = {
    '_t': _timestamp_ms(),
    'uid': '',
}

response = requests.get('https://auth.wust.edu.cn/lyuapServer/kaptcha', params=params, cookies=cookies, headers=headers)

captcha = response.json()

uid = captcha.get('uid')

content = captcha.get('content')

# Step 3: post the credentials, captcha id and captcha answer to get the tickets.
headers = _auth_headers({
    'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
    'Origin': 'https://auth.wust.edu.cn',
})

data = {
    'username': username,
    'password': js_context.call('main123', password),
    'service': 'https://bkjx.wust.edu.cn/jsxsd/sso.jsp',
    'loginType': '',
    'id': uid,
    'code': str(yanzheng(content)),
}

response = requests.post('https://auth.wust.edu.cn/lyuapServer/v1/tickets', cookies=cookies, headers=headers, data=data)

print(response.content)

验证通过后,会返回两个json字段,一个是tgt(办事部门验证字段),一个是ticket(教学一体化服务平台验证字段),我们需要的成绩和课表在教学一体化服务平台,因此仅需获取该字段保存即可

# Take the ticket field from the login response; it is needed for the
# sso.jsp request that follows.
ticket = response.json().get('ticket')
if not ticket:
    # Fail here with the server's reply instead of sending an empty ticket later.
    raise RuntimeError('login response contains no ticket: ' + response.text)
print(ticket)

二.爬取成绩

1.cookie

点击登录按钮后,实际上经过了以下几个过程,首先,服务器会先给你分配两个cookie

这个时候,cookie当然不会是能够证明身份的,这个时候我们刚刚获取的ticket就有用了

通过抓包我们发现了一个状态为302(重定向)的请求,一般在登录时都会有重定向的操作,所以在抓包时要格外关注

打开该包的负载,我们惊奇的发现里面有个ticket

再带上刚刚返回的cookie发送请求,这个时候这个cookie才是能够证明我们身份的cookie(因为ticket是我们登录验证通过后返回的,可以理解为将这个cookie和ticket一起发送给服务器,他们俩同时出现,ticket能证明我们身份,则cookie此时也能证明我们的身份)

这时,我们再带着这段能证明我们身份的cookie去对成绩的链接发请求,就能得到对应的结果

这里的kksj按照x学年-y学年-第z学期的方式输入,就能返回该学期的所有成绩

将代码整合,如下

def _page_headers(referer, fetch_dest, fetch_site, fetch_user=False):
    """Assemble the browser-like headers for one bkjx.wust.edu.cn page request.

    Only the Referer, Sec-Fetch-Dest, Sec-Fetch-Site and the optional
    Sec-Fetch-User header differ between the requests below.
    """
    assembled = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Referer': referer,
        'Sec-Fetch-Dest': fetch_dest,
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': fetch_site,
    }
    if fetch_user:
        assembled['Sec-Fetch-User'] = '?1'
    assembled.update({
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    })
    return assembled


# Step 1: open the main page to be handed the two cookies.
headers = _page_headers('https://portal.wust.edu.cn/', 'document', 'same-site', fetch_user=True)

response = requests.get('https://bkjx.wust.edu.cn/jsxsd/framework/xsMain.jsp', headers=headers)

print(response.cookies)

cookies = {name: response.cookies[name] for name in ('bzb_jsxsd', 'SERVERID')}

# Step 2: present the ticket together with those cookies to sso.jsp.
headers = _page_headers('https://auth.wust.edu.cn/', 'document', 'same-site')

params = {'ticket': ticket}

response = requests.get('https://bkjx.wust.edu.cn/jsxsd/sso.jsp', params=params, cookies=cookies, headers=headers)

# Step 3: query the grade list for the chosen term with the same cookies.
headers = _page_headers('https://bkjx.wust.edu.cn/jsxsd/kscj/cjcx_frm', 'iframe', 'same-origin')

data = {
    'kksj': kksj,
    'kcxz': '',
    'kcmc': '',
    'xsfs': 'all',
}

response = requests.post('https://bkjx.wust.edu.cn/jsxsd/kscj/cjcx_list', cookies=cookies, headers=headers, data=data)

# Parse the returned HTML.
tree = etree.HTML(response.text)

# Select every table row under the element whose id is "dataList".
paragraphs = tree.xpath('//*[@id="dataList"]//tr')

print(paragraphs)

# Deletes spaces, newlines, tabs and carriage returns in a single pass.
strip_table = str.maketrans('', '', ' \n\t\r')

for row in paragraphs:
    for fragment in row.xpath('.//text()'):
        cell = fragment.translate(strip_table)
        # Skip text nodes that held nothing but whitespace.
        if cell != '':
            print(cell, end=' ')

    print('\n')

这里还有用到xpath的方法提取返回页面的元素,这里不做过多解释

运行结果:

三.课表爬取

这里的逻辑就不能按照爬取成绩那样弄了,那样弄会触发从登录页面重新开始的重定向,会很恶心

这里需要将登录参数service改为获取课表数据的url,如图

这时,登录后带上返回的ticket和服务器响应的cookie,再对重定向链接发起请求,就能直接跳转到课表对应的页面,代码如下

# -*- coding: utf-8 -*-
import base64
import requests
import execjs
import time
import pytesseract
import sys
import io
from PIL import Image
from lxml import etree
# Replace sys.stdout with a text wrapper over the same underlying buffer so
# that everything printed afterwards is encoded as GB18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
def yanzheng(content):
    """Solve the arithmetic captcha image and return its numeric answer.

    The image is decoded, written to ``./img.png``, read back and passed to
    Tesseract OCR. The recognised text (with all whitespace removed) is
    interpreted as ``<digit><operator><digit>``.

    Args:
        content: The captcha image as a base64 string. Everything up to and
            including the last ``base64,`` marker, if present, is ignored.

    Returns:
        int: The sum of the two digits when the operator character is ``+``;
        their product for any other operator character.

    Raises:
        ValueError: If OCR yields fewer than three characters, or if the
            first or third character is not a digit.
    """
    imgdata = base64.b64decode(content.split('base64,')[-1])
    # Save the image to a file.
    with open('./img.png', 'wb') as file:
        file.write(imgdata)

    # Hard-coded location of the Tesseract executable.
    pytesseract.pytesseract.tesseract_cmd = r'D:\Tesseract-OCR\\tesseract.exe'  # replace with the actual path

    # Open the image inside a context manager so its file handle is released
    # as soon as OCR has finished.
    with Image.open('./img.png') as image:
        # Run OCR with Tesseract.
        text = pytesseract.image_to_string(image, lang='eng')
    cleaned_text = ''.join(text.split())

    # Fail with an explicit message instead of an IndexError when OCR did not
    # produce "<digit><operator><digit>".
    if len(cleaned_text) < 3:
        raise ValueError(
            'captcha OCR result too short to evaluate: {!r}'.format(cleaned_text)
        )

    if cleaned_text[1] == '+':
        return int(cleaned_text[0]) + int(cleaned_text[2])
    # Any operator other than '+' is treated as multiplication.
    return int(cleaned_text[0]) * int(cleaned_text[2])
# Ask for the credentials and the term whose timetable should be fetched.
username = input('请输入用户名:')

password = input('请输入密码:')

xnxq01id = input('请输入查询课表时间(例如2023-2024-2为2023到2024年的第二学期):')

# Compile the encryption script once and close the file immediately, instead
# of re-opening (and never closing) it for every header/payload value.
with open('password加密.js', 'r', encoding='gbk') as js_file:
    _js_ctx = execjs.compile(js_file.read())


def _auth_headers(form_post=False):
    """Return headers for an auth.wust.edu.cn request.

    A new ``loginUserToken`` is computed on every call by passing
    ``'lyasp' + <current millisecond timestamp>`` to the JS ``main123``
    function.

    Args:
        form_post: When true, also include the ``Content-Type`` and
            ``Origin`` headers used by the ticket POST request.
    """
    token = _js_ctx.call('main123', 'lyasp' + str(int(time.time() * 1000)))
    result = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
    }
    if form_post:
        result['Content-Type'] = 'application/x-www-form-urlencoded;charset=UTF-8'
        result['Origin'] = 'https://auth.wust.edu.cn'
    result.update({
        'Pragma': 'no-cache',
        'Referer': 'https://auth.wust.edu.cn/lyuapServer/login?service=https://bkjx.wust.edu.cn/jsxsd/xskb/xskb_list.do',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        'X-Requested-With': 'XMLHttpRequest',
        'loginUserToken': token,
        'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    })
    return result


# Step 1: request pageInfo with a millisecond timestamp and keep the `sid`
# cookie from the response.
headers = _auth_headers()

params = {
    '_t': str(int(time.time()*1000)),
}

response = requests.get('https://auth.wust.edu.cn/api/uap/unauthorize/pageInfo', params=params, headers=headers)

cookies = {
    'sid': response.cookies['sid'],
}

# Step 2: request a captcha; the JSON reply carries its `uid` and the image
# `content` that is later passed to yanzheng().
headers = _auth_headers()

params = {
    '_t': str(int(time.time()*1000)),
    'uid': '',
}

response = requests.get('https://auth.wust.edu.cn/lyuapServer/kaptcha', params=params, cookies=cookies, headers=headers)

captcha = response.json()

uid = captcha.get('uid')

content = captcha.get('content')

# Step 3: post the credentials (password passed through the JS `main123`
# function), the captcha id and the computed captcha answer.
headers = _auth_headers(form_post=True)

data = {
    'username': username,
    'password': _js_ctx.call('main123', password),
    # NOTE(review): the accompanying article says `service` should be changed
    # to the timetable URL, but this value is still sso.jsp - confirm.
    'service': 'https://bkjx.wust.edu.cn/jsxsd/sso.jsp',
    'loginType': '',
    'id': uid,
    'code': str(yanzheng(content)),
}

response = requests.post('https://auth.wust.edu.cn/lyuapServer/v1/tickets', cookies=cookies, headers=headers, data=data)

print(response.content)

ticket = response.json().get('ticket')
print(ticket)

# Stop here when no ticket came back; continuing would only send requests
# without a ticket and fail later with an unrelated error.
if not ticket:
    raise SystemExit('登录失败:未获取到ticket,请检查账号密码或重试(验证码可能识别错误)')
# Browser-like headers for the first request to the academic system.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Referer': 'https://bkjx.wust.edu.cn/jsxsd/xskb/xskb_list.do',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-site',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
    'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Request the main page without any cookies and show the cookies it returns.
main_page_url = 'https://bkjx.wust.edu.cn/jsxsd/framework/xsMain.jsp'
response = requests.get(main_page_url, headers=headers)

print(response.cookies)

# Keep the two cookies that the following requests send along.
cookies = {
    cookie_name: response.cookies[cookie_name]
    for cookie_name in ('bzb_jsxsd', 'SERVERID')
}

# Browser-like headers for the SSO request; the Referer is the auth site.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Referer': 'https://auth.wust.edu.cn/',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-site',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
    'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Send the login ticket together with the cookies collected just before.
params = dict(ticket=ticket)

sso_url = 'https://bkjx.wust.edu.cn/jsxsd/sso.jsp'
response = requests.get(sso_url, headers=headers, cookies=cookies, params=params)

# Browser-like headers for loading the timetable page inside the main frame.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Referer': 'https://bkjx.wust.edu.cn/jsxsd/framework/xsMain.jsp',
    'Sec-Fetch-Dest': 'iframe',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
    'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Plain GET of the timetable page; its HTML is parsed by the code that follows.
timetable_url = 'https://bkjx.wust.edu.cn/jsxsd/xskb/xskb_list.do'
response = requests.get(timetable_url, headers=headers, cookies=cookies)

# Parse the HTML of the timetable page.
tree = etree.HTML(response.text)

# Value of the first <option> inside the element with id "kbjcmsid".
kbjcmsid_values = tree.xpath('//*[@id="kbjcmsid"]/option[1]/@value')
if not kbjcmsid_values:
    # Without this option the page is not the expected timetable form; stop
    # with an explicit message rather than an unexplained IndexError.
    raise SystemExit('未能在课表页面中找到kbjcmsid选项,可能登录未成功')
kbjcmsid = kbjcmsid_values[0]

# Form fields for the timetable query; the term comes from the user's input.
data = [
    ('jx0404id', ''),
    ('cj0701id', ''),
    ('zc', ''),
    ('demo', ''),
    ('xnxq01id', xnxq01id),
    ('sfFD', '1'),
    ('kbjcmsid', kbjcmsid),
]

# Echo back every hidden <input> of the page that has both a name and a value.
hiddens = tree.xpath("//input[@type='hidden']")

for hidden in hiddens:
    field_name = hidden.get('name')
    field_value = hidden.get('value')
    # Inputs lacking either attribute are skipped (previously done through a
    # bare `except`, which would also have hidden unrelated errors).
    if field_name is None or field_value is None:
        continue
    data.append((field_name, field_value))

# Browser-like headers for submitting the timetable form.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://bkjx.wust.edu.cn',
    'Pragma': 'no-cache',
    'Referer': 'https://bkjx.wust.edu.cn/jsxsd/xskb/xskb_list.do',
    'Sec-Fetch-Dest': 'iframe',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
    'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Submit the form fields collected in `data` and print the returned page.
timetable_url = 'https://bkjx.wust.edu.cn/jsxsd/xskb/xskb_list.do'
response = requests.post(timetable_url, data=data, headers=headers, cookies=cookies)

print(response.text)
  • 35
    点赞
  • 30
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值