XJTU全校课表的爬虫demo

import requests
import threading
import json

# One HTTP session shared by every page_spider call.
req = requests.session()

# Request headers sent with every POST made by page_spider.
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'cookie':
        '' # paste your own cookie here after logging in
}

# Endpoint that page_spider posts the course-table query to.
url = 'https://ehall.xjtu.edu.cn/jwapp/sys/kcbcx/modules/qxkcb/qxfbkccx.do'

# Serializes writes to the output file shared by the worker threads.
lock = threading.Lock()

def page_spider(page_number, f):
    """Fetch one page of the course table and append its rows to ``f``.

    Posts a fixed query (term ``2023-2024-2``, 10 rows per page) to the
    module-level ``url`` through the shared session ``req`` with ``headers``,
    then writes one tab-separated line per row in the order
    KCH, KXH, KCM, XS, XF, SKJS.  Rows whose ``SKJS`` is ``None`` are skipped.
    (The field codes presumably stand for course number, section number,
    course name, hours, credits and teacher -- confirm against the API.)

    Args:
        page_number: Value sent as the ``pageNumber`` form field.
        f: Writable text file shared between worker threads; the write is
            guarded by the module-level ``lock``.

    Raises:
        requests.RequestException: On timeout, connection failure or an HTTP
            error status.
        ValueError: If the response body is not JSON (``json.loads`` fails).
        KeyError: If the JSON lacks the expected ``datas.qxfbkccx.rows`` path
            or a row lacks one of the fields above.
    """
    data = {
        'querySetting':
            '[{"name": "XNXQDM", "value": "2023-2024-2", "linkOpt": "and", "builder": "equal"},[{"name": "RWZTDM", "value": "1", "linkOpt": "and", "builder": "equal"},{"name": "RWZTDM", "linkOpt": "or", "builder": "isNull"}]]',
        '*order': "+KKDWDM,+KCH,+KXH",
        'pageSize': 10,
        'pageNumber': page_number,
    }
    # Without a timeout a stalled connection would block this thread forever.
    rep = req.post(url, data=data, headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of a confusing JSON parse error.
    rep.raise_for_status()
    content = json.loads(rep.text)

    elem_list = [
        '\t'.join((
            c['KCH'], str(c['KXH']), c['KCM'],
            str(c['XS']), str(c['XF']), c['SKJS'],
        ))
        for c in content['datas']['qxfbkccx']['rows']
        if c['SKJS'] is not None
    ]
    if not elem_list:
        # Nothing to record; avoid emitting a stray blank line.
        return

    # The context manager releases the lock even if the write raises.
    with lock:
        f.write('\n'.join(elem_list) + '\n')


if __name__ == '__main__':
    # Local imports keep this entry point self-contained.
    import sys
    from concurrent.futures import ThreadPoolExecutor

    # Cap concurrency: one thread per page (434 at once) floods the server
    # and the single shared session.
    max_workers = 16

    with open("c532_raw_date.txt", "w", encoding='utf-8') as f:
        # Leaving the pool's with-block waits for every submitted page, so
        # the file is still open while workers write to it.
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = {
                page: pool.submit(page_spider, page, f)
                for page in range(1, 435)
            }

        # Report failed pages explicitly; a failure does not abort the others.
        for page, future in futures.items():
            error = future.exception()
            if error is not None:
                print(f"page {page} failed: {error!r}", file=sys.stderr)

注:请求头中的 cookie 需要先登录 ehall,再从浏览器中复制后填入 headers。

(附:使用selenium自动登录ehall)

from selenium import webdriver
from selenium.webdriver.common.by import By

# Log in to ehall through a real Chrome window driven by Selenium.
url = 'http://ehall.xjtu.edu.cn'
browser = webdriver.Chrome()
# Maximize the window.
browser.maximize_window()
# implicitly_wait is not a sleep: it sets, once for the whole session, how
# long find_element keeps polling for a missing element. Setting it a single
# time is enough; repeating the call after each action has no extra effect.
browser.implicitly_wait(1)

browser.get(url)

# Open the login form from the not-logged-in landing page.
button = browser.find_element(by=By.CLASS_NAME, value='amp-no-login-zh')
button.click()

# Fill in the credentials (replace both placeholders with your own).
username = browser.find_element(by=By.CLASS_NAME, value='username')
username.send_keys('你的学号')
password = browser.find_element(by=By.CLASS_NAME, value='pwd')
password.send_keys('pwd')
button = browser.find_element(by=By.ID, value='account_login')
button.click()

# The browser is deliberately left open after login; close it manually or
# call browser.close() when finished.

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值