import requests
import threading
import json
# Shared HTTP session reused by every spider thread.
req = requests.session()
# Request headers sent with each query; the cookie must be filled in by hand.
headers = {
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
'cookie':
'' # after logging in, copy your own cookie here
}
# Course-query endpoint of the XJTU ehall teaching-affairs app.
url = 'https://ehall.xjtu.edu.cn/jwapp/sys/kcbcx/modules/qxkcb/qxfbkccx.do'
# Serializes writes to the shared output file across spider threads.
lock = threading.Lock()
def page_spider(page_number, f):
    """Fetch one page of course records and append them to the shared file.

    Parameters
    ----------
    page_number : int
        1-based page index for the server-side paging query.
    f : text file object
        Shared output file; writes are serialized via the module-level lock.
    """
    data = {
        'querySetting':
        '[{"name": "XNXQDM", "value": "2023-2024-2", "linkOpt": "and", "builder": "equal"},[{"name": "RWZTDM", "value": "1", "linkOpt": "and", "builder": "equal"},{"name": "RWZTDM", "linkOpt": "or", "builder": "isNull"}]]',
        '*order': "+KKDWDM,+KCH,+KXH",
        'pageSize': 10,
        'pageNumber': page_number,
    }
    # A timeout keeps a stuck request from hanging its worker thread forever.
    rep = req.post(url, data=data, headers=headers, timeout=30)
    content = rep.json()
    # Skip rows with no teacher (SKJS); join the fields of each row with tabs.
    elem_list = [
        '\t'.join((c['KCH'], str(c['KXH']), c['KCM'],
                   str(c['XS']), str(c['XF']), c['SKJS']))
        for c in content['datas']['qxfbkccx']['rows']
        if c['SKJS'] is not None
    ]
    if not elem_list:
        # Nothing to write — avoid emitting a stray blank line for this page.
        return
    # 'with' guarantees the lock is released even if write() raises.
    with lock:
        f.write('\n'.join(elem_list) + '\n')
if __name__ == '__main__':
    # One worker thread per result page; the file handle is shared by all
    # workers and writes are guarded by the lock inside page_spider.
    with open("c532_raw_date.txt", "w", encoding='utf-8') as out_file:
        workers = [
            threading.Thread(target=page_spider, args=(page_no, out_file))
            for page_no in range(1, 435)
        ]
        for worker in workers:
            worker.start()
        # Wait for every page to finish before the file is closed.
        for worker in workers:
            worker.join()
# Note: the cookie must be obtained after logging in.
# (Appendix: use selenium to log in to ehall automatically.)
# Appendix script: log in to ehall with selenium so the cookie can be copied.
from selenium import webdriver
from selenium.webdriver.common.by import By

url = 'http://ehall.xjtu.edu.cn'
browser = webdriver.Chrome()
# Maximize the window so the login widgets are laid out and clickable.
browser.maximize_window()
# implicitly_wait() sets a *global* polling timeout: every find_element()
# below will wait up to this long for its element to appear.  Setting it
# once replaces the original repeated implicitly_wait(1) calls, which had
# no cumulative effect and left only 1 s for the page to load.
browser.implicitly_wait(10)
browser.get(url)

# Switch from the QR-code view to the account/password login form.
button = browser.find_element(by=By.CLASS_NAME, value='amp-no-login-zh')
button.click()

# Fill in the credentials (replace the placeholders with your own).
username = browser.find_element(by=By.CLASS_NAME, value='username')
username.send_keys('你的学号')  # placeholder: your student ID
password = browser.find_element(by=By.CLASS_NAME, value='pwd')
password.send_keys('pwd')  # placeholder: your password

button = browser.find_element(by=By.ID, value='account_login')
button.click()
# browser.close()