Python 爬取学生成绩

最新推荐文章于 2024-02-27 23:42:37 发布

夜色的繁星

最新推荐文章于 2024-02-27 23:42:37 发布

阅读量1.7k

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/qq_42444944/article/details/90905968

版权

python 专栏收录该内容

16 篇文章 2 订阅

订阅专栏

import requests
from PIL import Image
import re
from urllib.parse import quote
from bs4 import BeautifulSoup


# Browser-like User-Agent sent with every request made by this script.
_USER_AGENT = ' '.join((
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'AppleWebKit/537.36 (KHTML, like Gecko)',
    'Chrome/74.0.3729.131 Safari/537.36',
))
headers = {'User-Agent': _USER_AGENT}

# Login form payload. The two hidden ASP.NET fields and the captcha
# (TextBox3) start empty and are filled in before the form is posted.
# NOTE(review): the account credentials are hard-coded below; consider
# assigning data['TextBox1'] / data['TextBox2'] from input() instead.
data = dict.fromkeys(('__VIEWSTATE', '__EVENTVALIDATION'), '')
data.update({
    'TextBox1': '20170201010',  # username (student number)
    'TextBox2': 'QQ760570',  # password
    'TextBox3': '',  # captcha
    'RadioButtonList1': '学生',
    'Button1': '',
})

# Login page and captcha image endpoints.
url = 'http://10.10.10.220/default2.aspx'
url1 = 'http://10.10.10.220/CheckCode.aspx'
# Log in, retrying with a fresh session and a fresh captcha until the
# student's main page can be read. On success this leaves `s` (the logged-in
# session), `cookies` (cookies set by the captcha response) and `name`
# available to the code that follows.
while True:
    # A new session per attempt, so the captcha, the login form and the
    # login POST all share one cookie jar.
    s = requests.Session()
    r = s.get(url1, headers=headers)
    cookies = r.cookies

    # Save the captcha image and display it so the user can type it in.
    with open('code.jpg', 'wb') as f:
        f.write(r.content)
    with Image.open('code.jpg') as img:
        img.show()
        data['TextBox3'] = input('请输入验证码：')

    # The login form must echo back its hidden state fields. Look them up
    # by field name rather than by the position of a regex match, so that
    # markup changes cannot silently swap or truncate the values.
    r1 = s.get(url, headers=headers)
    login_form = BeautifulSoup(r1.text, 'html.parser')
    for field in ('__VIEWSTATE', '__EVENTVALIDATION'):
        hidden = login_form.find('input', attrs={'name': field})
        if hidden is None:
            raise RuntimeError('login page has no hidden field ' + field)
        data[field] = hidden.get('value', '')

    r2 = s.post(url, data=data, headers=headers)
    url2 = 'http://10.10.10.220/xs_main.aspx?xh=' + data['TextBox1']
    r3 = s.get(url2, headers=headers)

    # An HTTP 200 alone does not prove the login worked: the element with
    # id "xhxm" must also be present, otherwise ask for a new captcha
    # instead of crashing on a missing element.
    xhxm = None
    if r3.status_code == 200:
        soup = BeautifulSoup(r3.text, 'html.parser')
        xhxm = soup.find(attrs={'id': 'xhxm'})
    if xhxm is not None:
        # NOTE(review): only the first three characters are kept as the
        # name -- confirm this is right for names of other lengths.
        name = xhxm.get_text()[:3]
        break
    print('验证码输入错误！请重新输入\n')
# Serialise every cookie from the captcha response into a single Cookie
# header value ("k1=v1; k2=v2"). Joining keeps all cookies instead of only
# the last one, and still defines `cookies1` when the jar is empty.
cookies1 = '; '.join(c.name + '=' + c.value for c in cookies)

# Grades page URL: student number, URL-quoted student name, module code.
url3 = ('http://10.10.10.220/xscj_gc.aspx?xh=' + data['TextBox1']
        + '&xm=' + quote(name)
        + '&gnmkdm=N121605')

# Headers for the grades requests: same User-Agent as the login requests,
# plus a Referer pointing at the grades page itself.
headers_change = {
    'User-Agent': headers['User-Agent'],
    'Referer': url3,
}
if cookies1:
    # Only send an explicit Cookie header when there is something in it.
    headers_change['Cookie'] = cookies1

# Form payload for the grades query; the two hidden fields are filled in
# after the grades page has been fetched.
# NOTE(review): the 'Button2' value looks like a placeholder copied from a
# tool that could not decode the real button caption -- confirm the server
# accepts it. It is kept unchanged here.
data1 = {
    '__VIEWSTATE': '',
    '__EVENTVALIDATION': '',
    'ddlXN': '',
    'ddlXQ': '',
    'Button2': '(unable to decode value)',
}

# Fetch the grades page to obtain its hidden form state, post the query
# form back, then print one line per table row.
r4 = s.get(url3, headers=headers_change)

# Look the hidden fields up by name rather than by regex match position.
grades_form = BeautifulSoup(r4.text, 'html.parser')
for field in ('__VIEWSTATE', '__EVENTVALIDATION'):
    hidden = grades_form.find('input', attrs={'name': field})
    if hidden is None:
        raise RuntimeError('grades page has no hidden field ' + field)
    data1[field] = hidden.get('value', '')

r5 = s.post(url3, headers=headers_change, data=data1)
soup = BeautifulSoup(r5.text, 'html.parser')
table = soup.find(name='table')
if table is None:
    raise RuntimeError('no <table> found in the grades response')

# The cells of the first table come back as one flat list; regroup them
# into rows of 15 cells each.
score = table.find_all(name='td')
list0 = [score[i: i + 15] for i in range(0, len(score), 15)]

# Columns: left-aligned field padded to 25 with the full-width space passed
# as argument 3, then a centred and a right-aligned field of width 6.
tplt = '{0:{3}<25}\t{1:^6}\t{2:>6}'
list_class = []
list_score = []
list_gpa = []
for row in list0:
    if len(row) < 15:
        # Trailing partial row (cell count not a multiple of 15): skip it.
        break
    # get_text() returns '' for an empty cell, where .string would give
    # None and make str.format() raise.
    # NOTE(review): cells 3 / 7 / 8 are taken as course / GPA / score
    # judging by the list names -- confirm against the page layout.
    list_class.append(row[3].get_text())
    list_gpa.append(row[7].get_text())
    list_score.append(row[8].get_text())
    print(tplt.format(list_class[-1], list_score[-1], list_gpa[-1], chr(12288)))