青岛大学教务爬虫

最新推荐文章于 2023-05-19 18:47:05 发布
yxt1039770648
最新推荐文章于 2023-05-19 18:47:05 发布
阅读量2.3k
点赞数
分类专栏：算法文章标签： python 自动识别验证吗爬虫
本文链接：https://blog.csdn.net/yxt1039770648/article/details/78130616
版权
算法专栏收录该内容
6 篇文章 0 订阅
订阅专栏
# -*- coding: utf-8 -*-

from PIL import Image
import io
import re
import getpass
import requests
import pytesseract
from lxml import etree
from prettytable import PrettyTable


class qdujw:
    def __init__(self):
        self.userid = 0
        self.s = requests.Session()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36'
        }

    # 教务登录
    def login(self):
        global sid, passwd, jw
        loginurl = 'http://jw.qdu.edu.cn/academic/j_acegi_security_check'
        codeurl = 'http://jw.qdu.edu.cn/academic/getCaptcha.do'
        userurl = 'http://jw.qdu.edu.cn/academic/showPersonalInfo.do'

        # 验证码
        code = self.s.get(codeurl, headers=self.headers, stream=True)
        img = Image.open(io.BytesIO(code.content))

        # 降噪
        threshold = 140
        table = []
        for i in range(256):
            if i < threshold:
                table.append(0)
            else:
                table.append(1)

        # 将彩色图像转换为灰度图像
        imgry = img.convert('L')

        # 讲图像中噪声去除
        out = imgry.point(table, '1')

        codetext = pytesseract.image_to_string(out, config='digits')

        # 去除空格，将.替换为0
        codetext = codetext.replace(' ', '')
        codetext = codetext.replace('.', '0')

        # 登录
        postdata = {
            'j_username': sid,
            'j_password': passwd,
            'j_captcha': codetext
        }
        r = self.s.post(loginurl, postdata)

        # 密码错误
        if re.search(u'\u5bc6\u7801\u4e0d\u5339\u914d', r.text):
            print sid + ' 密码不匹配!'
            print
            choice = raw_input('请选择(1.重新输入 2.退出)：')
            if choice == '1':
                sid = raw_input('学号：')
                passwd = getpass.getpass('密码：')
            else:
                exit()
            jw.login()

        # 用户名不存在
        elif re.search(u'\u4e0d\u5b58\u5728', r.text):
            print sid + ' 用户名不存在!'
            print
            choice = raw_input('请选择(1.重新输入 2.退出)：')
            if choice == '1':
                sid = raw_input('学号：')
                passwd = getpass.getpass('密码：')
            else:
                exit()
            jw.login()

        # 验证码错误
        elif re.search(u'\u9a8c\u8bc1\u7801\u4e0d\u6b63\u786e', r.text):
            print '正在登录，请耐心等待......'
            jw.login()

        # 成功
        else:
            userpage = self.s.get(userurl).content
            name = etree.HTML(userpage.decode(
                'utf-8', 'ignore')).xpath('/html/body/center/table[1]/tr[1]/td[2]/text()')
            for n in name:
                name = n.encode('utf-8')
            userid = re.findall(b'.*?userid=(.*?)"', userpage, re.S)
            for i in userid:
                self.userid = self.userid * 10 + int(i)
            print name + '登录成功！'

    # 查询成绩
    def scores(self):
        print '======= 成绩查询 ========'
        print
        year = raw_input('请输入查询学年: ')
        term = raw_input('春季学期输入[1],秋季学期输入[2]: ')
        scoresurl = 'http://jw.qdu.edu.cn/academic/manager/score/studentOwnScore.do'

        # 字符串计算  eval()
        postdata = {
            'year': eval(year + '-' + '1980'),
            'term': term,
            'para': '0'
        }
        scorespage = self.s.post(scoresurl, postdata).content
        aa = '<td>' + year + '[\s\S]*?<td>.*?</td>[\s\S]*?<td>.*?</td>[\s\S]*?<td>([\s\S]*?)</td>[\s\S]*?<td>.*?</td>[\s\S]*?<td>([\s\S]*?)</td>'

        # str转types  .encode(encoding="utf-8")
        scores = re.findall(aa.encode(encoding="utf-8"), scorespage, re.S)

        print
        print '======= 考试成绩 ========'
        for s in scores:
            print
            print s[0].strip()+' : '+s[1].strip()

    # 查询课表
    def kebiao(self):
        print '======= 课表查询 ========'
        print
        year = raw_input('请输入查询学年: ')
        term = raw_input('春季学期输入[1],秋季学期输入[2]: ')
        print

        # 使用 PrettyTable 打印表格
        x = PrettyTable(["周一", "周二", "周三", "周四", "周五", "周六", "周日"])
        x.padding_width = 1
        get = {
            'id': self.userid,
            'yearid': eval(year + '-' + '1980'),
            'termid': term,
            'timetableType': 'STUDENT',
            'sectionType': 'COMBINE'
        }

        kb = self.s.get(
            'http://jw.qdu.edu.cn/academic/manager/coursearrange/showTimetable.do', params=get)
        html = etree.HTML(kb.content)

        kb = html.xpath('//*[@id="timetable"]//*[@class="center"]')
        i = 0
        list = []
        list1 = ['', '', '', '', '', '', '']
        for k in kb:
            # print k.xpath('string()').encode('utf-8')

            # 通过 list 将课程信息添加到 PrettyTable 中
            if i < 7:
                list.append(
                    k.text.split(';')[0].replace('<<', '').replace('>>', ''))
                i += 1
                if i == 7:
                    # 上下显示一行空白，方便阅读
                    x.add_row(list1)
                    x.add_row(list)
                    x.add_row(list1)
                    list = []
                    i = 0
        print x

    # 教务通知
    def news(self):
        newsurl = requests.get(
            'http://jw.qdu.edu.cn/homepage/infoArticleList.do;jsessionid=E06A6E2B5FA3F797FAB8FA5F6331AC92?columnId=358')
        html = etree.HTML(newsurl.content.decode('utf-8', 'ignore'))
        news = html.xpath('//*[@id="thirdcontent"]/div[2]/ul/li/div/a')
        for new in news:
            print new.xpath('string(.)').encode('utf-8').replace(' ', '').replace('\n', '')
            print 'http://jw.qdu.edu.cn/homepage/' + new.attrib['href']
            print

    # 个人信息
    def user(self):
        print '======= 个人信息 ========'
        print
        user = self.s.get('http://jw.qdu.edu.cn/academic/showPersonalInfo.do')
        html = etree.HTML(user.content.decode('utf-8', 'ignore'))
        for n in html.xpath('/html/body/center/table[1]/tr[1]/td[2]/text()'):
            print '姓名: ' + n.encode('utf-8')
        for n in html.xpath('/html/body/center/table[1]/tr[2]/td[1]/text()'):
            print '院系: ' + n.encode('utf-8')
        for n in html.xpath('/html/body/center/table[1]/tr[2]/td[2]/text()'):
            print '专业: ' + n.encode('utf-8')
        for n in html.xpath('/html/body/center/table[1]/tr[4]/td[1]/text()'):
            print '年级: ' + n.encode('utf-8')
        for n in html.xpath('/html/body/center/table[1]/tr[4]/td[2]/text()'):
            print '班级: ' + n.encode('utf-8')

jw = qdujw()
print '======== 教务通知 ========'
jw.news()
print

print '======== 登录教务系统 ========'

# 学号和密码
print '请输入学号和密码'
sid = raw_input('学号：')
passwd = raw_input('密码：')
jw.login()

while 1:
    print
    print '======== 青岛大学教务系统 ========'
    print
    print '[1]查询成绩'
    print '[2]查询课表'
    print '[3]个人信息'
    print '[4]退出'
    print
    choice = raw_input('请选择: ')
    print
    if choice == '1':
        jw.scores()
    elif choice == '2':
        jw.kebiao()
    elif choice == '3':
        jw.user()
    elif choice == '4':
        exit()