python3+BeautifulSoup+tkinter 爬虫 获取学校成绩

这是一个带图形界面的小爬虫,用来爬取学校成绩。

最开始只用了 python3 + 正则表达式,

然后用 tkinter 加上了界面,

最后又引入了 BeautifulSoup,

现在代码看起来和谐多了。

<pre name="code" class="python">#获取学校成绩
 
import re,string,urllib.parse,urllib.request
from tkinter import *
from tkinter import ttk
from bs4 import BeautifulSoup
class AhutScore:
    """Grade-query scraper for the AHUT score page with a small tkinter GUI.

    ``getAhutScore`` posts the query form and parses the returned HTML with
    BeautifulSoup; ``getGUI`` builds the input form and shows the results.
    """

    # Endpoint the query form is posted to.
    QUERY_URL = 'http://211.70.149.134:8080/stud_score/brow_stud_score.aspx'
    # Text of the status label when the query matched nothing.
    NO_RECORD_MSG = '没有返回记录!'
    # Seconds to wait for the server; without a timeout a dead server would
    # freeze the GUI forever because the request runs on the Tk thread.
    TIMEOUT = 15

    def __init__(self):
        print('已经启动安工大成绩查询爬虫,咔嚓咔嚓')

    @staticmethod
    def _tagText(tag):
        """Return the text inside *tag*, or '' when the tag was not found."""
        return '' if tag is None else tag.get_text()

    def getAhutScore(self, stuNo, idCard, xn, xq):
        """Post the score query and parse the result page.

        Args:
            stuNo: student number, sent as form field ``TextBox1``.
            idCard: ID-card number, sent as form field ``TextBox2``.
            xn: academic year (e.g. ``'2014-2015'``), sent as ``drop_xn``.
            xq: term, sent as ``drop_xq``.

        Returns:
            A tuple ``(items, stuMsg, labelShow)``: ``items`` is a list with
            one dict per score row mapping column title to cell text,
            ``stuMsg`` is the text of the ``Label1`` span and ``labelShow``
            the text of the ``Label_SHOW`` status span. Missing spans yield
            ``''``; ``items`` is empty when nothing was found.

        Raises:
            OSError: the request failed or timed out (``urllib.error.URLError``
                is a subclass of ``OSError``).
            UnicodeDecodeError: the response body is not valid UTF-8.
        """
        # Form fields as captured from the live site with HttpWatch.
        # NOTE(review): the __EVENTVALIDATION / __VIEWSTATE values are a fixed
        # snapshot; presumably they go stale if the server page changes.
        postdata = urllib.parse.urlencode({
            '__EVENTVALIDATION': '/wEWIQLH/uyCBwLs0bLrBgLs0fbZDALWrMSACwKEx5fABgKFx/uABQKax7/ABwKax6OABgKbx6OABgKYx+dBAsKF4K8GAs2FiJQIAsqF5O0IAsOF8PcLAsCFjO0JAvGV4pUFAv/6yPsJAv76yPsJAvbLmuYBAq7k2jACzqvD4A4CrvycrAcCi+uC+wwCn/nbgQ0C4d349AoC9PbF/AwCrZj0xQsCrZiIoQQC0sqYtwoC6MqwtAcC1srwtQoChobTsw4C1orq2A/lc4cMuGz9/vf0WzeaMjk2B63pi/yD0c3bh6AkZ2usTA==',
            '__VIEWSTATE': '/wEPDwUKLTc5MTY3NzY2OA9kFgICAw9kFg4CBQ8QZBAVDA09Peivt+mAieaLqT09CTIwMTQtMjAxNQkyMDEzLTIwMTQJMjAxMi0yMDEzCTIwMTEtMjAxMwkyMDExLTIwMTIJMjAxMC0yMDExCTIwMDktMjAxMAkyMDA4LTIwMDkJMjAwNy0yMDA4CTIwMDYtMjAwNwkyMDA1LTIwMDYVDAAJMjAxNC0yMDE1CTIwMTMtMjAxNAkyMDEyLTIwMTMJMjAxMS0yMDEzCTIwMTEtMjAxMgkyMDEwLTIwMTEJMjAwOS0yMDEwCTIwMDgtMjAwOQkyMDA3LTIwMDgJMjAwNi0yMDA3CTIwMDUtMjAwNhQrAwxnZ2dnZ2dnZ2dnZ2dkZAIHDxBkEBUDDT096K+36YCJ5oupPT0BMgExFQMAATIBMRQrAwNnZ2dkZAIdD2QWAgIFDzwrABEAZAIfD2QWAgIBDzwrABEAZAIjD2QWAgIJDzwrABEAZAIlD2QWAgIDDxBkZBYBZmQCJw9kFgICAQ88KwARAQEQFgAWABYAZBgEBQlHcmlkVmlldzMPZ2QFCUdyaWRWaWV3MQ9nZAUMR3JpZFZpZXdfY2owD2dkBQtHcmlkVmlld19jag9nZJ3osNiaHFKtpB351twVA++gU7GdyOdYypVlNUYHNaNo',
            '__VIEWSTATEGENERATOR': 'DCA2160B',
            'Button_cjcx': '查询',
            'drop_type': '全部成绩',
            'drop_xn': xn,
            'drop_xq': xq,
            'hid_dqszj': '',
            'TextBox1': stuNo,
            'TextBox2': idCard,
        }).encode(encoding='utf-8')
        # Send a browser User-Agent so the request looks like a normal visit.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
        }
        req = urllib.request.Request(
            url=self.QUERY_URL,
            data=postdata,
            headers=headers,
        )
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(req, timeout=self.TIMEOUT) as result:
            unicodePage = result.read().decode('utf-8')

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(unicodePage, 'html.parser')
        stuMsg = self._tagText(soup.find('span', id='Label1'))
        labelShow = self._tagText(soup.find('span', id='Label_SHOW'))

        items = []
        if labelShow == self.NO_RECORD_MSG:
            return items, stuMsg, labelShow

        # The header row (<tr class="Freezing">) carries the column titles.
        headerRow = soup.find('tr', class_='Freezing')
        if headerRow is None:
            return items, stuMsg, labelShow
        titles = [self._tagText(th) for th in headerRow.find_all('th')]

        # Data rows are the left-aligned <tr> elements with the hover handler.
        # The attribute name must be plain ASCII "onmouseout"; the previous
        # spelling used Greek omicron look-alikes and therefore never matched.
        scoreRows = soup.find_all(
            'tr',
            attrs={'align': 'left', 'onmouseout': 'this.style.backgroundColor=c'},
        )
        for row in scoreRows:
            cells = row.find_all('td')
            # zip() pairs titles with however many cells the row really has,
            # instead of assuming exactly 15 columns.
            items.append({
                title: self._tagText(cell).replace('\xa0', '')
                for title, cell in zip(titles, cells)
            })
        print(items)
        return items, stuMsg, labelShow

    def getGUI(self):
        """Build the query form and run the tkinter main loop.

        Blocks until the main window is closed. Pressing the query button
        runs ``getAhutScore`` with the form values and shows the outcome in
        a separate result window.
        """
        root = Tk()
        root.resizable(width=False, height=False)
        root.title('ahut成绩查询')

        Label(root, text='学号:').grid(row=0, column=0, sticky=W)
        Label(root, text='身份证号:').grid(row=0, column=2, sticky=W)
        Label(root, text='学年:').grid(row=1, column=0, sticky=W)
        Label(root, text='学期:').grid(row=1, column=2, sticky=W)

        stuNo = StringVar()
        idCard = StringVar()
        xn = StringVar()
        xq = StringVar()

        Entry(root, textvariable=stuNo).grid(row=0, column=1)
        Entry(root, textvariable=idCard).grid(row=0, column=3)
        stuNo.set('11908***')
        idCard.set('34082*************')

        xnBox = ttk.Combobox(root, textvariable=xn, state='readonly')
        xnBox['values'] = ('', '2010-2011', '2011-2012', '2012-2013', '2013-2014', '2014-2015')
        xnBox.set('2014-2015')
        xnBox.grid(row=1, column=1)

        xqBox = ttk.Combobox(root, textvariable=xq, state='readonly')
        xqBox['values'] = ('', '1', '2')
        xqBox.set('1')
        xqBox.grid(row=1, column=3)

        def showResult():
            """Run the query and display its outcome in a child window."""
            try:
                items, stuMsg, labelShow = self.getAhutScore(
                    stuNo.get(), idCard.get(), xn.get(), xq.get())
            except OSError as err:
                # Network failure or timeout: report it in the result window
                # instead of leaving only a traceback on stderr.
                items, stuMsg, labelShow = [], '', '查询失败: %s' % err

            # A Toplevel shares the existing interpreter and main loop;
            # creating a second Tk() root would start an independent one.
            resultWin = Toplevel(root)
            resultWin.title('成绩查询结果')
            resultWin.resizable(width=False, height=False)
            text = Text(resultWin)
            # Append at END so courses appear in the order they were returned;
            # inserting every row at a fixed index reversed the list.
            text.insert(END, stuMsg + '\n')
            text.insert(END, '-' * 32 + labelShow + '-' * 32 + '\n')
            if items:
                for item in items:
                    text.insert(
                        END,
                        item.get('课程名', '') + ':' + item.get('总评成绩', '') + '\n')
            else:
                text.insert(END, '暂无信息!\n')
            text.grid(row=5, column=0, columnspan=4)

        Button(root, text='查询', command=showResult).grid(row=4, columnspan=4)
        root.mainloop()

# Start the GUI only when this file is executed as a script, so that
# importing the module does not open a window and block in the main loop.
if __name__ == '__main__':
    ahutScore = AhutScore()
    ahutScore.getGUI()



评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值