import http.cookiejar
import urllib.request
import re
import urllib
from bs4 import BeautifulSoup
class Spider(object):
    """Log in to a site with a form POST and download a page behind the login."""

    def __init__(self):
        # Start-up banner shown to the user when the spider is created.
        print('获取课程成绩和学分,GO!')

    def get_login(self, login_url, post_data, headers, grade_url=None):
        """Log in and return the decoded body of the page at *grade_url*.

        A cookie-aware opener is built so that any session cookie set by the
        login response is sent along with the second request.

        Args:
            login_url: URL that receives the login form POST.
            post_data: Mapping of form fields; URL-encoded before sending.
            headers: Mapping of HTTP headers attached to the login request.
            grade_url: URL fetched after logging in. When omitted, the
                module-level name ``grade_url`` is used, which keeps existing
                three-argument callers working.

        Returns:
            The body of the second response, decoded as GBK.

        Raises:
            ValueError: If no grade URL is given and no module-level
                ``grade_url`` exists.
            urllib.error.URLError: If either request fails.
            UnicodeDecodeError: If the page is not valid GBK.
        """
        if grade_url is None:
            # Legacy behaviour: the target URL used to be read from an
            # undeclared global. Resolve it up front so a missing value fails
            # before any request is sent instead of after the login POST.
            grade_url = globals().get('grade_url')
            if grade_url is None:
                raise ValueError(
                    'grade_url must be passed to get_login() or defined at '
                    'module level')

        cookie = http.cookiejar.LWPCookieJar()
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cookie))

        # Supplying a body makes urllib issue a POST for HTTP(S) URLs.
        encoded_form = urllib.parse.urlencode(post_data).encode(encoding='GBK')
        request = urllib.request.Request(login_url, encoded_form, headers)

        # The login response body is not needed; the cookie processor has
        # already stored its cookies by the time open() returns, so just make
        # sure the connection is released.
        with opener.open(request):
            pass

        with opener.open(grade_url) as response:
            return response.read().decode('GBK')
if __name__ == '__main__':
    # Script entry point: log in, download the grade page, then print the
    # course title and grade found in each matching table row.

    # NOTE: `grade_url` has to remain a module-level name; Spider.get_login
    # looks it up from module scope when it fetches the grade page.
    login_url = 'http://grdms.bit.edu.cn/yjs/login.do'
    grade_url = 'http://grdms.bit.edu.cn/yjs/yanyuan/py/pychengji.do?method=enterChaxun'

    # NOTE(review): credentials are hard-coded in the source; consider
    # loading them from the environment or a prompt instead.
    post_data = {
        'j_username': '2120140383',
        'j_password': '********',
        'loginType': '0',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Referer': 'http://grdms.bit.edu.cn/yjs/login.jsp',
    }

    grade_page = Spider().get_login(login_url, post_data, headers)
    parsed_page = BeautifulSoup(grade_page, 'html.parser')

    # Left-aligned cells are pulled out of each row's HTML text; re.S lets a
    # cell's content span several lines.
    cell_pattern = re.compile('<td align="left">(.*?)</td>', re.S)

    for table_row in parsed_page.find_all('tr', bgcolor="#FFFFFF", height="23"):
        cells = cell_pattern.findall(str(table_row))
        if len(cells) < 10:
            # Rows with fewer than ten matching cells cannot supply both
            # fields; they are reported with the header-row message.
            print('第一栏标题')
            continue
        # Cell 3 is printed as the title and cell 9 as the grade.
        print({'title': cells[3], 'grade': cells[9]})