'''
Created on 2013-11-12
@author: mowayao
'''
import urllib
import urllib2
import cookielib
import re
def SubTitle(web):
    '''Return every ``<title>...</title>`` element found in *web*.

    web: HTML source text to scan.

    Returns a list of the matched substrings, each including its opening
    and closing ``title`` tags; the list is empty when nothing matches.
    '''
    # Non-greedy ``.+?`` stops at the first closing tag, so several title
    # elements on the same line come back as separate matches instead of
    # one match that swallows everything between the first and the last.
    key = r"<title>.+?</title>"
    return re.findall(key, web)
# Log in to jw.dhu.edu.cn, fetch the score-query page, strip its markup
# and save the remaining text to a local file.

# One cookie jar is attached to the opener so that cookies set by the
# login response are sent again with the follow-up request.
cookie = cookielib.CookieJar()
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
}
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

# Placeholder credentials submitted as the login form body.
postdata = urllib.urlencode({
    'userName': 'XXXXX',
    'userPwd': 'XXXXX',
})
req = urllib2.Request(
    url='http://jw.dhu.edu.cn/dhu/login_zh.jsp',
    data=postdata,
    headers=headers,
)
# The login response body is not used; the request is made for the
# cookies it leaves in the jar.
result = opener.open(req)

ref = urllib2.Request(
    url='http://jw.dhu.edu.cn/dhu/student/query/scorequery.jsp?studentId=null',
    headers=headers,
)
html = opener.open(ref).read()
title = SubTitle(html)

# Narrow the page down to its <body> element when one is present;
# otherwise the whole response is processed below.
problem = re.compile(u'<body.+</body>', re.DOTALL)
style = problem.search(html)
if style:
    print(style.group())
    html = style.group(0)

# Replace every tag with a space so text from adjacent elements does not
# run together.
tmp = re.sub('<[^>]*>', ' ', html)
# The original called tmp.replace('\s+', '') twice. str.replace treats its
# argument as literal text, not as a regular expression, so those calls
# never changed anything. Use a real regex to collapse whitespace runs.
# NOTE(review): the original replacement string was ''; a single space is
# used here so the separators inserted above survive -- confirm.
tmp = re.sub(r'\s+', ' ', tmp)

# Open the output file only once the text is ready, so a failed request
# does not leave a truncated file behind, and let the context manager
# close the handle.
filename = "es"
with open(filename, 'w') as fl:
    fl.write(tmp)
东华大学教务处python爬虫
最新推荐文章于 2021-11-07 18:28:53 发布