BG
最近搞了一个小玩意:扒取学校题库的评测记录并存入数据库,打算用 matplotlib 画图统计一下,玩一玩。
Code
半成品
import urllib2 as ub
import pymongo
import re
def getPage(url):
    """Download ``url`` and return the raw response body.

    The request carries a browser-like User-Agent header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as returned by the response's ``read()``.

    Raises:
        Whatever ``urlopen`` raises on failure (presumably
        ``urllib2.URLError``); the error is not handled here.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 5.5; Windows NT)'}
    request = ub.Request(url, headers=headers)
    response = ub.urlopen(request)
    try:
        return response.read()
    finally:
        # Close explicitly so repeated calls do not leak one open handle
        # per fetched page. try/finally is used because urllib2 response
        # objects cannot be used in a ``with`` statement.
        response.close()
def getStatusList(page):
    """Extract every submission row from the HTML of a status page.

    Args:
        page: HTML text of a status page.

    Returns:
        A list of 9-tuples, one per matching ``<tr align=center>`` row,
        holding the captured cell texts in column order. The list is empty
        when no row matches.
    """
    rowPattern = re.compile(r'<tr align=center><td>(.+?)</td><td><a href=.+?>(.+?)</a></td><td><a href=.+?>(.+?)</a></td><td><font color=.+?>(.+?)</font></td><td>(.+?)</td><td>(.+?)</td><td>(.+?)</td><td>(.+?)</td><td>(.+?)</td></tr>')
    return rowPattern.findall(page)
def getNextUrl(page):
    """Return the absolute URL behind the page's "Next Page" link.

    Args:
        page: HTML text of a status page.

    Returns:
        ``'http://10.156.17.250/JudgeOnline/status?top=<value>'`` built from
        the first "Next Page" link found, or ``None`` when the page has no
        such link.
    """
    # One search covers both cases: the pattern already contains the text
    # "Next Page", so a page without it cannot match. A page that shows the
    # text without the expected link markup yields None instead of an
    # IndexError.
    match = re.search(
        r'\[<a href=status\?top=(.+?)><font color=blue>Next Page</font></a>\]',
        page)
    if match is None:
        return None
    return 'http://10.156.17.250/JudgeOnline/status?top=' + match.group(1)
def getStatusType(problemId, user, problem, result, memory, time, language,
                  length, submitTime):
    """Bundle the nine fields of one status row into a dict.

    The ninth parameter used to be a second ``time``. A repeated parameter
    name is a SyntaxError, and the repeated ``'time'`` key would have kept
    only one of the two values. It is now ``submitTime`` and is stored under
    its own key.

    Args:
        problemId: First field of the row, stored under ``'id'``
            (presumably the run id -- confirm against the page layout).
        user: Stored under ``'user'``.
        problem: Stored under ``'problem'``.
        result: Stored under ``'result'``.
        memory: Stored under ``'memory'``.
        time: Stored under ``'time'`` (presumably the run time).
        language: Stored under ``'language'``.
        length: Stored under ``'length'``.
        submitTime: Last field of the row, stored under ``'submitTime'``
            (presumably the submission timestamp).

    Returns:
        A new dict with one entry per argument.
    """
    return {
        'id': problemId,
        'user': user,
        'problem': problem,
        'result': result,
        'memory': memory,
        'time': time,
        'language': language,
        'length': length,
        'submitTime': submitTime,
    }
def getRec():
    """Return the ``statusRec`` collection of the ``statusDb`` database.

    A new ``pymongo.MongoClient`` for ``localhost:27017`` is created on
    every call.
    """
    mongo = pymongo.MongoClient("localhost", 27017)
    return mongo['statusDb']['statusRec']
# Crawl the judge's status listing page by page and store every parsed
# submission row in MongoDB.
url = 'http://10.156.17.250/JudgeOnline/status'
rec = getRec()
while url is not None:
    page = getPage(url)
    nexUrl = getNextUrl(page)
    if nexUrl == url:
        # The page links back to itself, so there is nothing further to
        # follow. As before, this page is not stored.
        break
    url = nexUrl
    statusList = getStatusList(page)
    for item in statusList:
        # Each item holds the nine captured cells in the same order as
        # getStatusType's parameters.
        status = getStatusType(*item)
        # insert_one replaces Collection.insert, which is deprecated since
        # PyMongo 3.0 and removed in 4.0.
        rec.insert_one(status)