import webbrowser
import re
import urllib
#获取hdu网页
def getHtml_hdu(url):
page = urllib.urlopen(url)
html = page.read()
#unicodehtml = html.decode("utf-8")
#return unicodehtml
return html
#获取poj网页
def getHtml_poj(url):
page = urllib.urlopen(url)
html = page.read()
#unicodehtml = html.decode("utf-8")
#return unicodehtml
return html
#获取cug网页
def getHtml_cug(url):
page = urllib.urlopen(url)
html = page.read()
unicodehtml = html.decode("utf-8")
return unicodehtml
#获取hdu中用户信息
def zhenghe_hdu(str1,userid,imgre):
html=getHtml_hdu( str1+userid )
return re.findall(imgre,html)
#获取cug中用户信息
def zhenghe_cug(str1,userid,imgre):
html=getHtml_cug( str1+userid )
return re.findall(imgre,html)
#获取poj中用户信息
def zhenghe_poj(str1,userid,imgre):
html =getHtml_poj( str1+ userid)
return re.findall(imgre,htm
利用python爬虫抓取OJ上做题信息(终结版)
最新推荐文章于 2024-06-18 11:41:13 发布