Python爬取OJ提交过的代码

把之前在学校OJ提交过的正确代码保存到本地,做一下备份。主要用到了requests和BeautifulSoup(bs4),还是挺好玩的hhh。


import requests
from bs4 import BeautifulSoup
import re
import os


class Code(object):
	"""Download submitted solutions from the SDUT online judge to local files.

	Calling :meth:`login` starts a chain of calls: log in, verify the session,
	open the profile page, walk every page of the submission list and write
	each solution to ``<id>.cpp`` in the current working directory.

	Every request goes through the module-level ``session`` object, so the
	cookie obtained at login is reused by all later requests.
	"""

	# Seconds to wait for the server before a request is abandoned; without a
	# timeout a stalled connection would block the script forever.
	timeout = 30

	def __init__(self, user='tooog', passw='123123'):
		"""Store the site URLs, request headers and account credentials.

		Args:
			user: Account name used for login and for the submission list URL.
			passw: Password sent with the login form.
		"""
		self.home_url = 'http://acm.sdut.edu.cn/onlinejudge2/index.php/Home'
		self.login_url = 'http://acm.sdut.edu.cn/onlinejudge2/index.php/Home/Login/login'
		self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'}
		self.user = user
		self.passw = passw

	def login(self, url=None):
		"""Submit the login form and continue with the crawl.

		Args:
			url: Login endpoint; ``self.login_url`` is used when it is empty
				or omitted.
		"""
		post_data = {'user_name': self.user, 'password': self.passw}
		self.post(url or self.login_url, post_data)

	def post(self, url, post_data):
		"""POST ``post_data`` to ``url`` and then verify the login state."""
		session.post(url, post_data, headers=self.headers, timeout=self.timeout)
		self.check_login()

	def check_login(self):
		"""Check the login state and move on to the profile page.

		The home page is fetched and searched for a ``Logout`` text node.

		Raises:
			SystemExit: With status 1 when no ``Logout`` text is found.
		"""
		html = self.session_get(self.home_url)
		soup = BeautifulSoup(html.text, 'html5lib')
		if soup.find(text='Logout') is None:
			print('登陆失败')
			# Raised directly instead of calling exit(), which only exists
			# when the site module is loaded; non-zero status marks failure.
			raise SystemExit(1)
		print('登陆成功')
		# The first link inside the right-hand navbar is taken as the profile
		# page (assumption about the site's layout -- TODO confirm).
		info_url = 'http://acm.sdut.edu.cn' + soup.find('ul', class_='navbar-right').find('a')['href']
		self.info(info_url)

	def info(self, url):
		"""Open the profile page at ``url`` and continue to the submissions.

		The ``Submissions`` link is extracted when present, but a missing link
		is tolerated because :meth:`submissions` builds its own URL.
		"""
		info_html = self.session_get(url)
		match = re.search(r'href="(.*?)">Submissions', info_html.text)
		sub_href = match.group(1) if match else ''
		self.submissions(self.home_url + sub_href)

	def submissions(self, url):
		"""Start paging through the submission list of ``self.user``.

		Args:
			url: Ignored; the list URL is always rebuilt from ``self.user``.
				NOTE(review): ``result/1`` presumably filters for accepted
				submissions -- confirm against the site.
		"""
		url = 'http://acm.sdut.edu.cn/onlinejudge2/index.php/Solution/status/username/' + self.user + '/result/1/p/1.html'
		self.page(url)

	@staticmethod
	def _page_url(url, page_number):
		"""Return ``url`` with its last path segment set to ``<page_number>.html``."""
		base = url.rpartition('/')[0]
		return base + '/' + str(page_number) + '.html'

	def page(self, url):
		"""Visit every page of the submission list that starts at ``url``.

		The page count is read from the last ``<a class="num">`` link. When
		there is no such link, or its text is not a number, the list is
		treated as a single page.
		"""
		sub_html = self.session_get(url)
		links = BeautifulSoup(sub_html.text, 'html5lib').find_all('a', class_='num')
		try:
			page_count = int(links[-1].get_text())
		except (IndexError, ValueError):
			page_count = 1
		for pg in range(1, page_count + 1):
			self.find_code(self._page_url(url, pg))

	def find_code(self, url):
		"""Save the solution behind every row of the list page at ``url``.

		The third cell of a row supplies the file name and the link in the
		seventh cell points to the code page. Rows lacking either are skipped.
		"""
		listing = self.session_get(url)
		tbody = BeautifulSoup(listing.text, 'html5lib').find('tbody')
		if tbody is None:
			return
		for tr in tbody.find_all('tr'):
			td = tr.find_all('td')
			if len(td) < 7 or td[6].a is None:
				continue
			code_href = td[6].a.get('href')
			if not code_href:
				continue
			# Strip surrounding whitespace so it cannot end up in a file name.
			code_id = td[2].get_text(strip=True)
			self.save('http://acm.sdut.edu.cn' + code_href, code_id)

	def save(self, url, code_id):
		"""Fetch the code page at ``url`` and write it to ``<code_id>.cpp``.

		Only a ``<pre class="brush:cpp;">`` element is recognised; pages
		without one are reported and skipped.
		"""
		code_html = self.session_get(url)
		pre = BeautifulSoup(code_html.text, 'html5lib').find('pre', class_='brush:cpp;')
		if pre is None:
			print('未找到代码: ' + url)
			return
		# Explicit UTF-8 keeps the output independent of the platform locale;
		# the context manager closes the file even if the write fails.
		with open(code_id + '.cpp', 'w', encoding='utf-8') as f:
			f.write(pre.get_text())

	def session_get(self, url):
		"""GET ``url`` through the shared session and return the response."""
		return session.get(url, headers=self.headers, timeout=self.timeout)

# Shared HTTP session: it carries the login cookie across every request the
# Code class makes.
session = requests.Session()

if __name__ == '__main__':
	# Logging in is the entry point of the crawl; the remaining steps are
	# chained from inside the Code methods.
	login_endpoint = 'http://acm.sdut.edu.cn/onlinejudge2/index.php/Home/Login/login'
	Code().login(login_endpoint)


  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值