老 OJ SGU 已经搬到 Codeforces 上了,为了做题方便,就把题目爬取了下来。我不会把 HTML 文件转成 PDF,不过只需按 Ctrl+P 打印即可。
题目代码沿用的是老网站上的格式,不是很好,只能这样了。
下面上代码(只能爬取 SGU 板块的题,其他的题 CSDN 上都有):
# -*- coding: utf-8 -*-
import requests
import urllib.request
from bs4 import BeautifulSoup
# Module-level handle for the output page 'an.html', opened for writing
# (truncates any existing file). Presumably receives the scraped problem
# HTML -- the writes are outside this view; verify against the rest of the file.
# The encoding is pinned to UTF-8 so the output does not depend on the
# platform's locale default (e.g. GBK on Chinese Windows), which can raise
# UnicodeEncodeError on characters outside that code page.
fweb = open('an.html', 'w', encoding='utf-8')
def Find_an(soup,url):
div_ul = soup.find('div',{"class",'problemindexholder'}) #存放题目的<div>
try :
img_ul = div_ul.find_all('img') #查找<img>并如果div_ul为空弹出异常
except :
return 0
for img in img_ul:
url = img['src']
url = "http://codeforces.com" + url #图片地址
r = requests.get(url, stream=True) #下载
image_name = url.split('/')[-1]
#print(image_name)
img['src'] = image_name #相对地址转到本地
with open('./%s' % image_name, 'wb') as f: #保存图片
for c