# 爬取*网站题库 (Scrape the problem bank of the * website)
import requests
import re
import time
import html
# Request headers; the User-Agent imitates a desktop Chrome browser.
headers = {
    'User-Agent': (
        # Adjacent string literals are joined with no separator, so the
        # trailing space after "537.36" is required to form a valid token
        # sequence ("AppleWebKit/537.36 (KHTML, like Gecko)").
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    ),
}
# Output file Text.txt, opened in append mode ('a+' creates it if it does not
# exist). The encoding is given explicitly so that non-ASCII text can be
# written regardless of the platform's default locale encoding.
f = open('/Volumes/SHARE/Python/GetAcmText/Text.txt', 'a+', encoding='utf-8')
def get_info(url):
global i
i = i + 1
print(i) #用于观察
res = requests.get(url, headers=headers)
if res.status_code == 200: #判断网站是否为可访问
title = re.findall('
(.*?)
', res.content.decode('utf-8'), re.S)[1].strip() #正则获取题目名describes = re.findall('
(.*?)', res.content.decode('utf-8'), re.S)describe = describes[0].strip()
tinput = describes[1].strip()
toutput = describes[2].strip()
einput = re.fi