# Modify the file save path (savePath) below if necessary.
"""
Created on Tue Jun 14 09:57:31 2022
@author: kankan_ol(冰原狼)
"""
import os
import re
from http.client import HTTPException
from urllib.parse import urljoin, urlsplit
from urllib.request import urlopen

import requests
# --- Configuration: edit these values to point the script elsewhere. ------
savePath = "D:/apro/apythondata"            # local directory receiving the files
baseUrl = "https://xxx.xxx.com"             # site root prepended to resource paths
url = "https://xxx.xxx.com/xxx/index.html"  # page whose linked resources are saved

PAGE_ENCODING = "gb2312"  # charset used to decode the page before scanning it
REQUEST_TIMEOUT = 30      # seconds; without a timeout a stalled server hangs the run

# Captures the quoted value of every href=/src= attribute.  The back-reference
# forces the closing quote to match the opening one, and `.*?` lets an empty
# attribute match as empty instead of swallowing the markup that follows it.
_LINK_PATTERN = re.compile(r"""(?:href|src)=(["'])(.*?)\1""", re.IGNORECASE)


def _site_location(link, page_url):
    """Resolve a same-site link to ``(path, query)`` relative to the site root.

    Returns ``None`` for links that must not be downloaded: anything carrying
    its own scheme or host (``http://``, ``https://``, ``//cdn...``,
    ``javascript:``, ``mailto:``, ``data:``), fragment-only or query-only
    links, and directory-style links whose path ends in ``/`` (they have no
    file name to save under).

    Raises ``ValueError`` if ``link`` cannot be parsed as a URL.
    """
    parts = urlsplit(link)
    if parts.scheme or parts.netloc or not parts.path:
        return None
    # urljoin resolves document-relative links ("css/a.css", "../a.css")
    # against the page and collapses "." / ".." segments.
    resolved = urlsplit(urljoin(page_url, link))
    if not resolved.path or resolved.path.endswith("/"):
        return None
    return resolved.path, resolved.query


def _local_target(root, site_path):
    """Return the file path for ``site_path`` under ``root``.

    Returns ``None`` when the resulting path would not lie strictly inside
    ``root``; the path comes from remote HTML and must not be able to write
    elsewhere on disk (e.g. via backslash segments on Windows).
    """
    file_path = root + site_path
    root_abs = os.path.abspath(root)
    target_abs = os.path.abspath(file_path)
    try:
        inside = (
            target_abs != root_abs
            and os.path.commonpath([root_abs, target_abs]) == root_abs
        )
    except ValueError:
        # commonpath rejects paths it cannot compare (e.g. different drives).
        inside = False
    return file_path if inside else None


os.makedirs(savePath, exist_ok=True)

# Fetch the page once; the same bytes are saved to disk and scanned for
# links, so the saved copy and the downloaded resources always agree.
with urlopen(url, timeout=REQUEST_TIMEOUT) as response:
    content = response.read()
with open(savePath + "/index.html", "wb") as index_file:
    index_file.write(content)

page_text = content.decode(PAGE_ENCODING, errors="replace")

seen = set()  # remote URLs already handled, so repeated links are fetched once
for match in _LINK_PATTERN.finditer(page_text):
    path = match.group(2)
    # Best effort per resource: a broken link, HTTP error, timeout or an
    # unwritable file name is reported and skipped instead of aborting the run.
    try:
        location = _site_location(path, url)
        if location is None:
            continue
        site_path, query = location

        href = baseUrl.rstrip("/") + site_path + ("?" + query if query else "")
        if href in seen:
            continue
        seen.add(href)

        filePath = _local_target(savePath, site_path)
        if filePath is None:
            print("skipped", path, "- resolves outside", savePath)
            continue

        with urlopen(href, timeout=REQUEST_TIMEOUT) as resource:
            content2 = resource.read()

        dirs = os.path.dirname(filePath)
        print(dirs)
        os.makedirs(dirs, exist_ok=True)
        with open(filePath, "wb") as out_file:
            out_file.write(content2)
    except (OSError, ValueError, HTTPException) as exc:
        # URLError/HTTPError and timeouts are OSError subclasses; ValueError
        # covers unparsable URLs; HTTPException covers malformed requests
        # and truncated responses raised by http.client.
        print("skipped", path, "-", exc)