from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.parse import urlunparse
from posixpath import normpath
def myjoin(base, url):
    """Join *url* onto *base* and return the URL with a normalized path.

    The reference is first resolved with ``urljoin``; the path component of
    the result is then passed through ``posixpath.normpath`` so redundant
    separators and leftover ``.``/``..`` segments are collapsed.  Scheme,
    netloc, params, query and fragment are passed through unchanged.

    Args:
        base: The base URL (``str`` or ``bytes``).
        url: The absolute or relative reference to resolve against *base*
            (same type as *base*).

    Returns:
        The joined URL, of the same type as the inputs.
    """
    parts = urlparse(urljoin(base, url))
    path = parts.path
    # An empty path must stay empty: normpath("") returns ".", which would
    # turn "http://host" into "http://host/.".
    if path:
        # urllib.parse accepts both str and bytes, so pick a matching slash.
        slash = "/" if isinstance(path, str) else b"/"
        normalized = normpath(path)
        # normpath drops a trailing slash, but in a URL "/dir/" and "/dir"
        # are different resources, so put it back when it was present.
        if path.endswith(slash) and not normalized.endswith(slash):
            normalized += slash
        path = normalized
    return urlunparse(parts._replace(path=path))
# Demonstrate myjoin on a handful of (base, reference) pairs, printing one
# joined URL per line in the order listed.
for _demo_base, _demo_ref in (
    ("http://www.baidu.com", "abc.html"),
    ("http://www.baidu.com", "/../../abc.html"),
    ("http://www.baidu.com/xxx/yy/zz/xyz", "./../../abc.html"),
    ("http://www.baidu.com", "abc.html?key=value&m=x"),
):
    print(myjoin(_demo_base, _demo_ref))
# Output:
# http://www.baidu.com/abc.html
# http://www.baidu.com/abc.html
# http://www.baidu.com/xxx/abc.html
# http://www.baidu.com/abc.html?key=value&m=x