此方法来自网络，用于拼接 URL：先用 urljoin 合并 base 与 url，再用 normpath 规范化结果中的路径部分。
from urlparse import urljoin
from urlparse import urlparse
from urlparse import urlunparse
from posixpath import normpath
def myjoin(base, url):
    """Join ``url`` onto ``base`` and normalize the path of the result.

    The two values are first combined with ``urljoin``; the path component
    of the combined URL is then passed through ``posixpath.normpath`` so
    that ``.`` and ``..`` segments and repeated slashes are collapsed.
    Scheme, netloc, params, query and fragment are carried over unchanged.

    Args:
        base: The URL that ``url`` is resolved against.
        url: An absolute or relative URL reference.

    Returns:
        The joined URL as a string, with a normalized path.
    """
    parts = urlparse(urljoin(base, url))
    path = parts.path
    # normpath('') returns '.', which would turn 'http://host' into
    # 'http://host/.', so an empty path is passed through untouched.
    if path:
        normalized = normpath(path)
        # normpath drops a trailing slash, but in a URL '/a/' and '/a' are
        # different resources. A path ending in '/.' or '/..' also denotes
        # a directory (RFC 3986, section 5.2.4), so it keeps the slash too.
        if path.endswith(('/', '/.', '/..')) and not normalized.endswith('/'):
            normalized += '/'
        path = normalized
    return urlunparse((parts.scheme, parts.netloc, path,
                       parts.params, parts.query, parts.fragment))
经测试，可正确拼接下列几种形式的 URL：
- base1 = 'http://www.bagtree.com/' url1 = '../../themes/bagtree_2011/images/pinzhi.gif'
- base2 = 'http://info.ceo.hc360.com/list/qygl-ldl.shtml' url2 = '/2011/11/250020188368.shtml'
- base3 = 'http://info.ceo.hc360.com/2012/07/190833206838.shtml' url3 = '190833206838-2.shtml'