# Very simple and it works; based on the link you posted. That said, I am not
# sure whether it might break for some reason I have not thought of :)
import re
def trim_utm(url):
    """Return *url* with every ``utm_*`` query parameter removed.

    The query string is the text between the first ``?`` and the first
    ``#``; everything from the first ``#`` onwards is the fragment and is
    copied through untouched. Only parameters whose *name* starts with
    ``utm_`` are dropped, so a value such as ``a=utm_a`` is preserved.

    Args:
        url: The URL string to clean.

    Returns:
        The URL without ``utm_*`` parameters. The input is returned
        unchanged when it has no query string. When every parameter is
        removed the ``?`` separator is kept (``page?utm_a=a`` becomes
        ``page?``).
    """
    # Cheap early exit: nothing to strip if the marker never appears.
    if "utm_" not in url:
        return url

    # Split off the fragment first: a '?' after '#' belongs to the fragment
    # and must not be mistaken for the start of the query string.
    base, hash_sep, fragment = url.partition("#")

    # The query starts at the FIRST '?'; any later '?' is ordinary query data.
    path, query_sep, query = base.partition("?")
    if not query_sep:
        return url

    kept = [param for param in query.split("&") if not param.startswith("utm_")]
    return path + query_sep + "&".join(kept) + hash_sep + fragment
if __name__ == "__main__":
    # Demo: print each sample URL followed by its cleaned form.
    # The samples cover utm_ parameters at the start, middle and end of the
    # query, URLs with and without a fragment, a value (not a name) that
    # contains "utm_", and URLs without any query string.
    tests = [
        "http://localhost/index.php?a=1&utm_source=1&b=2",
        "http://localhost/index.php?a=1&utm_source=1&b=2#hash",
        "http://localhost/index.php?a=1&utm_source=1&b=2&utm_something=no#hash",
        "http://localhost/index.php?a=1&utm_source=1&utm_a=yes&b=2#hash",
        "http://localhost/index.php?utm_a=a",
        "http://localhost/index.php?a=utm_a",
        "http://localhost/index.php?a=1&b=2",
        "http://localhost/index.php",
        "http://localhost/index.php#hash2",
    ]
    for t in tests:
        trimmed = trim_utm(t)
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3, unlike the bare print statement.
        print(t)
        print(trimmed)