# urllib库parse模块用法总结如下:
# !/usr/bin/env python
# -*- coding:utf-8 -*-
"""
urllib.parse
处理URL的模块
"""
import urllib.parse
# Sample inputs shared by several demonstrations below.
_DOC_URL = "https://docs.python.org/3/library/urllib.parse.html#module-urllib.parse"
_SEARCH_URL = "https://www.baidu.com/s?wd=编程 python"

# urlparse(): break a URL into a 6-field named tuple
# (scheme, netloc, path, params, query, fragment).
res = urllib.parse.urlparse(_DOC_URL)
print(res)
# Show each component on its own line, in field order.
print(
    res.scheme,
    res.netloc,
    res.path,
    res.params,
    res.query,
    res.fragment,
    sep="\n",
)

# urlunparse(): assemble a URL string from any iterable of six components.
url = urllib.parse.urlunparse(("http", "www.baidu.com", "index", "", "", ""))
print(url)

# urlsplit(): alternative to urlparse() that returns a 5-field SplitResult.
# Positional arguments are (url, default scheme, allow_fragments).
res = urllib.parse.urlsplit(_DOC_URL, "https", True)
print(res)
# geturl(): turn the SplitResult back into a URL string.
print(res.geturl())

# urlunsplit(): counterpart of urlunparse() for five components.
url = urllib.parse.urlunsplit(("http", "x.org", "index.html", "", ""))
print(url)

# urljoin(): combine a base URL with a second URL into one complete URL.
for _base, _ref in (
    ("http://www.baidu.com", "index.html#a"),
    ("http://www.baidu.com/", "//www.baidu.com/index.html"),
):
    url = urllib.parse.urljoin(_base, _ref)
    print(url)

# quote(): percent-encode special characters. Only "/" is declared safe
# here, so ":", "?" and "=" are escaped along with the non-ASCII text.
url = urllib.parse.quote(_SEARCH_URL, safe="/")
print(url)
# quote_plus(): with the same `safe` value the result differs from quote()
# only in that the space is written as "+" instead of "%20".
url = urllib.parse.quote_plus(_SEARCH_URL, safe="/")
print(url)

# unquote(): reverse of quote().
url = urllib.parse.unquote(
    "https%3A//www.baidu.com/s%3Fwd%3D%E7%BC%96%E7%A8%8B%20python"
)
print(url)
# unquote_plus(): reverse of quote_plus(); "+" is decoded back to a space.
url = urllib.parse.unquote_plus(
    "https%3A//www.baidu.com/s%3Fwd%3D%E7%BC%96%E7%A8%8B+python"
)
print(url)

# urlencode(): serialise a dict of parameters into a query string.
params = urllib.parse.urlencode({"key1": "编程", "key2": "写作"})
print(params)

# parse_qs(): reverse of urlencode(); decodes a query string into a dict
# that maps each key to a list of values.
_query = urllib.parse.urlencode({"key": "编程"})
params = urllib.parse.parse_qs(_query)
print(params)

# parse_qsl(): same decoding, but returned as a list of (key, value) pairs.
_query = urllib.parse.urlencode({"key1": "编程"})
params = urllib.parse.parse_qsl(_query)
print(params)