# 导入包
import urllib.parse
# 类似这样的字符串(百分号编码,示例末尾被截断):%20%E8%BF%AD%E4%BB%A3%E5%9…
# unquote()可以将URL编码格式的字符串解码,还原为中文等原始字符(quote()才是将中文转换为URL编码格式)
# Decode a percent-encoded string back into readable text with unquote().
# The original call passed an undefined name (`i`), which raises NameError,
# and threw the decoded value away; use a concrete sample and show the result.
encoded_text = '%20%E8%BF%AD%E4%BB%A3'
decoded_text = urllib.parse.unquote(encoded_text)
print(decoded_text)
# urlparse()实现URL的识别和分段
# Split a URL into its components with urlparse().
# `scheme` is only a fallback used when the URL itself carries no scheme, so
# the parsed scheme below still comes from the URL; `allow_fragments=True`
# is spelled out to show the parameter.
url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd=python%E6%AD%A3%E5%88%99%E8%8E%B7%E5%8F%96%E7%AC%AC%E5%87%A0%E4%B8%AA%E5%85%83%E7%B4%A0&oq=python%25E8%258E%25B7%25E5%258F%2596%25E7%25AC%25AC%25E5%2587%25A0%25E4%25B8%25AA%25E5%2585%2583%25E7%25B4%25A0&rsv_pq=c765e81a0001dce6&rsv_t=5dc78noxF%2BWIwGE%2FKDcvV50dul8H9yEiCxoAyyFQYAsI2eNP1nGCHz4JU3g&rqlang=cn&rsv_enter=1&rsv_dl=tb&rsv_btype=t&inputT=2645&rsv_sug3=87&rsv_sug2=0&rsv_sug4=3537'
result = urllib.parse.urlparse(url, scheme='http', allow_fragments=True)
# Show the whole parse result first, then just its scheme, one per line.
print(result, result.scheme, sep='\n')
"""
示例:urlparse('https://book.qidian.com/info/1004608738?wd=123&page=20#Catalog') 的返回结果为
ParseResult(scheme='https', netloc='book.qidian.com', path='/info/1004608738', params='', query='wd=123&page=20', fragment='Catalog')
scheme:表示协议
netloc:域名
path:路径
params:参数
query:查询条件,一般是GET请求URL中“?”后面的参数
fragment:锚点,用于直接定位页面的下拉位置,跳转到网页的指定位置
"""
# urlencode()将字典形式的参数序列化为URL编码后的字符串(常用来构造GET请求和POST请求的参数),格式为 k1=v1&k2=v2
# Serialize a dict of request parameters into a URL-encoded query string
# with urlencode().
url1 = {'username': '小明', 'password': '123456'}
res = urllib.parse.urlencode(url1)
# Decode the encoded string again before printing it.
print(urllib.parse.unquote(res))
# urlunparse()可以实现URL的构造
# Build a URL from its parts with urlunparse(). The argument is an iterable
# whose length must be exactly 6; the labels below name each position.
url_parmas = (
    'https',             # scheme
    'book.qidian.com',   # netloc
    '/info/1004608738',  # path
    '',                  # params
    'wd=123&page=20',    # query
    'Catalog',           # fragment
)
result = urllib.parse.urlunparse(url_parmas)
print(result)
"""
https://book.qidian.com/info/1004608738?wd=123&page=20#Catalog
"""