一、通过urllib.parse库解析url中需要的参数
https://www.xxx.com/api/projectid/123456/versionid/789012/components?limit=10&pageno=2
希望从上面这个url中解析出如下数据:
domain www.xxx.com
scheme https
projectid 123456
versionid 789012
limit 10
pageno 2
二、代码如下
from urllib.parse import urlparse, parse_qs
def _first_query_value(query_params, name):
    """Return the first value of query parameter *name*, or None if absent.

    ``parse_qs`` maps each name to a list of values and drops blank values
    by default, so a parameter such as ``limit=`` is simply not in the dict.
    """
    values = query_params.get(name)
    return values[0] if values else None


def _path_value_after(path_parts, key):
    """Return the path segment right after *key*, or None if there is none.

    Covers both a missing *key* (``ValueError`` from ``list.index``) and a
    *key* that is the last segment (``IndexError`` on the ``+ 1`` lookup).
    """
    try:
        return path_parts[path_parts.index(key) + 1]
    except (ValueError, IndexError):
        return None


def extract_url_parts(url: str) -> dict:
    """Pull the domain, scheme, path ids and paging parameters out of *url*.

    For the URL
    ``https://www.xxx.com/api/projectid/123456/versionid/789012/components?limit=10&pageno=2``
    the intermediate ``urlparse`` result is::

        ParseResult(scheme='https', netloc='www.xxx.com',
                    path='/api/projectid/123456/versionid/789012/components',
                    params='', query='limit=10&pageno=2', fragment='')

    Returns:
        A dict with the keys ``domain``, ``scheme``, ``full_domain``,
        ``limit``, ``pageno``, ``projectid`` and ``versionid``. All values
        are strings (nothing is converted to ``int``); ``limit``, ``pageno``,
        ``projectid`` and ``versionid`` are ``None`` when the URL does not
        contain them.
    """
    parsed_url = urlparse(url)
    domain = parsed_url.netloc  # e.g. 'www.xxx.com'
    scheme = parsed_url.scheme  # e.g. 'https'

    # Everything after '?', e.g. {'limit': ['10'], 'pageno': ['2']}
    query_params = parse_qs(parsed_url.query)

    # Path split on '/', e.g.
    # ['', 'api', 'projectid', '123456', 'versionid', '789012', 'components']
    # The value of an id is the segment immediately after its name.
    url_path_parts = parsed_url.path.split("/")

    return {
        "domain": domain,
        "scheme": scheme,
        "full_domain": f"{scheme}://{domain}",  # e.g. 'https://www.xxx.com'
        "limit": _first_query_value(query_params, "limit"),
        "pageno": _first_query_value(query_params, "pageno"),
        "projectid": _path_value_after(url_path_parts, "projectid"),
        "versionid": _path_value_after(url_path_parts, "versionid"),
    }


if __name__ == '__main__':
    test_url = "https://www.xxx.com/api/projectid/123456/versionid/789012/components?limit=10&pageno=2"
    url_parts = extract_url_parts(test_url)
    domain = url_parts["domain"]  # 'www.xxx.com'
    scheme = url_parts["scheme"]  # 'https'
    full_domain = url_parts["full_domain"]  # 'https://www.xxx.com'
    limit = url_parts["limit"]  # '10'
    page_no = url_parts["pageno"]  # '2'
    projectid_value = url_parts["projectid"]  # '123456'
    version_value = url_parts["versionid"]  # '789012'