判断远程url是否_快乐Python | furl 简化URL操作

最新推荐文章于 2023-05-23 08:40:52 发布

黄业文

最新推荐文章于 2023-05-23 08:40:52 发布

阅读量179

点赞数

文章标签：判断远程url是否

本文链接：https://blog.csdn.net/weixin_30177835/article/details/112568465

版权

furl :简化URL操作

furl是Python轻量级的处理URL的库，意在简化URL的解析和操作。下面直接安装使用：

pip install furl

对于下面这个URL:

scheme://username:password@host:port/path?query#fragment
举个例子：
http://smallstone:password@www.smallstone.com:80/?one=1&two=2#frag

如果是你，你会怎样取出各个部分，不妨自己试一试。

furl可以这样操作：

from furl import furl

f = furl('http://smallstone:password@www.smallstone.com:80/?one=1&two=2#frag')
print(f.scheme)  # http
print(f.username)  # smallstone
print(f.password)  # password
print(f.host) #  www.smallstone.com
print(f.port)  #  80
print(f.query)  #  one=1&two=2
print(f.fragment)  # frag

Path相关

绝对路径、文件、文件夹判断

from furl import furl
# 绝对路径判断
f = furl('http://www.google.com/#/absolute/fragment/path/')
print(f.fragment.path.isabsolute)  # True
f.fragment.path.isabsolute = False  # 将绝对路径设为False，仔细观察下面url的变化
print(f.url)  #  http://www.google.com/#absolute/fragment/path/
# 文件、文件夹判断
f = furl('http://www.google.com/a/directory/')
print(f.path.isdir)  # True  判断是否为文件夹
print(f.path.isfile)  # False  判断是否为文件
f = furl('http://www.google.com/a/file')
print(f.path.isdir)  # False  判断是否为文件夹
print(f.path.isfile)  # True  判断是否为文件

路径简化

from furl import furl
# 路径简化
f = furl('http://www.google.com/a/./b/lolsup/../c/')
f.path.normalize()  # http://www.google.com/a/b/c/

/操作

from furl import furl
# / 操作
f = furl('http://www.google.com/a/')
f.path /= 'b'  #  /a/b
print(f.path)
f.path /= 'smallstone'
print(f.path)  # /a/b/smallstone
print(f.url)  # http://www.google.com/a/b/smallstone

Path类重写了__truediv__

def __truediv__(self, path):
    copy = self.__class__(
        path=self.segments,
        force_absolute=self._force_absolute,
        strict=self.strict)
    return copy.add(path)

asdict()

from pprint import pprint
from furl import furl
# asdict()
f = furl('http://www.smallstone.com/p1/p2/p3')
pprint(f.path.asdict())
"""
{'encoded': '/p1/p2/p3',
 'isabsolute': True,
 'isdir': False,
 'isfile': True,
 'segments': ['p1', 'p2', 'p3']}
"""

Query相关

Query内部有个ordered multivalue dictionary有序多值字典，后面的操作多和这个有关，args就是个简写，从源码能看出来

@property
def args(self):
    """
    Shortcut method to access the query parameters, self._query.params.
    """
    return self._query.params

from furl import furl

f = furl('http://www.smallstone.com/?one=1&two=2')
print(repr(f.query))  # Query('one=1&two=2')
print(repr(f.query.params))  # omdict1D([('one', '1'), ('two', '2')])
print(f.args is f.query.params)  # True 同一个对象

操作args(params)

from furl import furl

f = furl('http://www.smallstone.com/?one=1&two=2')
del f.args['one']
print(f.url)  # http://www.smallstone.com/?two=2
f.args["three"] = 'added'
print(f.url)  # http://www.smallstone.com/?two=2&three=added
f.args['two'] = 'updated'
print(f.url)  # http://www.smallstone.com/?two=updated&three=added

为啥args是ordered multivalue dictionary，看了下面相信你就知道了

from furl import furl

f = furl('http://www.smallstone.com/?one=1&two=2&two=222')
print(f.args.getlist('two'))  # ['2', '222']
f.args.addlist('repeated', ['1', '2', '3'])
print(f.url)  # http://www.smallstone.com/?one=1&two=2&two=222&repeated=1&repeated=2&repeated=3
# pop 操作
print(f.args.popvalue('repeated'))  # 3
print(f.url)  # http://www.smallstone.com/?one=1&two=2&two=222&repeated=1&repeated=2

Fragment相关

Fragment内部有一个Path和Query对象，所以操作和上面介绍的就很相似了。

from furl import furl

f = furl('http://www.google.com/#/fragment/path?with=params')
print(repr(f.fragment))  # Fragment('/fragment/path?with=params')
print(repr(f.fragment.path))  # Path('/fragment/path')
print(repr(f.fragment.query))  # Query('with=params')
print(repr(f.fragment.query.params))  # omdict1D([('with', 'params')])
print(f.fragment.args is f.fragment.query.params)  # True 同一个对象
print(f.fragment)  # /fragment/path?with=params

所以Path和Query有的方法在这里还是可以用的，这里就不重复介绍了。

asdict():

from pprint import pprint
from furl import furl

f = furl('http://www.google.com/#/fragment/path?with=params')
pprint(f.fragment.asdict())
"""
{'encoded': '/fragment/path?with=params',
 'path': {'encoded': '/fragment/path',
          'isabsolute': True,
          'isdir': False,
          'isfile': True,
          'segments': ['fragment', 'path']},
 'query': {'encoded': 'with=params', 'params': [('with', 'params')]},
 'separator': True}
 """

链式操作

为了进一步简化上述操作，furl提供了链式操作。

from furl import furl

url = 'http://www.google.com/#fragment'
print(furl(url).add(args={'example': 'arg'}).set(port=99).remove(fragment=True).url)  # http://www.google.com:99/?example=arg

add()可以传入的参数如下：

def add(self, args=_absent, path=_absent, fragment_path=_absent,
        fragment_args=_absent, query_params=_absent):

里面的参数前面也都介绍过，其中args是query_params别名，传其中一个就行。使用示例如下：

from furl import furl

f = furl('http://www.google.com/').add(path='/search', fragment_path='frag/path', fragment_args={'frag':'arg'})
print(f.url)  # http://www.google.com/search#frag/path?frag=arg

set()的参数就更多了

def set(self, args=_absent, path=_absent, fragment=_absent, scheme=_absent,
        netloc=_absent, origin=_absent, fragment_path=_absent,
        fragment_args=_absent, fragment_separator=_absent, host=_absent,
        port=_absent, query=_absent, query_params=_absent,
        username=_absent, password=_absent):

使用示例如下：

from furl import furl

f = furl().set(scheme='https', host='secure.google.com', port=99,
               path='index.html',args={'some':'args'}, fragment='great job')
print(f.url)  # https://secure.google.com:99/index.html?some=args#great%20job

remove()从URL中移除想要移除的部分

def remove(self, args=_absent, path=_absent, fragment=_absent,
           query=_absent, query_params=_absent, port=False,
           fragment_path=_absent, fragment_args=_absent, username=False,
           password=False):

使用示例如下：

from furl import furl

url = 'https://secure.google.com:99/a/path/?some=args#great job'
f = furl(url).remove(args=['some'], path='path/', fragment=True, port=True)
print(f.url)  # https://secure.google.com/a/

杂项

tostr()：上面f.url实际上就是调用的tostr()

@property
def url(self):
    return self.tostr()

def tostr(self, query_delimiter='&', query_quote_plus=True,
          query_dont_quote=''):

copy()：返回一个相同URL的新的furl实例

from furl import furl

f = furl('http://www.google.com')
new_f = f.copy()
print(new_f.url)  # http://www.google.com
new_f.set(path='path/test')
print(new_f.url)  # http://www.google.com/path/test
print(f.url)   # http://www.google.com

join()：furl对象的url和提供的参数进行连接。

from furl import furl

f = furl('http://www.smallstone.com')
f.join('parent/firstChild')
print(f.url)  # http://www.smallstone.com/parent/firstChild
f.join('secChild')
print(f.url)  # http://www.smallstone.com/parent/secChild
f.join("thdChild/grandson")
print(f.url)  # http://www.smallstone.com/parent/thdChild/grandson
f.join('../../uncle')
print(f.url)  # http://www.smallstone.com/uncle
f.join('http://www.test.com/haha/test')
print(f.url)  # http://www.test.com/haha/test

asdict()：字典形式返回furl对象信息

from pprint import pprint
from furl import furl

f = furl('http://www.smallstone.com/?one=1&two=2#/fragment/path?with=params')
pprint(f.asdict())

输出结果如下：

{'fragment': {'encoded': '/fragment/path?with=params',
              'path': {'encoded': '/fragment/path',
                       'isabsolute': True,
                       'isdir': False,
                       'isfile': True,
                       'segments': ['fragment', 'path']},
              'query': {'encoded': 'with=params',
                        'params': [('with', 'params')]},
              'separator': True},
 'host': 'www.smallstone.com',
 'host_encoded': 'www.smallstone.com',
 'netloc': 'www.smallstone.com',
 'origin': 'http://www.smallstone.com',
 'password': None,
 'path': {'encoded': '/',
          'isabsolute': True,
          'isdir': True,
          'isfile': False,
          'segments': ['']},
 'port': 80,
 'query': {'encoded': 'one=1&two=2', 'params': [('one', '1'), ('two', '2')]},
 'scheme': 'http',
 'url': 'http://www.smallstone.com/?one=1&two=2#/fragment/path?with=params',
 'username': None}

END

furl用起来还是很轻便的，爬虫之类的场景可以尝试用用。上面没介绍到的地方可以查看官方文档：https://github.com/gruns/furl/blob/master/API.md 。最后放上Github地址：https://github.com/gruns/furl 。

如果有帮助，欢迎点赞和在看

黄业文

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
判断远程url是否_快乐Python | furl 简化URL操作

furl :简化URL操作。furl是Python轻量级的处理URL的库，意在简化URL的解析和操作。下面直接安装使用：pip install furl。对于下面这个URL: scheme://username:password@host:port/path?query#fragment ，举个例子：http://smallstone:password@www.smallstone.com:80/...
复制链接

扫一扫