Python cachetools常用缓存算法汇总

Cachel wood

于 2024-10-09 21:44:39 发布

阅读量236

点赞数 3

分类专栏： sql语言 sql server + mysql 文章标签： python 缓存算法开发语言 pandas spring 网络

本文链接：https://blog.csdn.net/weixin_46530492/article/details/142796821

版权

sql语言 sql server + mysql 专栏收录该内容

15 篇文章 3 订阅

订阅专栏

文章目录

cachetools介绍

cachetools : 是一个Python第三方库，提供了多种缓存算法的实现。缓存是一种用于临时存储计算结果的技术，以避免在后续计算中重复执行相同的计算。使用缓存可以提高应用程序的性能和响应速度。
多种缓存策略
cachetools 提供了以下常见的缓存策略：
1. LRUCache（Least Recently Used Cache）：基于最近使用的原则，删除最久未使用的缓存项。当缓存达到最大容量时，将删除最久未使用的缓存项。
2. LFUCache（Least Frequently Used Cache）：基于使用频率的原则，删除使用频率最低的缓存项。当缓存达到最大容量时，将删除被访问次数最少的缓存项。
3. FIFOCache（First In, First Out Cache）：按照缓存项的插入顺序进行删除，最先插入的缓存项将首先被删除。
4. RRCache（Random Replacement Cache）：随机删除缓存项，没有特定的策略。

这些缓存策略都可以在 Cachetools 中使用，并可以通过设置缓存的最大容量来控制缓存的大小。

python3.10版本的cachetools缓存策略

import cachetools

# Create an LRU cache (evicts the least recently used entry)
lru_cache = cachetools.LRUCache(maxsize=100)

# Create an LFU cache (evicts the least frequently used entry)
lfu_cache = cachetools.LFUCache(maxsize=100)

# Create an MRU cache (evicts the most recently used entry).
# MRUCache was deprecated in cachetools 5.4 and removed in 6.0, so it is only
# built when the installed version still provides it.
mru_cache = (
    cachetools.MRUCache(maxsize=100) if hasattr(cachetools, "MRUCache") else None
)

# Create an RR cache (evicts a randomly chosen entry)
rr_cache = cachetools.RRCache(maxsize=100)

# Create a FIFO cache (evicts the oldest inserted entry)
fifo_cache = cachetools.FIFOCache(maxsize=100)

maxsize参数代表的是缓存中可以存储的最大条目数量，而不是字符数。

缓存操作

缓存对象支持类似字典的操作，例如：添加、获取、删除和更新缓存项。

# Cache objects support the usual dict-style operations.

# Insert an entry
lru_cache["key"] = "value"

# Read an entry, falling back to a default when the key is absent
value = lru_cache.get("key", "default_value")
print(lru_cache)

# Drop the entry when present; a missing key is silently ignored
lru_cache.pop("key", None)

# Store the key again with a new value
lru_cache["key"] = "new_value"
print(lru_cache)

LRUCache({'key': 'value'}, maxsize=100, currsize=1)
LRUCache({'key': 'new_value'}, maxsize=100, currsize=1)

设置数据生存时间（TTL）

cachetools 还支持为缓存项设置生存时间（TTL）。当缓存项的生存时间到期后，该项将被自动移除。

import cachetools
import time

# Lifetime of each cache entry, in seconds
TTL_SECONDS = 60

# Build a cache whose entries expire once their TTL has elapsed
ttl_cache = cachetools.TTLCache(maxsize=100, ttl=TTL_SECONDS)

# Insert an entry and show the cache while the entry is still alive
ttl_cache["key"] = "value"
print(ttl_cache)

# Sleep just past the TTL so the entry expires
time.sleep(TTL_SECONDS + 1)

# The entry has expired by now, so the lookup yields the default value
value = ttl_cache.get("key", "default_value")
print(value)

当为ttl_cache添加缓存项之后，可以看到 TTLCache类型缓存添加成功，当过去61s之后，缓存项已过期，尝试获取时返回的是默认值default_value。

TTLCache({'key': 'value'}, maxsize=100, currsize=1)
default_value

自定义缓存策略

cachetools允许自定义缓存策略。要实现一个自定义的缓存策略，需要继承 cachetools.Cache 类，并实现相应的方法。例如，实现一个简单的大小有限制的缓存：

import cachetools

class SizeLimitedCache(cachetools.Cache):
    """Size-bounded cache that evicts the oldest inserted entry when full.

    ``cachetools.Cache`` is a ``MutableMapping`` that keeps its data in a
    private dict; it is *not* a ``dict`` subclass.  Item access therefore has
    to be delegated to ``cachetools.Cache`` itself: calling
    ``dict.__getitem__`` / ``dict.__setitem__`` on an instance raises
    ``TypeError``.
    """

    def __init__(self, maxsize):
        """Create an empty cache holding at most ``maxsize`` entries."""
        super().__init__(maxsize=maxsize)

    def __getitem__(self, key, cache_getitem=cachetools.Cache.__getitem__):
        """Return the value stored under ``key``; raise ``KeyError`` if absent."""
        return cache_getitem(self, key)

    def __setitem__(self, key, value, cache_setitem=cachetools.Cache.__setitem__):
        """Store ``value`` under ``key``, evicting old entries when needed.

        ``Cache.__setitem__`` already calls ``self.popitem()`` until the new
        value fits, so no manual eviction is required here.  Overwriting an
        existing key therefore never evicts an unrelated entry.
        """
        cache_setitem(self, key, value)

    def popitem(self):
        """Remove and return the ``(key, value)`` pair that was inserted first.

        Raises:
            KeyError: if the cache is empty.
        """
        try:
            # Iteration follows the insertion order of the underlying dict.
            oldest_key = next(iter(self))
        except StopIteration:
            raise KeyError("%s is empty" % type(self).__name__) from None
        return oldest_key, self.pop(oldest_key)

# Use the custom cache policy
custom_cache = SizeLimitedCache(maxsize=100)
custom_cache

SizeLimitedCache({}, maxsize=100, currsize=0)

缓存装饰器

cachetools还提供了一些缓存装饰器，可以方便地将缓存应用于函数或方法。

import cachetools
import cachetools.func
import requests 

# Cache API responses with a TTL cache: at most 100 entries, each valid for 60 s.
@cachetools.func.ttl_cache(maxsize=100, ttl=60)
def _fetch_json(api_url, frozen_params):
    """Fetch ``api_url`` and return the decoded JSON body.

    ``frozen_params`` is a tuple of ``(name, value)`` pairs rather than a dict,
    because the decorator hashes every argument to build the cache key and a
    dict is unhashable.
    """
    response = requests.get(api_url, params=dict(frozen_params))
    response.raise_for_status()
    return response.json()


def get_data_from_api(api_url, params):
    """Return the JSON payload of ``api_url``, served from the cache when possible.

    Args:
        api_url: URL of the endpoint to query.
        params: Mapping of query parameters, or ``None``.  Its values must be
            hashable because they become part of the cache key.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Freeze the mapping into a hashable tuple; passing the dict straight to
    # the cached function fails with "TypeError: unhashable type: 'dict'".
    frozen_params = tuple(params.items()) if params else ()
    return _fetch_json(api_url, frozen_params)


# Keep the cache helpers of the decorated function reachable from the public name.
get_data_from_api.cache_info = _fetch_json.cache_info
get_data_from_api.cache_clear = _fetch_json.cache_clear

# Call the cached function
data = get_data_from_api("https://api.example.com/data", {"param1": "value1", "param2": "value2"})

缓存清理

cachetools提供了一些方法，可以手动清理缓存

import cachetools

# Build an LRU cache and fill it with three entries, in this order
lru_cache = cachetools.LRUCache(maxsize=100)
lru_cache.update({"name": "Abel", "age": 33, "job": "student"})
print(lru_cache)

# Evict the least recently used entry
lru_cache.popitem()
print(lru_cache)

# Empty the cache by hand
lru_cache.clear()
print(lru_cache)

lru_cache创建缓存之后依次添加了3个缓存项，当使用popitem()函数移除最近最少使用的一条缓存项之后，lru_cache只剩余其他两个缓存项，最后使用clear()函数清空缓存之后，lru_cache显示为空。

LRUCache({'name': 'Abel', 'age': 33, 'job': 'student'}, maxsize=100, currsize=3)
LRUCache({'age': 33, 'job': 'student'}, maxsize=100, currsize=2)
LRUCache({}, maxsize=100, currsize=0)

import cachetools

# Build an LRU cache
lru_cache = cachetools.LRUCache(maxsize=100)

# Insert three entries, one after another
for entry_key, entry_value in (("name", "Abel"), ("age", 33), ("job", "student")):
    lru_cache[entry_key] = entry_value
print(lru_cache)

# Read one entry; this marks "name" as recently used
name_value = lru_cache.get("name")
print(name_value)

# Evict the least recently used entry
lru_cache.popitem()
print(lru_cache)

# Empty the cache by hand
lru_cache.clear()
print(lru_cache)

在添加3条缓存项之后，如果先通过get()访问第一条缓存项name，它就会变成最近使用过的一项；此时再调用popitem()函数，被移除的是最久未被使用的缓存项age，而不是name。

LRUCache({'name': 'Abel', 'age': 33, 'job': 'student'}, maxsize=100, currsize=3)
Abel
LRUCache({'name': 'Abel', 'job': 'student'}, maxsize=100, currsize=2)
LRUCache({}, maxsize=100, currsize=0)

cachetools 超过缓存数量maxsize

设置缓存容量为10，添加11个缓存项；对于LRUCache缓存策略，超出容量时会删除最久未被使用的一条（这里是最先插入的键0）。

import cachetools

# Build an LRU cache that holds at most 10 entries
lru_cache = cachetools.LRUCache(maxsize=10)

# Insert 11 entries and show the cache after every insertion
for i in range(11):
    lru_cache[i] = f"cache{i}"
    print(lru_cache)

LRUCache({'key': 'value'}, maxsize=100, currsize=1)
LRUCache({'key': 'new_value'}, maxsize=100, currsize=1)
TTLCache({'key': 'value'}, maxsize=100, currsize=1)
default_value
SizeLimitedCache({}, maxsize=100, currsize=0)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[11], line 14
     11     return data
     13 # 使用缓存的函数
---> 14 data = get_data_from_api("https://api.example.com/data", {"param1": "value1", "param2": "value2"})

File e:\python3.10\lib\site-packages\cachetools\__init__.py:696, in cached.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    694 try:
    695     with lock:
--> 696         result = cache[k]
    697         hits += 1
    698         return result

File e:\python3.10\lib\site-packages\cachetools\__init__.py:410, in TTLCache.__getitem__(self, key, cache_getitem)
    408 def __getitem__(self, key, cache_getitem=Cache.__getitem__):
    409     try:
--> 410         link = self.__getlink(key)
    411     except KeyError:
    412         expired = False

File e:\python3.10\lib\site-packages\cachetools\__init__.py:497, in TTLCache.__getlink(self, key)
    496 def __getlink(self, key):
--> 497     value = self.__links[key]
    498     self.__links.move_to_end(key)
...
     18 if hashvalue is None:
---> 19     self.__hashvalue = hashvalue = hash(self)
     20 return hashvalue

TypeError: unhashable type: 'dict'
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
LRUCache({'name': 'Abel', 'age': 33, 'job': 'student'}, maxsize=100, currsize=3)
Abel
LRUCache({'name': 'Abel', 'job': 'student'}, maxsize=100, currsize=2)
LRUCache({}, maxsize=100, currsize=0)
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
File e:\python3.10\lib\site-packages\urllib3\connection.py:174, in HTTPConnection._new_conn(self)
    173 try:
--> 174     conn = connection.create_connection(
    175         (self._dns_host, self.port), self.timeout, **extra_kw
    176     )
    178 except SocketTimeout:

File e:\python3.10\lib\site-packages\urllib3\util\connection.py:72, in create_connection(address, timeout, source_address, socket_options)
     68     return six.raise_from(
     69         LocationParseError(u"'%s', label empty or too long" % host), None
     70     )
---> 72 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     73     af, socktype, proto, canonname, sa = res

File e:\python3.10\lib\socket.py:955, in getaddrinfo(host, port, family, type, proto, flags)
    954 addrlist = []
--> 955 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    956     af, socktype, proto, canonname, sa = res

gaierror: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:
...
--> 519     raise ConnectionError(e, request=request)
    521 except ClosedPoolError as e:
    522     raise ConnectionError(e, request=request)

ConnectionError: HTTPSConnectionPool(host='api.example.com', port=443): Max retries exceeded with url: /data (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000015BB5EBEFB0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
File e:\python3.10\lib\site-packages\urllib3\connection.py:174, in HTTPConnection._new_conn(self)
    173 try:
--> 174     conn = connection.create_connection(
    175         (self._dns_host, self.port), self.timeout, **extra_kw
    176     )
    178 except SocketTimeout:

File e:\python3.10\lib\site-packages\urllib3\util\connection.py:72, in create_connection(address, timeout, source_address, socket_options)
     68     return six.raise_from(
     69         LocationParseError(u"'%s', label empty or too long" % host), None
     70     )
---> 72 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     73     af, socktype, proto, canonname, sa = res

File e:\python3.10\lib\socket.py:955, in getaddrinfo(host, port, family, type, proto, flags)
    954 addrlist = []
--> 955 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    956     af, socktype, proto, canonname, sa = res

gaierror: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:
...
--> 519     raise ConnectionError(e, request=request)
    521 except ClosedPoolError as e:
    522     raise ConnectionError(e, request=request)

ConnectionError: HTTPSConnectionPool(host='api.example.com', port=443): Max retries exceeded with url: /data?param1=value1&param2=value2 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000015BB65BDED0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
LRUCache({0: 'cache0'}, maxsize=10, currsize=1)
LRUCache({0: 'cache0', 1: 'cache1'}, maxsize=10, currsize=2)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2'}, maxsize=10, currsize=3)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3'}, maxsize=10, currsize=4)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4'}, maxsize=10, currsize=5)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5'}, maxsize=10, currsize=6)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6'}, maxsize=10, currsize=7)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7'}, maxsize=10, currsize=8)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8'}, maxsize=10, currsize=9)
LRUCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8', 9: 'cache9'}, maxsize=10, currsize=10)
LRUCache({1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8', 9: 'cache9', 10: 'cache10'}, maxsize=10, currsize=10)

Random 缓存会随机删除一条记录。

import cachetools

# Build a random-replacement cache that holds at most 10 entries
rr_cache = cachetools.RRCache(maxsize=10)

# Insert 11 entries and show the cache after every insertion
for i in range(11):
    rr_cache[i] = f"cache{i}"
    print(rr_cache)

RRCache({0: 'cache0'}, maxsize=10, currsize=1)
RRCache({0: 'cache0', 1: 'cache1'}, maxsize=10, currsize=2)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2'}, maxsize=10, currsize=3)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3'}, maxsize=10, currsize=4)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4'}, maxsize=10, currsize=5)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5'}, maxsize=10, currsize=6)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6'}, maxsize=10, currsize=7)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7'}, maxsize=10, currsize=8)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8'}, maxsize=10, currsize=9)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8', 9: 'cache9'}, maxsize=10, currsize=10)
RRCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8', 9: 'cache9', 10: 'cache10'}, maxsize=10, currsize=10)

FIFO缓存会删除第一条存入的缓存项。

import cachetools

# Build a FIFO cache that holds at most 10 entries
fifo_cache = cachetools.FIFOCache(maxsize=10)

# Insert 11 entries and show the cache after every insertion
for i in range(11):
    fifo_cache[i] = f"cache{i}"
    print(fifo_cache)

FIFOCache({0: 'cache0'}, maxsize=10, currsize=1)
FIFOCache({0: 'cache0', 1: 'cache1'}, maxsize=10, currsize=2)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2'}, maxsize=10, currsize=3)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3'}, maxsize=10, currsize=4)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4'}, maxsize=10, currsize=5)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5'}, maxsize=10, currsize=6)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6'}, maxsize=10, currsize=7)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7'}, maxsize=10, currsize=8)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8'}, maxsize=10, currsize=9)
FIFOCache({0: 'cache0', 1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8', 9: 'cache9'}, maxsize=10, currsize=10)
FIFOCache({1: 'cache1', 2: 'cache2', 3: 'cache3', 4: 'cache4', 5: 'cache5', 6: 'cache6', 7: 'cache7', 8: 'cache8', 9: 'cache9', 10: 'cache10'}, maxsize=10, currsize=10)

cachetools 使用示例

在这个示例中，我们使用 cachetools.LRUCache 创建一个 LRU 缓存。当我们调用 get_data_from_api() 函数时，会先检查缓存中是否有数据。如果缓存中有数据，就直接返回缓存的数据，避免了重复请求接口，提高了程序性能。

import requests
import cachetools

# LRU cache with room for 100 responses, keyed by URL
cache = cachetools.LRUCache(maxsize=100)

def get_data_from_api(url):
    """Return the JSON payload for ``url``, reusing a cached copy when one exists.

    On a cache miss the URL is requested, an error status raises via
    ``raise_for_status()``, and the decoded JSON is stored in ``cache``
    before being returned.
    """
    try:
        # Cache hit: hand back the stored payload without touching the network
        return cache[url]
    except KeyError:
        pass

    reply = requests.get(url)
    reply.raise_for_status()
    payload = reply.json()

    # Remember the payload for later calls with the same URL
    cache[url] = payload
    return payload

# Call the cached function
data = get_data_from_api("https://api.example.com/data")