Skip to content

Commit 337727b

Browse files
committed
feat(core): add max_items functionality, so memory usage is decreased
1 parent 0704349 commit 337727b

File tree

4 files changed

+76
-5
lines changed

4 files changed

+76
-5
lines changed

class_cache/core.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,63 @@
1+
import math
12
from abc import abstractmethod
23
from typing import Any, Callable, ClassVar, Iterable
34

45
from replete.consistent_hash import consistent_hash
56

67
from class_cache.backends import SQLiteBackend
8+
from class_cache.lru_queue import LRUQueue
79
from class_cache.types import CacheInterface, IdType, KeyType, ValueType
810

911
DEFAULT_BACKEND_TYPE = SQLiteBackend
1012

1113

1214
class Cache(CacheInterface[KeyType, ValueType]):
15+
"""
16+
:param max_items: Maximum number of items to keep in memory
17+
:param flush_ratio: Fraction of stored items to write to backend when memory is full
18+
ceiling will be used to calculate the final amount
19+
"""
20+
1321
def __init__(
    self,
    id_: IdType = None,
    backend_type: type[CacheInterface] | Callable[[IdType], CacheInterface] = DEFAULT_BACKEND_TYPE,
    max_items: int = 128,
    *,
    flush_ratio: float = 0.1,
) -> None:
    """Create a cache that keeps at most ``max_items`` entries in memory.

    :param id_: Identifier forwarded to the backend factory.
    :param backend_type: Class or factory that produces the persistent backend.
    :param max_items: Maximum number of items to keep in memory.
    :param flush_ratio: Fraction of stored items to write to the backend when
        memory is full; the eviction count is rounded up with ``math.ceil``.
    """
    super().__init__(id_)
    self._backend = backend_type(id_)

    self._max_items = max_items
    # Pre-compute how many LRU entries each flush evicts.
    # NOTE(review): flush_ratio == 0 yields _flush_amount == 0, so
    # _check_max_items would never free memory — consider validating > 0.
    self._flush_amount = math.ceil(self._max_items * flush_ratio)
    self._lru_queue = LRUQueue()

    # In-memory store plus dirty/tombstone bookkeeping for the backend.
    self._data: dict[KeyType, ValueType] = {}
    self._to_write: set[KeyType] = set()
    self._to_delete: set[KeyType] = set()
2639

2740
@property
2841
def backend(self) -> CacheInterface:
2942
return self._backend
3043

3144
def __contains__(self, key: KeyType) -> bool:
    """Report whether *key* is available in memory or in the backend."""
    # In-memory hit: refresh its LRU position as a side effect.
    if key in self._data:
        self._lru_queue.update(key)
        return True
    # A pending deletion shadows whatever the backend still holds.
    if key in self._to_delete:
        return False
    return key in self._backend
3549

3650
def __setitem__(self, key: KeyType, value: ValueType) -> None:
    """Store *value* under *key* and mark it dirty for the next flush."""
    self._to_write.add(key)
    self._data[key] = value
    # Most-recently-used bookkeeping, then evict if over capacity.
    self._lru_queue.update(key)
    self._check_max_items()
3955

4056
def __getitem__(self, key: KeyType) -> ValueType:
    """Return the value for *key*, loading it from the backend on a miss."""
    miss = key not in self._data
    if miss:
        # Pull from the backend, then enforce the in-memory cap.
        self._data[key] = self._backend[key]
        self._check_max_items()
    # NOTE(review): assumes every access (hit or miss) refreshes the LRU
    # position — confirm against the original indentation.
    self._lru_queue.update(key)
    return self._data[key]
4462

4563
def __iter__(self) -> Iterable[KeyType]:
@@ -54,6 +72,8 @@ def __delitem__(self, key: KeyType) -> None:
5472
# Check that key is present. Can't check self._data, since it can be unloaded
5573
if key not in self:
5674
raise KeyError(key)
75+
if key in self._data:
76+
del self._lru_queue[key]
5777
self._data.pop(key, None)
5878
self._to_delete.add(key)
5979

@@ -69,8 +89,20 @@ def clear(self) -> None:
6989
self._data = {}
7090
self._to_write = set()
7191
self._to_delete = set()
92+
self._lru_queue.clear()
93+
94+
def _check_max_items(self) -> None:
    """Evict least-recently-used entries once the in-memory cap is exceeded."""
    if len(self._data) <= self._max_items:
        return

    evicted = self._lru_queue.pop_many(self._flush_amount)
    # Persist pending writes first so no dirty value is lost on eviction.
    if not self._to_write.isdisjoint(evicted):
        self.write()
    for key in evicted:
        self._data.pop(key)
72103

73104

105+
# TODO: Refactor this, this should use composition, not inheritance. Maybe a wrapper.
74106
class CacheWithDefault(Cache[KeyType, ValueType]):
75107
VERSION = 0
76108
NON_HASH_ATTRIBUTES: ClassVar[frozenset[str]] = frozenset(

class_cache/lru_queue.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def pop(self) -> KeyType:
9292
return last.key
9393

9494
def pop_many(self, count: int) -> list[KeyType]:
95-
if self._check_empty(no_raise=True):
95+
if count == 0 or self._check_empty(no_raise=True):
9696
return []
9797

9898
first = self._root.prev
@@ -128,5 +128,10 @@ def __str__(self) -> str:
128128
result += f"{key} -> "
129129
return result[:-4]
130130

131+
def clear(self) -> None:
    """Remove every entry, leaving only the sentinel root node."""
    # Identity check (`is`, not `==`): an empty queue is the root node
    # linked to itself; equality could be hijacked by a node's __eq__.
    if self._root.next is self._root:
        return
    # NOTE(review): verify _cut also drops the key->node mapping so
    # key lookups don't go stale after a clear.
    self._cut(self._root.next, self._root)
135+
131136

132137
__all__ = ["LRUQueue"]

tests/test_core.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import resource
2+
3+
import numpy as np
4+
15
from class_cache import Cache, CacheWithDefault
26

37
TEST_DICT = {1: "foo", "foo": "bar", (2, 3): [4, 5]}
@@ -14,8 +18,8 @@ def _get_data(self, key: str) -> str:
1418
return self._name + key
1519

1620

17-
def get_new_cache(id_: str = None, *, clear=True) -> Cache:
18-
cache = Cache(id_)
21+
def get_new_cache(id_: str | None = None, *, clear=True, **kwargs) -> Cache:
    """Build a Cache for tests, optionally clearing any persisted state.

    :param id_: Cache identifier; ``None`` selects the default.
    :param clear: Drop any previously persisted entries before returning.
    :param kwargs: Forwarded to the ``Cache`` constructor (e.g. ``max_items``).
    """
    cache = Cache(id_, **kwargs)
    if clear:
        cache.clear()
    return cache
@@ -118,3 +122,26 @@ def test_len():
118122
del cache
119123
cache = get_new_cache(clear=False)
120124
assert len(cache) == len(TEST_DICT)
125+
126+
127+
def get_max_memory_used() -> int:
    """Return this process's peak resident set size (``ru_maxrss``)."""
    usage = resource.getrusage(resource.RUSAGE_SELF)
    # NOTE(review): ru_maxrss is kilobytes on Linux but bytes on macOS.
    return usage.ru_maxrss
129+
130+
131+
def get_random_array(rng: np.random.Generator) -> np.ndarray:
    """Draw 1024 samples uniformly from [0, 1) using *rng*."""
    return rng.uniform(low=0.0, high=1.0, size=1024)
133+
134+
135+
def test_max_memory_usage():
    """Peak RSS should stay roughly flat when the cache evicts past max_items."""
    cache = get_new_cache(max_items=16)
    rng = np.random.default_rng()
    # Generate one array up front so the generator's own allocations are
    # already counted in the baseline measurement.
    _ = get_random_array(rng)
    baseline = get_max_memory_used()
    for key in range(1024):
        cache[key] = get_random_array(rng)
    peak = get_max_memory_used()
    # NOTE(review): ru_maxrss units are platform-dependent (KB on Linux,
    # bytes on macOS) — confirm the 1_000 threshold on the target OS.
    assert peak - baseline < 1_000
145+
146+
147+
# TODO: Add parallel test for cache as well (threading)

tests/test_lru_queue.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,10 @@ def test_pop_many():
8989
assert small_queue.pop_many(3) == [0, 1, 2]
9090
assert small_queue.pop_many(2) == [3]
9191
assert small_queue.pop_many(1) == []
92+
93+
94+
def test_clear():
    """clear() must leave the queue empty."""
    queue = get_queue()

    queue.clear()
    assert len(queue) == 0

0 commit comments

Comments
 (0)