Skip to content

Commit 15546cd

Browse files
Draft: Initial version of keep alive lock thread (#57)
* Initial version * fix never ending thread * performance improvement * Updated assets/coverage.svg * add more locking tests * Updated assets/coverage.svg * update workspace * improvements * reduce timeout * more tests --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 7f52085 commit 15546cd

3 files changed

Lines changed: 117 additions & 22 deletions

File tree

DictDataBase.code-workspace

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"editor.defaultFormatter": "charliermarsh.ruff"
1111
},
1212
"editor.codeActionsOnSave": {
13-
"source.organizeImports": true
13+
"source.organizeImports": "explicit"
1414
},
1515
}
1616
}

dictdatabase/locking.py

Lines changed: 67 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import contextlib
34
import os
45
import threading
56
import time
@@ -10,8 +11,17 @@
1011
# - Do not use pathlib, because it is slower than os
1112

1213
# Constants
13-
SLEEP_TIMEOUT = 0.001
14-
LOCK_TIMEOUT = 60.0 # Duration to wait before considering a lock as orphaned.
14+
SLEEP_TIMEOUT = 0.001 * 1 # seconds (1 ms); passed to time.sleep() between lock acquisition attempts
15+
LOCK_KEEP_ALIVE_TIMEOUT = 0.001 * 0.08 # seconds (0.08 ms); passed to time.sleep() between keep-alive age checks
16+
17+
# Minimum age of the has-lock before the keep-alive thread refreshes its timestamp
18+
ALIVE_LOCK_REFRESH_INTERVAL_NS = 1_000_000_000 * 10 # nanoseconds (10 s); compared against time.time_ns() differences
19+
20+
# Duration (seconds) to wait before considering a lock as orphaned
21+
REMOVE_ORPHAN_LOCK_TIMEOUT = 20.0
22+
23+
# Duration (seconds) to wait before giving up on acquiring a lock
24+
AQUIRE_LOCK_TIMEOUT = 60.0
1525

1626

1727
def os_touch(path: str) -> None:
@@ -50,6 +60,9 @@ def __init__(self, ddb_dir: str, name: str, id: str, time_ns: str, stage: str, m
5060
lock_file = f"{name}.{id}.{time_ns}.{stage}.{mode}.lock"
5161
self.path = os.path.join(ddb_dir, lock_file)
5262

63+
def __repr__(self) -> str:
# Debug representation: the `=` f-string specifier renders each field as `self.<attr>=<repr of value>`.
64+
return f"LockFileMeta({self.ddb_dir=}, {self.name=}, {self.id=}, {self.time_ns=}, {self.stage=}, {self.mode=})"
65+
5366
def new_with_updated_time(self) -> LockFileMeta:
5467
"""
5568
Create a new instance with an updated timestamp.
@@ -91,7 +104,7 @@ def __init__(self, need_lock: LockFileMeta) -> None:
91104
# Remove orphaned locks
92105
if lock_meta.path != need_lock.path:
93106
lock_age = time.time_ns() - int(lock_meta.time_ns)
94-
if lock_age > LOCK_TIMEOUT * 1_000_000_000:
107+
if lock_age > REMOVE_ORPHAN_LOCK_TIMEOUT * 1_000_000_000:
95108
os.unlink(lock_meta.path)
96109
print(f"Removed orphaned lock ({lock_meta.path})")
97110
continue
@@ -129,13 +142,15 @@ class AbstractLock:
129142
provides a blueprint for derived classes to implement.
130143
"""
131144

132-
__slots__ = ("db_name", "need_lock", "has_lock", "snapshot", "mode")
145+
# Every slot name must be its own tuple element: adjacent string literals
# without a comma are concatenated into a single (wrong) slot name.
__slots__ = ("db_name", "need_lock", "has_lock", "snapshot", "mode", "is_alive", "keep_alive_thread")
133146

134147
db_name: str
135148
need_lock: LockFileMeta
136149
has_lock: LockFileMeta
137150
snapshot: FileLocksSnapshot
138151
mode: str
152+
is_alive: bool
153+
keep_alive_thread: threading.Thread
139154

140155
def __init__(self, db_name: str) -> None:
141156
# Normalize db_name to avoid file naming conflicts
@@ -147,16 +162,59 @@ def __init__(self, db_name: str) -> None:
147162
self.need_lock = LockFileMeta(dir, self.db_name, t_id, time_ns, "need", self.mode)
148163
self.has_lock = LockFileMeta(dir, self.db_name, t_id, time_ns, "has", self.mode)
149164

165+
self.is_alive = False
166+
self.keep_alive_thread = None
167+
150168
# Ensure lock directory exists
151169
if not os.path.isdir(dir):
152170
os.makedirs(dir, exist_ok=True)
153171

172+
def _keep_alive_thread(self) -> None:
173+
"""
174+
Keep the lock alive by updating the timestamp of the lock file.

Thread target: loops while ``self.is_alive`` is true. Each iteration sleeps
for LOCK_KEEP_ALIVE_TIMEOUT seconds and, once the current has-lock is at
least ALIVE_LOCK_REFRESH_INTERVAL_NS nanoseconds old, swaps it for a lock
file carrying a fresh timestamp and updates ``self.has_lock`` accordingly.
175+
"""
176+
177+
# Local copy of the current has-lock timestamp (ns), so the age check does not re-parse it every iteration
current_has_lock_time_ns: int = int(self.has_lock.time_ns)
178+
179+
while self.is_alive:
180+
# Poll interval; also bounds how long it takes this loop to notice is_alive turning false
time.sleep(LOCK_KEEP_ALIVE_TIMEOUT)
181+
if time.time_ns() - current_has_lock_time_ns < ALIVE_LOCK_REFRESH_INTERVAL_NS:
182+
continue
183+
184+
# Assert: The lock is older than ALIVE_LOCK_REFRESH_INTERVAL_NS ns
185+
# This means the has_lock must be refreshed
186+
187+
# Order matters: the new lock file is created before the old one is removed,
# so at no point during the handover is there no has-lock file on disk.
new_has_lock = self.has_lock.new_with_updated_time()
188+
os_touch(new_has_lock.path)
189+
# A missing old file is tolerated (presumably it may already have been removed as orphaned - TODO confirm)
with contextlib.suppress(FileNotFoundError):
190+
os.unlink(self.has_lock.path) # Remove old lock file
191+
self.has_lock = new_has_lock
192+
current_has_lock_time_ns = int(new_has_lock.time_ns)
193+
194+
def _start_keep_alive_thread(self) -> None:
    """
    Start a thread that keeps the lock alive by updating the timestamp of the lock file.

    Sets ``self.is_alive`` to True and stores the started thread in
    ``self.keep_alive_thread``.

    Raises:
        RuntimeError: If a keep-alive thread is already registered on this lock.
    """
    if self.keep_alive_thread is not None:
        raise RuntimeError("Keep alive thread already exists.")

    self.is_alive = True
    # Daemon thread: if the holder never reaches the code that stops the thread
    # (e.g. the main thread dies while holding the lock), a non-daemon thread
    # would keep the interpreter alive and refresh the lock forever, so the
    # lock could never be detected as orphaned.
    self.keep_alive_thread = threading.Thread(target=self._keep_alive_thread, daemon=True)
    self.keep_alive_thread.start()
205+
154206
def _lock(self) -> None:
155207
"""Override this method to implement locking mechanism."""
156208
raise NotImplementedError
157209

158210
def _unlock(self) -> None:
159211
"""Remove the lock files associated with this lock."""
212+
213+
if self.keep_alive_thread is not None:
214+
self.is_alive = False
215+
self.keep_alive_thread.join()
216+
self.keep_alive_thread = None
217+
160218
for p in ("need_lock", "has_lock"):
161219
try:
162220
if lock := getattr(self, p, None):
@@ -169,7 +227,7 @@ def _unlock(self) -> None:
169227
def __enter__(self) -> None:
# Acquire the lock when entering the `with` block. Returns None, so `with lock as x` binds None to x.
170228
self._lock()
171229

172-
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
230+
def __exit__(self, exc_type, exc_val, exc_tb) -> None: # noqa: ANN001
# Release the lock when leaving the `with` block. Returns None, so an exception raised in the body propagates.
173231
self._unlock()
174232

175233

@@ -202,9 +260,10 @@ def _lock(self) -> None:
202260
self.has_lock = self.has_lock.new_with_updated_time()
203261
os_touch(self.has_lock.path)
204262
os.unlink(self.need_lock.path)
263+
self._start_keep_alive_thread()
205264
return
206265
time.sleep(SLEEP_TIMEOUT)
207-
if time.time() - start_time > LOCK_TIMEOUT:
266+
if time.time() - start_time > AQUIRE_LOCK_TIMEOUT:
208267
raise RuntimeError("Timeout while waiting for read lock.")
209268
self.snapshot = FileLocksSnapshot(self.need_lock)
210269

@@ -236,8 +295,9 @@ def _lock(self) -> None:
236295
self.has_lock = self.has_lock.new_with_updated_time()
237296
os_touch(self.has_lock.path)
238297
os.unlink(self.need_lock.path)
298+
self._start_keep_alive_thread()
239299
return
240300
time.sleep(SLEEP_TIMEOUT)
241-
if time.time() - start_time > LOCK_TIMEOUT:
301+
if time.time() - start_time > AQUIRE_LOCK_TIMEOUT:
242302
raise RuntimeError("Timeout while waiting for write lock.")
243303
self.snapshot = FileLocksSnapshot(self.need_lock)

tests/test_locking.py

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,20 @@ def test_lock_release():
1717
pass
1818

1919

20-
def test_orphaned_lock_timeout():
21-
prev_timeout = locking.LOCK_TIMEOUT
22-
locking.LOCK_TIMEOUT = 0.1
23-
lock = locking.WriteLock("db_orphaned")
20+
def test_read_lock_release():
21+
read_lock = locking.ReadLock("test_db")
22+
write_lock = locking.WriteLock("test_db")
2423

25-
lock._lock()
26-
time.sleep(0.2)
24+
# Acquire and release a read lock
25+
with read_lock:
26+
pass
2727

28-
# Trigger the removal of orphaned locks
29-
ls = locking.FileLocksSnapshot(lock.need_lock)
30-
assert len(ls.locks) == 0
28+
# Now attempt to acquire a write lock
29+
with write_lock:
30+
assert write_lock.has_lock is not None
3131

32-
locking.LOCK_TIMEOUT = prev_timeout
32+
read_lock._unlock()
33+
write_lock._unlock()
3334

3435

3536
def test_double_lock_exception(use_compression):
@@ -64,18 +65,52 @@ def test_get_lock_names(use_compression):
6465
lock._unlock()
6566

6667

68+
def test_lock_must_implement_lock_function():
# A subclass of AbstractLock that does not override _lock() must raise NotImplementedError when locking.
69+
class BadLock(locking.AbstractLock):
70+
# Only `mode` is defined; _lock() is deliberately left to the base class
mode = "read"
71+
72+
lock = BadLock("db")
73+
with pytest.raises(NotImplementedError):
74+
lock._lock()
75+
76+
6777
def test_remove_orphaned_locks():
    """
    A lock older than REMOVE_ORPHAN_LOCK_TIMEOUT is removed when a new snapshot is taken.

    The keep-alive poll interval is raised to 1 s so the keep-alive thread cannot
    refresh the lock during the test, and the orphan timeout is lowered to 0.1 s.
    """
    prev = locking.AQUIRE_LOCK_TIMEOUT, locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.REMOVE_ORPHAN_LOCK_TIMEOUT

    locking.AQUIRE_LOCK_TIMEOUT = 10.0
    locking.LOCK_KEEP_ALIVE_TIMEOUT = 1.0
    locking.REMOVE_ORPHAN_LOCK_TIMEOUT = 0.1
    lock = locking.ReadLock("test_remove_orphaned_locks")
    try:
        lock._lock()

        ls = locking.FileLocksSnapshot(lock.need_lock)
        assert len(ls.locks) >= 1  # The one lock or two if currently in keep alive handover

        time.sleep(0.2)
        # Trigger the removal of orphaned locks
        ls = locking.FileLocksSnapshot(lock.need_lock)

        assert len(ls.locks) == 0
    finally:
        # Always stop the keep-alive thread and restore the module constants,
        # even when an assertion above fails, so later tests are unaffected.
        lock._unlock()
        locking.AQUIRE_LOCK_TIMEOUT, locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.REMOVE_ORPHAN_LOCK_TIMEOUT = prev
103+
104+
105+
def test_lock_keep_alive():
    """
    Holding a lock longer than the refresh interval makes the keep-alive thread replace the has-lock.

    The refresh interval is the module constant ALIVE_LOCK_REFRESH_INTERVAL_NS
    (nanoseconds); it is lowered to 0.5 s so at least one refresh happens while
    the lock is held for 1 s.
    """
    prev = locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.ALIVE_LOCK_REFRESH_INTERVAL_NS

    locking.LOCK_KEEP_ALIVE_TIMEOUT = 0.1
    locking.ALIVE_LOCK_REFRESH_INTERVAL_NS = 500_000_000  # 0.5 s
    try:
        lock = locking.ReadLock("test_lock_keep_alive")

        with lock:
            acquired_time_ns = lock.has_lock.time_ns
            time.sleep(1.0)
            # The keep-alive thread must have swapped in a has-lock with a newer timestamp
            assert lock.has_lock.time_ns != acquired_time_ns
    finally:
        # Restore exactly the constants this test patched
        locking.LOCK_KEEP_ALIVE_TIMEOUT, locking.ALIVE_LOCK_REFRESH_INTERVAL_NS = prev

0 commit comments

Comments
 (0)