11from __future__ import annotations
22
3+ import contextlib
34import os
45import threading
56import time
1011# - Do not use pathlib, because it is slower than os
1112
# Constants
# NOTE: time.sleep() and time.time() work in seconds, so every *_TIMEOUT value
# below is expressed in seconds; only the *_NS value is in nanoseconds.

# Pause between two attempts to acquire a lock: 1 ms, expressed in seconds
SLEEP_TIMEOUT = 0.001 * 1

# Pause between two polls of the keep-alive loop: 0.08 ms, expressed in seconds
LOCK_KEEP_ALIVE_TIMEOUT = 0.001 * 0.08

# Minimum age of a held lock file before its timestamp is refreshed: 10 s,
# expressed in nanoseconds because it is compared against time.time_ns()
ALIVE_LOCK_REFRESH_INTERVAL_NS = 1_000_000_000 * 10

# Duration (seconds) to wait before considering a lock as orphaned
REMOVE_ORPHAN_LOCK_TIMEOUT = 20.0

# Duration (seconds) to wait before giving up on acquiring a lock
ACQUIRE_LOCK_TIMEOUT = 60.0

# Original (misspelled) name, kept so existing references keep working
AQUIRE_LOCK_TIMEOUT = ACQUIRE_LOCK_TIMEOUT
1525
1626
1727def os_touch (path : str ) -> None :
@@ -50,6 +60,9 @@ def __init__(self, ddb_dir: str, name: str, id: str, time_ns: str, stage: str, m
5060 lock_file = f"{ name } .{ id } .{ time_ns } .{ stage } .{ mode } .lock"
5161 self .path = os .path .join (ddb_dir , lock_file )
5262
63+ def __repr__ (self ) -> str :
64+ return f"LockFileMeta({ self .ddb_dir = } , { self .name = } , { self .id = } , { self .time_ns = } , { self .stage = } , { self .mode = } )"
65+
5366 def new_with_updated_time (self ) -> LockFileMeta :
5467 """
5568 Create a new instance with an updated timestamp.
@@ -91,7 +104,7 @@ def __init__(self, need_lock: LockFileMeta) -> None:
91104 # Remove orphaned locks
92105 if lock_meta .path != need_lock .path :
93106 lock_age = time .time_ns () - int (lock_meta .time_ns )
94- if lock_age > LOCK_TIMEOUT * 1_000_000_000 :
107+ if lock_age > REMOVE_ORPHAN_LOCK_TIMEOUT * 1_000_000_000 :
95108 os .unlink (lock_meta .path )
96109 print (f"Removed orphaned lock ({ lock_meta .path } )" )
97110 continue
@@ -129,13 +142,15 @@ class AbstractLock:
129142 provides a blueprint for derived classes to implement.
130143 """
131144
# One entry per instance attribute. Each name needs its own comma: adjacent
# string literals are silently concatenated into a single (wrong) slot name.
__slots__ = (
    "db_name",
    "need_lock",
    "has_lock",
    "snapshot",
    "mode",
    "is_alive",
    "keep_alive_thread",
)

db_name: str
need_lock: LockFileMeta
has_lock: LockFileMeta
snapshot: FileLocksSnapshot
mode: str
is_alive: bool  # Loop condition of the keep-alive thread
keep_alive_thread: threading.Thread | None  # None while no keep-alive thread exists
139154
140155 def __init__ (self , db_name : str ) -> None :
141156 # Normalize db_name to avoid file naming conflicts
@@ -147,16 +162,59 @@ def __init__(self, db_name: str) -> None:
147162 self .need_lock = LockFileMeta (dir , self .db_name , t_id , time_ns , "need" , self .mode )
148163 self .has_lock = LockFileMeta (dir , self .db_name , t_id , time_ns , "has" , self .mode )
149164
165+ self .is_alive = False
166+ self .keep_alive_thread = None
167+
150168 # Ensure lock directory exists
151169 if not os .path .isdir (dir ):
152170 os .makedirs (dir , exist_ok = True )
153171
def _keep_alive_thread(self) -> None:
    """
    Thread body that periodically re-stamps the held lock file.

    Loops until ``self.is_alive`` becomes false. Whenever the current
    ``has_lock`` is at least ALIVE_LOCK_REFRESH_INTERVAL_NS old, a lock file
    with an updated timestamp is created, the previous file is removed and
    ``self.has_lock`` is pointed at the new one.
    """

    last_refresh_ns: int = int(self.has_lock.time_ns)

    while self.is_alive:
        time.sleep(LOCK_KEEP_ALIVE_TIMEOUT)

        elapsed_ns = time.time_ns() - last_refresh_ns
        if elapsed_ns >= ALIVE_LOCK_REFRESH_INTERVAL_NS:
            # The replacement file is created before the old one is deleted
            refreshed = self.has_lock.new_with_updated_time()
            os_touch(refreshed.path)
            try:
                os.unlink(self.has_lock.path)
            except FileNotFoundError:
                # The old lock file is already gone, nothing left to remove
                pass
            self.has_lock = refreshed
            last_refresh_ns = int(refreshed.time_ns)
193+
194+ def _start_keep_alive_thread (self ) -> None :
195+ """
196+ Start a thread that keeps the lock alive by updating the timestamp of the lock file.
197+ """
198+
199+ if self .keep_alive_thread is not None :
200+ raise RuntimeError ("Keep alive thread already exists." )
201+
202+ self .is_alive = True
203+ self .keep_alive_thread = threading .Thread (target = self ._keep_alive_thread , daemon = False )
204+ self .keep_alive_thread .start ()
205+
154206 def _lock (self ) -> None :
155207 """Override this method to implement locking mechanism."""
156208 raise NotImplementedError
157209
158210 def _unlock (self ) -> None :
159211 """Remove the lock files associated with this lock."""
212+
213+ if self .keep_alive_thread is not None :
214+ self .is_alive = False
215+ self .keep_alive_thread .join ()
216+ self .keep_alive_thread = None
217+
160218 for p in ("need_lock" , "has_lock" ):
161219 try :
162220 if lock := getattr (self , p , None ):
@@ -169,7 +227,7 @@ def _unlock(self) -> None:
169227 def __enter__ (self ) -> None :
170228 self ._lock ()
171229
172- def __exit__ (self , exc_type , exc_val , exc_tb ) -> None :
230+ def __exit__ (self , exc_type , exc_val , exc_tb ) -> None : # noqa: ANN001
173231 self ._unlock ()
174232
175233
@@ -202,9 +260,10 @@ def _lock(self) -> None:
202260 self .has_lock = self .has_lock .new_with_updated_time ()
203261 os_touch (self .has_lock .path )
204262 os .unlink (self .need_lock .path )
263+ self ._start_keep_alive_thread ()
205264 return
206265 time .sleep (SLEEP_TIMEOUT )
207- if time .time () - start_time > LOCK_TIMEOUT :
266+ if time .time () - start_time > AQUIRE_LOCK_TIMEOUT :
208267 raise RuntimeError ("Timeout while waiting for read lock." )
209268 self .snapshot = FileLocksSnapshot (self .need_lock )
210269
@@ -236,8 +295,9 @@ def _lock(self) -> None:
236295 self .has_lock = self .has_lock .new_with_updated_time ()
237296 os_touch (self .has_lock .path )
238297 os .unlink (self .need_lock .path )
298+ self ._start_keep_alive_thread ()
239299 return
240300 time .sleep (SLEEP_TIMEOUT )
241- if time .time () - start_time > LOCK_TIMEOUT :
301+ if time .time () - start_time > AQUIRE_LOCK_TIMEOUT :
242302 raise RuntimeError ("Timeout while waiting for write lock." )
243303 self .snapshot = FileLocksSnapshot (self .need_lock )
0 commit comments