Skip to content

Commit 7cba56b

Browse files
committed
feat(services): service init restructuring + container fork safety
1 parent 236fd9f commit 7cba56b

24 files changed

+2072
-224
lines changed

src/backend/base/langflow/__main__.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import time
99
import warnings
1010
from contextlib import suppress
11-
from functools import partial
1211
from ipaddress import ip_address
1312
from pathlib import Path
1413

@@ -339,13 +338,8 @@ def run(
339338
static_files_dir: Path | None = Path(frontend_path) if frontend_path else None
340339

341340
# Step 2: Starting Core Services
342-
app = None
343-
app_factory = None
344341
with progress.step(2):
345-
if platform.system() == "Windows":
346-
app = setup_app(static_files_dir=static_files_dir, backend_only=bool(backend_only))
347-
else:
348-
app_factory = partial(setup_app, static_files_dir=static_files_dir, backend_only=bool(backend_only))
342+
app = setup_app(static_files_dir=static_files_dir, backend_only=bool(backend_only))
349343

350344
# Step 3: Connecting Database (this happens inside setup_app via dependencies)
351345
with progress.step(3):
@@ -381,10 +375,6 @@ def run(
381375
with progress.step(6):
382376
import uvicorn
383377

384-
if app is None:
385-
msg = "Windows startup requires a pre-built FastAPI application."
386-
raise RuntimeError(msg)
387-
388378
# Print summary and banner before starting the server, since uvicorn is a blocking call.
389379
# We _may_ be able to subprocess, but with Windows' spawn behavior, we'd have to move all
390380
# non-picklable code to the subprocess.
@@ -416,20 +406,16 @@ def run(
416406
# Use Gunicorn with LangflowUvicornWorker for non-Windows systems
417407
from langflow.server import LangflowApplication
418408

419-
if app_factory is None:
420-
msg = "Gunicorn startup requires an application factory."
421-
raise RuntimeError(msg)
422-
423409
options = {
424410
"bind": f"{host}:{port}",
425411
"workers": get_number_of_workers(workers),
426412
"timeout": worker_timeout,
427413
"certfile": ssl_cert_file_path,
428414
"keyfile": ssl_key_file_path,
429415
"log_level": log_level.lower() if log_level is not None else "info",
430-
"preload_app": os.environ.get("LANGFLOW_GUNICORN_PRELOAD", "false").lower() == "true",
416+
"preload_app": os.environ.get("LANGFLOW_GUNICORN_PRELOAD", "true").lower() == "true",
431417
}
432-
server = LangflowApplication(app_factory, options)
418+
server = LangflowApplication(app, options)
433419

434420
# Start the webapp process
435421
process_manager.webapp_process = Process(target=server.run)

src/backend/base/langflow/initial_setup/setup.py

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,12 +1269,6 @@ async def get_or_create_default_folder(session: AsyncSession, user_id: UUID) ->
12691269
12701270
This implementation avoids an external distributed lock and works with both SQLite and PostgreSQL.
12711271
1272-
The function only creates a new default folder on first initialization (when the user has no
1273-
folders at all). If the user has already been through initial setup and has at least one folder
1274-
— even if they renamed the default or only kept other folders — the existing folder is returned
1275-
instead of creating a new "Starter Project". This prevents a phantom default folder from being
1276-
forced back into the UI every time the user logs in or the server restarts.
1277-
12781272
Args:
12791273
session (AsyncSession): The active database session.
12801274
user_id (UUID): The ID of the user who owns the folder.
@@ -1316,18 +1310,7 @@ async def get_or_create_default_folder(session: AsyncSession, user_id: UUID) ->
13161310
await session.rollback()
13171311
break
13181312

1319-
# Respect prior user intent: if the user already has folders (e.g. they renamed the
1320-
# default folder to something like "My Flows"), do not force a new "Starter Project" back
1321-
# into their UI on every login/server restart. Return any existing folder instead.
1322-
any_folder_stmt = (
1323-
select(Folder).where(Folder.user_id == user_id).order_by(Folder.id).limit(1) # type: ignore[arg-type]
1324-
)
1325-
any_folder = (await session.exec(any_folder_stmt)).first()
1326-
if any_folder:
1327-
return FolderRead.model_validate(any_folder, from_attributes=True)
1328-
1329-
# No existing folder found for this user — this is the first-time setup path.
1330-
# Create the default folder.
1313+
# If no existing folder found, create a new one
13311314
try:
13321315
folder_obj = Folder(user_id=user_id, name=DEFAULT_FOLDER_NAME, description=DEFAULT_FOLDER_DESCRIPTION)
13331316
session.add(folder_obj)
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
"""SVC-01 starter-projects hash gate helpers.
2+
3+
Computes a content hash over the starter-project JSON files plus the installed
4+
``lfx`` package version, and persists it as plaintext under
5+
``${LANGFLOW_CONFIG_DIR}/starter_projects.hash``. ``main.py`` uses the hash to
6+
short-circuit the full starter-project re-sync on restarts where nothing
7+
changed.
8+
9+
Failure modes (D-04): missing, unreadable, or corrupt hash files all fall
10+
through to a full re-sync. ``LANGFLOW_FORCE_STARTER_RESYNC=1`` bypasses the
11+
comparison. Write failures (read-only root filesystem, e.g. container
12+
deployments) log at debug level and never raise -- mirroring the
13+
``update_project_file`` pattern at ``setup.py:690-701``.
14+
"""
15+
16+
from __future__ import annotations
17+
18+
import hashlib
19+
import os
20+
from importlib.metadata import PackageNotFoundError, version
21+
from typing import TYPE_CHECKING, Any
22+
23+
import aiofiles
24+
from lfx.log.logger import logger
25+
26+
if TYPE_CHECKING:
27+
from collections.abc import Awaitable, Callable
28+
from pathlib import Path
29+
30+
import anyio
31+
32+
HASH_FILENAME = "starter_projects.hash"
33+
_HEX_LEN = 64
34+
35+
36+
async def compute_starter_projects_hash(starter_folder: anyio.Path) -> str:
    """Hash the starter-project JSON files together with the ``lfx`` version.

    Every ``*.json`` entry directly inside ``starter_folder`` contributes
    ``filename || NUL || file_bytes || NUL`` to a SHA-256 digest, with entries
    processed in filename order so the result does not depend on the order in
    which ``glob`` yields them. The installed ``lfx`` package version string
    is appended last; when the package metadata cannot be found the literal
    ``"unknown"`` is hashed instead.

    Args:
        starter_folder: Async path of the directory holding the starter JSON files.

    Returns:
        The hex-encoded SHA-256 digest.
    """
    # Resolve the version first, exactly as the digest's final component needs it.
    try:
        lfx_version = version("lfx")
    except PackageNotFoundError:
        lfx_version = "unknown"

    digest = hashlib.sha256()

    json_files = [entry async for entry in starter_folder.glob("*.json")]
    json_files.sort(key=lambda entry: entry.name)

    for json_file in json_files:
        file_bytes = await json_file.read_bytes()
        # NUL separators keep the name/content boundaries inside the digest stream.
        digest.update(json_file.name.encode("utf-8") + b"\x00")
        digest.update(file_bytes + b"\x00")

    digest.update(lfx_version.encode("utf-8"))
    return digest.hexdigest()
66+
67+
68+
async def read_hash_file_safe(hash_path: Path) -> str | None:
    """Return the stored SHA hex string, or ``None`` on any failure.

    Blank lines and comment lines (starting with ``#``) are skipped; the first
    remaining line decides the result. It is returned only when it is exactly
    ``_HEX_LEN`` lowercase hex characters. ``None`` is returned for:

    - Missing or unreadable file (``OSError`` and its subclasses, which
      include ``FileNotFoundError`` and ``PermissionError``)
    - File bytes that are not valid UTF-8 (``UnicodeDecodeError``)
    - Empty content, or content consisting only of blank/comment lines
    - Corrupt content (first non-comment line is not a valid hex digest)

    The caller is expected to treat ``None`` as a cache miss and fall through
    to a full re-sync.

    Args:
        hash_path: Location of the plaintext hash file.

    Returns:
        The stored digest string, or ``None`` when no valid digest could be read.
    """
    try:
        async with aiofiles.open(str(hash_path), encoding="utf-8") as f:
            content = await f.read()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError: without catching it
        # a binary/garbled hash file would raise instead of counting as a miss.
        return None

    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        # Only the first meaningful line is considered; anything else is corrupt.
        if len(line) == _HEX_LEN and all(c in "0123456789abcdef" for c in line):
            return line
        return None
    # No non-comment line at all (empty file or comments only).
    return None
95+
96+
97+
async def write_hash_file_safe(hash_path: Path, sha_hex: str, version_string: str) -> None:
    """Persist ``sha_hex`` followed by a ``# version:`` comment line.

    The parent directory of ``hash_path`` is created if needed, then the file
    is overwritten with two lines: the digest and ``# version: <version_string>``.
    Any ``OSError`` raised while creating the directory or writing the file is
    logged at debug level and swallowed, so a failed write never propagates to
    the caller.

    Args:
        hash_path: Destination of the plaintext hash file.
        sha_hex: Digest string written on the first line.
        version_string: Version recorded in the trailing comment line.
    """
    payload = f"{sha_hex}\n# version: {version_string}\n"
    try:
        parent_dir = hash_path.parent
        parent_dir.mkdir(parents=True, exist_ok=True)
        async with aiofiles.open(str(hash_path), "w", encoding="utf-8") as handle:
            await handle.write(payload)
    except OSError as exc:
        # Best-effort persistence: report and carry on rather than raising.
        await logger.adebug(
            f"Could not write starter-projects hash file (read-only filesystem): {exc}. "
            "Hash gate will fall through to full re-sync on each restart."
        )
117+
118+
119+
def is_force_resync_requested() -> bool:
    """Report whether ``LANGFLOW_FORCE_STARTER_RESYNC`` asks for a forced re-sync.

    The variable's value is stripped of surrounding whitespace and lowercased
    before comparison. Only ``1``, ``true`` and ``yes`` count as a request;
    an unset variable or any other value yields ``False``.
    """
    raw_value = os.getenv("LANGFLOW_FORCE_STARTER_RESYNC", "")
    normalized = raw_value.strip().lower()
    return normalized in ("1", "true", "yes")
127+
128+
129+
async def run_starter_projects_hash_gate(
    *,
    starter_folder: anyio.Path,
    hash_path: Path,
    sync_fn: Callable[[], Awaitable[Any]],
) -> bool:
    """Run the starter-project sync only when the content hash requires it.

    The current hash of ``starter_folder`` is compared with the one stored at
    ``hash_path``. When they differ, when no valid stored hash can be read, or
    when a forced re-sync is requested through the environment, ``sync_fn`` is
    awaited and the fresh hash is written back together with the ``lfx``
    version (``"unknown"`` if the package metadata is missing). Otherwise the
    sync is skipped and a debug message is logged.

    This function performs no locking of its own; any locking or error
    handling around the whole sequence is left to the caller.

    Args:
        starter_folder: Async path of the directory holding the starter JSON files.
        hash_path: Location of the persisted hash file.
        sync_fn: Zero-argument callable returning the awaitable that performs
            the full re-sync.

    Returns:
        ``True`` when the full re-sync ran, ``False`` when it was skipped.
    """
    current_hash = await compute_starter_projects_hash(starter_folder)
    stored_hash = await read_hash_file_safe(hash_path)

    up_to_date = not is_force_resync_requested() and stored_hash == current_hash
    if up_to_date:
        await logger.adebug("Starter projects hash matches; skipped re-sync")
        return False

    await sync_fn()

    # Record the version alongside the hash, using the same fallback sentinel
    # as the hash computation.
    try:
        lfx_version = version("lfx")
    except PackageNotFoundError:
        lfx_version = "unknown"
    await write_hash_file_safe(hash_path, current_hash, lfx_version)
    return True

0 commit comments

Comments
 (0)