Fix model selector and voice dropdown width; add torchcodec dependency

devnen · devnen · commit 252db2def150 · 2026-03-28T15:25:58.000+01:00
- All models in the registry are always selectable in the dropdown (never disabled), including Dia 2 models whose package is not yet installed
- Auto-install the dia2 package via pip when a Dia 2 model is loaded and the package is not installed
- Predefined voice dropdown capped at max-width 320px
- Added torchcodec to requirements.txt (required by newer torchaudio)
diff --git a/engine.py b/engine.py
@@ -227,14 +227,14 @@ def get_model_info() -> Dict[str, Any]:
 
 
 def get_model_registry() -> Dict[str, Dict[str, Any]]:
-    """Returns the full model registry for the UI dropdown, filtered by availability."""
+    """Returns the full model registry for the UI dropdown. All models are always selectable."""
     result = {}
     for k, v in MODEL_REGISTRY.items():
-        available = True
+        installed = True
         if v["model_type"] == "dia1" and not DIA1_AVAILABLE:
-            available = False
+            installed = False
         if v["model_type"] == "dia2" and not DIA2_AVAILABLE:
-            available = False
+            installed = False
         result[k] = {
             "display_name": v["display_name"],
             "params": v["params"],
@@ -243,7 +243,8 @@ def get_model_registry() -> Dict[str, Dict[str, Any]]:
             "default_voice": v["default_voice"],
             "supports_cloning": v["supports_cloning"],
             "cloning_method": v["cloning_method"],
-            "available": available,
+            "available": True,  # Always selectable — will download/install on demand
+            "installed": installed,
         }
     return result
 
@@ -380,6 +381,38 @@ def get_compute_dtype(device: torch.device, weights_filename: str) -> str:
         return ComputeDtype.FLOAT32.value  # Return string value "float32"
 
 
+def _auto_install_dia2():
+    """Install the dia2 package via pip at runtime and bind its API globally.
+
+    Runs ``<current interpreter> -m pip install dia2`` in a subprocess with a
+    600-second timeout. On success, imports dia2 and rebinds the module-level
+    names ``Dia2``, ``GenerationConfig``, ``SamplingConfig``, ``PrefixConfig``
+    and ``GenerationResult``, then sets ``DIA2_AVAILABLE`` to True.
+
+    Raises:
+        ImportError: If any step raises an ``Exception`` (an error from
+            ``_check_cancelled()``, a non-zero pip exit code, a pip timeout,
+            or a failing post-install import). The original exception is
+            chained as ``__cause__``.
+    """
+    global DIA2_AVAILABLE, Dia2, GenerationConfig, SamplingConfig, PrefixConfig, GenerationResult
+    import importlib
+    import subprocess
+    import sys
+
+    try:
+        # NOTE(review): presumably aborts when the user cancelled the load; any
+        # Exception it raises is wrapped in ImportError below -- confirm intended.
+        _check_cancelled()
+        result = subprocess.run(
+            [sys.executable, "-m", "pip", "install", "dia2"],
+            capture_output=True, text=True, timeout=600,
+        )
+        if result.returncode != 0:
+            raise RuntimeError(f"pip install dia2 failed:\n{result.stderr}")
+
+        logger.info("dia2 package installed successfully. Importing...")
+        # The import system caches directory contents; a package installed
+        # while the interpreter is running may not be found until the finder
+        # caches are invalidated.
+        importlib.invalidate_caches()
+        from dia2 import Dia2 as _Dia2, GenerationConfig as _GC, SamplingConfig as _SC, PrefixConfig as _PC, GenerationResult as _GR
+        # Rebind the globals only after every name was imported successfully.
+        Dia2 = _Dia2
+        GenerationConfig = _GC
+        SamplingConfig = _SC
+        PrefixConfig = _PC
+        GenerationResult = _GR
+        DIA2_AVAILABLE = True
+        logger.info("dia2 package imported successfully after auto-install.")
+    except Exception as e:
+        logger.error(f"Failed to auto-install dia2: {e}", exc_info=True)
+        # Chain the original error so the root cause stays in the traceback.
+        raise ImportError(
+            f"dia2 package could not be installed automatically: {e}. "
+            "Please install it manually: pip install dia2"
+        ) from e
+
+
 def load_model():
     """
     Loads a TTS model based on the current config selector.
@@ -402,11 +435,17 @@ def load_model():
         model_type = reg["model_type"]
         repo_id = reg["repo_id"]
 
-        # Check availability
+        # Check availability — auto-install if missing
         if model_type == "dia1" and not DIA1_AVAILABLE:
-            raise ImportError("Dia 1 (dia) package is not installed. Cannot load Dia 1.6B model.")
+            raise ImportError(
+                "Dia 1 (dia) package is not installed. "
+                "The local 'dia/' directory with model code is required. "
+                "Please check that the repository was cloned completely."
+            )
         if model_type == "dia2" and not DIA2_AVAILABLE:
-            raise ImportError("Dia 2 (dia2) package is not installed. Cannot load Dia 2 models.")
+            logger.info("dia2 package not found. Attempting to install it automatically...")
+            _update_download_status("installing", "Installing dia2 package (pip install dia2)...", 5)
+            _auto_install_dia2()
 
         cache_path = get_model_cache_path()
         model_device = get_device()
diff --git a/requirements.txt b/requirements.txt
@@ -12,6 +12,7 @@ soundfile # Requires libsndfile system library (e.g., sudo apt-get install libsn
 huggingface_hub
 descript-audio-codec
 safetensors
+torchcodec
 openai-whisper
 
 # Configuration & Utilities
diff --git a/ui/index.html b/ui/index.html
@@ -374,7 +374,7 @@ <h2 class="card-header">Generate Speech with Dia</h2>
                             <div id="predefined-voice-options" class="mb-6 hidden">
                                 <label for="predefined_voice_select" class="label-base">Select Predefined Voice (use
                                     voice pairs for dialogues):</label>
-                                <select id="predefined_voice_select" name="predefined_voice_select" class="select-base">
+                                <select id="predefined_voice_select" name="predefined_voice_select" class="select-base" style="max-width: 320px;">
                                     <option value="none">-- Select Voice --</option>
                                     {% for voice in predefined_voices %}
                                     <option value="{{ voice.filename }}" {% if
diff --git a/ui/script.js b/ui/script.js
@@ -413,8 +413,7 @@ document.addEventListener('DOMContentLoaded', function () {
         for (const [selector, info] of Object.entries(modelRegistry)) {
             const option = document.createElement('option');
             option.value = selector;
-            option.textContent = info.display_name + (info.available === false ? ' (not installed)' : '');
-            option.disabled = info.available === false;
+            option.textContent = info.display_name;
             modelSelect.appendChild(option);
         }
     }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -413,8 +413,7 @@ document.addEventListener('DOMContentLoaded', function () {` |
| `413` | `413` | `for (const [selector, info] of Object.entries(modelRegistry)) {` |
| `414` | `414` | `const option = document.createElement('option');` |
| `415` | `415` | `option.value = selector;` |
| `416` |  | `- option.textContent = info.display_name + (info.available === false ? ' (not installed)' : '');` |
| `417` |  | `- option.disabled = info.available === false;` |
|  | `416` | `+ option.textContent = info.display_name;` |
| `418` | `417` | `modelSelect.appendChild(option);` |
| `419` | `418` | `}` |
| `420` | `419` | `}` |