[Fix]: guard LigerQwen3_5CausalLMOutputWithPast import (#1169)

noemotiovon · web-flow · commit dcd404b1ea15 · 2026-04-18T15:45:49.000Z
LigerQwen3_5CausalLMOutputWithPast is only defined in output_classes.py
when the installed transformers version includes the Qwen3.5 model. In
older transformers versions, the base class import fails silently and the
Liger subclass is never defined, causing an ImportError when qwen3_5.py
unconditionally imports it at module level.

This breaks CI (Ascend) environments with a transformers version that
does not yet ship Qwen3.5, as test_monkey_patch.py imports qwen3_5.py at
collection time, causing the entire test run to abort with:
```
    ImportError: cannot import name 'LigerQwen3_5CausalLMOutputWithPast'
    from 'liger_kernel.transformers.model.output_classes'
```
Fix by wrapping the import in a try/except block (falling back to None)
and converting the return type annotation to a string literal to avoid
evaluation at function definition time.

- Hardware Type: Atlas 800I A2
- [x] run `make test` to ensure correctness
- [x] run `make checkstyle` to ensure code style
- [ ] run `make test-convergence` to ensure convergence
diff --git a/src/liger_kernel/transformers/model/qwen3_5.py b/src/liger_kernel/transformers/model/qwen3_5.py
@@ -7,7 +7,11 @@
 from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
 from liger_kernel.transformers.model.loss_utils import unpack_cross_entropy_result
 from liger_kernel.transformers.model.output_classes import LigerCausalLMOutputWithPast
-from liger_kernel.transformers.model.output_classes import LigerQwen3_5CausalLMOutputWithPast
+
+try:
+    from liger_kernel.transformers.model.output_classes import LigerQwen3_5CausalLMOutputWithPast
+except ImportError:
+    LigerQwen3_5CausalLMOutputWithPast = None
 
 
 def lce_forward(
@@ -138,7 +142,7 @@ def lce_forward_for_multimodal(
     logits_to_keep: Union[int, torch.Tensor] = 0,
     skip_logits: Optional[bool] = None,
     **kwargs,
-) -> Union[tuple, LigerQwen3_5CausalLMOutputWithPast]:
+) -> Union[tuple, "LigerQwen3_5CausalLMOutputWithPast"]:
     r"""
     labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
         Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,