Skip to content

Commit 4cf82e0

Browse files
committed
Add Gemma 4 model family support
Implement the text-only portion of the Google DeepMind Gemma 4 architecture:

- Hybrid attention: alternating sliding window and full attention layers
- Dual RoPE: proportional RoPE for full attention, default for sliding
- Per-Layer Embeddings (PLE): per-layer token-dependent gating
- KV sharing: later layers reuse KV from earlier layers of the same type
- Q/K/V normalization: RMS normalization on query, key, and value
- Per-layer scalar: learned scaling factor per transformer block
- Optional MoE: mixture-of-experts FFN blocks (26B-A4B variant)

Architectures: :base (Gemma4TextModel), :for_causal_language_modeling (Gemma4ForCausalLM). Multimodal Gemma4ForConditionalGeneration is not yet supported.

Uses a custom decoder loop rather than Layers.Transformer.blocks/2 because the model requires features not available in the shared infrastructure: per-layer embeddings threaded through the block loop, cross-block KV sharing state, per-layer head dimension variation, and value normalization.

Includes an integration test verified against Python transformers reference values (atol < 5e-5).
1 parent 0b397f6 commit 4cf82e0

File tree

4 files changed

+1507
-0
lines changed

4 files changed

+1507
-0
lines changed

generate_tiny_gemma4_checkpoint.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""Generate tiny-random Gemma4 checkpoints for Bumblebee integration tests.

Creates:
    /tmp/bumblebee-testing/tiny-random-Gemma4ForCausalLM/

Then prints Python reference values for the Elixir test.

Usage:
    python3 generate_tiny_gemma4_checkpoint.py
"""

import os

import torch
from transformers import Gemma4ForCausalLM, Gemma4TextConfig

# Where the tiny checkpoint is written; the Elixir integration test loads it
# from this path.
OUT_DIR = "/tmp/bumblebee-testing/tiny-random-Gemma4ForCausalLM"


def _build_config():
    """Return a tiny Gemma4 text config matching the Elixir test structure."""
    return Gemma4TextConfig(
        vocab_size=1024,
        hidden_size=32,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=2,
        head_dim=8,
        global_head_dim=16,
        intermediate_size=64,
        hidden_activation="gelu_pytorch_tanh",
        max_position_embeddings=128,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        pad_token_id=0,
        sliding_window=32,
        enable_moe_block=False,
        hidden_size_per_layer_input=0,
        num_kv_shared_layers=0,
        attention_k_eq_v=False,
        tie_word_embeddings=True,
        final_logit_softcapping=30.0,
        num_experts=None,
        top_k_experts=None,
        # One sliding + one full attention layer exercises both RoPE configs.
        layer_types=["sliding_attention", "full_attention"],
        rope_parameters={
            "sliding_attention": {"rope_theta": 10000.0, "rope_type": "default"},
            "full_attention": {
                "rope_theta": 1000000.0,
                "rope_type": "proportional",
                "partial_rotary_factor": 0.25,
            },
        },
    )


def _create_and_save_model(config):
    """Create a randomly initialized model, save it to OUT_DIR, return it."""
    model = Gemma4ForCausalLM(config)
    model.eval()
    print(f"Model params: {sum(p.numel() for p in model.parameters()):,}")

    os.makedirs(OUT_DIR, exist_ok=True)
    model.save_pretrained(OUT_DIR)
    print(f"Saved to {OUT_DIR}")
    print(f"Files: {os.listdir(OUT_DIR)}")
    return model


def _print_reference_values(model):
    """Run a fixed input through the model and print Elixir test snippets."""
    inputs = {
        "input_ids": torch.tensor([[10, 20, 30, 40, 50, 60, 70, 80, 0, 0]]),
        "attention_mask": torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]),
    }

    with torch.no_grad():
        logits = model(**inputs).logits

    print(f"\nlogits shape: {logits.shape}")
    print("\nReference values for Elixir test:")
    print("outputs.logits[[.., 1..3, 1..3]]:")
    # Python slice 1:4 corresponds to the Elixir range 1..3 (both select
    # indices 1, 2, 3 inclusive).
    ref = logits[:, 1:4, 1:4]
    print(ref)

    print("\n--- Copy this into gemma4_test.exs ---")
    print("assert Nx.shape(outputs.logits) == {1, 10, 1024}")
    print()
    print("assert_all_close(")
    print("  outputs.logits[[.., 1..3, 1..3]],")
    print("  Nx.tensor([")
    rows = [
        "    [{}]".format(", ".join(f"{ref[0, i, j].item():.4f}" for j in range(3)))
        for i in range(3)
    ]
    print(",\n".join(rows))
    print("  ])")
    print(")")


def main():
    # Seed the RNG so the saved checkpoint and the printed reference values
    # are reproducible across runs; without this every invocation produces a
    # different checkpoint, and previously printed reference values would no
    # longer match the saved weights.
    torch.manual_seed(0)

    config = _build_config()
    print(f"Config model_type: {config.model_type}")

    model = _create_and_save_model(config)
    _print_reference_values(model)


if __name__ == "__main__":
    main()

lib/bumblebee.ex

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ defmodule Bumblebee do
136136
"DistilBertForMultipleChoice" => {Bumblebee.Text.Distilbert, :for_multiple_choice},
137137
"GemmaModel" => {Bumblebee.Text.Gemma, :base},
138138
"GemmaForCausalLM" => {Bumblebee.Text.Gemma, :for_causal_language_modeling},
139+
"Gemma4ForCausalLM" => {Bumblebee.Text.Gemma4, :for_causal_language_modeling},
140+
"Gemma4TextModel" => {Bumblebee.Text.Gemma4, :base},
139141
"GemmaForSequenceClassification" => {Bumblebee.Text.Gemma, :for_sequence_classification},
140142
"Gemma3ForCausalLM" => {Bumblebee.Text.Gemma3Text, :for_causal_language_modeling},
141143
"Gemma3TextModel" => {Bumblebee.Text.Gemma3Text, :base},
@@ -273,6 +275,8 @@ defmodule Bumblebee do
273275
"clip" => :clip,
274276
"gemma" => :gemma,
275277
"gemma3_text" => :gemma,
278+
"gemma4" => :gemma,
279+
"gemma4_text" => :gemma,
276280
"gpt_neox" => :gpt_neo_x,
277281
"gpt2" => :gpt2,
278282
"gpt_bigcode" => :gpt2,

0 commit comments

Comments
 (0)