Skip to content

Commit 3df5b27

Browse files
authored
added tests for scripts (#331)
* added tests for scripts * added tests for scripts * polish * polish * polish * polish * polish * polish * polish * added tests for scripts * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish * polish
1 parent 70f5187 commit 3df5b27

8 files changed

Lines changed: 347 additions & 2 deletions

File tree

.github/workflows/test.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,12 @@ jobs:
2626

2727
- name: Restore cache
2828
run: |
29+
if [ -d /github/home/cache ] && [ ! -z "$(ls -A /github/home/cache/)" ]; then
30+
cp -p -r /github/home/cache ./
31+
fi
32+
2933
if [ -d /github/home/sf ] && [ ! -z "$(ls -A /github/home/sf/)" ]; then
30-
cp -p -r /github/home/sf/* ./
34+
cp -p -r /github/home/sf ./
3135
fi
3236
3337
- name: Remove flashinfer # this is needed to avoid flashinfer jit compilation makes the program hang
@@ -55,3 +59,4 @@ jobs:
5559
- name: Save cache
5660
run: |
5761
cp -p -r sf /github/home/
62+
cp -p -r cache /github/home/

scripts/regenerate_train_data.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ def parse_arguments():
8383
action="store_true",
8484
help="Whether the model is a GPT-OSS model",
8585
)
86+
parser.add_argument(
87+
"--num-samples",
88+
type=int,
89+
default=None,
90+
help="The number of samples to regenerate, if not provided, all samples will be regenerated",
91+
)
8692
return parser.parse_args()
8793

8894

@@ -217,6 +223,9 @@ def main():
217223
)
218224
print("-" * 50)
219225

226+
success_samples = 0
227+
error_samples = 0
228+
220229
# Create progress bar
221230
with open(args.input_file_path, "r") as input_file, open(
222231
args.output_file_path, "w"
@@ -231,6 +240,12 @@ def main():
231240
start_server_index = 0
232241

233242
for line in input_file:
243+
if (
244+
args.num_samples is not None
245+
and success_samples + error_samples >= args.num_samples
246+
):
247+
break
248+
234249
data = json.loads(line.strip())
235250

236251
# find server address with the least waiting requests
@@ -249,10 +264,12 @@ def main():
249264
error_file_handle.write(
250265
json.dumps(regen_data, ensure_ascii=False) + "\n"
251266
)
267+
error_samples += 1
252268
else:
253269
output_file_handle.write(
254270
json.dumps(regen_data, ensure_ascii=False) + "\n"
255271
)
272+
success_samples += 1
256273
waiting_queue[server_address].remove(req_future)
257274
finished_on_request = True
258275

@@ -280,12 +297,16 @@ def main():
280297
error_file_handle.write(
281298
json.dumps(regen_data, ensure_ascii=False) + "\n"
282299
)
300+
error_samples += 1
283301
else:
284302
output_file_handle.write(
285303
json.dumps(regen_data, ensure_ascii=False) + "\n"
286304
)
305+
success_samples += 1
287306

288-
print(f"\nProcessing completed!")
307+
print(
308+
f"\nProcessing completed! {success_samples} samples regenerated, {error_samples} samples failed."
309+
)
289310

290311

291312
if __name__ == "__main__":

scripts/train_eagle3.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ def parse_args() -> Tuple[ArgumentParser, Namespace]:
9191

9292
# training hyper params
9393
parser.add_argument("--num-epochs", type=int, default=10)
94+
parser.add_argument(
95+
"--max-num-steps",
96+
type=int,
97+
default=None,
98+
help="The maximum number of steps to train. If not provided, will be calculated as num_epochs * steps_per_epoch",
99+
)
94100
parser.add_argument("--batch-size", type=int, default=1)
95101
parser.add_argument("--learning-rate", type=float, default=1e-4)
96102
parser.add_argument("--max-length", type=int, default=2048)
@@ -766,6 +772,12 @@ def main():
766772
# Save the model
767773
save_checkpoints(args, epoch, global_step, eagle3_model, optimizer)
768774

775+
if args.max_num_steps is not None and global_step >= args.max_num_steps:
776+
break
777+
778+
if args.max_num_steps is not None and global_step >= args.max_num_steps:
779+
break
780+
769781
# Close the tracker
770782
tracker.close()
771783
destroy_distributed()

tests/test_scripts/__init__.py

Whitespace-only changes.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import unittest
from pathlib import Path

# Use the shared test helper (the sibling test files in tests/test_scripts
# import it from tests.utils; importing from sglang.utils here was the odd
# one out and made the suite depend on two different helpers).
from tests.utils import execute_shell_command

# Repository-level cache directory where prepare_data.py writes its output.
CACHE_DIR = Path(__file__).parent.parent.parent.joinpath("cache")


class TestPrepareData(unittest.TestCase):
    """End-to-end test for scripts/prepare_data.py."""

    def test_prepare_sharegpt(self):
        """prepare_data.py --dataset sharegpt must (re)create the training jsonl."""
        sharegpt_train_path = CACHE_DIR.joinpath("dataset", "sharegpt_train.jsonl")

        # Start from a clean slate so the existence assertion below actually
        # proves the script produced the file in this run.
        if sharegpt_train_path.exists():
            sharegpt_train_path.unlink()

        process = execute_shell_command(
            "python scripts/prepare_data.py --dataset sharegpt"
        )
        process.wait()
        self.assertEqual(process.returncode, 0)
        self.assertTrue(sharegpt_train_path.exists())


if __name__ == "__main__":
    unittest.main(verbosity=2)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import unittest
from pathlib import Path

from tests.utils import execute_shell_command, wait_for_server

# Repository-level cache directory where datasets are written.
CACHE_DIR = Path(__file__).parent.parent.parent.joinpath("cache")


class TestRegenerateTrainData(unittest.TestCase):
    """End-to-end test for scripts/regenerate_train_data.py against a live sglang server."""

    def test_regenerate_sharegpt(self):
        # Prepare the input dataset the regeneration script reads.
        data_process = execute_shell_command(
            "python scripts/prepare_data.py --dataset sharegpt"
        )
        data_process.wait()

        # Launch the sglang server that serves the target model.
        sglang_process = execute_shell_command(
            """python3 -m sglang.launch_server \
            --model unsloth/Llama-3.2-1B-Instruct \
            --tp 1 \
            --cuda-graph-bs 4 \
            --dtype bfloat16 \
            --mem-frac=0.8 \
            --port 30000
            """,
            disable_proxy=True,
            enable_hf_mirror=True,
        )
        # Terminate the server even if an assertion below fails; otherwise a
        # failing test leaks the server process (GPU memory + port 30000) into
        # the rest of the test session.
        try:
            # No f-prefix needed: the URL has no placeholders.
            wait_for_server("http://localhost:30000", disable_proxy=True)

            regeneration_process = execute_shell_command(
                """python scripts/regenerate_train_data.py \
            --model unsloth/Llama-3.2-1B-Instruct \
            --concurrency 128 \
            --max-tokens 128 \
            --server-address localhost:30000 \
            --temperature 0.8 \
            --input-file-path ./cache/dataset/sharegpt_train.jsonl \
            --output-file-path ./cache/dataset/sharegpt_train_regen.jsonl \
            --num-samples 10
            """,
                disable_proxy=True,
                enable_hf_mirror=True,
            )
            regeneration_process.wait()
            self.assertEqual(regeneration_process.returncode, 0)
            self.assertTrue(
                CACHE_DIR.joinpath("dataset", "sharegpt_train_regen.jsonl").exists()
            )
        finally:
            sglang_process.terminate()
            sglang_process.wait()


if __name__ == "__main__":
    unittest.main(verbosity=2)
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import shutil
2+
import unittest
3+
from pathlib import Path
4+
5+
from tests.utils import execute_shell_command
6+
7+
CACHE_DIR = Path(__file__).parent.parent.parent.joinpath("cache")
8+
9+
10+
def replace_in_script(script_path: Path, pattern: str, replacement: str):
11+
with open(script_path, "r") as f:
12+
script = f.readlines()
13+
script = [line.replace(pattern, replacement) for line in script]
14+
with open(script_path, "w") as f:
15+
for line in script:
16+
f.write(line)
17+
18+
19+
class TestTrainEagle3(unittest.TestCase):
20+
21+
def setUp(self) -> None:
22+
# prepare data
23+
data_process = execute_shell_command(
24+
"python scripts/prepare_data.py --dataset sharegpt"
25+
)
26+
data_process.wait()
27+
28+
# modify the sccript to only train for 10 steps
29+
# add --max-num-steps 10 to the launch command
30+
script_path = Path(__file__).parent.parent.parent.joinpath(
31+
"examples", "run_llama3.1_8b_eagle3_online.sh"
32+
)
33+
with open(script_path, "r") as f:
34+
script = f.readlines()
35+
36+
# remove empty lines
37+
script = [line for line in script if line.strip()]
38+
script[-1] = script[-1].rstrip() + " --max-num-steps 10"
39+
40+
# replace meta-llama/Llama-3.1-8B-Instruct with unsloth/Llama-3.2-1B-Instruct
41+
# so that we don't need HF token for gated repo
42+
script = [
43+
line.replace(
44+
"meta-llama/Llama-3.1-8B-Instruct", "nreHieW/Llama-3.1-8B-Instruct"
45+
)
46+
for line in script
47+
]
48+
49+
# write the script back to the file
50+
with open(script_path, "w") as f:
51+
for line in script:
52+
f.write(line)
53+
54+
def test_online_train_eagle3_with_sglang_backend(self):
55+
# run training
56+
train_process = execute_shell_command(
57+
"bash examples/run_llama3.1_8b_eagle3_online.sh 2"
58+
)
59+
train_process.wait()
60+
self.assertEqual(train_process.returncode, 0)
61+
62+
def test_online_train_eagle3_with_hf_backend(self):
63+
# replace --target-model-backend sglang with --target-model-backend hf
64+
script_path = Path(__file__).parent.parent.parent.joinpath(
65+
"examples", "run_llama3.1_8b_eagle3_online.sh"
66+
)
67+
replace_in_script(
68+
script_path, "--target-model-backend sglang", "--target-model-backend hf"
69+
)
70+
71+
# run training
72+
train_process = execute_shell_command(
73+
"bash examples/run_llama3.1_8b_eagle3_online.sh 2"
74+
)
75+
train_process.wait()
76+
self.assertEqual(train_process.returncode, 0)
77+
78+
def test_online_train_eagle3_with_custom_backend(self):
79+
# replace --target-model-backend sglang with --target-model-backend custom
80+
script_path = Path(__file__).parent.parent.parent.joinpath(
81+
"examples", "run_llama3.1_8b_eagle3_online.sh"
82+
)
83+
replace_in_script(
84+
script_path,
85+
"--target-model-backend sglang",
86+
"--target-model-backend custom",
87+
)
88+
89+
# run training
90+
train_process = execute_shell_command(
91+
"bash examples/run_llama3.1_8b_eagle3_online.sh 2"
92+
)
93+
train_process.wait()
94+
self.assertEqual(train_process.returncode, 0)
95+
96+
def test_offline_train_eagle3(self):
97+
# remove the hidden states if they exist
98+
script_path = Path(__file__).parent.parent.parent.joinpath(
99+
"examples", "run_llama3.1_8b_eagle3_offline.sh"
100+
)
101+
replace_in_script(
102+
script_path,
103+
"meta-llama/Llama-3.1-8B-Instruct",
104+
"nreHieW/Llama-3.1-8B-Instruct",
105+
)
106+
replace_in_script(
107+
script_path,
108+
"--batch-size 32",
109+
"--batch-size 5",
110+
)
111+
replace_in_script(
112+
script_path,
113+
"scripts/prepare_hidden_states.py",
114+
"scripts/prepare_hidden_states.py --num-samples 10",
115+
)
116+
replace_in_script(
117+
script_path,
118+
"$ROOT_DIR/scripts/train_eagle3.py",
119+
"$ROOT_DIR/scripts/train_eagle3.py --max-num-steps 2",
120+
)
121+
122+
hidden_states_path = Path(__file__).parent.parent.parent.joinpath(
123+
"cache", "hidden_states", "sharegpt_train_Llama-3.1-8B-Instruct"
124+
)
125+
if hidden_states_path.exists():
126+
# delete the directory
127+
shutil.rmtree(hidden_states_path)
128+
129+
training_process = execute_shell_command(
130+
"bash examples/run_llama3.1_8b_eagle3_offline.sh 2",
131+
)
132+
training_process.wait()
133+
self.assertEqual(training_process.returncode, 0)
134+
135+
136+
if __name__ == "__main__":
137+
unittest.main(verbosity=2)

0 commit comments

Comments
 (0)