@@ -96,7 +96,7 @@ class SGLangBackendArgs:
9696 sglang_enable_torch_compile : bool = True
9797 sglang_enable_dp_attention : bool = False
9898 sglang_enable_dp_lm_head : bool = False
99- sglang_enable_piecewise_cuda_graph : bool = False
99+ sglang_enforce_piecewise_cuda_graph : bool = False
100100 sglang_piecewise_cuda_graph_max_tokens : int = 4096
101101 sglang_piecewise_cuda_graph_tokens : List [int ] = None
102102 sglang_ep_size : int = 1
@@ -151,9 +151,9 @@ def add_args(parser: argparse.ArgumentParser) -> None:
151151 help = "Enable piecewise CUDA graph for SGLang backend" ,
152152 )
153153 parser .add_argument (
154- "--sglang-enable-piecewise-cuda-graph" ,
154+ "--sglang-enforce-piecewise-cuda-graph" ,
155155 action = "store_true" ,
156- help = "Enable piecewise CUDA graph for SGLang backend's prefill" ,
156+ help = "Enforce piecewise CUDA graph for SGLang backend's prefill" ,
157157 )
158158 parser .add_argument (
159159 "--sglang-piecewise-cuda-graph-max-tokens" ,
@@ -186,7 +186,7 @@ def from_args(args: argparse.Namespace) -> "SGLangBackendArgs":
186186 sglang_enable_torch_compile = args .sglang_enable_torch_compile ,
187187 sglang_enable_dp_attention = args .sglang_enable_dp_attention ,
188188 sglang_enable_dp_lm_head = args .sglang_enable_dp_lm_head ,
189- sglang_enable_piecewise_cuda_graph = args .sglang_enable_piecewise_cuda_graph ,
189+ sglang_enforce_piecewise_cuda_graph = args .sglang_enforce_piecewise_cuda_graph ,
190190 sglang_piecewise_cuda_graph_max_tokens = args .sglang_piecewise_cuda_graph_max_tokens ,
191191 sglang_piecewise_cuda_graph_tokens = args .sglang_piecewise_cuda_graph_tokens ,
192192 sglang_ep_size = args .sglang_ep_size ,
@@ -210,7 +210,7 @@ def to_kwargs(self) -> Dict[str, Any]:
210210 enable_torch_compile = self .sglang_enable_torch_compile ,
211211 enable_dp_attention = self .sglang_enable_dp_attention ,
212212 enable_dp_lm_head = self .sglang_enable_dp_lm_head ,
213- enable_piecewise_cuda_graph = self .sglang_enable_piecewise_cuda_graph ,
213+ enforce_piecewise_cuda_graph = self .sglang_enforce_piecewise_cuda_graph ,
214214 piecewise_cuda_graph_max_tokens = self .sglang_piecewise_cuda_graph_max_tokens ,
215215 piecewise_cuda_graph_tokens = self .sglang_piecewise_cuda_graph_tokens ,
216216 ep_size = self .sglang_ep_size ,
0 commit comments