-
Notifications
You must be signed in to change notification settings - Fork 213
fix: Support different version of PCG args #517
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,32 @@ | |
| from sglang.srt.server_args import ATTENTION_BACKEND_CHOICES | ||
|
|
||
|
|
||
| def adapt_sglang_server_args_kwargs(kwargs: Dict[str, Any]) -> Dict[str, Any]: | ||
| """Adapt piecewise cuda graph kwargs for sglang version compatibility. | ||
|
|
||
| New sglang (post-0.5.9) uses 'enforce_piecewise_cuda_graph' (piecewise cuda graph | ||
| is enabled by default, this flag forces it on even when auto-disabled). | ||
| Old sglang (<=0.5.9) uses 'enable_piecewise_cuda_graph' (disabled by default). | ||
|
|
||
| This function translates between the two based on the installed sglang version. | ||
| """ | ||
| from sglang.srt.server_args import ServerArgs | ||
|
|
||
| has_enforce = hasattr(ServerArgs, "enforce_piecewise_cuda_graph") | ||
| has_enable = hasattr(ServerArgs, "enable_piecewise_cuda_graph") | ||
|
|
||
| if "enforce_piecewise_cuda_graph" in kwargs and not has_enforce and has_enable: | ||
| kwargs["enable_piecewise_cuda_graph"] = kwargs.pop( | ||
| "enforce_piecewise_cuda_graph" | ||
| ) | ||
| elif "enable_piecewise_cuda_graph" in kwargs and not has_enable and has_enforce: | ||
| kwargs["enforce_piecewise_cuda_graph"] = kwargs.pop( | ||
| "enable_piecewise_cuda_graph" | ||
| ) | ||
|
|
||
| return kwargs | ||
|
Comment on lines
+8
to
+31
Contributor
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function modifies the `kwargs` dictionary passed in by the caller in place. I suggest creating a copy of `kwargs` at the start of the function before modifying it:
def adapt_sglang_server_args_kwargs(kwargs: Dict[str, Any]) -> Dict[str, Any]:
"""Adapt piecewise cuda graph kwargs for sglang version compatibility.
New sglang (post-0.5.9) uses 'enforce_piecewise_cuda_graph' (piecewise cuda graph
is enabled by default, this flag forces it on even when auto-disabled).
Old sglang (<=0.5.9) uses 'enable_piecewise_cuda_graph' (disabled by default).
This function translates between the two based on the installed sglang version.
"""
kwargs = kwargs.copy()
from sglang.srt.server_args import ServerArgs
has_enforce = hasattr(ServerArgs, "enforce_piecewise_cuda_graph")
has_enable = hasattr(ServerArgs, "enable_piecewise_cuda_graph")
if "enforce_piecewise_cuda_graph" in kwargs and not has_enforce and has_enable:
kwargs["enable_piecewise_cuda_graph"] = kwargs.pop(
"enforce_piecewise_cuda_graph"
)
elif "enable_piecewise_cuda_graph" in kwargs and not has_enable and has_enforce:
kwargs["enforce_piecewise_cuda_graph"] = kwargs.pop(
"enable_piecewise_cuda_graph"
)
return kwargs |
||
|
|
||
|
|
||
| @dataclass | ||
| class TrackerArgs: | ||
| report_to: str = "none" | ||
|
|
@@ -188,7 +214,7 @@ def from_args(args: argparse.Namespace) -> "SGLangBackendArgs": | |
| ) | ||
|
|
||
| def to_kwargs(self) -> Dict[str, Any]: | ||
| return dict( | ||
| kwargs = dict( | ||
| attention_backend=self.sglang_attention_backend, | ||
| mem_fraction_static=self.sglang_mem_fraction_static, | ||
| context_length=self.sglang_context_length, | ||
|
|
@@ -204,3 +230,4 @@ def to_kwargs(self) -> Dict[str, Any]: | |
| max_running_requests=self.sglang_max_running_requests, | ||
| max_total_tokens=self.sglang_max_total_tokens, | ||
| ) | ||
| return adapt_sglang_server_args_kwargs(kwargs) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`adapt_sglang_server_args_kwargs` can silently overwrite values when both `enable_piecewise_cuda_graph` and `enforce_piecewise_cuda_graph` are present in `kwargs` (e.g., caller passes one manually while another is injected elsewhere). Please handle this explicitly (e.g., define precedence, raise a ValueError, or only convert when the destination key is absent) to avoid surprising behavior.