Skip to content

Commit 42ca212

Browse files
authored
Merge pull request #2451 from dgageot/board/add-thinking-display-config-to-opus-4-7-657e4d3f
Add thinking_display provider_opt for Anthropic models
2 parents 1e6975e + 4b6f252 commit 42ca212

File tree

8 files changed

+638
-132
lines changed

8 files changed

+638
-132
lines changed

agent-schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@
634634
},
635635
"provider_opts": {
636636
"type": "object",
637-
"description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
637+
"description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true), thinking_display ('summarized', 'omitted', or 'display') controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking by default ('omitted'); set thinking_display: summarized (or thinking_display: display) to receive thinking blocks. openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
638638
"additionalProperties": true
639639
},
640640
"track_usage": {

docs/configuration/models/index.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,22 @@ models:
178178
interleaved_thinking: false # disable if needed
179179
```
180180

181+
## Thinking Display (Anthropic)
182+
183+
For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); set this provider option to `summarized` (or `display`) to receive thinking blocks:
184+
185+
```yaml
186+
models:
187+
opus-4-7:
188+
provider: anthropic
189+
model: claude-opus-4-7
190+
thinking_budget: adaptive
191+
provider_opts:
192+
thinking_display: summarized # "summarized", "display", or "omitted"
193+
```
194+
195+
See the [Anthropic provider page]({{ '/providers/anthropic/#thinking-display' | relative_url }}) for details.
196+
181197
## Examples by Provider
182198

183199
```yaml

docs/providers/anthropic/index.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,28 @@ Object form (forward-compatible with future budget types):
106106

107107
See the full schema on the [Model Configuration]({{ '/configuration/models/#task-budget' | relative_url }}) page.
108108

109+
## Thinking Display
110+
111+
Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. Set `thinking_display` in `provider_opts` to override:
112+
113+
```yaml
114+
models:
115+
claude-opus-4-7:
116+
provider: anthropic
117+
model: claude-opus-4-7
118+
thinking_budget: adaptive
119+
provider_opts:
120+
thinking_display: summarized # "summarized", "display", or "omitted"
121+
```
122+
123+
Valid values:
124+
125+
- `summarized`: thinking blocks are returned with summarized thinking text (default for Claude 4 models prior to Opus 4.7).
126+
- `display`: thinking blocks are returned for display (use this to re-enable thinking output on Opus 4.7).
127+
- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7). Useful to reduce time-to-first-text-token when streaming.
128+
129+
Note: `thinking_display` applies to both `thinking_budget` with token counts and adaptive/effort-based budgets. Full thinking tokens are billed regardless of the `thinking_display` value.
130+
109131
<div class="callout callout-info" markdown="1">
110132
<div class="callout-title">ℹ️ Note
111133
</div>

examples/thinking_budget.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ models:
4848
model: claude-opus-4-6
4949
thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max"
5050

51+
claude-opus-4-7-summarized:
52+
provider: anthropic
53+
model: claude-opus-4-7 # <- Opus 4.7 hides thinking by default; the same option works with any recent Claude model
54+
thinking_budget: adaptive
55+
provider_opts:
56+
thinking_display: summarized # <- "summarized", "display", or "omitted" (Opus 4.7 defaults to omitted)
57+
5158
gemini-2-5-flash-dynamic-thinking:
5259
provider: google
5360
model: gemini-2.5-flash

pkg/model/provider/anthropic/beta_client.go

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,7 @@ func (c *Client) createBetaStream(
9090
// Configure thinking if a thinking budget is set in the model config.
9191
// The beta client is also used for structured output and file attachments,
9292
// which don't require thinking.
93-
if budget := c.ModelConfig.ThinkingBudget; budget != nil {
94-
if effort, ok := anthropicThinkingEffort(budget); ok {
95-
adaptive := anthropic.BetaThinkingConfigAdaptiveParam{}
96-
params.Thinking = anthropic.BetaThinkingConfigParamUnion{OfAdaptive: &adaptive}
97-
params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort)
98-
slog.Debug("Anthropic Beta API using adaptive thinking", "effort", effort)
99-
} else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
100-
params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(tokens)
101-
slog.Debug("Anthropic Beta API using thinking_budget", "budget_tokens", tokens)
102-
}
103-
}
93+
c.applyBetaThinkingConfig(&params, maxTokens)
10494

10595
// Forward task_budget via `output_config.task_budget` (Anthropic
10696
// Opus 4.7+) and enable the corresponding beta header. Older Claude

pkg/model/provider/anthropic/client.go

Lines changed: 1 addition & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717

1818
"github.com/docker/docker-agent/pkg/chat"
1919
"github.com/docker/docker-agent/pkg/config/latest"
20-
"github.com/docker/docker-agent/pkg/effort"
2120
"github.com/docker/docker-agent/pkg/environment"
2221
"github.com/docker/docker-agent/pkg/httpclient"
2322
"github.com/docker/docker-agent/pkg/model/provider/base"
@@ -35,79 +34,6 @@ type Client struct {
3534
fileManager *FileManager
3635
}
3736

38-
// adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget.
39-
// Anthropic's max_tokens represents the combined budget for thinking + output tokens.
40-
// Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
41-
//
42-
// Only fixed token budgets need adjustment. Adaptive and effort-based budgets
43-
// don't need it since the model manages its own thinking allocation.
44-
func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
45-
if c.ModelConfig.ThinkingBudget == nil {
46-
return maxTokens, nil
47-
}
48-
// Adaptive and effort-based budgets: no token adjustment needed.
49-
if _, ok := anthropicThinkingEffort(c.ModelConfig.ThinkingBudget); ok {
50-
return maxTokens, nil
51-
}
52-
53-
thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
54-
if thinkingTokens <= 0 {
55-
return maxTokens, nil
56-
}
57-
58-
minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer
59-
60-
if maxTokens <= thinkingTokens {
61-
userSetMaxTokens := c.ModelConfig.MaxTokens != nil
62-
if userSetMaxTokens {
63-
// User explicitly set max_tokens too low - return error
64-
slog.Error("Anthropic: max_tokens must be greater than thinking_budget",
65-
"max_tokens", maxTokens,
66-
"thinking_budget", thinkingTokens)
67-
return 0, fmt.Errorf("anthropic: max_tokens (%d) must be greater than thinking_budget (%d); increase max_tokens to at least %d",
68-
maxTokens, thinkingTokens, minRequired)
69-
}
70-
// Auto-adjust when user didn't set max_tokens
71-
slog.Info("Anthropic: auto-adjusting max_tokens to accommodate thinking_budget",
72-
"original_max_tokens", maxTokens,
73-
"thinking_budget", thinkingTokens,
74-
"new_max_tokens", minRequired)
75-
// return the configured thinking budget + 8192 because that's the default
76-
// max_tokens value for anthropic models when unspecified by the user
77-
return thinkingTokens + 8192, nil
78-
}
79-
80-
return maxTokens, nil
81-
}
82-
83-
// interleavedThinkingEnabled returns false unless explicitly enabled via
84-
// models:provider_opts:interleaved_thinking: true
85-
func (c *Client) interleavedThinkingEnabled() bool {
86-
// Default to false if not provided
87-
if c == nil || len(c.ModelConfig.ProviderOpts) == 0 {
88-
return false
89-
}
90-
v, ok := c.ModelConfig.ProviderOpts["interleaved_thinking"]
91-
if !ok {
92-
return false
93-
}
94-
switch t := v.(type) {
95-
case bool:
96-
return t
97-
case string:
98-
s := strings.TrimSpace(strings.ToLower(t))
99-
return s != "false" && s != "0" && s != "no"
100-
case int:
101-
return t != 0
102-
case int64:
103-
return t != 0
104-
case float64:
105-
return t != 0
106-
default:
107-
return false
108-
}
109-
}
110-
11137
// NewClient creates a new Anthropic client from the provided configuration
11238
func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Provider, opts ...options.Opt) (*Client, error) {
11339
if cfg == nil {
@@ -288,20 +214,7 @@ func (c *Client) CreateChatCompletionStream(
288214
}
289215

290216
// Apply thinking budget first, as it affects whether we can set temperature
291-
thinkingEnabled := false
292-
if budget := c.ModelConfig.ThinkingBudget; budget != nil {
293-
if effortStr, ok := anthropicThinkingEffort(budget); ok {
294-
adaptive := anthropic.ThinkingConfigAdaptiveParam{}
295-
params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
296-
params.OutputConfig.Effort = anthropic.OutputConfigEffort(effortStr)
297-
thinkingEnabled = true
298-
slog.Debug("Anthropic API using adaptive thinking", "effort", effortStr)
299-
} else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
300-
params.Thinking = anthropic.ThinkingConfigParamOfEnabled(tokens)
301-
thinkingEnabled = true
302-
slog.Debug("Anthropic API using thinking_budget", "budget_tokens", tokens)
303-
}
304-
}
217+
thinkingEnabled := c.applyThinkingConfig(&params, maxTokens)
305218

306219
// Temperature and TopP cannot be set when extended thinking is enabled
307220
// (Anthropic requires temperature=1.0 which is the default when thinking is on)
@@ -753,38 +666,6 @@ func contentArray(m map[string]any) []any {
753666
return nil
754667
}
755668

756-
// validThinkingTokens validates that the token budget is within the
757-
// acceptable range for Anthropic (>= 1024 and < maxTokens).
758-
// Returns (tokens, true) if valid, or (0, false) with a warning log if not.
759-
func validThinkingTokens(tokens, maxTokens int64) (int64, bool) {
760-
if tokens < 1024 {
761-
slog.Warn("Anthropic thinking_budget below minimum (1024), ignoring", "tokens", tokens)
762-
return 0, false
763-
}
764-
if tokens >= maxTokens {
765-
slog.Warn("Anthropic thinking_budget must be less than max_tokens, ignoring", "tokens", tokens, "max_tokens", maxTokens)
766-
return 0, false
767-
}
768-
return tokens, true
769-
}
770-
771-
// anthropicThinkingEffort returns the Anthropic API effort level for the given
772-
// ThinkingBudget. It covers both explicit adaptive mode and string effort
773-
// levels. Returns ("", false) when the budget uses token counts or is nil.
774-
func anthropicThinkingEffort(b *latest.ThinkingBudget) (string, bool) {
775-
if b == nil {
776-
return "", false
777-
}
778-
if e, ok := b.AdaptiveEffort(); ok {
779-
return e, true
780-
}
781-
l, ok := b.EffortLevel()
782-
if !ok {
783-
return "", false
784-
}
785-
return effort.ForAnthropic(l)
786-
}
787-
788669
// anthropicContextLimit returns a reasonable default context window for Anthropic models.
789670
// We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time.
790671
func anthropicContextLimit(model string) int64 {

0 commit comments

Comments
 (0)