Skip to content

Commit 42ca212

Browse files
authored
Merge pull request #2451 from dgageot/board/add-thinking-display-config-to-opus-4-7-657e4d3f
Add thinking_display provider_opt for Anthropic models
2 parents 1e6975e + 4b6f252 commit 42ca212

File tree

8 files changed

+638
-132
lines changed

8 files changed

+638
-132
lines changed

agent-schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@
634634
},
635635
"provider_opts": {
636636
"type": "object",
637-
"description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
637+
"description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true), thinking_display ('summarized', 'omitted', or 'display') controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking by default ('omitted'); set thinking_display: summarized (or thinking_display: display) to receive thinking blocks. openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
638638
"additionalProperties": true
639639
},
640640
"track_usage": {

docs/configuration/models/index.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,22 @@ models:
178178
interleaved_thinking: false # disable if needed
179179
```
180180

181+
## Thinking Display (Anthropic)
182+
183+
For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); set this provider option to `summarized` (or `display`) to receive thinking blocks:
184+
185+
```yaml
186+
models:
187+
opus-4-7:
188+
provider: anthropic
189+
model: claude-opus-4-7
190+
thinking_budget: adaptive
191+
provider_opts:
192+
thinking_display: summarized # "summarized", "display", or "omitted"
193+
```
194+
195+
See the [Anthropic provider page]({{ '/providers/anthropic/#thinking-display' | relative_url }}) for details.
196+
181197
## Examples by Provider
182198

183199
```yaml

docs/providers/anthropic/index.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,28 @@ Object form (forward-compatible with future budget types):
106106

107107
See the full schema on the [Model Configuration]({{ '/configuration/models/#task-budget' | relative_url }}) page.
108108

109+
## Thinking Display
110+
111+
Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. Set `thinking_display` in `provider_opts` to override:
112+
113+
```yaml
114+
models:
115+
claude-opus-4-7:
116+
provider: anthropic
117+
model: claude-opus-4-7
118+
thinking_budget: adaptive
119+
provider_opts:
120+
thinking_display: summarized # "summarized", "display", or "omitted"
121+
```
122+
123+
Valid values:
124+
125+
- `summarized`: thinking blocks are returned with summarized thinking text (default for Claude 4 models prior to Opus 4.7).
126+
- `display`: thinking blocks are returned for display (use this to re-enable thinking output on Opus 4.7).
127+
- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7). Useful to reduce time-to-first-text-token when streaming.
128+
129+
Note: `thinking_display` applies to both `thinking_budget` with token counts and adaptive/effort-based budgets. Full thinking tokens are billed regardless of the `thinking_display` value.
130+
109131
<div class="callout callout-info" markdown="1">
110132
<div class="callout-title">ℹ️ Note
111133
</div>

examples/thinking_budget.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ models:
4848
model: claude-opus-4-6
4949
thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max"
5050

51+
claude-opus-4-7-summarized:
52+
provider: anthropic
53+
model: claude-opus-4-7 # <- Opus 4.7 hides thinking by default; the same option works with any recent Claude model
54+
thinking_budget: adaptive
55+
provider_opts:
56+
thinking_display: summarized # <- "summarized", "display", or "omitted" (Opus 4.7 defaults to omitted)
57+
5158
gemini-2-5-flash-dynamic-thinking:
5259
provider: google
5360
model: gemini-2.5-flash

pkg/model/provider/anthropic/beta_client.go

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,7 @@ func (c *Client) createBetaStream(
9090
// Configure thinking if a thinking budget is set in the model config.
9191
// The beta client is also used for structured output and file attachments,
9292
// which don't require thinking.
93-
if budget := c.ModelConfig.ThinkingBudget; budget != nil {
94-
if effort, ok := anthropicThinkingEffort(budget); ok {
95-
adaptive := anthropic.BetaThinkingConfigAdaptiveParam{}
96-
params.Thinking = anthropic.BetaThinkingConfigParamUnion{OfAdaptive: &adaptive}
97-
params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort)
98-
slog.Debug("Anthropic Beta API using adaptive thinking", "effort", effort)
99-
} else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
100-
params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(tokens)
101-
slog.Debug("Anthropic Beta API using thinking_budget", "budget_tokens", tokens)
102-
}
103-
}
93+
c.applyBetaThinkingConfig(&params, maxTokens)
10494

10595
// Forward task_budget via `output_config.task_budget` (Anthropic
10696
// Opus 4.7+) and enable the corresponding beta header. Older Claude

pkg/model/provider/anthropic/client.go

Lines changed: 1 addition & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717

1818
"github.com/docker/docker-agent/pkg/chat"
1919
"github.com/docker/docker-agent/pkg/config/latest"
20-
"github.com/docker/docker-agent/pkg/effort"
2120
"github.com/docker/docker-agent/pkg/environment"
2221
"github.com/docker/docker-agent/pkg/httpclient"
2322
"github.com/docker/docker-agent/pkg/model/provider/base"
@@ -35,79 +34,6 @@ type Client struct {
3534
fileManager *FileManager
3635
}
3736

38-
// adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget.
39-
// Anthropic's max_tokens represents the combined budget for thinking + output tokens.
40-
// Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
41-
//
42-
// Only fixed token budgets need adjustment. Adaptive and effort-based budgets
43-
// don't need it since the model manages its own thinking allocation.
44-
func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
45-
if c.ModelConfig.ThinkingBudget == nil {
46-
return maxTokens, nil
47-
}
48-
// Adaptive and effort-based budgets: no token adjustment needed.
49-
if _, ok := anthropicThinkingEffort(c.ModelConfig.ThinkingBudget); ok {
50-
return maxTokens, nil
51-
}
52-
53-
thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
54-
if thinkingTokens <= 0 {
55-
return maxTokens, nil
56-
}
57-
58-
minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer
59-
60-
if maxTokens <= thinkingTokens {
61-
userSetMaxTokens := c.ModelConfig.MaxTokens != nil
62-
if userSetMaxTokens {
63-
// User explicitly set max_tokens too low - return error
64-
slog.Error("Anthropic: max_tokens must be greater than thinking_budget",
65-
"max_tokens", maxTokens,
66-
"thinking_budget", thinkingTokens)
67-
return 0, fmt.Errorf("anthropic: max_tokens (%d) must be greater than thinking_budget (%d); increase max_tokens to at least %d",
68-
maxTokens, thinkingTokens, minRequired)
69-
}
70-
// Auto-adjust when user didn't set max_tokens
71-
slog.Info("Anthropic: auto-adjusting max_tokens to accommodate thinking_budget",
72-
"original_max_tokens", maxTokens,
73-
"thinking_budget", thinkingTokens,
74-
"new_max_tokens", minRequired)
75-
// return the configured thinking budget + 8192 because that's the default
76-
// max_tokens value for anthropic models when unspecified by the user
77-
return thinkingTokens + 8192, nil
78-
}
79-
80-
return maxTokens, nil
81-
}
82-
83-
// interleavedThinkingEnabled returns false unless explicitly enabled via
84-
// models:provider_opts:interleaved_thinking: true
85-
func (c *Client) interleavedThinkingEnabled() bool {
86-
// Default to false if not provided
87-
if c == nil || len(c.ModelConfig.ProviderOpts) == 0 {
88-
return false
89-
}
90-
v, ok := c.ModelConfig.ProviderOpts["interleaved_thinking"]
91-
if !ok {
92-
return false
93-
}
94-
switch t := v.(type) {
95-
case bool:
96-
return t
97-
case string:
98-
s := strings.TrimSpace(strings.ToLower(t))
99-
return s != "false" && s != "0" && s != "no"
100-
case int:
101-
return t != 0
102-
case int64:
103-
return t != 0
104-
case float64:
105-
return t != 0
106-
default:
107-
return false
108-
}
109-
}
110-
11137
// NewClient creates a new Anthropic client from the provided configuration
11238
func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Provider, opts ...options.Opt) (*Client, error) {
11339
if cfg == nil {
@@ -288,20 +214,7 @@ func (c *Client) CreateChatCompletionStream(
288214
}
289215

290216
// Apply thinking budget first, as it affects whether we can set temperature
291-
thinkingEnabled := false
292-
if budget := c.ModelConfig.ThinkingBudget; budget != nil {
293-
if effortStr, ok := anthropicThinkingEffort(budget); ok {
294-
adaptive := anthropic.ThinkingConfigAdaptiveParam{}
295-
params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
296-
params.OutputConfig.Effort = anthropic.OutputConfigEffort(effortStr)
297-
thinkingEnabled = true
298-
slog.Debug("Anthropic API using adaptive thinking", "effort", effortStr)
299-
} else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
300-
params.Thinking = anthropic.ThinkingConfigParamOfEnabled(tokens)
301-
thinkingEnabled = true
302-
slog.Debug("Anthropic API using thinking_budget", "budget_tokens", tokens)
303-
}
304-
}
217+
thinkingEnabled := c.applyThinkingConfig(&params, maxTokens)
305218

306219
// Temperature and TopP cannot be set when extended thinking is enabled
307220
// (Anthropic requires temperature=1.0 which is the default when thinking is on)
@@ -753,38 +666,6 @@ func contentArray(m map[string]any) []any {
753666
return nil
754667
}
755668

756-
// validThinkingTokens validates that the token budget is within the
757-
// acceptable range for Anthropic (>= 1024 and < maxTokens).
758-
// Returns (tokens, true) if valid, or (0, false) with a warning log if not.
759-
func validThinkingTokens(tokens, maxTokens int64) (int64, bool) {
760-
if tokens < 1024 {
761-
slog.Warn("Anthropic thinking_budget below minimum (1024), ignoring", "tokens", tokens)
762-
return 0, false
763-
}
764-
if tokens >= maxTokens {
765-
slog.Warn("Anthropic thinking_budget must be less than max_tokens, ignoring", "tokens", tokens, "max_tokens", maxTokens)
766-
return 0, false
767-
}
768-
return tokens, true
769-
}
770-
771-
// anthropicThinkingEffort returns the Anthropic API effort level for the given
772-
// ThinkingBudget. It covers both explicit adaptive mode and string effort
773-
// levels. Returns ("", false) when the budget uses token counts or is nil.
774-
func anthropicThinkingEffort(b *latest.ThinkingBudget) (string, bool) {
775-
if b == nil {
776-
return "", false
777-
}
778-
if e, ok := b.AdaptiveEffort(); ok {
779-
return e, true
780-
}
781-
l, ok := b.EffortLevel()
782-
if !ok {
783-
return "", false
784-
}
785-
return effort.ForAnthropic(l)
786-
}
787-
788669
// anthropicContextLimit returns a reasonable default context window for Anthropic models.
789670
// We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time.
790671
func anthropicContextLimit(model string) int64 {

0 commit comments

Comments
 (0)