Improve tool discoverability: expand docstrings and add Literal type for profile

rodion-m · claude · rodion-m · commit 3d414c7d91d6 · 2026-04-14T00:36:05.000+07:00
- Expand semantic_search and grep_search docstrings with full Args, Returns,
  and Examples sections so agents see complete usage guidance in tool descriptions
- Use Literal["callsOnly", "inheritanceOnly", "allRelevant", "referencesOnly"]
  for get_artifact_relationships profile parameter — generates JSON Schema enum
  so agents see valid values without guessing
- Update e2e test for invalid profile to expect Pydantic validation error

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/tests/test_e2e_tools.py b/src/tests/test_e2e_tools.py
@@ -1073,6 +1073,7 @@ async def test_not_found(self):
 
     @pytest.mark.asyncio
     async def test_invalid_profile_returns_error(self):
+        """Pydantic rejects invalid Literal values before the function body runs."""
         mcp = _server({})
         async with Client(mcp) as client:
             result = await client.call_tool(
@@ -1082,9 +1083,10 @@ async def test_invalid_profile_returns_error(self):
             )
 
         text = _text(result)
-        data = json.loads(text)
-        assert "error" in data
-        assert "Unsupported profile" in data["error"]
+        # Pydantic Literal validation fires before the function body, producing
+        # a human-readable validation error (not our custom JSON).
+        assert "callsOnly" in text
+        assert "literal_error" in text or "Input should be" in text
 
     @pytest.mark.asyncio
     async def test_empty_identifier_returns_error(self):
diff --git a/src/tools/artifact_relationships.py b/src/tools/artifact_relationships.py
@@ -1,7 +1,7 @@
 """Artifact relationships tool implementation."""
 
 import json
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Literal, Optional
 from urllib.parse import urljoin
 
 import httpx
@@ -34,7 +34,7 @@
 async def get_artifact_relationships(
     ctx: Context,
     identifier: str,
-    profile: str = "callsOnly",
+    profile: Literal["callsOnly", "inheritanceOnly", "allRelevant", "referencesOnly"] = "callsOnly",
     max_count_per_type: int = 50,
 ) -> str:
     """
diff --git a/src/tools/search.py b/src/tools/search.py
@@ -142,10 +142,56 @@ async def semantic_search(
     max_results: Optional[int] = None,
 ) -> str:
     """
-    Canonical semantic search across indexed repositories and workspaces.
-
-    Use this for natural-language exploration when you want relevant artifacts by meaning.
-    For exact or regex matching, use `grep_search` instead.
+    Search indexed code by meaning — the default discovery tool.
+
+    Use this for natural-language exploration when you want relevant artifacts
+    by meaning: function names, concepts, architecture patterns, etc.
+    For exact string or regex matching, use `grep_search` instead.
+
+    Args:
+        query: Natural-language description of what you're looking for.
+               Example: "authentication middleware", "database connection pooling",
+               "JWT token validation"
+
+        data_sources: Repository or workspace names to search.
+                      Omit to use the API key's default data source.
+                      Call `get_data_sources` first to discover available names.
+                      Example: ["backend", "workspace:payments-team"]
+
+        paths: Restrict results to specific directory paths.
+               Example: ["src/services", "src/domain"]
+
+        extensions: Restrict results to specific file extensions.
+                    Example: [".cs", ".py", ".ts"]
+
+        max_results: Maximum number of results to return (1–500).
+                     Omit for the server default.
+
+    Returns:
+        Compact JSON: {"results": [...], "hint": "..."}
+
+        Each result contains:
+        - path: file path within the repository
+        - identifier: fully qualified artifact ID — pass this to `fetch_artifacts`
+        - kind: "File", "Symbol", or "Chunk"
+        - description: short triage summary (NOT the real source — see hint)
+        - startLine/endLine: line range (for symbols)
+        - contentByteSize: file size in bytes
+
+        The `hint` field reminds you to load real source code via
+        `fetch_artifacts(identifier)` or local `Read(path)` before reasoning
+        about the code.
+
+    Examples:
+        1. Find authentication code:
+           semantic_search(query="authentication middleware",
+                           data_sources=["backend"])
+
+        2. Narrow to Python files in a specific directory:
+           semantic_search(query="database retry logic",
+                           data_sources=["backend"],
+                           paths=["src/services"],
+                           extensions=[".py"])
     """
     tool_name = "semantic_search"
     query_error = _validate_query(query, tool_name)
@@ -197,9 +243,54 @@ async def grep_search(
     regex: bool = False,
 ) -> str:
     """
-    Canonical exact/regex search across indexed repositories and workspaces.
+    Search indexed code by exact text or regex pattern.
+
+    Use this when the literal string or pattern matters: function names, error
+    messages, config keys, import paths, TODO comments, etc.
+    For meaning-based exploration, use `semantic_search` instead.
+
+    Args:
+        query: Exact text or regex pattern to match.
+               Literal examples: "ConnectionString", "TODO: fix", "import numpy"
+               Regex examples: "def test_.*async", "Status\\.(Alive|Failed)"
+
+        data_sources: Repository or workspace names to search.
+                      Omit to use the API key's default data source.
+                      Call `get_data_sources` first to discover available names.
+
+        paths: Restrict results to specific directory paths.
+               Example: ["src/services"]
+
+        extensions: Restrict results to specific file extensions.
+                    Example: [".cs", ".py"]
+
+        max_results: Maximum number of results to return (1–500).
+
+        regex: If True, treat `query` as a regex pattern. Default: False (literal).
+
+    Returns:
+        Compact JSON: {"results": [...], "hint": "..."}
+
+        Each result contains:
+        - path: file path
+        - identifier: pass to `fetch_artifacts` for full source
+        - matchCount: total matches in this file
+        - matches: array of line-level hits, each with:
+          - lineNumber, startColumn, endColumn, lineText
+
+        The `hint` reminds you that line previews are evidence only — load
+        full source via `fetch_artifacts` or local `Read()` before reasoning.
+
+    Examples:
+        1. Find exact string:
+           grep_search(query="ConnectionString",
+                       data_sources=["backend"])
 
-    Use this for literal string lookup or regex matching when the pattern itself matters.
+        2. Regex search for test methods:
+           grep_search(query="def test_.*auth",
+                       data_sources=["backend"],
+                       extensions=[".py"],
+                       regex=True)
     """
     tool_name = "grep_search"
     query_error = _validate_query(query, tool_name)