codealive-mcp/src/utils/response_transformer.py at f2ae8a31b5896211773eb36627c0a7e7c910961d · CodeAlive-AI/codealive-mcp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""Response transformation utilities for API responses."""

from typing import Any, Dict, List, Optional


# Hint embedded in every search response. Tells the agent that
# `description` is only a pointer for triage and that the source of truth
# is the full content returned by `fetch_artifacts` (or a local `Read()`
# for repos in the working directory).
_SEARCH_HINT = (
    "`description` is only a triage hint to help you decide which artifacts "
    "deserve a closer look — DO NOT base your understanding of the code on it. "
    "For every artifact you consider relevant you MUST load the real source "
    "via `fetch_artifacts` (using the `identifier`) for external repos, or "
    "`Read()` on the file path for repos in the local working directory. "
    "Treat only the `content` returned by `fetch_artifacts` (or `Read()`) as "
    "ground truth."
)

_GREP_HINT = (
    "Line previews in `matches` are only search evidence. Before reasoning about "
    "behavior, load the full source via `fetch_artifacts` for external repos or "
    "`Read()` on the local path. Treat only the full source as ground truth."
)

# Hints for empty results — guide the agent through recovery instead of
# suggesting fetch_artifacts on nothing.
_SEARCH_EMPTY_HINT = (
    "No results matched your semantic query. This does NOT mean the code "
    "doesn't exist — only that this phrasing didn't match indexed artifacts. "
    "Before concluding absence: "
    "(1) rephrase with synonyms, broader terms, or a different level "
    "of abstraction; "
    "(2) if you know a specific identifier, class name, error message, "
    "or literal string, use `grep_search` instead — semantic search "
    "finds by meaning, not by exact text; "
    "(3) call `get_data_sources` to verify you're searching the correct "
    "repository; "
    "(4) if you used `paths` or `extensions` filters, retry without them."
)

_GREP_EMPTY_HINT = (
    "No grep matches found. This does NOT mean the code doesn't exist — "
    "the exact string may differ in casing, spacing, or naming convention. "
    "Before concluding absence: "
    "(1) check case — grep is case-sensitive by default; "
    "(2) if you're exploring a concept or unsure of exact naming, use "
    "`semantic_search` — it finds code by meaning, not literal text; "
    "(3) call `get_data_sources` to verify you're searching the correct "
    "repository; "
    "(4) if you used `paths` or `extensions` filters, retry without them."
)


def transform_search_response(
    search_results: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Transform search API response to a dict for LLM consumption.

    Returns a dict that FastMCP serializes automatically via
    ``pydantic_core.to_json`` — no manual ``json.dumps`` needed.
    """
    if not isinstance(search_results, dict) or "results" not in search_results:
        return {"results": [], "hint": _SEARCH_EMPTY_HINT}

    results = search_results.get("results", [])
    formatted_results = _format_results(results or [])

    if not formatted_results:
        return {"results": [], "hint": _SEARCH_EMPTY_HINT}

    return {"results": formatted_results, "hint": _SEARCH_HINT}


def transform_grep_response(grep_results: Dict[str, Any]) -> Dict[str, Any]:
    """Transform canonical grep response to a dict for LLM consumption."""
    if not isinstance(grep_results, dict) or "results" not in grep_results:
        return {"results": [], "hint": _GREP_EMPTY_HINT}

    formatted_results: List[Dict[str, Any]] = []
    for result in grep_results.get("results", []) or []:
        kind = result.get("kind", "")
        if kind == "Folder":
            continue

        path = _extract_path_from_result(result)
        if not path:
            continue

        item = _build_result_dict(path, result)
        if result.get("matchCount") is not None:
            item["matchCount"] = result["matchCount"]
        if result.get("matches"):
            item["matches"] = [
                _build_match_dict(match) for match in result["matches"]
            ]
        # Forward matchedByName only when the backend set it (name-only hits).
        # The backend omits the field for content matches via System.Text.Json
        # WhenWritingNull, so `get("matchedByName")` is None/missing for those
        # and we skip it here to keep the happy path free of an extra key.
        if result.get("matchedByName"):
            item["matchedByName"] = True
        formatted_results.append(item)

    if not formatted_results:
        return {"results": [], "hint": _GREP_EMPTY_HINT}

    return {"results": formatted_results, "hint": _GREP_HINT}


def _build_match_dict(match: Dict) -> Dict[str, Any]:
    """Build a match dict, forwarding remark only when present."""
    entry: Dict[str, Any] = {
        "lineNumber": match.get("lineNumber"),
        "startColumn": match.get("startColumn"),
        "endColumn": match.get("endColumn"),
        "lineText": match.get("lineText"),
    }
    if match.get("remark") is not None:
        entry["remark"] = match["remark"]
    return entry


# Backward-compatible aliases (deprecated)
transform_search_response_to_json = transform_search_response
transform_grep_response_to_json = transform_grep_response


def _format_results(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    formatted_results: List[Dict[str, Any]] = []
    for result in results:
        kind = result.get("kind", "")

        if kind == "Folder":
            continue

        path = _extract_path_from_result(result)
        if not path:
            continue

        formatted_results.append(_build_result_dict(path, result))

    return formatted_results


def _extract_path_from_result(result: Dict) -> Optional[str]:
    """Extract file path from a search result."""
    if result.get("location", {}).get("path"):
        return result["location"]["path"]
    elif result.get("identifier"):
        # Extract path from identifier (format: "{owner/repo}::{path}::{symbol_or_chunk}")
        parts = result["identifier"].split("::")
        if len(parts) >= 2:
            return parts[1]
    return None


def _build_result_dict(path: str, result: Dict) -> Dict[str, Any]:
    """Build a JSON-serializable dict from a search result."""
    info: Dict[str, Any] = {"path": path}

    # Add line numbers for symbols
    range_obj = result.get("location", {}).get("range") if result.get("location") else None
    if range_obj:
        start = range_obj.get("start", {}) or {}
        end = range_obj.get("end", {}) or {}
        if start.get("line") is not None:
            info["startLine"] = start["line"]
        if end.get("line") is not None:
            info["endLine"] = end["line"]

    if result.get("kind"):
        info["kind"] = result["kind"]

    if result.get("identifier"):
        info["identifier"] = result["identifier"]

    if result.get("contentByteSize") is not None:
        info["contentByteSize"] = result["contentByteSize"]

    if result.get("description"):
        info["description"] = result["description"]
    elif result.get("snippet"):
        # Snippet acts as a fallback when no description is available
        info["snippet"] = result["snippet"]

    return info