-
-
Notifications
You must be signed in to change notification settings - Fork 406
Expand file tree
/
Copy pathinit.lua
More file actions
443 lines (408 loc) · 15.6 KB
/
init.lua
File metadata and controls
443 lines (408 loc) · 15.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
local adapter_utils = require("codecompanion.utils.adapters")
local get_models = require("codecompanion.adapters.http.copilot.get_models")
local log = require("codecompanion.utils.log")
local stats = require("codecompanion.adapters.http.copilot.stats")
local token = require("codecompanion.adapters.http.copilot.token")
local tokens = require("codecompanion.utils.tokens")
local _fetching_models = false
local version = vim.version()
---Resolve the option table for the adapter's currently selected model
---@param adapter CodeCompanion.HTTPAdapter
---@return table
local function resolve_model_opts(adapter)
  local schema_model = adapter.schema.model
  local selected = schema_model.default
  local available = schema_model.choices

  -- Both the default model and the choices may be lazily evaluated
  if type(selected) == "function" then
    selected = selected(adapter)
  end
  if type(available) == "function" then
    -- Avoid blocking during initialization
    available = available(adapter, { async = true })
  end

  local found = available and available[selected] or nil
  if found and adapter.model then
    adapter.model.info = found
  end
  return found or { opts = {} }
end
---Return the handlers of a specific adapter, ensuring the correct endpoint is set
---Models that declare `endpoint == "responses"` are routed to the OpenAI
---Responses API; everything else uses the classic chat/completions adapter.
---@param adapter CodeCompanion.HTTPAdapter
---@return table
local function handlers(adapter)
  local model_opts = resolve_model_opts(adapter)
  -- Strip any existing endpoint suffix so the correct one can be re-appended below
  local current_url = adapter.url or "https://api.githubcopilot.com/chat/completions"
  local base_url = current_url:gsub("/chat/completions$", ""):gsub("/responses$", "")
  if model_opts.endpoint == "responses" then
    adapter.url = base_url .. "/responses"
    local responses = require("codecompanion.adapters.http.openai_responses")
    -- Backwards compatibility for handlers
    -- NOTE(review): these assignments mutate the shared table returned by
    -- require(), so the flat aliases are visible to every consumer of the
    -- openai_responses module. Re-assigning on each call is idempotent, but
    -- confirm no other caller depends on the un-aliased handler table.
    responses.handlers.setup = function(self)
      return responses.handlers.lifecycle.setup(self)
    end
    responses.handlers.on_exit = function(self, data)
      return responses.handlers.lifecycle.on_exit(self, data)
    end
    responses.handlers.form_parameters = function(self, params, messages)
      return responses.handlers.request.build_parameters(self, params, messages)
    end
    responses.handlers.form_messages = function(self, messages)
      return responses.handlers.request.build_messages(self, messages)
    end
    responses.handlers.form_tools = function(self, tools)
      return responses.handlers.request.build_tools(self, tools)
    end
    responses.handlers.chat_output = function(self, data, tools)
      return responses.handlers.response.parse_chat(self, data, tools)
    end
    responses.handlers.inline_output = function(self, data, context)
      return responses.handlers.response.parse_inline(self, data, context)
    end
    responses.handlers.tokens = function(self, data)
      return responses.handlers.response.parse_tokens(self, data)
    end
    responses.handlers.tools.format_tool_calls = function(self, tools)
      return responses.handlers.tools.format_calls(self, tools)
    end
    responses.handlers.tools.output_response = function(self, tool_call, output)
      return responses.handlers.tools.format_response(self, tool_call, output)
    end
    return responses.handlers
  end
  -- Default path: the standard OpenAI-compatible chat/completions endpoint
  adapter.url = base_url .. "/chat/completions"
  return require("codecompanion.adapters.http.openai").handlers
end
---@class CodeCompanion.HTTPAdapter.Copilot: CodeCompanion.HTTPAdapter
return {
  name = "copilot",
  formatted_name = "Copilot",
  -- Map CodeCompanion's role names onto the OpenAI-style roles Copilot expects
  roles = {
    llm = "assistant",
    tool = "tool",
    user = "user",
  },
  -- Capabilities the adapter advertises; setup() may disable tools/vision
  -- for models that lack support
  opts = {
    stream = true,
    tools = true,
    vision = true,
  },
  features = {
    text = true,
    tokens = true,
  },
  url = "https://api.githubcopilot.com/chat/completions",
  env = {
    ---Fetch (forcing a refresh) the short-lived Copilot API token
    ---@return string
    api_key = function()
      return token.fetch({ force = true }).copilot_token
    end,
  },
  headers = {
    Authorization = "Bearer ${api_key}",
    ["Content-Type"] = "application/json",
    ["Copilot-Integration-Id"] = "vscode-chat",
    -- Copilot requires an editor identifier; report the running Neovim version
    ["Editor-Version"] = "Neovim/" .. version.major .. "." .. version.minor .. "." .. version.patch,
  },
  ---Display Copilot usage statistics to the user
  show_copilot_stats = function()
    return stats.show()
  end,
handlers = {
---Initiate fetching the models in the background as soon as the adapter is resolved
---@param self CodeCompanion.HTTPAdapter
---@return nil
resolve = function(self)
  -- A fetch is already in flight; don't start another
  if _fetching_models then
    return
  end
  _fetching_models = true

  -- Defer token initialization - only fetch models in background without requiring tokens
  vim.schedule(function()
    pcall(function()
      -- Only fetch models if we already have a token cached, otherwise skip
      local cached = token.fetch()
      local has_token = cached and cached.copilot_token
      if has_token then
        get_models.choices(self, { token = cached, async = true })
      end
    end)
    _fetching_models = false
  end)
end,
---Check for a token before starting the request
---Also reconciles the adapter's advertised capabilities (stream/tools/vision)
---with what the selected model actually supports.
---@param self CodeCompanion.HTTPAdapter
---@return boolean
setup = function(self)
  -- Ensure models are fetched synchronously before checking capabilities
  -- This prevents features from being disabled due to missing model info
  local fetched_token = token.fetch({ force = true })
  if fetched_token and fetched_token.copilot_token then
    -- Force synchronous model fetch to ensure we have model capabilities
    get_models.choices(self, { token = fetched_token, async = false })
  end
  local model_opts = resolve_model_opts(self)
  -- Stream only when both the adapter and the model allow it
  if (self.opts and self.opts.stream) and (model_opts and model_opts.opts and model_opts.opts.can_stream) then
    self.parameters.stream = true
  else
    self.parameters.stream = nil
  end
  -- Turn off adapter features the selected model cannot handle
  if (self.opts and self.opts.tools) and (model_opts and model_opts.opts and not model_opts.opts.can_use_tools) then
    self.opts.tools = false
  end
  if (self.opts and self.opts.vision) and (model_opts and model_opts.opts and not model_opts.opts.has_vision) then
    self.opts.vision = false
  end
  return token.init(self)
end,
--- Use the OpenAI adapter for the bulk of the work
form_parameters = function(self, params, messages)
  local delegate = handlers(self)
  return delegate.form_parameters(self, params, messages)
end,
---Build the outgoing message payload, adding Copilot-specific headers,
---gemini-3 reasoning/merge handling and prompt-cache hints.
form_messages = function(self, messages)
  -- Vision requests require extra headers; set them on the first image found
  for _, m in ipairs(messages) do
    if m._meta and m._meta.tag == "image" and (m.context and m.context.mimetype) then
      self.headers["X-Initiator"] = "user"
      self.headers["Copilot-Vision-Request"] = "true"
      break
    end
  end
  local last_msg = messages[#messages]
  if last_msg and last_msg.role == self.roles.tool then
    -- NOTE: The inclusion of this header reduces premium token usage when
    -- sending tool output back to the LLM (#1717)
    self.headers["X-Initiator"] = "agent"
  end
  -- Capture estimated token counts before OpenAI strips _meta
  local est_tokens = {}
  for _, m in ipairs(messages) do
    if m._meta and m._meta.estimated_tokens and type(m.content) == "string" then
      est_tokens[m.content] = m._meta.estimated_tokens
    end
  end
  local result = handlers(self).form_messages(self, messages)
  -- For gemini-3, merge consecutive LLM messages and ensure that reasoning
  -- data is transformed. This enables consecutive tool calls to be made
  if result.messages then
    local merged = {}
    local i = 1
    while i <= #result.messages do
      local current = result.messages[i]
      -- gemini-3 requires reasoning_text and reasoning_opaque fields
      if current.reasoning then
        if current.reasoning.content then
          current.reasoning_text = current.reasoning.content
        end
        if current.reasoning.opaque then
          current.reasoning_opaque = current.reasoning.opaque
        end
        current.reasoning = nil
      end
      -- From investigating Copilot Chat's output, tool_calls are merged
      -- into a single message per role with reasoning data
      -- NOTE(review): only the immediately-following message is folded in, and
      -- any tool_calls already on `current` would be replaced -- confirm the
      -- upstream adapter never emits that shape
      if
        i < #result.messages
        and result.messages[i + 1].role == current.role
        and result.messages[i + 1].tool_calls
        and not result.messages[i + 1].content
      then
        current.tool_calls = result.messages[i + 1].tool_calls
        i = i + 1 -- Skip the next message since we merged it
      end
      table.insert(merged, current)
      i = i + 1
    end
    result.messages = merged
  end
  -- Add copilot_cache_control to the top 4 messages by estimated token count.
  -- Uses pre-computed _meta.estimated_tokens from the chat interaction,
  -- falling back to on-the-fly calculation for messages without estimates
  if result.messages and #result.messages > 0 then
    local scored = {}
    for i, m in ipairs(result.messages) do
      local est = type(m.content) == "string" and est_tokens[m.content] or nil
      if not est and type(m.content) == "string" then
        est = tokens.calculate(m.content)
      end
      table.insert(scored, { index = i, tokens = est or 0 })
    end
    -- Sort descending by estimated size so the largest messages are cached
    table.sort(scored, function(a, b)
      return a.tokens > b.tokens
    end)
    -- Copilot limits us to 4 cache points at most
    for j = 1, math.min(4, #scored) do
      result.messages[scored[j].index].copilot_cache_control = { type = "ephemeral" }
    end
  end
  return result
end,
---Delegate tool formatting to the resolved upstream adapter
form_tools = function(self, tools)
  local delegate = handlers(self)
  return delegate.form_tools(self, tools)
end,
---Collapse streamed reasoning chunks into a single content string, keeping
---the first opaque blob encountered so it can be echoed back to the API
form_reasoning = function(self, data)
  local parts = {}
  local opaque
  for _, chunk in ipairs(data) do
    if chunk.content ~= nil then
      parts[#parts + 1] = chunk.content
    end
    if opaque == nil and chunk.opaque then
      opaque = chunk.opaque
    end
  end
  return {
    content = table.concat(parts),
    opaque = opaque,
  }
end,
---Copilot with Gemini 3 provides reasoning data that must be sent back in responses
---@param self CodeCompanion.HTTPAdapter
---@param data table
---@return table
parse_message_meta = function(self, data)
  local extra = data.extra
  if not extra then
    return data
  end

  local output = data.output
  if extra.reasoning_text or extra.reasoning_opaque then
    local reasoning = output.reasoning or {}
    if extra.reasoning_text then
      reasoning.content = extra.reasoning_text
    end
    if extra.reasoning_opaque then
      reasoning.opaque = extra.reasoning_opaque
    end
    output.reasoning = reasoning
  end

  -- Normalise an empty string to nil so downstream treats it as no content
  if output.content == "" then
    output.content = nil
  end
  return data
end,
---Extract a token count from a streamed chunk's usage block, if present
---Returns nil when the chunk is empty, undecodable or carries no usage data.
tokens = function(self, data)
  if not data or data == "" then
    return
  end
  local cleaned = adapter_utils.clean_streamed_data(data)
  local ok, decoded = pcall(vim.json.decode, cleaned, { luanil = { object = true } })
  if not ok then
    return
  end
  local usage = decoded.usage
  if not usage then
    return
  end
  -- Prefer the reported total; otherwise sum completion + prompt tokens
  local total = usage.total_tokens or 0
  local count
  if total > 0 then
    count = total
  else
    count = (usage.completion_tokens or 0) + (usage.prompt_tokens or 0)
  end
  log:trace("Tokens: %s", count)
  return count
end,
---Surface Copilot quota errors directly; otherwise delegate to the upstream adapter
chat_output = function(self, data, tools)
  local is_quota_error = type(data) == "string"
    and data:match("quota") ~= nil
    and data:match("exceeded") ~= nil
  if is_quota_error then
    return {
      status = "error",
      output = "Your Copilot quota has been exceeded for this conversation",
    }
  end
  return handlers(self).chat_output(self, data, tools, { adapter = "copilot" })
end,
tools = {
  ---Delegate tool-call formatting to the resolved upstream adapter
  format_tool_calls = function(self, tools)
    local delegate = handlers(self)
    return delegate.tools.format_tool_calls(self, tools)
  end,
  ---Delegate tool-output formatting to the resolved upstream adapter
  output_response = function(self, tool_call, output)
    local delegate = handlers(self)
    return delegate.tools.output_response(self, tool_call, output)
  end,
},
---Delegate inline output parsing to the resolved upstream adapter
inline_output = function(self, data, context)
  local delegate = handlers(self)
  return delegate.inline_output(self, data, context)
end,
---Clear the cached model list, then run the upstream exit handler
on_exit = function(self, data)
  get_models.reset_cache()
  local delegate = handlers(self)
  return delegate.on_exit(self, data)
end,
},
schema = {
  ---@type CodeCompanion.Schema
  model = {
    order = 1,
    mapping = "parameters",
    type = "enum",
    desc = "ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
    ---@type string|fun(): string
    default = "gpt-4.1",
    ---@type fun(self: CodeCompanion.HTTPAdapter, opts?: table): table
    choices = function(self, opts)
      opts = opts or {}
      -- Force token initialization for synchronous requests (user-initiated model selection)
      -- Don't force for async requests (background operations)
      local force = opts.async == false
      local fetched = token.fetch({ force = force })
      if not fetched or not fetched.copilot_token then
        -- No token available: fall back to the default model so the UI still works
        return { ["gpt-4.1"] = { opts = {} } }
      end
      return get_models.choices(self, { token = fetched, async = opts.async })
    end,
  },
  ---@type CodeCompanion.Schema
  temperature = {
    order = 3,
    mapping = "parameters",
    type = "number",
    default = 0.1,
    ---Disabled for model families that reject a temperature parameter
    ---NOTE(review): `model()` is invoked without arguments here, while
    ---resolve_model_opts passes the adapter -- confirm the default function
    ---tolerates a nil argument
    ---@type fun(self: CodeCompanion.HTTPAdapter): boolean
    enabled = function(self)
      local model = self.schema.model.default
      if type(model) == "function" then
        model = model()
      end
      return not vim.startswith(model, "o1") and not model:find("codex") and not vim.startswith(model, "gpt-5")
    end,
    desc = "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.",
  },
  max_tokens = {
    order = 4,
    mapping = "parameters",
    type = "integer",
    ---Default to the model's advertised output limit when known
    default = function(self)
      local model_opts = resolve_model_opts(self)
      if model_opts.limits and model_opts.limits.max_output_tokens then
        return tonumber(model_opts.limits.max_output_tokens)
      end
      return 16384
    end,
    desc = "The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length.",
  },
  ---@type CodeCompanion.Schema
  top_p = {
    order = 5,
    mapping = "parameters",
    type = "number",
    default = 1,
    ---Disabled for o1 models
    ---@type fun(self: CodeCompanion.HTTPAdapter): boolean
    enabled = function(self)
      local model = self.schema.model.default
      if type(model) == "function" then
        model = model()
      end
      return not vim.startswith(model, "o1")
    end,
    desc = "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.",
  },
  ---@type CodeCompanion.Schema
  n = {
    order = 6,
    mapping = "parameters",
    type = "number",
    default = 1,
    ---Disabled for o1 models
    ---@type fun(self: CodeCompanion.HTTPAdapter): boolean
    enabled = function(self)
      local model = self.schema.model.default
      if type(model) == "function" then
        model = model()
      end
      return not vim.startswith(model, "o1")
    end,
    desc = "How many chat completions to generate for each prompt.",
  },
},
}