-
-
Notifications
You must be signed in to change notification settings - Fork 406
Expand file tree
/
Copy pathinit.lua
More file actions
443 lines (408 loc) · 15.6 KB
/
init.lua
File metadata and controls
443 lines (408 loc) · 15.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
local adapter_utils = require("codecompanion.utils.adapters")
local get_models = require("codecompanion.adapters.http.copilot.get_models")
local log = require("codecompanion.utils.log")
local stats = require("codecompanion.adapters.http.copilot.stats")
local token = require("codecompanion.adapters.http.copilot.token")
local tokens = require("codecompanion.utils.tokens")
local _fetching_models = false
local version = vim.version()
---Resolve the option table for the adapter's currently selected model
---@param adapter CodeCompanion.HTTPAdapter
---@return table
local function resolve_model_opts(adapter)
  local schema_model = adapter.schema.model
  local selected = schema_model.default
  local available = schema_model.choices

  -- Both the default model and the choices may be lazily evaluated
  if type(selected) == "function" then
    selected = selected(adapter)
  end
  if type(available) == "function" then
    -- Avoid blocking during initialization
    available = available(adapter, { async = true })
  end

  local found = available and available[selected] or nil
  if found and adapter.model then
    adapter.model.info = found
  end
  return found or { opts = {} }
end
---Return the handlers of a specific adapter, ensuring the correct endpoint is set
---Models that declare `endpoint == "responses"` are routed to the OpenAI
---Responses API; everything else uses the classic chat/completions adapter.
---@param adapter CodeCompanion.HTTPAdapter
---@return table
local function handlers(adapter)
  local model_opts = resolve_model_opts(adapter)
  -- Strip any existing endpoint suffix so the correct one can be re-appended below
  local current_url = adapter.url or "https://api.githubcopilot.com/chat/completions"
  local base_url = current_url:gsub("/chat/completions$", ""):gsub("/responses$", "")
  if model_opts.endpoint == "responses" then
    adapter.url = base_url .. "/responses"
    local responses = require("codecompanion.adapters.http.openai_responses")
    -- Backwards compatibility for handlers
    -- NOTE(review): these assignments mutate the shared table returned by
    -- require(), so the flat aliases are visible to every consumer of the
    -- openai_responses module. Re-assigning on each call is idempotent, but
    -- confirm no other caller depends on the un-aliased handler table.
    responses.handlers.setup = function(self)
      return responses.handlers.lifecycle.setup(self)
    end
    responses.handlers.on_exit = function(self, data)
      return responses.handlers.lifecycle.on_exit(self, data)
    end
    responses.handlers.form_parameters = function(self, params, messages)
      return responses.handlers.request.build_parameters(self, params, messages)
    end
    responses.handlers.form_messages = function(self, messages)
      return responses.handlers.request.build_messages(self, messages)
    end
    responses.handlers.form_tools = function(self, tools)
      return responses.handlers.request.build_tools(self, tools)
    end
    responses.handlers.chat_output = function(self, data, tools)
      return responses.handlers.response.parse_chat(self, data, tools)
    end
    responses.handlers.inline_output = function(self, data, context)
      return responses.handlers.response.parse_inline(self, data, context)
    end
    responses.handlers.tokens = function(self, data)
      return responses.handlers.response.parse_tokens(self, data)
    end
    responses.handlers.tools.format_tool_calls = function(self, tools)
      return responses.handlers.tools.format_calls(self, tools)
    end
    responses.handlers.tools.output_response = function(self, tool_call, output)
      return responses.handlers.tools.format_response(self, tool_call, output)
    end
    return responses.handlers
  end
  -- Default path: the standard OpenAI-compatible chat/completions endpoint
  adapter.url = base_url .. "/chat/completions"
  return require("codecompanion.adapters.http.openai").handlers
end
---@class CodeCompanion.HTTPAdapter.Copilot: CodeCompanion.HTTPAdapter
return {
  name = "copilot",
  formatted_name = "Copilot",
  -- Map CodeCompanion's role names onto the OpenAI-style roles Copilot expects
  roles = {
    llm = "assistant",
    tool = "tool",
    user = "user",
  },
  -- Capabilities the adapter advertises; setup() may disable tools/vision
  -- for models that lack support
  opts = {
    stream = true,
    tools = true,
    vision = true,
  },
  features = {
    text = true,
    tokens = true,
  },
  url = "https://api.githubcopilot.com/chat/completions",
  env = {
    ---Fetch (forcing a refresh) the short-lived Copilot API token
    ---@return string
    api_key = function()
      return token.fetch({ force = true }).copilot_token
    end,
  },
  headers = {
    Authorization = "Bearer ${api_key}",
    ["Content-Type"] = "application/json",
    ["Copilot-Integration-Id"] = "vscode-chat",
    -- Copilot requires an editor identifier; report the running Neovim version
    ["Editor-Version"] = "Neovim/" .. version.major .. "." .. version.minor .. "." .. version.patch,
  },
  ---Display Copilot usage statistics to the user
  show_copilot_stats = function()
    return stats.show()
  end,
handlers = {
---Initiate fetching the models in the background as soon as the adapter is resolved
---@param self CodeCompanion.HTTPAdapter
---@return nil
resolve = function(self)
  -- A fetch is already in flight; don't start another
  if _fetching_models then
    return
  end
  _fetching_models = true

  -- Defer token initialization - only fetch models in background without requiring tokens
  vim.schedule(function()
    pcall(function()
      -- Only fetch models if we already have a token cached, otherwise skip
      local cached = token.fetch()
      local has_token = cached and cached.copilot_token
      if has_token then
        get_models.choices(self, { token = cached, async = true })
      end
    end)
    _fetching_models = false
  end)
end,
---Check for a token before starting the request
---Also reconciles the adapter's advertised capabilities (stream/tools/vision)
---with what the selected model actually supports.
---@param self CodeCompanion.HTTPAdapter
---@return boolean
setup = function(self)
  -- Ensure models are fetched synchronously before checking capabilities
  -- This prevents features from being disabled due to missing model info
  local fetched_token = token.fetch({ force = true })
  if fetched_token and fetched_token.copilot_token then
    -- Force synchronous model fetch to ensure we have model capabilities
    get_models.choices(self, { token = fetched_token, async = false })
  end
  local model_opts = resolve_model_opts(self)
  -- Stream only when both the adapter and the model allow it
  if (self.opts and self.opts.stream) and (model_opts and model_opts.opts and model_opts.opts.can_stream) then
    self.parameters.stream = true
  else
    self.parameters.stream = nil
  end
  -- Turn off adapter features the selected model cannot handle
  if (self.opts and self.opts.tools) and (model_opts and model_opts.opts and not model_opts.opts.can_use_tools) then
    self.opts.tools = false
  end
  if (self.opts and self.opts.vision) and (model_opts and model_opts.opts and not model_opts.opts.has_vision) then
    self.opts.vision = false
  end
  return token.init(self)
end,
--- Use the OpenAI adapter for the bulk of the work
form_parameters = function(self, params, messages)
  local delegate = handlers(self)
  return delegate.form_parameters(self, params, messages)
end,
---Build the outgoing message payload, adding Copilot-specific headers,
---gemini-3 reasoning/merge handling and prompt-cache hints.
form_messages = function(self, messages)
  -- Vision requests require extra headers; set them on the first image found
  for _, m in ipairs(messages) do
    if m._meta and m._meta.tag == "image" and (m.context and m.context.mimetype) then
      self.headers["X-Initiator"] = "user"
      self.headers["Copilot-Vision-Request"] = "true"
      break
    end
  end
  local last_msg = messages[#messages]
  if last_msg and last_msg.role == self.roles.tool then
    -- NOTE: The inclusion of this header reduces premium token usage when
    -- sending tool output back to the LLM (#1717)
    self.headers["X-Initiator"] = "agent"
  end
  -- Capture estimated token counts before OpenAI strips _meta
  local est_tokens = {}
  for _, m in ipairs(messages) do
    if m._meta and m._meta.estimated_tokens and type(m.content) == "string" then
      est_tokens[m.content] = m._meta.estimated_tokens
    end
  end
  local result = handlers(self).form_messages(self, messages)
  -- For gemini-3, merge consecutive LLM messages and ensure that reasoning
  -- data is transformed. This enables consecutive tool calls to be made
  if result.messages then
    local merged = {}
    local i = 1
    while i <= #result.messages do
      local current = result.messages[i]
      -- gemini-3 requires reasoning_text and reasoning_opaque fields
      if current.reasoning then
        if current.reasoning.content then
          current.reasoning_text = current.reasoning.content
        end
        if current.reasoning.opaque then
          current.reasoning_opaque = current.reasoning.opaque
        end
        current.reasoning = nil
      end
      -- From investigating Copilot Chat's output, tool_calls are merged
      -- into a single message per role with reasoning data
      -- NOTE(review): only the immediately-following message is folded in, and
      -- any tool_calls already on `current` would be replaced -- confirm the
      -- upstream adapter never emits that shape
      if
        i < #result.messages
        and result.messages[i + 1].role == current.role
        and result.messages[i + 1].tool_calls
        and not result.messages[i + 1].content
      then
        current.tool_calls = result.messages[i + 1].tool_calls
        i = i + 1 -- Skip the next message since we merged it
      end
      table.insert(merged, current)
      i = i + 1
    end
    result.messages = merged
  end
  -- Add copilot_cache_control to the top 4 messages by estimated token count.
  -- Uses pre-computed _meta.estimated_tokens from the chat interaction,
  -- falling back to on-the-fly calculation for messages without estimates
  if result.messages and #result.messages > 0 then
    local scored = {}
    for i, m in ipairs(result.messages) do
      local est = type(m.content) == "string" and est_tokens[m.content] or nil
      if not est and type(m.content) == "string" then
        est = tokens.calculate(m.content)
      end
      table.insert(scored, { index = i, tokens = est or 0 })
    end
    -- Sort descending by estimated size so the largest messages are cached
    table.sort(scored, function(a, b)
      return a.tokens > b.tokens
    end)
    -- Copilot limits us to 4 cache points at most
    for j = 1, math.min(4, #scored) do
      result.messages[scored[j].index].copilot_cache_control = { type = "ephemeral" }
    end
  end
  return result
end,
---Delegate tool formatting to the resolved upstream adapter
form_tools = function(self, tools)
  local delegate = handlers(self)
  return delegate.form_tools(self, tools)
end,
---Collapse streamed reasoning chunks into a single content string, keeping
---the first opaque blob encountered so it can be echoed back to the API
form_reasoning = function(self, data)
  local parts = {}
  local opaque
  for _, chunk in ipairs(data) do
    if chunk.content ~= nil then
      parts[#parts + 1] = chunk.content
    end
    if opaque == nil and chunk.opaque then
      opaque = chunk.opaque
    end
  end
  return {
    content = table.concat(parts),
    opaque = opaque,
  }
end,
---Copilot with Gemini 3 provides reasoning data that must be sent back in responses
---@param self CodeCompanion.HTTPAdapter
---@param data table
---@return table
parse_message_meta = function(self, data)
  local extra = data.extra
  if not extra then
    return data
  end

  local output = data.output
  if extra.reasoning_text or extra.reasoning_opaque then
    local reasoning = output.reasoning or {}
    if extra.reasoning_text then
      reasoning.content = extra.reasoning_text
    end
    if extra.reasoning_opaque then
      reasoning.opaque = extra.reasoning_opaque
    end
    output.reasoning = reasoning
  end

  -- Normalise an empty string to nil so downstream treats it as no content
  if output.content == "" then
    output.content = nil
  end
  return data
end,
---Extract a token count from a streamed chunk's usage block, if present
---Returns nil when the chunk is empty, undecodable or carries no usage data.
tokens = function(self, data)
  if not data or data == "" then
    return
  end
  local cleaned = adapter_utils.clean_streamed_data(data)
  local ok, decoded = pcall(vim.json.decode, cleaned, { luanil = { object = true } })
  if not ok then
    return
  end
  local usage = decoded.usage
  if not usage then
    return
  end
  -- Prefer the reported total; otherwise sum completion + prompt tokens
  local total = usage.total_tokens or 0
  local count
  if total > 0 then
    count = total
  else
    count = (usage.completion_tokens or 0) + (usage.prompt_tokens or 0)
  end
  log:trace("Tokens: %s", count)
  return count
end,
---Surface Copilot quota errors directly; otherwise delegate to the upstream adapter
chat_output = function(self, data, tools)
  local is_quota_error = type(data) == "string"
    and data:match("quota") ~= nil
    and data:match("exceeded") ~= nil
  if is_quota_error then
    return {
      status = "error",
      output = "Your Copilot quota has been exceeded for this conversation",
    }
  end
  return handlers(self).chat_output(self, data, tools, { adapter = "copilot" })
end,
tools = {
  ---Delegate tool-call formatting to the resolved upstream adapter
  format_tool_calls = function(self, tools)
    local delegate = handlers(self)
    return delegate.tools.format_tool_calls(self, tools)
  end,
  ---Delegate tool-output formatting to the resolved upstream adapter
  output_response = function(self, tool_call, output)
    local delegate = handlers(self)
    return delegate.tools.output_response(self, tool_call, output)
  end,
},
---Delegate inline output parsing to the resolved upstream adapter
inline_output = function(self, data, context)
  local delegate = handlers(self)
  return delegate.inline_output(self, data, context)
end,
---Clear the cached model list, then run the upstream exit handler
on_exit = function(self, data)
  get_models.reset_cache()
  local delegate = handlers(self)
  return delegate.on_exit(self, data)
end,
},
schema = {
  ---@type CodeCompanion.Schema
  model = {
    order = 1,
    mapping = "parameters",
    type = "enum",
    desc = "ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
    ---@type string|fun(): string
    default = "gpt-4.1",
    ---@type fun(self: CodeCompanion.HTTPAdapter, opts?: table): table
    choices = function(self, opts)
      opts = opts or {}
      -- Force token initialization for synchronous requests (user-initiated model selection)
      -- Don't force for async requests (background operations)
      local force = opts.async == false
      local fetched = token.fetch({ force = force })
      if not fetched or not fetched.copilot_token then
        -- No token available: fall back to the default model so the UI still works
        return { ["gpt-4.1"] = { opts = {} } }
      end
      return get_models.choices(self, { token = fetched, async = opts.async })
    end,
  },
  ---@type CodeCompanion.Schema
  temperature = {
    order = 3,
    mapping = "parameters",
    type = "number",
    default = 0.1,
    ---Disabled for model families that reject a temperature parameter
    ---NOTE(review): `model()` is invoked without arguments here, while
    ---resolve_model_opts passes the adapter -- confirm the default function
    ---tolerates a nil argument
    ---@type fun(self: CodeCompanion.HTTPAdapter): boolean
    enabled = function(self)
      local model = self.schema.model.default
      if type(model) == "function" then
        model = model()
      end
      return not vim.startswith(model, "o1") and not model:find("codex") and not vim.startswith(model, "gpt-5")
    end,
    desc = "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.",
  },
  max_tokens = {
    order = 4,
    mapping = "parameters",
    type = "integer",
    ---Default to the model's advertised output limit when known
    default = function(self)
      local model_opts = resolve_model_opts(self)
      if model_opts.limits and model_opts.limits.max_output_tokens then
        return tonumber(model_opts.limits.max_output_tokens)
      end
      return 16384
    end,
    desc = "The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length.",
  },
  ---@type CodeCompanion.Schema
  top_p = {
    order = 5,
    mapping = "parameters",
    type = "number",
    default = 1,
    ---Disabled for o1 models
    ---@type fun(self: CodeCompanion.HTTPAdapter): boolean
    enabled = function(self)
      local model = self.schema.model.default
      if type(model) == "function" then
        model = model()
      end
      return not vim.startswith(model, "o1")
    end,
    desc = "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.",
  },
  ---@type CodeCompanion.Schema
  n = {
    order = 6,
    mapping = "parameters",
    type = "number",
    default = 1,
    ---Disabled for o1 models
    ---@type fun(self: CodeCompanion.HTTPAdapter): boolean
    enabled = function(self)
      local model = self.schema.model.default
      if type(model) == "function" then
        model = model()
      end
      return not vim.startswith(model, "o1")
    end,
    desc = "How many chat completions to generate for each prompt.",
  },
},
}