Skip to content

Commit 42a77ee

Browse files
fix(agent): reconnect after successful cert renewal
`run_single_connection` previously returned `Ok(())` on both graceful shutdown and successful cert renewal. The outer reconnect loop treated `Ok(())` as "task done forever", so after a renewal the agent exited and never reconnected with the new cert. Distinguish the two cases by returning a new `ConnectionOutcome::{Shutdown, CertRenewed}` enum: renewal now reconnects immediately (bypassing backoff), while shutdown still exits the task. Also wrap the `CertRenewalResponse` recv in a 30s timeout so a stalled gateway cannot hang the agent indefinitely.
1 parent 00b9a9e commit 42a77ee

1 file changed

Lines changed: 29 additions & 7 deletions

File tree

devolutions-agent/src/tunnel.rs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,15 @@ impl Task for TunnelTask {
132132
let start = std::time::Instant::now();
133133

134134
match run_single_connection(&self.conf_handle, &mut shutdown_signal).await {
135-
Ok(()) => {
135+
Ok(ConnectionOutcome::Shutdown) => {
136136
info!("Tunnel task stopped");
137137
return Ok(());
138138
}
139+
Ok(ConnectionOutcome::CertRenewed) => {
140+
info!("Certificate renewed; reconnecting with new cert immediately");
141+
// Skip backoff — renewal is a successful "completion", not a failure.
142+
continue;
143+
}
139144
Err(error) => {
140145
warn!(error = %format!("{error:#}"), "Tunnel connection lost");
141146
}
@@ -174,11 +179,23 @@ impl Task for TunnelTask {
174179
// Single connection lifetime
175180
// ---------------------------------------------------------------------------
176181

182+
/// Outcome of a single connection lifetime, telling the outer loop what to do next.
183+
enum ConnectionOutcome {
184+
/// Shutdown signal received — exit the tunnel task cleanly.
185+
Shutdown,
186+
/// Certificate was renewed successfully; reconnect immediately with the new cert.
187+
CertRenewed,
188+
}
189+
177190
/// Run a single QUIC tunnel connection lifetime: config → connect → event loop.
178191
///
179-
/// Returns `Ok(())` on graceful shutdown (shutdown signal received).
180-
/// Returns `Err(...)` on any failure — the caller should retry with backoff.
181-
async fn run_single_connection(conf_handle: &ConfHandle, shutdown_signal: &mut ShutdownSignal) -> anyhow::Result<()> {
192+
/// - `Ok(Shutdown)`: graceful shutdown, exit the task.
193+
/// - `Ok(CertRenewed)`: certificate renewed; caller should reconnect immediately.
194+
/// - `Err(...)`: connection lost or handshake failed — caller should retry with backoff.
195+
async fn run_single_connection(
196+
conf_handle: &ConfHandle,
197+
shutdown_signal: &mut ShutdownSignal,
198+
) -> anyhow::Result<ConnectionOutcome> {
182199
// Ensure rustls crypto provider is installed (ring).
183200
let _ = rustls::crypto::ring::default_provider().install_default();
184201

@@ -359,7 +376,12 @@ async fn run_single_connection(conf_handle: &ConfHandle, shutdown_signal: &mut S
359376
.await
360377
.context("send CertRenewalRequest")?;
361378

362-
match ctrl.recv().await.context("receive CertRenewalResponse")? {
379+
let response = tokio::time::timeout(Duration::from_secs(30), ctrl.recv())
380+
.await
381+
.context("timeout waiting for CertRenewalResponse")?
382+
.context("receive CertRenewalResponse")?;
383+
384+
match response {
363385
ControlMessage::CertRenewalResponse {
364386
result:
365387
agent_tunnel_proto::CertRenewalResult::Success {
@@ -373,7 +395,7 @@ async fn run_single_connection(conf_handle: &ConfHandle, shutdown_signal: &mut S
373395
.context("write renewed CA certificate")?;
374396
info!("Certificate renewed successfully, reconnecting with new cert");
375397
connection.close(0u32.into(), b"cert-renewed");
376-
return Ok(());
398+
return Ok(ConnectionOutcome::CertRenewed);
377399
}
378400
ControlMessage::CertRenewalResponse {
379401
result: agent_tunnel_proto::CertRenewalResult::Error { reason },
@@ -448,7 +470,7 @@ async fn run_single_connection(conf_handle: &ConfHandle, shutdown_signal: &mut S
448470

449471
task_handles.shutdown().await;
450472

451-
Ok(())
473+
Ok(ConnectionOutcome::Shutdown)
452474
}
453475

454476
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)