Skip to content

Commit 393c38c

Browse files
authored
Merge branch 'main' into bewithgaurav/sanitize-connstr-parser
2 parents e5b2873 + acbe020 commit 393c38c

File tree

4 files changed

+384
-23
lines changed

4 files changed

+384
-23
lines changed

mssql_python/pybind/connection/connection.cpp

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,16 @@ void Connection::connect(const py::dict& attrs_before) {
8585
#else
8686
connStrPtr = const_cast<SQLWCHAR*>(_connStr.c_str());
8787
#endif
88-
SQLRETURN ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr,
89-
0, nullptr, SQL_DRIVER_NOPROMPT);
88+
SQLRETURN ret;
89+
{
90+
// Release the GIL during the blocking ODBC connect call.
91+
// SQLDriverConnect involves DNS resolution, TCP handshake, TLS negotiation,
92+
// and SQL Server authentication — all pure I/O that doesn't need the GIL.
93+
// This allows other Python threads to run concurrently.
94+
py::gil_scoped_release release;
95+
ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr,
96+
0, nullptr, SQL_DRIVER_NOPROMPT);
97+
}
9098
checkError(ret);
9199
updateLastUsed();
92100
}
@@ -95,6 +103,11 @@ void Connection::disconnect() {
95103
if (_dbcHandle) {
96104
LOG("Disconnecting from database");
97105

106+
// Check if we hold the GIL so we can conditionally release it.
107+
// The GIL is held when called from pybind11-bound methods but may NOT
108+
// be held in destructor paths (C++ shared_ptr ref-count drop, shutdown).
109+
bool hasGil = PyGILState_Check() != 0;
110+
98111
// CRITICAL FIX: Mark all child statement handles as implicitly freed
99112
// When we free the DBC handle below, the ODBC driver will automatically free
100113
// all child STMT handles. We need to tell the SqlHandle objects about this
@@ -135,8 +148,26 @@ void Connection::disconnect() {
135148
_allocationsSinceCompaction = 0;
136149
} // Release lock before potentially slow SQLDisconnect call
137150

138-
SQLRETURN ret = SQLDisconnect_ptr(_dbcHandle->get());
139-
checkError(ret);
151+
SQLRETURN ret;
152+
if (hasGil) {
153+
// Release the GIL during the blocking ODBC disconnect call.
154+
// This allows other Python threads to run while the network
155+
// round-trip completes.
156+
py::gil_scoped_release release;
157+
ret = SQLDisconnect_ptr(_dbcHandle->get());
158+
} else {
159+
// Destructor / shutdown path — GIL is not held, call directly.
160+
ret = SQLDisconnect_ptr(_dbcHandle->get());
161+
}
162+
// In destructor/shutdown paths, suppress errors to avoid
163+
// std::terminate() if this throws during stack unwinding.
164+
if (hasGil) {
165+
checkError(ret);
166+
} else if (!SQL_SUCCEEDED(ret)) {
167+
// Intentionally no LOG() here: LOG() acquires the GIL internally
168+
// via py::gil_scoped_acquire, which is unsafe during interpreter
169+
// shutdown or stack unwinding (can deadlock or call std::terminate).
170+
}
140171
// triggers SQLFreeHandle via destructor, if last owner
141172
_dbcHandle.reset();
142173
} else {
@@ -375,7 +406,6 @@ bool Connection::reset() {
375406
(SQLPOINTER)SQL_RESET_CONNECTION_YES, SQL_IS_INTEGER);
376407
if (!SQL_SUCCEEDED(ret)) {
377408
LOG("Failed to reset connection (ret=%d). Marking as dead.", ret);
378-
disconnect();
379409
return false;
380410
}
381411

@@ -387,7 +417,6 @@ bool Connection::reset() {
387417
(SQLPOINTER)SQL_TXN_READ_COMMITTED, SQL_IS_INTEGER);
388418
if (!SQL_SUCCEEDED(ret)) {
389419
LOG("Failed to reset transaction isolation level (ret=%d). Marking as dead.", ret);
390-
disconnect();
391420
return false;
392421
}
393422

mssql_python/pybind/connection/connection_pool.cpp

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
1616
const py::dict& attrs_before) {
1717
std::vector<std::shared_ptr<Connection>> to_disconnect;
1818
std::shared_ptr<Connection> valid_conn = nullptr;
19+
bool needs_connect = false;
1920
{
2021
std::lock_guard<std::mutex> lock(_mutex);
2122
auto now = std::chrono::steady_clock::now();
@@ -57,16 +58,33 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
5758
}
5859
}
5960

60-
// Create new connection if none reusable
61+
// Reserve a slot for a new connection if none reusable.
62+
// The actual connect() call happens outside the mutex to avoid
63+
// holding the mutex during the blocking ODBC call (which releases
64+
// the GIL and could otherwise cause a mutex/GIL deadlock).
6165
if (!valid_conn && _current_size < _max_size) {
6266
valid_conn = std::make_shared<Connection>(connStr, true);
63-
valid_conn->connect(attrs_before);
6467
++_current_size;
68+
needs_connect = true;
6569
} else if (!valid_conn) {
6670
throw std::runtime_error("ConnectionPool::acquire: pool size limit reached");
6771
}
6872
}
6973

74+
// Phase 2.5: Connect the new connection outside the mutex.
75+
if (needs_connect) {
76+
try {
77+
valid_conn->connect(attrs_before);
78+
} catch (...) {
79+
// Connect failed — release the reserved slot
80+
{
81+
std::lock_guard<std::mutex> lock(_mutex);
82+
if (_current_size > 0) --_current_size;
83+
}
84+
throw;
85+
}
86+
}
87+
7088
// Phase 3: Disconnect expired/bad connections outside lock
7189
for (auto& conn : to_disconnect) {
7290
try {
@@ -79,12 +97,25 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
7997
}
8098

8199
void ConnectionPool::release(std::shared_ptr<Connection> conn) {
82-
std::lock_guard<std::mutex> lock(_mutex);
83-
if (_pool.size() < _max_size) {
84-
conn->updateLastUsed();
85-
_pool.push_back(conn);
86-
} else {
87-
conn->disconnect();
100+
bool should_disconnect = false;
101+
{
102+
std::lock_guard<std::mutex> lock(_mutex);
103+
if (_pool.size() < _max_size) {
104+
conn->updateLastUsed();
105+
_pool.push_back(conn);
106+
} else {
107+
should_disconnect = true;
108+
}
109+
}
110+
// Disconnect outside the mutex to avoid holding it during the
111+
// blocking ODBC call (which releases the GIL).
112+
if (should_disconnect) {
113+
try {
114+
conn->disconnect();
115+
} catch (const std::exception& ex) {
116+
LOG("ConnectionPool::release: disconnect failed: %s", ex.what());
117+
}
118+
std::lock_guard<std::mutex> lock(_mutex);
88119
if (_current_size > 0)
89120
--_current_size;
90121
}
@@ -116,21 +147,35 @@ ConnectionPoolManager& ConnectionPoolManager::getInstance() {
116147

117148
std::shared_ptr<Connection> ConnectionPoolManager::acquireConnection(const std::wstring& connStr,
118149
const py::dict& attrs_before) {
119-
std::lock_guard<std::mutex> lock(_manager_mutex);
120-
121-
auto& pool = _pools[connStr];
122-
if (!pool) {
123-
LOG("Creating new connection pool");
124-
pool = std::make_shared<ConnectionPool>(_default_max_size, _default_idle_secs);
150+
std::shared_ptr<ConnectionPool> pool;
151+
{
152+
std::lock_guard<std::mutex> lock(_manager_mutex);
153+
auto& pool_ref = _pools[connStr];
154+
if (!pool_ref) {
155+
LOG("Creating new connection pool");
156+
pool_ref = std::make_shared<ConnectionPool>(_default_max_size, _default_idle_secs);
157+
}
158+
pool = pool_ref;
125159
}
160+
// Call acquire() outside _manager_mutex. acquire() may release the GIL
161+
// during the ODBC connect call; holding _manager_mutex across that would
162+
// create a mutex/GIL lock-ordering deadlock.
126163
return pool->acquire(connStr, attrs_before);
127164
}
128165

129166
void ConnectionPoolManager::returnConnection(const std::wstring& conn_str,
130167
const std::shared_ptr<Connection> conn) {
131-
std::lock_guard<std::mutex> lock(_manager_mutex);
132-
if (_pools.find(conn_str) != _pools.end()) {
133-
_pools[conn_str]->release((conn));
168+
std::shared_ptr<ConnectionPool> pool;
169+
{
170+
std::lock_guard<std::mutex> lock(_manager_mutex);
171+
auto it = _pools.find(conn_str);
172+
if (it != _pools.end()) {
173+
pool = it->second;
174+
}
175+
}
176+
// Call release() outside _manager_mutex to avoid deadlock.
177+
if (pool) {
178+
pool->release(conn);
134179
}
135180
}
136181

@@ -142,6 +187,9 @@ void ConnectionPoolManager::configure(int max_size, int idle_timeout_secs) {
142187

143188
void ConnectionPoolManager::closePools() {
144189
std::lock_guard<std::mutex> lock(_manager_mutex);
190+
// Keep _manager_mutex held for the full close operation so that
191+
// acquireConnection()/returnConnection() cannot create or use pools
192+
// while closePools() is in progress.
145193
for (auto& [conn_str, pool] : _pools) {
146194
if (pool) {
147195
pool->close();

tests/test_009_pooling.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,42 @@ def try_overflow():
278278
c.close()
279279

280280

281+
def test_pool_release_overflow_disconnects_outside_mutex(conn_str):
282+
"""Test that releasing a connection when pool is full disconnects it correctly.
283+
284+
When a connection is returned to a pool that is already at max_size,
285+
the connection must be disconnected. This exercises the overflow path in
286+
ConnectionPool::release() (connection_pool.cpp) where should_disconnect
287+
is set and disconnect happens outside the mutex.
288+
289+
With the current pool semantics, max_size limits total concurrent
290+
connections, so we acquire two connections with max_size=2, then shrink
291+
the pool to max_size=1 before returning them. The second close hits
292+
the overflow path.
293+
"""
294+
pooling(max_size=2, idle_timeout=30)
295+
296+
conn1 = connect(conn_str)
297+
conn2 = connect(conn_str)
298+
299+
# Shrink idle capacity so first close fills the pool and second overflows
300+
pooling(max_size=1, idle_timeout=30)
301+
302+
# Close conn1 — returned to the pool (pool now has 1 idle entry)
303+
conn1.close()
304+
305+
# Close conn2 — pool is full (1 idle already), so this connection
306+
# must be disconnected rather than pooled (overflow path).
307+
conn2.close()
308+
309+
# Verify the pool is still functional
310+
conn3 = connect(conn_str)
311+
cursor = conn3.cursor()
312+
cursor.execute("SELECT 1")
313+
assert cursor.fetchone()[0] == 1
314+
conn3.close()
315+
316+
281317
@pytest.mark.skip("Flaky test - idle timeout behavior needs investigation")
282318
def test_pool_idle_timeout_removes_connections(conn_str):
283319
"""Test that idle_timeout removes connections from the pool after the timeout."""

0 commit comments

Comments
 (0)