Skip to content

Commit 2398ffd

Browse files
FIX: Credential instance cache (#483)
### Work Item / Issue Reference

<!--
IMPORTANT: Please follow the PR template guidelines below.
For mssql-python maintainers: Insert your ADO Work Item ID below
For external contributors: Insert GitHub Issue number below
Only one reference is required - either GitHub issue OR ADO Work Item.
-->

<!-- mssql-python maintainers: ADO Work Item -->
> [AB#43254](https://sqlclientdrivers.visualstudio.com/c6d89619-62de-46a0-8b46-70b92a84d85e/_workitems/edit/43254)

<!-- External contributors: GitHub Issue -->
> GitHub Issue: #388

-------------------------------------------------------------------

### Summary

This pull request introduces credential instance caching for Azure Active Directory (AAD) authentication in the `mssql_python` package, improving performance by reusing credential objects instead of creating new ones for each token request. It also adds comprehensive tests for the caching logic and edge cases, and includes a new benchmark script to measure the performance impact of credential caching.

**Credential instance caching and logic changes:**

* Added a module-level credential cache (`_credential_cache`) and lock in `auth.py` to reuse Azure Identity credential instances per authentication type, enabling the SDK's in-memory token cache and reducing redundant token acquisitions. (`mssql_python/auth.py`)
* Modified `AADAuth.get_raw_token` to use the cached credential instance rather than creating a new one each call, ensuring efficient token reuse. (`mssql_python/auth.py`)

**Testing improvements:**

* Added a pytest fixture to clear the credential cache between tests, ensuring test isolation. (`tests/test_008_auth.py`)
* Introduced a new `TestCredentialInstanceCache` class with tests verifying credential reuse, separation by auth type, and cache state. (`tests/test_008_auth.py`)
* Added tests for error handling and edge cases, including missing Azure libraries, authentication errors, and unusual connection string parameter cases. (`tests/test_008_auth.py`)

**Benchmarking:**

* Added a new benchmark script (`benchmarks/bench_credential_cache.py`) to measure and compare the performance of credential caching versus the previous behavior of creating new credentials for every token request. (`benchmarks/bench_credential_cache.py`)

---------

Co-authored-by: gargsaumya <saumyagarg.100@gmail.com>
1 parent 57ed673 commit 2398ffd

File tree

3 files changed

+479
-6
lines changed

3 files changed

+479
-6
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
"""
2+
Benchmark: Credential Instance Caching for Azure AD Authentication
3+
4+
Measures the performance difference between:
5+
1. Creating a new DefaultAzureCredential + get_token() each call (old behavior)
6+
2. Reusing a cached DefaultAzureCredential instance (new behavior)
7+
8+
Prerequisites:
9+
- pip install azure-identity azure-core
10+
- az login (for AzureCliCredential to work)
11+
12+
Usage:
13+
python benchmarks/bench_credential_cache.py
14+
"""
15+
16+
from __future__ import annotations
17+
18+
import time
19+
import statistics
20+
21+
22+
def bench_no_cache(
    n: int, scope: str = "https://database.windows.net/.default"
) -> list[float]:
    """Simulate the OLD behavior: new credential per call.

    Args:
        n: Number of token acquisitions to time.
        scope: Token scope to request. Defaults to the Azure SQL scope.

    Returns:
        One elapsed wall-clock duration (in seconds) per call. Each sample
        covers credential construction plus ``get_token``.
    """
    # Imported inside the function so a missing azure-identity package
    # surfaces when the benchmark runs rather than at module import.
    from azure.identity import DefaultAzureCredential

    times = []
    for _ in range(n):
        start = time.perf_counter()
        cred = DefaultAzureCredential()
        try:
            cred.get_token(scope)
            times.append(time.perf_counter() - start)
        finally:
            # Close after the sample is recorded so cleanup is not timed;
            # without this every iteration leaves a transport session open.
            cred.close()
    return times
33+
34+
35+
def bench_with_cache(
    n: int, scope: str = "https://database.windows.net/.default"
) -> list[float]:
    """Simulate the NEW behavior: reuse a single credential instance.

    Args:
        n: Number of token acquisitions to time.
        scope: Token scope to request. Defaults to the Azure SQL scope.

    Returns:
        One elapsed wall-clock duration (in seconds) per ``get_token`` call.
        Credential construction happens once, before timing starts, so it is
        not included in any sample.
    """
    # Imported inside the function so a missing azure-identity package
    # surfaces when the benchmark runs rather than at module import.
    from azure.identity import DefaultAzureCredential

    cred = DefaultAzureCredential()
    try:
        times = []
        for _ in range(n):
            start = time.perf_counter()
            cred.get_token(scope)
            times.append(time.perf_counter() - start)
        return times
    finally:
        # Release the credential's transport session once all samples are in.
        cred.close()
46+
47+
48+
def report(label: str, times: list[float]) -> None:
    """Print a summary table of timing samples to stdout.

    Args:
        label: Heading shown above the statistics.
        times: Per-call durations in seconds.

    The total is printed in seconds; mean, median, stdev, min and max are
    printed in milliseconds. The stdev row is omitted when fewer than two
    samples are available, because ``statistics.stdev`` needs at least two.
    An empty ``times`` prints only the heading and a call count of 0
    instead of raising ``statistics.StatisticsError``.
    """
    rule = "=" * 50
    print(f"\n{rule}")
    print(f" {label}")
    print(rule)
    print(f" Calls: {len(times)}")
    if not times:
        # mean/median/min/max are all undefined for an empty sample set.
        return
    print(f" Total: {sum(times):.3f}s")
    print(f" Mean: {statistics.mean(times) * 1000:.1f}ms")
    print(f" Median: {statistics.median(times) * 1000:.1f}ms")
    if len(times) > 1:
        print(f" Stdev: {statistics.stdev(times) * 1000:.1f}ms")
    print(f" Min: {min(times) * 1000:.1f}ms")
    print(f" Max: {max(times) * 1000:.1f}ms")
59+
60+
61+
def main() -> None:
    """Run both benchmark scenarios and print a side-by-side comparison.

    Times ``bench_no_cache`` and ``bench_with_cache`` for the same number of
    calls, prints a report for each, then prints the mean speedup and the
    milliseconds saved per call.

    Raises:
        SystemExit: With status 1 if either scenario fails (for example,
            azure-identity is not installed or no login is available), so
            that callers of the script can detect the failure.
    """
    import sys

    n_calls = 10  # number of calls to benchmark

    print("Credential Instance Cache Benchmark")
    print(f"Running {n_calls} sequential token acquisitions for each scenario...\n")

    try:
        print(">>> Without cache (new credential each call)...")
        no_cache_times = bench_no_cache(n_calls)
        report("WITHOUT credential cache (old behavior)", no_cache_times)

        print("\n>>> With cache (reuse credential instance)...")
        cache_times = bench_with_cache(n_calls)
        report("WITH credential cache (new behavior)", cache_times)

        no_cache_mean = statistics.mean(no_cache_times)
        cache_mean = statistics.mean(cache_times)
        saved = (no_cache_mean - cache_mean) * 1000
        # A zero mean would make the ratio undefined; show "n/a" rather
        # than failing the whole run on a ZeroDivisionError.
        speedup = f"{no_cache_mean / cache_mean:.1f}x" if cache_mean > 0 else "n/a"
        print(f"\n{'=' * 50}")
        print(f" SPEEDUP: {speedup} ({saved:.0f}ms saved per call)")
        print(f"{'=' * 50}")
    except Exception as e:
        # Top-level boundary: report the failure on stderr and exit non-zero
        # instead of finishing with a success status.
        print(f"\nBenchmark failed: {e}", file=sys.stderr)
        print(
            "Make sure you are logged in via 'az login' and have azure-identity installed.",
            file=sys.stderr,
        )
        sys.exit(1)
84+
85+
86+
# Run the benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

mssql_python/auth.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,19 @@
66

77
import platform
88
import struct
9+
import threading
910
from typing import Tuple, Dict, Optional, List
1011

1112
from mssql_python.logging import logger
1213
from mssql_python.constants import AuthType, ConstantsDDBC
1314

15+
# Module-level credential instance cache.
16+
# Reusing credential objects allows the Azure Identity SDK's built-in
17+
# in-memory token cache to work, avoiding redundant token acquisitions.
18+
# See: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/identity/azure-identity/TOKEN_CACHING.md
19+
_credential_cache: Dict[str, object] = {}
20+
_credential_cache_lock = threading.Lock()
21+
1422

1523
class AADAuth:
1624
"""Handles Azure Active Directory authentication"""
@@ -36,12 +44,11 @@ def get_token(auth_type: str) -> bytes:
3644

3745
@staticmethod
3846
def get_raw_token(auth_type: str) -> str:
39-
"""Acquire a fresh raw JWT for the mssql-py-core connection (bulk copy).
47+
"""Acquire a raw JWT for the mssql-py-core connection (bulk copy).
4048
41-
This deliberately does NOT cache the credential or token — each call
42-
creates a new Azure Identity credential instance and requests a token.
43-
A fresh acquisition avoids expired-token errors when bulkcopy() is
44-
called long after the original DDBC connect().
49+
Uses the cached credential instance so the Azure Identity SDK's
50+
built-in token cache can serve a valid token without a round-trip
51+
when the previous token has not yet expired.
4552
"""
4653
_, raw_token = AADAuth._acquire_token(auth_type)
4754
return raw_token
@@ -83,7 +90,19 @@ def _acquire_token(auth_type: str) -> Tuple[bytes, str]:
8390
)
8491

8592
try:
86-
credential = credential_class()
93+
with _credential_cache_lock:
94+
if auth_type not in _credential_cache:
95+
logger.debug(
96+
"get_token: Creating new credential instance for auth_type=%s",
97+
auth_type,
98+
)
99+
_credential_cache[auth_type] = credential_class()
100+
else:
101+
logger.debug(
102+
"get_token: Reusing cached credential instance for auth_type=%s",
103+
auth_type,
104+
)
105+
credential = _credential_cache[auth_type]
87106
raw_token = credential.get_token("https://database.windows.net/.default").token
88107
logger.info(
89108
"get_token: Azure AD token acquired successfully - token_length=%d chars",

0 commit comments

Comments
 (0)