Skip to content

Commit 90c77b5

Browse files
committed
perf: cache apply_parameters to avoid repeated type() class creation
Cache the results of _CassandraType.apply_parameters() in a class-level dict keyed by (cls, subtypes, names). This avoids the expensive type() metaclass machinery on repeated calls with the same type signature, which is the common case during result-set deserialization. Benchmark: 31.7x speedup (6.48 us/call -> 0.20 us/call) for cached hits.
1 parent 8e6c4d4 commit 90c77b5

File tree

3 files changed, with 159 additions and 1 deletion.
benchmarks/bench_cache_apply_parameters.py

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
"""
2+
Micro-benchmark: apply_parameters caching.
3+
4+
Measures the speedup from caching parameterized type creation
5+
in _CassandraType.apply_parameters().
6+
7+
Run:
8+
python benchmarks/bench_cache_apply_parameters.py
9+
"""
10+
import timeit
11+
from cassandra.cqltypes import (
12+
MapType, SetType, ListType, TupleType,
13+
Int32Type, UTF8Type, FloatType, DoubleType, BooleanType,
14+
_CassandraType,
15+
)
16+
17+
18+
def bench_apply_parameters():
    """Compare ``apply_parameters`` timings with a warm cache and an emptied one.

    Four parameterized types (map, set, list and tuple) are built over and
    over.  The first timed run leaves the cache untouched, so every call can
    be served from it; the second run empties the cache before each call.
    Both timings and their ratio are printed to stdout.
    """
    type_cache = _CassandraType._apply_parameters_cache

    calls = [
        (MapType, [UTF8Type, Int32Type]),
        (SetType, [FloatType]),
        (ListType, [DoubleType]),
        (TupleType, [Int32Type, UTF8Type, BooleanType]),
    ]

    # Populate the cache once so the first timed run starts warm.
    for base, params in calls:
        base.apply_parameters(params)

    def run_cached():
        for base, params in calls:
            base.apply_parameters(params)

    def run_uncached():
        for base, params in calls:
            # The clear happens inside the timed region, so its cost is
            # part of the "uncached" figure.
            type_cache.clear()
            base.apply_parameters(params)

    n = 100_000

    # Cached path
    t_cached = timeit.timeit(run_cached, number=n)
    print(f"Cached apply_parameters ({len(calls)} types x {n} iters): "
          f"{t_cached:.3f}s ({t_cached / (n * len(calls)) * 1e6:.2f} us/call)")

    # Uncached path
    t_uncached = timeit.timeit(run_uncached, number=n)
    print(f"Uncached apply_parameters ({len(calls)} types x {n} iters): "
          f"{t_uncached:.3f}s ({t_uncached / (n * len(calls)) * 1e6:.2f} us/call)")

    speedup = t_uncached / t_cached
    print(f"Speedup: {speedup:.1f}x")


if __name__ == '__main__':
    bench_apply_parameters()

cassandra/cqltypes.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,7 @@ class _CassandraType(object, metaclass=CassandraTypeType):
275275
subtypes = ()
276276
num_subtypes = 0
277277
empty_binary_ok = False
278+
_apply_parameters_cache = {}
278279

279280
support_empty_values = False
280281
"""
@@ -373,8 +374,15 @@ def apply_parameters(cls, subtypes, names=None):
373374
if cls.num_subtypes != 'UNKNOWN' and len(subtypes) != cls.num_subtypes:
374375
raise ValueError("%s types require %d subtypes (%d given)"
375376
% (cls.typename, cls.num_subtypes, len(subtypes)))
377+
subtypes = tuple(subtypes)
378+
cache_key = (cls, subtypes, tuple(names) if names else names)
379+
cached = cls._apply_parameters_cache.get(cache_key)
380+
if cached is not None:
381+
return cached
376382
newname = cls.cass_parameterized_type_with(subtypes)
377-
return type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
383+
result = type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
384+
cls._apply_parameters_cache[cache_key] = result
385+
return result
378386

379387
@classmethod
380388
def cql_parameterized_type(cls):
Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,90 @@
1+
"""
2+
Unit tests for apply_parameters caching in _CassandraType.
3+
"""
4+
import unittest
5+
from cassandra.cqltypes import (
6+
MapType, SetType, ListType, TupleType,
7+
Int32Type, UTF8Type, FloatType, DoubleType, BooleanType,
8+
_CassandraType,
9+
)
10+
11+
12+
class TestApplyParametersCache(unittest.TestCase):
    """Checks that ``apply_parameters`` reuses and separates cached classes."""

    def setUp(self):
        # Every test starts from an empty cache so earlier tests cannot
        # influence object identity checks.
        _CassandraType._apply_parameters_cache.clear()

    def test_cache_returns_same_object(self):
        """Two identical calls hand back one and the same class object."""
        first = MapType.apply_parameters([UTF8Type, Int32Type])
        second = MapType.apply_parameters([UTF8Type, Int32Type])
        self.assertIs(first, second)

    def test_cache_different_subtypes_different_results(self):
        """Swapping the subtype order yields a distinct class."""
        text_to_int = MapType.apply_parameters([UTF8Type, Int32Type])
        int_to_text = MapType.apply_parameters([Int32Type, UTF8Type])
        self.assertIsNot(text_to_int, int_to_text)

    def test_cache_different_base_types(self):
        """The same subtypes under different base types stay separate."""
        as_set = SetType.apply_parameters([Int32Type])
        as_list = ListType.apply_parameters([Int32Type])
        self.assertIsNot(as_set, as_list)

    def test_cached_type_has_correct_subtypes(self):
        """``subtypes`` is correct on the first call and on the repeat call."""
        expected = (UTF8Type, FloatType)
        created = MapType.apply_parameters([UTF8Type, FloatType])
        self.assertEqual(created.subtypes, expected)
        # Second call with the same arguments must report the same subtypes.
        repeated = MapType.apply_parameters([UTF8Type, FloatType])
        self.assertEqual(repeated.subtypes, expected)

    def test_cached_type_has_correct_cassname(self):
        """The parameterized class keeps the base type's ``cassname``."""
        parameterized = SetType.apply_parameters([DoubleType])
        self.assertEqual(parameterized.cassname, SetType.cassname)

    def test_cached_type_with_names(self):
        """Identical subtypes and names map to one class object."""
        first = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
        second = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
        self.assertIs(first, second)

    def test_different_names_different_cache_entries(self):
        """Identical subtypes with different names map to distinct classes."""
        id_name = TupleType.apply_parameters([Int32Type, UTF8Type], names=['id', 'name'])
        key_value = TupleType.apply_parameters([Int32Type, UTF8Type], names=['key', 'value'])
        self.assertIsNot(id_name, key_value)

    def test_names_none_vs_no_names(self):
        """An explicit ``names=None`` and an omitted ``names`` are equivalent."""
        explicit_none = MapType.apply_parameters([UTF8Type, Int32Type], names=None)
        omitted = MapType.apply_parameters([UTF8Type, Int32Type])
        self.assertIs(explicit_none, omitted)

    def test_tuple_subtypes_accepted(self):
        """Subtypes given as a list or as a tuple resolve to the same class."""
        from_list = MapType.apply_parameters([UTF8Type, Int32Type])
        from_tuple = MapType.apply_parameters((UTF8Type, Int32Type))
        self.assertIs(from_list, from_tuple)

    def test_cache_populated(self):
        """A call to ``apply_parameters`` leaves at least one cache entry."""
        _CassandraType._apply_parameters_cache.clear()
        MapType.apply_parameters([UTF8Type, Int32Type])
        entry_count = len(_CassandraType._apply_parameters_cache)
        self.assertGreater(entry_count, 0)

    def test_cache_clear_forces_new_creation(self):
        """After the cache is emptied, the next call builds a fresh class."""
        before_clear = MapType.apply_parameters([UTF8Type, Int32Type])
        _CassandraType._apply_parameters_cache.clear()
        after_clear = MapType.apply_parameters([UTF8Type, Int32Type])
        # Distinct objects ...
        self.assertIsNot(before_clear, after_clear)
        # ... that still carry the same subtypes.
        self.assertEqual(before_clear.subtypes, after_clear.subtypes)


if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)