perf: use lru_cache for caching (#14459)

P403n1x87 · web-flow · commit abfd0cb4a5a9 · 2025-09-09T11:06:44.000-04:00
All the Python versions that we support offer lru_cache from the standard library. We can use this instead of our own implementation because it is generally implemented in native code and offers better performance, even though it is not specialised for the single-argument case that our internal implementation targets. A simple test shows a 6x overhead reduction when opting for the stdlib solution. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
diff --git a/benchmarks/django_simple/scenario.py b/benchmarks/django_simple/scenario.py
@@ -69,17 +69,17 @@ def _(loops):
                 from ddtrace.contrib.internal.django import database
 
                 try:
-                    database.get_conn_config.invalidate()
+                    database.get_conn_config.cache_clear()
                 except Exception:
                     pass
 
                 try:
-                    database.get_service_name.invalidate()
+                    database.get_service_name.cache_clear()
                 except Exception:
                     pass
 
                 try:
-                    database.get_conn_service_name.invalidate()
+                    database.get_conn_service_name.cache_clear()
                 except Exception:
                     pass
             except Exception:
@@ -90,12 +90,12 @@ def _(loops):
                 from ddtrace.contrib.internal.django import cache
 
                 try:
-                    cache.get_service_name.invalidate()
+                    cache.get_service_name.cache_clear()
                 except Exception:
                     pass
 
                 try:
-                    cache.func_cache_operation.invalidate()
+                    cache.func_cache_operation.cache_clear()
                 except Exception:
                     pass
             except Exception:
diff --git a/ddtrace/internal/utils/cache.py b/ddtrace/internal/utils/cache.py
@@ -1,13 +1,13 @@
+from functools import lru_cache
 from functools import wraps
 from inspect import FullArgSpec
 from inspect import getfullargspec
 from inspect import isgeneratorfunction
-from threading import RLock
 from typing import Any  # noqa:F401
 from typing import Callable  # noqa:F401
 from typing import Optional  # noqa:F401
 from typing import Type  # noqa:F401
-from typing import TypeVar  # noqa:F401
+from typing import TypeVar
 
 
 miss = object()
@@ -17,78 +17,14 @@
 M = Callable[[Any, T], Any]
 
 
-class LFUCache(dict):
-    """Simple LFU cache implementation.
+def cached(maxsize: int = 256) -> Callable[[Callable], Callable]:
+    def _(f: Callable) -> Callable:
+        return lru_cache(maxsize)(f)
 
-    This cache is designed for memoizing functions with a single hashable
-    argument. The eviction policy is LFU, i.e. the least frequently used values
-    are evicted when the cache is full. The amortized cost of shrinking the
-    cache when it grows beyond the requested size is O(log(size)).
-    """
-
-    def __init__(self, maxsize=256):
-        # type: (int) -> None
-        self.maxsize = maxsize
-        self.lock = RLock()
-        self.count_lock = RLock()
-
-    def get(self, key, f):  # type: ignore[override]
-        # type: (T, F) -> Any
-        """Get a value from the cache.
-
-        If the value with the given key is not in the cache, the expensive
-        function ``f`` is called on the key to generate it. The return value is
-        then stored in the cache and returned to the caller.
-        """
-
-        _ = super(LFUCache, self).get(key, miss)
-        if _ is not miss:
-            with self.count_lock:
-                value, count = _
-                self[key] = (value, count + 1)
-            return value
-
-        with self.lock:
-            _ = super(LFUCache, self).get(key, miss)
-            if _ is not miss:
-                with self.count_lock:
-                    value, count = _
-                    self[key] = (value, count + 1)
-                return value
-
-            # Cache miss: ensure that we have enough space in the cache
-            # by evicting half of the entries when we go over the threshold
-            while len(self) >= self.maxsize:
-                for h in sorted(self, key=lambda h: self[h][1])[: self.maxsize >> 1]:
-                    del self[h]
-
-            value = f(key)
-
-            self[key] = (value, 1)
-
-            return value
-
-
-def cached(maxsize=256):
-    # type: (int) -> Callable[[F], F]
-    """Decorator for memoizing functions of a single argument (LFU policy)."""
-
-    def cached_wrapper(f):
-        # type: (F) -> F
-        cache = LFUCache(maxsize)
-
-        def cached_f(key):
-            # type: (T) -> Any
-            return cache.get(key, f)
-
-        cached_f.invalidate = cache.clear  # type: ignore[attr-defined]
-
-        return cached_f
-
-    return cached_wrapper
+    return _
 
 
-class CachedMethodDescriptor(object):
+class CachedMethodDescriptor:
     def __init__(self, method, maxsize):
         # type: (M, int) -> None
         self._method = method
diff --git a/ddtrace/settings/_config.py b/ddtrace/settings/_config.py
@@ -411,7 +411,7 @@ def error_statuses(self, value):
             self._error_statuses = value
             self._error_ranges = get_error_ranges(value)
             # Mypy can't catch cached method's invalidate()
-            self.is_error_code.invalidate()  # type: ignore[attr-defined]
+            self.is_error_code.cache_clear()  # type: ignore[attr-defined]
 
         @property
         def error_ranges(self):
diff --git a/ddtrace/settings/http.py b/ddtrace/settings/http.py
@@ -24,7 +24,7 @@ def __init__(self, header_tags=None):
 
     def _reset(self):
         self._header_tags = {}
-        self._header_tag_name.invalidate()
+        self._header_tag_name.cache_clear()
 
     @cachedmethod()
     def _header_tag_name(self, header_name):
@@ -63,7 +63,7 @@ def trace_headers(self, whitelist):
             self._header_tags.setdefault(normalized_header_name, "")
 
         # Mypy can't catch cached method's invalidate()
-        self._header_tag_name.invalidate()  # type: ignore[attr-defined]
+        self._header_tag_name.cache_clear()  # type: ignore[attr-defined]
 
         return self
 
diff --git a/tests/cache/conftest.py b/tests/cache/conftest.py
@@ -1,6 +1,7 @@
 """
 Ensure that cached functions are invalidated between test runs.
 """
+
 import pytest
 
 from ddtrace.internal.utils import cache
@@ -26,6 +27,6 @@ def wrapped_cached_f(f):
 @pytest.hookimpl(hookwrapper=True)
 def pytest_runtest_teardown(item, nextitem):
     for f in _CACHED_FUNCTIONS:
-        f.invalidate()
+        f.cache_clear()
 
     yield
diff --git a/tests/contrib/django/conftest.py b/tests/contrib/django/conftest.py
@@ -33,11 +33,11 @@ def clear_django_caches():
     from ddtrace.contrib.internal.django import cache
     from ddtrace.contrib.internal.django import database
 
-    cache.get_service_name.invalidate()
-    cache.func_cache_operation.invalidate()
-    database.get_conn_config.invalidate()
-    database.get_conn_service_name.invalidate()
-    database.get_traced_cursor_cls.invalidate()
+    cache.get_service_name.cache_clear()
+    cache.func_cache_operation.cache_clear()
+    database.get_conn_config.cache_clear()
+    database.get_conn_service_name.cache_clear()
+    database.get_traced_cursor_cls.cache_clear()
 
 
 @pytest.fixture
diff --git a/tests/debugging/mocking.py b/tests/debugging/mocking.py
@@ -198,7 +198,7 @@ def _debugger(config_to_override: DDConfig, config_overrides: Any) -> Generator[
             config_to_override.__dict__ = old_config
             # Reset any test changes to the redaction config or cached calls.
             redaction_config.__dict__ = old_config
-            redact.invalidate()
+            redact.cache_clear()
         finally:
             atexit.register = atexit_register
 
diff --git a/tests/internal/test_packages.py b/tests/internal/test_packages.py
@@ -26,7 +26,7 @@ def packages():
     for f in _p.__dict__.values():
         try:
             if f.__code__ is _cached_sentinel.__code__:
-                f.invalidate()
+                f.cache_clear()
         except AttributeError:
             pass
 
diff --git a/tests/internal/test_settings.py b/tests/internal/test_settings.py
@@ -576,7 +576,7 @@ def test_remoteconfig_header_tags(ddtrace_run_python_code_in_subprocess):
 assert span.get_tag("env_set_tag_name") == "helloworld"
 
 config._http._reset()
-config._header_tag_name.invalidate()
+config._header_tag_name.cache_clear()
 call_apm_tracing_rc(_base_rc_config({"tracing_header_tags":
     [{"header": "X-Header-Tag-420", "tag_name":"header_tag_420"}]}), config)
 
@@ -588,7 +588,7 @@ def test_remoteconfig_header_tags(ddtrace_run_python_code_in_subprocess):
 assert span2.get_tag("env_set_tag_name") is None
 
 config._http._reset()
-config._header_tag_name.invalidate()
+config._header_tag_name.cache_clear()
 call_apm_tracing_rc(_base_rc_config({}), config)
 
 with tracer.trace("test") as span3:
diff --git a/tests/tracer/test_utils.py b/tests/tracer/test_utils.py
@@ -278,7 +278,7 @@ def cached_test_recipe(expensive, cheap, witness, cache_size):
     witness.assert_called_with("Foo")
     assert witness.call_count == 1
 
-    cheap.invalidate()
+    cheap.cache_clear()
 
     for i in range(cache_size >> 1):
         cheap("Foo%d" % i)
@@ -290,17 +290,6 @@ def cached_test_recipe(expensive, cheap, witness, cache_size):
 
     assert witness.call_count == 1 + cache_size
 
-    MAX_FOO = "Foo%d" % (cache_size - 1)
-
-    cheap("last drop")  # Forces least frequent elements out of the cache
-    assert witness.call_count == 2 + cache_size
-
-    cheap(MAX_FOO)  # Check MAX_FOO was dropped
-    assert witness.call_count == 3 + cache_size
-
-    cheap("last drop")  # Check last drop was retained
-    assert witness.call_count == 3 + cache_size
-
 
 def test_cached():
     witness = mock.Mock()