[server] replace utils.QueryCache with cachetools.LFUCache

This change introduces a dependency on cachetools, but reinventing the wheel seems pointless in the present case

author           Nicolas Chauvat <nicolas.chauvat@logilab.fr>
changeset        4ed547bd583f
branch           default
phase            draft
hidden           no
parent revision  #3c66c31ebdd2 fix: UX when migractions failed to get its connection
child revision   <not specified>
files modified by this revision
cubicweb/server/querier.py
cubicweb/server/sources/native.py
cubicweb/server/test/unittest_server_utils.py
cubicweb/utils.py
setup.py
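
For readers unfamiliar with cachetools: LFUCache is a bounded, dict-like
cache that evicts the least-frequently-used entry on overflow. A minimal
sketch of the API the patch below relies on (the cache size is
illustrative; the calls mirror those used by the querier and the native
source):

    from cachetools import LFUCache

    cache = LFUCache(maxsize=3000)   # bounded; evicts least-frequently-used
    cache['rql'] = 'parsed tree'     # plain dict-style assignment
    cache.get('missing', None)       # lookup with a default
    cache.pop('missing', None)       # pop with a default, as QueryCache.pop did
    cache.clear()                    # drop everything in place
    assert len(cache) <= 3000        # never grows past maxsize
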
# HG changeset patch
# User Nicolas Chauvat <nicolas.chauvat@logilab.fr>
# Date 1552510013 -3600
# Wed Mar 13 21:46:53 2019 +0100
# Node ID 4ed547bd583f116ae1fdf3f12e772649fb1ef43b
# Parent 3c66c31ebdd2394ea6a254563c8bb662004aaf7e
[server] replace utils.QueryCache with cachetools.LFUCache

This change introduces a dependency on cachetools, but reinventing the wheel seems pointless in the present case

diff --git a/cubicweb/server/querier.py b/cubicweb/server/querier.py
@@ -20,10 +20,13 @@
1  """
2  import uuid
3  import time
4  import traceback
5  from itertools import repeat
6 +from threading import Lock
7 +
8 +from cachetools import LFUCache
9 
10  from rql import RQLSyntaxError, CoercionError
11  from rql.stmts import Union
12  from rql.nodes import ETYPE_PYOBJ_MAP, etype_from_pyobj, Relation, Exists, Not,\
13      VariableRef, Constant
@@ -33,11 +36,11 @@
 from cubicweb.rqlrewrite import RQLRelationRewriter
 from cubicweb import Binary, server
 from cubicweb.rset import ResultSet
 from cubicweb.debug import emit_to_debug_channel

-from cubicweb.utils import QueryCache, RepeatList
+from cubicweb.utils import RepeatList
 from cubicweb.misc.source_highlight import highlight_terminal
 from cubicweb.server.rqlannotation import RQLAnnotator, set_qdata
 from cubicweb.server.ssplanner import (READ_ONLY_RTYPES, add_types_restriction,
                                        prepare_plan)
 from cubicweb.server.edition import EditedEntity
@@ -628,11 +631,12 @@

 class RQLCache(object):

     def __init__(self, repo, schema):
         # rql st and solution cache.
-        self._cache = QueryCache(repo.config['rql-cache-size'])
+        self._cache = LFUCache(repo.config['rql-cache-size'])
+        self._lock = Lock()
         # rql cache key cache. Don't bother using a Cache instance: we should
         # have a limited number of queries in there, since there are no entries
         # in this cache for user queries (which have no args)
         self._ck_cache = {}
         # some cache usage stats
@@ -650,48 +654,51 @@
             except UnicodeError:
                 raise RQLSyntaxError(rql)
         self._parse = parse

     def __len__(self):
-        return len(self._cache)
+        with self._lock:
+            return len(self._cache)

     def get(self, cnx, rql, args):
         """Return syntax tree and cache key for the given RQL.

         Returned syntax tree is cached and must not be modified
         """
-        # parse the query and binds variables
-        cachekey = (rql,)
-        try:
-            if args:
-                # search for named args in query which are eids (hence
-                # influencing query's solutions)
-                eidkeys = self._ck_cache[rql]
-                if eidkeys:
-                    # if there are some, we need a better cache key, eg (rql +
-                    # entity type of each eid)
-                    cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
-            rqlst = self._cache[cachekey]
-            self.cache_hit += 1
-        except KeyError:
-            self.cache_miss += 1
-            rqlst = self._parse(rql)
-            # compute solutions for rqlst and return named args in query
-            # which are eids. Notice that if you may not need `eidkeys`, we
-            # have to compute solutions anyway (kept as annotation on the
-            # tree)
-            eidkeys = self.compute_var_types(cnx, rqlst, args)
-            if args and rql not in self._ck_cache:
-                self._ck_cache[rql] = eidkeys
-                if eidkeys:
-                    cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
-            self._cache[cachekey] = rqlst
-        return rqlst, cachekey
+        with self._lock:
+            # parse the query and binds variables
+            cachekey = (rql,)
+            try:
+                if args:
+                    # search for named args in query which are eids (hence
+                    # influencing query's solutions)
+                    eidkeys = self._ck_cache[rql]
+                    if eidkeys:
+                        # if there are some, we need a better cache key, eg (rql +
+                        # entity type of each eid)
+                        cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
+                rqlst = self._cache[cachekey]
+                self.cache_hit += 1
+            except KeyError:
+                self.cache_miss += 1
+                rqlst = self._parse(rql)
+                # compute solutions for rqlst and return named args in query
+                # which are eids. Notice that if you may not need `eidkeys`, we
+                # have to compute solutions anyway (kept as annotation on the
+                # tree)
+                eidkeys = self.compute_var_types(cnx, rqlst, args)
+                if args and rql not in self._ck_cache:
+                    self._ck_cache[rql] = eidkeys
+                    if eidkeys:
+                        cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
+                self._cache[cachekey] = rqlst
+            return rqlst, cachekey

     def pop(self, key, *args):
         """Pop a key from the cache."""
-        self._cache.pop(key, *args)
+        with self._lock:
+            self._cache.pop(key, *args)


 def _rql_cache_key(cnx, rql, args, eidkeys):
     cachekey = [rql]
     type_from_eid = cnx.repo.type_from_eid
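
Note: unlike the removed QueryCache, LFUCache does no internal locking,
which is why every RQLCache method above now runs under self._lock. A
reduced sketch of the pattern (class and method names are illustrative,
not part of the patch):

    from threading import Lock

    from cachetools import LFUCache

    class LockedLFUCache:
        """Serialize all access to a shared LFUCache behind one lock."""

        def __init__(self, maxsize):
            self._cache = LFUCache(maxsize)
            self._lock = Lock()

        def get_or_compute(self, key, compute):
            # a single critical section covers both the lookup and the
            # insertion on miss, mirroring the shape of RQLCache.get()
            with self._lock:
                try:
                    return self._cache[key]
                except KeyError:
                    value = self._cache[key] = compute(key)
                    return value
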
diff --git a/cubicweb/server/sources/native.py b/cubicweb/server/sources/native.py
@@ -28,21 +28,22 @@
 import time
 import zipfile
 import logging
 import sys

+import cachetools
+
 from logilab.common.decorators import cached, clear_cache
 from logilab.common.configuration import Method
 from logilab.common.shellutils import getlogin, ASK
 from logilab.database import get_db_helper, sqlgen

 from yams.schema import role_name

 from cubicweb import (UnknownEid, AuthenticationError, ValidationError, Binary,
                       UniqueTogetherError, UndoTransactionException, ViolatedConstraint)
 from cubicweb import transaction as tx, server, neg_role, _
-from cubicweb.utils import QueryCache
 from cubicweb.debug import emit_to_debug_channel
 from cubicweb.schema import VIRTUAL_RTYPES
 from cubicweb.cwconfig import CubicWebNoAppConfiguration
 from cubicweb.server import hook
 from cubicweb.server import schema2sql as y2sql
@@ -334,11 +335,11 @@
         self._rql_sqlgen = rql2sql.SQLGenerator(self.schema, self.dbhelper,
                                                 ATTR_MAP.copy())
         # full text index helper
         self.do_fti = not repo.config['delay-full-text-indexation']
         # sql queries cache
-        self._cache = QueryCache(repo.config['rql-cache-size'])
+        self._cache = cachetools.LFUCache(repo.config['rql-cache-size'])
         # (etype, attr) / storage mapping
         self._storages = {}
         self.binary_to_str = self.dbhelper.dbapi_module.binary_to_str
         if self.dbdriver == 'sqlite':
             self.eid_generator = SQLITEEidGenerator(self)
@@ -364,11 +365,11 @@
         authentifier.set_schema(self.schema)

     def clear_caches(self, eids, etypes):
         """Clear potential source caches."""
         if eids is None:
-            self._cache = QueryCache(self.repo.config['rql-cache-size'])
+            self._cache.clear()
         else:
             cache = self._cache
             for eid, etype in zip(eids, etypes):
                 cache.pop('Any X WHERE X eid %s' % eid, None)
                 cache.pop('Any %s' % eid, None)
@@ -473,11 +474,11 @@
         set_qdata(self.schema.rschema, rqlst, ())
         return rqlst

     def set_schema(self, schema):
         """set the instance'schema"""
-        self._cache = QueryCache(self.repo.config['rql-cache-size'])
+        self._cache = cachetools.LFUCache(self.repo.config['rql-cache-size'])
         self.cache_hit, self.cache_miss, self.no_cache = 0, 0, 0
         self.schema = schema
         try:
             self._rql_sqlgen.schema = schema
         except AttributeError:
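
The old QueryCache promoted frequently read keys to a "permanent" set and
wiped only transients on overflow; LFUCache approximates that policy by
counting uses and evicting the least-frequently-used key first, so hot
entries survive. A quick illustration (sizes chosen for the example):

    from cachetools import LFUCache

    c = LFUCache(maxsize=2)
    c['hot'] = 1
    for _ in range(5):
        c['hot']        # each read bumps the key's use count
    c['cold'] = 2
    c['new'] = 3        # overflow: 'cold', the least-used key, is evicted
    assert 'hot' in c and 'cold' not in c

Note also that clear_caches() can now empty the cache in place with
clear() instead of allocating a fresh cache object.
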
diff --git a/cubicweb/server/test/unittest_server_utils.py b/cubicweb/server/test/unittest_server_utils.py
@@ -24,12 +24,11 @@
 import re
 from unittest import TestCase

 from cubicweb import Binary, Unauthorized
 from cubicweb.devtools.testlib import CubicWebTC
-from cubicweb.utils import (make_uid, UStringIO, RepeatList, HTMLHead,
-                            QueryCache)
+from cubicweb.utils import make_uid, UStringIO, RepeatList, HTMLHead
 from cubicweb.entity import Entity

 try:
     from cubicweb.utils import CubicWebJsonEncoder, json
 except ImportError:
@@ -51,149 +50,10 @@
                 self.fail('make_uid must not return something begining with '
                           'some numeric character, got %s' % uid)
             d.add(uid)


-class TestQueryCache(TestCase):
-    def test_querycache(self):
-        c = QueryCache(ceiling=20)
-        # write only
-        for x in range(10):
-            c[x] = x
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 0,
-                          'itemcount': 10,
-                          'permanentcount': 0})
-        c = QueryCache(ceiling=10)
-        # we should also get a warning
-        for x in range(20):
-            c[x] = x
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 0,
-                          'itemcount': 10,
-                          'permanentcount': 0})
-        # write + reads
-        c = QueryCache(ceiling=20)
-        for n in range(4):
-            for x in range(10):
-                c[x] = x
-                c[x]
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 10,
-                          'itemcount': 10,
-                          'permanentcount': 0})
-        c = QueryCache(ceiling=20)
-        for n in range(17):
-            for x in range(10):
-                c[x] = x
-                c[x]
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 0,
-                          'itemcount': 10,
-                          'permanentcount': 10})
-        c = QueryCache(ceiling=20)
-        for n in range(17):
-            for x in range(10):
-                c[x] = x
-                if n % 2:
-                    c[x]
-                if x % 2:
-                    c[x]
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 5,
-                          'itemcount': 10,
-                          'permanentcount': 5})
-
-    def test_clear_on_overflow(self):
-        """Tests that only non-permanent items in the cache are wiped-out on ceiling overflow
-        """
-        c = QueryCache(ceiling=10)
-        # set 10 values
-        for x in range(10):
-            c[x] = x
-        # arrange for the first 5 to be permanent
-        for x in range(5):
-            for r in range(QueryCache._maxlevel + 2):
-                v = c[x]
-                self.assertEqual(v, x)
-        # Add the 11-th
-        c[10] = 10
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 0,
-                          'itemcount': 6,
-                          'permanentcount': 5})
-
-    def test_get_with_default(self):
-        """
-        Tests the capability of QueryCache for retrieving items with a default value
-        """
-        c = QueryCache(ceiling=20)
-        # set 10 values
-        for x in range(10):
-            c[x] = x
-        # arrange for the first 5 to be permanent
-        for x in range(5):
-            for r in range(QueryCache._maxlevel + 2):
-                v = c[x]
-                self.assertEqual(v, x)
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 0,
-                          'itemcount': 10,
-                          'permanentcount': 5})
-        # Test defaults for existing (including in permanents)
-        for x in range(10):
-            v = c.get(x, -1)
-            self.assertEqual(v, x)
-        # Test defaults for others
-        for x in range(10, 15):
-            v = c.get(x, -1)
-            self.assertEqual(v, -1)
-
-    def test_iterkeys(self):
-        """
-        Tests the iterating on keys in the cache
-        """
-        c = QueryCache(ceiling=20)
-        # set 10 values
-        for x in range(10):
-            c[x] = x
-        # arrange for the first 5 to be permanent
-        for x in range(5):
-            for r in range(QueryCache._maxlevel + 2):
-                v = c[x]
-                self.assertEqual(v, x)
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 0,
-                          'itemcount': 10,
-                          'permanentcount': 5})
-        keys = sorted(c)
-        for x in range(10):
-            self.assertEqual(x, keys[x])
-
-    def test_items(self):
-        """
-        Tests the iterating on key-value couples in the cache
-        """
-        c = QueryCache(ceiling=20)
-        # set 10 values
-        for x in range(10):
-            c[x] = x
-        # arrange for the first 5 to be permanent
-        for x in range(5):
-            for r in range(QueryCache._maxlevel + 2):
-                v = c[x]
-                self.assertEqual(v, x)
-        self.assertEqual(c._usage_report(),
-                         {'transientcount': 0,
-                          'itemcount': 10,
-                          'permanentcount': 5})
-        content = sorted(c.items())
-        for x in range(10):
-            self.assertEqual(x, content[x][0])
-            self.assertEqual(x, content[x][1])
-
-
 class UStringIOTC(TestCase):
     def test_boolean_value(self):
         self.assertTrue(UStringIO())

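
The deleted tests exercised QueryCache's permanent/transient bookkeeping,
which has no LFUCache counterpart; the property that survives the swap is
the size bound. A hypothetical replacement test, not part of this
changeset:

    from unittest import TestCase

    from cachetools import LFUCache

    class TestLFUCacheCeiling(TestCase):
        def test_ceiling_is_respected(self):
            c = LFUCache(maxsize=10)
            for x in range(20):
                c[x] = x
            # inserting past maxsize evicts; the cache never grows
            self.assertEqual(len(c), 10)
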
diff --git a/cubicweb/utils.py b/cubicweb/utils.py
@@ -27,11 +27,11 @@
 import json
 from operator import itemgetter
 from inspect import getfullargspec as getargspec
 from itertools import repeat
 from uuid import uuid4
-from threading import Lock
+from warnings import warn
 from logging import getLogger

 from logilab.mtconverter import xml_escape
 from logilab.common.date import ustrftime

@@ -575,160 +575,5 @@
     else:
         return ipdb


 logger = getLogger('cubicweb.utils')
-
-class QueryCache(object):
-    """ a minimalist dict-like object to be used by the querier
-    and native source (replaces lgc.cache for this very usage)
-
-    To be efficient it must be properly used. The usage patterns are
-    quite specific to its current clients.
-
-    The ceiling value should be sufficiently high, else it will be
-    ruthlessly inefficient (there will be warnings when this happens).
-    A good (high enough) value can only be set on a per-application
-    value. A default, reasonnably high value is provided but tuning
-    e.g `rql-cache-size` can certainly help.
-
-    There are two kinds of elements to put in this cache:
-    * frequently used elements
-    * occasional elements
-
-    The former should finish in the _permanent structure after some
-    warmup.
-
-    Occasional elements can be buggy requests (server-side) or
-    end-user (web-ui provided) requests. These have to be cleaned up
-    when they fill the cache, without evicting the useful, frequently
-    used entries.
-    """
-    # quite arbitrary, but we want to never
-    # immortalize some use-a-little query
-    _maxlevel = 15
-
-    def __init__(self, ceiling=3000):
-        self._max = ceiling
-        # keys belonging forever to this cache
-        self._permanent = set()
-        # mapping of key (that can get wiped) to getitem count
-        self._transient = {}
-        self._data = {}
-        self._lock = Lock()
-
-    def __contains__(self, key):
-        return key in self._data
-
-    def __len__(self):
-        with self._lock:
-            return len(self._data)
-
-    def items(self):
-        """Get an iterator over the dictionary's items: (key, value) pairs"""
-        with self._lock:
-            for k, v in self._data.items():
-                yield k, v
-
-    def get(self, k, default=None):
-        """Get the value associated to the specified key
-
-        :param k: The key to look for
-        :param default: The default value when the key is not found
-        :return: The associated value (or the default value)
-        """
-        try:
-            return self._data[k]
-        except KeyError:
-            return default
-
-    def __iter__(self):
-        with self._lock:
-            for k in iter(self._data):
-                yield k
-
-    def __getitem__(self, k):
-        with self._lock:
-            if k in self._permanent:
-                return self._data[k]
-            v = self._transient.get(k, _MARKER)
-            if v is _MARKER:
-                self._transient[k] = 1
-                return self._data[k]
-            if v > self._maxlevel:
-                self._permanent.add(k)
-                self._transient.pop(k, None)
-            else:
-                self._transient[k] += 1
-            return self._data[k]
-
-    def __setitem__(self, k, v):
-        with self._lock:
-            if len(self._data) >= self._max:
-                self._try_to_make_room()
-            self._data[k] = v
-
-    def pop(self, key, default=_MARKER):
-        with self._lock:
-            try:
-                if default is _MARKER:
-                    return self._data.pop(key)
-                return self._data.pop(key, default)
-            finally:
-                if key in self._permanent:
-                    self._permanent.remove(key)
-                else:
-                    self._transient.pop(key, None)
-
-    def clear(self):
-        with self._lock:
-            self._clear()
-
-    def _clear(self):
-        self._permanent = set()
-        self._transient = {}
-        self._data = {}
-
-    def _try_to_make_room(self):
-        current_size = len(self._data)
-        items = sorted(self._transient.items(), key=itemgetter(1))
-        level = 0
-        for k, v in items:
-            self._data.pop(k, None)
-            self._transient.pop(k, None)
-            if v > level:
-                datalen = len(self._data)
-                if datalen == 0:
-                    return
-                if (current_size - datalen) / datalen > .1:
-                    break
-                level = v
-        else:
-            # we removed cruft
-            if len(self._data) >= self._max:
-                if len(self._permanent) >= self._max:
-                    # we really are full with permanents => clear
-                    logger.warning('Cache %s is full.' % id(self))
-                    self._clear()
-                else:
-                    # pathological case where _transient was probably empty ...
-                    # drop all non-permanents
-                    to_drop = set(self._data.keys()).difference(self._permanent)
-                    for k in to_drop:
-                        # should not be in _transient
-                        assert k not in self._transient
-                        self._data.pop(k, None)
-
-    def _usage_report(self):
-        with self._lock:
-            return {'itemcount': len(self._data),
-                    'transientcount': len(self._transient),
-                    'permanentcount': len(self._permanent)}
-
-    def popitem(self):
-        raise NotImplementedError()
-
-    def setdefault(self, key, default=None):
-        raise NotImplementedError()
-
-    def update(self, other):
-        raise NotImplementedError()
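
For reference, the promotion rule the deleted class implemented, reduced
to a standalone sketch (the threshold mirrors _maxlevel above; the exact
bookkeeping of __getitem__ is simplified): a key read more than _maxlevel
times moved into the permanent set and could then only leave the cache
through pop() or clear().

    _maxlevel = 15
    permanent, transient = set(), {}

    def record_read(key):
        if key in permanent:
            return              # already immortal, nothing to count
        count = transient.pop(key, 0) + 1
        if count > _maxlevel:
            permanent.add(key)  # promoted: exempt from overflow cleanup
        else:
            transient[key] = count  # still eligible for eviction
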
diff --git a/setup.py b/setup.py
@@ -79,10 +79,11 @@
         'pyramid >= 1.5.0',
         'waitress >= 0.8.9',
         'wsgicors >= 0.3',
         'pyramid_multiauth',
         'repoze.lru',
+        'cachetools',
     ],
     entry_points={
         'console_scripts': [
             'cubicweb-ctl = cubicweb.cwctl:run',
         ],