# HG changeset patch
# User Nicolas Chauvat <nicolas.chauvat@logilab.fr>
# Date 1552510013 -3600
# Wed Mar 13 21:46:53 2019 +0100
# Node ID 4ed547bd583f116ae1fdf3f12e772649fb1ef43b
# Parent 3c66c31ebdd2394ea6a254563c8bb662004aaf7e
[server] replace utils.QueryCache with cachetools.LFUCache
This change introduces a dependency on cachetools, but reinventing the wheel seems useless in the present case
# User Nicolas Chauvat <nicolas.chauvat@logilab.fr>
# Date 1552510013 -3600
# Wed Mar 13 21:46:53 2019 +0100
# Node ID 4ed547bd583f116ae1fdf3f12e772649fb1ef43b
# Parent 3c66c31ebdd2394ea6a254563c8bb662004aaf7e
[server] replace utils.QueryCache with cachetools.LFUCache
This change introduces a dependency on cachetools, but reinventing the wheel seems useless in the present case
@@ -20,10 +20,13 @@
1 """ 2 import uuid 3 import time 4 import traceback 5 from itertools import repeat 6 +from threading import Lock 7 + 8 +from cachetools import LFUCache 9 10 from rql import RQLSyntaxError, CoercionError 11 from rql.stmts import Union 12 from rql.nodes import ETYPE_PYOBJ_MAP, etype_from_pyobj, Relation, Exists, Not,\ 13 VariableRef, Constant
@@ -33,11 +36,11 @@
14 from cubicweb.rqlrewrite import RQLRelationRewriter 15 from cubicweb import Binary, server 16 from cubicweb.rset import ResultSet 17 from cubicweb.debug import emit_to_debug_channel 18 19 -from cubicweb.utils import QueryCache, RepeatList 20 +from cubicweb.utils import RepeatList 21 from cubicweb.misc.source_highlight import highlight_terminal 22 from cubicweb.server.rqlannotation import RQLAnnotator, set_qdata 23 from cubicweb.server.ssplanner import (READ_ONLY_RTYPES, add_types_restriction, 24 prepare_plan) 25 from cubicweb.server.edition import EditedEntity
@@ -628,11 +631,12 @@
26 27 class RQLCache(object): 28 29 def __init__(self, repo, schema): 30 # rql st and solution cache. 31 - self._cache = QueryCache(repo.config['rql-cache-size']) 32 + self._cache = LFUCache(repo.config['rql-cache-size']) 33 + self._lock = Lock() 34 # rql cache key cache. Don't bother using a Cache instance: we should 35 # have a limited number of queries in there, since there are no entries 36 # in this cache for user queries (which have no args) 37 self._ck_cache = {} 38 # some cache usage stats
@@ -650,48 +654,51 @@
39 except UnicodeError: 40 raise RQLSyntaxError(rql) 41 self._parse = parse 42 43 def __len__(self): 44 - return len(self._cache) 45 + with self._lock: 46 + return len(self._cache) 47 48 def get(self, cnx, rql, args): 49 """Return syntax tree and cache key for the given RQL. 50 51 Returned syntax tree is cached and must not be modified 52 """ 53 - # parse the query and binds variables 54 - cachekey = (rql,) 55 - try: 56 - if args: 57 - # search for named args in query which are eids (hence 58 - # influencing query's solutions) 59 - eidkeys = self._ck_cache[rql] 60 - if eidkeys: 61 - # if there are some, we need a better cache key, eg (rql + 62 - # entity type of each eid) 63 - cachekey = _rql_cache_key(cnx, rql, args, eidkeys) 64 - rqlst = self._cache[cachekey] 65 - self.cache_hit += 1 66 - except KeyError: 67 - self.cache_miss += 1 68 - rqlst = self._parse(rql) 69 - # compute solutions for rqlst and return named args in query 70 - # which are eids. Notice that if you may not need `eidkeys`, we 71 - # have to compute solutions anyway (kept as annotation on the 72 - # tree) 73 - eidkeys = self.compute_var_types(cnx, rqlst, args) 74 - if args and rql not in self._ck_cache: 75 - self._ck_cache[rql] = eidkeys 76 - if eidkeys: 77 - cachekey = _rql_cache_key(cnx, rql, args, eidkeys) 78 - self._cache[cachekey] = rqlst 79 - return rqlst, cachekey 80 + with self._lock: 81 + # parse the query and binds variables 82 + cachekey = (rql,) 83 + try: 84 + if args: 85 + # search for named args in query which are eids (hence 86 + # influencing query's solutions) 87 + eidkeys = self._ck_cache[rql] 88 + if eidkeys: 89 + # if there are some, we need a better cache key, eg (rql + 90 + # entity type of each eid) 91 + cachekey = _rql_cache_key(cnx, rql, args, eidkeys) 92 + rqlst = self._cache[cachekey] 93 + self.cache_hit += 1 94 + except KeyError: 95 + self.cache_miss += 1 96 + rqlst = self._parse(rql) 97 + # compute solutions for rqlst and return named args in query 98 + # which are 
eids. Notice that if you may not need `eidkeys`, we 99 + # have to compute solutions anyway (kept as annotation on the 100 + # tree) 101 + eidkeys = self.compute_var_types(cnx, rqlst, args) 102 + if args and rql not in self._ck_cache: 103 + self._ck_cache[rql] = eidkeys 104 + if eidkeys: 105 + cachekey = _rql_cache_key(cnx, rql, args, eidkeys) 106 + self._cache[cachekey] = rqlst 107 + return rqlst, cachekey 108 109 def pop(self, key, *args): 110 """Pop a key from the cache.""" 111 - self._cache.pop(key, *args) 112 + with self._lock: 113 + self._cache.pop(key, *args) 114 115 116 def _rql_cache_key(cnx, rql, args, eidkeys): 117 cachekey = [rql] 118 type_from_eid = cnx.repo.type_from_eid
@@ -28,21 +28,22 @@
119 import time 120 import zipfile 121 import logging 122 import sys 123 124 +import cachetools 125 + 126 from logilab.common.decorators import cached, clear_cache 127 from logilab.common.configuration import Method 128 from logilab.common.shellutils import getlogin, ASK 129 from logilab.database import get_db_helper, sqlgen 130 131 from yams.schema import role_name 132 133 from cubicweb import (UnknownEid, AuthenticationError, ValidationError, Binary, 134 UniqueTogetherError, UndoTransactionException, ViolatedConstraint) 135 from cubicweb import transaction as tx, server, neg_role, _ 136 -from cubicweb.utils import QueryCache 137 from cubicweb.debug import emit_to_debug_channel 138 from cubicweb.schema import VIRTUAL_RTYPES 139 from cubicweb.cwconfig import CubicWebNoAppConfiguration 140 from cubicweb.server import hook 141 from cubicweb.server import schema2sql as y2sql
@@ -334,11 +335,11 @@
142 self._rql_sqlgen = rql2sql.SQLGenerator(self.schema, self.dbhelper, 143 ATTR_MAP.copy()) 144 # full text index helper 145 self.do_fti = not repo.config['delay-full-text-indexation'] 146 # sql queries cache 147 - self._cache = QueryCache(repo.config['rql-cache-size']) 148 + self._cache = cachetools.LFUCache(repo.config['rql-cache-size']) 149 # (etype, attr) / storage mapping 150 self._storages = {} 151 self.binary_to_str = self.dbhelper.dbapi_module.binary_to_str 152 if self.dbdriver == 'sqlite': 153 self.eid_generator = SQLITEEidGenerator(self)
@@ -364,11 +365,11 @@
154 authentifier.set_schema(self.schema) 155 156 def clear_caches(self, eids, etypes): 157 """Clear potential source caches.""" 158 if eids is None: 159 - self._cache = QueryCache(self.repo.config['rql-cache-size']) 160 + self._cache.clear() 161 else: 162 cache = self._cache 163 for eid, etype in zip(eids, etypes): 164 cache.pop('Any X WHERE X eid %s' % eid, None) 165 cache.pop('Any %s' % eid, None)
@@ -473,11 +474,11 @@
166 set_qdata(self.schema.rschema, rqlst, ()) 167 return rqlst 168 169 def set_schema(self, schema): 170 """set the instance'schema""" 171 - self._cache = QueryCache(self.repo.config['rql-cache-size']) 172 + self._cache = cachetools.LFUCache(self.repo.config['rql-cache-size']) 173 self.cache_hit, self.cache_miss, self.no_cache = 0, 0, 0 174 self.schema = schema 175 try: 176 self._rql_sqlgen.schema = schema 177 except AttributeError:
diff --git a/cubicweb/server/test/unittest_server_utils.py b/cubicweb/server/test/unittest_server_utils.py
@@ -24,12 +24,11 @@
178 import re 179 from unittest import TestCase 180 181 from cubicweb import Binary, Unauthorized 182 from cubicweb.devtools.testlib import CubicWebTC 183 -from cubicweb.utils import (make_uid, UStringIO, RepeatList, HTMLHead, 184 - QueryCache) 185 +from cubicweb.utils import make_uid, UStringIO, RepeatList, HTMLHead 186 from cubicweb.entity import Entity 187 188 try: 189 from cubicweb.utils import CubicWebJsonEncoder, json 190 except ImportError:
@@ -51,149 +50,10 @@
191 self.fail('make_uid must not return something begining with ' 192 'some numeric character, got %s' % uid) 193 d.add(uid) 194 195 196 -class TestQueryCache(TestCase): 197 - def test_querycache(self): 198 - c = QueryCache(ceiling=20) 199 - # write only 200 - for x in range(10): 201 - c[x] = x 202 - self.assertEqual(c._usage_report(), 203 - {'transientcount': 0, 204 - 'itemcount': 10, 205 - 'permanentcount': 0}) 206 - c = QueryCache(ceiling=10) 207 - # we should also get a warning 208 - for x in range(20): 209 - c[x] = x 210 - self.assertEqual(c._usage_report(), 211 - {'transientcount': 0, 212 - 'itemcount': 10, 213 - 'permanentcount': 0}) 214 - # write + reads 215 - c = QueryCache(ceiling=20) 216 - for n in range(4): 217 - for x in range(10): 218 - c[x] = x 219 - c[x] 220 - self.assertEqual(c._usage_report(), 221 - {'transientcount': 10, 222 - 'itemcount': 10, 223 - 'permanentcount': 0}) 224 - c = QueryCache(ceiling=20) 225 - for n in range(17): 226 - for x in range(10): 227 - c[x] = x 228 - c[x] 229 - self.assertEqual(c._usage_report(), 230 - {'transientcount': 0, 231 - 'itemcount': 10, 232 - 'permanentcount': 10}) 233 - c = QueryCache(ceiling=20) 234 - for n in range(17): 235 - for x in range(10): 236 - c[x] = x 237 - if n % 2: 238 - c[x] 239 - if x % 2: 240 - c[x] 241 - self.assertEqual(c._usage_report(), 242 - {'transientcount': 5, 243 - 'itemcount': 10, 244 - 'permanentcount': 5}) 245 - 246 - def test_clear_on_overflow(self): 247 - """Tests that only non-permanent items in the cache are wiped-out on ceiling overflow 248 - """ 249 - c = QueryCache(ceiling=10) 250 - # set 10 values 251 - for x in range(10): 252 - c[x] = x 253 - # arrange for the first 5 to be permanent 254 - for x in range(5): 255 - for r in range(QueryCache._maxlevel + 2): 256 - v = c[x] 257 - self.assertEqual(v, x) 258 - # Add the 11-th 259 - c[10] = 10 260 - self.assertEqual(c._usage_report(), 261 - {'transientcount': 0, 262 - 'itemcount': 6, 263 - 'permanentcount': 5}) 264 - 265 - def 
test_get_with_default(self): 266 - """ 267 - Tests the capability of QueryCache for retrieving items with a default value 268 - """ 269 - c = QueryCache(ceiling=20) 270 - # set 10 values 271 - for x in range(10): 272 - c[x] = x 273 - # arrange for the first 5 to be permanent 274 - for x in range(5): 275 - for r in range(QueryCache._maxlevel + 2): 276 - v = c[x] 277 - self.assertEqual(v, x) 278 - self.assertEqual(c._usage_report(), 279 - {'transientcount': 0, 280 - 'itemcount': 10, 281 - 'permanentcount': 5}) 282 - # Test defaults for existing (including in permanents) 283 - for x in range(10): 284 - v = c.get(x, -1) 285 - self.assertEqual(v, x) 286 - # Test defaults for others 287 - for x in range(10, 15): 288 - v = c.get(x, -1) 289 - self.assertEqual(v, -1) 290 - 291 - def test_iterkeys(self): 292 - """ 293 - Tests the iterating on keys in the cache 294 - """ 295 - c = QueryCache(ceiling=20) 296 - # set 10 values 297 - for x in range(10): 298 - c[x] = x 299 - # arrange for the first 5 to be permanent 300 - for x in range(5): 301 - for r in range(QueryCache._maxlevel + 2): 302 - v = c[x] 303 - self.assertEqual(v, x) 304 - self.assertEqual(c._usage_report(), 305 - {'transientcount': 0, 306 - 'itemcount': 10, 307 - 'permanentcount': 5}) 308 - keys = sorted(c) 309 - for x in range(10): 310 - self.assertEqual(x, keys[x]) 311 - 312 - def test_items(self): 313 - """ 314 - Tests the iterating on key-value couples in the cache 315 - """ 316 - c = QueryCache(ceiling=20) 317 - # set 10 values 318 - for x in range(10): 319 - c[x] = x 320 - # arrange for the first 5 to be permanent 321 - for x in range(5): 322 - for r in range(QueryCache._maxlevel + 2): 323 - v = c[x] 324 - self.assertEqual(v, x) 325 - self.assertEqual(c._usage_report(), 326 - {'transientcount': 0, 327 - 'itemcount': 10, 328 - 'permanentcount': 5}) 329 - content = sorted(c.items()) 330 - for x in range(10): 331 - self.assertEqual(x, content[x][0]) 332 - self.assertEqual(x, content[x][1]) 333 - 334 - 335 class 
UStringIOTC(TestCase): 336 def test_boolean_value(self): 337 self.assertTrue(UStringIO()) 338 339
@@ -27,11 +27,11 @@
340 import json 341 from operator import itemgetter 342 from inspect import getfullargspec as getargspec 343 from itertools import repeat 344 from uuid import uuid4 345 -from threading import Lock 346 +from warnings import warn 347 from logging import getLogger 348 349 from logilab.mtconverter import xml_escape 350 from logilab.common.date import ustrftime 351
@@ -575,160 +575,5 @@
352 else: 353 return ipdb 354 355 356 logger = getLogger('cubicweb.utils') 357 - 358 -class QueryCache(object): 359 - """ a minimalist dict-like object to be used by the querier 360 - and native source (replaces lgc.cache for this very usage) 361 - 362 - To be efficient it must be properly used. The usage patterns are 363 - quite specific to its current clients. 364 - 365 - The ceiling value should be sufficiently high, else it will be 366 - ruthlessly inefficient (there will be warnings when this happens). 367 - A good (high enough) value can only be set on a per-application 368 - value. A default, reasonnably high value is provided but tuning 369 - e.g `rql-cache-size` can certainly help. 370 - 371 - There are two kinds of elements to put in this cache: 372 - * frequently used elements 373 - * occasional elements 374 - 375 - The former should finish in the _permanent structure after some 376 - warmup. 377 - 378 - Occasional elements can be buggy requests (server-side) or 379 - end-user (web-ui provided) requests. These have to be cleaned up 380 - when they fill the cache, without evicting the useful, frequently 381 - used entries. 
382 - """ 383 - # quite arbitrary, but we want to never 384 - # immortalize some use-a-little query 385 - _maxlevel = 15 386 - 387 - def __init__(self, ceiling=3000): 388 - self._max = ceiling 389 - # keys belonging forever to this cache 390 - self._permanent = set() 391 - # mapping of key (that can get wiped) to getitem count 392 - self._transient = {} 393 - self._data = {} 394 - self._lock = Lock() 395 - 396 - def __contains__(self, key): 397 - return key in self._data 398 - 399 - def __len__(self): 400 - with self._lock: 401 - return len(self._data) 402 - 403 - def items(self): 404 - """Get an iterator over the dictionary's items: (key, value) pairs""" 405 - with self._lock: 406 - for k, v in self._data.items(): 407 - yield k, v 408 - 409 - def get(self, k, default=None): 410 - """Get the value associated to the specified key 411 - 412 - :param k: The key to look for 413 - :param default: The default value when the key is not found 414 - :return: The associated value (or the default value) 415 - """ 416 - try: 417 - return self._data[k] 418 - except KeyError: 419 - return default 420 - 421 - def __iter__(self): 422 - with self._lock: 423 - for k in iter(self._data): 424 - yield k 425 - 426 - def __getitem__(self, k): 427 - with self._lock: 428 - if k in self._permanent: 429 - return self._data[k] 430 - v = self._transient.get(k, _MARKER) 431 - if v is _MARKER: 432 - self._transient[k] = 1 433 - return self._data[k] 434 - if v > self._maxlevel: 435 - self._permanent.add(k) 436 - self._transient.pop(k, None) 437 - else: 438 - self._transient[k] += 1 439 - return self._data[k] 440 - 441 - def __setitem__(self, k, v): 442 - with self._lock: 443 - if len(self._data) >= self._max: 444 - self._try_to_make_room() 445 - self._data[k] = v 446 - 447 - def pop(self, key, default=_MARKER): 448 - with self._lock: 449 - try: 450 - if default is _MARKER: 451 - return self._data.pop(key) 452 - return self._data.pop(key, default) 453 - finally: 454 - if key in self._permanent: 
455 - self._permanent.remove(key) 456 - else: 457 - self._transient.pop(key, None) 458 - 459 - def clear(self): 460 - with self._lock: 461 - self._clear() 462 - 463 - def _clear(self): 464 - self._permanent = set() 465 - self._transient = {} 466 - self._data = {} 467 - 468 - def _try_to_make_room(self): 469 - current_size = len(self._data) 470 - items = sorted(self._transient.items(), key=itemgetter(1)) 471 - level = 0 472 - for k, v in items: 473 - self._data.pop(k, None) 474 - self._transient.pop(k, None) 475 - if v > level: 476 - datalen = len(self._data) 477 - if datalen == 0: 478 - return 479 - if (current_size - datalen) / datalen > .1: 480 - break 481 - level = v 482 - else: 483 - # we removed cruft 484 - if len(self._data) >= self._max: 485 - if len(self._permanent) >= self._max: 486 - # we really are full with permanents => clear 487 - logger.warning('Cache %s is full.' % id(self)) 488 - self._clear() 489 - else: 490 - # pathological case where _transient was probably empty ... 491 - # drop all non-permanents 492 - to_drop = set(self._data.keys()).difference(self._permanent) 493 - for k in to_drop: 494 - # should not be in _transient 495 - assert k not in self._transient 496 - self._data.pop(k, None) 497 - 498 - def _usage_report(self): 499 - with self._lock: 500 - return {'itemcount': len(self._data), 501 - 'transientcount': len(self._transient), 502 - 'permanentcount': len(self._permanent)} 503 - 504 - def popitem(self): 505 - raise NotImplementedError() 506 - 507 - def setdefault(self, key, default=None): 508 - raise NotImplementedError() 509 - 510 - def update(self, other): 511 - raise NotImplementedError()
@@ -79,10 +79,11 @@
512 'pyramid >= 1.5.0', 513 'waitress >= 0.8.9', 514 'wsgicors >= 0.3', 515 'pyramid_multiauth', 516 'repoze.lru', 517 + 'cachetools', 518 ], 519 entry_points={ 520 'console_scripts': [ 521 'cubicweb-ctl = cubicweb.cwctl:run', 522 ],