use cubicweb-similarity to display side-box with related books

author: Nicolas Chauvat <nicolas.chauvat@logilab.fr>
changeset: 0e16715ecad0
branch: default
phase: draft
hidden: yes
parent revision: #ec56a12d2d23 parse OL rdf using rdflib instead of lxml
child revision: <not specified>
files modified by this revision
__pkginfo__.py
hooks.py
sobjects.py
views/boxes.py
views/urls.py
# HG changeset patch
# User Nicolas Chauvat <nicolas.chauvat@logilab.fr>
# Date 1377844636 -7200
# Fri Aug 30 08:37:16 2013 +0200
# Node ID 0e16715ecad0d0ad42887aac1a9728f81eddf833
# Parent ec56a12d2d235888c5ce2802fb8b618652640dfa
use cubicweb-similarity to display side-box with related books

diff --git a/__pkginfo__.py b/__pkginfo__.py
@@ -31,10 +31,12 @@
1 
2  __depends__ = {'cubicweb': '>= 3.10.0',
3                 'cubicweb-addressbook': None,
4                 'cubicweb-person': None,
5                 'cubicweb-file': None,
6 +               'cubicweb-file': None,
7 +               'cubicweb-similarity': None,
8                 }
9 
10  # packaging ###
11 
12  from os import listdir as _listdir
diff --git a/hooks.py b/hooks.py
@@ -11,10 +11,27 @@
13  from cubicweb import Binary, ValidationError
14  from cubicweb.selectors import is_instance
15  from cubicweb.server import hook
16 
17  from cubes.book import olapi
18 +from cubes.similarity import reset_similarity
19 +
20 +class BookAttributeHook(hook.Hook):
21 +    __regid__ = 'book_attribute_hook'
22 +    events = ('after_add_entity', 'after_update_entity', 'after_delete_entity')
23 +    __select__ = hook.Hook.__select__ & is_instance('Book')
24 +
25 +    def __call__(self):
26 +        reset_similarity('Book')
27 +
28 +class PersonAttributeHook(hook.Hook):
29 +    __regid__ = 'person_attribute_hook'
30 +    events = ('after_add_entity', 'after_update_entity', 'after_delete_entity')
31 +    __select__ = hook.Hook.__select__ & is_instance('Person')
32 +
33 +    def __call__(self):
34 +        reset_similarity('Person')
35 
36  def set_if_unset(entity, key, book, otherkey):
37      if 'key' not in entity.cw_attr_cache:
38          if otherkey in book:
39              value = book[otherkey]
diff --git a/sobjects.py b/sobjects.py
@@ -1,2 +1,67 @@
40 +# -*- encoding: utf-8 -*-
41 +
42  """this contains the server-side objects"""
43 
44 +import re
45 +import functools as ft
46 +
47 +from cubicweb.entities import AnyEntity
48 +
49 +from cubes.similarity import register_similarity, reset_similarity, vsm
50 +
51 +number_rgx = re.compile('^\d+$')
52 +
53 +REPLACE_TABLE = [
54 +    ('&nbsp;', ' '),
55 +    ]
56 +
57 +# BOOKS ##############################################################
58 +
59 +book_filters = [
60 +    ft.partial(vsm.replace_filter, REPLACE_TABLE),
61 +    ft.partial(vsm.tokenize_filter, vsm.TOKENIZE_PATTERN),
62 +    #ft.partial(vsm.exclude_filter, vsm.STOP_WORDS['fr']),
63 +    #ft.partial(vsm.transform_filter, TRANSLATE_TABLE),
64 +    #ft.partial(vsm.ngrams_filter, 2),
65 +    ]
66 +
67 +book_rql = 'Any X WHERE X is Book'
68 +def book_to_text(entity):
69 +    return u' '.join(entity.title)
70 +
71 +register_similarity('Book', book_rql, book_to_text, book_filters)
72 +
73 +# PERSON ##############################################################
74 +
75 +PERSON_TRANSLATE_TABLE = [
76 +    (('tom',), ft.partial(vsm.const, 'thomas')),
77 +    ]
78 +
79 +person_filters = [
80 +    ft.partial(vsm.replace_filter, REPLACE_TABLE),
81 +    ft.partial(vsm.tokenize_filter, vsm.TOKENIZE_PATTERN),
82 +    ft.partial(vsm.transform_filter, PERSON_TRANSLATE_TABLE),
83 +    ]
84 +
85 +person_rql = 'Any X WHERE X is Person'
86 +def person_to_text(entity):
87 +    return u' '.join([(entity.firstname or u''), (entity.surname or u'')])
88 +
89 +register_similarity('Person', person_rql, person_to_text, person_filters)
90 +
91 +# EDITOR ##############################################################
92 +
93 +EDITOR_STOP_WORDS = ('books','publishers')
94 +
95 +editor_filters = [
96 +    ft.partial(vsm.replace_filter, REPLACE_TABLE),
97 +    ft.partial(vsm.tokenize_filter, vsm.TOKENIZE_PATTERN),
98 +    ft.partial(vsm.exclude_filter, EDITOR_STOP_WORDS),
99 +    ]
100 +
101 +editor_rql = 'Any X WHERE X is Editor'
102 +def editor_to_text(entity):
103 +    return u' '.join([(entity.name or u''), ])
104 +
105 +register_similarity('Editor', editor_rql, editor_to_text, editor_filters)
106 +
diff --git a/views/boxes.py b/views/boxes.py
@@ -9,10 +9,12 @@
107  __docformat__ = "restructuredtext en"
108 
109  from cubicweb.selectors import is_instance, score_entity
110  from cubicweb.web.component import EntityCtxComponent
111 
112 +from cubes.similarity import get_vspace
113 +
114  def has_isbn(entity):
115      return entity.isbn13 is not None
116 
117  class BookSeeAlso(EntityCtxComponent):
118      __regid__ = 'book_seealso_box'
@@ -30,7 +32,92 @@
119          self.append(self.link('OpenLibrary', 'http://openlibrary.org/isbn/%s' % isbn))
120          self.append(self.link('Google Books', 'http://books.google.com/books?q=isbn:%s' % isbn))
121          self.append(self.link('Amazon Books', 'http://www.amazon.com/gp/search/ref=sr_adv_b/?field-isbn=%s' % isbn))
122          self.render_items(w)
123 
124 +class BookSameAuthor(EntityBoxTemplate):
125 +    __regid__ = 'book_sameauthor_box'
126 +    __select__ = EntityBoxTemplate.__select__ & is_instance('Book')
127 +    order = 25
128 +
129 +    def cell_call(self, row, col, **kwargs):
130 +        entity = self.cw_rset.get_entity(row, col)
131 +        rset = self._cw.execute('DISTINCT Any B WHERE B authors A, X authors A, X eid %(x)s, NOT B eid %(x)s', {'x': entity.eid})
132 +        if rset:
133 +            self.w(u'<div class="sideBox">')
134 +            self.w(u'<div class="sideBoxTitle"><span>%s</span></div>' % _('Books by the same author'))
135 +            self.w(u'<div class="%s"><div class="sideBoxBody">' % 'sideBox')
136 +            self.wview('list', rset)
137 +            self.w(u'</div>')
138 +            self.w(u'</div>')
139 +            self.w(u'</div>')
140 +
141 +class ClosestBooksBox(EntityBoxTemplate):
142 +    __regid__ = 'closest_books_box'
143 +    __select__ = EntityBoxTemplate.__select__ & is_instance('Book')
144 +
145 +    def cell_call(self, row, col, **kwargs):
146 +        entity = self.cw_rset.get_entity(row, col)
147 +        vspace = get_vspace('Book', self._cw)
148 +        scores = vspace.similarity_by_id(entity.eid)
149 +        if scores:
150 +            self.w(u'<div class="sideBox">')
151 +            self.w(u'<div class="sideBoxTitle"><span>%s</span></div>' % _('Similar book titles'))
152 +            self.w(u'<div class="%s"><div class="sideBoxBody">' % 'sideBox')
153 +            count = 0
154 +            for score, eids in scores[:5]:
155 +                for eid in eids:
156 +                    if entity.eid != eid:
157 +                        self.w(u'<span>%.2f - %s</span><br />' % (score, self._cw.entity_from_eid(eid).view('outofcontext')))
158 +            self.w(u'</div>')
159 +            self.w(u'</div>')
160 +            self.w(u'</div>')
161 +
162 +class ClosestPersonBox(EntityBoxTemplate):
163 +    __regid__ = 'closest_persons_box'
164 +    __select__ = EntityBoxTemplate.__select__ & is_instance('Person')
165 +
166 +    def cell_call(self, row, col, **kwargs):
167 +        entity = self.cw_rset.get_entity(row, col)
168 +        vspace = get_vspace('Person', self._cw)
169 +        scores = vspace.similarity_by_id(entity.eid)
170 +        if scores:
171 +            self.w(u'<div class="sideBox">')
172 +            self.w(u'<div class="sideBoxTitle"><span>%s</span></div>' % _('Similar person names'))
173 +            self.w(u'<div class="%s"><div class="sideBoxBody">' % 'sideBox')
174 +            count = 0
175 +            for score, eids in scores[:5]:
176 +                if len(eids) + count > 10:
177 +                    break
178 +                for eid in eids:
179 +                    if entity.eid != eid:
180 +                        self.w(u'<span>%.2f - %s</span><br />' % (score, self._cw.entity_from_eid(eid).view('outofcontext')))
181 +                count += len(eids)
182 +            self.w(u'</div>')
183 +            self.w(u'</div>')
184 +            self.w(u'</div>')
185 
186 
187 +class ClosestEditorBox(EntityBoxTemplate):
188 +    __regid__ = 'closest_editors_box'
189 +    __select__ = EntityBoxTemplate.__select__ & is_instance('Editor')
190 +
191 +    def cell_call(self, row, col, **kwargs):
192 +        entity = self.cw_rset.get_entity(row, col)
193 +        vspace = get_vspace('Editor', self._cw)
194 +        scores = vspace.similarity_by_id(entity.eid)
195 +        if scores:
196 +            self.w(u'<div class="sideBox">')
197 +            self.w(u'<div class="sideBoxTitle"><span>%s</span></div>' % _('Similar editor names'))
198 +            self.w(u'<div class="%s"><div class="sideBoxBody">' % 'sideBox')
199 +            count = 0
200 +            for score, eids in scores[:5]:
201 +                if len(eids) + count > 10:
202 +                    break
203 +                for eid in eids:
204 +                    if entity.eid != eid:
205 +                        self.w(u'<span>%.2f - %s</span><br />' % (score, self._cw.entity_from_eid(eid).view('outofcontext')))
206 +                count += len(eids)
207 +            self.w(u'</div>')
208 +            self.w(u'</div>')
209 +            self.w(u'</div>')
210 +
diff --git a/views/urls.py b/views/urls.py
@@ -9,12 +9,18 @@
211 
212  from cubicweb.web.views.urlrewrite import SimpleReqRewriter, rgx
213 
214  class BookReqRewriter(SimpleReqRewriter):
215      rules = [
216 +        # books
217          (rgx(r'/book/(\d+)\.rdf'),
218           dict(rql='Any B WHERE B is Book, B eid %(eid)s' % {'eid': r'\1'}, vid='bibo')),
219          (rgx(r'/book/(\d+)\.html'),
220           dict(rql='Any B WHERE B is Book, B eid %(eid)s' % {'eid': r'\1'})),
221          (rgx(r'/book/isbn/(.*)'),
222           dict(rql='Any B WHERE B is Book, (B isbn13 "%(isbn)s") OR (B isbn10 "%(isbn)s")' % {'isbn': r'\1'})),
223 +        # persons
224 +        (rgx('/person/(\d+)\.rdf'),
225 +         dict(rql='Any B WHERE B is Person, B eid %(eid)s' % {'eid': r'\1'}, vid='foaf')),
226 +        (rgx('/book/(\d+)\.html'),
227 +         dict(rql='Any B WHERE B is Person, B eid %(eid)s' % {'eid': r'\1'})),
228          ]