[mb-commits] r9862 - in search_server/trunk: . bin lib

root at musicbrainz.org root at musicbrainz.org
Sat May 31 04:19:10 UTC 2008


Author: robert
Date: 2008-05-31 04:19:10 +0000 (Sat, 31 May 2008)
New Revision: 9862

Modified:
   search_server/trunk/
   search_server/trunk/bin/handler.fcgi
   search_server/trunk/lib/annotationsearch.py
   search_server/trunk/lib/artistsearch.py
   search_server/trunk/lib/freedbsearch.py
   search_server/trunk/lib/labelsearch.py
   search_server/trunk/lib/releasesearch.py
   search_server/trunk/lib/search.py
   search_server/trunk/lib/tracksearch.py
Log:
First set of changes to port this sucker to Xapian!



Property changes on: search_server/trunk
___________________________________________________________________
Name: svn:externals
   - lib/analyzers http://svn.musicbrainz.org/lucene_index/trunk/mbsearch/serverindex/analyzers


Modified: search_server/trunk/bin/handler.fcgi
===================================================================
--- search_server/trunk/bin/handler.fcgi	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/bin/handler.fcgi	2008-05-31 04:19:10 UTC (rev 9862)
@@ -66,46 +66,50 @@
     if not tport: dur = 0
 
     searchobj = None
-    if type == 'artist':
-        if not ar_search:
-            ar_search = artistsearch.ArtistSearch(indexDir + "/artist_index")
-        searchobj = ar_search
+    import search
+    try:
+	if type == 'artist':
+	    if not ar_search:
+		ar_search = artistsearch.ArtistSearch(indexDir + "/artist_index")
+	    searchobj = ar_search
 
-    elif type == 'release':
-        if not re_search:
-            re_search = releasesearch.ReleaseSearch(indexDir + "/release_index")
-        re_search.setTaggerPort(tport)
-        re_search.setDuration(dur)
-        searchobj = re_search
+	elif type == 'release':
+	    if not re_search:
+		re_search = releasesearch.ReleaseSearch(indexDir + "/release_index")
+	    re_search.setTaggerPort(tport)
+	    re_search.setDuration(dur)
+	    searchobj = re_search
 
-    elif type == 'track':
-        if not tr_search:
-            tr_search = tracksearch.TrackSearch(indexDir + "/track_index")
-        tr_search.setMBT(mbt)
-        tr_search.setTaggerPort(tport)
-        tr_search.setDuration(dur)
-        searchobj = tr_search
-    
-    elif type == 'annotation':
-        if not an_search:
-            an_search = annotationsearch.AnnotationSearch(indexDir + "/annotation_index")
-        searchobj = an_search
+	elif type == 'track':
+	    if not tr_search:
+		tr_search = tracksearch.TrackSearch(indexDir + "/track_index")
+	    tr_search.setMBT(mbt)
+	    tr_search.setTaggerPort(tport)
+	    tr_search.setDuration(dur)
+	    searchobj = tr_search
+	
+	elif type == 'annotation':
+	    if not an_search:
+		an_search = annotationsearch.AnnotationSearch(indexDir + "/annotation_index")
+	    searchobj = an_search
 
-    elif type == 'freedb':
-        if not fd_search:
-            fd_search = freedbsearch.FreeDBSearch(indexDir + "/freedb_index")
-        searchobj = fd_search
+	elif type == 'freedb':
+	    if not fd_search:
+		fd_search = freedbsearch.FreeDBSearch(indexDir + "/freedb_index")
+	    searchobj = fd_search
 
-    elif type == 'label':
-        if not la_search:
-            la_search = labelsearch.LabelSearch(indexDir + "/label_index")
-        searchobj = la_search
+	elif type == 'label':
+	    if not la_search:
+		la_search = labelsearch.LabelSearch(indexDir + "/label_index")
+	    searchobj = la_search
 
-    else:
-        start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
-        return "invalid resource requested. %s must be one of artist/release/track/label/annotation." % type
+	else:
+	    start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
+	    return "invalid resource requested. %s must be one of artist/release/track/label/annotation.\n" % type
+    except search.NoSuchIndexError, msg:
+	start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
+	return "Cannot find indexes. Server misconfigured: %s\n" % msg
 
-    import search
     ret = 0
     content = ""
 
@@ -131,4 +135,10 @@
     start_response('200 OK', [('Content-Type', 'text/%s' % fmt)])
     return content
 
+#if __name__ == '__main__':
+#    from wsgiref import simple_server
+#    httpd = simple_server.WSGIServer(('',8080),simple_server.WSGIRequestHandler)
+#    httpd.set_app(search)
+#    httpd.serve_forever()
+#else:
 WSGIServer(search, bindAddress = '/tmp/mbsearch.fcgi.sock').run() 

Modified: search_server/trunk/lib/annotationsearch.py
===================================================================
--- search_server/trunk/lib/annotationsearch.py	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/lib/annotationsearch.py	2008-05-31 04:19:10 UTC (rev 9862)
@@ -24,7 +24,6 @@
 #---------------------------------------------------------------------------
 
 import sys, os, re
-import PyLucene
 import search
 
 class AnnotationSearch(search.TextSearch):
@@ -35,11 +34,9 @@
 
    def __init__(self, index):
        search.TextSearch.__init__(self, index)
-       self.useMultiFields(False)
        self.setDefaultField('text')
-       self.types = ['dummy', 'artist', 'release'];
 
-   def asHTML(self, hits, maxHits, offset):
+   def asHTML(self, hits, count, offset):
        '''
        Output an annotation search result as HTML
        '''
@@ -48,15 +45,14 @@
        out += u'<tr class="searchresultsheader"><td>Score</td><td>Type</td><td>Name</td><td>Annotation</td>'
        out += u'</tr>'
        
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
+       for doc in hits:
            type = doc.get('type')
            text = doc.get('text')
            mbid = doc.get('mbid')
            name = doc.get('name')
 
            out += u'<tr class="searchresults%s">' % search.oddeven[i % 2]
-           out += u"<td>%d</td>" % int(hits.score(i) * 100)
+           out += u"<td>%d</td>" % doc['_score']
            out += u"<td>%s</td>" % self.escape(type)
            out += u"<td><a href=\"/%s/%s.html\">%s</a></td>" % (self.escape(type), 
                                                                self.escape(mbid), self.escape(name))
@@ -65,7 +61,7 @@
        out += u"</table></div>"
        return out
 
-   def asXML(self, hits, maxHits, offset):
+   def asXML(self, hits, count, offset):
        '''
        Output an annotation search result as XML
        '''

Modified: search_server/trunk/lib/artistsearch.py
===================================================================
--- search_server/trunk/lib/artistsearch.py	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/lib/artistsearch.py	2008-05-31 04:19:10 UTC (rev 9862)
@@ -24,7 +24,6 @@
 #---------------------------------------------------------------------------
 
 import sys, os, re
-import PyLucene
 import search
 
 TYPE_MAPPING = (u'unknown', u'person', 'group')
@@ -43,15 +42,15 @@
 
    def __init__(self, index):
        search.TextSearch.__init__(self, index)
-       self.setDefaultMultiFields(['artist', 'alias', 'sortname'])
-       self.useMultiFields(True)
+       self.setDefaultField('artist')
+       self.setPrefixes(('artist', 'sortname', 'alias', 'begin', 'end', 'type', 'arid', 'comment'))
 
    def mangleQuery(self, query):
        query = query.replace("artype", "type")
        query = re.sub("type:(\d)", replaceType, query)
        return query
 
-   def asHTML(self, hits, maxHits, offset):
+   def asHTML(self, hits, count, offset):
        '''
        Output an artist search result as HTML
        '''
@@ -63,9 +62,7 @@
        if rel: out += u'<td>Rel</td>'
        out += u'</tr>'
        
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
-
+       for i, doc in enumerate(hits):
            artist = doc.get('artist') or u''
            sortname = doc.get('sortname') or u''
            comment = doc.get('comment') or u''
@@ -75,7 +72,7 @@
            artype = doc.get('type') or u''
 
            out += u'<tr class="searchresults%s">' % search.oddeven[i % 2]
-           out += u"<td>%d</td>" % int(hits.score(i) * 100)
+           out += u"<td>%d</td>" % doc['_score']
            out += u"<td><a href=\"/artist/%s.html\">%s</a>" % \
                    (self.escape(arid), self.escape(artist))
            if comment: out += " (%s)" % self.escape(comment)
@@ -87,15 +84,13 @@
        out += u"</table></div>"
        return out
 
-   def asXML(self, hits, maxHits, offset):
+   def asXML(self, hits, count, offset):
        '''
        Output an artist search result as XML
        '''
 
-       out = '<artist-list count="%d" offset="%d">' % (hits.length(), offset)
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
-
+       out = '<artist-list count="%d" offset="%d">' % (count, offset)
+       for doc in hits:
            artist = doc.get('artist') or u''
            sortname = doc.get('sortname') or u''
            artype = doc.get('type') or u''
@@ -105,7 +100,7 @@
 
            out += u'<artist id="%s"' % self.escape(doc.get('arid'))
            if artype: out += u' type="%s"' % artype.title()
-           out += u' ext:score="%d"' % int(hits.score(i) * 100)
+           out += u' ext:score="%d"' % doc['_score']
            out += u'><name>%s</name>' % self.escape(artist)
            if sortname:
                out += u"<sort-name>%s</sort-name>" % self.escape(sortname)

Modified: search_server/trunk/lib/freedbsearch.py
===================================================================
--- search_server/trunk/lib/freedbsearch.py	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/lib/freedbsearch.py	2008-05-31 04:19:10 UTC (rev 9862)
@@ -24,7 +24,6 @@
 #---------------------------------------------------------------------------
 
 import sys, os, re
-import PyLucene
 import search
 
 class FreeDBSearch(search.TextSearch):
@@ -35,8 +34,8 @@
 
    def __init__(self, index):
        search.TextSearch.__init__(self, index)
-       self.useMultiFields(True)
-       self.setDefaultMultiFields(['artist', 'title'])
+       self.setDefaultField('title')
+       self.setPrefixes(('title', 'artist', 'tracks', 'cat', 'discid', 'year'))
 
    def asHTML(self, hits, maxHits, offset):
        '''
@@ -46,8 +45,7 @@
        out = u'<div><table class="searchresults">'
        out += u'<tr class="searchresultsheader"><td>Score</td><td>Title</td>'
        out += u'<td>Artist</td><td>Tracks</td><td>Discid</td><td>Year</td><td>Action</td></tr>'
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
+       for doc in hits:
            out += u'<tr class="searchresults%s">' % search.oddeven[i % 2]
            out += u"<td>%d</td>" % int(hits.score(i) * 100)
            out += u"<td>%s</td>" % self.escape(doc.get('title'))

Modified: search_server/trunk/lib/labelsearch.py
===================================================================
--- search_server/trunk/lib/labelsearch.py	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/lib/labelsearch.py	2008-05-31 04:19:10 UTC (rev 9862)
@@ -24,7 +24,6 @@
 #---------------------------------------------------------------------------
 
 import sys, os, re
-import PyLucene
 import search
 
 TYPE_MAPPING = {
@@ -56,14 +55,14 @@
 
    def __init__(self, index):
        search.TextSearch.__init__(self, index)
-       self.setDefaultMultiFields(['label', 'alias', 'sortname'])
-       self.useMultiFields(True)
+       self.setDefaultField('label')
+       self.setPrefixes(('label', 'sortname', 'alias', 'begin', 'end', 'type', 'laid', 'comment'))
 
    def mangleQuery(self, query):
        query = re.sub("type:(\d)", replaceType, query)
        return query
 
-   def asHTML(self, hits, maxHits, offset):
+   def asHTML(self, hits, count, offset):
        '''
        Output a label search result as HTML
        '''
@@ -75,7 +74,7 @@
        if rel: out += u'<td>Rel</td>'
        out += u'</tr>'
        
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
+       for i in xrange(offset, min(count, count + offset)):
            doc = hits.doc(i)
 
            label = doc.get('label') or u''
@@ -87,7 +86,7 @@
            type = doc.get('type') or u''
 
            out += u'<tr class="searchresults%s">' % search.oddeven[i % 2]
-           out += u"<td>%d</td>" % int(hits.score(i) * 100)
+           out += u"<td>%d</td>" % doc['_score']
            out += u"<td><a href=\"/label/%s.html\">%s</a>" % \
                    (self.escape(laid), self.escape(label))
            if comment: out += " (%s)" % self.escape(comment)
@@ -99,13 +98,13 @@
        out += u"</table></div>"
        return out
 
-   def asXML(self, hits, maxHits, offset):
+   def asXML(self, hits, count, offset):
        '''
        Output an artist search result as XML
        '''
 
-       out = '<label-list count="%d" offset="%d">' % (hits.length(), offset)
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
+       out = '<label-list count="%d" offset="%d">' % (count, offset)
+       for i in xrange(offset, min(count, count + offset)):
            doc = hits.doc(i)
 
            label = doc.get('label') or u''
@@ -122,7 +121,7 @@
 
            out += u'<label id="%s"' % self.escape(doc.get('laid'))
            if type: out += u' type="%s"' % type
-           out += u' ext:score="%d"' % int(hits.score(i) * 100)
+           out += u' ext:score="%d"' % doc['_score']
            out += u'><name>%s</name>' % self.escape(label)
            if sortname:
                out += u"<sort-name>%s</sort-name>" % self.escape(sortname)

Modified: search_server/trunk/lib/releasesearch.py
===================================================================
--- search_server/trunk/lib/releasesearch.py	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/lib/releasesearch.py	2008-05-31 04:19:10 UTC (rev 9862)
@@ -24,7 +24,6 @@
 #---------------------------------------------------------------------------
 
 import sys, os, re
-import PyLucene
 import search
 
 TYPE_MAPPING =  (u'album', u'single', u'ep', u'compilation', u'soundtrack', u'spokenword',
@@ -51,14 +50,14 @@
    def __init__(self, index):
        search.TextSearch.__init__(self, index)
        self.setDefaultField('release')
-       self.useMultiFields(False)
+       self.setPrefixes(('artist', 'arid', 'release', 'reid', 'type', 'tracks', 'disciids', 'lang', 'script', 'date', 'country'))
 
    def mangleQuery(self, query):
        query = re.sub("type:(\d+)", replaceType, query)
        query = re.sub("status:(\d)", replaceStatus, query)
        return query
 
-   def asHTML(self, hits, maxHits, offset):
+   def asHTML(self, hits, count, offset):
        '''
        Output an release search result as HTML
        '''
@@ -66,16 +65,14 @@
        rel = self.rel
 
        out = u'<div><table class="searchresults">'
-       out += u'<tr class="searchresultsheader"><td>Score</td><td>Album</td><td>Artist</td><td>Tracks</td>'
+       out += u'<tr class="searchresultsheader"><td>Score</td><td>Release</td><td>Artist</td><td>Tracks</td>'
        out += u'<td style="white-space: nowrap">CD ids</td><td>Date</td><td>Type</td><td style="white-space: nowrap">Lang/script</td>'
        if self.tport: 
            out += u"<td>Tagger</td>"
        elif rel: 
            out += u"<td>Rel</td>"
        out += u"</tr>"
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
-
+       for i, doc in enumerate(hits):
            artist = doc.get('artist') or u'';
            arid = doc.get('arid') or u'';
            album = doc.get('release') or u'';
@@ -89,13 +86,21 @@
            if lang and not script: script = u'?'
            if not lang and script: lang = u'?'
 
-           dates = doc.getFields('date') or [];
-	   dates = [d.stringValue() for d in dates]
-           countries = doc.getFields('country') or [];
-	   countries = [c.stringValue() for c in countries]
+	   countries = []
+	   dates = []
+	   j = 0
+	   while True:
+	       country = doc.get('country%d' % j) or u'' 
+	       date = doc.get('date%d' % j) or u''
 
+	       if not country and not date: break
+
+	       countries.append(country)
+	       dates.append(date)
+               j += 1
+
            out += u'<tr class="searchresults%s">' % self.escape(search.oddeven[i % 2])
-           out += u"<td>%d</td>" % int(hits.score(i) * 100)
+           out += u"<td>%d</td>" % doc['_score']
            out += u"<td><a href=\"/release/%s.html\">%s</a></td>" % \
                   (self.escape(reid), self.escape(album))
            out += u"<td><a href=\"/artist/%s.html\">%s</a></td>" % \
@@ -116,15 +121,13 @@
        out += u"</table></div>"
        return out
 
-   def asXML(self, hits, maxHits, offset):
+   def asXML(self, hits, count, offset):
        '''
        Output an release search result as XML
        '''
 
-       out = '<release-list count="%d" offset="%d">' % (hits.length(), offset)
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
-
+       out = '<release-list count="%d" offset="%d">' % (count, offset)
+       for doc in hits:
            artist = doc.get('artist') or u''
            arid = doc.get('arid') or u''
            album = doc.get('release') or u''
@@ -137,17 +140,33 @@
            lang = doc.get('lang') or u''
            script = doc.get('script') or u''
 
-           countries = doc.getValues('country')
-           dates = doc.getValues('date')
-           labels = doc.getValues('label')
-           catnos = doc.getValues('catno')
-           barcodes = doc.getValues('barcode')
+	   countries = []
+	   dates = []
+	   labels = []
+	   catnos = []
+	   barcodes = []
+	   i = 0
+	   while True:
+	       country = doc.get('country%d' % i) or u'' 
+	       date = doc.get('date%d' % i) or u''
+	       label = doc.get('label%d' % i) or u''
+	       catno = doc.get('catno%d' % i) or u''
+	       barcode = doc.get('barcode%d' % i) or u''
 
+	       if not country and not date and not label and not catno and not barcode: break
+
+	       countries.append(country)
+	       dates.append(date)
+	       labels.append(label)
+	       catnos.append(catno)
+	       barcodes.append(barcode)
+               i += 1
+
            if status: type = (type + (u" %s" % status)).strip()
 
            out += u'<release id="%s"' % self.escape(reid)
            if type: out += u' type="%s"' % self.escape(type.title())
-           out += u' ext:score="%d"' % int(hits.score(i) * 100)
+           out += u' ext:score="%d"' % doc['_score']
            out += u'><title>%s</title>' % self.escape(album)
 
            if lang or script:

Modified: search_server/trunk/lib/search.py
===================================================================
--- search_server/trunk/lib/search.py	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/lib/search.py	2008-05-31 04:19:10 UTC (rev 9862)
@@ -24,9 +24,8 @@
 #---------------------------------------------------------------------------
 
 import sys, os
-import PyLucene
+import xapian
 from unac import unac
-from analyzers.unaccent import StandardUnaccentAnalyzer
 import time
 
 # TODO: 
@@ -52,6 +51,8 @@
 class NoResultsError(Exception):
     pass
 
+class NoSuchIndexError(Exception):
+    pass
 
 class TextSearch(object):
     '''
@@ -66,50 +67,38 @@
         self.rel = 0
         self.offset = 0
 
-        self.useMulti = True
-        self.defaultField = "artist"
-        self.defaultMultiFields = ["artist", "album", "track"]
-        self.fields = [] 
-        self.analyzer = self.getAnalyzer()
+        self.defaultField = u''
         try:
-            self.index = PyLucene.IndexSearcher(PyLucene.FSDirectory.getDirectory(indexName, False))
-        except ValueError:
-            raise indexsearch.NoSuchIndexError
+            sys.stderr.write(indexName + "\n")
+            self.index = xapian.WritableDatabase(indexName, xapian.DB_OPEN)
+        except xapian.Error, msg:
+	    text = str(msg)
+            raise NoSuchIndexError(text)
 
+        self.en = enquire = xapian.Enquire(self.index)
+        self.qp = xapian.QueryParser()
+        self.qp.set_database(self.index)
+        self.qp.set_stemming_strategy(xapian.QueryParser.STEM_NONE)
+
     def close(self):
-        self.index.close();
- 
-    def getAnalyzer(self):
-        '''
-        Return the Lucene analyzer object to use with this index
-        This function must be overridden by deriving classes.
-        '''
-        return StandardUnaccentAnalyzer()
+	'''
+	Close the index
+	'''
+        del self.index
 
-    def setFields(self, fields): 
-        ''' 
-        This function sets the actual fields the caller wants searched. (not the default fields) 
-        ''' 
-        self.fields = fields 
+    def setPrefixes(self, prefixes):
+	'''
+	Set the mapping of field prefixes
+	'''
+	for prefix in prefixes:
+            self.qp.add_prefix(prefix, "X" + prefix.upper())
  
-    def useMultiFields(self, multi):
-        ''' 
-        If set, use lucene's MultiFieldQueryParser
-        '''
-        self.useMulti = multi
- 
     def setDefaultField(self, default):
         ''' 
         If set, use lucene's QueryParser for searching on one default field
         '''
-        self.defaultField = default
+        self.defaultField = 'X' + default.upper()
  
-    def setDefaultMultiFields(self, fields):
-        '''
-        The multi fields searched when the users specifies no multi fields
-        '''
-        self.defaultMultiFields = fields
- 
     def escape(self, text):
         '''
         Escape XML/HTML entities and convert output to utf-8
@@ -182,48 +171,51 @@
         '''
         return query
 
-    def queryIndex(self, query):
+    def queryIndex(self, query, offset, maxHits):
         '''
         Carry out a search, and return the hits
         '''
 
         if not query: raise QueryError(u"No query was sent")
 
-        # remove accents from the search query
+
         try:
             query = unac.unac_string(self.mangleQuery(unicode(query, 'utf-8')))
         except UnicodeDecodeError:
             raise QueryError(u"Unicode decode problem: Invalid utf-8 characters passed to search query.")
-            
-        parsedQuery = None
-        if self.useMulti:
-            fields = [] 
-            if len(self.fields): 
-                fields = self.fields 
-            else: 
-                fields = self.defaultMultiFields 
-            try:
-                parsedQuery = PyLucene.MultiFieldQueryParser(fields, self.analyzer).parse(query)
-            except Exception, msg:
-                text = str(msg)
-                raise QueryError(text.encode('utf-8'))
-        else:
-            try:
-                parsedQuery = PyLucene.QueryParser(self.defaultField, self.analyzer).parse(query)
-            except Exception, msg:
-                text = str(msg)
-                raise QueryError(text.encode('utf-8'))
 
-        hits = []
-        err = ''
         try:
-            hits = self.index.search(parsedQuery);
+	    parsedQuery = self.qp.parse_query(unac.unac_string(query), 
+			      	              xapian.QueryParser.FLAG_PHRASE | 
+				              xapian.QueryParser.FLAG_BOOLEAN | 
+				              xapian.QueryParser.FLAG_LOVEHATE,
+				              self.defaultField)
+        except xapian.Error, msg:
+	    text = str(msg)
+	    raise QueryError(text.encode('utf-8'))
+
+        try:
+	    self.en.set_query(parsedQuery)
+	    matches = self.en.get_mset(offset, maxHits)
         except Exception, msg:
             text = str(msg)
-            raise QueryError(text.encode('utf-8'))
+            raise SearchError(text.encode('utf-8'))
 
-        if not hits: raise NoResultsError()
- 
+        if not matches.get_matches_estimated(): raise NoResultsError()
+
+        hits = []
+        for match in matches:
+	    data = match.document.get_data()
+            data = unicode(data, 'utf-8')
+            dataDict = {}
+            for pair in data.split(u"\n"):
+		if not pair: continue
+		key, value = pair.split(u"=", 1)
+		dataDict[key] = value
+
+	    dataDict['_score'] = match.percent
+	    hits.append(dataDict)
+            
         return hits
  
     def log_error(self, msg):
@@ -235,11 +227,10 @@
  
     def search(self, query, maxHits, offset, type='xml'):
         if maxHits < 1: maxHits = MAX_HITS
-        self.offset = offset
-        hits = self.queryIndex(query);
+        hits = self.queryIndex(query, offset, maxHits);
         redirect = ""
         if len(hits) == 1:
-           doc = hits.doc(0)
+           doc = hits[0]
            redirect = doc.get('trid')
            if not redirect: redirect = doc.get('reid')
            if not redirect: redirect = doc.get('arid')

Modified: search_server/trunk/lib/tracksearch.py
===================================================================
--- search_server/trunk/lib/tracksearch.py	2008-05-31 04:11:57 UTC (rev 9861)
+++ search_server/trunk/lib/tracksearch.py	2008-05-31 04:19:10 UTC (rev 9862)
@@ -24,7 +24,6 @@
 #---------------------------------------------------------------------------
 
 import sys, os, re
-import PyLucene
 import search
 
 oddeven = ['even', 'odd']
@@ -46,13 +45,13 @@
    def __init__(self, index):
        search.TextSearch.__init__(self, index)
        self.setDefaultField('track')
-       self.useMultiFields(False)
+       self.setPrefixes(('artist', 'arid', 'reid', 'trid', 'release', 'track', 'tnum', 'tracks', 'dur', 'type'))
 
    def mangleQuery(self, query):
        query = re.sub("type:(\d+)", replaceType, query)
        return query
 
-   def asHTML(self, hits, maxHits, offset):
+   def asHTML(self, hits, count, offset):
        '''
        Output an release search result as HTML
        '''
@@ -60,21 +59,18 @@
        rel = self.rel
        
        out = u'<div><table class="searchresults" id="TagLookupTrackResults">'
-       out += u'<tr class="searchresultsheader"><td>Score</td><td>Num</td><td>Track</td><td>Duration</td><td>Type</td><td>Artist</td><td>Album</td><td>Tracks</td>'
+       out += u'<tr class="searchresultsheader"><td>Score</td><td>Num</td><td>Track</td><td>Duration</td><td>Type</td><td>Artist</td><td>Release</td><td>Tracks</td>'
        if self.tport or self.mbt: 
            out += u"<td>Tagger</td>"
        elif rel: 
            out += u"<td>Rel</td>"
        out += u"</tr>"
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
-
+       for i, doc in enumerate(hits):
            artist = doc.get('artist') or u''
            arid = doc.get('arid') or u'';
            reid = doc.get('reid') or u'';
            trid = doc.get('trid') or u'';
            album = doc.get('release') or u''
-           alid = doc.get('reid') or u''
            track = doc.get('track') or u''
            tnum = doc.get('tnum') or u''
            tracks = int(doc.get('tracks') or u'0')
@@ -82,7 +78,7 @@
            type = doc.get('type') or u'';
 
            out += u'<tr class="searchresults%s">' % search.oddeven[i % 2]
-           out += u"<td>%d</td>" % int(hits.score(i) * 100)
+           out += u"<td>%d</td>" % doc['_score']
            out += u'<td align="center">%s</td>' % self.escape(tnum)
            out += u"<td><a href=\"/track/%s.html\">%s</a></td>" % \
                   (self.escape(trid), self.escape(track))
@@ -109,15 +105,13 @@
        out += u"</table></div>"
        return out
 
-   def asXML(self, hits, maxHits, offset):
+   def asXML(self, hits, count, offset):
        '''
        Output an release search result as XML
        '''
 
-       out = '<track-list count="%d" offset="%d">' % (hits.length(), offset)
-       for i in xrange(offset, min(hits.length(), maxHits + offset)):
-           doc = hits.doc(i)
-
+       out = '<track-list count="%d" offset="%d">' % (count, offset)
+       for doc in hits:
            artist = doc.get('artist') or u''
            arid = doc.get('arid') or u''
            album = doc.get('release') or u''
@@ -128,7 +122,7 @@
            dur = doc.get('dur') or u''
 
            out += u'<track id="%s"' % self.escape(doc.get('trid'))
-           out += u' ext:score="%d">' % int(hits.score(i) * 100)
+           out += u' ext:score="%d">' % doc['_score']
            out += u"<title>%s</title>" % self.escape(track)
            if dur: out += u"<duration>%s</duration>" % self.escape(dur)
            out += u'<artist id="%s"><name>%s</name></artist>' % (self.escape(arid), self.escape(artist))




More information about the MusicBrainz-commits mailing list