[mb-commits] r9885 - in search_index/trunk/mbsearch: . serverindex

root at musicbrainz.org root at musicbrainz.org
Thu Jun 26 08:00:09 UTC 2008


Author: robert
Date: 2008-06-26 08:00:09 +0000 (Thu, 26 Jun 2008)
New Revision: 9885

Modified:
   search_index/trunk/mbsearch/indexcreator.py
   search_index/trunk/mbsearch/normalize.py
   search_index/trunk/mbsearch/serverindex/ar_annotationindex.py
   search_index/trunk/mbsearch/serverindex/artistindex.py
   search_index/trunk/mbsearch/serverindex/la_annotationindex.py
   search_index/trunk/mbsearch/serverindex/labelindex.py
   search_index/trunk/mbsearch/serverindex/re_annotationindex.py
   search_index/trunk/mbsearch/serverindex/releaseindex.py
   search_index/trunk/mbsearch/serverindex/tr_annotationindex.py
   search_index/trunk/mbsearch/serverindex/trackindex.py
Log:
Support mbid searching and primary fields (which store the inverse field length as a value field),
and add artist, sortname and alias data into the index for the artist field. That properly allows
searching all three fields without any funky tricks.


Modified: search_index/trunk/mbsearch/indexcreator.py
===================================================================
--- search_index/trunk/mbsearch/indexcreator.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/indexcreator.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -37,9 +37,18 @@
 
 # TODO: Query DB in transactions to make sure final index is consistent
 
-FIELD_TOKENIZE_YES = 1
-FIELD_TOKENIZE_NO = 0
+# No tokenization at all. Feed the entire field to the document.
+FIELD_TOKENIZE_NONE    = 0 
 
+# Normal tokenization. Lower case, unaccent, etc.
+FIELD_TOKENIZE_NORMAL  = 1
+
+# Normal tokenization, plus a document value based on this field's length (1000 - len(text))
+FIELD_TOKENIZE_PRIMARY = 2
+
+# Tokenize an MBID (remove the hyphens, since xapian can't find it with them in place)
+FIELD_TOKENIZE_MBID    = 3
+
 FIELD_INDEX_AND_STORE = 0
 FIELD_INDEX = 1
 FIELD_STORE = 2
@@ -102,7 +111,7 @@
 
         ret = []
         for field, value in zip(self.FIELDS, row):
-	    normalize = field[4]
+	    normalize = field[3]
 	    if normalize is not None:
 		value = normalize(value)
             ret.append(value)
@@ -153,7 +162,7 @@
 
         numChunks = (maxId / IDS_PER_CHUNK) + 1
         if doTest:
-            numChunks = min(numChunks, 2)
+            numChunks = min(numChunks, 50)
 
         # Record the start time
         t0 = time.time()
@@ -181,7 +190,7 @@
                         data = [ data ]
 		        isList = False
 
-                    name, weight, method, tokenize = field[:4]
+                    name, method, tokenize = field[:3]
                     for i, text in enumerate(data):
                         # Ensure that the value is an unicode string
                         if not text: continue
@@ -196,22 +205,33 @@
 			        else:
                                     storedata += u"%s=%s\n" % (name, text.replace(u"\n", u"\\n").strip())
 			        
-                            #if method == FIELD_INDEX: print text.encode('utf-8')
                             if method in (FIELD_INDEX, FIELD_INDEX_AND_STORE):
-				if tokenize:
+				if tokenize == FIELD_TOKENIZE_NORMAL or tokenize == FIELD_TOKENIZE_PRIMARY:
                                     text = text.strip().lower()
                                     text = unac.unac_string(text)
                                     text = self.removeDots(text) 
                                     text = addSpacesToIdeographicStrings(text) 
-                                    self.indexer.index_text(text, weight, u"X" + name.upper())
-				    # TEST HACK
-				    if name == u"artist": doc.add_value(0, u"%d" % len(text.split(u" ")))
-			        else:
-				    doc.add_term(u"X" + name.upper() + text, weight)
+                                    self.indexer.index_text(text, 0, u"X" + name.upper())
+				    #print "ndx: %s:'%s'" % (name.encode('utf-8', 'replace'), text.encode('utf-8', 'replace'))
+				    if tokenize == FIELD_TOKENIZE_PRIMARY:
+					doc.add_value(0, u"%d" % (1000 - len(text)))
+			        elif tokenize == FIELD_TOKENIZE_NONE:
+				    #print "add: '%s'" % text.encode('utf-8', 'replace')
+				    doc.add_term(u"X" + name.upper() + text, 0)
+			        elif tokenize == FIELD_TOKENIZE_MBID:
+                                    text = text.strip().lower()
+				    text = text.replace(u'-', u'')
+				    #print "gid: '%s'" % text.encode('utf-8', 'replace')
+                                    self.indexer.index_text(text, 0, u"X" + name.upper())
+
 			        self.indexer.increase_termpos()
+			    elif tokenize == FIELD_TOKENIZE_PRIMARY:
+				#print "val: '%s'" % len(text.split(u" "))
+				doc.add_value(0, u"%d" % (1000 - len(text)))
+			        
 
                 # Add the document to the index
-		#print "data: %s" % (storedata.encode('utf-8', 'replace'))
+		#print "data: %s\n" % (storedata.encode('utf-8', 'replace'))
                 doc.set_data(storedata)
                 self.index.add_document(doc)
                 rowsThisChunk += 1

Modified: search_index/trunk/mbsearch/normalize.py
===================================================================
--- search_index/trunk/mbsearch/normalize.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/normalize.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -62,7 +62,3 @@
         return u''
     value = value.decode('utf-8')
     return _dateRe.sub(u'', value)
-
-def normalizeMBID(value):
-    """Normalize an MBID UUID"""
-    return value.replace(u'-', u'')

Modified: search_index/trunk/mbsearch/serverindex/ar_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/ar_annotationindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/ar_annotationindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
 
 import re
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
 
 class ArtistAnnotationIndex(indexcreator.IndexCreator):
     '''
@@ -34,10 +34,10 @@
     '''
 
     FIELDS = [
-              (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID), 
-              (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-              (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-              (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+              (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,   None), 
+              (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+              (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   normalizeText),
+              (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):

Modified: search_index/trunk/mbsearch/serverindex/artistindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/artistindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/artistindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -27,7 +27,7 @@
 import re
 import psycopg2
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate
 
 TYPES = (u'unknown', u'person', u'group')
 
@@ -41,14 +41,16 @@
     """This class specifies the details on how to create the artist index."""
 
     FIELDS = [
-        (u'arid',     1, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID),
-        (u'artist',   1, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'sortname', 1, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'type',     1, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NO,  _normalizeArtistType),
-        (u'begin',    1, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NO,  normalizeDate),
-        (u'end',      1, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NO,  normalizeDate),
-        (u'comment',  1, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'alias',    0, indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+        (u'arid',     indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_MBID,    None),
+        (u'artist',   indexcreator.FIELD_INDEX,            indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+        (u'artist',   indexcreator.FIELD_STORE,            indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+#        (u'artist',   indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+        (u'sortname', indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+        (u'type',     indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NONE,    _normalizeArtistType),
+        (u'begin',    indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NONE,    normalizeDate),
+        (u'end',      indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NONE,    normalizeDate),
+        (u'comment',  indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+        (u'alias',    indexcreator.FIELD_INDEX_AND_STORE,  indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):
@@ -58,6 +60,18 @@
     def getRowCountQuery(self):
         return "SELECT max(id), count(*) FROM artist"
 
+    def uniquer(self, seq, idfun=None): 
+	if idfun is None: 
+	    def idfun(x): return x 
+	seen = {} 
+	result = [] 
+	for item in seq: 
+	    marker = idfun(item) 
+            if marker in seen: continue 
+	    seen[marker] = 1 
+	    result.append(item) 
+	return result 
+
     def getQuery(self, chunkNum, maxChunks, chunkSize):
 
         # Connect to the DB
@@ -75,7 +89,8 @@
 
         conn.close()
 
-        return """SELECT gid, name, sortname, type, begindate, enddate, resolution
+        return """SELECT gid, name || ' ' || sortname || ' ', name, 
+	                 sortname, type, begindate, enddate, resolution
                     FROM artist
                    WHERE id BETWEEN %d AND %d ORDER BY id""" % (chunkNum * chunkSize, ((chunkNum + 1) * chunkSize) - 1)
 
@@ -84,7 +99,13 @@
         ret = super(ArtistIndex, self).processRow(row)
         try:
             ret.append(self.aliasDict[row[0]])
+	    ret[1] += ' ' + ' '.join(self.aliasDict[row[0]]) 
         except KeyError:
             ret.append([])
 
+	ret[1] = ret[1].replace(u',', u'').lower() 
+	words = ret[1].split(' ') 
+	words = self.uniquer(words) 
+	ret[1] = ' '.join(words) 
+
         return ret

Modified: search_index/trunk/mbsearch/serverindex/la_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/la_annotationindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/la_annotationindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
 
 import re
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
 
 class LabelAnnotationIndex(indexcreator.IndexCreator):
     '''
@@ -34,10 +34,10 @@
     '''
 
     FIELDS = [
-              (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID), 
-              (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-              (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-              (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+              (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,   None), 
+              (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+              (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   normalizeText),
+              (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):

Modified: search_index/trunk/mbsearch/serverindex/labelindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/labelindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/labelindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -27,31 +27,32 @@
 import re
 import psycopg2
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate
 
 TYPES = (u'unknown', u'distributor', u'holding', u'production', u'orig. prod.', 
          u'bootleg prod.', u'reissue prod.', u'publisher')
 
 def _normalizeLabelType(value):
+    if not value: return u''
     try:
-        return TYPES[int(value)]        
-    except:
+        return TYPES[value]        
+    except IndexError:
         return u''
 
 class LabelIndex(indexcreator.IndexCreator):
     """This class specifies the details on how to create the label index."""
 
     FIELDS = [
-        (u'laid',     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID),
-        (u'label',    1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'sortname', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'type',     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, _normalizeLabelType),
-        (u'code',     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-        (u'country',  1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-        (u'begin',    1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeDate),
-        (u'end',      1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeDate),
-        (u'comment',  1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'alias',    1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+        (u'laid',     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,    None),
+        (u'label',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+        (u'sortname', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+        (u'type',     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  _normalizeLabelType),
+        (u'code',     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeText),
+        (u'country',  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeText),
+        (u'begin',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeDate),
+        (u'end',      indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeDate),
+        (u'comment',  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+        (u'alias',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):

Modified: search_index/trunk/mbsearch/serverindex/re_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/re_annotationindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/re_annotationindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
 
 import re
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
 
 class ReleaseAnnotationIndex(indexcreator.IndexCreator):
     '''
@@ -34,10 +34,10 @@
     '''
 
     FIELDS = [
-              (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID), 
-              (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-              (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-              (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+              (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,   None), 
+              (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+              (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   normalizeText),
+              (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):

Modified: search_index/trunk/mbsearch/serverindex/releaseindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/releaseindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/releaseindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -27,7 +27,7 @@
 import re
 import psycopg2
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType
 
 ALBUM_STATUS_FIRST = 100
 ALBUM_STATUS_LAST = 102
@@ -135,22 +135,22 @@
     '''
 
     FIELDS = [ 
-             (u"arid",     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID),
-             (u"artist",   1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-             (u"reid",     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID),
-             (u"release",  1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-             (u"type",     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeReleaseType),
-             (u"status",   1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  _normalizeReleaseStatus),
-             (u"tracks",   1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-             (u"discids",  1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-             (u"asin",     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-             (u"lang",     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  _normalizeLanguage),
-             (u"script",   1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  _normalizeScript),
-             (u"country",  1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-             (u"date",     1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeDate),
-             (u"label",    1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-             (u"catno",    1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-             (u"barcode",  1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
+             (u"arid",     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,    None),
+             (u"artist",   indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+             (u"reid",     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,    None),
+             (u"release",  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+             (u"type",     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeReleaseType),
+             (u"status",   indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    _normalizeReleaseStatus),
+             (u"tracks",   indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeText),
+             (u"discids",  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeText),
+             (u"asin",     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+             (u"lang",     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    _normalizeLanguage),
+             (u"script",   indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   _normalizeScript),
+             (u"country",  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+             (u"date",     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeDate),
+             (u"label",    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL,  normalizeText),
+             (u"catno",    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeText),
+             (u"barcode",  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,    normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):

Modified: search_index/trunk/mbsearch/serverindex/tr_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/tr_annotationindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/tr_annotationindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
 
 import re
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
 
 class TrackAnnotationIndex(indexcreator.IndexCreator):
     '''
@@ -34,10 +34,10 @@
     '''
 
     FIELDS = [
-              (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID), 
-              (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-              (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-              (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+              (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,   None), 
+              (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+              (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   normalizeText),
+              (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):

Modified: search_index/trunk/mbsearch/serverindex/trackindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/trackindex.py	2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/trackindex.py	2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
 
 import re
 from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType
 
 ALBUM_ATTR_FIRST = 1
 ALBUM_ATTR_LAST = 11
@@ -52,17 +52,17 @@
     '''
 
     FIELDS = [
-        (u'arid',    1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID),
-        (u'artist',  1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'reid',    1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID),
-        (u'release', 1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
-        (u'type',    1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeReleaseType),
-        (u'tracks',  1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-        (u'trid',    1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeMBID),
-        (u'track',   1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
-        (u'dur',     1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  _normalizeDur),
-        (u'qdur',    1,  indexcreator.FIELD_INDEX,           indexcreator.FIELD_TOKENIZE_NO,  _normalizeQdur),
-        (u'tnum',    1,  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO,  normalizeText),
+        (u'arid',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,   None),
+        (u'artist',  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+        (u'reid',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,   None),
+        (u'release', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+        (u'type',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   normalizeReleaseType),
+        (u'tracks',  indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   normalizeText),
+        (u'trid',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID,   None),
+        (u'track',   indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY,normalizeText),
+        (u'dur',     indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   _normalizeDur),
+        (u'qdur',    indexcreator.FIELD_INDEX,           indexcreator.FIELD_TOKENIZE_NONE,   _normalizeQdur),
+        (u'tnum',    indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE,   normalizeText),
     ]
 
     def __init__(self, indexName, clear, host, database, user, passwd):




More information about the MusicBrainz-commits mailing list