[mb-commits] r9885 - in search_index/trunk/mbsearch: . serverindex
root at musicbrainz.org
root at musicbrainz.org
Thu Jun 26 08:00:09 UTC 2008
Author: robert
Date: 2008-06-26 08:00:09 +0000 (Thu, 26 Jun 2008)
New Revision: 9885
Modified:
search_index/trunk/mbsearch/indexcreator.py
search_index/trunk/mbsearch/normalize.py
search_index/trunk/mbsearch/serverindex/ar_annotationindex.py
search_index/trunk/mbsearch/serverindex/artistindex.py
search_index/trunk/mbsearch/serverindex/la_annotationindex.py
search_index/trunk/mbsearch/serverindex/labelindex.py
search_index/trunk/mbsearch/serverindex/re_annotationindex.py
search_index/trunk/mbsearch/serverindex/releaseindex.py
search_index/trunk/mbsearch/serverindex/tr_annotationindex.py
search_index/trunk/mbsearch/serverindex/trackindex.py
Log:
Support MBID searching and primary fields (which store the inverse field length as a value field),
and add artist, sortname and alias data to the index for the artist field. That properly allows
searching all three fields without any funky tricks.
Modified: search_index/trunk/mbsearch/indexcreator.py
===================================================================
--- search_index/trunk/mbsearch/indexcreator.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/indexcreator.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -37,9 +37,18 @@
# TODO: Query DB in transactions to make sure final index is consistent
-FIELD_TOKENIZE_YES = 1
-FIELD_TOKENIZE_NO = 0
+# No tokenization at all. Feed the entire field to the document.
+FIELD_TOKENIZE_NONE = 0
+# Normal tokenization. Lower case, unaccent, etc.
+FIELD_TOKENIZE_NORMAL = 1
+
+# Normal tokenization and add word count value based on this field
+FIELD_TOKENIZE_PRIMARY = 2
+
+# Tokenize an MBID (remove the '-' characters, since Xapian can't find it with them)
+FIELD_TOKENIZE_MBID = 3
+
FIELD_INDEX_AND_STORE = 0
FIELD_INDEX = 1
FIELD_STORE = 2
@@ -102,7 +111,7 @@
ret = []
for field, value in zip(self.FIELDS, row):
- normalize = field[4]
+ normalize = field[3]
if normalize is not None:
value = normalize(value)
ret.append(value)
@@ -153,7 +162,7 @@
numChunks = (maxId / IDS_PER_CHUNK) + 1
if doTest:
- numChunks = min(numChunks, 2)
+ numChunks = min(numChunks, 50)
# Record the start time
t0 = time.time()
@@ -181,7 +190,7 @@
data = [ data ]
isList = False
- name, weight, method, tokenize = field[:4]
+ name, method, tokenize = field[:3]
for i, text in enumerate(data):
# Ensure that the value is an unicode string
if not text: continue
@@ -196,22 +205,33 @@
else:
storedata += u"%s=%s\n" % (name, text.replace(u"\n", u"\\n").strip())
- #if method == FIELD_INDEX: print text.encode('utf-8')
if method in (FIELD_INDEX, FIELD_INDEX_AND_STORE):
- if tokenize:
+ if tokenize == FIELD_TOKENIZE_NORMAL or tokenize == FIELD_TOKENIZE_PRIMARY:
text = text.strip().lower()
text = unac.unac_string(text)
text = self.removeDots(text)
text = addSpacesToIdeographicStrings(text)
- self.indexer.index_text(text, weight, u"X" + name.upper())
- # TEST HACK
- if name == u"artist": doc.add_value(0, u"%d" % len(text.split(u" ")))
- else:
- doc.add_term(u"X" + name.upper() + text, weight)
+ self.indexer.index_text(text, 0, u"X" + name.upper())
+ #print "ndx: %s:'%s'" % (name.encode('utf-8', 'replace'), text.encode('utf-8', 'replace'))
+ if tokenize == FIELD_TOKENIZE_PRIMARY:
+ doc.add_value(0, u"%d" % (1000 - len(text)))
+ elif tokenize == FIELD_TOKENIZE_NONE:
+ #print "add: '%s'" % text.encode('utf-8', 'replace')
+ doc.add_term(u"X" + name.upper() + text, 0)
+ elif tokenize == FIELD_TOKENIZE_MBID:
+ text = text.strip().lower()
+ text = text.replace(u'-', u'')
+ #print "gid: '%s'" % text.encode('utf-8', 'replace')
+ self.indexer.index_text(text, 0, u"X" + name.upper())
+
self.indexer.increase_termpos()
+ elif tokenize == FIELD_TOKENIZE_PRIMARY:
+ #print "val: '%s'" % len(text.split(u" "))
+ doc.add_value(0, u"%d" % (1000 - len(text)))
+
# Add the document to the index
- #print "data: %s" % (storedata.encode('utf-8', 'replace'))
+ #print "data: %s\n" % (storedata.encode('utf-8', 'replace'))
doc.set_data(storedata)
self.index.add_document(doc)
rowsThisChunk += 1
Modified: search_index/trunk/mbsearch/normalize.py
===================================================================
--- search_index/trunk/mbsearch/normalize.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/normalize.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -62,7 +62,3 @@
return u''
value = value.decode('utf-8')
return _dateRe.sub(u'', value)
-
-def normalizeMBID(value):
- """Normalize an MBID UUID"""
- return value.replace(u'-', u'')
Modified: search_index/trunk/mbsearch/serverindex/ar_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/ar_annotationindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/ar_annotationindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
import re
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
class ArtistAnnotationIndex(indexcreator.IndexCreator):
'''
@@ -34,10 +34,10 @@
'''
FIELDS = [
- (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+ (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
Modified: search_index/trunk/mbsearch/serverindex/artistindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/artistindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/artistindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -27,7 +27,7 @@
import re
import psycopg2
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate
TYPES = (u'unknown', u'person', u'group')
@@ -41,14 +41,16 @@
"""This class specifies the details on how to create the artist index."""
FIELDS = [
- (u'arid', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u'artist', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'sortname', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'type', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, _normalizeArtistType),
- (u'begin', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeDate),
- (u'end', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeDate),
- (u'comment', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'alias', 0, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+ (u'arid', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u'artist', indexcreator.FIELD_INDEX, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u'artist', indexcreator.FIELD_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+# (u'artist', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+ (u'sortname', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u'type', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, _normalizeArtistType),
+ (u'begin', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeDate),
+ (u'end', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeDate),
+ (u'comment', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u'alias', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
@@ -58,6 +60,18 @@
def getRowCountQuery(self):
return "SELECT max(id), count(*) FROM artist"
+ def uniquer(self, seq, idfun=None):
+ if idfun is None:
+ def idfun(x): return x
+ seen = {}
+ result = []
+ for item in seq:
+ marker = idfun(item)
+ if marker in seen: continue
+ seen[marker] = 1
+ result.append(item)
+ return result
+
def getQuery(self, chunkNum, maxChunks, chunkSize):
# Connect to the DB
@@ -75,7 +89,8 @@
conn.close()
- return """SELECT gid, name, sortname, type, begindate, enddate, resolution
+ return """SELECT gid, name || ' ' || sortname || ' ', name,
+ sortname, type, begindate, enddate, resolution
FROM artist
WHERE id BETWEEN %d AND %d ORDER BY id""" % (chunkNum * chunkSize, ((chunkNum + 1) * chunkSize) - 1)
@@ -84,7 +99,13 @@
ret = super(ArtistIndex, self).processRow(row)
try:
ret.append(self.aliasDict[row[0]])
+ ret[1] += ' ' + ' '.join(self.aliasDict[row[0]])
except KeyError:
ret.append([])
+ ret[1] = ret[1].replace(u',', u'').lower()
+ words = ret[1].split(' ')
+ words = self.uniquer(words)
+ ret[1] = ' '.join(words)
+
return ret
Modified: search_index/trunk/mbsearch/serverindex/la_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/la_annotationindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/la_annotationindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
import re
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
class LabelAnnotationIndex(indexcreator.IndexCreator):
'''
@@ -34,10 +34,10 @@
'''
FIELDS = [
- (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+ (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
Modified: search_index/trunk/mbsearch/serverindex/labelindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/labelindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/labelindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -27,31 +27,32 @@
import re
import psycopg2
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate
TYPES = (u'unknown', u'distributor', u'holding', u'production', u'orig. prod.',
u'bootleg prod.', u'reissue prod.', u'publisher')
def _normalizeLabelType(value):
+ if not value: return u''
try:
- return TYPES[int(value)]
- except:
+ return TYPES[value]
+ except IndexError:
return u''
class LabelIndex(indexcreator.IndexCreator):
"""This class specifies the details on how to create the label index."""
FIELDS = [
- (u'laid', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u'label', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'sortname', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'type', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, _normalizeLabelType),
- (u'code', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u'country', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u'begin', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeDate),
- (u'end', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeDate),
- (u'comment', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'alias', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+ (u'laid', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u'label', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+ (u'sortname', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u'type', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, _normalizeLabelType),
+ (u'code', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u'country', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u'begin', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeDate),
+ (u'end', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeDate),
+ (u'comment', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u'alias', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
Modified: search_index/trunk/mbsearch/serverindex/re_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/re_annotationindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/re_annotationindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
import re
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
class ReleaseAnnotationIndex(indexcreator.IndexCreator):
'''
@@ -34,10 +34,10 @@
'''
FIELDS = [
- (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+ (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
Modified: search_index/trunk/mbsearch/serverindex/releaseindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/releaseindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/releaseindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -27,7 +27,7 @@
import re
import psycopg2
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType
ALBUM_STATUS_FIRST = 100
ALBUM_STATUS_LAST = 102
@@ -135,22 +135,22 @@
'''
FIELDS = [
- (u"arid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u"artist", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"reid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u"release", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeReleaseType),
- (u"status", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, _normalizeReleaseStatus),
- (u"tracks", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u"discids", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u"asin", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"lang", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, _normalizeLanguage),
- (u"script", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, _normalizeScript),
- (u"country", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"date", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeDate),
- (u"label", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"catno", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u"barcode", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
+ (u"arid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u"artist", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"reid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u"release", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY, normalizeText),
+ (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeReleaseType),
+ (u"status", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, _normalizeReleaseStatus),
+ (u"tracks", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u"discids", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u"asin", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"lang", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, _normalizeLanguage),
+ (u"script", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, _normalizeScript),
+ (u"country", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"date", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeDate),
+ (u"label", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"catno", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u"barcode", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
Modified: search_index/trunk/mbsearch/serverindex/tr_annotationindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/tr_annotationindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/tr_annotationindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
import re
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeMBID
+from mbsearch.normalize import normalizeText
class TrackAnnotationIndex(indexcreator.IndexCreator):
'''
@@ -34,10 +34,10 @@
'''
FIELDS = [
- (u"mbid", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u"name", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u"type", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u"text", 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
+ (u"mbid", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u"name", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u"type", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u"text", indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
Modified: search_index/trunk/mbsearch/serverindex/trackindex.py
===================================================================
--- search_index/trunk/mbsearch/serverindex/trackindex.py 2008-06-25 13:27:54 UTC (rev 9884)
+++ search_index/trunk/mbsearch/serverindex/trackindex.py 2008-06-26 08:00:09 UTC (rev 9885)
@@ -26,7 +26,7 @@
import re
from mbsearch import indexcreator
-from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType, normalizeMBID
+from mbsearch.normalize import normalizeText, normalizeDate, normalizeReleaseType
ALBUM_ATTR_FIRST = 1
ALBUM_ATTR_LAST = 11
@@ -52,17 +52,17 @@
'''
FIELDS = [
- (u'arid', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u'artist', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'reid', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u'release', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_YES, normalizeText),
- (u'type', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeReleaseType),
- (u'tracks', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u'trid', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeMBID),
- (u'track', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
- (u'dur', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, _normalizeDur),
- (u'qdur', 1, indexcreator.FIELD_INDEX, indexcreator.FIELD_TOKENIZE_NO, _normalizeQdur),
- (u'tnum', 1, indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NO, normalizeText),
+ (u'arid', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u'artist', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u'reid', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u'release', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NORMAL, normalizeText),
+ (u'type', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeReleaseType),
+ (u'tracks', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
+ (u'trid', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_MBID, None),
+ (u'track', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_PRIMARY,normalizeText),
+ (u'dur', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, _normalizeDur),
+ (u'qdur', indexcreator.FIELD_INDEX, indexcreator.FIELD_TOKENIZE_NONE, _normalizeQdur),
+ (u'tnum', indexcreator.FIELD_INDEX_AND_STORE, indexcreator.FIELD_TOKENIZE_NONE, normalizeText),
]
def __init__(self, indexName, clear, host, database, user, passwd):
More information about the MusicBrainz-commits
mailing list