[mb-commits] r9894 - search_index/trunk/mbsearch
root at musicbrainz.org
root at musicbrainz.org
Sun Jun 29 08:30:56 UTC 2008
Author: robert
Date: 2008-06-29 08:30:56 +0000 (Sun, 29 Jun 2008)
New Revision: 9894
Modified:
search_index/trunk/mbsearch/indexcreator.py
Log:
Fixes #3804
Modified: search_index/trunk/mbsearch/indexcreator.py
===================================================================
--- search_index/trunk/mbsearch/indexcreator.py 2008-06-29 07:39:56 UTC (rev 9893)
+++ search_index/trunk/mbsearch/indexcreator.py 2008-06-29 08:30:56 UTC (rev 9894)
@@ -179,6 +179,13 @@
return u''.join(bits)
+ def replaceApostrophe(self, query):
+ '''
+ Xapian considers ' as part of a word in order to not split things like "don't" and generate a bunch
+ of t fragments. I can see that for text, but for searching names and titles, not so much.
+ '''
+ return query.replace(u"'", u'')
+
def calculateWeight(self, text):
maxLen = 100
l = min(maxLen, len(text))
@@ -272,6 +279,7 @@
text = text.strip().lower()
text = unac.unac_string(text)
text = self.removeDots(text)
+ text = self.replaceApostrophe(text)
text = addSpacesToIdeographicStrings(text)
self.indexer.index_text(text, weight, u"X" + name.upper())
#print "ndx: %s:'%s' %d" % (name.encode('utf-8', 'replace'), text.encode('utf-8', 'replace'), weight)
More information about the MusicBrainz-commits
mailing list