[mb-commits] r9894 - search_index/trunk/mbsearch

root at musicbrainz.org root at musicbrainz.org
Sun Jun 29 08:30:56 UTC 2008


Author: robert
Date: 2008-06-29 08:30:56 +0000 (Sun, 29 Jun 2008)
New Revision: 9894

Modified:
   search_index/trunk/mbsearch/indexcreator.py
Log:
Fixes #3804


Modified: search_index/trunk/mbsearch/indexcreator.py
===================================================================
--- search_index/trunk/mbsearch/indexcreator.py	2008-06-29 07:39:56 UTC (rev 9893)
+++ search_index/trunk/mbsearch/indexcreator.py	2008-06-29 08:30:56 UTC (rev 9894)
@@ -179,6 +179,13 @@
 
 	return u''.join(bits)
 
+    def replaceApostrophe(self, query):
+	'''
+	Xapian considers ' as part of a word in order to not split things like "don't" and generate a bunch
+	of t fragments. I can see that for text, but for searching names and titles, not so much.
+	'''
+	return query.replace(u"'", u'')
+        
     def calculateWeight(self, text):
 	maxLen = 100
 	l = min(maxLen, len(text))
@@ -272,6 +279,7 @@
                                     text = text.strip().lower()
                                     text = unac.unac_string(text)
                                     text = self.removeDots(text) 
+                                    text = self.replaceApostrophe(text) 
                                     text = addSpacesToIdeographicStrings(text) 
                                     self.indexer.index_text(text, weight, u"X" + name.upper())
 				    #print "ndx: %s:'%s' %d" % (name.encode('utf-8', 'replace'), text.encode('utf-8', 'replace'), weight)




More information about the MusicBrainz-commits mailing list