[mb-commits] r9918 - in search_server/trunk: . bin lib

root at musicbrainz.org root at musicbrainz.org
Mon Jun 30 09:50:16 UTC 2008


Author: robert
Date: 2008-06-30 09:50:16 +0000 (Mon, 30 Jun 2008)
New Revision: 9918

Added:
   search_server/trunk/bin/search_app.py
   search_server/trunk/bin/standalone.py
Modified:
   search_server/trunk/README
   search_server/trunk/bin/handler.fcgi
   search_server/trunk/lib/search.py
Log:
Updated README.
Created a standalone server for easy dev running. Also still runs in FASTCGI


Modified: search_server/trunk/README
===================================================================
--- search_server/trunk/README	2008-06-30 08:38:45 UTC (rev 9917)
+++ search_server/trunk/README	2008-06-30 09:50:16 UTC (rev 9918)
@@ -1,8 +1,17 @@
-Intro
------
+Introduction
+------------
 
-This is the Lucene text searching back-end for MusicBrainz!
+This is the Xapian text searching back-end for MusicBrainz. This is a dead simple
+transclusion based server, which outputs only HTML or XML *fragments*. It does not
+serve complete documents!
 
+A regular MusicBrainz server installation will call this search server and
+render the appropriate HTML page or XML header/footer to create complete HTML/XML
+documents.
+
+Before you can run this server, you will need to install and run the search_index
+project's builder.py script to create xapian indexes.
+
 Requirements:
 -------------
 
@@ -10,7 +19,8 @@
 
 apache2, with mod_fastcgi   - this version is tested with apache 2.0.x
 python 2.4+                 - http://python.org
-PyLucene 2.2.x              - http://downloads.osafoundation.org/PyLucene
+xapian-core 1.0.x           - http://xapian.org/download.php
+xapian-bindings 1.0.x       - http://xapian.org/download.php
 PyUnac 1.7.x                - http://download.gna.org/unac/python-unac-1.7.0.tar.gz
 flup                        - http://trac.saddi.com/flup (release r2311 was used)
 
@@ -20,15 +30,52 @@
 Install:
 --------
 
-Short version:
+Using a standalone development server
 
-- install apache2
-- install and enable mod_fastcgi, mod_rewrite
-- install admin/mbsearch.conf as one of the site configs
-- review and make appropriate changes to admin/mbsearch.conf
-- install indexes into /var/index or change the settings in mbserach.conf
-- install server code into /home/lucene/lucene_server or change settings in mbsearch.conf
+To run the super-easy development server, create indexes from a MB database using
+the search_index project. Stash all the generated indexes (in the data dir) in
+some directory where it makes sense to keep indexes, say /var/index
 
-Long version:
+Then:
 
-Santa Claus will deliver this soon!
+  > cd bin
+  > ./standalone.py -i /var/index
+
+This will start the server on port 8001 and serve indexes from /var/index. You
+can give the standalone server the -p option to change the port the server listens on.
+Alternatively, the location of the indexes can also be specified with the INDEXDIR
+environment variable.
+
+DO NOT USE THIS SERVER IN A PRODUCTION ENVIRONMENT!
+
+
+Apache2 installation notes:
+
+1. install apache2
+2. install and enable mod_fastcgi, mod_rewrite
+3. install admin/mbsearch.conf as one of the site configs
+4. review and make appropriate changes to admin/mbsearch.conf
+5. install indexes into /var/index or change the settings in mbsearch.conf
+6. install server code into /home/search/search_server or change settings in mbsearch.conf
+
+
+Using the search server:
+------------------------
+
+To use the standalone server, request any resource and give the following arguments:
+
+  fmt      can be 'xml' or 'html' (default)
+  type     must be one of artist, release, track, annotation, label or freedb
+  query    a full xapian query
+  max      The maximum number of hits to return
+  tport    The port number to use for Picard tagger links
+  mbt      Set if Classic Tagger links should be output
+  rel      If non-zero AR creation Rel links will be output
+  offset   The offset in hits to start showing results (used for paging)
+
+By convention, the URL used should be:
+
+    http://localhost:8001/ws/1/<type>/?query=love&type=artist&fmt=xml
+
+The apache2 installation parses the resource path and determines the type from it, so the
+type argument can be omitted there.

Modified: search_server/trunk/bin/handler.fcgi
===================================================================
--- search_server/trunk/bin/handler.fcgi	2008-06-30 08:38:45 UTC (rev 9917)
+++ search_server/trunk/bin/handler.fcgi	2008-06-30 09:50:16 UTC (rev 9918)
@@ -1,140 +1,6 @@
 #!/usr/bin/env python
 
-from flup.server.fcgi_fork import WSGIServer; 
-from cgi import FieldStorage;
-import time
-import sys
+from flup.server.fcgi_fork import WSGIServer
+from search_app import search_app
 
-sys.path.append("../lib")
-
-import labelsearch
-import artistsearch
-import releasesearch
-import tracksearch
-import annotationsearch
-import freedbsearch
-
-ar_search = None
-re_search = None
-tr_search = None
-an_search = None
-fd_search = None
-la_search = None
-
-def search(environ, start_response):
-    global ar_search
-    global re_search
-    global tr_search
-    global an_search
-    global fd_search
-    global la_search
-
-    try:
-        indexDir = environ['INDEXDIR']
-    except KeyError:
-        start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
-        return "INDEXDIR environment variable not set. Search server misconfigured.\n"
-
-    args = FieldStorage(environ=environ)
-    query = ""
-    maxHits = -1 
-    fmt = ''
-    type = ''
-    tport = 0
-    dur = 0
-    mbt = 0
-    rel = 0
-    offset = 0
-
-    # Parse arguments
-    query = args.getvalue('query')
-    if not query:
-        start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
-        return "query argument is missing"
-
-    maxHits = int(args.getvalue('max', 0))
-    fmt = args.getvalue('fmt', 'html')
-    tport = int(args.getvalue('tport', 0))
-    dur = int(args.getvalue('dur', 0))
-    mbt = int(args.getvalue('mbt', 0))
-    rel = int(args.getvalue('rel', 0))
-    offset = int(args.getvalue('offset', 0))
-    type = args.getvalue('type')
-    if isinstance(type, list): type = type[0]
-
-    # if we don't have a tagger port, don't color code track lengths
-    if not tport: dur = 0
-
-    searchobj = None
-    import search
-    try:
-	if type == 'artist':
-	    if not ar_search:
-		ar_search = artistsearch.ArtistSearch(indexDir + "/artist_index")
-	    searchobj = ar_search
-
-	elif type == 'release':
-	    if not re_search:
-		re_search = releasesearch.ReleaseSearch(indexDir + "/release_index")
-	    re_search.setTaggerPort(tport)
-	    re_search.setDuration(dur)
-	    searchobj = re_search
-
-	elif type == 'track':
-	    if not tr_search:
-		tr_search = tracksearch.TrackSearch(indexDir + "/track_index")
-	    tr_search.setMBT(mbt)
-	    tr_search.setTaggerPort(tport)
-	    tr_search.setDuration(dur)
-	    searchobj = tr_search
-	
-	elif type == 'annotation':
-	    if not an_search:
-		an_search = annotationsearch.AnnotationSearch(indexDir + "/annotation_index")
-	    searchobj = an_search
-
-	elif type == 'freedb':
-	    if not fd_search:
-		fd_search = freedbsearch.FreeDBSearch(indexDir + "/freedb_index")
-	    searchobj = fd_search
-
-	elif type == 'label':
-	    if not la_search:
-		la_search = labelsearch.LabelSearch(indexDir + "/label_index")
-	    searchobj = la_search
-
-	else:
-	    start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
-	    return "invalid resource requested. %s must be one of artist/release/track/label/annotation.\n" % type
-    except search.NoSuchIndexError, msg:
-	start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
-	return "Cannot find indexes. Server misconfigured: %s\n" % msg
-
-    ret = 0
-    content = ""
-
-    searchobj.setShowRelationshipLink(rel);
-    try:
-        content = searchobj.search(query, maxHits, offset, fmt)
-    except search.QueryError, text:
-        text = str(text)
-        text += "\n"
-        start_response('400 BAD REQUEST', [('Content-Type', 'text/plain')])
-        return text.encode('utf-8', 'replace')
-    except search.SearchError:
-        start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
-        return "internal server error\n"
-    except search.NoResultsError:
-        start_response('404 NOT FOUND', [('Content-Type', 'text/plain')])
-        return "zero search hits\n"
-    
-    start_response('200 OK', [('Content-Type', 'text/%s' % fmt)])
-    return content
-
-#if __name__ == '__main__':
-#    from wsgiref import simple_server
-#    httpd = simple_server.WSGIServer(('',8080),simple_server.WSGIRequestHandler)
-#    httpd.set_app(search)
-#    httpd.serve_forever()
-#else:
-WSGIServer(search, bindAddress = '/tmp/mbsearch.fcgi.sock').run() 
+WSGIServer(search_app, bindAddress = '/tmp/mbsearch.fcgi.sock').run() 

Added: search_server/trunk/bin/search_app.py


Property changes on: search_server/trunk/bin/search_app.py
___________________________________________________________________
Name: svn:executable
   + *
Name: svn:keywords
   + Id HeadURL

Added: search_server/trunk/bin/standalone.py


Property changes on: search_server/trunk/bin/standalone.py
___________________________________________________________________
Name: svn:executable
   + *
Name: svn:keywords
   + Id HeadURL

Modified: search_server/trunk/lib/search.py
===================================================================
--- search_server/trunk/lib/search.py	2008-06-30 08:38:45 UTC (rev 9917)
+++ search_server/trunk/lib/search.py	2008-06-30 09:50:16 UTC (rev 9918)
@@ -75,7 +75,6 @@
 
         self.defaultField = u''
         try:
-            sys.stderr.write(indexName + "\n")
             self.index = xapian.Database(indexName)
         except xapian.Error, msg:
 	    text = str(msg)
@@ -245,9 +244,9 @@
 
         try:
             query = unicode(query, 'utf-8')
-	    self.f = open("/tmp/log", "a")
-	    print >>self.f, "query: '%s'" % query.encode('utf-8', 'replace') 
-	    self.f.close()
+	    #self.f = open("/tmp/log", "a")
+	    #print >>self.f, "query: '%s'" % query.encode('utf-8', 'replace') 
+	    #self.f.close()
             query = self.lowercaseQuery(query)
             query = self.mangleQuery(query)
             query = self.removeTermBoosting(query)




More information about the MusicBrainz-commits mailing list