[mb-commits] r9918 - in search_server/trunk: . bin lib
root at musicbrainz.org
root at musicbrainz.org
Mon Jun 30 09:50:16 UTC 2008
Author: robert
Date: 2008-06-30 09:50:16 +0000 (Mon, 30 Jun 2008)
New Revision: 9918
Added:
search_server/trunk/bin/search_app.py
search_server/trunk/bin/standalone.py
Modified:
search_server/trunk/README
search_server/trunk/bin/handler.fcgi
search_server/trunk/lib/search.py
Log:
Updated README.
Created a standalone server for easy dev running. Also still runs in FASTCGI
Modified: search_server/trunk/README
===================================================================
--- search_server/trunk/README 2008-06-30 08:38:45 UTC (rev 9917)
+++ search_server/trunk/README 2008-06-30 09:50:16 UTC (rev 9918)
@@ -1,8 +1,17 @@
-Intro
------
+Introduction
+------------
-This is the Lucene text searching back-end for MusicBrainz!
+This is the Xapian text searching back-end for MusicBrainz. This is a dead simple
+transclusion based server, which outputs only HTML or XML *fragments*. It does not
+serve complete documents!
+A regular MusicBrainz server installation will call this search server and
+render the appropriate HTML page or XML header/footer to create complete HTML/XML
+documents.
+
+Before you can run this server, you will need to install and run the search_index
+project's builder.py script to create xapian indexes.
+
Requirements:
-------------
@@ -10,7 +19,8 @@
apache2, with mod_fastcgi - this version is tested with apache 2.0.x
python 2.4+ - http://python.org
-PyLucene 2.2.x - http://downloads.osafoundation.org/PyLucene
+xapian-core 1.0.x - http://xapian.org/download.php
+xapian-bindings 1.0.x - http://xapian.org/download.php
PyUnac 1.7.x - http://download.gna.org/unac/python-unac-1.7.0.tar.gz
flup - http://trac.saddi.com/flup (release r2311 was used)
@@ -20,15 +30,52 @@
Install:
--------
-Short version:
+Using a standalone development server
-- install apache2
-- install and enable mod_fastcgi, mod_rewrite
-- install admin/mbsearch.conf as one of the site configs
-- review and make appropriate changes to admin/mbsearch.conf
-- install indexes into /var/index or change the settings in mbserach.conf
-- install server code into /home/lucene/lucene_server or change settings in mbsearch.conf
+To run the super-easy development server, create indexes from a MB database using
+the search_index project. Stash all the generated indexes (in the data dir) in
+some directory where it makes sense to keep indexes, say /var/index
-Long version:
+Then:
-Santa Claus will deliver this soon!
+ > cd bin
+ > ./standalone.py -i /var/index
+
+This will start the server on port 8001 and serve indexes from /var/index. You
+can give the standalone server the -p option to change the port the server listens on.
+Alternatively, the location of the indexes can also be specified with the INDEXDIR
+environment variable.
+
+DO NOT USE THIS SERVER IN A PRODUCTION ENVIRONMENT!
+
+
+Apache2 installation notes:
+
+1. install apache2
+2. install and enable mod_fastcgi, mod_rewrite
+3. install admin/mbsearch.conf as one of the site configs
+4. review and make appropriate changes to admin/mbsearch.conf
+5. install indexes into /var/index or change the settings in mbsearch.conf
+6. install server code into /home/search/search_server or change settings in mbsearch.conf
+
+
+Using the search server:
+------------------------
+
+To use the standalone server, request any resource and give the following arguments:
+
+ fmt can be 'xml' or 'html' (default)
+ type must be one of artist, release, track, annotation, label or freedb
+ query a full xapian query
+ max The maximum number of hits to return
+ tport The port number to use for Picard tagger links
+ mbt Set if Classic Tagger links should be output
+ rel If non-zero AR creation Rel links will be output
+ offset The offset in hits to start showing results (used for paging)
+
+By convention, the URL used should be:
+
+ http://localhost:8001/ws/1/<type>/?query=love&type=artist&fmt=xml
+
+The apache2 installation parses the resource path and determines the type from it, so the
+type argument can be omitted.
Modified: search_server/trunk/bin/handler.fcgi
===================================================================
--- search_server/trunk/bin/handler.fcgi 2008-06-30 08:38:45 UTC (rev 9917)
+++ search_server/trunk/bin/handler.fcgi 2008-06-30 09:50:16 UTC (rev 9918)
@@ -1,140 +1,6 @@
#!/usr/bin/env python
-from flup.server.fcgi_fork import WSGIServer;
-from cgi import FieldStorage;
-import time
-import sys
+from flup.server.fcgi_fork import WSGIServer
+from search_app import search_app
-sys.path.append("../lib")
-
-import labelsearch
-import artistsearch
-import releasesearch
-import tracksearch
-import annotationsearch
-import freedbsearch
-
-ar_search = None
-re_search = None
-tr_search = None
-an_search = None
-fd_search = None
-la_search = None
-
-def search(environ, start_response):
- global ar_search
- global re_search
- global tr_search
- global an_search
- global fd_search
- global la_search
-
- try:
- indexDir = environ['INDEXDIR']
- except KeyError:
- start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
- return "INDEXDIR environment variable not set. Search server misconfigured.\n"
-
- args = FieldStorage(environ=environ)
- query = ""
- maxHits = -1
- fmt = ''
- type = ''
- tport = 0
- dur = 0
- mbt = 0
- rel = 0
- offset = 0
-
- # Parse arguments
- query = args.getvalue('query')
- if not query:
- start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
- return "query argument is missing"
-
- maxHits = int(args.getvalue('max', 0))
- fmt = args.getvalue('fmt', 'html')
- tport = int(args.getvalue('tport', 0))
- dur = int(args.getvalue('dur', 0))
- mbt = int(args.getvalue('mbt', 0))
- rel = int(args.getvalue('rel', 0))
- offset = int(args.getvalue('offset', 0))
- type = args.getvalue('type')
- if isinstance(type, list): type = type[0]
-
- # if we don't have a tagger port, don't color code track lengths
- if not tport: dur = 0
-
- searchobj = None
- import search
- try:
- if type == 'artist':
- if not ar_search:
- ar_search = artistsearch.ArtistSearch(indexDir + "/artist_index")
- searchobj = ar_search
-
- elif type == 'release':
- if not re_search:
- re_search = releasesearch.ReleaseSearch(indexDir + "/release_index")
- re_search.setTaggerPort(tport)
- re_search.setDuration(dur)
- searchobj = re_search
-
- elif type == 'track':
- if not tr_search:
- tr_search = tracksearch.TrackSearch(indexDir + "/track_index")
- tr_search.setMBT(mbt)
- tr_search.setTaggerPort(tport)
- tr_search.setDuration(dur)
- searchobj = tr_search
-
- elif type == 'annotation':
- if not an_search:
- an_search = annotationsearch.AnnotationSearch(indexDir + "/annotation_index")
- searchobj = an_search
-
- elif type == 'freedb':
- if not fd_search:
- fd_search = freedbsearch.FreeDBSearch(indexDir + "/freedb_index")
- searchobj = fd_search
-
- elif type == 'label':
- if not la_search:
- la_search = labelsearch.LabelSearch(indexDir + "/label_index")
- searchobj = la_search
-
- else:
- start_response('403 BAD REQUEST', [('Content-Type', 'text/plain')])
- return "invalid resource requested. %s must be one of artist/release/track/label/annotation.\n" % type
- except search.NoSuchIndexError, msg:
- start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
- return "Cannot find indexes. Server misconfigured: %s\n" % msg
-
- ret = 0
- content = ""
-
- searchobj.setShowRelationshipLink(rel);
- try:
- content = searchobj.search(query, maxHits, offset, fmt)
- except search.QueryError, text:
- text = str(text)
- text += "\n"
- start_response('400 BAD REQUEST', [('Content-Type', 'text/plain')])
- return text.encode('utf-8', 'replace')
- except search.SearchError:
- start_response('500 INTERNAL SERVER ERROR', [('Content-Type', 'text/plain')])
- return "internal server error\n"
- except search.NoResultsError:
- start_response('404 NOT FOUND', [('Content-Type', 'text/plain')])
- return "zero search hits\n"
-
- start_response('200 OK', [('Content-Type', 'text/%s' % fmt)])
- return content
-
-#if __name__ == '__main__':
-# from wsgiref import simple_server
-# httpd = simple_server.WSGIServer(('',8080),simple_server.WSGIRequestHandler)
-# httpd.set_app(search)
-# httpd.serve_forever()
-#else:
-WSGIServer(search, bindAddress = '/tmp/mbsearch.fcgi.sock').run()
+WSGIServer(search_app, bindAddress = '/tmp/mbsearch.fcgi.sock').run()
Added: search_server/trunk/bin/search_app.py
Property changes on: search_server/trunk/bin/search_app.py
___________________________________________________________________
Name: svn:executable
+ *
Name: svn:keywords
+ Id HeadURL
Added: search_server/trunk/bin/standalone.py
Property changes on: search_server/trunk/bin/standalone.py
___________________________________________________________________
Name: svn:executable
+ *
Name: svn:keywords
+ Id HeadURL
Modified: search_server/trunk/lib/search.py
===================================================================
--- search_server/trunk/lib/search.py 2008-06-30 08:38:45 UTC (rev 9917)
+++ search_server/trunk/lib/search.py 2008-06-30 09:50:16 UTC (rev 9918)
@@ -75,7 +75,6 @@
self.defaultField = u''
try:
- sys.stderr.write(indexName + "\n")
self.index = xapian.Database(indexName)
except xapian.Error, msg:
text = str(msg)
@@ -245,9 +244,9 @@
try:
query = unicode(query, 'utf-8')
- self.f = open("/tmp/log", "a")
- print >>self.f, "query: '%s'" % query.encode('utf-8', 'replace')
- self.f.close()
+ #self.f = open("/tmp/log", "a")
+ #print >>self.f, "query: '%s'" % query.encode('utf-8', 'replace')
+ #self.f.close()
query = self.lowercaseQuery(query)
query = self.mangleQuery(query)
query = self.removeTermBoosting(query)
More information about the MusicBrainz-commits
mailing list