Difference between revisions of "Sphinx search engine Installation"
From Teknologisk videncenter
m |
m |
||
Line 1: | Line 1: | ||
− | Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC) | + | *Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC) |
− | Following the instructions at [[MW:Extension:SphinxSearch]] | + | *Following the instructions at [[MW:Extension:SphinxSearch]] |
− | Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs] | + | *Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs] |
− | Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx] | + | *Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx] |
− | Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz] | + | *Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz] |
− | Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 on mars.tekkom.dk | + | *Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 on mars.tekkom.dk |
=== Compiling === | === Compiling === | ||
Line 29: | Line 29: | ||
mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log | mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log | ||
</pre> | </pre> | ||
+ | |||
+ | * created directory /var/data/tekkom | ||
+ | * created directory /var/data/tekkomstemmed (English morphology) | ||
+ | == sphinx.conf == | ||
+ | |||
+ | */usr/local/etc/sphinx.conf | ||
+ | <pre> | ||
+ | # | ||
+ | # Minimal Sphinx configuration sample (clean, simple, functional) | ||
+ | # | ||
+ | |||
+ | source src1 | ||
+ | { | ||
+ | type = mysql | ||
+ | |||
+ | sql_host = localhost | ||
+ | sql_user = heth | ||
+ | sql_pass = l8heise | ||
+ | sql_db = wikidb | ||
+ | # sql_port = 3306 # optional, default is 3306 | ||
+ | ## --> HeTh inserted | ||
+ | sql_query_pre = | ||
+ | sql_query = \ | ||
+ | SELECT old_id, old_text\ | ||
+ | FROM text | ||
+ | sql_query_post = | ||
+ | sql_query_info = SELECT * FROM text WHERE old_id=$id | ||
+ | #<--HeTh | ||
+ | ## --> HeTh Commented | ||
+ | # sql_query = \ | ||
+ | # SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \ | ||
+ | # FROM documents | ||
+ | # | ||
+ | # sql_attr_uint = group_id | ||
+ | # sql_attr_timestamp = date_added | ||
+ | #<--HeTh | ||
+ | sql_query_info = SELECT * FROM documents WHERE id=$id | ||
+ | } | ||
+ | |||
+ | |||
+ | ############# --> HeTh Testing out morphology in English | ||
+ | index tekkom | ||
+ | { | ||
+ | source = src1 | ||
+ | path = /var/data/tekkom | ||
+ | docinfo = extern | ||
+ | mlock = 0 | ||
+ | morphology = none | ||
+ | min_word_len = 1 | ||
+ | charset_type = sbcs | ||
+ | html_strip = 0 | ||
+ | } | ||
+ | index tekkomstemmed : tekkom | ||
+ | { | ||
+ | path = /var/data/tekkomstemmed | ||
+ | morphology = stem_en | ||
+ | } | ||
+ | |||
+ | ############# <-- HeTh (Commented the simple index below) | ||
+ | #index tekkom | ||
+ | #{ | ||
+ | # source = src1 | ||
+ | # path = /var/data/tekkom | ||
+ | # docinfo = extern | ||
+ | # charset_type = sbcs | ||
+ | #} | ||
+ | |||
+ | |||
+ | indexer | ||
+ | { | ||
+ | # HeTh raised to 64M from 32M 28/2-09 | ||
+ | mem_limit = 64M | ||
+ | } | ||
+ | |||
+ | |||
+ | searchd | ||
+ | { | ||
+ | listen = 3312 | ||
+ | log = /var/log/searchd.log | ||
+ | query_log = /var/log/query.log | ||
+ | read_timeout = 5 | ||
+ | max_children = 30 | ||
+ | pid_file = /var/log/searchd.pid | ||
+ | max_matches = 1000 | ||
+ | seamless_rotate = 1 | ||
+ | preopen_indexes = 0 | ||
+ | unlink_old = 1 | ||
+ | } | ||
+ | </pre> | ||
+ | == Indexing the search database == | ||
+ | <pre> | ||
+ | [root@mars /usr/local/etc]# indexer --config /usr/local/etc/sphinx.conf --all | ||
+ | Sphinx 0.9.9-rc1 (r1566) | ||
+ | Copyright (c) 2001-2008, Andrew Aksyonoff | ||
+ | |||
+ | using config file '/usr/local/etc/sphinx.conf'... | ||
+ | indexing index 'tekkom'... | ||
+ | collected 2259 docs, 6.1 MB | ||
+ | sorted 1.0 Mhits, 100.0% done | ||
+ | total 2259 docs, 6148478 bytes | ||
+ | total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec | ||
+ | indexing index 'tekkomstemmed'... | ||
+ | collected 2259 docs, 6.1 MB | ||
+ | sorted 1.0 Mhits, 100.0% done | ||
+ | total 2259 docs, 6148478 bytes | ||
+ | total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec | ||
+ | total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg | ||
+ | total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg | ||
+ | </pre> | ||
+ | == Testing the search database == | ||
+ | <pre> | ||
+ | time search "dhcp relay" | ||
+ | search -q "dhcp relay" | ||
+ | </pre> | ||
+ | It works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki. | ||
+ | == Starting the search daemon manually == | ||
+ | <pre> | ||
+ | searchd --config /usr/local/etc/sphinx.conf | ||
+ | </pre> | ||
+ | == Implementing sphinx in the wiki == |
Revision as of 10:40, 28 February 2009
- Installer and date: Henrik Thomsen 07:32, 28 February 2009 (UTC)
- Following the instructions at MW:Extension:SphinxSearch
- Documentation sphinx docs
- Really good article on configuring sphinx
- Downloaded Sphinx from: [1]
- Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 on mars.tekkom.dk
Contents
Compiling
Sphinx installs by default in /usr/local and searches for the database itself. No options are really necessary.
- Executed ./configure, which ran all the tests successfully and generated the Makefiles.
- Executed make to generate the binaries. It showed minor warnings, but ran all tests successfully.
- Executed make install showing output below
Making install in src if test -d ../.svn; then svn info .. --xml | perl svnxrev.pl; fi; make install-am test -z "/usr/local/bin" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/bin" /usr/bin/install -c 'indexer' '/usr/local/bin/indexer' /usr/bin/install -c 'searchd' '/usr/local/bin/searchd' /usr/bin/install -c 'search' '/usr/local/bin/search' /usr/bin/install -c 'spelldump' '/usr/local/bin/spelldump' Making install in test test -z "/usr/local/etc" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/etc" /usr/bin/install -c -m 644 'sphinx.conf.dist' '/usr/local/etc/sphinx.conf.dist' /usr/bin/install -c -m 644 'sphinx-min.conf.dist' '/usr/local/etc/sphinx-min.conf.dist' /usr/bin/install -c -m 644 'example.sql' '/usr/local/etc/example.sql' make install-data-hook mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log
- created directory /var/data/tekkom
- created directory /var/data/tekkomstemmed (English morphology)
sphinx.conf
- /usr/local/etc/sphinx.conf
# # Minimal Sphinx configuration sample (clean, simple, functional) # source src1 { type = mysql sql_host = localhost sql_user = heth sql_pass = l8heise sql_db = wikidb # sql_port = 3306 # optional, default is 3306 ## --> HeTh inserted sql_query_pre = sql_query = \ SELECT old_id, old_text\ FROM text sql_query_post = sql_query_info = SELECT * FROM text WHERE old_id=$id #<--HeTh ## --> HeTh Commented # sql_query = \ # SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \ # FROM documents # # sql_attr_uint = group_id # sql_attr_timestamp = date_added #<--HeTh sql_query_info = SELECT * FROM documents WHERE id=$id } ############# --> HeTh Testing out morphology in English index tekkom { source = src1 path = /var/data/tekkom docinfo = extern mlock = 0 morphology = none min_word_len = 1 charset_type = sbcs html_strip = 0 } index tekkomstemmed : tekkom { path = /var/data/tekkomstemmed morphology = stem_en } ############# <-- HeTh (Commented the simples index below) #index tekkom #{ # source = src1 # path = /var/data/tekkom # docinfo = extern # charset_type = sbcs #} indexer { # HeTh rasised to 64M from 32M 28/2-09 mem_limit = 64M } searchd { listen = 3312 log = /var/log/searchd.log query_log = /var/log/query.log read_timeout = 5 max_children = 30 pid_file = /var/log/searchd.pid max_matches = 1000 seamless_rotate = 1 preopen_indexes = 0 unlink_old = 1 }
Indexing the search database
[root@mars /usr/local/etc]# indexer --config /usr/local/etc/sphinx.conf --all Sphinx 0.9.9-rc1 (r1566) Copyright (c) 2001-2008, Andrew Aksyonoff using config file '/usr/local/etc/sphinx.conf'... indexing index 'tekkom'... collected 2259 docs, 6.1 MB sorted 1.0 Mhits, 100.0% done total 2259 docs, 6148478 bytes total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec indexing index 'tekkomstemmed'... collected 2259 docs, 6.1 MB sorted 1.0 Mhits, 100.0% done total 2259 docs, 6148478 bytes total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg
Testing the search database
time search "dhcp relay" search -q "dhcp relay"
It works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki.
Starting the search daemon manually
searchd --config /usr/local/etc/sphinx.conf