Difference between revisions of "Sphinx search engine Installation"

From Teknologisk videncenter
Jump to: navigation, search
m
m
Line 1: Line 1:
Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC)<br/>
+
*Installer and date: [[User:Heth|Henrik Thomsen]] 07:32, 28 February 2009 (UTC)
Following the instructions at [[MW:Extension:SphinxSearch]]
+
*Following the instructions at [[MW:Extension:SphinxSearch]]
Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs]
+
*Documentation [http://www.sphinxsearch.com/docs/current.html sphinx docs]
Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx]
+
*Really good article on configuring [http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx/ sphinx]
Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz]<br/>
+
*Downloaded Sphinx from: [http://www.sphinxsearch.com/downloads/sphinx-0.9.9-rc1.tar.gz]
Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 on mars.tekkom.dk<br/>
+
*Unpacked and unzipped in /usr/local/sw/sphinx-0.9.9-rc1 on mars.tekkom.dk
  
 
=== Compiling ===
 
=== Compiling ===
Line 29: Line 29:
 
mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log
 
mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log
 
</pre>
 
</pre>
 +
 +
* created directory /var/data/tekkom
 +
* created directory /var/data/tekkomstemmed (English morphology)
 +
== sphinx.conf ==
 +
 +
*/usr/local/etc/sphinx.conf
 +
<pre>
 +
#
 +
# Minimal Sphinx configuration sample (clean, simple, functional)
 +
#
 +
 +
source src1
 +
{
 +
        type                                    = mysql
 +
 +
        sql_host                                = localhost
 +
        sql_user                                = heth
 +
        sql_pass                                = l8heise
 +
        sql_db                                  = wikidb
 +
#      sql_port                                = 3306  # optional, default is 3306
 +
## --> HeTh inserted
 +
        sql_query_pre  =
 +
        sql_query      = \
 +
          SELECT old_id, old_text\
 +
          FROM text
 +
        sql_query_post  =
 +
        sql_query_info  = SELECT * FROM text WHERE old_id=$id
 +
#<--HeTh
 +
## --> HeTh Commented
 +
#      sql_query                              = \
 +
#              SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
 +
#              FROM documents
 +
#
 +
#      sql_attr_uint                  = group_id
 +
#      sql_attr_timestamp              = date_added
 +
#<--HeTh
 +
        sql_query_info                  = SELECT * FROM documents WHERE id=$id
 +
}
 +
 +
 +
############# --> HeTh Testing out morphology in English
 +
index tekkom
 +
{
 +
        source                  = src1
 +
        path                    = /var/data/tekkom
 +
        docinfo                = extern
 +
        mlock                  = 0
 +
        morphology              = none
 +
        min_word_len            = 1
 +
        charset_type            = sbcs
 +
        html_strip                              = 0
 +
}
 +
index tekkomstemmed : tekkom
 +
{
 +
        path                    = /var/data/tekkomstemmed
 +
        morphology              = stem_en
 +
}
 +
 +
############# <-- HeTh (Commented out the simple index below)
 +
#index tekkom
 +
#{
 +
#      source                                  = src1
 +
#      path                                    = /var/data/tekkom
 +
#      docinfo                                = extern
 +
#      charset_type                    = sbcs
 +
#}
 +
 +
 +
indexer
 +
{
 +
        # HeTh raised to 64M from 32M 28/2-09
 +
        mem_limit                              = 64M
 +
}
 +
 +
 +
searchd
 +
{
 +
        listen                                  = 3312
 +
        log                                            = /var/log/searchd.log
 +
        query_log                              = /var/log/query.log
 +
        read_timeout                    = 5
 +
        max_children                    = 30
 +
        pid_file                                = /var/log/searchd.pid
 +
        max_matches                            = 1000
 +
        seamless_rotate                = 1
 +
        preopen_indexes                = 0
 +
        unlink_old                              = 1
 +
}
 +
</pre>
 +
== Indexing the search database ==
 +
<pre>
 +
[root@mars /usr/local/etc]#  indexer --config /usr/local/etc/sphinx.conf --all
 +
Sphinx 0.9.9-rc1 (r1566)
 +
Copyright (c) 2001-2008, Andrew Aksyonoff
 +
 +
using config file '/usr/local/etc/sphinx.conf'...
 +
indexing index 'tekkom'...
 +
collected 2259 docs, 6.1 MB
 +
sorted 1.0 Mhits, 100.0% done
 +
total 2259 docs, 6148478 bytes
 +
total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec
 +
indexing index 'tekkomstemmed'...
 +
collected 2259 docs, 6.1 MB
 +
sorted 1.0 Mhits, 100.0% done
 +
total 2259 docs, 6148478 bytes
 +
total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec
 +
total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg
 +
total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg
 +
</pre>
 +
== Testing the search database ==
 +
<pre>
 +
time search "dhcp relay"
 +
search -q "dhcp relay"
 +
</pre>
 +
Works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki.
 +
== Starting the search daemon manually ==
 +
<pre>
 +
searchd --config /usr/local/etc/sphinx.conf
 +
</pre>
 +
== Implementing sphinx in the wiki ==

Revision as of 10:40, 28 February 2009

Compiling

Sphinx installs by default in /usr/local and searches there for its database. No options are really necessary.

  1. Executed ./configure, which ran all the tests successfully and generated the Makefiles.
  2. Executed make to generate the binaries. It showed minor warnings, but ran all tests successfully.
  3. Executed make install, showing the output below:
Making install in src
if test -d ../.svn; then svn info .. --xml | perl svnxrev.pl; fi;
make  install-am
test -z "/usr/local/bin" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/bin"
  /usr/bin/install -c 'indexer' '/usr/local/bin/indexer'
  /usr/bin/install -c 'searchd' '/usr/local/bin/searchd'
  /usr/bin/install -c 'search' '/usr/local/bin/search'
  /usr/bin/install -c 'spelldump' '/usr/local/bin/spelldump'
Making install in test
test -z "/usr/local/etc" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/etc"
 /usr/bin/install -c -m 644 'sphinx.conf.dist' '/usr/local/etc/sphinx.conf.dist'
 /usr/bin/install -c -m 644 'sphinx-min.conf.dist' '/usr/local/etc/sphinx-min.conf.dist'
 /usr/bin/install -c -m 644 'example.sql' '/usr/local/etc/example.sql'
make  install-data-hook
mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log
  • created directory /var/data/tekkom
  • created directory /var/data/tekkomstemmed (English morphology)

sphinx.conf

  • /usr/local/etc/sphinx.conf
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#

source src1
{
        type                                    = mysql

        sql_host                                = localhost
        sql_user                                = heth
        sql_pass                                = l8heise
        sql_db                                  = wikidb
#       sql_port                                = 3306  # optional, default is 3306
## --> HeTh inserted
        sql_query_pre   =
        sql_query       = \
          SELECT old_id, old_text\
          FROM text
        sql_query_post  =
        sql_query_info  = SELECT * FROM text WHERE old_id=$id
#<--HeTh
## --> HeTh Commented
#       sql_query                               = \
#               SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
#               FROM documents
#
#       sql_attr_uint                   = group_id
#       sql_attr_timestamp              = date_added
#<--HeTh
        sql_query_info                  = SELECT * FROM documents WHERE id=$id
}


############# --> HeTh Testing out morphology in English
index tekkom
{
        source                  = src1
        path                    = /var/data/tekkom
        docinfo                 = extern
        mlock                   = 0
        morphology              = none
        min_word_len            = 1
        charset_type            = sbcs
        html_strip                              = 0
}
index tekkomstemmed : tekkom
{
        path                    = /var/data/tekkomstemmed
        morphology              = stem_en
}

############# <-- HeTh (Commented out the simple index below)
#index tekkom
#{
#       source                                  = src1
#       path                                    = /var/data/tekkom
#       docinfo                                 = extern
#       charset_type                    = sbcs
#}


indexer
{
        # HeTh raised to 64M from 32M 28/2-09
        mem_limit                               = 64M
}


searchd
{
        listen                                  = 3312
        log                                             = /var/log/searchd.log
        query_log                               = /var/log/query.log
        read_timeout                    = 5
        max_children                    = 30
        pid_file                                = /var/log/searchd.pid
        max_matches                             = 1000
        seamless_rotate                 = 1
        preopen_indexes                 = 0
        unlink_old                              = 1
}

Indexing the search database

[root@mars /usr/local/etc]#  indexer --config /usr/local/etc/sphinx.conf --all
Sphinx 0.9.9-rc1 (r1566)
Copyright (c) 2001-2008, Andrew Aksyonoff

using config file '/usr/local/etc/sphinx.conf'...
indexing index 'tekkom'...
collected 2259 docs, 6.1 MB
sorted 1.0 Mhits, 100.0% done
total 2259 docs, 6148478 bytes
total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec
indexing index 'tekkomstemmed'...
collected 2259 docs, 6.1 MB
sorted 1.0 Mhits, 100.0% done
total 2259 docs, 6148478 bytes
total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec
total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg
total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg

Testing the search database

time search "dhcp relay"
search -q "dhcp relay"

Works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki.

Starting the search daemon manually

searchd --config /usr/local/etc/sphinx.conf

Implementing sphinx in the wiki