Sphinx search engine Installation

From Teknologisk videncenter
Revision as of 12:25, 28 February 2009 by Heth (talk | contribs) (Starting the search daemon)
Jump to: navigation, search

Compiling

By default Sphinx installs under /usr/local, and configure searches for the database on its own. No options are really necessary.

  1. Executed ./configure, which ran all its tests successfully and generated the Makefiles.
  2. Executed make to generate the binaries. It showed minor warnings, but ran all tests successfully.
  3. Executed make install showing output below
Making install in src
if test -d ../.svn; then svn info .. --xml | perl svnxrev.pl; fi;
make  install-am
test -z "/usr/local/bin" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/bin"
  /usr/bin/install -c 'indexer' '/usr/local/bin/indexer'
  /usr/bin/install -c 'searchd' '/usr/local/bin/searchd'
  /usr/bin/install -c 'search' '/usr/local/bin/search'
  /usr/bin/install -c 'spelldump' '/usr/local/bin/spelldump'
Making install in test
test -z "/usr/local/etc" || /usr/local/sw/sphinx-0.9.9-rc1/config/install-sh -d "/usr/local/etc"
 /usr/bin/install -c -m 644 'sphinx.conf.dist' '/usr/local/etc/sphinx.conf.dist'
 /usr/bin/install -c -m 644 'sphinx-min.conf.dist' '/usr/local/etc/sphinx-min.conf.dist'
 /usr/bin/install -c -m 644 'example.sql' '/usr/local/etc/example.sql'
make  install-data-hook
mkdir -p /usr/local/var/data && mkdir -p /usr/local/var/log
  • created directory /var/data/tekkom
  • created directory /var/data/tekkomstemmed (English morphology)

sphinx.conf

  • /usr/local/etc/sphinx.conf
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#

# Data source for the wiki index: reads every row of the `text` table
# (old_id, old_text) from the wikidb MySQL database on localhost.
source src1
{
        type                                    = mysql

        # NOTE(review): the database credentials are stored in plain text;
        # keep this file readable by root only and do not publish the real
        # password.
        sql_host                                = localhost
        sql_user                                = heth
        sql_pass                                = l8heise
        sql_db                                  = wikidb
#       sql_port                                = 3306  # optional, default is 3306
## --> HeTh inserted
        sql_query_pre   =
        # Keep a space before each continuation backslash (as the sample
        # query below does) so the joined lines cannot run into each other.
        sql_query       = \
          SELECT old_id, old_text \
          FROM text
        sql_query_post  =
        sql_query_info  = SELECT * FROM text WHERE old_id=$id
#<--HeTh
## --> HeTh Commented
#       sql_query                               = \
#               SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
#               FROM documents
#
#       sql_attr_uint                   = group_id
#       sql_attr_timestamp              = date_added
#
#       Leftover from the sample configuration: it duplicated the
#       sql_query_info set above and queried the sample `documents` table,
#       which the wiki query does not use.
#       sql_query_info                  = SELECT * FROM documents WHERE id=$id
#<--HeTh
}


############# --> HeTh Testing out morphology in English
# Base index built from source src1 with morphology disabled.
# The tekkomstemmed index further down is declared on top of this one.
index tekkom
{
        source                  = src1
        path                    = /var/data/tekkom
        docinfo                 = extern
        mlock                   = 0
        morphology              = none
        min_word_len            = 1
        charset_type            = sbcs
        html_strip                              = 0
}
# Variant of tekkom used to try out English stemming: declared as
# "tekkomstemmed : tekkom", it sets only its own path and morphology here.
index tekkomstemmed : tekkom
{
        path                    = /var/data/tekkomstemmed
        morphology              = stem_en
}

############# <-- HeTh (Commented out the simple index below)
#index tekkom
#{
#       source                                  = src1
#       path                                    = /var/data/tekkom
#       docinfo                                 = extern
#       charset_type                    = sbcs
#}


# Settings for the indexer program.
indexer
{
        # HeTh raised to 64M from 32M 28/2-09
        mem_limit                               = 64M
}


# Settings for the searchd daemon: listen endpoint, log and pid file
# locations, and limits.
# NOTE(review): the pid file is kept under /var/log together with the logs;
# anything that needs the daemon's pid must use that same path.
searchd
{
        listen                                  = 3312
        log                                             = /var/log/searchd.log
        query_log                               = /var/log/query.log
        read_timeout                    = 5
        max_children                    = 30
        pid_file                                = /var/log/searchd.pid
        max_matches                             = 1000
        seamless_rotate                 = 1
        preopen_indexes                 = 0
        unlink_old                              = 1
}

Indexing the search database

[root@mars /usr/local/etc]#  indexer --config /usr/local/etc/sphinx.conf --all
Sphinx 0.9.9-rc1 (r1566)
Copyright (c) 2001-2008, Andrew Aksyonoff

using config file '/usr/local/etc/sphinx.conf'...
indexing index 'tekkom'...
collected 2259 docs, 6.1 MB
sorted 1.0 Mhits, 100.0% done
total 2259 docs, 6148478 bytes
total 0.826 sec, 7440030.11 bytes/sec, 2733.53 docs/sec
indexing index 'tekkomstemmed'...
collected 2259 docs, 6.1 MB
sorted 1.0 Mhits, 100.0% done
total 2259 docs, 6148478 bytes
total 1.559 sec, 3944462.15 bytes/sec, 1449.23 docs/sec
total 4 reads, 0.0 sec, 978.7 kb/read avg, 3.2 msec/read avg
total 28 writes, 0.0 sec, 344.0 kb/write avg, 1.1 msec/write avg

Testing the search database

time search "dhcp relay"
search -q "dhcp relay"

Works fine :-), but apparently shows history documents as well. Let's see later when searching the wiki.

Starting the search daemon

Manually start

searchd --config /usr/local/etc/sphinx.conf

Run command (rc.d) script for FreeBSD

Add this file as /usr/local/etc/rc.d/searchd to automatically start search daemon at boot.

#!/bin/sh
#
# $FreeBSD: src/etc/rc.d/searchd,v 1.00 2009/02/29 09:46:00 HeTh Exp $
# Copyright Mercantec, Viborg, Denmark. www.mercantec.dk
# Author: Henrik Thomsen/heth@mercantec.dk
#
# rc.d script for the Sphinx search daemon (searchd).
# Install as /usr/local/etc/rc.d/searchd and enable it with
#   searchd_enable="YES"
# in /etc/rc.conf.

# PROVIDE: searchd
# REQUIRE: DAEMON
# BEFORE:  LOGIN

. /etc/rc.subr

name="searchd"
# Spelled out rather than calling set_rcvar: the value is the same
# (${name}_enable) and it does not depend on that helper being present
# in the installed rc.subr.
rcvar="${name}_enable"
command="/usr/local/bin/${name}"
# Must match pid_file in /usr/local/etc/sphinx.conf so that rc.subr checks
# and signals the pid that searchd itself recorded.
pidfile="/var/log/${name}.pid"

load_rc_config "$name"
# Default to disabled when rc.conf does not mention searchd_enable, so the
# variable is always set to a valid value.
: "${searchd_enable:=NO}"
run_rc_command "$1"

Add the following line to /etc/rc.conf:

searchd_enable="YES"

Updating the search database

The search database should be updated at regular intervals so that new articles and changes become searchable.
While the database is small, I've set the indexer to run once every hour from root's crontab file:

3 * * * * /usr/local/bin/indexer --config /usr/local/etc/sphinx.conf  --rotate --all
  • --rotate sends SIGHUP to the search daemon so that it starts using the new database index
  • --all indexes all indexes specified in sphinx.conf

Implementing sphinx in the wiki