Sphinx安装及应用

一.安装

参考http://www.coreseek.cn/docs/coreseek_4.1-sphinx_2.0.1-beta.html#supported-system

$ tar xzvf sphinx-2.0.1-beta.tar.gz
$ cd sphinx

1.首先安装MMSeg:

$ cd mmseg

$ ./configure --prefix=/usr/local/mmseg

$ make

$ make install

$ cd ..

2.运行配置程序:

$ ./configure

configure程序有很多运行选项。完整的列表可以通过使用 --help 开关得到。最重要的如下:

--prefix, 定义将Coreseek安装到何处;比如--prefix=/usr/local/sphinx(以下全部示例都假定Coreseek安装在这个位置)

--with-mysql, 当自动检测失败时,需要指出在哪里能找到MySQL头文件和库文件;

--with-pgsql, 指出在哪里能找到PostgreSQL头文件和库文件.

--with-mmseg, 启用基于MMSeg的中文分词法,并指出在哪里能找到MMSeg头文件和库文件.

--with-python, 启用Python数据源支持. 需要预先安装Python2.6.

3. 编译源代码生成二进制程序:

$ make

4. 安装二进制程序到你设定的目录下: (Unix操作系统下默认为/usr/local/bin/ , 但是可以被 ./configure --prefix修改安装目录)

$ make install


二. 配置

[zdh@gy03 etc]# cat /usr/local/sphinx/etc/movie.conf

#

# Minimal Sphinx configuration sample (clean, simple, functional)

#


source movie

{

type = mysql


sql_host = dbIP

sql_user = dbuser

sql_pass = dbpassword

sql_db = dbname

sql_port = 3306 # optional, default is 3306

sql_query_pre = SET NAMES utf8

sql_query = \

SELECT index_id,movie_id, movie_name,movie_name_alias, movie_name_pinyin, starin,starin_pinyin, director, director_pinyin, movie_type, show_time, region,IFNULL(ABS(TO_DAYS(show_time)-TO_DAYS(now())),187375) as near, weight \

FROM index_movie


sql_attr_uint = movie_id

sql_attr_uint = near

sql_attr_uint = weight

#sql_attr_timestamp = show_time

#sql_attr_str2ordinal = movie_name


sql_query_info = SELECT movie_id, movie_name,starin, director, movie_type, show_time, region FROM index_movie WHERE index_id=$id

}



index movie

{

source = movie

path =/usr/local/sphinx/var/data/movie

charset_type = utf-8

charset_table = 0..9, A..Z->a..z, _, a..z,U+410..U+42F->U+430..U+44F, U+430..U+44F

#morphology = none

chinese_dictionary =/usr/local/sphinx/etc/xdict

#min_stemming_len = 1

#ngram_len = 1

#min_word_len = 2

ngram_chars = U+3000..U+2FA1F

}

indexer

{

mem_limit = 32M

}

searchd

{

listen = 9312

#listen = 9306:mysql41

log =/usr/local/sphinx/var/log/searchd.log

query_log =/usr/local/sphinx/var/log/query.log

read_timeout = 5

max_children = 30

pid_file =/usr/local/sphinx/var/log/searchd.pid

max_matches = 1000

seamless_rotate = 1

preopen_indexes = 1

unlink_old = 1

workers = threads # for RT to work

binlog_path = /usr/local/sphinx/var/data

}


三. 启动和创建索引

[zdh@gy03 etc]# searchd -c etc/movie.conf #启动

[zdh@gy03 etc]# searchd -c etc/movie.conf --stop #关闭


crontab里添加如下内容,使之定时更新

#sphinx indexer

00 06 * * * /usr/local/bin/indexer -c /usr/local/sphinx/etc/movie.conf movie --rotate