背景:
Coreseek版本:coreseek-4.1-beta.tar.gz
Mysql版本:mysql-5.5.31.tar.gz
一:安装
tar -zxvf coreseek-4.1-beta.tar.gz
cd coreseek-4.1-beta
cd mmseg-3.2.14
./configure --prefix=/usr/local/mmseg
make
make install
如果报错,执行下面的操作
aclocal
libtoolize --force
automake --add-missing
autoconf
autoheader
make clean
./configure --prefix=/usr/local/mmseg
make
make install
cd ../csft-4.1/
./buildconf.sh
./configure --prefix=/usr/local/coreseek \
  --with-mysql=/usr/local/mysql --with-mmseg=/usr/local/mmseg \
  --with-mmseg-includes=/usr/local/mmseg/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg/lib/ \
  --without-unixodbc
make
make install
二:sphinx.conf参数配置
cd /usr/local/coreseek/etc
cp sphinx.conf.dist sphinx.conf
vi sphinx.conf
三:Mysql数据源配置
#MySQL数据源配置,详情请查看:http://www.coreseek.cn/products-install/mysql/
#请先将var/test/documents.sql导入数据库,并配置好以下的MySQL用户密码数据库
#源定义
source mysql
{
type = mysql
sql_host = 10.1.58.191
sql_user = root
sql_pass = 123456
sql_db = test
sql_port = 3306
sql_query_pre = SET NAMES utf8
sql_query = SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title,title as title1, content FROM documents
#sql_query第一列id需为整数
#title、content作为字符串/文本字段,被全文索引
sql_attr_uint = group_id #从SQL读取到的值必须为整数
sql_attr_timestamp = date_added #从SQL读取到的值必须为整数,作为时间属性
sql_attr_str2ordinal = title
sql_query_info_pre = SET NAMES utf8 #命令行查询时,设置正确的字符集
sql_query_info = SELECT * FROM documents WHERE id=$id #命令行查询时,从数据库读取原始数据信息
}
#index定义
index mysql
{
source = mysql #对应的source名称
path = /data/coreseek/mysql/ #请修改为实际使用的绝对路径,例如:/usr/local/coreseek/var/...
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
html_strip = 0
#中文分词配置,详情请查看:http://www.coreseek.cn/products-install/coreseek_mmseg/
charset_dictpath = /usr/local/mmseg/etc/ #BSD、Linux环境下设置,/符号结尾;需与mmseg的安装路径(--prefix=/usr/local/mmseg)保持一致
#charset_dictpath = etc/ #Windows环境下设置,/符号结尾,最好给出绝对路径,例如:C:/usr/local/coreseek/etc/...
charset_type = zh_cn.utf-8
ngram_len = 0
}
#全局index定义
indexer
{
mem_limit = 128M
}
#searchd服务定义
searchd
{
listen = 9312
read_timeout = 5
max_children = 30
max_matches = 1000
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
pid_file = /data/coreseek/logs/searchd_mysql.pid #请修改为实际使用的绝对路径,例如:/usr/local/coreseek/var/...
log = /data/coreseek/logs/searchd_mysql.log #请修改为实际使用的绝对路径,例如:/usr/local/coreseek/var/...
query_log = /data/coreseek/logs/query_mysql.log #请修改为实际使用的绝对路径,例如:/usr/local/coreseek/var/...
}
四:测试
cd /usr/local/src/coreseek-4.1-beta/testpack
/usr/local/coreseek/bin/indexer -c etc/csft_mysql.conf --all
/usr/local/coreseek/bin/search -c etc/csft_mysql.conf 搜索关键字
五:启动与停止
/usr/local/coreseek/bin/searchd -c etc/csft_mysql.conf
/usr/local/coreseek/bin/searchd -c etc/csft_mysql.conf --stop
六:java客户端API
public
static void main ( String[] argv ) throws SphinxException
{
StringBuffer
q = new StringBuffer();
String
host = "10.1.58.191";
int
port = 9312;
int
mode = SphinxClient.SPH_MATCH_ALL;
String
index = "*";
int
offset = 0;
int
limit = 20;
int
sortMode = SphinxClient.SPH_MATCH_EXTENDED;
String
sortClause = "@relevance DESC,@id DESC";
String
groupBy = "";
String
groupSort = "";
SphinxClient
cl = new SphinxClient();
cl.SetServer
(host, port );
cl.SetWeights
( new int[] { 100, 1 } );
cl.SetMatchMode
( mode );
cl.SetLimits
( offset, limit );
cl.SetSortMode
( sortMode, sortClause );
if
( groupBy.length()>0 )
cl.SetGroupBy
( groupBy, SphinxClient.SPH_GROUPBY_ATTR, groupSort );
cl.SetSelect("*");
SphinxResult
res = cl.Query("300", index);
if
( res==null )
{
System.err.println
( "Error: " + cl.GetLastError() );
System.exit
( 1 );
}
if
( cl.GetLastWarning()!=null && cl.GetLastWarning().length()>0 )
System.out.println
( "WARNING: " + cl.GetLastWarning() + "\n" );
/*
print me out */
System.out.println
( "Query '" + q + "' retrieved " + res.total + " of
" + res.totalFound + " matches in " + res.time + "
sec." );
System.out.println
( "Query stats:" );
for
( int i=0; i
{
SphinxWordInfo
wordInfo = res.words[i];
System.out.println
( "\t'" + wordInfo.word + "' found " + wordInfo.hits +
" times in " + wordInfo.docs + " documents" );
}
System.out.println
( "\nMatches:" );
for
( int i=0; i
{
SphinxMatch
info = res.matches[i];
//System.out.print
( (i+1) + ". id=" + info.docId + ", weight=" + info.weight
);
//获取搜集结果字段值
if
( res.attrNames==null || res.attrTypes==null )
continue;
for
( int a=0; a
{
System.out.print
( ", " + res.attrNames[a] + "=" );
switch
( res.attrTypes[a] )
{
case SphinxClient.SPH_ATTR_INTEGER:
case
SphinxClient.SPH_ATTR_ORDINAL:
System.out.print
( info.attrValues.get(a) );
break;
case
SphinxClient.SPH_ATTR_FLOAT:
case
SphinxClient.SPH_ATTR_STRING:
System.out.print
( info.attrValues.get(a) );
break;
case
SphinxClient.SPH_ATTR_BIGINT:
/*
longs or floats; print as is */
System.out.print
( info.attrValues.get(a) );
break;
case
SphinxClient.SPH_ATTR_TIMESTAMP:
Long
iStamp = (Long) info.attrValues.get(a);
Date
date = new Date ( iStamp.longValue()*1000 );
System.out.print
( date.toString() );
break;
default:
System.out.print
( "(unknown-attr-type=" + res.attrTypes[a] + ")" );
}
}
System.out.println();
}
}