背景:
Coreseek版本:coreseek-4.1-beta.tar.gz
Mysql版本: mysql-5.5.31.tar.gz
一:安装
tar -zxvf coreseek-4.1-beta.tar.gz
cd coreseek-4.1-beta
cd mmseg-3.2.14
./configure --prefix=/usr/local/mmseg
make
make install
如果报错,执行下面的操作
aclocal
libtoolize --force
automake --add-missing
autoconf
autoheader
make clean
./configure --prefix=/usr/local/mmseg
make
make install
cd ../csft-4.1/
./buildconf.sh
./configure --prefix=/usr/local/coreseek --with-mysql=/usr/local/mysql --with-mmseg=/usr/local/mmseg --with-mmseg-includes=/usr/local/mmseg/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg/lib/ --without-unixodbc
make
make install
二:sphinx.conf参数配置
cd /usr/local/coreseek/etc
cp sphinx.conf.dist sphinx.conf
vi sphinx.conf
三:Mysql数据源配置
#MySQL数据源配置,详情请查看:http://www.coreseek.cn/products-install/mysql/
#请先将var/test/documents.sql导入数据库,并配置好以下的MySQL用户密码数据库
#源定义
# Data source "mysql": pulls rows from the MySQL table `documents` for indexing.
source mysql
{
type = mysql
# Connection settings below are example values; replace them with your own.
sql_host = 10.1.58.191
sql_user = root
sql_pass = 123456
sql_db = test
sql_port = 3306
sql_query_pre = SET NAMES utf8
sql_query = SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title,title as title1, content FROM documents
# The first column of sql_query (id) must be an integer.
# Text columns that are not declared as attributes below (title1, content) are full-text indexed;
# title itself is declared as an ordinal attribute, which is why it is selected a second time as title1.
sql_attr_uint = group_id # the value read from SQL must be an integer
sql_attr_timestamp = date_added # the value read from SQL must be an integer; used as a timestamp attribute
sql_attr_str2ordinal = title
sql_query_info_pre = SET NAMES utf8 # sets the correct character set for command-line queries
sql_query_info = SELECT * FROM documents WHERE id=$id # command-line queries read the original row from the database
}
#index定义
# Index "mysql": full-text index built from the "mysql" source, using mmseg Chinese word segmentation.
index mysql
{
source = mysql # name of the source section this index is built from
path = /data/coreseek/mysql/ # change to the absolute path actually used, e.g. /usr/local/coreseek/var/...
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
html_strip = 0
# Chinese word segmentation settings; details: http://www.coreseek.cn/products-install/coreseek_mmseg/
# The dictionary directory must be under the prefix mmseg was installed to
# (--prefix=/usr/local/mmseg in the install steps), not /usr/local/mmseg3.
charset_dictpath = /usr/local/mmseg/etc/ # BSD/Linux setting; must end with "/"
#charset_dictpath = etc/ # Windows setting; must end with "/"; an absolute path is preferred, e.g. C:/usr/local/coreseek/etc/...
charset_type = zh_cn.utf-8
ngram_len = 0
}
#全局index定义
# Settings for the indexer program.
indexer
{
mem_limit = 128M # memory limit setting for indexer runs
}
#searchd服务定义
# Settings for the searchd search daemon.
searchd
{
listen = 9312 # port that clients connect to (the Java client example uses the same port)
read_timeout = 5
max_children = 30
max_matches = 1000
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
pid_file = /data/coreseek/logs/searchd_mysql.pid # change to the absolute path actually used, e.g. /usr/local/coreseek/var/...
log = /data/coreseek/logs/searchd_mysql.log # change to the absolute path actually used, e.g. /usr/local/coreseek/var/...
query_log = /data/coreseek/logs/query_mysql.log # change to the absolute path actually used, e.g. /usr/local/coreseek/var/...
}
四:测试
cd /usr/local/src/coreseek-4.1-beta/testpack
/usr/local/coreseek/bin/indexer -c etc/csft_mysql.conf --all
/usr/local/coreseek/bin/search -c etc/csft_mysql.conf 搜索关键字
五:启动与停止
/usr/local/coreseek/bin/searchd -c etc/csft_mysql.conf
/usr/local/coreseek/bin/searchd -c etc/csft_mysql.conf --stop
六:java客户端API
/**
 * Sends one hard-coded query to searchd and prints the query statistics
 * followed by the attribute values of every match.
 *
 * Requires the Sphinx Java client classes (SphinxClient, SphinxResult,
 * SphinxWordInfo, SphinxMatch, SphinxException) and java.util.Date to be imported.
 *
 * @param argv command-line arguments (unused)
 * @throws SphinxException propagated from the SphinxClient calls
 */
public static void main ( String[] argv ) throws SphinxException
{
    // The same string is sent to the server and echoed in the summary line.
    String query = "300";
    String host = "10.1.58.191";
    int port = 9312;
    int mode = SphinxClient.SPH_MATCH_ALL;
    String index = "*";
    int offset = 0;
    int limit = 20;
    // A sort clause expression needs the *sort* mode constant, not the match mode one.
    int sortMode = SphinxClient.SPH_SORT_EXTENDED;
    String sortClause = "@relevance DESC,@id DESC";
    String groupBy = "";
    String groupSort = "";

    SphinxClient cl = new SphinxClient();
    cl.SetServer ( host, port );
    cl.SetWeights ( new int[] { 100, 1 } );
    cl.SetMatchMode ( mode );
    cl.SetLimits ( offset, limit );
    cl.SetSortMode ( sortMode, sortClause );
    // Grouping is only enabled when a group-by attribute is configured above.
    if ( groupBy.length()>0 )
    {
        cl.SetGroupBy ( groupBy, SphinxClient.SPH_GROUPBY_ATTR, groupSort );
    }
    cl.SetSelect ( "*" );

    SphinxResult res = cl.Query ( query, index );
    if ( res==null )
    {
        System.err.println ( "Error: " + cl.GetLastError() );
        System.exit ( 1 );
    }
    if ( cl.GetLastWarning()!=null && cl.GetLastWarning().length()>0 )
    {
        System.out.println ( "WARNING: " + cl.GetLastWarning() + "\n" );
    }

    /* print the summary and per-keyword statistics */
    System.out.println ( "Query '" + query + "' retrieved " + res.total + " of " + res.totalFound + " matches in " + res.time + " sec." );
    System.out.println ( "Query stats:" );
    for ( int i=0; i<res.words.length; i++ )
    {
        SphinxWordInfo wordInfo = res.words[i];
        System.out.println ( "\t'" + wordInfo.word + "' found " + wordInfo.hits + " times in " + wordInfo.docs + " documents" );
    }

    System.out.println ( "\nMatches:" );
    for ( int i=0; i<res.matches.length; i++ )
    {
        SphinxMatch info = res.matches[i];
        // Without attribute metadata there is nothing to print for this match.
        if ( res.attrNames==null || res.attrTypes==null )
        {
            continue;
        }

        // Print the attribute values returned for this match as name=value pairs.
        for ( int a=0; a<res.attrNames.length; a++ )
        {
            if ( a>0 )
            {
                System.out.print ( ", " );
            }
            System.out.print ( res.attrNames[a] + "=" );
            switch ( res.attrTypes[a] )
            {
                case SphinxClient.SPH_ATTR_INTEGER:
                case SphinxClient.SPH_ATTR_ORDINAL:
                case SphinxClient.SPH_ATTR_FLOAT:
                case SphinxClient.SPH_ATTR_STRING:
                case SphinxClient.SPH_ATTR_BIGINT:
                    /* numbers and strings; print as is */
                    System.out.print ( info.attrValues.get(a) );
                    break;
                case SphinxClient.SPH_ATTR_TIMESTAMP:
                    // The value is multiplied by 1000 because Date takes milliseconds.
                    Long iStamp = (Long) info.attrValues.get(a);
                    Date date = new Date ( iStamp.longValue()*1000 );
                    System.out.print ( date.toString() );
                    break;
                default:
                    System.out.print ( "(unknown-attr-type=" + res.attrTypes[a] + ")" );
            }
        }
        System.out.println();
    }
}
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/28624388/viewspace-1284505/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/28624388/viewspace-1284505/