sphinx中文分词技术在ubuntu服务器下的配置

####sphinx-for-chinese-2.1.0-dev-r3361 ON ubuntu
apt-get install libmysqlclient-dev
cd /tmp
tar -xvf sphinx-for-chinese-2.1.0-dev-r3361.tar.bz2
cd sphinx-for-chinese-2.1.0-dev-r3361


./configure --prefix=/usr/local/sphinx
make
make install


#配置中文支持
$ tar -xvf xdict_1.1.tar.gz
$ /usr/local/sphinx/bin/mkdict xdict_1.1.txt xdict #从xdict_1.1.txt生成xdict文件,xdict_1.1.txt文件可以根据需要进行修改
$ cp xdict /usr/local/sphinx/etc/
#词典包下载地址: http://code.google.com/p/sphinx-for-chinese/downloads/list




####sphinx-for-chinese-0.9.10-dev-r2006
cd /pkg
tar -zxvf sphinx-for-chinese-0.9.10-dev-r2006.tar.gz
cd /pkg/sphinx-for-chinese-0.9.10-dev-r2006
./configure --without-iconv
make
make install
#加载libmysqlclient.so.16
echo "/usr/local/lib/mysql" >> /etc/ld.so.conf
ldconfig


#生成词典
/usr/local/bin/mkdict xdict.txt xdict






####sphinx-0.9.8.1


#更新libmysqlclient.so引用
vi /etc/ld.so.conf
#在文件中添加下面这一行(这是文件内容,不是命令),保存后再执行ldconfig
/usr/local/lib/mysql
ldconfig


#安装Sphinx
cd /pkg
tar -zxvf sphinx-0.9.8.1.tar.gz
cd /pkg/sphinx-0.9.8.1
./configure --with-mysql --with-mysql-libs=/usr/local/lib/mysql --with-mysql-includes=/usr/local/include/mysql
make
make install






#安装SphinxSE
mkdir -p /pkg/mysql-5.1.30/storage/sphinx
cp -R /pkg/sphinx-0.9.8.1/mysqlse /pkg/mysql-5.1.30/storage/sphinx
cd /pkg/mysql-5.1.30
./BUILD/autorun.sh
./configure --with-extra-charsets=all --with-plugins=sphinx
make
make install




#安装libmmseg
cd /pkg
tar -zxvf mmseg-3.1.tar.gz
cd /pkg/mmseg-3.1
./configure
make
make install


mkdir /usr/local/sphinx/dict -p
cp /pkg/mmseg-3.1/src/win32/mmseg.ini  /usr/local/sphinx/dict/mmseg.ini


#建立初始词典
/usr/local/bin/mmseg -u /usr/local/sphinx/dict/unigram.txt /usr/local/sphinx/dict/uni.lib
/usr/local/bin/mmseg -d /usr/local/sphinx/dict test.txt


#生成定制词典
cat /usr/local/sphinx/dict/unidefine*.txt /usr/local/sphinx/dict/unigram.txt > /usr/local/sphinx/dict/unicustom.txt
/usr/local/bin/mmseg -u /usr/local/sphinx/dict/unicustom.txt /usr/local/sphinx/dict/uni.lib






####csft-3.1


#安装csft
cd /pkg
tar -zxvf csft-3.1.tar.gz
cd /pkg/csft-3.1
./configure \
--with-mysql --with-mysql-libs=/usr/local/lib/mysql --with-mysql-includes=/usr/local/include/mysql \
--with-mmseg --with-mmseg-libs=/usr/local/lib --with-mmseg-includes=/usr/local/include/mmseg


make
make install


#复制默认配置文件
cp /pkg/csft-3.1/sphinx.conf.dist /usr/local/etc/csft.conf.dist
cd /usr/local/etc/
cp csft.conf.dist csft.conf


#建立index
/usr/local/bin/indexer --all --rotate


#启动/停止searchd服务
/usr/local/bin/searchd
/usr/local/bin/searchd --stop


#执行查询
/usr/local/bin/search 广告
/usr/local/bin/php /pkg/csft-3.1/api/test.php 广告


#test.php 支持的命令行选项(以下为其帮助输出)
-h, --host <HOST>       connect to searchd at host HOST
-p, --port              connect to searchd at port PORT
-i, --index <IDX>       search through index(es) specified by IDX
-s, --sortby <CLAUSE>   sort matches by 'CLAUSE' in sort_extended mode
-S, --sortexpr <EXPR>   sort matches by 'EXPR' DESC in sort_expr mode
-a, --any               use 'match any word' matching mode
-b, --boolean           use 'boolean query' matching mode
-e, --extended          use 'extended query' matching mode
-ph,--phrase            use 'exact phrase' matching mode
-f, --filter <ATTR>     filter by attribute 'ATTR' (default is 'group_id')
-v, --value <VAL>       add VAL to allowed 'group_id' values list
-g, --groupby <EXPR>    group matches by 'EXPR'
-gs,--groupsort <EXPR>  sort groups by 'EXPR'
-d, --distinct <ATTR>   count distinct values of 'ATTR''
-l, --limit <COUNT>     retrieve COUNT matches (default: 20)


#测试
/usr/local/bin/mmseg -u /usr/local/sphinx/dict/unigram.txt /usr/local/sphinx/dict/uni.lib
/usr/local/bin/indexer --all --rotate
/usr/local/bin/search 标识






####增量索引合并主索引库


#初始更新主索引库
/usr/local/bin/indexer company --rotate
#定时更新增量索引
/usr/local/bin/indexer company_delta --rotate


#定时合并
/usr/local/bin/indexer --merge company company_delta --rotate






#未验证,合并时按删除标志过滤(只保留主索引中is_deleted为0的文档)
/usr/local/bin/indexer --merge company company_delta --merge-dst-range is_deleted 0 0 --rotate








####windows环境下


#建立index
D:\csft\bin\indexer --all --rotate --config d:\csft\conf\csft.conf


#启动/停止searchd服务
D:\csft\bin\searchd
D:\csft\bin\searchd --stop


#安装服务
D:\csft\bin\searchd --install --config d:\csft\conf\csft.conf
#卸载服务
sc delete searchd


#重建字库
\csft\bin\mmseg.exe -u unigram.txt uni.lib


#执行查询
\csft\bin\search 深圳
/usr/local/bin/php /pkg/csft-3.1/api/test.php 广告
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值