Sphinx 实时索引效率很高,上千万条记录也能在不到一秒的时间内搜索出来。这一点有时可以用作数据库查询的预处理:比如先用 Sphinx 搜索出新闻 Id,再根据这些 Id 到数据库中取数据并分页显示,效率很高。基本上,如果是搜索导致并发很高的网站,或者搜索导致数据库性能成为瓶颈的网站,都可以这样处理一下,以减少数据库的查询。我们都知道,数据库中以通配符开头的 like 模糊查询(如 like '%关键词%')是不走索引的,数据量达到几百万条时效率可能就很低了。
这里是一个实例,需要的话请参考,谢谢您的关注。
首先添加一个配置文件 rt_article.conf
# Real-time (RT) index holding articles; the PHP demo in this document fills it
# at runtime with SphinxQL INSERT statements.
index rt_article
{
# RT index: documents are inserted at runtime instead of being built by indexer.
type = rt
# Memory limit for the in-RAM chunk of the RT index.
rt_mem_limit = 512M
# Path prefix for the index files on disk.
path = /data/sphinx/data/rt_article
# NOTE(review): docinfo is an attribute-storage setting for plain indexes; it is
# probably ignored (or deprecated) for RT indexes - confirm for the Sphinx version in use.
docinfo = extern
#charset_type = utf-8
# No stemming or other morphology processing.
morphology = none
# Index words from one character upwards.
min_word_len = 1
# Prefix indexing disabled.
min_prefix_len = 0
# Strip HTML markup from incoming text, and drop the contents of the listed elements.
html_strip = 1
html_remove_elements = style, script
# Full-text fields (searchable through MATCH() / the @field query syntax).
rt_field = articleName
rt_field = tags
rt_field = content
# Stored attributes returned with each match; the string attributes are filled
# with copies of the field text by the insert script.
rt_attr_timestamp = announceTime
rt_attr_string = title
rt_attr_string = keywords
rt_attr_string = newsContent
# Index the characters listed in ngram_chars as 1-character n-grams, which lets
# CJK text without word separators be searched.
ngram_len = 1
#ngram_chars = 0..9, _, a..z, U+3000..U+2FA1F
#charset_table = 0..9, A..Z->a..z, _, a..z, U+3000..U+2FA1F
#ngram_chars = 0..9, _, a..z, U+00A0..U+2FFFF
#charset_table = 0..9, A..Z->a..z, _, a..z, U+00A0..U+2FFFF
# NOTE(review): the list below also contains 0..9, _ and a..z, so Latin words and
# numbers are presumably split into single characters as well - confirm this is intended.
ngram_chars = U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,\
U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,\
U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF,\
U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF, 0..9, _, a..z
}
然后 ./bin/searchd --config=/usr/local/sphinx/etc/rt_article.conf
启动了sphinx服务后,可以先写一段代码向索引中插入一些记录(实际应用中,这些记录是从mysql读取后写入的)。
先插入一些数据,然后再测试查询 sphinx_article.php,内容如下:
<?php
// sphinx_article.php: inserts one sample article into the rt_article real-time
// index over SphinxQL (MySQL wire protocol on port 9306) and then runs a
// MATCH() query to confirm that the new row can be found.

// Report failures through return values so the explicit checks below work the
// same way on every PHP version (PHP 8.1+ throws mysqli_sql_exception by default).
mysqli_report(MYSQLI_REPORT_OFF);

// The connection uses no user, password or database name; only host and port matter.
$link = mysqli_connect('127.0.0.1', '', '', '', 9306);
if (! $link) {
    echo mysqli_connect_errno().': '.mysqli_connect_error()."\n";
    exit('can not connect server');
}

$id = 27;
$articleName = '学习linux吧,最好是lnmp方面的知识';
$tags = 'linux,nginx,mysql,php,python';
$content = '本文介绍的Linux定制专栏文章关于Gnome 3桌面,这种桌面与Xfce和KDE大不一样。作者在本文中介绍了配置和定制Gnome外壳的方法,但由于Gnome 3采用了模块化设计方式,还可以把Gnome外壳完全换成别的系统,这种系统提供了全然不同的桌面,配置和定制方面有着全然不同的可能性。';
$announceTime = time();

// Escape every string before it goes into the statement: real article text
// (read from MySQL in production) can contain quotes and backslashes.
$articleNameSql = mysqli_real_escape_string($link, $articleName);
$tagsSql = mysqli_real_escape_string($link, $tags);
$contentSql = mysqli_real_escape_string($link, $content);
$idSql = (int) $id;
$announceTimeSql = (int) $announceTime;

// The full-text fields (articleName, tags, content) are not stored for retrieval,
// so the same text is also written to the string attributes (title, keywords, newsContent).
$sql = "insert into rt_article(id,articleName,tags,content,title,keywords,newsContent,announceTime) values({$idSql},'{$articleNameSql}','{$tagsSql}','{$contentSql}','{$articleNameSql}','{$tagsSql}','{$contentSql}',{$announceTimeSql}) ";

// NOTE(review): running the script twice inserts id 27 again, which is expected
// to fail as a duplicate; switch to "replace into" if re-runs should overwrite.
$result = mysqli_query($link, $sql);
if (! $result) {
    echo '未能执行成功!'."\n";
    echo mysqli_errno($link).': '.mysqli_error($link)."\n";
    echo $sql;
    mysqli_close($link);
    exit;
}

$query = mysqli_query($link, "select * from rt_article where match('配置');");
if (! $query) {
    echo mysqli_errno($link).': '.mysqli_error($link)."\n";
    mysqli_close($link);
    exit;
}

// Fetch rows as associative arrays (column name => value).
while ($row = mysqli_fetch_assoc($query)) {
    var_dump($row);
}

mysqli_free_result($query);
mysqli_close($link);
exit;
?>
运行,如果能正确查询出插入的那条数据,就没有问题了。
然后测试一下搜索:
文件sphinx_test.php,注意这里引用了sphinxapi.php,我改名了,为sphinx_client.php
这里假定搜索框中的关键词是用空格分开的,然后测试一下搜索吧。
<?php
// sphinx_test.php: keyword search demo against the rt_article index using the
// Sphinx PHP API (sphinxapi.php, renamed to sphinx_client.php).
//
// With keywords posted: each keyword is searched in the "tags" field and the
// document ids common to all matching keywords are collected; when fewer than
// three ids remain, an OR query over all keywords tops the list up.
// Without keywords: the newest documents (highest ids) are listed.
require_once 'sphinx_client.php';

$sphinx_client = new SphinxClient();
$sphinx_client->SetServer('192.168.1.192', 9312);
// SetArrayResult() is deliberately left off: matches stay keyed by document id,
// which is what array_keys() below relies on.
//$sphinx_client->SetArrayResult(TRUE);
//$sphinx_client->SetIDRange(1,20);
//$sphinx_client->SetFilter('group_id',[1,2]);
$sphinx_client->SetLimits(0, 100);
$sphinx_client->SetFieldWeights(['articleName' => 3, 'tags' => 2, 'content' => 1]);
// NOTE(review): the "@tags (...)" queries below use the extended query syntax; if
// the bundled API defaults to another match mode, call
// SetMatchMode(SPH_MATCH_EXTENDED2) first - confirm against sphinx_client.php.

// Keep the raw text for searching; HTML escaping happens only where text is echoed.
$rawKeywords = (isset($_POST['keywords']) && is_string($_POST['keywords'])) ? trim($_POST['keywords']) : '';

// Split on any run of whitespace (including the full-width space U+3000) and
// drop empty pieces so repeated spaces do not produce empty queries.
$keywords = [];
if ($rawKeywords !== '') {
    $pieces = preg_split('/[\s\x{3000}]+/u', $rawKeywords, -1, PREG_SPLIT_NO_EMPTY);
    // preg_split() returns false on invalid UTF-8 input.
    $keywords = ($pieces === false) ? [] : $pieces;
}

if (count($keywords) > 0) {
    // Order by relevance first, then newest id. The sort clause must use Sphinx's
    // "@id" keyword.
    $sphinx_client->SetSortMode(SPH_SORT_EXTENDED, '@weight desc,@id desc');

    // null means "no keyword has matched yet"; an empty array is a real, empty
    // intersection and must not be overwritten by the next keyword's matches.
    $commonIds = null;
    $phrases = [];
    foreach ($keywords as $value) {
        // Escape query-syntax characters so user text cannot break the query.
        $escaped = $sphinx_client->EscapeString($value);
        $phrases[] = '"'.$escaped.'"';
        $res = $sphinx_client->Query('@tags ('.$escaped.')', 'rt_article');
        // Keywords without matches (or failed queries) are skipped.
        if (isset($res['matches'])) {
            $searchedArr = array_keys($res['matches']);
            echo htmlspecialchars($value, ENT_QUOTES, 'UTF-8').json_encode($searchedArr).'<br/>';
            $commonIds = ($commonIds === null) ? $searchedArr : array_intersect($commonIds, $searchedArr);
        }
    }
    $result = ($commonIds === null) ? [] : array_values($commonIds);
    echo '111,result:'.json_encode($result).'<br/>';

    // Too few common hits: widen the search to documents matching ANY keyword.
    if (count($result) < 3) {
        $res = $sphinx_client->Query('@tags ('.implode(' | ', $phrases).')', 'rt_article');
        if (isset($res['matches'])) {
            $ret = array_keys($res['matches']);
            echo '222,ret:'.json_encode($ret).'<br/>';
            $result = array_unique(array_merge($result, $ret));
        }
    }
    echo '333,result:'.json_encode(array_values($result)).'<br/>';

    echo '<pre>';
    echo htmlspecialchars((string) $sphinx_client->GetLastError(), ENT_QUOTES, 'UTF-8');
    echo htmlspecialchars((string) $sphinx_client->GetLastWarning(), ENT_QUOTES, 'UTF-8');
    echo '</pre>';
}
else {
    // No keywords: list documents by id, newest first.
    $sphinx_client->SetSortMode(SPH_SORT_EXTENDED, '@id DESC');
    $res = $sphinx_client->Query('', 'rt_article');
    echo '<pre>';
    // Query() returns false on failure, so guard before reading the matches.
    if (isset($res['matches'])) {
        echo htmlspecialchars(print_r($res['matches'], true), ENT_QUOTES, 'UTF-8');
    }
    echo htmlspecialchars((string) $sphinx_client->GetLastError(), ENT_QUOTES, 'UTF-8');
    echo htmlspecialchars((string) $sphinx_client->GetLastWarning(), ENT_QUOTES, 'UTF-8');
    echo '</pre>';
}
/**
 * Searches rt_article for documents matching ANY of the given keywords and
 * prints the query, the matches and the client's last error/warning as HTML.
 *
 * Results are ordered by relevance, then by newest id.
 *
 * @param array       $keywords      Keyword strings; empty strings are ignored.
 * @param object|null $sphinx_client SphinxClient to query with. When omitted, the
 *                                   global $sphinx_client is used (a function body
 *                                   cannot see that global implicitly).
 * @return void
 * @throws InvalidArgumentException When no client is passed and none exists globally.
 */
function searchArticle($keywords, $sphinx_client = null)
{
    if ($sphinx_client === null && isset($GLOBALS['sphinx_client'])) {
        $sphinx_client = $GLOBALS['sphinx_client'];
    }
    if (! is_object($sphinx_client)) {
        throw new InvalidArgumentException('searchArticle() needs a SphinxClient instance');
    }

    // Each keyword becomes a quoted phrase; query-syntax characters are escaped
    // so user text cannot break the query.
    $phrases = [];
    foreach ($keywords as $value) {
        if ((string) $value === '') {
            continue;
        }
        $phrases[] = '"'.$sphinx_client->EscapeString($value).'"';
    }
    // implode() leaves no trailing separator.
    $str_query = implode(' | ', $phrases);
    echo '['.htmlspecialchars($str_query, ENT_QUOTES, 'UTF-8').']';

    // The sort clause must use Sphinx's "@id" keyword.
    $sphinx_client->SetSortMode(SPH_SORT_EXTENDED, '@weight desc,@id desc');
    $res = $sphinx_client->Query($str_query, 'rt_article');

    echo '<pre>';
    // Query() returns false on failure, so guard before reading the matches.
    if (isset($res['matches'])) {
        echo htmlspecialchars(print_r($res['matches'], true), ENT_QUOTES, 'UTF-8');
    }
    echo htmlspecialchars((string) $sphinx_client->GetLastError(), ENT_QUOTES, 'UTF-8');
    echo htmlspecialchars((string) $sphinx_client->GetLastWarning(), ENT_QUOTES, 'UTF-8');
    echo '</pre>';
}
/**
 * Collects recommended document ids from rt_article based on the "tags" field.
 *
 * Each keyword is searched separately and the ids common to all matching
 * keywords are kept. When fewer than three ids remain, an OR query over all
 * keywords is run and its ids are appended (duplicates removed). Progress is
 * echoed as HTML for debugging.
 *
 * @param array       $keywords      Keyword strings; empty strings are ignored.
 * @param object|null $sphinx_client SphinxClient to query with. When omitted, the
 *                                   global $sphinx_client is used (a function body
 *                                   cannot see that global implicitly).
 * @return array List of document ids, common matches first.
 * @throws InvalidArgumentException When no client is passed and none exists globally.
 */
function getRecommend($keywords, $sphinx_client = null)
{
    if ($sphinx_client === null && isset($GLOBALS['sphinx_client'])) {
        $sphinx_client = $GLOBALS['sphinx_client'];
    }
    if (! is_object($sphinx_client)) {
        throw new InvalidArgumentException('getRecommend() needs a SphinxClient instance');
    }

    // null means "no keyword has matched yet"; an empty array is a real, empty
    // intersection and must not be overwritten by the next keyword's matches.
    $commonIds = null;
    $phrases = [];
    foreach ($keywords as $value) {
        if ((string) $value === '') {
            continue;
        }
        // Escape query-syntax characters so user text cannot break the query.
        $escaped = $sphinx_client->EscapeString($value);
        $phrases[] = '"'.$escaped.'"';
        $res = $sphinx_client->Query('@tags ('.$escaped.')', 'rt_article');
        // Keywords without matches (or failed queries) are skipped.
        if (! isset($res['matches'])) {
            continue;
        }
        // Matches are keyed by document id (array-result mode is assumed to be off).
        $searchedArr = array_keys($res['matches']);
        echo htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8').json_encode($searchedArr).'<br/>';
        $commonIds = ($commonIds === null) ? $searchedArr : array_intersect($commonIds, $searchedArr);
    }
    $result = ($commonIds === null) ? [] : array_values($commonIds);
    echo '111,result:'.json_encode($result).'<br/>';

    // Too few common hits: widen the search to documents matching ANY keyword.
    if (count($result) < 3 && count($phrases) > 0) {
        $res = $sphinx_client->Query('@tags ('.implode(' | ', $phrases).')', 'rt_article');
        if (isset($res['matches'])) {
            $ret = array_keys($res['matches']);
            echo '222,ret:'.json_encode($ret).'<br/>';
            $result = array_values(array_unique(array_merge($result, $ret)));
        }
    }
    echo '333,result:'.json_encode($result).'<br/>';

    return $result;
}
?>
<!DOCTYPE html>
<html>
<head>
<!-- NOTE(review): the title looks like a leftover from another page ("health news detail, share version"); confirm it is intended for this search demo. -->
<title>健康资讯详情分享版</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="viewport" content="initial-scale=1.0, user-scalable=no" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black" />
<meta name="format-detection" content="telephone=no">
</head>
<body>
<!-- Search form: posts the space-separated keywords back to sphinx_test.php in the "keywords" field. -->
<div style="width:100%">
<form action="sphinx_test.php" method="post">
<input type="text" name="keywords" value="" placeHolder="请输入关键字,中间以空格分开" />
<input type="submit" name="btnSubmit" value="搜索">
</form>
</div>
</body>
</html>
//可以使用mysql客户端来查找或更新sphinx的内容 根据产品英文名来匹配查找sphinx
select * from rt_goods where match('@productpinyin gmrjn');//查找感冒软胶囊 英文拼音 rt_field为productpinyin的字段内容包含gmrjn的记录都会被查询出来
select * from rt_goods where match('@productpinyin ^gmrjn$');//rt_field为productpinyin的字段内容精确匹配gmrjn的记录都会被查询出来,这样查比较准。