一.安装配置
安装参见: http://www.ttlsa.com/html/1236.html
配置如下:
# vi sphinx.conf
# source块设置数据源
source src1
{
type = mysql
sql_host = localhost
sql_user = root
sql_pass =
sql_db = sphinx
sql_port = 3306 # optional, default is 3306
sql_query = \
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
FROM documents
sql_attr_uint = group_id
sql_attr_timestamp = date_added
sql_query_info = SELECT * FROM documents WHERE id=$id
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
sourcesrc1
{
type=mysql
sql_host=localhost
sql_user=root
sql_pass=
sql_db=sphinx
sql_port=3306# optional, default is 3306
sql_query=\
SELECTid,group_id,UNIX_TIMESTAMP(date_added)ASdate_added,title,content\
FROMdocuments
sql_attr_uint=group_id
sql_attr_timestamp=date_added
sql_query_info=SELECT *FROMdocumentsWHEREid=$id
}
# index块设置索引保存目录等
index test1
{
source = src1
path = /data/sphinx/
docinfo = extern
charset_type = sbcs
}
1
2
3
4
5
6
7
indextest1
{
source=src1
path=/data/sphinx/
docinfo=extern
charset_type=sbcs
}
# indexer块设置索引选项,比如内存限制大小
indexer
{
mem_limit = 32M
}
1
2
3
4
indexer
{
mem_limit=32M
}
# searchd块设置搜索索引时使用的选项
searchd
{
port = 9312
log = /var/log/searchd.log
query_log = /var/log/query.log
read_timeout = 5
max_children = 30
pid_file = /var/log/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
}
1
2
3
4
5
6
7
8
9
10
11
12
13
searchd
{
port=9312
log=/var/log/searchd.log
query_log=/var/log/query.log
read_timeout=5
max_children=30
pid_file=/var/log/searchd.pid
max_matches=1000
seamless_rotate=1
preopen_indexes=0
unlink_old=1
}
二.新建相关表
mysql> CREATE TABLE documents
-> (
-> id INTEGER PRIMARY KEY NOT NULL AUTO_INCREMENT,
-> group_id INTEGER NOT NULL,
-> group_id2 INTEGER NOT NULL,
-> date_added DATETIME NOT NULL,
-> title VARCHAR(255) NOT NULL,
-> content TEXT NOT NULL
-> );
Query OK, 0 rows affected (0.00 sec)
1
2
3
4
5
6
7
8
9
10
mysql>CREATETABLEdocuments
->(
->idINTEGERPRIMARYKEYNOTNULLAUTO_INCREMENT,
->group_idINTEGERNOTNULL,
->group_id2INTEGERNOTNULL,
->date_addedDATETIMENOTNULL,
->titleVARCHAR(255)NOTNULL,
->contentTEXTNOTNULL
->);
QueryOK,0rowsaffected(0.00sec)
mysql> REPLACE INTO documents ( id, group_id, group_id2, date_added, title, content ) VALUES
-> ( 1, 1, 5, NOW(), 'test one', 'this is my test document number one. also checking search within phrases.' ),
-> ( 2, 1, 6, NOW(), 'test two', 'this is my test document number two' ),
-> ( 3, 2, 7, NOW(), 'another doc', 'this is another group' ),
-> ( 4, 2, 8, NOW(), 'doc number four', 'this is to test groups' );
Query OK, 4 rows affected (0.02 sec)
Records: 4 Duplicates: 0 Warnings: 0
1
2
3
4
5
6
7
mysql>REPLACEINTOdocuments(id,group_id,group_id2,date_added,title,content)VALUES
->(1,1,5,NOW(),'test one','this is my test document number one. also checking search within phrases.'),
->(2,1,6,NOW(),'test two','this is my test document number two'),
->(3,2,7,NOW(),'another doc','this is another group'),
->(4,2,8,NOW(),'doc number four','this is to test groups');
QueryOK,4rowsaffected(0.02sec)
Records:4Duplicates:0Warnings:0
mysql> CREATE TABLE tags
-> (
-> docid INTEGER NOT NULL,
-> tagid INTEGER NOT NULL,
-> UNIQUE(docid,tagid)
-> );
Query OK, 0 rows affected (0.01 sec)
1
2
3
4
5
6
7
mysql>CREATETABLEtags
->(
->docidINTEGERNOTNULL,
->tagidINTEGERNOTNULL,
->UNIQUE(docid,tagid)
->);
QueryOK,0rowsaffected(0.01sec)
mysql> INSERT INTO tags VALUES
-> (1,1), (1,3), (1,5), (1,7),
-> (2,6), (2,4), (2,2),
-> (3,15),
-> (4,7), (4,40);
Query OK, 10 rows affected (0.02 sec)
Records: 10 Duplicates: 0 Warnings: 0
1
2
3
4
5
6
7
mysql>INSERTINTOtagsVALUES
->(1,1),(1,3),(1,5),(1,7),
->(2,6),(2,4),(2,2),
->(3,15),
->(4,7),(4,40);
QueryOK,10rowsaffected(0.02sec)
Records:10Duplicates:0Warnings:0
三.创建索引
# indexer --all
using config file '/etc/sphinxsearch/sphinx.conf'...
indexing index 'test1'...
collected 4 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 4 docs, 193 bytes
total 0.072 sec, 2644 bytes/sec, 54.80 docs/sec
total 3 reads, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
total 9 writes, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
四.测试
# search test
using config file '/etc/sphinxsearch/sphinx.conf'...
index 'test1': query 'test ': returned 3 matches of 3 total in 0.022 sec
displaying matches:
1. document=1, weight=2421, group_id=1, date_added=Fri Jun 15 15:58:41 2012
id=1
group_id=1
group_id2=5
date_added=2012-06-15 15:58:41
title=test one
content=this is my test document number one. also checking search within phrases.
2. document=2, weight=2421, group_id=1, date_added=Fri Jun 15 15:58:41 2012
id=2
group_id=1
group_id2=6
date_added=2012-06-15 15:58:41
title=test two
content=this is my test document number two
3. document=4, weight=1442, group_id=2, date_added=Fri Jun 15 15:58:41 2012
id=4
group_id=2
group_id2=8
date_added=2012-06-15 15:58:41
title=doc number four
content=this is to test groups
words:
1. 'test': 3 documents, 5 hits
五.使用sphinx php API测试
1.将sphinxapi.php(此文件在源码api目录下)拷贝到正确位置(取决于个人环境)
# vi search.php
require_once "sphinxapi.php";
$search = new SphinxClient;
$search->setServer("localhost", 9312);
$search->setMatchMode(SPH_MATCH_ANY);
$search->SetArrayResult ( true );
$search->setMaxQueryTime(3);
print_r($search->query("test")); //"test"为要搜索的词
?>
1
2
3
4
5
6
7
8
9
10
require_once"sphinxapi.php";
$search=newSphinxClient;
$search->setServer("localhost",9312);
$search->setMatchMode(SPH_MATCH_ANY);
$search->SetArrayResult(true);
$search->setMaxQueryTime(3);
print_r($search->query("test"));//"test"为要搜索的词
?>
结果如下:
Array
(
[error] =>
[warning] =>
[status] => 0
[fields] => Array
(
[0] => title
[1] => content
)
[attrs] => Array
(
[group_id] => 1
[date_added] => 2
)
[matches] => Array //匹配的结果
(
[0] => Array
(
[id] => 1
[weight] => 2
[attrs] => Array
(
[group_id] => 1
[date_added] => 1339747121
)
)
[1] => Array
(
[id] => 2
[weight] => 2
[attrs] => Array
(
[group_id] => 1
[date_added] => 1339747121
)
)
[2] => Array
(
[id] => 4
[weight] => 1
[attrs] => Array
(
[group_id] => 2
[date_added] => 1339747121
)
)
)
[total] => 3
[total_found] => 3
[time] => 0.000
[words] => Array
(
[test] => Array
(
[docs] => 3
[hits] => 5
)
)
)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
Array
(
[error]=>
[warning]=>
[status]=>0
[fields]=>Array
(
[0]=>title
[1]=>content
)
[attrs]=>Array
(
[group_id]=>1
[date_added]=>2
)
[matches]=>Array//匹配的结果
(
[0]=>Array
(
[id]=>1
[weight]=>2
[attrs]=>Array
(
[group_id]=>1
[date_added]=>1339747121
)
)
[1]=>Array
(
[id]=>2
[weight]=>2
[attrs]=>Array
(
[group_id]=>1
[date_added]=>1339747121
)
)
[2]=>Array
(
[id]=>4
[weight]=>1
[attrs]=>Array
(
[group_id]=>2
[date_added]=>1339747121
)
)
)
[total]=>3
[total_found]=>3
[time]=>0.000
[words]=>Array
(
[test]=>Array
(
[docs]=>3
[hits]=>5
)
)
)
说明: [docs] => 3有三条结果想匹配,[hits] => 5命中5次,与数据库相对应的id是1,2,4.耗时[time] => 0.000
查询数据库结果如下:
mysql> select *, (select unix_timestamp(date_added)) as ut from documents where id in (1,2,4);
+----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+------------+
| id | group_id | group_id2 | date_added | title | content | ut |
+----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+------------+
| 1 | 1 | 5 | 2012-06-15 15:58:41 | test one | this is my test document number one. also checking search within phrases. | 1339747121 |
| 2 | 1 | 6 | 2012-06-15 15:58:41 | test two | this is my test document number two | 1339747121 |
| 4 | 2 | 8 | 2012-06-15 15:58:41 | doc number four | this is to test groups | 1339747121 |
+----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+------------+
3 rows in set (0.00 sec)
因此对于大量数据,为了提高查询速度,可以先通过sphinx查找出id,再通过id来查询数据库数据。
如需转载请注明出处:http://www.ttlsa.com/html/1346.html