原文地址:php sphinx 高效率搜索引擎搭建(二)
作者:zqeesoom
1.下载PHP客户端安装:
http://pecl.php.net/package/sphinx
vim
sphinxclient.c
找到
void sock_close
(
int
sock
);
改为
static void
sock_close
(
int
sock
);
找到
void
改为
static
2.安装sphinx,前提你已经安装mysql且安装了mysql-devel 从源码安装mysql的
mysql-devel都已经安装,yum安装的运行
yum -y install mysql-devel
下载sphinx2.0.1 地址:
http://sphinxsearch.com/downloads/
tar
-xvzf
sphinx-2.0.1-beta.tar.gz
cd sphinx-2.0.1-beta
./configure --prefix=/usr/local/sphinx
--with-mysql
--with-iconv
cd
./configure
备注:64位增加参数 --enable-id64
make && make install
cd /usr/local/sphinx/etc/
cp sphinx.conf.dist sphinx.conf
cd
cp
配置:
#
# Sphinx configuration file sample
#
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#
#############################################################################
## data source definition
#############################################################################
source src1
{
# data source type. mandatory, no default value
# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
type = mysql
#####################################################################
## SQL settings (for 'mysql' and 'pgsql' types)
#####################################################################
# some straightforward parameters for SQL source types
sql_host = localhost
sql_user = root
sql_pass = ******
sql_db = ******
sql_port = 3306 # optional, default is 3306
# UNIX socket name
# optional, default is empty (reuse client library defaults)
# usually '/var/lib/mysql/mysql.sock' on Linux
# usually '/tmp/mysql.sock' on FreeBSD
#
sql_sock = /tmp/mysql.sock
# MySQL specific client connection flags
# optional, default is 0
# 数据传输方式
# mysql_connect_flags = 32 # enable compression
# MySQL specific SSL certificate settings
# optional, defaults are empty
# SSL链接
# mysql_ssl_cert = /etc/ssl/client-cert.pem
# mysql_ssl_key = /etc/ssl/client-key.pem
# mysql_ssl_ca = /etc/ssl/cacert.pem
# MS SQL specific Windows authentication mode flag
# MUST be in sync with charset_type index-level setting
# optional, default is 0
#
# mssql_winauth = 1 # use currently logged on user credentials
# MS SQL specific Unicode indexing flag
# optional, default is 0 (request SBCS data)
#
# mssql_unicode = 1 # request Unicode data from server
# ODBC specific DSN (data source name)
# mandatory for odbc source type, no default value
#
# odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
# sql_query = SELECT id, data FROM documents.csv
# ODBC and MS SQL specific, per-column buffer sizes
# optional, default is auto-detect
#
# sql_column_buffers = content=12M, comments=1M
# pre-query, executed before the main fetch query
# multi-value, optional, default is empty list of queries
# 发送SQL语句前发送
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
# main document fetch query
# mandatory, integer document ID field MUST be the first selected column
# 需要查询的表 构建查询
sql_query =
SELECT id,target_type,genre,stars,sub_title,sports_team,music_band,music_album
FROM ko_link
#
#
#
#
#
#
#############################################################################
##
#############################################################################
source
{
#如果多个数据源并要在一个索引,必须要保持字段的顺序数量跟数据都要一致,否则将出错
#
joined/payload
field
fetch
query
#
joined
fields
let
you
avoid
(slow)
JOIN
and
GROUP_CONCAT
#
payload
fields
let
you
attach
custom
per-keyword
values
(eg.
for
ranking)
#
#
syntax
is
FIELD-NAME
'from'
(
'query'
|
'payload-query'
)
;
QUERY
#
joined
field
QUERY
should
return
2
columns
(docid,
text)
#
payload
field
QUERY
should
return
3
columns
(docid,
keyword,
weight)
#
#
REQUIRES
that
query
results
are
in
ascending
document
ID
order!
#
multi-value,
optional,
default
is
empty
list
of
queries
#
添加字段,来源与表 自动连接
# 字段结果集保持为
# (1,tags1)
# (1,tags2)
# (2,tags3)
# (2,tags4)
# 添加字段将用于搜索,结果如有第3个字段,第3个字段表示该记录的权重,权重为大于1的值
#
sql_joined_field
=
tags
from
query
;
SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
#
sql_joined_field
=
wtags
from
payload-query
;
SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
#
file
based
field
declaration
#
#
content
of
this
field
is
treated
as
a
file
name
#
and
the
file
gets
loaded
and
indexed
in
place
of
a
field
#
#
max
file
size
is
limited
by
max_file_field_buffer
indexer
setting
#
file
IO
errors
are
non-fatal
and
get
reported
as
warnings
#
把字段声明放入文件
#
sql_file_field
=
content_file_path
#
range
query
setup,
query
that
must
return
min
and
max
ID
values
#
optional,
default
is
empty
#
#
sql_query
will
need
to
reference
$start
and
$end
boundaries
#
if
using
ranged
query:
#
分区查询,防止MYSQL死锁
#
sql_query
=
#
SELECT
doc.id,
doc.id
AS
group,
doc.title,
doc.data
#
FROM
documents
doc
#
WHERE
id>=$start
AND
id<=$end
#
#
sql_query_range
=
SELECT
MIN(id),MAX(id)
FROM
documents
#
range
query
step
#
optional,
default
is
1024
#
分区查询跳步
#
sql_range_step
=
1000
#
unsigned
integer
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
optional
bit
size
can
be
specified,
default
is
32
#
声明无符号数字段
#sql_attr_uint
=
target_type
#
sql_attr_uint
=
forum_id:9
#
9
bits
for
forum_id
#sql_attr_uint
=
group_id
#声明BOOL字段
#
boolean
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
equivalent
to
sql_attr_uint
with
1-bit
size
#
#
sql_attr_bool
=
is_deleted
#
bigint
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
declares
a
signed
(unlike
uint!)
64-bit
attribute
#
声明长整字段
#
sql_attr_bigint
=
my_bigint_id
#
UNIX
timestamp
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
similar
to
integer,
but
can
also
be
used
in
date
functions
#
声明时间字段
#
sql_attr_timestamp
=
posted_ts
#
sql_attr_timestamp
=
last_edited_ts
#sql_attr_timestamp
=
date_added
#
string
ordinal
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
sorts
strings
(bytewise),
and
stores
their
indexes
in
the
sorted
list
#
sorting
by
this
attr
is
equivalent
to
sorting
by
the
original
strings
#
声明字符串字段
用于排序等,但此字段不会被存储
#
sql_attr_str2ordinal
=
author_name
#
floating
point
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
values
are
stored
in
single
precision,
32-bit
IEEE
754
format
#
声明浮点字段
#
sql_attr_float
=
lat_radians
#
sql_attr_float
=
long_radians
#
multi-valued
attribute
(MVA)
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
MVA
values
are
variable
length
lists
of
unsigned
32-bit
integers
#
#
syntax
is
ATTR-TYPE
ATTR-NAME
'from'
SOURCE-TYPE
[
;QUERY
]
[
;RANGE-QUERY
]
#
ATTR-TYPE
is
'uint'
or
'timestamp'
#
SOURCE-TYPE
is
'field',
'query',
or
'ranged-query'
#
QUERY
is
SQL
query
used
to
fetch
all
(
docid,
attrvalue
)
pairs
#
RANGE-QUERY
is
SQL
query
used
to
fetch
min
and
max
ID
values,
similar
to
'sql_query_range'
#
声明复合字段
#
sql_attr_multi
=
uint
tag
from
query
;
SELECT docid, tagid FROM tags
#
sql_attr_multi
=
uint
tag
from
ranged-query
;
#
SELECT
docid,
tagid
FROM
tags
WHERE
id>=$start
AND
id<=$end
;
#
SELECT
MIN(docid),
MAX(docid)
FROM
tags
#
string
attribute
declaration
#
multi-value
(an
arbitrary
number
of
these
is
allowed),
optional
#
lets
you
store
and
retrieve
strings
#
只是把数据存储,但不会索引该字段
#
sql_attr_string
=
stitle
#
wordcount
attribute
declaration
#
multi-value
(an
arbitrary
number
of
these
is
allowed),
optional
#
lets
you
count
the
words
at
indexing
time
#
将转化成关键字的字段,用于提高匹配率
#
sql_attr_str2wordcount
=
stitle
#
combined
field
plus
attribute
declaration
(from
a
single
column)
#
stores
column
as
an
attribute,
but
also
indexes
it
as
a
full-text
field
#
跟sql_attr_string不同是该属性加入索引
#
sql_field_string
=
author
#
sql_field_str2wordcount
=
title
#
post-query,
executed
on
sql_query
completion
#
optional,
default
is
empty
#
取后查询
#
sql_query_post
=
#
post-index-query,
executed
on
successful
indexing
completion
#
optional,
default
is
empty
#
$maxid
expands
to
max
document
ID
actually
fetched
from
DB
#
索引后查询
#
sql_query_post_index
=
REPLACE
INTO
counters
(
id,
val
)
#
VALUES
(
'max_indexed_id',
$maxid
)
#
ranged
query
throttling,
in
milliseconds
#
optional,
default
is
0
which
means
no
delay
#
enforces
given
delay
before
each
query
step
#分区查询的时间间隔
sql_ranged_throttle
=
0
#
document
info
query,
ONLY
for
CLI
search
(ie.
testing
and
debugging)
#
optional,
default
is
empty
#
must
contain
$id
macro
and
must
fetch
the
document
by
that
id
#命令行调试查询结果用
sql_query_info
=
SELECT
*
FROM
ko_link
WHERE
id=$id
#
kill-list
query,
fetches
the
document
IDs
for
kill-list
#
k-list
will
suppress
matches
from
preceding
indexes
in
the
same
query
#
optional,
default
is
empty
##清理指定查询ID列表,对于数据的更改
#
sql_query_killlist
=
SELECT
id
FROM
documents
WHERE
edited>=@last_reindex
#
columns
to
unpack
on
indexer
side
when
indexing
#
multi-value,
optional,
default
is
empty
list
#
启用ZIP压缩
可以降低系统负载
但必须保证zlib库zlib-dev库可用
#
unpack_zlib
=
zlib_column
#
unpack_mysqlcompress
=
compressed_column
#
unpack_mysqlcompress
=
compressed_column_2
#
maximum
unpacked
length
allowed
in
MySQL
COMPRESS()
unpacker
#
optional,
default
is
16M
#
压缩缓存区大小
不能小于字段存储值
#
unpack_mysqlcompress_maxsize
=
16M
#####################################################################
##
xmlpipe2
配置
#####################################################################
#
type
=
xmlpipe
#
shell
command
to
invoke
xmlpipe
stream
producer
#
mandatory
#
#
xmlpipe_command
=
cat
/usr/local/sphinx/var/test.xml
#
xmlpipe2
field
declaration
#
multi-value,
optional,
default
is
empty
#
#
xmlpipe_field
=
subject
#
xmlpipe_field
=
content
#
xmlpipe2
attribute
declaration
#
multi-value,
optional,
default
is
empty
#
all
xmlpipe_attr_XXX
options
are
fully
similar
to
sql_attr_XXX
#
#
xmlpipe_attr_timestamp
=
published
#
xmlpipe_attr_uint
=
author_id
#
perform
UTF-8
validation,
and
filter
out
incorrect
codes
#
avoids
XML
parser
choking
on
non-UTF-8
documents
#
optional,
default
is
0
#
#
xmlpipe_fixup_utf8
=
1
}
# inherited
source
example
# 继承数据源
# all
the
parameters
are
copied
from
the
parent
source,
# and
may
then
be
overridden
in
this
source
definition
#source src1throttled
:
src1
#{
#
sql_ranged_throttle
=
100
#}
#############################################################################
## index
definition
#############################################################################
# local
index
example
#
# this
is
an
index
which
is
stored
locally
in
the
filesystem
#
# all
indexing-time
options
(such
as
morphology
and
charsets)
# are
configured
per
local
index
index test1
{
#
index
type
#
optional,
default
is
'plain'
#
known
values
are
'plain',
'distributed',
and
'rt'
(see
samples
below)
#索引类型
本地
分布式
#
type
=
plain
#
document
source(s)
to
index
#
multi-value,
mandatory
#
document
IDs
must
be
globally
unique
across
all
sources
#数据源,可以多个数据源
source
=
src1
#
index
files
path
and
file
name,
without
extension
#
mandatory,
path
must
be
writable,
extensions
will
be
auto-appended
#
索引保存路径
path
=
/usr/local/sphinx/var/data/test1
#
document
attribute
values
(docinfo)
storage
mode
#
optional,
default
is
'extern'
#
known
values
are
'none',
'extern'
and
'inline'
#索引存储方式
docinfo
=
extern
#
memory
locking
for
cached
data
(.spa
and
.spi),
to
prevent
swapping
#
optional,
default
is
0
(do
not
mlock)
#
requires
searchd
to
be
run
from
root
#内存锁定
需要保证足够权限
mlock
=
0
#
a
list
of
morphology
preprocessors
to
apply
#
optional,
default
is
empty
#
#
builtin
preprocessors
are
'none',
'stem_en',
'stem_ru',
'stem_enru',
#
'soundex',
and
'metaphone'
;
additional preprocessors available from
#
libstemmer
are
'libstemmer_XXX',
where
XXX
is
algorithm
code
#
(see
libstemmer_c/libstemmer/modules.txt)
#
词语提取器
#
morphology
=
stem_en,
stem_ru,
soundex
#
morphology
=
libstemmer_german
#
morphology
=
libstemmer_sv
morphology
=
stem_en
#
minimum
word
length
at
which
to
enable
stemming
#
optional,
default
is
1
(stem
everything)
#
词干化的最小词长
#
min_stemming_len
=
1
#
stopword
files
list
(space
separated)
#
optional,
default
is
empty
#
contents
are
plain
text,
charset_table
and
stemming
are
both
applied
#
停用搜索词
#
stopwords
=
/usr/local/sphinx/var/data/stopwords.txt
#
wordforms
file,
in
"mapfrom
>
mapto"
plain
text
format
#
optional,
default
is
empty
#
词型字典
可用spelldump工具生成
#
wordforms
=
/usr/local/sphinx/var/data/wordforms.txt
#
tokenizing
exceptions
file
#
optional,
default
is
empty
#Token特例文件,就是有些词是完整词意,不能拆分索引如a&t
跟a
&
t
#
plain
text,
case
sensitive,
space
insensitive
in
map-from
part
#
one
"Map
Several
Words
=>
ToASingleOne"
entry
per
line
#
#
exceptions
=
/usr/local/sphinx/var/data/exceptions.txt
#
minimum
indexed
word
length
#
default
is
1
(index
everything)
#
最小索引长度,就是小于指定长度的词不被索引
min_word_len
=
1
#
charset
encoding
type
#
optional,
default
is
'sbcs'
#
known
types
are
'sbcs'
(Single
Byte
CharSet)
and
'utf-8'
#
字符编码
charset_type
=
utf-8
#
charset
definition
and
case
folding
rules
"table"
#
optional,
default
value
depends
on
charset_type
#
#
defaults
are
configured
to
include
English
and
Russian
characters
only
#
you
need
to
change
the
table
to
include
additional
ones
#
this
behavior
MAY
change
in
future
versions
#
#
'sbcs'
default
value
is
#
charset_table
=
0..9,
A..Z->a..z,
_,
a..z,
U+A8->U+B8,
U+B8,
U+C0..U+DF->U+E0..U+FF,
U+E0..U+FF
#
转换字符表
#
'utf-8'
default
value
is
#
charset_table
=
0..9,
A..Z->a..z,
_,
a..z,
U+410..U+42F->U+430..U+44F,
U+430..U+44F
#
ignored
characters
list
#
optional,
default
value
is
empty
#
忽略字符表
#
ignore_chars
=
U+00AD
#
minimum
word
prefix
length
to
index
#
optional,
default
is
0
(do
not
index
prefixes)
#索引的最小前缀长度,小心使用,索引和搜索的时间皆会恶化
#
min_prefix_len
=
0
#
minimum
word
infix
length
to
index
#
optional,
default
is
0
(do
not
index
infixes)
#索引的最小中缀长度
小心使用,索引和搜索的时间皆会恶化
#
min_infix_len
=
0
#
list
of
fields
to
limit
prefix/infix
indexing
to
#
optional,
default
value
is
empty
(index
all
fields
in
prefix/infix
mode)
#
未知
#
prefix_fields
=
filename
#
infix_fields
=
url,
domain
#
enable
star-syntax
(wildcards)
when
searching
prefix/infix
indexes
#
search-time
only,
does
not
affect
indexing,
can
be
0
or
1
#
optional,
default
is
0
(do
not
use
wildcard
syntax)
#
启用星号语法
#
enable_star
=
1
#
expand
keywords
with
exact
forms
and/or
stars
when
searching
fit
indexes
#
search-time
only,
does
not
affect
indexing,
can
be
0
or
1
#
optional,
default
is
0
(do
not
expand
keywords)
#
扩大搜索关键字
形式如:
running
->
(
running
|
*running*
|
=running
)
#
expand_keywords
=
1
#
n-gram
length
to
index,
for
CJK
indexing
#
only
supports
0
and
1
for
now,
other
lengths
to
be
implemented
#
optional,
default
is
0
(disable
n-grams)
#
中文等其他语言的基本支持
#
ngram_len
=
1
#
n-gram
characters
list,
for
CJK
indexing
#
optional,
default
is
empty
#中文或其他语言的值范围
#
ngram_chars
=
U+3000..U+2FA1F
#
phrase
boundary
characters
list
#
optional,
default
is
empty
#
边界符
#
phrase_boundary
=
.,
?,
!,
U+2026
#
horizontal
ellipsis
#
phrase
boundary
word
position
increment
#
optional,
default
is
0
#
边界符增量
#
phrase_boundary_step
=
100
#
blended
characters
list
#
blended
chars
are
indexed
both
as
separators
and
valid
characters
#
for
instance,
AT&T
will
result
in
3
tokens
("at",
"t",
and
"at&t")
#
optional,
default
is
empty
#
混合字符列表
#
blend_chars
=
+,
&,
U+23
#
blended
token
indexing
mode
#
a
comma
separated
list
of
blended
token
indexing
variants
#
known
variants
are
trim_none,
trim_head,
trim_tail,
trim_both,
skip_pure
#
optional,
default
is
trim_none
#未知
#
blend_mode
=
trim_tail,
skip_pure
#
whether
to
strip
HTML
tags
from
incoming
documents
#
known
values
are
0
(do
not
strip)
and
1
(do
strip)
#
optional,
default
is
0
#
删除HTML标签
(小心文本被删除)
html_strip
=
0
#
what
HTML
attributes
to
index
if
stripping
HTML
#
optional,
default
is
empty
(do
not
index
anything)
#
保留的HTML标签
#
html_index_attrs
=
img=alt,title
;
a=title;
#
what
HTML
elements
contents
to
strip
#
optional,
default
is
empty
(do
not
strip
element
contents)
#
不但删除标签,其包含的文本也将删除
#
html_remove_elements
=
style,
script
#
whether
to
preopen
index
data
files
on
startup
#
optional,
default
is
0
(do
not
preopen),
searchd-only
#
预先打开索引还是每次查询的时候再打开索引
#
preopen
=
1
#
whether
to
keep
dictionary
(.spi)
on
disk,
or
cache
it
in
RAM
#
optional,
default
is
0
(cache
in
RAM),
searchd-only
#
将字典文件是否保存在内存中
#
ondisk_dict
=
1
#
whether
to
enable
in-place
inversion
(2x
less
disk,
90-95%
speed)
#
optional,
default
is
0
(use
separate
temporary
files),
indexer-only
#
是否启用原地索引倒转
减少磁盘使用
性能会有一点损失
#
inplace_enable
=
1
#
in-place
fine-tuning
options
#
optional,
defaults
are
listed
below
#微调原地倒转
#
inplace_hit_gap
=
0
#
preallocated
hitlist
gap
size
#
inplace_docinfo_gap
=
0
#
preallocated
docinfo
gap
size
#
inplace_reloc_factor
=
0.1
#
relocation
buffer
size
within
arena
#
inplace_write_factor
=
0.1
#
write
buffer
size
within
arena
#
whether
to
index
original
keywords
along
with
stemmed
versions
#
enables
"=exactform"
operator
to
work
#
optional,
default
is
0
#
是否在索引原关键词的词干化/重映射后的形式的同时也索引原词
#
index_exact_words
=
1
#
position
increment
on
overshort
(less
than
min_word_len)
words
#
optional,
allowed
values
are
0
and
1,
default
is
1
#在经过过短的词(比
min_word_len短的词)处后增加位置值
#
overshort_step
=
1
#
position
increment
on
stopword
#
optional,
allowed
values
are
0
and
1,
default
is
1
#在经过
停用词
处后增加位置值可选选项
#
stopword_step
=
1
#
hitless
words
list
#
positions
for
these
keywords
will
not
be
stored
in
the
index
#
optional,
allowed
values
are
'all',
or
a
list
file
name
#
不能中断的字符列表
#
hitless_words
=
all
#
hitless_words
=
hitless.txt
#字符文件
#
detect
and
index
sentence
and
paragraph
boundaries
#
required
for
the
SENTENCE
and
PARAGRAPH
operators
to
work
#
optional,
allowed
values
are
0
and
1,
default
is
0
#
是否检查标签合并
针对HTML
#
index_sp
=
1
#
index
zones,
delimited
by
HTML/XML
tags
#
a
comma
separated
list
of
tags
and
wildcards
#
required
for
the
ZONE
operator
to
work
#
optional,
default
is
empty
string
(do
not
index
zones)
#
对HTML标签的权重
#
index_zones
=
title,
h*,
th
}
# inherited
index
example
# 索引继承
# all
the
parameters
are
copied
from
the
parent
index,
# and
may
then
be
overridden
in
this
index
definition
#index test1stemmed
:
test1
#{
#
path
=
/usr/local/sphinx/var/data/test1stemmed
#
morphology
=
stem_en
#}
# distributed
index
example
#
# this
is
a
virtual
index
which
can
NOT
be
directly
indexed,
# and
only
contains
references
to
other
local
and/or
remote
indexes
#index dist1
#{
#分布式索引配置
#
'distributed'
index
type
MUST
be
specified
#
type
=
distributed
#
local
index
to
be
searched
#
there
can
be
many
local
indexes
configured
#
local
=
test1
#
local
=
test1stemmed
#
remote
agent
#
multiple
remote
agents
may
be
specified
#
syntax
for
TCP
connections
is
'hostname:port:index1,
[
index2[,...
]]'
#
syntax
for
local
UNIX
connections
is
'/path/to/socket:index1,
[
index2[,...
]]'
#
agent
=
localhost:9313:remote1
#
agent
=
localhost:9314:remote2,remote3
#
agent
=
/var/run/searchd.sock:remote4
#
blackhole
remote
agent,
for
debugging/testing
#
network
errors
and
search
results
will
be
ignored
#
#
agent_blackhole
=
testbox:9312:testindex1,testindex2
#
remote
agent
connection
timeout,
milliseconds
#
optional,
default
is
1000
ms,
ie.
1
sec
#
agent_connect_timeout
=
1000
#
remote
agent
query
timeout,
milliseconds
#
optional,
default
is
3000
ms,
ie.
3
sec
#
agent_query_timeout
=
3000
#}
# realtime
index
example
#
# you
can
run
INSERT,
REPLACE,
and
DELETE
on
this
index
on
the
fly
# using
MySQL
protocol
(see
'listen'
directive
below)
#index rt
#{
#
'rt'
index
type
must
be
specified
to
use
RT
index
#
type
=
rt
#
index
files
path
and
file
name,
without
extension
#
mandatory,
path
must
be
writable,
extensions
will
be
auto-appended
#
path
=
/usr/local/sphinx/var/data/rt
#
RAM
chunk
size
limit
#
RT
index
will
keep
at
most
this
much
data
in
RAM,
then
flush
to
disk
#
optional,
default
is
32M
#
#
rt_mem_limit
=
512M
#
full-text
field
declaration
#
multi-value,
mandatory
#
rt_field
=
title
#
rt_field
=
content
#
unsigned
integer
attribute
declaration
#
multi-value
(an
arbitrary
number
of
attributes
is
allowed),
optional
#
declares
an
unsigned
32-bit
attribute
#
rt_attr_uint
=
gid
#
RT
indexes
currently
support
the
following
attribute
types:
#
uint,
bigint,
float,
timestamp,
string
#
#
rt_attr_bigint
=
guid
#
rt_attr_float
=
gpa
#
rt_attr_timestamp
=
ts_added
#
rt_attr_string
=
author
#}
#############################################################################
## indexer
settings
#############################################################################
indexer
{
#索引过程内存使用限制。可选选项,默认32M。
#
memory
limit,
in
bytes,
kilobytes
(16384K)
or
megabytes
(256M)
#
optional,
default
is
32M,
max
is
2047M,
recommended
is
256M
to
1024M
mem_limit
=
32M
#
maximum
IO
calls
per
second
(for
I/O
throttling)
#
optional,
default
is
0
(unlimited)
#
每秒最大I/O操作次数,用于限制I/O操作。可选选项,默认为0(无限制)。
#
max_iops
=
40
#
maximum
IO
call
size,
bytes
(for
I/O
throttling)
#
optional,
default
is
0
(unlimited)
#
最大允许的I/O操作大小,以字节为单位,用于I/O节流。可选选项,默认为0(不限制)。
#
max_iosize
=
1048576
#
maximum
xmlpipe2
field
length,
bytes
#
optional,
default
is
2M
#
对于XMLpipe2数据源允许的最大的域大小
#
max_xmlpipe2_field
=
4M
#
write
buffer
size,
bytes
#
several
(currently
up
to
4)
buffers
will
be
allocated
#
write
buffers
are
allocated
in
addition
to
mem_limit
#
optional,
default
is
1M
#
写缓冲区的大小,单位是字节。可选选项,默认值是1MB。
#
write_buffer
=
1M
#
maximum
file
field
adaptive
buffer
size
#
optional,
default
is
8M,
minimum
is
1M
#
#
max_file_field_buffer
=
32M
}
#############################################################################
## searchd
settings
#############################################################################
searchd
{
#
[
hostname:
]port
[
:protocol
],
or
/unix/socket/path
to
listen
on
#
known
protocols
are
'sphinx'
(SphinxAPI)
and
'mysql41'
(SphinxQL)
#
#
multi-value,
multiple
listen
points
are
allowed
#
optional,
defaults
are
9312:sphinx
and
9306:mysql41,
as
below
#
#
listen
=
127.0.0.1
#
listen
=
192.168.0.1:9312
#
listen
=
9312
#
listen
=
/var/run/searchd.sock
listen
=
9312
#listen
=
9306:mysql41
#
log
file,
searchd
run
info
is
logged
here
#
optional,
default
is
'searchd.log'
#
全部searchd运行时事件会被记录在这个日志文件中。
log
=
/usr/local/sphinx/var/log/searchd.log
#
query
log
file,
all
search
queries
are
logged
here
#
optional,
default
is
empty
(do
not
log
queries)
#
全部搜索查询会被记录在此文件中。
query_log
=
/usr/local/sphinx/var/log/query.log
#
client
read
timeout,
seconds
#
optional,
default
is
5
#网络客户端请求的读超时时间,单位是秒。
read_timeout
=
5
#
request
timeout,
seconds
#
optional,
default
is
5
minutes
#在使用持久连接时,两次查询之间等待的最长时间(单位是秒)。
client_timeout
=
300
#
maximum
amount
of
children
to
fork
(concurrent
searches
to
run)
#
optional,
default
is
0
(unlimited)
#子进程的最大数量
,用来控制服务器负载。任何时候不可能有比此设置值更多的搜索同时运行。当达到限制时,新的输入客户端会被用临时失败(SEARCH_RETRY)状态码驳回,同时给出一个声明服务器已到最大连接限制的消息。
max_children
=
30
#
PID
file,
searchd
process
ID
file
name
#
mandatory
#进程ID文件
pid_file
=
/usr/local/sphinx/var/log/searchd.pid
#
max
amount
of
matches
the
daemon
ever
keeps
in
RAM,
per-index
#
WARNING,
THERE'S
ALSO
PER-QUERY
LIMIT,
SEE
SetLimits()
API
CALL
#
default
is
1000
(just
like
Google)
#守护进程在内存中为每个索引所保持并返回给客户端的匹配数目的最大值。
max_matches
=
1000
#
seamless
rotate,
prevents
rotate
stalls
if
precaching
huge
datasets
#
optional,
default
is
1
#防止
searchd
轮换在需要预取大量数据的索引时停止响应。可选选项,默认为1(启用无缝(seamless)轮换)。
seamless_rotate
=
1
#
whether
to
forcibly
preopen
all
indexes
on
startup
#
optional,
default
is
1
(preopen
everything)
#是否在启动时强制重新打开所有索引文件。可选选项,默认为1(全部打开)。
preopen_indexes
=
1
#
whether
to
unlink
.old
index
copies
on
successful
rotation.
#
optional,
default
is
1
(do
unlink)
#索引轮换成功之后,是否删除以.old为扩展名的索引拷贝。可选选项,默认为1(删除这些索引拷贝)。
unlink_old
=
1
#
attribute
updates
periodic
flush
timeout,
seconds
#
updates
will
be
automatically
dumped
to
disk
this
frequently
#
optional,
default
is
0
(disable
periodic
flush)
#
UpdateAttributes()
调用时候更新是否隔一段时间写入磁盘
#
attr_flush_period
=
900
#
instance-wide
ondisk_dict
defaults
(per-index
value
take
precedence)
#
optional,
default
is
0
(precache
all
dictionaries
in
RAM)
#对
ondisk_dict
指令的全局的默认值。
可选选项,默认值是0(将字典预先缓冲到内存)。
#
ondisk_dict_default
=
1
#
MVA
updates
pool
size
#
shared
between
all
instances
of
searchd,
disables
attr
flushes!
#
optional,
default
size
is
1M
#用于多值属性MVA更新的存储空间的共享池大小。
mva_updates_pool
=
1M
#
max
allowed
network
packet
size
#
limits
both
query
packets
from
clients,
and
responses
from
agents
#
optional,
default
size
is
8M
#网络通讯时允许的最大的包的大小。
max_packet_size
=
8M
#
crash
log
path
#
searchd
will
(try
to)
log
crashed
query
to
'crash_log_path.PID'
file
#
optional,
default
is
empty
(do
not
create
crash
logs)
#崩溃日志文件的路径
#
crash_log_path
=
/usr/local/sphinx/var/log/crash
#
max
allowed
per-query
filter
count
#
optional,
default
is
256
#每次查询允许设置的过滤器的最大个数。只用于内部检查,不直接影响内存使用或性能。
max_filters
=
256
#
max
allowed
per-filter
values
count
#
optional,
default
is
4096
#单个过滤器允许的值的最大个数。只用于内部检查,不直接影响内存使用或性能。
max_filter_values
=
4096
#
socket
listen
queue
length
#
optional,
default
is
5
#TCP监听积压列表长度。无法入队的请求立即失败并收到“连接被拒”错误信息
#
listen_backlog
=
5
#
per-keyword
read
buffer
size
#
optional,
default
is
256K
#每个关键字的读缓冲区的大小。可选选项,默认值是256K。
#
read_buffer
=
256K
#
unhinted
read
size
(currently
used
when
reading
hits)
#
optional,
default
is
32K
#无提示时读操作的大小。可选选项,默认值是32K。
#
read_unhinted
=
32K
#
max
allowed
per-batch
query
count
(aka
multi-query
count)
#
optional,
default
is
32
#限制每批次的查询量。一个OPEN之后的查询量
max_batch_queries
=
32
#
max
common
subtree
document
cache
size,
per-query
#
optional,
default
is
0
(disable
subtree
optimization)
#
#
subtree_docs_cache
=
4M
#
max
common
subtree
hit
cache
size,
per-query
#
optional,
default
is
0
(disable
subtree
optimization)
#
限制RAM使用一个共同的子树优化
默认不优化
#
subtree_hits_cache
=
8M
#
multi-processing
mode
(MPM)
#
known
values
are
none,
fork,
prefork,
and
threads
#
optional,
default
is
fork
#
工作方式
workers
=
threads
#
for
RT
to
work
#
max
threads
to
create
for
searching
local
parts
of
a
distributed
index
#
optional,
default
is
0,
which
means
disable
multi-threaded
searching
#
should
work
with
all
MPMs
(ie.
does
NOT
require
workers=threads)
#
#
dist_threads
=
4
#
binlog
files
path
;
use empty string to disable binlog
#
optional,
default
is
build-time
configured
data
directory
#
二进制日志路径
#
binlog_path
=
#
disable
logging
#
binlog_path
=
/usr/local/sphinx/var/data
#
binlog.001
etc
will
be
created
there
#
binlog
flush/sync
mode
#
0
means
flush
and
sync
every
second
#
1
means
flush
and
sync
every
transaction
#
2
means
flush
every
transaction,
sync
every
second
#
optional,
default
is
2
#
日志刷新模式
#
binlog_flush
=
2
#
binlog
per-file
size
limit
#
optional,
default
is
128M,
0
means
no
limit
#最大日志大小
#
binlog_max_log_size
=
256M
#
per-thread
stack
size,
only
affects
workers=threads
mode
#
optional,
default
is
64K
#每个线程的堆栈大小。
#
thread_stack
=
128K
#
per-keyword
expansion
limit
(for
dict=keywords
prefix
searches)
#
optional,
default
is
0
(no
limit)
#
扩大为一个关键字的最大数目
#
expansion_limit
=
1000
#
RT
RAM
chunks
flush
period
#
optional,
default
is
0
(no
periodic
flush)
#RT索引在内存中检查的时间
#
rt_flush_period
=
900
#
query
log
file
format
#
optional,
known
values
are
plain
and
sphinxql,
default
is
plain
#
查询日志格式
#
query_log_format
=
sphinxql
#
version
string
returned
to
MySQL
network
protocol
clients
#
optional,
default
is
empty
(use
Sphinx
version)
#
MYSQL版本
#
mysql_version_string
=
5.0.37
#
trusted
plugin
directory
#
optional,
default
is
empty
(disable
UDFs)
#
插件目录
#
plugin_dir
=
/usr/local/sphinx/lib
#
default
server-wide
collation
#
optional,
default
is
libc_ci
#
链接字符集
#
collation_server
=
utf8_general_ci
#
server-wide
locale
for
libc
based
collations
#
optional,
default
is
C
#
collation
选项
#
collation_libc_locale
=
ru_RU.UTF-8
#
threaded
server
watchdog
(only
used
in
workers=threads
mode)
#
optional,
values
are
0
and
1,
default
is
1
(watchdog
on)
#
是否启用服务器监控进程
#
watchdog
=
1
#
SphinxQL
compatibility
mode
(legacy
columns
and
their
names)
#
optional,
default
is
0
(SQL
compliant
syntax
and
result
sets)
#sphinxql
兼容模式
#
compat_sphinxql_magics
=
1
}
# --eof--
}
#
#
#
#
#source
#{
#
#}
#############################################################################
##
#############################################################################
#
#
#
#
#
#
index
{
}
#
#
#
#
#index
#{
#
#
#}
#
#
#
#
#index
#{
#分布式索引配置
#
#
#
#
#
#
#
#}
#
#
#
#
#index
#{
#
#
#
#
#
#}
#############################################################################
##
#############################################################################
indexer
{
}
#############################################################################
##
#############################################################################
searchd
{
}
#
建立索引:
/usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf test1
不停服务下索引:
/usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf --all --rotate
启动索引服务,使PHP的客户端可用
/usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/sphinx.conf
建立PHP测试文件
<?php
$s
=
new
SphinxClient;
$s->setServer("localhost",
9312);
$s->setMatchMode(SPH_MATCH_ANY);
$s->setMaxQueryTime(3);
$result
=
$s->query("test");
#
查询
print_r
(
$result);
?>
运行即可
searchd 命令:
强行停止:
searchd
--config
/home/myuser/sphinx.conf
--stop
安静停止:
searchd
--config
/home/myuser/sphinx.conf
--stopwait
状态:
searchd
--config
/home/myuser/sphinx.conf
--status
指定PID文件
searchd
--config
/home/myuser/sphinx.conf
--pidfile
/home/myuser/sphinx.pid
启动的为控制台模式:
searchd
--config
/home/myuser/sphinx.conf
--console
只启动指定索引
searchd
--index
myindex
生成一些字典的工具软件: spelldump
indextool 一些转移等工具的软件
搜索字符串规则:
*
operator
OR:
hello |
world
* operator
NOT:
hello -world
hello !world
* field
search
operator:
@title hello
@body
world
* field
position
limit
modifier
(introduced
in
version
0.9.9-rc1):
@body [ 50 ] hello
* multiple-field
search
operator:
@(title,body) hello
world
* all-field
search
operator:
@* hello
* phrase
search
operator:
"hello world"
* proximity
search
operator:
"hello world"~10
* quorum
matching
operator:
"the world
is
a
wonderful
place"/3
* strict
order
operator
(aka
operator
"before"):
aaa <<
bbb
<<
ccc
* exact
form
modifier
(introduced
in
version
0.9.9-rc1):
raining =cats
and
=dogs
* field-start
and
field-end
modifier
(introduced
in
version
0.9.9-rc2):
^hello world$
* NEAR,
generalized
proximity
operator
(introduced
in
version
2.0.1-beta):
hello NEAR/3
world
NEAR/4
"my
test"
* SENTENCE
operator
(introduced
in
version
2.0.1-beta):
all SENTENCE
words
SENTENCE
"in
one
sentence"
* PARAGRAPH
operator
(introduced
in
version
2.0.1-beta):
"Bill Gates"
PARAGRAPH
"Steve
Jobs"
* zone
limit
operator:
ZONE:(h3,h4) only
in
these
titles
hello
*
hello
hello
*
@title
*
@body [ 50 ]
*
@(title,body)
*
@*
*
"hello
*
"hello
*
"the
*
aaa
*
raining
*
^hello
*
hello
*
all
*
"Bill
*
ZONE:(h3,h4)
表达式,支持函数等 日期用的是时间戳 (好像不是作为MYSQL存储引擎的时候用不到)
*
Arithmetic
operators:
+,
-,
*,
/,
%,
DIV,
MOD
* Comparison
operators:
<,
>
<=,
>=,
=,
<>
* Boolean
operators:
AND,
OR,
NOT
* Bitwise
operators:
&,
|
* ABS()
* BIGINT()
* CEIL()
* COS()
* CRC32()
* DAY()
* EXP()
* FLOOR()
* GEODIST()
* IDIV()
* IF()
* IN()
* INTERVAL()
* LN()
* LOG10()
* LOG2()
* MAX()
* MIN()
* MONTH()
* NOW()
* POW()
* SIN()
* SINT()
* SQRT()
* YEAR()
* YEARMONTH()
* YEARMONTHDAY()
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
客户端方法介绍:
<?php
include_once 'sphinxapi.php';
$s =
new
SphinxClient();
$s->setServer("localhost", 9312);
$s->SetConnectTimeout (
1
);
//
设置链接超时
$s->setMatchMode(SPH_MATCH_ANY); // 匹配模式
$s->setMaxQueryTime(3); // 查询超时
//$s->SetSelect ( $select );//设置返回的字段
// $s->SetGroupBy ( $groupby, SPH_GROUPBY_ATTR, $groupsort );//汇总
//$s->SetGroupDistinct ( $distinct );//设置不重复字段
$s->SetArrayResult (
true
);
//
结果是否有ID
// $s->SetSortMode ( SPH_SORT_EXTENDED, $sortby );//排序模式
// link
//$s->SetFilter ( 'target_type', array(1),true );
$s->SetLimits (
0,
10
);
//
显示数量:开始 量 最大量 右偏移量
$result =
$s->query("good","team");
//
查询
print_r( $result);
源地址: http://www.cnblogs.com/liushannet/archive/2011/10/27/2226696.html
include_once
$s
$s->setServer("localhost",
$s->SetConnectTimeout
$s->setMatchMode(SPH_MATCH_ANY); // 匹配模式
$s->setMaxQueryTime(3); // 查询超时
//$s->SetSelect
// $s->SetGroupBy
//$s->SetGroupDistinct
$s->SetArrayResult
// $s->SetSortMode
// link
//$s->SetFilter
$s->SetLimits
$result
print_r( $result);
源地址: http://www.cnblogs.com/liushannet/archive/2011/10/27/2226696.html