source src1
{
# data source type, mandatory, no default value
# known types: mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
type = mysql # we use MySQL here
# the familiar bundle of SQL connection settings
sql_host = localhost # host
sql_user = test # user
sql_pass = # password; when there is no password, leave this empty, do not write '' or ""
sql_db = test # database
sql_port = 3306 # optional, default is 3306
# you can also connect through a UNIX socket
# optional, default is empty
# usual path on Linux is '/var/lib/mysql/mysql.sock'
# usual path on FreeBSD is '/tmp/mysql.sock'
#
# sql_sock = /tmp/mysql.sock
# MySQL client connection flags
# optional, default is 0
# these flags trade off efficiency and security in the indexer-to-MySQL connection
# known values are 0/32/2048/32768, meaning none / use compression protocol / switch to SSL after handshake / use MySQL 4.1 authentication
# mysql_connect_flags = 32 # enable compression
# only needed when the SSL flag is set above
# optional, default is empty
#
# mysql_ssl_cert = /etc/ssl/client-cert.pem
# mysql_ssl_key = /etc/ssl/client-key.pem
# mysql_ssl_ca = /etc/ssl/cacert.pem
# MS SQL specific, whether to use Windows authentication
# optional, default is 0
#
# mssql_winauth = 1 # authenticate as the currently logged-in Windows user
# MS SQL specific Unicode indexing flag
# optional, default is 0 (expect SBCS, single-byte data)
#
# mssql_unicode = 1 # request Unicode data from server
# ODBC DSN to connect to
# mandatory for the odbc source type, no default value
#
# odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
# sql_query = SELECT id, data FROM documents.csv
# ODBC and MS SQL specific, per-column read buffer sizes
# why have such buffers at all?
# a column may be declared very wide yet rarely hold values that long, so Sphinx does not fetch unbounded data; instead each column gets a buffer that acts as a length limit.
# by default the limit is 1K for non-character columns and 1M for character columns
# optional, default is auto-detected
#
# sql_column_buffers = content=12M, comments=1M
# pre-queries, executed before the main sql_query (useful for session setup work)
#
# sql_query_pre = SET NAMES utf8
# sql_query_pre = SET SESSION query_cache_type=OFF
# main document fetch query
# mandatory; the integer document id must be the first column of the SELECT list
sql_query = \
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
FROM documents
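# the query above assumes a documents table roughly like the following
# (a sketch; the column types are assumptions inferred from the query):
#
# CREATE TABLE documents (
#     id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
#     group_id INT NOT NULL,
#     date_added DATETIME NOT NULL,
#     title VARCHAR(255) NOT NULL,
#     content TEXT NOT NULL
# );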
# joined/payload queries
## sometimes the fields you want to index live in other tables, which would otherwise force a JOIN into sql_query.
## sql_joined_field instead adds a full-text field whose contents come from a separate query against the other table.
## the query after the semicolon has requirements: for 'query' it must return the document id and the field value; for 'payload-query' it must return the id, the field value, and a weight.
## in both cases the rows must be sorted by document id in ascending order.
# sql_joined_field = tags from query; select docid, concat('tag', tagid) from tags order by docid asc
# sql_joined_field = wtags from payload-query; select docid, tag, tagweight from tags order by docid asc
# file based fields: the table column stores only a path, while the actual field content lives in an external file, e.g. a content_file_path column.
# when indexer builds the index it reads this column, loads the referenced file, and tokenizes and indexes its contents
# the maximum file size is set by max_file_field_buffer in the indexer section; file IO errors are non-fatal and are reported as warnings.
# sql_file_field = content_file_path
# when the source table is very large, fetching it with a single SQL query is likely to lock the table for a long time.
## you can split the fetch into multiple queries instead; those need a range and a step, which is exactly what sql_query_range and sql_range_step provide.
## indexer first fetches the minimum and maximum id, then steps through the range. in the example below, a table with 4500 rows is indexed with 5 SQL queries.
# the delay between consecutive stepped queries is set by sql_ranged_throttle, in milliseconds.
# sql_query must then select the interval marked by the $start and $end macros
# sql_query = \
# SELECT doc.id, doc.id AS group_id, doc.title, doc.data \
# FROM documents doc \
# WHERE id>=$start AND id<=$end
#
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
# range step
# optional, default is 1024
#
# sql_range_step = 1000
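# a worked example of the stepping: with MIN(id)=1, MAX(id)=4500, and
# sql_range_step = 1000, indexer would issue 5 ranged queries:
#
# ... WHERE id>=1    AND id<=1000
# ... WHERE id>=1001 AND id<=2000
# (three more steps, the last one covering id>=4001 AND id<=4500)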
## the attribute declarations follow
## attributes are stored in the index; they are not full-text indexed, but can be used for filtering and sorting
# unsigned integer attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# an optional bit size can be specified, default is 32
# sql_attr_uint = author_id
# sql_attr_uint = forum_id:9 # 9 bits for forum_id
sql_attr_uint = group_id
# boolean attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# equivalent to sql_attr_uint with a 1-bit size
# sql_attr_bool = is_deleted
# bigint attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# declares a signed (unlike uint!) 64-bit attribute
#
# sql_attr_bigint = my_bigint_id
# UNIX timestamp attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# similar to integer, but can also be used in date functions
#
# sql_attr_timestamp = posted_ts
# sql_attr_timestamp = last_edited_ts
sql_attr_timestamp = date_added
# string ordinal attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# sorting by raw strings would mean storing every string in the index and comparing them at query time, which makes the index huge.
# instead, at indexing time the string values are fetched, buffered, and sorted (bytewise), and each value is stored as its ordinal number in the sorted list.
# sorting by this attribute is then equivalent to sorting by the original strings
#
# sql_attr_str2ordinal = author_name
# floating point attribute declaration, often used for geo latitude/longitude searches
# multi-value (an arbitrary number of attributes is allowed), optional
# values are stored in single precision, 32-bit IEEE 754 format
#
# sql_attr_float = lat_radians
# sql_attr_float = long_radians
## MVA attributes
# imagine an article system where each article carries several tags; that tag set is a multi-valued attribute.
# to filter searches by tag, the tag values must be put into the index at indexing time.
# multi-valued attribute (MVA) attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# MVA values are variable length lists of unsigned 32-bit integers
#
# syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
# ATTR-TYPE is 'uint' or 'timestamp'
# SOURCE-TYPE is 'field', 'query', or 'ranged-query'
# QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
# RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
#
# sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags
# sql_attr_multi = uint tag from ranged-query; \
# SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
# SELECT MIN(docid), MAX(docid) FROM tags
# string attribute declaration
# multi-value (an arbitrary number of these is allowed), optional
# lets you store and retrieve strings
#
# sql_attr_string = stitle
# wordcount attribute declaration
# multi-value (an arbitrary number of these is allowed), optional
# lets you count the words at indexing time
#
# sql_attr_str2wordcount = stitle
# combined field plus attribute declaration (from a single column)
# stores column as an attribute, but also indexes it as a full-text field
#
# sql_field_string = author
# sql_field_str2wordcount = title
# post-query, executed immediately on sql_query completion
# it differs from sql_query_post_index only in when it runs:
# sql_query_post runs as soon as sql_query completes, while sql_query_post_index runs only after indexing has finished successfully.
# so to record the time of the last indexing run, use sql_query_post_index instead.
# optional, default is empty
#
# sql_query_post =
# post-index query, executed on successful indexing completion
# optional, default is empty
# $maxid expands to max document ID actually fetched from DB
#
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
# VALUES ( 'max_indexed_id', $maxid )
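# as noted above, recording the last indexing time belongs here rather than in
# sql_query_post; a minimal sketch reusing the counters table from the example
# above ('last_index_time' is a hypothetical key):
#
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
#     VALUES ( 'last_index_time', UNIX_TIMESTAMP() )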
# ranged query throttling, in milliseconds
# optional, default is 0 which means no delay
# enforces given delay before each query step
sql_ranged_throttle = 0
## document info query for the command-line search utility
# searches normally return only document ids rather than all the table columns.
# when debugging you usually want to see the actual rows, and that is what sql_query_info is for.
# note that it only takes effect in the command-line search utility; it has no effect on the API.
# note: the query must use the $id macro, which expands to the id of each match
sql_query_info = SELECT * FROM documents WHERE id=$id
# suppose there are two indexes, one old and one new; the old index will contain stale copies of some documents.
# when searching across both indexes, the kill-list decides which documents must be taken from the newer index.
# kill-list query, fetches the document IDs for kill-list
# k-list will suppress matches from preceding indexes in the same query
# optional, default is empty
#
# sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
# see: http://web.archive.org/web/20150403083256/http://www.coreseek.cn:80/docs/coreseek_4.1-sphinx_2.0.1-beta.html#conf-sql-query-killlist
# columns to unpack on indexer side when indexing
# multi-value, optional, default is empty list
# when indexing runs on a machine other than the database host, unpacking on the indexer side lowers database load and saves network bandwidth; zlib and zlib-devel must have been available at build time.
#
# unpack_zlib = zlib_column
# unpack_mysqlcompress = compressed_column
# unpack_mysqlcompress = compressed_column_2
# maximum unpacked length allowed in MySQL COMPRESS() unpacker
# optional, default is 16M
#
# unpack_mysqlcompress_maxsize = 16M
#####################################################################
## xmlpipe2 settings
#####################################################################
# an xmlpipe data source is simply an XML document
# type = xmlpipe
# shell command to invoke xmlpipe stream producer
# mandatory
#
# xmlpipe_command = cat /usr/local/coreseek/var/test.xml
# xmlpipe2 field declaration
# multi-value, optional, default is empty
#
# xmlpipe_field = subject
# xmlpipe_field = content
# xmlpipe2 attribute declaration
# multi-value, optional, default is empty
# all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
#
# xmlpipe_attr_timestamp = published
# xmlpipe_attr_uint = author_id
# UTF-8 validation and fixup
# applies to xmlpipe2 sources only; the source data may contain non-UTF-8 characters that would otherwise break parsing
# optional, default is 0
#
# xmlpipe_fixup_utf8 = 1
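# for reference, the producer command is expected to emit the sphinx:docset
# XML format, roughly as below (a sketch with hypothetical values, matching
# the field/attr declarations above):
#
# <?xml version="1.0" encoding="utf-8"?>
# <sphinx:docset>
#   <sphinx:document id="1">
#     <subject>hello</subject>
#     <content>this is the full-text body</content>
#     <published>1234567890</published>
#     <author_id>1</author_id>
#   </sphinx:document>
# </sphinx:docset>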
}
#############################################################################
## index definition
#############################################################################
# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index test1
{
# index type
# optional, default is 'plain'
# known values are 'plain' (ordinary local index), 'distributed', and 'rt' (realtime; see samples below)
# type = plain
# document source(s) to index
# multi-value, mandatory
# document IDs must be globally unique across all sources
source = src1
# index files path and file name, without extension
# mandatory, path must be writable, extensions will be auto-appended
path = /usr/local/coreseek/var/data/test1
# document attribute values (docinfo) storage mode
# optional, default is 'extern'
# known values are 'none', 'extern' and 'inline'
# docinfo is the collection of all attribute values attached to each document.
# with 'inline', the attribute values are stored in the document lists (.spd) right next to each document id, so query-time filtering needs no extra lookups, at the cost of duplicating the values and bloating the index.
# with 'extern', the attribute values live in a separate file (.spa), which searchd loads into memory on startup; each query-time filter then checks attributes in RAM in addition to reading document ids.
# even so, extern keeps the index files small and performs well, so unless you have special requirements, use extern.
docinfo = extern
# searchd preloads the .spa and .spi files into memory, but if that data is not accessed for a while the OS may swap it out to disk;
# setting mlock prevents that
# memory locking for cached data (.spa and .spi), to prevent swapping
# optional, default is 0 (do not mlock)
# requires searchd to be run from root
mlock = 0
# what is morphology? in English, for example, 'dogs' is the plural of 'dog', so 'dog' is the stem of 'dogs' and the two are essentially the same word.
# an English stemmer will therefore treat 'dogs' as 'dog'.
# a list of morphology preprocessors to apply
# optional, default is empty
#
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
# 'soundex', and 'metaphone'; additional preprocessors available from
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
# (see libstemmer_c/libstemmer/modules.txt)
#
# morphology = stem_en, stem_ru, soundex
# morphology = libstemmer_german
# morphology = libstemmer_sv
morphology = none
# stemming sometimes misfires, e.g. reducing 'gps' to 'gp'; this setting lets word length decide whether stemming is applied
# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
# min_stemming_len = 1
# stopwords are words that will not be indexed
# stopword files list (space separated)
# optional, default is empty
# contents are plain text, charset_table and stemming are both applied
#
# stopwords = /usr/local/coreseek/var/data/stopwords.txt
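# the file is plain text with whitespace-separated words; e.g. a minimal
# English stopwords.txt might contain (hypothetical contents):
#
# a an and are as at be by for of the to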
# custom wordforms dictionary
# wordforms file, in "mapfrom > mapto" plain text format
# optional, default is empty
#
# wordforms = /usr/local/coreseek/var/data/wordforms.txt
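# e.g. a wordforms.txt in the "mapfrom > mapto" format might contain
# (hypothetical contents):
#
# walks > walk
# walked > walk
# walking > walk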
# some special tokens should be treated as a different word entirely, e.g. mapping c++ => cplusplus.
# tokenizing exceptions file
# optional, default is empty
#
# plain text, case sensitive, space insensitive in map-from part
# one "Map Several Words => ToASingleOne" entry per line
#
# exceptions = /usr/local/coreseek/var/data/exceptions.txt
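# e.g. an exceptions.txt implementing the c++ mapping mentioned above might
# contain (hypothetical contents; the map-from part is case sensitive, so
# both spellings are listed):
#
# C++ => cplusplus
# c++ => cplusplus
# AT&T => att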
# minimum indexed word length; shorter words will not be indexed
# default is 1 (index everything)
min_word_len = 1
# charset encoding type
# under coreseek, zh_cn.utf-8, zh_cn.gbk, and zh_cn.big5 are also available
# optional, default is 'sbcs'
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
charset_type = sbcs
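# under coreseek with Chinese data you would typically use one of the zh_cn
# types mentioned above together with the mmseg dictionary path (a sketch;
# the path is an assumption that depends on your installation):
#
# charset_type = zh_cn.utf-8
# charset_dictpath = /usr/local/mmseg3/etc/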
# charset definition and case folding rules "table"; has no effect under coreseek
# optional, default value depends on charset_type
#
# defaults are configured to include English and Russian characters only
# you need to change the table to include additional ones
# this behavior MAY change in future versions
#
# 'sbcs' default value is
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
#
# 'utf-8' default value is
# charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
# ignored characters list; text immediately before and after an ignored character is joined and treated as one keyword
# optional, default value is empty
#
# ignore_chars = U+00AD
# min_prefix_len, min_infix_len, prefix_fields, and infix_fields only take effect when enable_star is on.
# why limit the prefix length at all?
# with wildcard searching enabled, prefix indexing turns each keyword into many index entries, which greatly inflates index size and search time.
# limiting the minimum prefix length helps: for 'example' with a minimum prefix length of 5, only 'examp', 'exampl', and 'example' are indexed
# minimum word prefix length to index
# optional, default is 0 (do not index prefixes)
#
# min_prefix_len = 0
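# e.g. with min_prefix_len = 5 and enable_star = 1 (see below), the query
# 'examp*' matches documents containing 'example'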
# minimum word infix length to index
# optional, default is 0 (do not index infixes)
#
# min_infix_len = 0
# list of fields to limit prefix/infix indexing to; not every field needs prefixes and infixes
# optional, default value is empty (index all fields in prefix/infix mode)
#
# prefix_fields = filename
# infix_fields = url, domain
# enable star-syntax (wildcards) when searching prefix/infix indexes
# search-time only, does not affect indexing, can be 0 or 1
# optional, default is 0 (do not use wildcard syntax)
#
# enable_star = 1
# keyword expansion
# expand keywords with exact forms and/or star wildcards when searching indexes that support them
# search-time only, does not affect indexing, can be 0 or 1
# optional, default is 0 (do not expand keywords)
#
# expand_keywords = 1
# n-gram tokenizing for indexing
# n-gram tokenizing splits text by character length instead of by dictionary, mainly for languages outside the English family (Chinese, Korean, Japanese)
# under coreseek these two options can be ignored.
# n-gram length to index, for CJK indexing
# only supports 0 and 1 for now, other lengths to be implemented
# optional, default is 0 (disable n-grams)
#
# ngram_len = 1
# n-gram characters list, for CJK indexing
# optional, default is empty
#
# ngram_chars = U+3000..U+2FA1F
# phrase boundary characters list
# these characters are treated as boundaries between phrases; see phrase_boundary_step below
# optional, default is empty
#
# phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
# phrase boundary word position increment
# optional, default is 0
#
# phrase_boundary_step = 100
# blended characters list
# blended chars are indexed both as separators and valid characters
# for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
# optional, default is empty
#
# blend_chars = +, &, U+23
# blended token indexing mode
# a comma separated list of blended token indexing variants
# known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
# optional, default is trim_none
#
# blend_mode = trim_tail, skip_pure
# whether to strip HTML tags from incoming full-text data
# known values are 0 (do not strip) and 1 (do strip)
# optional, default is 0
html_strip = 0
# what HTML attributes to index if stripping HTML
# optional, default is empty (do not index anything)
#
# html_index_attrs = img=alt,title; a=title;
# what HTML elements contents to strip
# optional, default is empty (do not strip element contents)
#
# html_remove_elements = style, script
# whether searchd should preopen index data files on startup, instead of opening them per query
# optional, default is 0 (do not preopen), searchd-only
#
# preopen = 1
# whether to keep the dictionary (.spi) on disk, or cache it in RAM
# optional, default is 0 (cache in RAM), searchd-only
#
# ondisk_dict = 1
# while an index is being built, temporary files, copies, and the old index all coexist on disk,
# so disk usage spikes; in-place inversion works around this by reusing the temporary files.
# it greatly reduces disk pressure during indexing, at the cost of slower index construction.
# whether to enable in-place inversion (2x less disk, 90-95% speed)
# optional, default is 0 (use separate temporary files), indexer-only
#
# inplace_enable = 1
# in-place fine-tuning options
# optional, defaults are listed below
#
# inplace_hit_gap = 0 # preallocated hitlist gap size
# inplace_docinfo_gap = 0 # preallocated docinfo gap size
# inplace_reloc_factor = 0.1 # relocation buffer size within arena
# inplace_write_factor = 0.1 # write buffer size within arena
# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
# index_exact_words = 1
# position increment on overshort (less than min_word_len) words
# optional, allowed values are 0 and 1, default is 1
#
# overshort_step = 1
# position increment on stopword
# optional, allowed values are 0 and 1, default is 1
#
# stopword_step = 1
# hitless words list
# positions for these keywords will not be stored in the index
# optional, allowed values are 'all', or a list file name
#
# hitless_words = all
# hitless_words = hitless.txt
# detect and index sentence and paragraph boundaries
# required for the SENTENCE and PARAGRAPH operators to work
# optional, allowed values are 0 and 1, default is 0
#
# index_sp = 1
# index zones inside fields, delimited by HTML/XML tags
# a comma separated list of tags and wildcards
# required for the ZONE operator to work
# optional, default is empty string (do not index zones)
#
# index_zones = title, h*, th
}
# realtime index example
#
# you can run INSERT, REPLACE, and DELETE on this index on the fly
# using MySQL protocol (see 'listen' directive below)
index rt
{
# 'rt' index type must be specified to use RT index
type = rt
# index files path and file name, without extension
# mandatory, path must be writable, extensions will be auto-appended
path = /usr/local/coreseek/var/data/rt
# RAM chunk size limit
# RT index will keep at most this much data in RAM, then flush to disk
# optional, default is 32M
#
# rt_mem_limit = 512M
# full-text field declaration
# multi-value, mandatory
rt_field = title
rt_field = content
# unsigned integer attribute declaration
# multi-value (an arbitrary number of attributes is allowed), optional
# declares an unsigned 32-bit attribute
rt_attr_uint = gid
# RT indexes currently support the following attribute types:
# uint, bigint, float, timestamp, string
#
# rt_attr_bigint = guid
# rt_attr_float = gpa
# rt_attr_timestamp = ts_added
# rt_attr_string = author
}
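# once searchd is running, the RT index above can be used over the MySQL
# protocol through the mysql41 listener configured below (hypothetical values):
#
# $ mysql -h 127.0.0.1 -P 9306
# mysql> INSERT INTO rt (id, title, content, gid) VALUES (1, 'hello', 'hello world', 123);
# mysql> SELECT * FROM rt WHERE MATCH('hello');
# mysql> DELETE FROM rt WHERE id = 1;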
#############################################################################
## indexer settings
#############################################################################
indexer
{
# indexing memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
mem_limit = 32M
# maximum IO calls per second (for I/O throttling)
# optional, default is 0 (unlimited)
#
# max_iops = 40
# maximum IO call size, bytes (for I/O throttling)
# optional, default is 0 (unlimited)
#
# max_iosize = 1048576
# maximum xmlpipe2 field length, bytes
# optional, default is 2M
#
# max_xmlpipe2_field = 4M
# write buffer size, bytes
# several (currently up to 4) buffers will be allocated
# write buffers are allocated in addition to mem_limit
# optional, default is 1M
#
# write_buffer = 1M
# maximum file field adaptive buffer size, bytes
# optional, default is 8M, minimum is 1M
#
# max_file_field_buffer = 32M
}
#############################################################################
## searchd settings
#############################################################################
searchd
{
# [hostname:]port[:protocol], or /unix/socket/path to listen on
# known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
#
# multi-value, multiple listen points are allowed
# optional, defaults are 9312:sphinx and 9306:mysql41, as below
#
# example listen points:
# listen = 127.0.0.1
# listen = 192.168.0.1:9312
# listen = 9312
# listen = /var/run/searchd.sock
listen = 9312
listen = 9306:mysql41
# log file, searchd run info is logged here
# optional, default is 'searchd.log'
log = /usr/local/coreseek/var/log/searchd.log
# query log file, all search queries are logged here
# optional, default is empty (do not log queries)
query_log = /usr/local/coreseek/var/log/query.log
# client read timeout, seconds
# optional, default is 5
read_timeout = 5
# request timeout for persistent client connections, seconds
# optional, default is 5 minutes
client_timeout = 300
# maximum amount of children to fork (concurrent searches to run)
# optional, default is 0 (unlimited)
max_children = 30
# PID file, searchd process ID file name
# mandatory
pid_file = /usr/local/coreseek/var/log/searchd.pid
# max amount of matches the daemon ever keeps in RAM, per-index
# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
# default is 1000 (just like Google)
max_matches = 1000
# rotating an index that needs huge amounts of data precached can otherwise stall searchd for a long time;
# with seamless rotation enabled, the stall is avoided at the cost of extra memory use
# seamless rotate, prevents rotate stalls if precaching huge datasets
# optional, default is 1
seamless_rotate = 1
# whether to forcibly preopen all indexes on startup
# optional, default is 1 (preopen everything)
preopen_indexes = 1
# whether to unlink .old index copies on successful rotation
# optional, default is 1 (do unlink)
unlink_old = 1
# attribute updates periodic flush timeout, seconds
# document attributes updated via UpdateAttributes() will be automatically dumped to disk this frequently
# optional, default is 0 (disable periodic flush)
#
# attr_flush_period = 900
# instance-wide ondisk_dict defaults (per-index values take precedence)
# optional, default is 0 (precache all dictionaries in RAM)
#
# ondisk_dict_default = 1
# MVA updates pool size (shared memory used for multi-valued attribute updates)
# shared between all instances of searchd, disables attr flushes!
# optional, default size is 1M
mva_updates_pool = 1M
# max allowed network packet size
# limits both query packets from clients, and responses from agents
# optional, default size is 8M
max_packet_size = 8M
# crash log path
# searchd will (try to) log crashed query to 'crash_log_path.PID' file
# optional, default is empty (do not create crash logs)
#
# crash_log_path = /usr/local/coreseek/var/log/crash
# max allowed per-query filter count
# optional, default is 256
max_filters = 256
# max allowed per-filter values count
# optional, default is 4096
max_filter_values = 4096
# socket listen queue length
# optional, default is 5
#
# listen_backlog = 5
# per-keyword read buffer size
# optional, default is 256K
#
# read_buffer = 256K
# unhinted read size (currently used when reading hits)
# optional, default is 32K
#
# read_unhinted = 32K
# max allowed per-batch query count (aka multi-query count)
# optional, default is 32
max_batch_queries = 32
# max common subtree document cache size, per-query
# optional, default is 0 (disable subtree optimization)
#
# subtree_docs_cache = 4M
# max common subtree hit cache size, per-query
# optional, default is 0 (disable subtree optimization)
#
# subtree_hits_cache = 8M
# multi-processing mode (MPM)
# known values are none, fork, prefork, and threads
# optional, default is fork on Unix-like systems and threads on Windows
#
workers = threads # for RT to work
# max threads to create for searching local parts of a distributed index
# optional, default is 0, which means disable multi-threaded searching
# should work with all MPMs (ie. does NOT require workers=threads)
#
# dist_threads = 4
# binlog files path; use empty string to disable binlog
# optional, default is build-time configured data directory
#
# binlog_path = # disable logging
# binlog_path = /usr/local/coreseek/var/data # binlog.001 etc will be created there
# binlog flush/sync mode
# 0 means flush and sync every second
# 1 means flush and sync every transaction
# 2 means flush every transaction, sync every second
# optional, default is 2
#
# binlog_flush = 2
# binlog per-file size limit
# optional, default is 128M, 0 means no limit
#
# binlog_max_log_size = 256M
# per-thread stack size, only affects workers=threads mode
# optional, default is 64K
#
# thread_stack = 128K
# per-keyword expansion limit (for dict=keywords prefix searches)
# optional, default is 0 (no limit)
#
# expansion_limit = 1000
# RT RAM chunks flush period
# optional, default is 0 (no periodic flush)
#
# rt_flush_period = 900
# query log file format
# optional, known values are plain and sphinxql, default is plain
#
# query_log_format = sphinxql
# version string returned to MySQL network protocol clients
# optional, default is empty (use Sphinx version)
#
# mysql_version_string = 5.0.37
# trusted plugin directory
# optional, default is empty (disable UDFs)
#
# plugin_dir = /usr/local/sphinx/lib
# default server-wide collation
# optional, default is libc_ci
# collation_server = utf8_general_ci
# server-wide locale for libc based collations
# optional, default is C
#
# collation_libc_locale = ru_RU.UTF-8
# threaded server watchdog (only used in workers=threads mode)
# optional, values are 0 and 1, default is 1 (watchdog on)
#
# watchdog = 1
# SphinxQL compatibility mode (legacy columns and their names)
# optional, default is 0 (SQL compliant syntax and result sets)
#
# compat_sphinxql_magics = 1
}