忙活了两天,终于有收获了。我的 Oracle 数据库字符集是 GBK,数据需要先解码,所以只好用 Python 脚本生成 xmlpipe2 数据,并在脚本里完成解码。
1. csft_xm.conf 配置
# Data source "testxml": declared as an xmlpipe2 source.
source testxml
{
type = xmlpipe2
# Command run to produce the xmlpipe2 document stream; here it launches the
# Python script testx.py (presumably the script shown later in this note).
xmlpipe_command = python C:/coreseek/var/tmp/testx.py
}
# Index "testxml": built from the source of the same name; index files are
# written under the location given by "path".
index testxml
{
source = testxml
path = C:/coreseek/var/data/sphinx/testxml
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
html_strip = 0
#charset_dictpath = /usr/local/mmseg3/etc/ # setting for BSD/Linux environments; must end with "/"
charset_dictpath = C:/coreseek/etc/ # setting for Windows environments; must end with "/"
# NOTE(review): the charset is zh_cn.utf-8, so the feeding script presumably
# has to emit UTF-8 -- which would explain why it decodes the GBK database
# values first. Confirm against the Coreseek documentation.
charset_type = zh_cn.utf-8
}
# Settings for the indexer program; only its memory limit is set here.
indexer
{
mem_limit = 128M
}
# Settings for the searchd daemon: port, log / query-log / pid file
# locations, and the limits and index-handling flags listed below.
searchd
{
port = 9312
log = C:/coreseek/var/data/log/sphinxsearch/searchd.log
query_log = C:/coreseek/var/data/log/sphinxsearch/query.log
read_timeout = 5
max_children = 30
pid_file = C:/coreseek/var/data/log/sphinxsearch/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
}
2. py 配置(Python 脚本)
# coding=utf-8
import sys
from loxun import XmlWriter
from StringIO import StringIO
import cx_Oracle
# Number of rows the cursor fetches from Oracle per round trip.  This takes
# over the role of the original 1000-row "page size": the old paging loop
# never applied its counters to the SQL, so it re-ran the full SELECT once
# per page and emitted every document several times.
FETCH_BATCH_SIZE = 1000


def decode_db_text(raw, encoding="gbk"):
    """Decode a byte string read from the database into unicode text.

    Args:
        raw: Column value as returned by the driver.
        encoding: Character encoding of the stored bytes (GBK by default).

    Returns:
        The decoded text when ``raw`` is a byte string.  ``None`` (a NULL
        column) and values that are not byte strings are returned unchanged
        so the caller decides how to represent them.
    """
    if raw is None or not isinstance(raw, bytes):
        return raw
    return raw.decode(encoding)


def write_documents(xml, cur):
    """Write one ``sphinx:document`` element per row of table YQXX.

    Args:
        xml: The XML writer the elements are appended to.
        cur: An open database cursor used to run the SELECT.
    """
    cur.arraysize = FETCH_BATCH_SIZE
    # The query is executed exactly once; rows are then streamed until
    # fetchone() reports the end of the result set with None.
    cur.execute('select id,name,aid from YQXX')
    for doc_id, name, aid in iter(cur.fetchone, None):
        xml.startTag("sphinx:document", {"id": doc_id})
        xml.startTag("name")
        text = decode_db_text(name)
        # A NULL name produces an empty element instead of crashing on
        # None.decode().
        if text is not None:
            xml.text(text)
        xml.endTag()
        xml.startTag("AID")
        xml.text(aid)
        xml.endTag()
        xml.endTag()


def main():
    """Print an xmlpipe2 document set built from table YQXX to stdout."""
    # NOTE(review): credentials are hard-coded in the connect string;
    # consider moving them to configuration.
    conn = cx_Oracle.connect("spln/fulong@192.168.0.88/orclgbk")
    try:
        cur = conn.cursor()
        out = StringIO()
        xml = XmlWriter(out)
        xml.addNamespace("sphinx", "http://www.beihai3651.com")
        # --- docset
        xml.startTag("sphinx:docset")
        # --- schema
        # NOTE(review): the schema declares the field as "NAME" while each
        # document uses the element "name"; confirm the indexer matches
        # these names case-insensitively.
        xml.startTag("sphinx:schema")
        xml.tag("sphinx:field", {"name": "NAME"})
        xml.tag("sphinx:attr", {"name": "AID", "type": "int"})
        xml.endTag()
        # --- /schema
        write_documents(xml, cur)
    finally:
        # Release the connection even when reading or writing fails.
        conn.close()
    xml.endTag()
    # --- /docset
    xml.close()
    # The parenthesised form behaves the same as the original Python 2
    # print statement for a single value.
    print(out.getvalue())


if __name__ == "__main__":
    main()