sphinx 采用c扩展xmlpipe2数据源 .

由于要采用 Sphinx 作为全文检索引擎,而我的很多数据都直接存放在硬盘文件里;如果把这些数据全部导入数据库,不仅过程痛苦,而且数据量很大,仅仅为了全文检索就让 MySQL 存储这些数据也过于浪费。所以我决定采用 xmlpipe2 数据源。由于 C 语言的 IO 操作能力很强,为了效率,我选择了自己从未写过的 C 语言来实现。废话少说,代码如下:

 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mysql.h>
#include <iconv.h>

#include "dictionary.h"
#include "iniparser.h"

/* Name of the configuration file. */
#define INI_FILE_NAME      "sanshi_xmlpipe.ini"

/* Delimiters used inside configuration values. */
#define SQL_SIGN           "|"   /* separates multiple SQL statements */
#define ATTRIBUTE_SIGN     ":"   /* separates a field's name from its value */
#define DIELD_SIGN         ","   /* separates multiple fields */

/* Size limits. */
#define READFILE_MAX_LEN   1024  /* length used when reading a file */
#define FILE_NAME_MAX_LEN  1024  /* maximum length of path + file name */

/*
 * String slots for the program's ini-file settings, one per setting.
 *
 * The member order is significant: the global `conf` object is filled by a
 * positional initializer, so members must not be reordered.
 */
typedef struct tag_sanshi_ini_config
{
	/* Database connection settings. */
	char *db_name;
	char *db_user;
	char *db_pwd;
	char *db_host;

	/* SQL statements: before, main query, after. */
	char *sql_query_pre;
	char *sql_query;
	char *sql_query_post;

	/* xmlpipe2 output settings. */
	char *sphinx_schema;
	char *sphinx_id;
	char *sphinx_file;
	char *sphinx_other_field;

	/* Base directory setting for files. */
	char *file_dir;
} sanshi_ini_config;

sanshi_ini_config conf={
	"db:db_name",
	"db:db_user",
	"db:db_pwd",
	"db:db_host",
	"sql:sql_query_pre",
	"sql:sql_query",
	"sql:sql_query_post",
	"xml:schema",
	"xml:index_id",
	"xml:file_field",
	"xml:other_field",
	"file:base_dir"
	};

/*
 * A field name paired with an integer id.
 */
typedef struct tag_sanshi_field
{
	char *field;   /* field name */
	int id;        /* integer id associated with the field */
} sanshi_field;

/**
 * Split a "name<ATTRIBUTE_SIGN>id" descriptor (e.g. "LogIP:4") into its
 * name and numeric id.
 *
 * The input buffer is modified in place: the first occurrence of
 * ATTRIBUTE_SIGN is overwritten with a NUL terminator so that the returned
 * `field` pointer, which aliases field_str, names only the text before the
 * separator. No memory is allocated; the result is valid only as long as
 * field_str is.
 *
 * @param field_str  Writable NUL-terminated descriptor. May be NULL.
 * @return `field` is field_str itself (NULL when field_str is NULL); `id` is
 *         the atoi() value of the text following the separator, or 0 when
 *         field_str is NULL or contains no separator.
 */
sanshi_field parser_field(char * field_str)
{
	sanshi_field parsed;
	char *sep;

	parsed.field = field_str;
	parsed.id = 0;

	if (field_str == NULL)
	{
		return parsed;
	}

	sep = strstr(field_str, ATTRIBUTE_SIGN);
	if (sep == NULL)
	{
		/* No separator: the whole string is the name and there is no id. */
		return parsed;
	}

	/* Read the id before the separator is overwritten below. */
	parsed.id = atoi(sep + strlen(ATTRIBUTE_SIGN));

	/* Cut the string at the separator so `field` holds just the name. */
	sep[0] = '\0';

	return parsed;
}

/**
 * Copy the contents of a text file to standard output.
 *
 * The file is read in chunks of at most READFILE_MAX_LEN - 1 characters and
 * each chunk is written verbatim. The text is never interpreted as a printf
 * format string, so '%' characters in the file are emitted unchanged.
 *
 * Nothing is printed and no error is reported when file_name is NULL or the
 * file cannot be opened.
 *
 * @param file_name  Path of the file to print. May be NULL.
 */
void print_file_content(char * file_name)
{
	FILE *fp;
	char line[READFILE_MAX_LEN];

	if (file_name == NULL)
	{
		return;
	}

	fp = fopen(file_name, "r");
	if (fp == NULL)
	{
		return;
	}

	while (fgets(line, sizeof line, fp) != NULL)
	{
		/* fputs, not printf(line): file data must not act as a format. */
		fputs(line, stdout);
	}

	fclose(fp);
}

/**
 * Execute one or more SQL statements, separated by SQL_SIGN, on an open
 * MySQL connection and discard any result sets they produce.
 *
 * sql_str is tokenized in place with strtok(), so it is modified and must be
 * writable. Because strtok() keeps internal state, this function must not be
 * called from inside another strtok() loop.
 *
 * On the first failing statement a diagnostic is written to stderr (stdout
 * is left alone so it cannot corrupt program output), the connection is
 * closed and the process exits with EXIT_FAILURE.
 *
 * @param mysql_con  Open connection handle. If NULL the call is a no-op.
 * @param sql_str    Statement list such as "stmt1|stmt2". If NULL (for
 *                   example when the setting is absent) the call is a no-op.
 */
void exec_mysql_query(MYSQL * mysql_con,char * sql_str)
{
	char *statement;

	if (mysql_con == NULL || sql_str == NULL)
	{
		return;
	}

	for (statement = strtok(sql_str, SQL_SIGN);
	     statement != NULL;
	     statement = strtok(NULL, SQL_SIGN))
	{
		MYSQL_RES *discarded;
		int query_error_no = mysql_query(mysql_con, statement);

		if (query_error_no != 0)
		{
			fprintf(stderr, "ERROR sql=%s \nERROR NO=%d \nERROR msg= %s \n",
			        statement, query_error_no, mysql_error(mysql_con));
			mysql_close(mysql_con);
			exit(EXIT_FAILURE);
		}

		/* Retrieve and release any result set before the next statement. */
		discarded = mysql_store_result(mysql_con);
		if (discarded != NULL)
		{
			mysql_free_result(discarded);
		}
	}
}

int main(int argc,char * argv[])
{
	dictionary * ini;
	
	MYSQL mysql,*mysql_con;
	MYSQL_RES *result;
	MYSQL_ROW row;
	int query_error_no,sphinx_id;
	
	sanshi_field file_field;

	ini = iniparser_load(INI_FILE_NAME);
	//get ini config mysql set 
	conf.db_name = iniparser_getstring(ini,conf.db_name,"test");
	conf.db_user = iniparser_getstring(ini,conf.db_user,"root");
	conf.db_pwd = iniparser_getstring(ini,conf.db_pwd,"");
	conf.db_host = iniparser_getstring(ini,conf.db_host,"localhost");
	conf.sql_query_pre = iniparser_getstring(ini,conf.sql_query_pre,NULL);
	conf.sql_query = iniparser_getstring(ini,conf.sql_query,NULL);
	conf.sql_query_post = iniparser_getstring(ini,conf.sql_query_post,NULL);
	conf.sphinx_schema = iniparser_getstring(ini,conf.sphinx_schema,NULL);
	sphinx_id = iniparser_getint(ini,conf.sphinx_id,0);
	conf.sphinx_file = iniparser_getstring(ini,conf.sphinx_file,NULL);
	file_field = parser_field(conf.sphinx_file);

	conf.sphinx_other_field = iniparser_getstring(ini,conf.sphinx_other_field,NULL);
	conf.file_dir = iniparser_getstring(ini,conf.file_dir,"./");
	
	//printf("db_name=%s /t db_user=%s /t db_pwd=%s /t db_host=%s /n",conf.db_name,conf.db_user,conf.db_pwd,conf.db_host);
	//mysql connect 
	mysql_init(&mysql);
	mysql_con =mysql_real_connect(&mysql,conf.db_host,conf.db_user,conf.db_pwd,conf.db_name,0,NULL,0);
	if(mysql_con == NULL)
	{
		printf("ERROR: connect mysql fail! plaese check ini file in set/n %s /n",mysql_error(&mysql));
		exit(0);
	}
	//printf("mysql connect suc!/n");

	//exec sql
	exec_mysql_query(mysql_con,conf.sql_query_pre);
	
	query_error_no = mysql_query(mysql_con,conf.sql_query);
	
	if(query_error_no !=0)
	{
		printf("ERROR sql=%s /nERROR NO=%d /nERROR msg= %s /n",conf.sql_query,query_error_no,mysql_error(mysql_con));
		mysql_close(mysql_con);
		exit(0);
	}
	//printf("exec sql_query : %s /n",conf.sql_query);
	result = mysql_store_result(mysql_con);
	//echo xml header
	printf("<?xml version=/"1.0/" encodeing=/"utf-8/"?>/n<sphinx:docset>/n%s/n",conf.sphinx_schema);
	//printf("%s/n",conf.sphinx_other_field);
	while(row = mysql_fetch_row(result))
	{
		printf("<sphinx:document id=/"%d/">/n",(row[sphinx_id]?row[sphinx_id]:0));
		char * field_str;
		char field_tmp[strlen(conf.sphinx_other_field)+1];
		char temp_file_name[FILE_NAME_MAX_LEN];
		memcpy(field_tmp,conf.sphinx_other_field,strlen(conf.sphinx_other_field)+1);
		field_str = strtok( field_tmp, DIELD_SIGN);
		while( field_str != NULL )
		{
			//printf("%s/n",field_str);
			sanshi_field other_field = parser_field(field_str);
			printf("


 

本人第一次写 C,有些地方优化不够,还望各位指点。

 

配置文件如下:

[db]
db_host=127.0.0.1
db_name=test
db_pwd=123456
db_user=root
[sql]
sql_query_pre=select * from log|select * from log
sql_query = select * from log
sql_query_post =
[xml]
schema=\
<sphinx:schema>\
<sphinx:field name="LogActionType"/>\
<sphinx:field name="LogDataType"/>\
<sphinx:attr name="LogTime" type="timestamp"/>\
<sphinx:attr name="LogIP" type="int" bits="16" default="1"/>\
</sphinx:schema>
index_id=0
file_field=LogActionType:1
other_field=LogDataType:2,LogTime:6,LogIP:4
[file]
base_dir=


 

 

 

备注:该程序的ini解析用到的是iniparser3.0b 的源代码,也就是依赖的2个h文件

#include "dictionary.h"

#include "iniparser.h"

       编译的命令为:

gcc -I /data/app/mysql/include/mysql/ -L /data/app/mysql/lib/mysql/ -l mysqlclient -g -o sanshi sanshi_xmlpipe.c dictionary.c iniparser.c

注意 mysql 的库路径,以及文件名。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值