DB2自定义函数(C语言)实战

1      需求

某表存在如下数据:

4000000001|0|16217|43523|0

4000000004|0|740|464|0;4000000001|0|2717|15394|0

1030000004|0|1074|104|0;4000000004|0|104|104|0;4000000001|0|5219|3582|0

4000000001|0|150129|1285711|0;1040000004|0|4064|32526|0;1030000008|0|1921|15794|0;1030000005|0|2356|2103|0;4000000004|0|5520|3228|0;1030000004|0|856|4252|0;1040000015|0|954|132|0;1040000014|0|5013|494|0

其中“;”分割个数不定,每个“;”中分割的“|”为5部分


 

 

拆分结果示例:

 

如4000000004|0|740|464|0;4000000001|0|2717|15394|0,拆分成:

1       4000000004     0       740  464  0

2       4000000001     0       2717         15394       0


 

刚开始使用自定义函数之sql函数效率无法满足要求,206万数据需要867s,每日数据处理需要80分钟左右,经查资料,c自定义函数在逻辑较复杂时效率较高,sql与java效率高低存在争议,决定使用c自定义函数

 

2      编写程序:

 

#include <stdio.h>
#include <sqludf.h>
#include <sqlstate.h>

void SQL_API_FN gprsSplit(
			SQLUDF_VARCHAR *inputStr,    /* input */
			SQLUDF_INTEGER *intId,      /* output */
			SQLUDF_VARCHAR *charCol,      /* output */
			SQLUDF_BIGINT *intCol1,      /* output */
			SQLUDF_BIGINT *intCol2,      /* output */
			SQLUDF_BIGINT *intCol3,      /* output */
			SQLUDF_BIGINT *intCol4,      /* output */
			/* null indicators */
			SQLUDF_NULLIND *inputStr_ind,
			SQLUDF_NULLIND *intId_ind,
			SQLUDF_NULLIND *charCol_ind,
			SQLUDF_NULLIND *intCol1_ind,
			SQLUDF_NULLIND *intCol2_ind,
			SQLUDF_NULLIND *intCol3_ind,
			SQLUDF_NULLIND *intCol4_ind,
			SQLUDF_TRAIL_ARGS_ALL)
{
	char intValue[50+1] = { '\0' };
	char tempStr[50+1]={'\0'};
	char *tp = NULL;

	struct scratchMap {
		char *p;
		int rowNumber;
	};

	/* map the scratchpad */
	struct scratchMap *scratch = (struct scratchMap *)SQLUDF_SCRAT->data;

	*intId_ind   = -1;
	*charCol_ind = -1;
	*intCol1_ind = -1;
	*intCol2_ind = -1;
	*intCol3_ind = -1;
	*intCol4_ind = -1;



	switch (SQLUDF_CALLT) {
		case SQLUDF_TF_OPEN:		/* Open table.       */
			/* store the pointer on the scratchpad */
			scratch->p = inputStr;
			/**
			if ( (*inputStr_ind == -1) || (*scratch->p) ) {
				strcpy(SQLUDF_MSGTX, "input is null, inputStr=");
				strncat(SQLUDF_MSGTX, inputStr, SQLUDF_MSGTEXT_LEN - strlen(SQLUDF_MSGTX)-1);
				strncpy(SQLUDF_STATE, "38200", SQLUDF_SQLSTATE_LEN);
				return;
			}
			**/
			scratch->rowNumber = 0;

			break;

		case SQLUDF_TF_FETCH:		/* Fetch next row.   */
			/* count the row */
			scratch->rowNumber++;

			if ( !(*scratch->p) ) {
				/* end of string reached */
				strncpy(SQLUDF_STATE, SQL_NODATA_EXCEPTION, SQLUDF_SQLSTATE_LEN);
				return;
			}
			
			//ID字段
			*intId = scratch->rowNumber;
			*intId_ind = 0;

			//第一个字符串字段
			tp = tempStr;
			//sprintf(SQLUDF_MSGTX, "scratch->p=%s", scratch->p);
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(charCol,tempStr);
				*charCol_ind = 0;
			}
			else
			{
				//sprintf(SQLUDF_MSGTX, "tp=%p,tempStr=%p", tp,tempStr);
				//strcat(SQLUDF_MSGTX,", scratch->p=");
				//strcat(SQLUDF_MSGTX,scratch->p);
				//strcat(SQLUDF_MSGTX,", tempStr=");
				//strcat(SQLUDF_MSGTX,tempStr);
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 1 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1); 
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;


			//字段2
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol1) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 2", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol1_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 2 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1);    
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;

			//字段3
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol2) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 3", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol2_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 3 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1);      
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;


			//字段4
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol3) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 4", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol3_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 4 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1);  
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;


			//字段5
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != ';' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol4) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 5", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol4_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 5 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;
			if( (*scratch->p) == ';' ) scratch->p++;

			break;

		case SQLUDF_TF_CLOSE:		
			scratch->p = NULL;
			scratch->rowNumber = 0;
	}
}



 

3      编译程序:

 /opt/IBM/db2/V9.5/samples/c/bldrtn gprsSplit_udf mydbname

4      新建函数:

--drop function gprsSplit;
CREATE FUNCTION
gprsSplit(recordextensionVARCHAR(1025))
  
RETURNS TABLE (idint,busi_typeVARCHAR(10),durationbigint,up_flowbigint,down_flowbigint,feebigint)
  
SPECIFIC gprsSplit
  
EXTERNAL NAME 'gprsSplit_udf!gprsSplit'
  
LANGUAGE C
   PARAMETER STYLE SQL
   DETERMINISTIC
   --NOT DETERMINISTIC
  
--FENCED
  
NOT FENCED
   RETURNS NULL ON NULL INPUT
   NO SQL
   NO EXTERNAL ACTION
   SCRATCHPAD
   NO FINAL CALL
   DISALLOW PARALLEL
   --ALLOW PARALLEL EXECUTE ON ALL DATABASE PARTITIONS RESULT TABLEDISTRIBUTED
  
CARDINALITY 2;

 

 

期间经过几次优化:

1刚开始采用DETERMINISTICFENCEDDISALLOW PARALLELSCRATCHPAD,测试基准性能为680s

2、增加CARDINALITY 2性能提升一倍,为330s

3、改成NOT FENCED,性能大幅提升,为39s

4、虽然重复数据不多,但采用NOTDETERMINISTICDETERMINISTIC性能还是有1/3的提升

5ALLOW PARALLELEXECUTE ON ALL DATABASE PARTITIONS RESULT TABLE DISTRIBUTED无法使用,会导致数据重复,存在多少个节点,重复多少次

6NOT FENCED存在部分风险,此种模式自定义函数在DB2内存空间中运行,如果存在代码问题,可能会造成DB2崩溃

 

 

5      结果:

采用c自定义函数,并设置为NOT FENCED模式后,生产环境原数据处理,从原来的80分钟,缩短为当前的12分钟,性能大幅提升,瓶颈基本为磁盘阵列IO速度,完全满足当前要求

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值