DB2自定义函数（C语言）实战

最新推荐文章于 2023-11-22 21:52:38 发布

huanghelang

最新推荐文章于 2023-11-22 21:52:38 发布

阅读量2.2k

点赞数

分类专栏：技术违章文章标签： db2 语言 c integer output parallel

本文链接：https://blog.csdn.net/huanghelang/article/details/7670446

版权

技术违章专栏收录该内容

25 篇文章 0 订阅

订阅专栏

1 需求

某表存在如下数据：

4000000001|0|16217|43523|0

4000000004|0|740|464|0;4000000001|0|2717|15394|0

1030000004|0|1074|104|0;4000000004|0|104|104|0;4000000001|0|5219|3582|0

4000000001|0|150129|1285711|0;1040000004|0|4064|32526|0;1030000008|0|1921|15794|0;1030000005|0|2356|2103|0;4000000004|0|5520|3228|0;1030000004|0|856|4252|0;1040000015|0|954|132|0;1040000014|0|5013|494|0

其中“；”分割个数不定，每个“；”中分割的“|”为5部分

拆分结果示例：

如4000000004|0|740|464|0;4000000001|0|2717|15394|0，拆分成：

1       4000000004     0       740  464  0

2       4000000001     0       2717         15394       0

刚开始使用自定义函数之sql函数效率无法满足要求，206万数据需要867s，每日数据处理需要80分钟左右，经查资料，c自定义函数在逻辑较复杂时效率较高，sql与java效率高低存在争议，决定使用c自定义函数

2 编写程序：

#include <stdio.h>
#include <sqludf.h>
#include <sqlstate.h>

void SQL_API_FN gprsSplit(
			SQLUDF_VARCHAR *inputStr,    /* input */
			SQLUDF_INTEGER *intId,      /* output */
			SQLUDF_VARCHAR *charCol,      /* output */
			SQLUDF_BIGINT *intCol1,      /* output */
			SQLUDF_BIGINT *intCol2,      /* output */
			SQLUDF_BIGINT *intCol3,      /* output */
			SQLUDF_BIGINT *intCol4,      /* output */
			/* null indicators */
			SQLUDF_NULLIND *inputStr_ind,
			SQLUDF_NULLIND *intId_ind,
			SQLUDF_NULLIND *charCol_ind,
			SQLUDF_NULLIND *intCol1_ind,
			SQLUDF_NULLIND *intCol2_ind,
			SQLUDF_NULLIND *intCol3_ind,
			SQLUDF_NULLIND *intCol4_ind,
			SQLUDF_TRAIL_ARGS_ALL)
{
	char intValue[50+1] = { '\0' };
	char tempStr[50+1]={'\0'};
	char *tp = NULL;

	struct scratchMap {
		char *p;
		int rowNumber;
	};

	/* map the scratchpad */
	struct scratchMap *scratch = (struct scratchMap *)SQLUDF_SCRAT->data;

	*intId_ind   = -1;
	*charCol_ind = -1;
	*intCol1_ind = -1;
	*intCol2_ind = -1;
	*intCol3_ind = -1;
	*intCol4_ind = -1;



	switch (SQLUDF_CALLT) {
		case SQLUDF_TF_OPEN:		/* Open table.       */
			/* store the pointer on the scratchpad */
			scratch->p = inputStr;
			/**
			if ( (*inputStr_ind == -1) || (*scratch->p) ) {
				strcpy(SQLUDF_MSGTX, "input is null, inputStr=");
				strncat(SQLUDF_MSGTX, inputStr, SQLUDF_MSGTEXT_LEN - strlen(SQLUDF_MSGTX)-1);
				strncpy(SQLUDF_STATE, "38200", SQLUDF_SQLSTATE_LEN);
				return;
			}
			**/
			scratch->rowNumber = 0;

			break;

		case SQLUDF_TF_FETCH:		/* Fetch next row.   */
			/* count the row */
			scratch->rowNumber++;

			if ( !(*scratch->p) ) {
				/* end of string reached */
				strncpy(SQLUDF_STATE, SQL_NODATA_EXCEPTION, SQLUDF_SQLSTATE_LEN);
				return;
			}
			
			//ID字段
			*intId = scratch->rowNumber;
			*intId_ind = 0;

			//第一个字符串字段
			tp = tempStr;
			//sprintf(SQLUDF_MSGTX, "scratch->p=%s", scratch->p);
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(charCol,tempStr);
				*charCol_ind = 0;
			}
			else
			{
				//sprintf(SQLUDF_MSGTX, "tp=%p,tempStr=%p", tp,tempStr);
				//strcat(SQLUDF_MSGTX,", scratch->p=");
				//strcat(SQLUDF_MSGTX,scratch->p);
				//strcat(SQLUDF_MSGTX,", tempStr=");
				//strcat(SQLUDF_MSGTX,tempStr);
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 1 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1); 
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;


			//字段2
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol1) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 2", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol1_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 2 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1);    
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;

			//字段3
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol2) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 3", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol2_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 3 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1);      
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;


			//字段4
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != '|' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol3) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 4", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol3_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 4 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//memset(tempStr,'\0',20+1);  
			if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;


			//字段5
			tp = tempStr;
			while( *scratch->p && ( (*scratch->p) != ';' ) ) 
			{ 
				*tp++ = *scratch->p++; 
			}
			if(tp != tempStr)
			{
				*tp = '\0';
				strcpy(intValue,tempStr);
				if (sscanf(intValue, "%lld", intCol4) != 1) {
					sprintf(SQLUDF_MSGTX, "Invalid integer value %s in row %d col 5", intValue, scratch->rowNumber);
					strncpy(SQLUDF_STATE, "38202", SQLUDF_SQLSTATE_LEN);
					return;
				}
				*intCol4_ind = 0;
			}
			else
			{
				sprintf(SQLUDF_MSGTX, "Could not read string value in line %d col 5 : %s", scratch->rowNumber,scratch->p);
				strncpy(SQLUDF_STATE, "38201", SQLUDF_SQLSTATE_LEN);
				return;
			}

			//if( ( (*scratch->p) == ';' ) || ( (*scratch->p) == '|' ) ) scratch->p++;
			if( (*scratch->p) == ';' ) scratch->p++;

			break;

		case SQLUDF_TF_CLOSE:		
			scratch->p = NULL;
			scratch->rowNumber = 0;
	}
}

3 编译程序：

/opt/IBM/db2/V9.5/samples/c/bldrtn gprsSplit_udf mydbname

4 新建函数：

--drop function gprsSplit;
CREATE FUNCTION gprsSplit(recordextensionVARCHAR(1025))
   RETURNS TABLE (idint,busi_typeVARCHAR(10),durationbigint,up_flowbigint,down_flowbigint,feebigint)
   SPECIFIC gprsSplit
   EXTERNAL NAME 'gprsSplit_udf!gprsSplit'
   LANGUAGE C
   PARAMETER STYLE SQL
   DETERMINISTIC
   --NOT DETERMINISTIC
   --FENCED
   NOT FENCED
   RETURNS NULL ON NULL INPUT
   NO SQL
   NO EXTERNAL ACTION
   SCRATCHPAD
   NO FINAL CALL
   DISALLOW PARALLEL
   --ALLOW PARALLEL EXECUTE ON ALL DATABASE PARTITIONS RESULT TABLEDISTRIBUTED
   CARDINALITY 2;

期间经过几次优化：

1、刚开始采用DETERMINISTIC、FENCED、DISALLOW PARALLEL、SCRATCHPAD，测试基准性能为680s

2、增加CARDINALITY 2性能提升一倍，为330s

3、改成NOT FENCED，性能大幅提升，为39s

4、虽然重复数据不多，但采用NOTDETERMINISTIC比DETERMINISTIC性能还是有1/3的提升

5、ALLOW PARALLELEXECUTE ON ALL DATABASE PARTITIONS RESULT TABLE DISTRIBUTED无法使用，会导致数据重复，存在多少个节点，重复多少次

6、NOT FENCED存在部分风险，此种模式自定义函数在DB2内存空间中运行，如果存在代码问题，可能会造成DB2崩溃

5 结果：

采用c自定义函数，并设置为NOT FENCED模式后，生产环境原数据处理，从原来的80分钟，缩短为当前的12分钟，性能大幅提升，瓶颈基本为磁盘阵列IO速度，完全满足当前要求

huanghelang

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
DB2自定义函数（C语言）实战

1 需求某表存在如下数据：4000000001|0|16217|43523|04000000004|0|740|464|0;4000000001|0|2717|15394|01030000004|0|1074|104|0;4000000004|0|104|104|0;4000000001|0|5219|3582|04000000001|0|150129|1285711
复制链接

扫一扫

专栏目录