MySQL数据库通用文本文件导出工具Mysqluldr,指定SQL指定分隔符远程从MySQL数据库导出文本文件到本地

本文解决的问题:指定SQL指定分隔符远程从MySQL数据库导出数据文件到本地。

ETL开发过程中数据库与文本文件是交互最多的一种形式,其中涉及到数据库的表落地文件,指定分隔符,以前都是Oracle有现成的数据导出工具sqluldr、ociuldr。最近转型MySQL数据库,同样的业务场景,尝试了Mysql的select tablename into outfile只能在数据库服务端运行,不满足要求。

借鉴Oracle的ociuldr/sqluldr使用经验,周末时间开发了mysqluldr,其中部分函数代码几乎照搬ociuldr/sqluldr的源码,改动了部分数据类型,(ociuldr/sqluldr源码已在github开源),特此声明,不用做商用,仅作为学习交流。

用了大概半天时间开发调试,导出百万数据,效率客观,虚拟机资源有限没有更多的数据可以测试。测试结果如下,源码及Makefile也在本文档中一并发布。

[root@hadoop qcp]# date
Sat Sep  3 20:56:48 CST 2022
[root@hadoop qcp]# ./mysqluldr user=root pass=root host=127.0.0.1 port=3307 db=test2 query='select * from student' field='^|'
Welcome to use this Unload Data Tools From Mysql DB 
Unload data from MySQL DB Release 1.0 .2022.09.03 Created by qichaopu 
connected mysql user:root
  100000 rows exported at 2022-09-03 20:56:59
  200000 rows exported at 2022-09-03 20:56:59
  300000 rows exported at 2022-09-03 20:56:59
  400000 rows exported at 2022-09-03 20:56:59
  500000 rows exported at 2022-09-03 20:56:59
  600000 rows exported at 2022-09-03 20:56:59
  700000 rows exported at 2022-09-03 20:56:59
  800000 rows exported at 2022-09-03 20:56:59
  900000 rows exported at 2022-09-03 20:56:59
 1000000 rows exported at 2022-09-03 20:56:59
 1100000 rows exported at 2022-09-03 20:56:59
 1200000 rows exported at 2022-09-03 20:56:59
 1300000 rows exported at 2022-09-03 20:56:59
 1400000 rows exported at 2022-09-03 20:57:00
 1500000 rows exported at 2022-09-03 20:57:00
 1600000 rows exported at 2022-09-03 20:57:00
 1700000 rows exported at 2022-09-03 20:57:00
 1800000 rows exported at 2022-09-03 20:57:00
 1900000 rows exported at 2022-09-03 20:57:00
 2000000 rows exported at 2022-09-03 20:57:00
 2100000 rows exported at 2022-09-03 20:57:00
 2200000 rows exported at 2022-09-03 20:57:00
 2300000 rows exported at 2022-09-03 20:57:00
 2400000 rows exported at 2022-09-03 20:57:00
 2500000 rows exported at 2022-09-03 20:57:00
 2600000 rows exported at 2022-09-03 20:57:00
 2700000 rows exported at 2022-09-03 20:57:00
 2800000 rows exported at 2022-09-03 20:57:00
 2900000 rows exported at 2022-09-03 20:57:00
 3000000 rows exported at 2022-09-03 20:57:00
 3100000 rows exported at 2022-09-03 20:57:00
 3200000 rows exported at 2022-09-03 20:57:00
 3300000 rows exported at 2022-09-03 20:57:00
 3400000 rows exported at 2022-09-03 20:57:01
 3500000 rows exported at 2022-09-03 20:57:01
 3600000 rows exported at 2022-09-03 20:57:01
 3700000 rows exported at 2022-09-03 20:57:01
 3800000 rows exported at 2022-09-03 20:57:01
 3900000 rows exported at 2022-09-03 20:57:01
 4000000 rows exported at 2022-09-03 20:57:01
 4100000 rows exported at 2022-09-03 20:57:01
 4194304 rows exported at 2022-09-03 20:57:01
[root@hadoop qcp]# ls -rtl mysqluldrdata.txt 
-rw-r--r-- 1 root root 109051904 Sep  3 20:57 mysqluldrdata.txt

源码如下,与上图的测试结果有所调整,代码进行了优化,

/*
   NAME
     mysqluldr.c - unload text data from mysql database
   MODIFIED   (MM/DD/YY) 
     qichaopu         2022.09.03 -  
*/
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

#include <mysql.h>
#define MAX_ITEM_BUFFER_SIZE    33
#if defined(_WIN32)
#define STRNCASECMP memicmp
#else
#define STRNCASECMP strncasecmp
#endif
#define  MIN(a,b) ((a) > (b) ? (b) : (a))
int     return_code = 0;
void printRow(char *fname,MYSQL_RES *res,int cols,char *field,int flen,int rlen,char *record);
int convertOption(const char *src, char* dst, int mlen);
FILE *openFile(const char *fname, char tempbuf[], int batch);
char getHexIndex(char c);
void  printRowInfo(int row);
int main(int argc, char *argv[])
{
  char user[32]="";
  char pass[32]="";
  char host[32]="";
  char port[32]="";
  char db[32]="";
  char query[8192]="";
  char fname[255]="mysqluldrdata.txt";
  char field[32]=",";
  char record[32]="\n";
  int n,i,argcount=0;
  int v_help=0;
  int rlen,flen;
  rlen=flen=1;
  for(i=0;i<argc;i++)
  {
    if (STRNCASECMP("user=",argv[i],5)==0)
    {
        memset(user,0,32);
        memcpy(user,argv[i]+5,MIN(strlen(argv[i]) - 5,31));
    }
	else if (STRNCASECMP("pass=",argv[i],5)==0)
    {
        memset(pass,0,32);
        memcpy(pass,argv[i]+5,MIN(strlen(argv[i]) - 5,31));
    }
	else if (STRNCASECMP("host=",argv[i],5)==0)
    {
        memset(host,0,32);
        memcpy(host,argv[i]+5,MIN(strlen(argv[i]) - 5,31));
    }
	else if (STRNCASECMP("port=",argv[i],5)==0)
    {
        memset(port,0,32);
        memcpy(port,argv[i]+5,MIN(strlen(argv[i]) - 5,31));
    }
	else if (STRNCASECMP("db=",argv[i],3)==0)
    {
        memset(db,0,32);
        memcpy(db,argv[i]+3,MIN(strlen(argv[i]) - 3,31));
    }
	else if (STRNCASECMP("query=",argv[i],6)==0)
    {
        memset(query,0,8192);
        memcpy(query,argv[i]+6,MIN(strlen(argv[i]) - 6,8191));
    }
	else if (STRNCASECMP("field=",argv[i],6)==0)
    {
        memset(field,0,32);
        flen=convertOption(argv[i]+6,field,MIN(strlen(argv[i]) - 6,31));
    }
	else if (STRNCASECMP("record=",argv[i],7)==0)
    {
        memset(record,0,32);
        rlen=convertOption(argv[i]+7,record,MIN(strlen(argv[i]) - 7,31));
    }
	else if (STRNCASECMP("file=",argv[i],5)==0)
    {
        memset(fname,0,132);
        memcpy(fname,argv[i]+5,MIN(strlen(argv[i]) - 5,254));
    }
	else if (STRNCASECMP("-help",argv[i],4)==0)
    {
    	  v_help=1;
    }
  }
  if (strlen(user)==0 || strlen(query)==0|| v_help)
  {
    if (v_help||1==1)
    {
       printf("mysqluldr: Unload data from MySQL DB Release 1.0  2022.09.03 Created by qichaopu\n");
       printf("\n");
       printf("Usage: %s user=... host=... port=... db=... file=...\n",argv[0]);
       printf("Notes:\n");
       printf("       user  = username\n");
	   printf("       pass  = password\n");
	   printf("       host  = mysql host\n");
	   printf("       port  = port\n");
	   printf("       db  	= database name\n");
       printf("       query = select statement\n");
       printf("       field = seperator string between fields\n");
       printf("       record= seperator string between records\n");
       printf("       file  = output file name(default: mysqluldrdata.txt)\n");
       printf("\n");
       printf("  for field and record, you can use '0x' to specify hex character code,\n");
       printf("  \\r=0x%02x \\n=0x%02x |=0x%0x ,=0x%02x \\t=0x%02x\n",'\r','\n','|',',','\t');
       printf("  for more hex character code,you can use unix command:man ascii\n");
       exit(0);
    }
  }	
  printf("Welcome to use this Unload Data Tools From Mysql DB \n");
  printf("Unload data from MySQL DB Release 1.0 .2022.09.03 Created by qichaopu \n");
	MYSQL dbconn;
	MYSQL_RES *res;
	
	mysql_init(&dbconn);
	if(mysql_real_connect(&dbconn,host,user,pass,db,atoi(port),NULL,0))
	{
		printf("connected mysql user:%s\n",user);
	}
	else
	{ 
		printf("connected failed mysql user:%s\n",user);
	}
	mysql_query(&dbconn,"set names utf8");
	mysql_query(&dbconn,query);
	res=mysql_use_result(&dbconn);
	if(res==NULL)
	{
		printf("data result is empty\n");
		return 0;
	}
	int cols=mysql_field_count(&dbconn);
	printRow(fname,res,cols,field,flen,rlen,record);
	mysql_free_result(res);
	mysql_close(&dbconn);
	
}
void printRow(char *fname,MYSQL_RES *res,int cols,char *field,int flen,int rlen,char *record)
{
	MYSQL_ROW row;
	char tempbuf[512];
	FILE *fp;
	int bcount=1;
	memset(tempbuf,0,512);
	int rc=0;
	if((fp = openFile(fname,tempbuf,bcount)) == NULL) 
  {
    fprintf(stdout,"ERROR -- Cannot write to file : %s\n", tempbuf);
    return_code = 6;
  }
	int c=0;
	printRowInfo(rc);
	while(row=mysql_fetch_row(res))
		{
			c=0;
			rc++;
			for(c=0;c<cols;c++)
			{
                if(row[c]!=NULL)
				fprintf(fp, "%s",row[c]);
				if (c < cols - 1)
				fwrite(field,flen,1,fp);
			}
			fwrite(record,rlen,1,fp);
			if(rc%500000==0)
				printRowInfo(rc);
		}
		fclose(fp);
		if(rc%500000!=0)
		printRowInfo(rc);
}
void  printRowInfo(int row)
{
	time_t now = time(0);
	struct tm *ptm = localtime(&now);
	fprintf(stdout,"%8u rows exported at %04d-%02d-%02d %02d:%02d:%02d\n",
                row,
		ptm->tm_year + 1900,
		ptm->tm_mon + 1,
		ptm->tm_mday,
		ptm->tm_hour,
		ptm->tm_min,
		ptm->tm_sec);
        fflush(stdout);
}
int convertOption(const char *src, char* dst, int mlen)
{
   int i,len,pos;
   char c,c1,c2; 
   i=pos=0;
   len = strlen(src);
   while(i<MIN(mlen,len))
   {
      if ( *(src+i) == '0')
      {
          if (i < len - 1)
          {
             c = *(src+i + 1);
             switch(c)
             {
                 case 'x':
                 case 'X':
                   if (i < len - 3)
                   {
                       c1 = getHexIndex(*(src+i + 2));
                       c2 = getHexIndex(*(src+i + 3));
                       *(dst + pos) = (char)((c1 << 4) + c2);
                       i=i+2;
                   }
                   else if (i < len - 2)
                   {
                       c1 = *(src+i + 2);
                       *(dst + pos) = c1;
                       i=i+1;
                   }
                   break;
                 default:
                   *(dst + pos) = c;
                   break;
             }
             i = i + 2;
          }
          else
          {
            i ++;
          }
      }
      else
      {
          *(dst + pos) = *(src+i);
          i ++;
      }
      pos ++;
   }
   *(dst+pos) = '\0';
   return pos;
}
FILE *openFile(const char *fname, char tempbuf[], int batch)
{
   FILE *fp=NULL;
   int i, j, len;
   time_t now = time(0);
   struct tm *ptm = localtime(&now);   
   len = strlen(fname);
   j = 0;
   for(i=0;i<len;i++)
   {
      tempbuf[j++] = *(fname+i);
   }
   tempbuf[j]=0;
   if (tempbuf[0] == '+')
       fp = fopen(tempbuf+1, "ab+");
   else
       fp = fopen(tempbuf, "wb+");
   return fp;
}
char  getHexIndex(char c)
{
   if ( c >='0' && c <='9') return c - '0';
   if ( c >='a' && c <='f') return 10 + c - 'a';
   if ( c >='A' && c <='F') return 10 + c - 'A';
   return 0;
}

Makefile如下:

MYSQL_PUBLIC_H=/data/mysql/include
CC = gcc
CPP = g++
OBJS = mysqluldr.o
PROG = mysqluldr
 
all:$(PROG)
 
$(PROG):$(OBJS)
	@echo "[link] ... "
	@echo "---------------------"
	$(CC) -g -o $(PROG) $(OBJS) -lmysqlclient
	@echo
 
clean:
	rm -f $(PROG) $(OBJS)
 
 
.SUFFIXES: .cpp .c
 
.c.o:
	@echo "[$*.o]"
	@echo "---------------------"
	$(CC) -g -o $*.o -I$(MYSQL_PUBLIC_H) -c $*.c
	@echo

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

pentiumcpu

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值