/*
* codeconv.c
*
* Heidun IDS developer team
* Created on: Jun 11, 2009
* Author: Xiaodong Zhong
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "our_design_config_init.h"
#include "code_table.h"
/* Short aliases used throughout this file: uchar for raw bytes, ushort for
 * values held in an unsigned short (UTF-16 code units and table codes). */
typedef unsigned char uchar;
typedef unsigned short ushort;
/*
 * EBCDIC -> ASCII lookup table, indexed by the EBCDIC byte value (0x00-0xFF).
 * The initializer is laid out as 16 groups of 16 entries (9 + 7 per line
 * pair), one group per high nibble of the index.
 * Many entries are 0x2E ('.'), which appears to serve as the placeholder for
 * bytes without a printable ASCII equivalent. Entries 0x00-0x3F map to
 * themselves, except 0x30, 0x31 and 0x3E, which map to 0x2E.
 */
static u_int8_t EBCDIC_translate_ASCII [ 256 ] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,
0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71,
0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,
0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51,
0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
};
/*
 * Translate a buffer from EBCDIC to ASCII in place.
 *
 * buf   - bytes to convert; each one is overwritten with its entry in
 *         EBCDIC_translate_ASCII
 * bytes - number of bytes of buf to convert
 */
void EBCDIC_to_ASCII(u_int8_t *buf, u_int32_t bytes)
{
    u_int32_t pos = 0;

    while (pos < bytes) {
        buf[pos] = EBCDIC_translate_ASCII[buf[pos]];
        pos++;
    }
}
/*
 * Translate a single EBCDIC byte to ASCII.
 *
 * c - EBCDIC byte value, used as the index into EBCDIC_translate_ASCII
 * Returns the table entry for c.
 */
u_int8_t EBCDIC_to_ASCII1(u_int8_t c)
{
    const u_int8_t ascii = EBCDIC_translate_ASCII[c];

    return ascii;
}
/**
 * UTF-16 byte-order flag shared by the converters in this file:
 *   0    ---- Little Endian UTF-16 string (low byte first)
 *   != 0 ---- Big Endian UTF-16 string (high byte first)
 * NOTE(review): despite its name, a non-zero value selects the big-endian
 * (high byte first) code paths; SetEndian() stores 1 when the CPU keeps the
 * high byte of a 16-bit value first in memory.
 **/
static int isLittleEndian = 0;
/**
* 功 能: 设置 UTF-16 编码方式
* 输 入:
* 0 ----Little Endian UTF-16 串(低字节在前)
* 1 ----Big Endian UTF-16 串(高字节在前)
* 输 出:
* 返 回:
**/
int SetEndian()
{
uchar *p = NULL;
ushort tst = 0x1234;
/* 获取CPU字节码顺序 */
p = (uchar *)&tst;
if (*p == 0x12) isLittleEndian = 1;
else if(*p == 0x34) isLittleEndian = 0;
else
{
LogMessage( "Unknown CPU Bytes Order!/n");
return -1;
}
return 0;
}
/**
* 功 能: 将 UTF-16 编码转成 UTF-8 编码
* 输 入: inb ----utf-16字串
* inlenb ----utf-16字串长度(字节数)
* outlen ----输出缓冲区最大值
* 输 出: out ----utf-8字串
* outlen ----utf-8字串长度
* 返 回: >= 0 ----转换后utf-8字串长度
* -1 ----转换出错
**/
int UTF16ToUTF8(uchar *out, int *outlen, const uchar *inb, int *inlenb)
{
uchar* outstart = out;
const uchar* processed = inb;
uchar* outend = out + *outlen;
ushort* in = (unsigned short*) inb;
ushort* inend;
unsigned int c, d, inlen;
unsigned char *tmp;
int bits;
if((*inlenb % 2) == 1) (*inlenb)--;
inlen = *inlenb / 2;
inend = in + inlen;
while((in < inend) && (out - outstart + 5 < *outlen))
{
if(isLittleEndian)
{
c= *in++;
}
else
{
tmp = (unsigned char *) in;
c = *tmp++;
c = c | (((unsigned int)*tmp) << 8);
in++;
}
if((c & 0xFC00) == 0xD800)
{
if(in >= inend) break;
if(isLittleEndian) { d = *in++; }
else
{
tmp = (unsigned char *) in;
d = *tmp++;
d = d | (((unsigned int)*tmp) << 8);
in++;
}
if((d & 0xFC00) == 0xDC00)
{
c &= 0x03FF;
c <<= 10;
c |= d & 0x03FF;
c += 0x10000;
}
else
{
*outlen = out - outstart;
*inlenb = processed - inb;
return -1;
}
}
if(out >= outend) break;
if (c < 0x80) { *out++ = c; bits= -6; }
else if(c < 0x800) { *out++ = ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
else if(c < 0x10000) { *out++ = ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
else { *out++ = ((c >> 18) & 0x07) | 0xF0; bits= 12; }
for(; bits >= 0; bits-= 6)
{
if (out >= outend)
break;
*out++ = ((c >> bits) & 0x3F) | 0x80;
}
processed = (const unsigned char*) in;
}
*outlen = out - outstart;
*inlenb = processed - inb;
return(*outlen);
}
/*
 * Store one UTF-16 code unit as two bytes in the order selected by
 * isLittleEndian (non-zero: high byte first, zero: low byte first).
 * Byte-wise stores avoid writing the caller's byte buffer through an
 * unsigned short pointer, which could be misaligned. The flag is non-zero
 * only on a host that stores the high byte first, so this matches a native
 * 16-bit store in that case.
 */
static void utf16_store_unit(uchar *p, unsigned int unit)
{
    if (isLittleEndian) {
        p[0] = (uchar)(unit >> 8);
        p[1] = (uchar)unit;
    } else {
        p[0] = (uchar)unit;
        p[1] = (uchar)(unit >> 8);
    }
}

/**
 * Purpose: convert a UTF-8 string to UTF-16.
 * Input:   in     ---- UTF-8 string
 *          inlen  ---- length of the UTF-8 string in bytes
 *          outlen ---- capacity of the output buffer in bytes
 * Output:  outb   ---- UTF-16 string (not NUL-terminated)
 *          outlen ---- number of UTF-16 bytes written
 *          inlen  ---- number of input bytes consumed
 * Return:  >= 0 ---- number of UTF-16 bytes written
 *          -1   ---- error: outb/outlen/inlen is NULL, or the input holds an
 *                    invalid lead byte or an invalid continuation byte
 * Notes:   A NULL input converts nothing and returns 0. Conversion stops
 *          early (without error) when the output buffer is full, when the
 *          input ends in the middle of a multi-byte sequence, or when a
 *          decoded value is 0x110000 or above.
 **/
int UTF8ToUTF16(uchar *outb, int *outlen, const uchar *in, int *inlen)
{
    uchar *out = outb;
    uchar *outend;
    const uchar *const instart = in;
    const uchar *processed = in;
    const uchar *inend;
    unsigned int c, d;
    int trailing;

    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return -1;
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return 0;
    }

    /* Negative lengths are treated as zero; output is whole code units only. */
    inend = in + ((*inlen < 0) ? 0 : *inlen);
    outend = outb + ((*outlen < 0) ? 0 : (*outlen / 2) * 2);

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            goto bad_input;             /* stray continuation byte */
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            goto bad_input;             /* no valid lead byte is >= 0xF8 */
        }

        /* Sequence is cut short by the end of the input: stop cleanly. */
        if (inend - in < trailing) break;

        for (; trailing; trailing--) {
            d = *in++;
            /* Reject a bad continuation byte rather than emit a partial value. */
            if ((d & 0xC0) != 0x80) goto bad_input;
            c <<= 6;
            c |= d & 0x3F;
        }

        if (c < 0x10000) {
            if (out >= outend) break;
            utf16_store_unit(out, c);
            out += 2;
        } else if (c < 0x110000) {
            /* Same bound as before: stop unless more than one unit is free. */
            if (out + 2 >= outend) break;
            c -= 0x10000;
            utf16_store_unit(out, 0xD800 | (c >> 10));
            utf16_store_unit(out + 2, 0xDC00 | (c & 0x03FF));
            out += 4;
        } else {
            break;
        }

        /* Only count input as consumed once its character is fully written. */
        processed = in;
    }

    *outlen = (int)(out - outb);
    *inlen = (int)(processed - instart);
    return *outlen;

bad_input:
    *outlen = (int)(out - outb);
    *inlen = (int)(processed - instart);
    return -1;
}
/**
 * Purpose: look up the UTF-16 value for a GB2312 code (binary search).
 * Input:   gbkey ---- GB2312 code to look up
 * Return:  the unicode field of the matching _GB2UTFTable entry, converted
 *          to short; -1 when no entry has gbcode equal to gbkey
 * Uses:    global _GB2UTFTable (GB2312 -> UNICODE table) with
 *          GBUTF_TABLE_SIZE entries; the search presumes the table is
 *          sorted by ascending gbcode (verify against code_table.h).
 **/
short UTF16Seek(ushort gbkey)
{
    int lo = 0;
    int hi = GBUTF_TABLE_SIZE - 1;

    while (lo <= hi) {
        const int probe = (lo + hi) >> 1;

        if (_GB2UTFTable[probe].gbcode > gbkey) {
            hi = probe - 1;
        } else if (_GB2UTFTable[probe].gbcode == gbkey) {
            return _GB2UTFTable[probe].unicode;
        } else {
            lo = probe + 1;
        }
    }
    return -1;
}
/**
 * Purpose: look up the GB2312 code for a UTF-16 value (binary search).
 * Input:   utfkey ---- UTF-16 value to look up
 * Return:  the gbcode field of the matching _UTF2GBTable entry, converted
 *          to short; -1 when no entry has unicode equal to utfkey
 * Uses:    global _UTF2GBTable (UNICODE -> GB2312 table) with
 *          UTFGB_TABLE_SIZE entries; the search presumes the table is
 *          sorted by ascending unicode (verify against code_table.h).
 **/
short GBSeek(ushort utfkey)
{
    int lo = 0;
    int hi = UTFGB_TABLE_SIZE - 1;

    while (lo <= hi) {
        const int probe = (lo + hi) >> 1;

        if (_UTF2GBTable[probe].unicode > utfkey) {
            hi = probe - 1;
        } else if (_UTF2GBTable[probe].unicode == utfkey) {
            return _UTF2GBTable[probe].gbcode;
        } else {
            lo = probe + 1;
        }
    }
    return -1;
}
/**
 * Purpose: count the 2-byte units in a UTF-16 string.
 * Input:   in ---- UTF-16 buffer, terminated by a unit whose two bytes are
 *                  both zero
 * Return:  number of 2-byte units before the terminator; 0 when in is NULL
 * Note:    the scan has no upper bound, so the buffer must contain the
 *          terminator.
 **/
long UTF16Len(const uchar *in)
{
    int units = 0;

    if (in == NULL) {
        return 0;
    }
    while (in[2 * units] != 0 || in[2 * units + 1] != 0) {
        units++;
    }
    return units;
}
/**
 * Purpose: convert a UTF-16 string to GB2312.
 * Input:   in     ---- UTF-16 string; a leading FF FE or FE FF byte pair is
 *                      skipped
 *          inlen  ---- length of the UTF-16 string in bytes (negative is
 *                      treated as 0, a trailing odd byte is ignored)
 *          outlen ---- capacity of the output buffer in bytes
 * Output:  out    ---- GB2312 string, NUL-terminated on success
 * Return:  >= 0 ---- length of the GB2312 string in bytes (terminator not
 *                    counted)
 *          -1   ---- error: out is NULL, outlen leaves no room for the
 *                    terminator, a UTF-16 value has no GB2312 mapping, or
 *                    the output buffer is too small
 * Notes:   Input bytes are combined high-byte-first when isLittleEndian is
 *          non-zero and low-byte-first otherwise.
 **/
int UTF16ToGB(uchar *out, int outlen, uchar *in, int inlen)
{
    int i = 0, k = 0;
    int len;
    ushort c1, c2;

    /* Every successful return writes a terminator, so room for it is required. */
    if (NULL == out || outlen <= 0) return -1;
    if (NULL == in) {
        /* Was '/0', a multi-character constant that stored '0', not NUL. */
        out[0] = '\0';
        return 0;
    }

    len = (inlen < 0 ? 0 : inlen);
    if (len % 2 == 1) len--;

    /* Skip a leading byte-order mark; it does not change the decoding order. */
    if (len >= 2 &&
        ((in[0] == 0xFF && in[1] == 0xFE) || (in[0] == 0xFE && in[1] == 0xFF))) {
        i = 2;
    }

    for (; i < len; i += 2) {
        if (isLittleEndian) {
            c1 = (in[i] << 8) | (uchar)in[i + 1];
        } else {
            c1 = (in[i + 1] << 8) | (uchar)in[i];
        }

        c2 = GBSeek(c1);
        if (-1 == (short)c2) {
            LogMessage("At [%d] [0x%4.4X] not found!\n", i, c1);
            return -1;
        }

        if (0xFF00 & c2) {
            /* Two-byte code: needs two bytes plus the terminator. */
            if (k > outlen - 3) return -1;
            out[k++] = c2 >> 8;
            out[k++] = c2 & 0x00FF;
        } else {
            /* Single-byte code: needs one byte plus the terminator. */
            if (k >= outlen - 1) return -1;
            out[k++] = (uchar)c2;
        }
    }

    out[k] = 0;
    return k;
}
/**
 * Purpose: convert a UTF-8 string to GB2312 by way of an intermediate
 *          UTF-16 buffer (UTF8ToUTF16 followed by UTF16ToGB).
 * Input:   in     ---- UTF-8 string
 *          inlen  ---- length of the UTF-8 string in bytes (negative is
 *                      treated as 0)
 *          outlen ---- capacity of the output buffer in bytes
 * Output:  out    ---- GB2312 string
 * Return:  >= 0 ---- length of the GB2312 string in bytes
 *          -1   ---- error (allocation failure or a failed conversion step)
 * Notes:   When the UTF-8 -> UTF-16 step yields 0 bytes or fails, its
 *          result is returned directly and out is not written.
 **/
int UTF8ToGB(uchar *out, int outlen, uchar *in, int inlen)
{
    uchar *utf16 = NULL;
    int inbytes = (inlen < 0 ? 0 : inlen);
    /* Each UTF-8 byte yields at most one 2-byte UTF-16 unit. */
    int utf16len = 2 * inbytes;
    int gblen;

    utf16 = (uchar *)calloc(1, utf16len + 2);
    if (NULL == utf16) {
        /* The message previously ended in a literal "/n" instead of a newline. */
        LogMessage("allot memory error!\n");
        return -1;
    }

    utf16len = UTF8ToUTF16(utf16, &utf16len, in, &inbytes);
    if (utf16len <= 0) {
        free(utf16);
        return utf16len;
    }

    gblen = UTF16ToGB(out, outlen, utf16, utf16len);
    free(utf16);
    return gblen;
}
/**
 * Purpose: convert a GB2312 string to UTF-16.
 * Input:   in     ---- GB2312 string
 *          inlen  ---- length of the GB2312 string in bytes (negative is
 *                      treated as 0)
 *          outlen ---- capacity of the output buffer in bytes
 * Output:  out    ---- UTF-16 string (not NUL-terminated); bytes are written
 *                      high-byte-first when isLittleEndian is non-zero and
 *                      low-byte-first otherwise
 * Return:  >= 0 ---- length of the UTF-16 string in bytes
 *          -1   ---- error: out is NULL, a code has no UTF-16 mapping, or
 *                    fewer than 3 bytes remain in the output buffer
 * Notes:   Each byte is first looked up as a single-byte code; if that
 *          fails it is combined with the next byte and looked up again as a
 *          double-byte code.
 **/
int GBToUTF16(uchar *out, int outlen, uchar *in, int inlen)
{
    int i, k;
    ushort c1, c2;
    int len;

    if (NULL == out) return -1;
    if (NULL == in) {
        /* Was '/0', a multi-character constant that stored '0', not NUL. */
        if (outlen > 0) out[0] = '\0';
        return 0;
    }

    len = (inlen < 0 ? 0 : inlen);
    for (i = 0, k = 0; i < len; i++) {
        c1 = (uchar)in[i];
        c2 = UTF16Seek(c1);                 /* try a single-byte code */
        if (-1 == (short)c2) {
            if (++i < len) {
                c1 = (c1 << 8) | (uchar)in[i];
                c2 = UTF16Seek(c1);         /* try a double-byte code */
                if (-1 == (short)c2) {
                    LogMessage("At [%d] [0x%4.4X] not found1!\n", i, c1);
                    return -1;
                }
            } else {
                /* Lead byte at the very end of the input. */
                LogMessage("At [%d] [0x%4.4X] not found2!\n", i, c1);
                return -1;
            }
        }

        /* Output buffer would be exceeded. */
        if (k > outlen - 3) return -1;

        if (isLittleEndian) {
            out[k++] = c2 >> 8;
            out[k++] = c2 & 0x00FF;
        } else {
            out[k++] = c2 & 0x00FF;
            out[k++] = c2 >> 8;
        }
    }
    return k;
}
/**
 * Purpose: convert a GB2312 string to UTF-8 by way of an intermediate
 *          UTF-16 buffer (GBToUTF16 followed by UTF16ToUTF8).
 * Input:   in     ---- GB2312 string
 *          inlen  ---- length of the GB2312 string in bytes (negative is
 *                      treated as 0)
 *          outlen ---- capacity of the output buffer in bytes
 * Output:  out    ---- UTF-8 string
 * Return:  >= 0 ---- length of the UTF-8 string in bytes
 *          -1   ---- error (allocation failure or a failed conversion step)
 * Notes:   When the GB2312 -> UTF-16 step yields 0 bytes or fails, its
 *          result is returned directly and out is not written.
 **/
int GBToUTF8(uchar *out, int outlen, uchar *in, int inlen)
{
    uchar *utf16 = NULL;
    int inbytes = (inlen < 0 ? 0 : inlen);
    /* Two output bytes per input byte, plus the slack GBToUTF16 requires. */
    int utf16len = 2 * inbytes + 2;
    int utf8len;

    utf16 = (uchar *)calloc(1, utf16len);
    if (NULL == utf16) {
        /* The message previously ended in a literal "/n" instead of a newline. */
        LogMessage("allot memory error!\n");
        return -1;
    }

    utf16len = GBToUTF16(utf16, utf16len, in, inbytes);
    if (utf16len <= 0) {
        free(utf16);
        return utf16len;
    }

    utf8len = outlen;
    utf8len = UTF16ToUTF8(out, &utf8len, utf16, &utf16len);
    free(utf16);
    return utf8len;
}
/* Encoding families recognised by CodeConv(). */
enum codeconv_encoding {
    CODECONV_ENC_UNKNOWN = 0,
    CODECONV_ENC_UTF8,
    CODECONV_ENC_UTF16,
    CODECONV_ENC_GB2312
};

/* Map an encoding name (case-insensitive) to its family; UNKNOWN if unsupported. */
static enum codeconv_encoding codeconv_classify(const char *name)
{
    if (strcasecmp(name, "UTF-8") == 0 || strcasecmp(name, "UTF8") == 0) {
        return CODECONV_ENC_UTF8;
    }
    if (strcasecmp(name, "UTF-16") == 0 || strcasecmp(name, "UTF16") == 0 ||
        strcasecmp(name, "UNICODE") == 0) {
        return CODECONV_ENC_UTF16;
    }
    if (strcasecmp(name, "GB2312") == 0) {
        return CODECONV_ENC_GB2312;
    }
    return CODECONV_ENC_UNKNOWN;
}

/**
 * Purpose: convert a string from encoding incode to encoding outcode.
 * Input:   incode  ---- source encoding name: "UTF-8"/"UTF8",
 *                       "UTF-16"/"UTF16"/"UNICODE" or "GB2312"
 *                       (case-insensitive)
 *          outcode ---- target encoding name (same set of names)
 *          in      ---- string in the source encoding
 *          inlen   ---- length of in, in bytes (negative is treated as 0)
 *          outlen  ---- capacity of the output buffer in bytes
 * Output:  out     ---- string in the target encoding
 *          outlen  ---- length of the converted string in bytes (0 on a
 *                       failed conversion)
 * Return:  >= 0 ---- length of the converted string in bytes
 *          -1   ---- error: NULL argument, unsupported encoding name,
 *                    conversion failure, or output buffer too small
 * Notes:   - When in is NULL the output is set to an empty string and 0 is
 *            returned.
 *          - When *outlen <= 0 nothing is converted and (*outlen % 2) is
 *            returned, as before.
 *          - An unsupported encoding name returns -1 without changing
 *            *outlen.
 *          - When both names denote the same encoding the input is copied
 *            unchanged; the copy now fails with -1 instead of overrunning
 *            out when inlen exceeds *outlen.
 **/
int CodeConv(char *incode, char *outcode,uint8_t *out, int *outlen, uint8_t *in, int inlen)
{
    enum codeconv_encoding from, to;
    int len, len1;

    if (NULL == outlen) return -1;
    if (NULL == incode || NULL == outcode) { *outlen = 0; return -1; }
    if (NULL == in) {
        /* Was '/0', a multi-character constant that stored '0', not NUL. */
        if (out != NULL && *outlen > 0) out[0] = '\0';
        *outlen = 0;
        return 0;
    }
    if (*outlen <= 0) return (*outlen % 2);
    if (NULL == out) { *outlen = 0; return -1; }
    if (SetEndian() < 0) { *outlen = 0; return -1; }

    len = (inlen < 0 ? 0 : inlen);
    len1 = *outlen;

    from = codeconv_classify(incode);
    if (from == CODECONV_ENC_UNKNOWN) {
        LogMessage("Encoding [%s] is not supported\n", incode);
        return -1;
    }
    to = codeconv_classify(outcode);
    if (to == CODECONV_ENC_UNKNOWN) {
        LogMessage("Encoding [%s] is not supported\n", outcode);
        return -1;
    }

    if (from == to) {
        /* Same encoding on both sides: bounded plain copy. */
        if (len > len1) { *outlen = 0; return -1; }
        memcpy(out, in, len);
        len1 = len;
    } else if (from == CODECONV_ENC_GB2312 && to == CODECONV_ENC_UTF8) {
        len1 = GBToUTF8(out, len1, in, len);
    } else if (from == CODECONV_ENC_UTF8 && to == CODECONV_ENC_GB2312) {
        len1 = UTF8ToGB(out, len1, in, len);
    } else if (from == CODECONV_ENC_GB2312 && to == CODECONV_ENC_UTF16) {
        len1 = GBToUTF16(out, len1, in, len);
    } else if (from == CODECONV_ENC_UTF16 && to == CODECONV_ENC_GB2312) {
        len1 = UTF16ToGB(out, len1, in, len);
    } else if (from == CODECONV_ENC_UTF8 && to == CODECONV_ENC_UTF16) {
        len1 = UTF8ToUTF16(out, &len1, in, &len);
    } else {
        /* Remaining pair: UTF-16 -> UTF-8. */
        len1 = UTF16ToUTF8(out, &len1, in, &len);
    }

    if (len1 < 0) {
        *outlen = 0;
        return -1;
    }
    *outlen = len1;
    return len1;
}
/*
 * Write each input byte as two uppercase hexadecimal characters.
 *
 * inputstr  - bytes to convert
 * inputlen  - number of bytes in inputstr
 * outputstr - receives 2 * inputlen characters; no terminator is written
 *
 * Always returns 0. Despite the name, nothing is decoded here: the high
 * nibble of each byte is emitted first, then the low nibble.
 */
int sessionAuditDecodeToURL(u_int8_t *inputstr,u_int32_t inputlen, u_int8_t *outputstr)
{
    const char hex_digits[] = "0123456789ABCDEF";
    u_int8_t *dst = outputstr;
    u_int32_t n;

    for (n = 0; n < inputlen; n++) {
        const u_int8_t byte = inputstr[n];

        *dst++ = hex_digits[byte >> 4];
        *dst++ = hex_digits[byte & 0x0F];
    }
    return 0;
}
#ifdef SAS3_10Q2_DECODE
/* zxd change 20100315 */
/*
 * Copy inputstr to outputstr byte for byte, replacing every single quote
 * with a double quote and every backslash with a forward slash.
 *
 * inputstr  - bytes to copy
 * inputlen  - number of bytes in inputstr
 * outputstr - receives inputlen bytes; no terminator is added
 *
 * Always returns 0.
 * The character literals here had lost their backslashes ('/0', '/'', '//'),
 * which did not compile; they are restored to '\0', '\'' and '\\'. A NUL
 * byte is copied unchanged, so it needs no branch of its own.
 */
int DecodeToURL(uint8_t *inputstr,u_int32_t inputlen, uint8_t *outputstr)
{
    u_int32_t i;

    for (i = 0; i < inputlen; i++) {
        switch (inputstr[i]) {
        case '\'':
            outputstr[i] = '"';
            break;
        case '\\':
            outputstr[i] = '/';
            break;
        default:
            outputstr[i] = inputstr[i];
            break;
        }
    }
    return 0;
}
#else
/* zxd change 20100222 */
/*
 * Write each input byte as two uppercase hexadecimal characters.
 *
 * inputstr  - bytes to convert
 * inputlen  - number of bytes in inputstr
 * outputstr - receives 2 * inputlen characters; no terminator is written
 *
 * Always returns 0. Despite the name, nothing is decoded in this variant:
 * the high nibble of each byte is emitted first, then the low nibble.
 */
int DecodeToURL(u_int8_t *inputstr,u_int32_t inputlen, u_int8_t *outputstr)
{
    static const char conv[] = "0123456789ABCDEF";
    const u_int8_t *src = inputstr;
    u_int8_t *dst = outputstr;
    u_int32_t i;

    for (i = 0; i < inputlen; i++) {
        *dst++ = (u_int8_t)conv[src[i] >> 4];
        *dst++ = (u_int8_t)conv[src[i] & 0x0F];
    }
    return 0;
}
#endif