以下对象用于在PB中实现字符串在GB2312、Unicode与UTF-8编码之间的相互转换，对象的代码将持续完善。
$PBExportHeader$n_cst_unicode_cvt.sru
forward
global type n_cst_unicode_cvt from nonvisualobject
end type
end forward
global type n_cst_unicode_cvt from nonvisualobject autoinstantiate
end type
type prototypes
// External Win32 functions imported from kernel32.dll for code-page conversion.
// NOTE(review): several parameters and return values are declared as PowerScript
// int, while the Win32 headers declare them as 32-bit int - confirm that this
// marshals correctly for the string lengths used by callers.

// Converts UTF-16 text (passed here as a char array) to a multi-byte code page.
Function int WideCharToMultiByte(uint CodePage, ulong dwFlags, ref char lpWideCharStr[], int cchWideChar, ref string lpMultiByteStr, int cbMultiByte,ref string lpDefaultChar,ref boolean lpUsedDefaultChar) Library "KERNEL32.DLL"
// Converts multi-byte text in the given code page to UTF-16 (received as a char array).
FUNCTION int MultiByteToWideChar(uint CodePage, ulong dwFlags, ref string lpMultiByteStr,int cchMultiByte,ref char lpWideCharStr[], int cchWideChar) LIBRARY "kernel32.dll" ALIAS FOR "MultiByteToWideChar"
// Bound to the ANSI entry point LCMapStringA; in this object it is referenced
// only from the commented-out draft inside of_jt_2_ft.
FUNCTION ulong LCMapString(ulong Locale,ulong dwMapFlags,ref string lpSrcStr,ulong cchSrc,ref string lpDestStr,ulong cchDest) LIBRARY "kernel32.dll" ALIAS FOR "LCMapStringA"
end prototypes
type variables
// Win32 code-page identifiers and conversion flags.
// NOTE(review): none of these constants is used by active code in this object;
// the conversion functions pass the literal code page 936 instead.
constant long CP_ACP = 0 // Win32 identifier for the system default ANSI code page
constant long CP_UTF8 = 65001 // Win32 identifier for the UTF-8 code page
constant long MB_PRECOMPOSED = 1 // MultiByteToWideChar flag; appears only in a commented-out call
end variables
forward prototypes
public function string of_jt_2_ft (string src)
public function string of_gb_to_unicode (string as_source)
public function string of_unicode_to_gb (string as_source)
public function string of_unicode_to_utf8 (string as)
private function string of_dec2hex (long al)
private function long of_hex2dec (string as)
public function string of_uft8_to_unicode (string as)
public function string of_hex2bin (string s_hex)
public function string of_bin2hex (string s_binary)
public function string of_gb_to_utf8 (string as)
end prototypes
public function string of_jt_2_ft (string src);// Simplified -> Traditional Chinese conversion. NOT IMPLEMENTED.
//
// Argument : src - text to convert (currently ignored).
// Returns  : always the empty string.
//
// NOTE(review): the only implementation drafted for this function was left
// commented out. The draft planned three steps on a buffer of Len(src) * 2 + 1:
//   1. LCMapString(2052, LCMAP_TRADITIONAL_CHINESE, src, -1, target, len)
//        with LCMAP_TRADITIONAL_CHINESE = 67108864
//        (LCMAP_SIMPLIFIED_CHINESE = 33554432 was noted for the reverse mapping)
//   2. MultiByteToWideChar(936, 0, target, -1, <blob>, len)
//   3. WideCharToMultiByte(950, 0, <blob>, -1, target, len, <null string>, <null ulong>)
// The draft passes a blob and a ulong where this object's external function
// declarations expect a char array and a boolean, so it cannot simply be
// uncommented.
string ls_empty

ls_empty = ''
return ls_empty
end function
public function string of_gb_to_unicode (string as_source);// GB2312 (code page 936) text -> UTF-16 code units written as hex text.
//
// Argument : as_source - text encoded in code page 936.
// Returns  : four upper-case hex digits per UTF-16 code unit, high byte first
//            (for example 'A' gives '0041'). Returns '' for null or empty
//            input and when the Win32 conversion fails.
//
// Assumes a PowerBuilder build in which one char array element holds one
// byte, as the byte-wise loop below requires - TODO confirm.
string ls_ret
char lc_data[]
long ll_units, ll_written, ll_bytes, ll_index

if IsNull(as_source) then return ''
if as_source = '' then return ''

// One UTF-16 code unit per source character plus one for the terminator that
// MultiByteToWideChar writes when the source length is passed as -1.
ll_units = LenW(as_source) + 1
lc_data = Space(ll_units * 2)

// cchWideChar is a count of wide characters, not bytes.
ll_written = MultiByteToWideChar(936, 0, as_source, -1, lc_data, ll_units)
if ll_written <= 0 then return ''
if ll_written > ll_units then return ''

// Drop the terminator, then emit every code unit high byte first. The buffer
// is little-endian, so the high byte sits at the even index.
ll_bytes = (ll_written - 1) * 2
for ll_index = 1 to ll_bytes step 2
	// Right('0' + ..., 2) guarantees two digits per byte even when
	// of_dec2hex returns a single digit for values 1-15.
	ls_ret += Right('0' + of_dec2hex(Asc(lc_data[ll_index + 1])), 2) &
		+ Right('0' + of_dec2hex(Asc(lc_data[ll_index])), 2)
next
return ls_ret
end function
public function string of_unicode_to_gb (string as_source);// UTF-16 hex text -> GB2312 (code page 936) text.
//
// Argument : as_source - four hex digits per UTF-16 code unit, high byte first
//            (the format produced by of_gb_to_unicode).
// Returns  : the text converted to code page 936. Returns '' for null or empty
//            input, when the length is not a multiple of four, and when the
//            Win32 conversion fails.
//
// Assumes a PowerBuilder build in which one char array element holds one
// byte - TODO confirm.
string ls_ret, ls_null
char lc_data[]
long ll_units, ll_unit, ll_hex, ll_len
boolean lb_usedefault = false

if IsNull(as_source) then return ''
if Mod(Len(as_source), 4) <> 0 then return ''
ll_units = Len(as_source) / 4
if ll_units = 0 then return ''

// Build the little-endian UTF-16 buffer: low byte first, then high byte.
for ll_unit = 1 to ll_units
	ll_hex = (ll_unit - 1) * 4 + 1
	lc_data[ll_unit * 2 - 1] = Char(of_hex2dec(Mid(as_source, ll_hex + 2, 2)))
	lc_data[ll_unit * 2] = Char(of_hex2dec(Mid(as_source, ll_hex, 2)))
next
// Write both bytes of the UTF-16 terminator explicitly instead of relying on
// the default fill of unassigned array elements.
lc_data[ll_units * 2 + 1] = Char(0)
lc_data[ll_units * 2 + 2] = Char(0)

SetNull(ls_null)
// First call: ask for the required output size (terminator included).
ll_len = WideCharToMultiByte(936, 0, lc_data, -1, ls_null, 0, ls_null, lb_usedefault)
if ll_len <= 0 then return ''

// Second call: convert into a buffer of that size.
ls_ret = Space(ll_len)
if WideCharToMultiByte(936, 0, lc_data, -1, ls_ret, ll_len, ls_null, lb_usedefault) <= 0 then return ''
return ls_ret
end function
public function string of_unicode_to_utf8 (string as);// UTF-16 hex text -> percent-encoded UTF-8.
/*
Argument : as - four hex digits per UTF-16 code unit, high byte first.
           Upper- and lower-case digits are both accepted.
Returns  : every UTF-8 byte written as '%' plus two upper-case hex digits,
           for example '4E0A' gives '%E4%B8%8A'. ASCII is percent-encoded too.
           A trailing group shorter than four digits is ignored.

Code unit (hex)   UTF-8 byte layout (binary)
0000 - 007F       0xxxxxxx
0080 - 07FF       110xxxxx 10xxxxxx
0800 - FFFF       1110xxxx 10xxxxxx 10xxxxxx

Each code unit is encoded on its own, so surrogate code units (D800-DFFF)
are written as separate three-byte sequences.
*/
string ls_src, ls_unit, ls_bits, ls_out
long ll_pos, ll_bit, ll_usable

if IsNull(as) then return ''

// The range tests below and of_hex2bin both expect upper-case digits.
ls_src = Upper(as)
// Only whole four-digit groups are converted.
ll_usable = Len(ls_src) - Mod(Len(ls_src), 4)

for ll_pos = 1 to ll_usable step 4
	ls_unit = Mid(ls_src, ll_pos, 4)
	// Normalise to exactly 16 bits before slicing out the payload.
	ls_bits = Right('0000000000000000' + of_hex2bin(ls_unit), 16)
	choose case ls_unit
		case '0000' to '007F'
			// 7 payload bits -> one byte
			ls_bits = '0' + Right(ls_bits, 7)
		case '0080' to '07FF'
			// 11 payload bits -> 5 + 6
			ls_bits = '110' + Mid(ls_bits, 6, 5) + '10' + Right(ls_bits, 6)
		case else
			// 16 payload bits -> 4 + 6 + 6
			ls_bits = '1110' + Left(ls_bits, 4) + '10' + Mid(ls_bits, 5, 6) + '10' + Mid(ls_bits, 11, 6)
	end choose
	// Eight bits per output byte; of_bin2hex turns them into two hex digits.
	for ll_bit = 1 to Len(ls_bits) step 8
		ls_out += '%' + of_bin2hex(Mid(ls_bits, ll_bit, 8))
	next
next
return ls_out
end function
private function string of_dec2hex (long al);// Convert a non-negative number to upper-case hexadecimal text.
//
// Argument : al - value to convert.
// Returns  : hex digits, most significant first, left-padded with '0' to at
//            least two digits so that a byte value (0-255) always yields
//            exactly two characters (10 gives '0A', 0 gives '00').
//            Negative values return the empty string.
long ll_rest, ll_digit
string ls_hex

if al < 0 then return ''

ll_rest = al
do
	ll_digit = Mod(ll_rest, 16)
	ls_hex = Mid('0123456789ABCDEF', ll_digit + 1, 1) + ls_hex
	// Exact integer division: the remainder has already been removed.
	ll_rest = (ll_rest - ll_digit) / 16
loop while ll_rest > 0

// Callers concatenate byte values, so a lone digit must be padded.
if Len(ls_hex) < 2 then ls_hex = '0' + ls_hex
return ls_hex
end function
private function long of_hex2dec (string as);// Parse hexadecimal text into a number.
//
// Argument : as - hex digits, most significant first; case is ignored.
// Returns  : the numeric value. Characters that are not hex digits count
//            as zero. Intended for values that fit in a long.
long ll_value, ll_pos, ll_digit
string ls_hex

ls_hex = Upper(as)
for ll_pos = 1 to Len(ls_hex)
	// Position in the digit table, minus one, is the digit's value.
	ll_digit = Pos('0123456789ABCDEF', Mid(ls_hex, ll_pos, 1)) - 1
	if ll_digit < 0 then ll_digit = 0
	ll_value = ll_value * 16 + ll_digit
next
return ll_value
end function
public function string of_uft8_to_unicode (string as);// Percent-encoded UTF-8 -> UTF-16 hex text.
/*
Inverse of of_unicode_to_utf8.

Argument : as - UTF-8 bytes written as hex digit pairs, each optionally
           preceded by '%' (for example '%E4%B8%8A'); case is ignored.
           NOTE(review): the input format is assumed to mirror the output of
           of_unicode_to_utf8 - confirm against the intended callers.
Returns  : four upper-case hex digits per code unit, high byte first
           ('%E4%B8%8A' gives '4E0A'). Returns '' for null or empty input and
           for malformed input: an odd number of hex digits, a truncated
           sequence, a bad continuation byte, or a lead byte outside the
           table below (four-byte sequences are not supported).

Code unit (hex)   UTF-8 byte layout (binary)
0000 - 007F       0xxxxxxx
0080 - 07FF       110xxxxx 10xxxxxx
0800 - FFFF       1110xxxx 10xxxxxx 10xxxxxx
*/
string ls_hex, ls_out
long ll_pos, ll_len, ll_lead, ll_cp, ll_extra, ll_k, ll_cont

if IsNull(as) then return ''

// Remove the '%' separators so that only hex digit pairs remain.
ls_hex = Upper(as)
ll_pos = Pos(ls_hex, '%')
do while ll_pos > 0
	ls_hex = Replace(ls_hex, ll_pos, 1, '')
	ll_pos = Pos(ls_hex, '%', ll_pos)
loop

ll_len = Len(ls_hex)
if Mod(ll_len, 2) <> 0 then return ''

ll_pos = 1
do while ll_pos <= ll_len
	ll_lead = of_hex2dec(Mid(ls_hex, ll_pos, 2))
	// The lead byte fixes the sequence length and carries the top payload bits.
	choose case ll_lead
		case 0 to 127
			ll_cp = ll_lead
			ll_extra = 0
		case 192 to 223
			ll_cp = ll_lead - 192
			ll_extra = 1
		case 224 to 239
			ll_cp = ll_lead - 224
			ll_extra = 2
		case else
			return ''
	end choose

	// Reject a sequence that runs past the end of the input.
	if ll_pos + (ll_extra + 1) * 2 - 1 > ll_len then return ''

	// Every continuation byte is 10xxxxxx and adds six payload bits.
	for ll_k = 1 to ll_extra
		ll_cont = of_hex2dec(Mid(ls_hex, ll_pos + ll_k * 2, 2))
		if ll_cont < 128 or ll_cont > 191 then return ''
		ll_cp = ll_cp * 64 + (ll_cont - 128)
	next

	// Pad to four digits regardless of how many of_dec2hex returns.
	ls_out += Right('0000' + of_dec2hex(ll_cp), 4)
	ll_pos += (ll_extra + 1) * 2
loop
return ls_out
end function
public function string of_hex2bin (string s_hex);// Hexadecimal text -> binary text.
//
// Argument : s_hex - hex digits; upper- and lower-case are both accepted.
// Returns  : four '0'/'1' characters per hex digit, in the same order
//            ('4E' gives '01001110'). Characters that are not hex digits
//            are skipped.
string ls_digits = '0123456789ABCDEF'
string ls_nibbles, ls_src, ls_bits
long ll_pos, ll_idx

// Bit patterns for 0..F laid end to end; entry k starts at (k * 4) + 1.
ls_nibbles = '0000000100100011' + '0100010101100111' + '1000100110101011' + '1100110111101111'

ls_src = Upper(s_hex)
for ll_pos = 1 to Len(ls_src)
	ll_idx = Pos(ls_digits, Mid(ls_src, ll_pos, 1))
	if ll_idx > 0 then ls_bits += Mid(ls_nibbles, (ll_idx - 1) * 4 + 1, 4)
next
return ls_bits
end function
public function string of_bin2hex (string s_binary);// Binary text -> hexadecimal text.
//
// Argument : s_binary - '0'/'1' characters, read in groups of four.
// Returns  : one upper-case hex digit per complete, valid group
//            ('11100100' gives 'E4'). A group that is shorter than four
//            characters or contains anything other than '0' and '1'
//            produces no output.
string ls_hex, ls_group, ls_bit
integer li_start, li_bit, li_value
boolean lb_valid

for li_start = 1 to Len(s_binary) step 4
	ls_group = Mid(s_binary, li_start, 4)
	lb_valid = (Len(ls_group) = 4)
	li_value = 0
	// Accumulate the group's value bit by bit, most significant first.
	for li_bit = 1 to Len(ls_group)
		ls_bit = Mid(ls_group, li_bit, 1)
		if ls_bit = '1' then
			li_value = li_value * 2 + 1
		elseif ls_bit = '0' then
			li_value = li_value * 2
		else
			lb_valid = false
		end if
	next
	if lb_valid then ls_hex += Mid('0123456789ABCDEF', li_value + 1, 1)
next
return ls_hex
end function
public function string of_gb_to_utf8 (string as);// GB2312 text -> UTF-16 hex text -> UTF-8.
//
// Argument : as - text handed to of_gb_to_unicode.
// Returns  : the result of of_unicode_to_utf8 applied to that intermediate
//            UTF-16 hex text.
string ls_utf16_hex

ls_utf16_hex = of_gb_to_unicode(as)
return of_unicode_to_utf8(ls_utf16_hex)
end function
on n_cst_unicode_cvt.create
// Creation handler: calls the ancestor's create, then triggers this
// object's "constructor" event.
call super::create
TriggerEvent( this, "constructor" )
end on
on n_cst_unicode_cvt.destroy
// Destruction handler: triggers this object's "destructor" event, then
// calls the ancestor's destroy.
TriggerEvent( this, "destructor" )
call super::destroy
end on