PB字符串实现GB2312与Unicode、UTF-8之间的相互转换

以下对象用于在PB中实现字符串在GB2312、Unicode与UTF-8之间的相互转换,对象的代码将持续完善。

 

$PBExportHeader$n_cst_unicode_cvt.sru
forward
global type n_cst_unicode_cvt from nonvisualobject
end type
end forward

global type n_cst_unicode_cvt from nonvisualobject autoinstantiate
end type

type prototypes
// Win32 (kernel32) external functions used by this object.
// Wide-character buffers are declared as char arrays, so the scripts in this
// object treat every array element as one byte of a 16-bit character.
// NOTE(review): that layout presumably requires a PowerBuilder version whose
// char is one byte wide - confirm before migrating.

// Wide characters -> multibyte text in the given code page (called by of_unicode_to_gb).
Function int WideCharToMultiByte(uint CodePage, ulong dwFlags, ref char lpWideCharStr[], int cchWideChar, ref string lpMultiByteStr, int cbMultiByte,ref string lpDefaultChar,ref boolean lpUsedDefaultChar) Library "KERNEL32.DLL" 
// Multibyte text in the given code page -> wide characters (called by of_gb_to_unicode).
FUNCTION int MultiByteToWideChar(uint CodePage, ulong dwFlags, ref string lpMultiByteStr,int cchMultiByte,ref char lpWideCharStr[],  int cchWideChar) LIBRARY "kernel32.dll" ALIAS FOR "MultiByteToWideChar"

// Bound to the ANSI entry point LCMapStringA. In this file it is referenced only
// from the commented-out code in of_jt_2_ft.
FUNCTION ulong LCMapString(ulong Locale,ulong dwMapFlags,ref string lpSrcStr,ulong cchSrc,ref string lpDestStr,ulong cchDest) LIBRARY "kernel32.dll" ALIAS FOR "LCMapStringA"


end prototypes
type variables
// Code-page identifiers and flags for the Win32 conversion functions.
// The live scripts in this object pass the literal code page 936 and flag 0;
// these constants appear only in commented-out code.
constant long CP_ACP = 0        // system default ANSI code page
constant long CP_UTF8 = 65001   // UTF-8 code page identifier
constant long MB_PRECOMPOSED = 1 // dwFlags value; used only in a commented-out MultiByteToWideChar call
end variables
forward prototypes
public function string of_jt_2_ft (string src)
public function string of_gb_to_unicode (string as_source)
public function string of_unicode_to_gb (string as_source)
public function string of_unicode_to_utf8 (string as)
private function string of_dec2hex (long al)
private function long of_hex2dec (string as)
public function string of_uft8_to_unicode (string as)
public function string of_hex2bin (string s_hex)
public function string of_bin2hex (string s_binary)
public function string of_gb_to_utf8 (string as)
end prototypes

public function string of_jt_2_ft (string src);// Placeholder; judging by the disabled code below it appears intended to
// map Simplified Chinese text to Traditional Chinese.
//
// Argument: src - source text (currently ignored).
// Returns : always an empty string, because the conversion logic is disabled.
//
// Disabled sketch kept for reference (never executed):
//   Constant long LCMAP_TRADITIONAL_CHINESE = 67108864
//   Constant long LCMAP_SIMPLIFIED_CHINESE  = 33554432
//   ls_src    = src
//   ll_len    = len(ls_src) * 2 + 1
//   ls_target = space(ll_len)
//   setnull(sn)
//   setnull(un)
//   LCMapString(2052, LCMAP_TRADITIONAL_CHINESE, ls_src, -1, ls_target, ll_len)
//   MultiByteToWideChar(936, 0, ls_target, -1, lb_blob, ll_len)
//   WideCharToMultiByte(950, 0, lb_blob, -1, ls_target, ll_len, sn, un)
string ls_result

// Never assigned, so the default empty string is handed back.
return ls_result

end function

public function string of_gb_to_unicode (string as_source);// GB2312 (code page 936) -> hex-encoded 16-bit Unicode
//
// Argument: as_source - text encoded in code page 936.
// Returns : four upper-case hex digits per 16-bit character, high byte first
//           (for example "4E2D"); '' when the input is empty or the
//           conversion fails.
//
// Changes compared with the previous version:
//   * every byte is emitted as exactly two hex digits, so bytes below 0x10
//     no longer shorten a group and misalign everything after it;
//   * the buffer size reported to MultiByteToWideChar matches the space
//     actually allocated and includes room for the terminator that the API
//     writes when the source length is passed as -1;
//   * the number of converted characters comes from the API return value,
//     so unused buffer padding is never dumped into the result.
string ls_ret
char   lc_data[]
long   ll_capacity, ll_units, ll_unit, ll_low

// One wide character per source character/byte is the worst case; +1 for the terminator.
ll_capacity = Len(as_source) + 1

// Two array elements (bytes) per wide character.
lc_data = Space(ll_capacity * 2)

// Returns the number of wide characters written, terminator included; 0 on failure.
ll_units = MultiByteToWideChar(936, 0, as_source, -1, lc_data, ll_capacity)
if ll_units <= 0 then return ''

// Leave the terminator out of the result.
ll_units = ll_units - 1

// Never index past the end of the buffer.
if ll_units * 2 > UpperBound(lc_data) then return ''

for ll_unit = 1 to ll_units
	// The buffer is little-endian: low byte first, then high byte.
	ll_low = ll_unit * 2 - 1
	ls_ret += Right('0' + of_dec2hex(Asc(lc_data[ll_low + 1])), 2) + Right('0' + of_dec2hex(Asc(lc_data[ll_low])), 2)
next

return ls_ret

end function

public function string of_unicode_to_gb (string as_source);// hex-encoded 16-bit Unicode -> GB2312 (code page 936)
//
// Argument: as_source - four hex digits per 16-bit character, high byte first
//                       (the format produced by of_gb_to_unicode).
// Returns : the text converted to code page 936; '' when the input length is
//           not a multiple of four hex digits or the conversion fails.
//
// Changes compared with the previous version:
//   * input that does not consist of whole 4-digit groups is rejected; it used
//     to leave gaps in the byte buffer and an incomplete terminator;
//   * both bytes of the wide-character terminator are written explicitly;
//   * unused locals and a dead assignment were removed.
string  ls_ret, ls_null
char    lc_data[]
long    ll_bytes, ll_index, ll_pos, ll_len
boolean lb_usedefault = false

// Each 16-bit character needs exactly four hex digits.
if Mod(Len(as_source), 4) <> 0 then return ''

// Decode the hex pairs and swap every byte pair: the input is high byte
// first, the API expects low byte first.
ll_bytes = Len(as_source) / 2
for ll_index = 1 to ll_bytes
	if Mod(ll_index, 2) = 0 then
		ll_pos = ll_index - 1
	else
		ll_pos = ll_index + 1
	end if
	lc_data[ll_pos] = Char(of_hex2dec(Mid(as_source, (ll_index - 1) * 2 + 1, 2)))
next

// Two-byte terminator required because the length is passed as -1 below.
lc_data[ll_bytes + 1] = Char(0)
lc_data[ll_bytes + 2] = Char(0)

SetNull(ls_null)

// First call: ask for the required output size (terminator included).
ll_len = WideCharToMultiByte(936, 0, lc_data, -1, ls_null, 0, ls_null, lb_usedefault)
if ll_len <= 0 then return ''

// Second call: convert into a buffer of that size.
ls_ret = Space(ll_len)
WideCharToMultiByte(936, 0, lc_data, -1, ls_ret, ll_len, ls_null, lb_usedefault)

return ls_ret

end function

public function string of_unicode_to_utf8 (string as);// hex-encoded 16-bit Unicode -> percent-encoded UTF-8
//
// Argument: as - four hex digits per 16-bit character (upper or lower case).
// Returns : the UTF-8 bytes of every character written as "%XX", for example
//           "4E2D" -> "%E4%B8%AD"; '' when the input length is not a multiple
//           of four.
//
//   Unicode (hex)        UTF-8 byte stream (binary)
//   0000 - 007F          0xxxxxxx
//   0080 - 07FF          110xxxxx 10xxxxxx
//   0800 - FFFF          1110xxxx 10xxxxxx 10xxxxxx
//
// Every 4-digit group is encoded on its own; surrogate pairs are not combined.
//
// Changes compared with the previous version:
//   * the input is upper-cased first, because the range tests below and
//     of_hex2bin are case-sensitive and lower-case digits were mis-encoded;
//   * loop counters are long, so input longer than 32767 characters no
//     longer overflows a 16-bit int;
//   * input that is not made of whole 4-digit groups is rejected.
string ls_src, ls_unit, ls_bits, ls_out
long   ll_len, ll_pos, ll_bit

ls_src = Upper(as)
ll_len = Len(ls_src)
if Mod(ll_len, 4) <> 0 then return ''

for ll_pos = 1 to ll_len step 4
	ls_unit = Mid(ls_src, ll_pos, 4)

	// String range comparison works because every group has four upper-case digits.
	choose case ls_unit
		case '0000' to '007F'
			// 7 payload bits in one byte
			ls_bits = '0' + Right('000000' + of_hex2bin(ls_unit), 7)
		case '0080' to '07FF'
			// 11 payload bits split 5 + 6
			ls_bits = Right('0000000000' + of_hex2bin(ls_unit), 11)
			ls_bits = '110' + Left(ls_bits, 5) + '10' + Right(ls_bits, 6)
		case else
			// '0800' to 'FFFF': 16 payload bits split 4 + 6 + 6
			ls_bits = Right('0000000000000000' + of_hex2bin(ls_unit), 16)
			ls_bits = '1110' + Left(ls_bits, 4) + '10' + Mid(ls_bits, 5, 6) + '10' + Mid(ls_bits, 11, 6)
	end choose

	// Emit each group of eight bits as "%" followed by two hex digits.
	for ll_bit = 1 to Len(ls_bits) step 8
		ls_out += '%' + of_bin2hex(Mid(ls_bits, ll_bit, 4)) + of_bin2hex(Mid(ls_bits, ll_bit + 4, 4))
	next
next

return ls_out
end function

private function string of_dec2hex (long al);// decimal -> upper-case hexadecimal
//
// Argument: al - value to convert.
// Returns : the hex digits of al, left-padded with '0' to at least two
//           digits (0 -> "00", 10 -> "0A", 255 -> "FF", 4096 -> "1000");
//           '' for negative values, as before.
//
// Fix: values 1..15 used to come back as a single digit while 0 came back as
// "00". Callers concatenate one result per byte and rely on two digits each,
// so a single digit misaligned every following group.
long   ll_rest, ll_digit
string ls_hex

// Negative values are not converted (unchanged behaviour).
if al < 0 then return ''

ll_rest = al
do while ll_rest > 0
	ll_digit = Mod(ll_rest, 16)
	ll_rest  = Truncate(ll_rest / 16, 0)
	// Look the digit up instead of computing its character code.
	ls_hex = Mid("0123456789ABCDEF", ll_digit + 1, 1) + ls_hex
loop

// Guarantee a full byte's worth of digits; this also covers al = 0.
if Len(ls_hex) < 2 then ls_hex = Right("00" + ls_hex, 2)

return ls_hex
end function

private function long of_hex2dec (string as);// hexadecimal -> decimal
//
// Argument: as - hex digits, upper or lower case.
// Returns : the numeric value; any character that is not a hex digit
//           contributes 0 at its position.
long   ll_total, ll_pos, ll_len, ll_digit, ll_weight
string ls_hex

ls_hex = Upper(as)
ll_len = Len(ls_hex)

// Walk left to right; the digit at position p carries weight 16^(length - p).
for ll_pos = 1 to ll_len
	// Position in the digit table minus one is the digit value; -1 means "not a hex digit".
	ll_digit = Pos("0123456789ABCDEF", Mid(ls_hex, ll_pos, 1)) - 1
	if ll_digit < 0 then ll_digit = 0

	ll_weight = 16 ^ (ll_len - ll_pos)
	ll_total += ll_weight * ll_digit
next

return ll_total

end function

public function string of_uft8_to_unicode (string as);// utf8 -> unicode (not implemented yet)
//
// Argument: as - source text (currently ignored).
// Returns : always an empty string; no conversion logic exists yet.
//
// Mapping an implementation would have to reverse:
//   Unicode (hex)        UTF-8 byte stream (binary)
//   0000 - 007F          0xxxxxxx
//   0080 - 07FF          110xxxxx 10xxxxxx
//   0800 - FFFF          1110xxxx 10xxxxxx 10xxxxxx
string ls_result

// Never assigned, so the default empty string is handed back.
return ls_result
end function

public function string of_hex2bin (string s_hex);// hexadecimal -> binary
//
// Argument: s_hex - hex digits, upper or lower case.
// Returns : four '0'/'1' characters per hex digit, in input order
//           ("1F" -> "00011111"); characters that are not hex digits are
//           skipped.
//
// Changes compared with the previous version:
//   * lower-case digits a-f are accepted (they used to be dropped silently),
//     which matches of_hex2dec;
//   * the loop counter is long, so input longer than 32767 characters no
//     longer overflows a 16-bit int;
//   * the sixteen-branch choose case is replaced by a table lookup.
string ls_upper, ls_bits
long   ll_pos, ll_len, ll_digit

ls_upper = Upper(s_hex)
ll_len   = Len(ls_upper)

for ll_pos = 1 to ll_len
	// 1-based position in the digit table; 0 means "not a hex digit".
	ll_digit = Pos("0123456789ABCDEF", Mid(ls_upper, ll_pos, 1))
	if ll_digit > 0 then
		// The table holds the 4-bit patterns for 0..F back to back.
		ls_bits += Mid("0000000100100011010001010110011110001001101010111100110111101111", (ll_digit - 1) * 4 + 1, 4)
	end if
next

return ls_bits

end function

public function string of_bin2hex (string s_binary);// binary -> hexadecimal
//
// Argument: s_binary - a string of '0'/'1' characters, read in groups of four.
// Returns : one upper-case hex digit per complete, valid group; a group that
//           is shorter than four characters or contains anything other than
//           '0' and '1' produces no output.
string  ls_hex, ls_group, ls_bit
int     li_start, li_bit
long    ll_value
boolean lb_valid

for li_start = 1 to Len(s_binary) step 4
	ls_group = Mid(s_binary, li_start, 4)

	// Only full 4-character groups can match a hex digit.
	lb_valid = (Len(ls_group) = 4)
	ll_value = 0
	li_bit   = 1

	// Accumulate the group's value bit by bit, most significant bit first.
	do while lb_valid and li_bit <= 4
		ls_bit = Mid(ls_group, li_bit, 1)
		if ls_bit = "1" then
			ll_value = ll_value * 2 + 1
		elseif ls_bit = "0" then
			ll_value = ll_value * 2
		else
			lb_valid = false
		end if
		li_bit++
	loop

	if lb_valid then ls_hex += Mid("0123456789ABCDEF", ll_value + 1, 1)
next

return ls_hex

end function

public function string of_gb_to_utf8 (string as);// gb2312 -> unicode -> utf8
//
// Argument: as - text to convert.
// Returns : whatever of_unicode_to_utf8 produces for the hex string that
//           of_gb_to_unicode returns for the input.
string ls_unicode_hex

ls_unicode_hex = of_gb_to_unicode(as)
return of_unicode_to_utf8(ls_unicode_hex)
end function

on n_cst_unicode_cvt.create
// Run the ancestor's create handler first, then fire this object's "constructor" event.
call super::create
TriggerEvent( this, "constructor" )
end on

on n_cst_unicode_cvt.destroy
// Fire this object's "destructor" event first, then run the ancestor's destroy handler.
TriggerEvent( this, "destructor" )
call super::destroy
end on


  • 1
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 6
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值