字符集编码ANSI和UNICODE

编码指不同国家的语言在计算机中的一种存储和解释规范
ANSI与ASCII

       最初,Internet上只有一种字符集——ANSI的ASCII字符集(American Standard Code for Information Interchange,“美国信息交换标准码”),它使用7 bits来表示一个字符,总共表示128个字符。后来IBM公司在此基础上进行了扩展,用8 bits来表示一个字符,总共可以表示256个字符,充分利用了一个字节所能表达的最大信息。
ANSI字符集:ASCII字符集,以及由此派生并兼容的字符集,如:GB2312,正式的名称为MBCS(Multi-Byte Character Set,多字节字符集),通常也称为ANSI字符集。

UNICODE与UTF8,UTF16

         由于每种语言都制定了自己的字符集,导致最后存在的各种字符集实在太多,在国际交流中要经常转换字符集非常不便。因此,产生了Unicode字符集,它固定使用16 bits(两个字节)来表示一个字符,共可以表示65536个字符
        标准的Unicode称为UTF-16(UTF:UCS Transformation Format )。后来为了双字节的Unicode能够在现存的处理单字节的系统上正确传输,出现了UTF-8,使用类似MBCS的方式对Unicode进行编码。 (Unicode字符集有多种编码形式)
 例如“连通”两个字(U+8FDE、U+901A)的Unicode标准编码UTF-16 (little endian)为:DE 8F 1A 90(big endian则为:8F DE 90 1A)
                 而其UTF-8编码为:E8 BF 9E E9 80 9A

 
        当一个软件打开一个文本时,它要做的第一件事是决定这个文本究竟是使用哪种字符集的哪种编码保存的。软件一般采用三种方式来决定文本的字符集和编码:
检测文件头标识,提示用户选择,根据一定的规则猜测
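最标准的途径是检测文本最开头的几个字节(即BOM,字节序标记)。开头字节与 Charset/encoding 的对应关系如下:
EF BB BF —— UTF-8
FE FF —— UTF-16/UCS-2, big endian
FF FE —— UTF-16/UCS-2, little endian
FF FE 00 00 —— UTF-32/UCS-4, little endian
00 00 FE FF —— UTF-32/UCS-4, big endian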


 


==========================================================

收集的utf-8编码函数
<script language="VBScript">
'http://www.linuxforum.net/books/UTF-8-Unicode.html
' Percent-encodes the first character of z as UTF-8 (e.g. U+7B2C -> "%E7%AC%AC").
'
' z       : a string; only its first UTF-16 code unit (AscW) is examined.
' returns : z itself, unchanged, when that code unit is below 128 (7-bit ASCII);
'           otherwise the "%XX" escapes of the 2- or 3-byte UTF-8 sequence.
'
' AscW yields a single 16-bit code unit, so the result never exceeds three
' bytes; each half of a surrogate pair is encoded on its own.
Public Function UTF8EncodeChar(z)
    Dim code, leadPrefix, tailBytes, escaped, n

    code = CLng(AscW(z))
    ' AscW returns a signed 16-bit Integer; map negative values back to 32768..65535.
    If code < 0 Then code = code + 65536

    ' Only 7-bit ASCII is a single UTF-8 byte; 128..255 need two bytes.
    If code < 128 Then
        UTF8EncodeChar = z
        Exit Function
    End If

    ' The sequence length is fixed by the code-point range, not by how many
    ' non-zero 6-bit groups the value happens to contain.
    If code < 2048 Then
        leadPrefix = &HC0
        tailBytes = 1
    Else
        leadPrefix = &HE0
        tailBytes = 2
    End If

    ' Peel off continuation bytes (10xxxxxx) from the low end.
    ' "\" is integer division; "/" would produce a Double that is rounded
    ' by the next And and corrupts the higher bits.
    escaped = ""
    For n = 1 To tailBytes
        escaped = "%" & Hex(&H80 Or (code And &H3F)) & escaped
        code = code \ 64
    Next

    ' What is left of code now fits in the payload bits of the lead byte.
    UTF8EncodeChar = "%" & Hex(leadPrefix Or code) & escaped
End Function
' Encodes s one character at a time by concatenating the result of
' UTF8EncodeChar for each position, in order.
'
' s       : the string to encode.
' returns : the concatenated per-character results; when s has no
'           characters nothing is ever assigned to the result.
Public Function UTF8EncodeString(s)
    Dim pos, total, encoded
    total = Len(s)
    For pos = 1 To total
        encoded = encoded & UTF8EncodeChar(Mid(s, pos, 1))
    Next
    UTF8EncodeString = encoded
End Function
' Demo: show the encoded form of a sample string in a message box.
MsgBox UTF8EncodeString("第编三码 ")
</script>


=====================================================

/**
 * Decodes "%XX"-escaped UTF-8 in a string back to characters.
 *
 * Characters outside an escape are copied through unchanged. One- to
 * four-byte UTF-8 sequences are supported; code points above U+FFFF are
 * returned as a UTF-16 surrogate pair.
 *
 * Decoding stops (returning what has been decoded so far) at an escape whose
 * lead byte is 00 or is not parsable as hex, as it always has. An invalid
 * lead byte, or a sequence whose continuation bytes are missing or malformed,
 * yields U+FFFD and decoding resumes right after the bytes consumed.
 *
 * @param {string} szInput text containing %XX escapes
 * @return {string} decoded text
 */
function revertUTF8(szInput)
{
    var szRet = "";
    var x, lead, need, cp, k, cont;

    for (x = 0; x < szInput.length; x++)
    {
        if (szInput.charAt(x) != "%")
        {
            szRet += szInput.charAt(x);
            continue;
        }

        lead = parseInt(szInput.charAt(++x) + szInput.charAt(++x), 16);
        if (!lead) { break; }

        // Classify the lead byte: payload bits and number of continuation bytes.
        if (!(lead & 0x80))              { need = 0; cp = lead; }
        else if ((lead & 0xE0) == 0xC0)  { need = 1; cp = lead & 0x1F; }
        else if ((lead & 0xF0) == 0xE0)  { need = 2; cp = lead & 0x0F; }
        else if ((lead & 0xF8) == 0xF0)  { need = 3; cp = lead & 0x07; }
        else
        {
            // Stray continuation byte (10xxxxxx) or a lead UTF-8 does not define.
            szRet += "\uFFFD";
            continue;
        }

        // x sits on the last hex digit of the previous byte, so the next
        // escape must be "%" at x+1 followed by two hex digits.
        for (k = 0; k < need; k++)
        {
            cont = (szInput.charAt(x + 1) == "%")
                ? parseInt(szInput.substr(x + 2, 2), 16)
                : NaN;
            if (isNaN(cont) || (cont & 0xC0) != 0x80)
            {
                cp = 0xFFFD;
                break;
            }
            cp = (cp << 6) | (cont & 0x3F);
            x += 3;
        }

        if (cp > 0x10FFFF)
        {
            // Beyond the Unicode range; cannot be represented.
            szRet += "\uFFFD";
        }
        else if (cp > 0xFFFF)
        {
            // Split into a UTF-16 surrogate pair.
            cp -= 0x10000;
            szRet += String.fromCharCode(0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3FF));
        }
        else
        {
            szRet += String.fromCharCode(cp);
        }
    }
    return szRet;
}



/**
 * Converts one Unicode code point to its UTF-8 byte string.
 *
 * @param int $c code point
 * @return string 1 to 4 bytes for 0 <= $c < 0x200000; "" for a negative
 *                integer or a value of 0x200000 and above. A non-integer
 *                argument that compares below 0x80 is returned unchanged,
 *                as before.
 *
 * NOTE(review): values above 0x10FFFF and the surrogate range
 * 0xD800-0xDFFF are still encoded although they are not valid Unicode
 * scalar values - confirm whether callers rely on that.
 */
function u2utf8($c)
{
    $str = "";
    if ($c < 0x80) {
        if (!is_int($c)) {
            // Non-integer input keeps the historical pass-through behaviour.
            $str .= $c;
        } elseif ($c >= 0) {
            // chr() is required here: appending the int itself would add its
            // decimal digits ("65") instead of the character ("A").
            $str .= chr($c);
        }
    }
    else if ($c < 0x800) {
        $str .= chr(0xC0 | ($c >> 6));
        $str .= chr(0x80 | ($c & 0x3F));
    }
    else if ($c < 0x10000) {
        $str .= chr(0xE0 | ($c >> 12));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    }
    else if ($c < 0x200000) {
        $str .= chr(0xF0 | ($c >> 18));
        $str .= chr(0x80 | (($c >> 12) & 0x3F));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    }
    return $str;
}

================================================================

' UTF-8 URL encoding.
'
' Walks s one UTF-16 code unit at a time. Code units below 128 are copied
' through unchanged (they are NOT percent-escaped); everything else is
' written as the "%XX" escapes of its 2- or 3-byte UTF-8 form.
'
' s       : the string to encode.
' returns : the encoded string ("" for an empty s).
Public Function URLEncodeUTF8(ByVal s)
    Dim i, ch, uni, tp

    tp = ""
    For i = 1 To Len(s)
        ch = Mid(s, i, 1)
        uni = CLng(AscW(ch))
        ' AscW returns a signed 16-bit Integer; map negatives to 32768..65535.
        If uni < 0 Then uni = uni + 65536

        If uni < 128 Then
            ' ch is already the character; Chr() on it would be a type mismatch.
            tp = tp & ch
        ElseIf uni < 2048 Then
            ' "\" is integer division; "/" yields a Double whose rounding
            ' corrupts the upper bits.
            tp = tp & "%" & Hex(&HC0 Or (uni \ 64)) _
                    & "%" & Hex(&H80 Or (uni And &H3F))
        Else
            tp = tp & "%" & Hex(&HE0 Or (uni \ 4096)) _
                    & "%" & Hex(&H80 Or ((uni \ 64) And &H3F)) _
                    & "%" & Hex(&H80 Or (uni And &H3F))
        End If
    Next
    URLEncodeUTF8 = tp
End Function

' True when the character code c is one of 0-9, A-F or a-f.
Public Function isxdigit(c)
    Dim isDecimal, isUpperHex, isLowerHex
    isDecimal = (c >= 48 And c <= 57)
    isUpperHex = (c >= 65 And c <= 70)
    isLowerHex = (c >= 97 And c <= 102)
    isxdigit = CBool(isDecimal Or isUpperHex Or isLowerHex)
End Function

' True when the code c lies strictly between 0 and 128 (0 itself is excluded).
Public Function isascii(c)
    Dim aboveZero, belowLimit
    aboveZero = (c > 0)
    belowLimit = (c < 128)
    isascii = CBool(aboveZero And belowLimit)
End Function

' True when the byte value u is in &H80..&HBF, the range of a UTF-8
' continuation ("body") byte.
Public Function IsUTF8Body(ByVal u)
    Dim inBodyRange
    inBodyRange = (u >= &H80 And u <= &HBF)
    IsUTF8Body = CBool(inBodyRange)
End Function

' Returns how many bytes the UTF-8 sequence starting with lead byte u has
' (1 to 6), or 0 when u is not in any of the recognised lead-byte ranges
' (this includes 0, &H80..&HBF, &HFE and &HFF).
Private Function UTF8Byte(ByVal u)
    Dim length
    length = 0
    ' The ranges are disjoint, so they can be tested in any order.
    If u >= &HFC And u <= &HFD Then
        length = 6
    ElseIf u >= &HF8 And u <= &HFB Then
        length = 5
    ElseIf u >= &HF0 And u <= &HF7 Then
        length = 4
    ElseIf u >= &HE0 And u <= &HEF Then
        length = 3
    ElseIf u >= &HC0 And u <= &HDF Then
        length = 2
    ElseIf u > &H00 And u <= &H7F Then
        length = 1
    End If
    UTF8Byte = length
End Function

' Checks whether three consecutive bytes look like a UTF-8 character:
' u1 must be in &HC0..&HFD and both u2 and u3 must satisfy IsUTF8Body.
' The two trailing bytes are only examined when u1 is in range.
Private Function UTF8Test(ByVal u1, ByVal u2, ByVal u3)
    Dim leadInRange
    leadInRange = CBool(u1 >= &HC0 And u1 <= &HFD)
    If Not leadInRange Then
        UTF8Test = False
    Else
        UTF8Test = CBool(IsUTF8Body(u2) And IsUTF8Body(u3))
    End If
End Function

' True when s has at least two characters and its first two characters
' are both hexadecimal digits (as decided by isxdigit).
Private Function ishex(s)
    Dim pos
    ishex = False
    If Len(s) < 2 Then Exit Function
    For pos = 1 To 2
        If Not isxdigit(Asc(Mid(s, pos, 1))) Then Exit Function
    Next
    ishex = True
End Function

' True when s starts with "u" or "U" followed by four hexadecimal digits,
' i.e. the part of a "%uXXXX" escape that follows the percent sign.
Private Function isescape(s)
    Dim pos
    isescape = False
    If Len(s) < 5 Then Exit Function
    If UCase(Mid(s, 1, 1)) <> "U" Then Exit Function
    ' Characters 2..5 must all be hex digits.
    For pos = 2 To 5
        If Not isxdigit(Asc(Mid(s, pos, 1))) Then Exit Function
    Next
    isescape = True
End Function

' Converts the first two characters of s, read as hexadecimal, to an Integer.
Private Function AscX(s)
    Dim hexPair
    hexPair = Mid(s, 1, 2)
    AscX = CInt("&H" & hexPair)
End Function

' URLDecode, full version.
' Supports strings encoded by Server.URLEncode, UTF-8 URLEncode and Escape.
'
' s       : the encoded string.
' returns : the decoded string; "" when splitting s yields no segments.
'
' The input is split on "%", so every segment after the first begins with
' whatever followed a percent sign. The loop index i is advanced inside the
' loop body when a multi-byte character consumes the following segment(s).
Public Function URLDecode(s)
Dim tp
Dim i
Dim tl
Dim pp
Dim a, b, c
Dim h
URLDecode = ""
' "+" becomes a space, then the text is cut at every "%".
tp = Split(Replace(s, "+", " "), "%")
tl = UBound(tp)
If tl = -1 Then Exit Function
' tp(0) is the text before the first "%"; it is copied as-is.
pp = tp(0)
For i = 1 To tl
If isescape(tp(i)) Then
' "uXXXX" form: the four hex digits give the character code, and the rest
' of the segment (from position 6) is literal text.
pp = pp & ChrW("&H" & Mid(tp(i), 2, 4)) & Mid(tp(i), 6)
ElseIf ishex(tp(i))=False Then
' Not a hex escape: the segment is appended without the "%" removed by Split.
pp = pp & tp(i)
Else
a = AscX(tp(i))
' A non-ASCII byte in a segment of exactly two characters means the next
' segment follows immediately, so a may be the first byte of a multi-byte
' character.
If isascii(a)=False And Len(tp(i))=2 Then
' No following segment: the loop ends and this byte is not emitted.
If (i+1)>tl Then Exit For
' NOTE(review): tp(i+1) is not checked with ishex before AscX, so a
' non-hex segment here would make CInt fail - confirm.
b = AscX(tp(i+1))
If (i+2)>tl Then
' Only two bytes available: combine them into one 16-bit code for Chr.
' Presumably a double-byte (DBCS) character as produced by
' Server.URLEncode; the result depends on the system code page - TODO confirm.
pp = pp & Chr(a*2^8 Or b) & Mid(tp(i+1), 3)
i = i + 1
Else
' NOTE(review): tp(i+2) is likewise passed to AscX unchecked.
c = AscX(tp(i+2))
If UTF8Byte(a)=3 And UTF8Test(a,b,c)=True Then
' Three-byte UTF-8: 4 payload bits from a, 6 from b, 6 from c.
h = (a And &H0F) * 2 ^12 Or (b And &H3F) * 2 ^ 6 Or (c And &H3F)
' NOTE(review): h is built from masked, non-negative parts, so this
' adjustment looks unreachable - confirm.
If h<0 Then h = h + 65536
pp = pp & ChrW(h) & Mid(tp(i+2), 3)
i = i + 2
Else
' Otherwise the first two bytes are treated as one double-byte character.
pp = pp & Chr(a*2^8 Or b) & Mid(tp(i+1), 3)
i = i + 1
End If
End If
ElseIf isascii(a)=False Then
' Non-ASCII byte followed by literal text in the same segment: kept as-is.
pp = pp & tp(i)
Else
' Plain ASCII escape: decode the byte and keep the rest of the segment.
pp = pp & Chr(a) & Mid(tp(i), 3)
End If
End If
Next
URLDecode = pp
End Function

=====================================================================
http://www.sikeu.com/user1/yesu/archives/2005/20051117233125.html
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值