Javascript url 解码算法(utf-8) by shawl.qiu
注:
上一篇相关文章:
javascript utf-8 url 编码 之 纯算法 解码 by shawl.qiu 2007-9-24
http://blog.csdn.net/btbtd/archive/2007/09/24/1797805.aspx
由于理解不全, 内容作废.
说明:
写这个大部分原因是想了解编码集, 至于这个东西的用处嘛...基本没用.
现在明白, UTF-8 编码按每个字符所占的字节数分为四个分区 (一字节, 二字节, 三字节, 四字节),
第一, 二, 三分区是常用区, 第四分区极少使用, 也是待扩展的部分...
解码 URL 编码时, 得先识别分区
第一区是一字节, 对应编码是 %xx
第二分区是二字节, 对应编码是 %xx%xx
....
以此类推.
至于分区边界, 请查看相关 RFC 文档 (RFC 3629)...
值得一提的是, 我在测试时想使用第四区的字符, 但得到的 UTF-8 编码并不是四字节, 而是三字节. (可能的原因: String.fromCharCode 只保留码点的低 16 位, 所以实际生成的并不是第四区的字符.)
shawl.qiu
2007-09-25
http://blog.csdn.net/btbtd
下载:
http://files.myopera.com/btbtd/javascript/function/fDecodingPEUtf8.7z
演示:
http://files.myopera.com/btbtd/javascript/function/fDecodingPEUtf8(Fix).htm
内容:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>shawl.qiu template</title>
<style type="text/css">
/* <![CDATA[ */
/* ]]> */
</style>
<script type="text/javascript">
/*<![CDATA[*/
// Internet Explorer only: maximize the browser window.
if ("Microsoft Internet Explorer" == navigator.appName)
{
    // Move the window 5px past the top-left corner of the screen, then size
    // it 8px beyond the available area; the resulting window is actually a
    // little larger than the original (maximized) one.
    self.moveTo(-5, -5);
    self.resizeTo(screen.availWidth + 8, screen.availHeight + 8);
}
/*]]>*/
</script>
</head>
<body>
<script type="text/javascript">
/*<![CDATA[*/
// Demo driver: build a sample covering several character ranges,
// percent-encode it with the built-in encodeURI, decode it again with
// fDecodingPEUtf8, and print all three values into the page.
var IWrite = document; // output target, also used by the debug statements below
var str = fGeneratingBoundStr(65, 100);
var pe = encodeURI(str);
var d = fDecodingPEUtf8(pe);
(function (aRows)
{
    // One paragraph per label/value pair, followed by a horizontal rule.
    for (var k = 0; k < aRows.length; k++)
    {
        IWrite.write(aRows[k][0] + aRows[k][1]);
    }
    IWrite.write("<hr/>");
})([
    ["<p/>str: ", str],
    ["<p/> pe: ", pe],
    ["<p/> d: ", d]
]);
//-----------------------------------------------
// PE = percent encoding
/**
 * Decodes a percent-encoded (PE) UTF-8 string.
 *
 * - Input that contains no "%" is returned unchanged (including any "+").
 * - Otherwise every "+" becomes a space and each well-formed %xx sequence
 *   is replaced by the character it encodes.
 * - Anything that is not a well-formed sequence is left in the string
 *   untouched: non-hex digits, a truncated sequence, a continuation byte
 *   outside 80-BF, an overlong or surrogate 3-byte form, or a 4-byte
 *   sequence for which fGetPEUtf8 does not return a surrogate pair.
 *
 * @param {string} sUtf8PE percent-encoded text, e.g. "%E4%B8%AD"
 * @returns {string} the decoded text
 */
function fDecodingPEUtf8(sUtf8PE) // %xx[,%xx]
{ // shawl.qiu code, return string; Func: fGetPEUtf8, fGetPEUtf8Bound
    if(sUtf8PE.indexOf("%")===-1) return sUtf8PE;

    sUtf8PE = sUtf8PE.replace(/\+/g, " ");

    var rHexByte = /^[0-9A-Fa-f]{2}$/;           // exactly two hex digits
    var rTail = /^(?:%[89ABab][0-9A-Fa-f])+$/;   // continuation bytes 80-BF only

    // The string shrinks while decoding, so the length is re-read each pass.
    for(var i=0; i<sUtf8PE.length; i++)
    {
        var iIndex = sUtf8PE.indexOf("%", i);
        if(iIndex===-1) break;

        // Resume right after this "%" unless a replacement moves i further;
        // the loop's own i++ performs the step.
        i = iIndex;

        var sHex = sUtf8PE.slice(iIndex+1, iIndex+3);
        if(!rHexByte.test(sHex)) continue;

        // Number of bytes announced by the lead byte (0 = not a lead byte).
        var iBound = fGetPEUtf8Bound(sHex);
        if(iBound===0) continue;

        var iEnd = iIndex+3*iBound; // index just past the whole %xx.. sequence
        var sChar;

        if(iBound===1)
        {
            sChar = String.fromCharCode(parseInt(sHex, 16));
        }
        else
        {
            var sTail = sUtf8PE.slice(iIndex+3, iEnd);
            if(sTail.length!==3*(iBound-1) || !rTail.test(sTail)) continue;

            sChar = fGetPEUtf8(sUtf8PE.slice(iIndex, iEnd));

            var iUnit = sChar.charCodeAt(0);
            // Reject overlong 3-byte forms and encoded UTF-16 surrogates.
            if(iBound===3 && (iUnit<0x800 || (iUnit>=0xD800 && iUnit<=0xDFFF))) continue;
            // A 4-byte sequence is only valid as a surrogate pair; a single
            // unit means the code point was out of range or cut to 16 bits.
            if(iBound===4 && sChar.length!==2) continue;
        }

        sUtf8PE = sUtf8PE.slice(0, iIndex) + sChar + sUtf8PE.slice(iEnd);

        // Skip the decoded text so a decoded "%" is never decoded twice.
        i += sChar.length-1;
    } // end for

    return sUtf8PE;
} // end function fDecodingPEUtf8(sUtf8PE)
// PE = percent encoding
/**
 * Converts ONE percent-encoded UTF-8 sequence ("%xx", "%xx%xx", ...) into
 * the character it encodes.
 *
 * The number of %xx groups decides how many payload bits are taken from the
 * first byte; every following byte contributes its low six bits. The bytes
 * are not validated here.
 *
 * @param {string} sUtf8PE a single sequence, e.g. "%E4%B8%AD"
 * @returns {string} one character; a UTF-16 surrogate pair for code points
 *                   above 0xFFFF; "\uFFFD" when the value exceeds 0x10FFFF
 */
function fGetPEUtf8(sUtf8PE) // %xx[,%xx]
{ // shawl.qiu code, return string
    // Drop the leading "%" so the split yields one hex pair per byte.
    var aHex = sUtf8PE.replace(/%/, "").split("%");
    var iCount = aHex.length;
    var iCode = parseInt(aHex[0], 16);

    if(iCount>1)
    {
        // Lead byte: strip the length marker bits (110x xxxx, 1110 xxxx, 1111 0xxx).
        iCode &= 0xFF >> (iCount+1);
        for(var i=1; i<iCount; i++)
        {
            // Continuation byte 10xx xxxx: append its six payload bits.
            iCode = iCode*64 + (parseInt(aHex[i], 16) & 0x3F);
        }
    }

    // Beyond the Unicode range: no character exists for this value.
    if(iCode>0x10FFFF) return "\uFFFD";

    if(iCode>0xFFFF)
    {
        // String.fromCharCode keeps only 16 bits per argument, so a
        // supplementary code point has to be emitted as a surrogate pair.
        iCode -= 0x10000;
        return String.fromCharCode(0xD800 + (iCode >> 10), 0xDC00 + (iCode & 0x3FF));
    }

    return String.fromCharCode(iCode);
} // end function fGetPEUtf8(sUtf8PE)
// PE = percent encoding
/**
 * Classifies a single percent-encoded byte as a UTF-8 lead byte and reports
 * how many bytes the sequence it starts consists of.
 *
 * @param {string} cUtf8PE one byte as "%xx" or "xx" (the first "%" is ignored);
 *                         parsed with parseInt, which stops at the first
 *                         non-hex character
 * @returns {number} 1 for 00-7F, 2 for C2-DF, 3 for E0-EF, 4 for F0-F4,
 *                   0 for every other value (including unparsable input)
 */
function fGetPEUtf8Bound(cUtf8PE) // %xx
{ // shawl.qiu code, return integer
    var iCharCode = parseInt(cUtf8PE.replace(/%/, ""), 16);

    // A NaN code fails every comparison below and falls through to 0.
    //00-7F
    if(iCharCode>=0x00&&iCharCode<=0x7F) return 1;
    //C2-DF
    if(iCharCode>=0xC2&&iCharCode<=0xDF) return 2;
    //E0-EF
    if(iCharCode>=0xE0&&iCharCode<=0xEF) return 3;
    //F0-F4
    if(iCharCode>=0xF0&&iCharCode<=0xF4) return 4;

    return 0;
} // end function fGetPEUtf8Bound(cUtf8PE)
/**
 * Builds test characters from four code point ranges, one range per UTF-8
 * sequence length (1, 2, 3 and 4 bytes).
 *
 * For each range start (0x0, 0x80, 0x800+19000, 0x10000) the function emits
 * iLen consecutive code points beginning at start + iPadBegin.
 *
 * @param {number} [iPadBegin=0] offset added to every range start
 * @param {number} [iLen=10] number of characters per range (a falsy value,
 *                           including 0, selects the default)
 * @returns {string[]} array with one character per element; characters
 *                     above 0xFFFF are two-unit surrogate pairs
 */
function fGeneratingBoundStr(iPadBegin, iLen)
{
    if(!iPadBegin) iPadBegin = 0;
    if(!iLen) iLen = 10;

    var aRangeStart = [0x000000, 0x000080, 0x000800+19000, 0x010000];
    var Ar = [];

    for(var i=0; i<aRangeStart.length; i++)
    {
        var iLBound = aRangeStart[i]+iPadBegin;
        var iUBound = iLBound+iLen;

        for(; iLBound<iUBound; iLBound++)
        {
            if(iLBound>0xFFFF)
            {
                // String.fromCharCode keeps only 16 bits per argument, so
                // supplementary code points are built as a surrogate pair.
                var iOffset = iLBound-0x10000;
                Ar.push(String.fromCharCode(0xD800 + (iOffset >> 10), 0xDC00 + (iOffset & 0x3FF)));
            }
            else
            {
                Ar.push(String.fromCharCode(iLBound));
            }
        }
    }

    return Ar;
}
/*]]>*/
</script>
</body>
</html>