Javascript url 编码算法(utf) by shawl.qiu
说明:
这个算法没考虑四字节的编码(目前没见过四字节的编码).
另外使用 -1(escape) 选项时, 会丢失数据, 不过 0, 1 没问题.
在我的印象中, escape 好像是专门为 codepage 936 而设计的...因此当 unicode 的某些字符在 codepage 936 中不存在时, 解码就会出错...
至于 gb url 编码解码 的算法...使用JS的话, 就必须得用 编码表(code table)
目前已实现, 以后再发表相关文章.
对 gb 编码解码感兴趣的话, 到 下面URL弄个gb-uni 对应表:
http://www.herongyang.com/gb2312/ug_map.html
shawl.qiu
2007-09-26
http://blog.csdn.net/btbtd
下载:
http://files.myopera.com/btbtd/javascript/function/IEncodeURI.7z
演示:
http://files.myopera.com/btbtd/javascript/function/IEncodeURI(encodeURI).htm
http://files.myopera.com/btbtd/javascript/function/IEncodeURI(encodeURIComponent).htm
内容(IEncodeURI(encodeURI).htm):
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>shawl.qiu template </title>
<style type="text/css">
/* <![CDATA[ */
/* ]]> */
</style>
<script type="text/javascript">
/*<![CDATA[*/
// Internet Explorer only: stretch the window over the available screen area.
if (navigator.appName == "Microsoft Internet Explorer")
{
    // Maximise the window.
    self.moveTo(-5, -5);
    // The size requested here is in fact slightly larger than the original window.
    self.resizeTo(screen.availWidth + 8, screen.availHeight + 8);
}
/*]]>*/
</script>
</head>
<body>
<script type="text/javascript">
/*<![CDATA[*/
// Demo: build a sample string, encode it with the native encodeURI and with
// IEncodeURI, decode the IEncodeURI output again, and print all four values.
var Str = fGeneratingBoundStr(1, 127),
    sPe = encodeURI(Str.join("")),
    sEn = IEncodeURI(Str.join("")),
    sDe = decodeURI(sEn);
document.write("<p/>Str: ".concat(Str.join("")));
document.write("<p/>sPe: ".concat(sPe));
document.write("<p/>sEn: ".concat(sEn));
document.write("<p/>sDe: ".concat(sDe));
document.write("<hr/>");
/**
 * Percent-encodes a string as UTF-8.
 *
 * Every UTF-16 code unit is handed to fUtfCharToPE, except that a valid
 * surrogate pair (high unit followed by low unit) is combined here into one
 * supplementary code point and written as a single four-byte UTF-8 sequence.
 * Encoding the two halves separately would yield two three-byte sequences,
 * which is not valid UTF-8.
 *
 * @param {string} sStr - text to encode
 * @param {number} [iOneByteStyle] - passed through unchanged to fUtfCharToPE
 * @returns {string} the percent-encoded text
 */
function IEncodeURI(sStr, iOneByteStyle)
{// shawl.qiu code, return string; Func: fUtfCharToPE
    var iLen = sStr.length;
    var aOut = [];
    for (var i = 0; i < iLen; i++)
    {
        var iHigh = sStr.charCodeAt(i);
        if (iHigh >= 0xD800 && iHigh <= 0xDBFF && i + 1 < iLen)
        {
            var iLow = sStr.charCodeAt(i + 1);
            if (iLow >= 0xDC00 && iLow <= 0xDFFF)
            {
                // U+10000..U+10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                var iCode = 0x10000 + ((iHigh - 0xD800) << 10) + (iLow - 0xDC00);
                var aHex = [
                    (0xF0 | (iCode >> 18)).toString(16),
                    (0x80 | ((iCode >> 12) & 0x3F)).toString(16),
                    (0x80 | ((iCode >> 6) & 0x3F)).toString(16),
                    (0x80 | (iCode & 0x3F)).toString(16)
                ];
                aOut.push("%" + aHex.join("%").toUpperCase());
                i++; // the low surrogate has been consumed as well
                continue;
            }
        }
        // Everything else (including unpaired surrogates) goes through the
        // per-character encoder exactly as before.
        aOut.push(fUtfCharToPE(sStr.charAt(i), iOneByteStyle));
    }
    return aOut.join("");
} // end function IEncodeURI(Source)
/*
iOneByteStyle selects which ASCII characters (U+0000 - U+007F) are left as-is:
Enum{-1, 0, 1}:
-1 = escape style,              unencoded: A-Z a-z 0-9 @ * _ + - . /
 0 = encodeURI style (default), unencoded: A-Z a-z 0-9 ; , / ? : @ & = + $ - _ . ! ~ * ' ( ) #
 1 = encodeURIComponent style,  unencoded: A-Z a-z 0-9 - _ . ! ~ * ' ( )
Any other value is handled like 0.
Characters above U+007F are always written as UTF-8 percent sequences,
whatever the style. Original author's note: with -1 on a UTF-8 page,
decoding the result with unescape() loses data.
*/
/**
 * Percent-encodes one character as UTF-8.
 *
 * @param {string} sChar - the character to encode. Normally a single UTF-16
 *        code unit; a two-unit string holding a valid surrogate pair is
 *        encoded as one four-byte sequence. An unpaired surrogate is encoded
 *        as a three-byte sequence (unchanged legacy behaviour).
 * @param {number} [iOneByteStyle=0] - see the table above
 * @returns {string} sChar itself when it may stay unencoded, otherwise its
 *          "%XX" sequence(s) in upper-case hex; "" for empty input
 */
function fUtfCharToPE(sChar, iOneByteStyle)
{ // shawl.qiu code, return string
    var iCode = sChar.charCodeAt(0);
    // Empty input has no code unit: emit nothing instead of a stray "%".
    if (isNaN(iCode)) return "";

    // Combine a valid surrogate pair into its supplementary code point.
    if (iCode >= 0xD800 && iCode <= 0xDBFF && sChar.length > 1)
    {
        var iLow = sChar.charCodeAt(1);
        if (iLow >= 0xDC00 && iLow <= 0xDFFF)
            iCode = 0x10000 + ((iCode - 0xD800) << 10) + (iLow - 0xDC00);
    }

    var aBytes;
    if (iCode < 0x80)
    {
        // Negated classes: a match means "must be percent-encoded".
        // "-" and "/" are escaped with a backslash; using "/" as the escape
        // character turned "/-/" into a range and dropped "-" from the set.
        var rMustEncode;
        switch (iOneByteStyle)
        {
            case -1: rMustEncode = /[^A-Za-z0-9@*_+\-.\/]/; break;
            case 1:  rMustEncode = /[^A-Za-z0-9\-_.!~*'()]/; break;
            default: rMustEncode = /[^A-Za-z0-9;,\/?:@&=+$\-_.!~*'()#]/; break;
        }
        if (!rMustEncode.test(sChar)) return sChar;
        aBytes = [iCode];
    }
    else if (iCode < 0x800)
    {
        // 110xxxxx 10xxxxxx
        aBytes = [0xC0 | (iCode >> 6),
                  0x80 | (iCode & 0x3F)];
    }
    else if (iCode < 0x10000)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        aBytes = [0xE0 | (iCode >> 12),
                  0x80 | ((iCode >> 6) & 0x3F),
                  0x80 | (iCode & 0x3F)];
    }
    else
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        aBytes = [0xF0 | (iCode >> 18),
                  0x80 | ((iCode >> 12) & 0x3F),
                  0x80 | ((iCode >> 6) & 0x3F),
                  0x80 | (iCode & 0x3F)];
    }

    var aOut = [];
    for (var i = 0; i < aBytes.length; i++)
    {
        var sHex = aBytes[i].toString(16).toUpperCase();
        // Control characters below 0x10 need a leading zero.
        aOut.push(sHex.length < 2 ? "%0" + sHex : "%" + sHex);
    }
    return aOut.join("");
} // end function fUtfCharToPE(sChar, iOneByteStyle)
/**
 * Repeats a string a given number of times.
 *
 * Called with no arguments it returns two spaces; called with only sStr it
 * repeats sStr twice.
 *
 * @param {string} sStr - the text to repeat
 * @param {number} iTime - how many copies to produce
 * @returns {string} sStr concatenated iTime times
 */
function fStringSpace(sStr, iTime)
{ // shawl.qiu code, return string
    var sUnit = sStr;
    var iCount = iTime;
    var iGiven = arguments.length;
    if (iGiven < 2)
    {
        iCount = 2;
        if (iGiven < 1) sUnit = ' ';
    }
    // Joining iCount+1 empty slots yields exactly iCount separators.
    var aSlots = new Array(iCount + 1);
    return aSlots.join(sUnit);
} // end function fStringSpace
/**
 * Returns how many bytes the UTF-8 encoding of a code point occupies.
 *
 *   000000 - 00007F -> 1
 *   000080 - 0007FF -> 2
 *   000800 - 00FFFF -> 3
 *   010000 - 10FFFF -> 4
 *
 * No variables are assigned, so nothing leaks into the global scope (the
 * chained "var a = b = 0" form created an implicit global and throws in
 * strict-mode / module code).
 *
 * @param {number} iCharCode - the code point to classify
 * @returns {number} 1 to 4, or 0 when the value lies outside every range
 */
function fGetUtfCharBound(iCharCode)
{ // shawl.qiu code, return integer
    // Same open-interval comparisons as before (value > low-1, value < high+1).
    if (!(iCharCode > -1)) return 0;
    if (iCharCode < 0x000080) return 1;
    if (iCharCode < 0x000800) return 2;
    if (iCharCode < 0x010000) return 3;
    if (iCharCode < 0x110000) return 4;
    return 0;
} // end function fGetUtfCharBound(iCharCode)
/**
 * Builds an array of sample characters for exercising the encoder: iLen
 * characters from each of four ranges, each range starting at its base
 * offset plus iPadBegin.
 *
 * Range bases: 0x000000, 0x000080, 0x000800+19000 and 0x010000.
 *
 * NOTE: String.fromCharCode keeps only the low 16 bits of its argument, so
 * the fourth range does not produce supplementary characters; it repeats
 * values from the start of the first range. This is left as it is because
 * the demo feeds the result through decodeURI.
 *
 * @param {number} [iPadBegin=0] - offset added to every range base
 * @param {number} [iLen=10] - number of characters taken from each range
 * @returns {string[]} one single-character string per generated code
 */
function fGeneratingBoundStr(iPadBegin, iLen)
{
    if (!iPadBegin) iPadBegin = 0;
    if (!iLen) iLen = 10;
    var aChars = [];
    var aBases = [0, 0x000080, 0x000800 + 19000, 0x010000];
    // Both bounds are declared locally; the chained "var a = b = 0" form
    // leaked iUBound as an implicit global.
    var iLBound, iUBound;
    for (var j = 0; j < aBases.length; j++)
    {
        iLBound = aBases[j] + iPadBegin;
        iUBound = iLBound + iLen;
        for (; iLBound < iUBound; iLBound++)
        {
            aChars.push(String.fromCharCode(iLBound));
        }
    }
    return aChars;
}
/*]]>*/
</script>
</body>
</html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>shawl.qiu template </title>
<style type="text/css">
/* <![CDATA[ */
/* ]]> */
</style>
<script type="text/javascript">
/*<![CDATA[*/
// Internet Explorer only: stretch the window over the available screen area.
if (navigator.appName == "Microsoft Internet Explorer")
{
    // Maximise the window.
    self.moveTo(-5, -5);
    // The size requested here is in fact slightly larger than the original window.
    self.resizeTo(screen.availWidth + 8, screen.availHeight + 8);
}
/*]]>*/
</script>
</head>
<body>
<script type="text/javascript">
/*<![CDATA[*/
// Demo: build a sample string, encode it with the native encodeURI and with
// IEncodeURI, decode the IEncodeURI output again, and print all four values.
var Str = fGeneratingBoundStr(1, 127),
    sPe = encodeURI(Str.join("")),
    sEn = IEncodeURI(Str.join("")),
    sDe = decodeURI(sEn);
document.write("<p/>Str: ".concat(Str.join("")));
document.write("<p/>sPe: ".concat(sPe));
document.write("<p/>sEn: ".concat(sEn));
document.write("<p/>sDe: ".concat(sDe));
document.write("<hr/>");
/**
 * Percent-encodes a string as UTF-8.
 *
 * Every UTF-16 code unit is handed to fUtfCharToPE, except that a valid
 * surrogate pair (high unit followed by low unit) is combined here into one
 * supplementary code point and written as a single four-byte UTF-8 sequence.
 * Encoding the two halves separately would yield two three-byte sequences,
 * which is not valid UTF-8.
 *
 * @param {string} sStr - text to encode
 * @param {number} [iOneByteStyle] - passed through unchanged to fUtfCharToPE
 * @returns {string} the percent-encoded text
 */
function IEncodeURI(sStr, iOneByteStyle)
{// shawl.qiu code, return string; Func: fUtfCharToPE
    var iLen = sStr.length;
    var aOut = [];
    for (var i = 0; i < iLen; i++)
    {
        var iHigh = sStr.charCodeAt(i);
        if (iHigh >= 0xD800 && iHigh <= 0xDBFF && i + 1 < iLen)
        {
            var iLow = sStr.charCodeAt(i + 1);
            if (iLow >= 0xDC00 && iLow <= 0xDFFF)
            {
                // U+10000..U+10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                var iCode = 0x10000 + ((iHigh - 0xD800) << 10) + (iLow - 0xDC00);
                var aHex = [
                    (0xF0 | (iCode >> 18)).toString(16),
                    (0x80 | ((iCode >> 12) & 0x3F)).toString(16),
                    (0x80 | ((iCode >> 6) & 0x3F)).toString(16),
                    (0x80 | (iCode & 0x3F)).toString(16)
                ];
                aOut.push("%" + aHex.join("%").toUpperCase());
                i++; // the low surrogate has been consumed as well
                continue;
            }
        }
        // Everything else (including unpaired surrogates) goes through the
        // per-character encoder exactly as before.
        aOut.push(fUtfCharToPE(sStr.charAt(i), iOneByteStyle));
    }
    return aOut.join("");
} // end function IEncodeURI(Source)
/*
iOneByteStyle selects which ASCII characters (U+0000 - U+007F) are left as-is:
Enum{-1, 0, 1}:
-1 = escape style,              unencoded: A-Z a-z 0-9 @ * _ + - . /
 0 = encodeURI style (default), unencoded: A-Z a-z 0-9 ; , / ? : @ & = + $ - _ . ! ~ * ' ( ) #
 1 = encodeURIComponent style,  unencoded: A-Z a-z 0-9 - _ . ! ~ * ' ( )
Any other value is handled like 0.
Characters above U+007F are always written as UTF-8 percent sequences,
whatever the style. Original author's note: with -1 on a UTF-8 page,
decoding the result with unescape() loses data.
*/
/**
 * Percent-encodes one character as UTF-8.
 *
 * @param {string} sChar - the character to encode. Normally a single UTF-16
 *        code unit; a two-unit string holding a valid surrogate pair is
 *        encoded as one four-byte sequence. An unpaired surrogate is encoded
 *        as a three-byte sequence (unchanged legacy behaviour).
 * @param {number} [iOneByteStyle=0] - see the table above
 * @returns {string} sChar itself when it may stay unencoded, otherwise its
 *          "%XX" sequence(s) in upper-case hex; "" for empty input
 */
function fUtfCharToPE(sChar, iOneByteStyle)
{ // shawl.qiu code, return string
    var iCode = sChar.charCodeAt(0);
    // Empty input has no code unit: emit nothing instead of a stray "%".
    if (isNaN(iCode)) return "";

    // Combine a valid surrogate pair into its supplementary code point.
    if (iCode >= 0xD800 && iCode <= 0xDBFF && sChar.length > 1)
    {
        var iLow = sChar.charCodeAt(1);
        if (iLow >= 0xDC00 && iLow <= 0xDFFF)
            iCode = 0x10000 + ((iCode - 0xD800) << 10) + (iLow - 0xDC00);
    }

    var aBytes;
    if (iCode < 0x80)
    {
        // Negated classes: a match means "must be percent-encoded".
        // "-" and "/" are escaped with a backslash; using "/" as the escape
        // character turned "/-/" into a range and dropped "-" from the set.
        var rMustEncode;
        switch (iOneByteStyle)
        {
            case -1: rMustEncode = /[^A-Za-z0-9@*_+\-.\/]/; break;
            case 1:  rMustEncode = /[^A-Za-z0-9\-_.!~*'()]/; break;
            default: rMustEncode = /[^A-Za-z0-9;,\/?:@&=+$\-_.!~*'()#]/; break;
        }
        if (!rMustEncode.test(sChar)) return sChar;
        aBytes = [iCode];
    }
    else if (iCode < 0x800)
    {
        // 110xxxxx 10xxxxxx
        aBytes = [0xC0 | (iCode >> 6),
                  0x80 | (iCode & 0x3F)];
    }
    else if (iCode < 0x10000)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        aBytes = [0xE0 | (iCode >> 12),
                  0x80 | ((iCode >> 6) & 0x3F),
                  0x80 | (iCode & 0x3F)];
    }
    else
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        aBytes = [0xF0 | (iCode >> 18),
                  0x80 | ((iCode >> 12) & 0x3F),
                  0x80 | ((iCode >> 6) & 0x3F),
                  0x80 | (iCode & 0x3F)];
    }

    var aOut = [];
    for (var i = 0; i < aBytes.length; i++)
    {
        var sHex = aBytes[i].toString(16).toUpperCase();
        // Control characters below 0x10 need a leading zero.
        aOut.push(sHex.length < 2 ? "%0" + sHex : "%" + sHex);
    }
    return aOut.join("");
} // end function fUtfCharToPE(sChar, iOneByteStyle)
/**
 * Repeats a string a given number of times.
 *
 * Called with no arguments it returns two spaces; called with only sStr it
 * repeats sStr twice.
 *
 * @param {string} sStr - the text to repeat
 * @param {number} iTime - how many copies to produce
 * @returns {string} sStr concatenated iTime times
 */
function fStringSpace(sStr, iTime)
{ // shawl.qiu code, return string
    var sUnit = sStr;
    var iCount = iTime;
    var iGiven = arguments.length;
    if (iGiven < 2)
    {
        iCount = 2;
        if (iGiven < 1) sUnit = ' ';
    }
    // Joining iCount+1 empty slots yields exactly iCount separators.
    var aSlots = new Array(iCount + 1);
    return aSlots.join(sUnit);
} // end function fStringSpace
/**
 * Returns how many bytes the UTF-8 encoding of a code point occupies.
 *
 *   000000 - 00007F -> 1
 *   000080 - 0007FF -> 2
 *   000800 - 00FFFF -> 3
 *   010000 - 10FFFF -> 4
 *
 * No variables are assigned, so nothing leaks into the global scope (the
 * chained "var a = b = 0" form created an implicit global and throws in
 * strict-mode / module code).
 *
 * @param {number} iCharCode - the code point to classify
 * @returns {number} 1 to 4, or 0 when the value lies outside every range
 */
function fGetUtfCharBound(iCharCode)
{ // shawl.qiu code, return integer
    // Same open-interval comparisons as before (value > low-1, value < high+1).
    if (!(iCharCode > -1)) return 0;
    if (iCharCode < 0x000080) return 1;
    if (iCharCode < 0x000800) return 2;
    if (iCharCode < 0x010000) return 3;
    if (iCharCode < 0x110000) return 4;
    return 0;
} // end function fGetUtfCharBound(iCharCode)
/**
 * Builds an array of sample characters for exercising the encoder: iLen
 * characters from each of four ranges, each range starting at its base
 * offset plus iPadBegin.
 *
 * Range bases: 0x000000, 0x000080, 0x000800+19000 and 0x010000.
 *
 * NOTE: String.fromCharCode keeps only the low 16 bits of its argument, so
 * the fourth range does not produce supplementary characters; it repeats
 * values from the start of the first range. This is left as it is because
 * the demo feeds the result through decodeURI.
 *
 * @param {number} [iPadBegin=0] - offset added to every range base
 * @param {number} [iLen=10] - number of characters taken from each range
 * @returns {string[]} one single-character string per generated code
 */
function fGeneratingBoundStr(iPadBegin, iLen)
{
    if (!iPadBegin) iPadBegin = 0;
    if (!iLen) iLen = 10;
    var aChars = [];
    var aBases = [0, 0x000080, 0x000800 + 19000, 0x010000];
    // Both bounds are declared locally; the chained "var a = b = 0" form
    // leaked iUBound as an implicit global.
    var iLBound, iUBound;
    for (var j = 0; j < aBases.length; j++)
    {
        iLBound = aBases[j] + iPadBegin;
        iUBound = iLBound + iLen;
        for (; iLBound < iUBound; iLBound++)
        {
            aChars.push(String.fromCharCode(iLBound));
        }
    }
    return aChars;
}
/*]]>*/
</script>
</body>
</html>