/**
 * Fetches a URL with a synchronous WinHTTP GET request and returns the body as text.
 *
 * When `charset` is supplied, the raw response bytes (ResponseBody) are decoded
 * through an ADODB.Stream using that charset. Otherwise the text produced by
 * WinHTTP itself (ResponseText) is returned unchanged.
 *
 * Errors raised by the COM objects (e.g. timeouts, unreachable host) are not
 * caught here and propagate to the caller.
 *
 * @param {string} sUrl - URL to request.
 * @param {string} [charset] - Charset name used to decode the body, e.g. "utf-8".
 *        If omitted or falsy, ResponseText is used.
 * @returns {string} The response text, or "" when the HTTP status is not 200.
 */
function GetHTMLByCharSet(sUrl, charset)
{
    var objXML = new ActiveXObject("WinHttp.WinHttpRequest.5.1");
    var html = "";

    // Timeouts in milliseconds: resolve, connect, send, receive.
    objXML.SetTimeouts(3000, 3000, 3000, 10000);
    // Must be synchronous (async = false): Status is read right after Send().
    objXML.Open("GET", sUrl, false);
    objXML.Send();

    if (objXML.Status === 200)
    {
        if (charset)
        {
            var stm = new ActiveXObject("adodb.stream");
            stm.Type = 1; // adTypeBinary: the body is written as raw bytes
            stm.Mode = 3; // adModeReadWrite; per the original author's note, other modes fail here
            stm.Open();
            try
            {
                stm.Write(objXML.ResponseBody);
                stm.Position = 0; // rewind before switching to text mode
                stm.Type = 2; // adTypeText
                stm.Charset = charset;
                html = stm.ReadText();
            }
            finally
            {
                // Always release the stream, even when decoding throws.
                stm.Close();
                stm = null;
            }
        }
        else
        {
            html = objXML.ResponseText;
        }
    }

    objXML = null;
    return html;
}
当页面响应头 Content-Type 中没有 charset 时,WinHttp 5.1 会按 ANSI 编码解析响应内容,所以内容为 UTF-8 且其中含有中文时就会乱码。上面这段代码可以用 GetHTMLByCharSet(url, 'utf-8') 先获取二进制数据(官方解释是无符号字节数组),再按指定编码转换;但使用 adodb.stream 时,遇到 emoji 字符会转换成乱码,所以调整成下面这个版本,改用 Chilkat BinData 来处理。
/**
 * Creates a Chilkat 9.5.0 ActiveX object by its short class name.
 *
 * The ProgID is built by prefixing `objstr` with "Chilkat_9_5_0.".
 * NOTE(review): the name suggests this is meant for classes that can be used
 * without calling Chilkat's unlock routine first — confirm against the callers.
 *
 * @param {string} objstr - Class name after the "Chilkat_9_5_0." prefix, e.g. "BinData".
 * @returns {Object} The newly created ActiveX object.
 */
function newChilkat950ByNotUnlock(objstr)
{
    var progId = "Chilkat_9_5_0." + objstr;
    return new ActiveXObject(progId);
}
/**
 * Fetches a URL with a synchronous WinHTTP GET request and returns the body as text,
 * decoding the raw bytes with a Chilkat BinData object when a charset is given.
 *
 * When `charset` is supplied, ResponseBody is loaded into a BinData object
 * (created via newChilkat950ByNotUnlock) and converted with GetString(charset).
 * Otherwise the text produced by WinHTTP itself (ResponseText) is returned.
 *
 * Errors raised by the COM objects (e.g. timeouts, unreachable host) are not
 * caught here and propagate to the caller.
 *
 * @param {string} sUrl - URL to request.
 * @param {string} [charset] - Charset name used to decode the body, e.g. "utf-8".
 *        If omitted or falsy, ResponseText is used.
 * @returns {string} The response text, or "" when the HTTP status is not 200.
 */
function GetHTMLByDLL(sUrl, charset)
{
    var objXML = new ActiveXObject("WinHttp.WinHttpRequest.5.1");
    var html = "";

    // Timeouts in milliseconds: resolve, connect, send, receive.
    objXML.SetTimeouts(3000, 3000, 3000, 10000);
    // Must be synchronous (async = false): Status is read right after Send().
    objXML.Open("GET", sUrl, false);
    objXML.Send();

    if (objXML.Status === 200)
    {
        if (charset)
        {
            var binData = newChilkat950ByNotUnlock("BinData");
            binData.LoadBinary(objXML.ResponseBody);
            html = binData.GetString(charset);
        }
        else
        {
            html = objXML.ResponseText;
        }
    }

    objXML = null;
    return html;
}