// 不需要词库,直接分割网页内容来提取词语,并统计每个词语的出现次数,按从多到少排序;能够区分中英文词语。
//石卓林 2008-7-12 第二版.左右右左匹配版
function keywords(ftitle,ftbody){
this.trim = function(text){return text.replace(/(^\s*)|(\s*$)/g,'');}
this.title = ftitle;
this.tbody = ftbody.replace(/(\s+)/g,' ');//.substr(40,400);//截取最可能的内容此处数字需改进
this.tbody = this.trim(this.tbody);
this.tbodylen = this.tbody.length;
this.chardic = new ActiveXObject('Scripting.Dictionary');
this.tempasc = 0;
this.tempchar = '';
this.tempcharat='';
this.endchar = '。,:… (—)》《';
this.chscount = 0;
this.keys = new Array();
var oldchar='',oldcount=0;
for(var i=0;i<this.tbodylen;i++){
this.chscount = 0;
for(var j=1;j<=15;j++){//最长英文单词15
this.tempchar = this.tbody.substr(i,j);
this.tempasc = this.tempchar.charCodeAt(j-