文章截图 - 更好的排版
源代码下载
上一篇文章,我们按照 http://jsbeautifier.org/ 的基本框架,用我们自己的语言实现了对Html简单的格式化功能。
今天我们来完善这段JavaScript脚本,主要添加如下功能:
- 删除多余的空格,回车换行以及制表符
- 保持script和style标签里面的内容格式不变化
- 注释部分的格式不变化
- 标签属性赋值等号前后不能有空格(type="text/javascript")
- 自封闭标签/>之前必须有空格(<br />)
//
优化过的HTML-Beautify
function HtmlBeautify(source, indent_value) {
this .source = source;
this .indent_value = indent_value;
this .result = "" ;
this .parse();
}
// 分析并产生输出到this.result
HtmlBeautify.prototype.parse = function () {
var that = this ;
// 当前分析到哪个字符,当前标记值,标记类型,
// 输出数组,缩进级别,当前格式化内容(去掉多余空格)
var pos = 0 , token_value = "" , token_type = "" ,
output = [], indent_level = 0 , is_format_content = true ;
// 把这些标签作为Single Tag
var single_token = " br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed " .split( ' , ' );
var white_space = " \r\n\t " .split( "" );
// 获取下一个标记(首先获取正文,如果正文为空则获取标签)
function nextToken() {
var token_value_array = [], val = "" , space = false ;
// "<"之前的所有内容作为正文标签
while ((val = that.source[pos]) !== " < " ) {
if (pos & gt; = that.source.length) {
token_type = " END " ;
return ;
}
if (is_format_content) {
if ($.inArray(val, white_space) & gt; = 0 ) {
space = true ;
pos ++ ;
continue ;
}
if (space) {
token_value_array.push( " " );
space = false ;
}
}
token_value_array.push(val);
pos ++ ;
}
token_value = token_value_array.join( "" ).replace( / ^\n+|\n$ / g, "" );
if ($.trim(token_value) === "" ) {
// 如果正文标记为空,则获取标签标记
if ( ! is_format_content) {
is_format_content = true ;
}
nextTokenTag();
} else {
token_type = " CONTENT " ;
}
}
// 下一个标签标记
function nextTokenTag() {
var token_value_array = [], val = "" ,
tagName = "" , space = false , is_comment = false ;
// 这是一个注释标签
if (that.source[pos + 1 ] === " ! " &&
that.source[pos + 2 ] === " - " &&
that.source[pos + 3 ] === " - " ) {
is_comment = true ;
}
// 获取标签标记,直到遇到">"
do {
val = that.source[pos];
if ( ! is_comment) {
// 如果此字符为空格换行制表符,则跳过此字符
if ($.inArray(val, white_space) & gt; = 0 ) {
space = true ;
pos ++ ;
continue ;
}
if (space) {
if (token_value_array[token_value_array.length - 1 ] !== " = " && val !== " = " ) {
token_value_array.push( " " );
}
space = false ;
}
}
if (val === " / " && that.source[pos + 1 ] === " > " && token_value_array[token_value_array.length - 1 ] !== " " ) {
token_value_array.push( " " );
}
token_value_array.push(val);
pos ++ ;
} while (val !== " > " );
token_value = $.trim(token_value_array.join( "" ));
// 当前标签的名称(小写)
tagName = getTagName();
if (is_comment) {
token_type = " SINGLE_TAG " ;
} else {
if (token_value[ 1 ] === " / " ) {
// token_value以"</"开始,则认为是结束标签
token_type = " END_TAG " ;
} else if ($.inArray(tagName, single_token) & gt; = 0 || token_value[token_value.length - 2 ] === " / " ) {
// 如果标签在single_token或者token_value以"/>"结尾,则认为是独立标签
// 这种判断没有考虑这种情况:"<br></br>"
token_type = " SINGLE_TAG " ;
} else {
token_type = " START_TAG " ;
if (tagName === " script " || tagName === " style " ) {
is_format_content = false ;
}
}
}
}
function getTagName() {
var tagName = token_value.substr( 1 , token_value.length - 2 );
var spaceIndex = tagName.indexOf( " " );
if (spaceIndex & gt; 0 ) {
tagName = tagName.substr( 0 , spaceIndex);
}
return tagName.toLowerCase();
}
// 输出当前标记
function outputToken() {
output.push(token_value);
}
// 输出新行
function outputLine() {
output.push( " \n " );
}
// 输出缩进
function outputIndent() {
for ( var i = 0 ; i & lt; indent_level; i ++ ) {
output.push(that.indent_value);
}
}
// parse的主体函数,循环获取下一个Token
while ( true ) {
nextToken();
// 当前Token为结束标记
if (token_type === " END " ) {
break ;
}
switch (token_type) {
case " START_TAG " :
// 我们对缩进的控制非常简单,开始标签后缩进一个单位
outputLine();
outputIndent();
outputToken();
indent_level ++ ;
break ;
case " END_TAG " :
// 结束标签前减少一个单位缩进
indent_level -- ;
outputLine();
outputIndent();
outputToken();
break ;
case " SINGLE_TAG " :
outputLine();
outputIndent();
outputToken();
break ;
case " CONTENT " :
outputLine();
if (is_format_content) {
outputIndent();
}
outputToken();
break ;
}
}
// 去除最前面的"\n"
this .result = output.join( "" ).substr( 1 );
};
$( function () {
$( " #format " ).click( function () {
// 实例化HtmlBeautify,传递需要解析的HTML片段和缩进字符串
var beautify = new HtmlBeautify($( " #content " ).val(), " " );
$( " #content " ).val(beautify.result);
});
});
function HtmlBeautify(source, indent_value) {
this .source = source;
this .indent_value = indent_value;
this .result = "" ;
this .parse();
}
// 分析并产生输出到this.result
HtmlBeautify.prototype.parse = function () {
var that = this ;
// 当前分析到哪个字符,当前标记值,标记类型,
// 输出数组,缩进级别,当前格式化内容(去掉多余空格)
var pos = 0 , token_value = "" , token_type = "" ,
output = [], indent_level = 0 , is_format_content = true ;
// 把这些标签作为Single Tag
var single_token = " br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed " .split( ' , ' );
var white_space = " \r\n\t " .split( "" );
// 获取下一个标记(首先获取正文,如果正文为空则获取标签)
function nextToken() {
var token_value_array = [], val = "" , space = false ;
// "<"之前的所有内容作为正文标签
while ((val = that.source[pos]) !== " < " ) {
if (pos & gt; = that.source.length) {
token_type = " END " ;
return ;
}
if (is_format_content) {
if ($.inArray(val, white_space) & gt; = 0 ) {
space = true ;
pos ++ ;
continue ;
}
if (space) {
token_value_array.push( " " );
space = false ;
}
}
token_value_array.push(val);
pos ++ ;
}
token_value = token_value_array.join( "" ).replace( / ^\n+|\n$ / g, "" );
if ($.trim(token_value) === "" ) {
// 如果正文标记为空,则获取标签标记
if ( ! is_format_content) {
is_format_content = true ;
}
nextTokenTag();
} else {
token_type = " CONTENT " ;
}
}
// 下一个标签标记
function nextTokenTag() {
var token_value_array = [], val = "" ,
tagName = "" , space = false , is_comment = false ;
// 这是一个注释标签
if (that.source[pos + 1 ] === " ! " &&
that.source[pos + 2 ] === " - " &&
that.source[pos + 3 ] === " - " ) {
is_comment = true ;
}
// 获取标签标记,直到遇到">"
do {
val = that.source[pos];
if ( ! is_comment) {
// 如果此字符为空格换行制表符,则跳过此字符
if ($.inArray(val, white_space) & gt; = 0 ) {
space = true ;
pos ++ ;
continue ;
}
if (space) {
if (token_value_array[token_value_array.length - 1 ] !== " = " && val !== " = " ) {
token_value_array.push( " " );
}
space = false ;
}
}
if (val === " / " && that.source[pos + 1 ] === " > " && token_value_array[token_value_array.length - 1 ] !== " " ) {
token_value_array.push( " " );
}
token_value_array.push(val);
pos ++ ;
} while (val !== " > " );
token_value = $.trim(token_value_array.join( "" ));
// 当前标签的名称(小写)
tagName = getTagName();
if (is_comment) {
token_type = " SINGLE_TAG " ;
} else {
if (token_value[ 1 ] === " / " ) {
// token_value以"</"开始,则认为是结束标签
token_type = " END_TAG " ;
} else if ($.inArray(tagName, single_token) & gt; = 0 || token_value[token_value.length - 2 ] === " / " ) {
// 如果标签在single_token或者token_value以"/>"结尾,则认为是独立标签
// 这种判断没有考虑这种情况:"<br></br>"
token_type = " SINGLE_TAG " ;
} else {
token_type = " START_TAG " ;
if (tagName === " script " || tagName === " style " ) {
is_format_content = false ;
}
}
}
}
function getTagName() {
var tagName = token_value.substr( 1 , token_value.length - 2 );
var spaceIndex = tagName.indexOf( " " );
if (spaceIndex & gt; 0 ) {
tagName = tagName.substr( 0 , spaceIndex);
}
return tagName.toLowerCase();
}
// 输出当前标记
function outputToken() {
output.push(token_value);
}
// 输出新行
function outputLine() {
output.push( " \n " );
}
// 输出缩进
function outputIndent() {
for ( var i = 0 ; i & lt; indent_level; i ++ ) {
output.push(that.indent_value);
}
}
// parse的主体函数,循环获取下一个Token
while ( true ) {
nextToken();
// 当前Token为结束标记
if (token_type === " END " ) {
break ;
}
switch (token_type) {
case " START_TAG " :
// 我们对缩进的控制非常简单,开始标签后缩进一个单位
outputLine();
outputIndent();
outputToken();
indent_level ++ ;
break ;
case " END_TAG " :
// 结束标签前减少一个单位缩进
indent_level -- ;
outputLine();
outputIndent();
outputToken();
break ;
case " SINGLE_TAG " :
outputLine();
outputIndent();
outputToken();
break ;
case " CONTENT " :
outputLine();
if (is_format_content) {
outputIndent();
}
outputToken();
break ;
}
}
// 去除最前面的"\n"
this .result = output.join( "" ).substr( 1 );
};
$( function () {
$( " #format " ).click( function () {
// 实例化HtmlBeautify,传递需要解析的HTML片段和缩进字符串
var beautify = new HtmlBeautify($( " #content " ).val(), " " );
$( " #content " ).val(beautify.result);
});
});
此时的代码已经有点复杂了,而对于更加复杂的环境,比如对JavaScript的格式化如果还是这样手工解析势必会更加复杂,或许有更好的解决办法。