自己实现HTML-Beautify - 2

最新推荐文章于 2024-03-18 09:49:45 发布

weixin_33989780

最新推荐文章于 2024-03-18 09:49:45 发布

阅读量100

点赞数

文章标签： javascript ViewUI

文章截图 - 更好的排版
 源代码下载

上一篇文章，我们按照 http://jsbeautifier.org/ 的基本框架，用我们自己的语言实现了对Html简单的格式化功能。
今天我们来完善这段JavaScript脚本，主要添加如下功能：

删除多余的空格，回车换行以及制表符
保持script和style标签里面的内容格式不变化
注释部分的格式不变化
标签属性赋值等号前后不能有空格（type="text/javascript"）
自封闭标签/>之前必须有空格（<br />）

源代码：

// 优化过的HTML-Beautify
     function HtmlBeautify(source, indent_value) {
         this .source = source;
         this .indent_value = indent_value;
         this .result = "" ;

         this .parse();
    }

     // 分析并产生输出到this.result
    HtmlBeautify.prototype.parse = function () {
         var that = this ;
         // 当前分析到哪个字符，当前标记值，标记类型，
         // 输出数组，缩进级别，当前格式化内容（去掉多余空格）
         var pos = 0 , token_value = "" , token_type = "" ,
        output = [], indent_level = 0 , is_format_content = true ;

         // 把这些标签作为Single Tag
         var single_token = " br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed " .split( ' , ' );
         var white_space = " \r\n\t " .split( "" );

         // 获取下一个标记（首先获取正文，如果正文为空则获取标签）
         function nextToken() {
             var token_value_array = [], val = "" , space = false ;

             // "<"之前的所有内容作为正文标签
             while ((val = that.source[pos]) !== " < " ) {
                 if (pos & gt; = that.source.length) {
                    token_type = " END " ;
                     return ;
                }

                 if (is_format_content) {
                     if ($.inArray(val, white_space) & gt; = 0 ) {
                        space = true ;
                        pos ++ ;
                         continue ;
                    }

                     if (space) {
                        token_value_array.push( " " );
                        space = false ;
                    }
                }
                token_value_array.push(val);
                pos ++ ;
            }

            token_value = token_value_array.join( "" ).replace( / ^\n+|\n$ / g, "" );
             if ($.trim(token_value) === "" ) {
                 // 如果正文标记为空，则获取标签标记
                 if ( ! is_format_content) {
                    is_format_content = true ;
                }
                nextTokenTag();
            } else {
                token_type = " CONTENT " ;
            }
        }

         // 下一个标签标记
         function nextTokenTag() {
             var token_value_array = [], val = "" ,
                tagName = "" , space = false , is_comment = false ;

             // 这是一个注释标签
             if (that.source[pos + 1 ] === " ! " &&
                that.source[pos + 2 ] === " - " &&
                that.source[pos + 3 ] === " - " ) {
                is_comment = true ;
            }

             // 获取标签标记，直到遇到">"
             do {
                val = that.source[pos];

                 if ( ! is_comment) {
                     // 如果此字符为空格换行制表符，则跳过此字符
                     if ($.inArray(val, white_space) & gt; = 0 ) {
                        space = true ;
                        pos ++ ;
                         continue ;
                    }

                     if (space) {
                         if (token_value_array[token_value_array.length - 1 ] !== " = " && val !== " = " ) {
                            token_value_array.push( " " );
                        }
                        space = false ;
                    }
                }

                 if (val === " / " && that.source[pos + 1 ] === " > " && token_value_array[token_value_array.length - 1 ] !== " " ) {
                    token_value_array.push( " " );
                }

                token_value_array.push(val);
                pos ++ ;
            } while (val !== " > " );

            token_value = $.trim(token_value_array.join( "" ));
             // 当前标签的名称（小写）
            tagName = getTagName();

             if (is_comment) {
                token_type = " SINGLE_TAG " ;
            } else {
                 if (token_value[ 1 ] === " / " ) {
                     // token_value以"</"开始，则认为是结束标签
                    token_type = " END_TAG " ;
                } else if ($.inArray(tagName, single_token) & gt; = 0 || token_value[token_value.length - 2 ] === " / " ) {
                     // 如果标签在single_token或者token_value以"/>"结尾，则认为是独立标签
                     // 这种判断没有考虑这种情况："<br></br>"
                    token_type = " SINGLE_TAG " ;
                } else {
                    token_type = " START_TAG " ;
                     if (tagName === " script " || tagName === " style " ) {
                        is_format_content = false ;
                    }
                }
            }
        }

         function getTagName() {
             var tagName = token_value.substr( 1 , token_value.length - 2 );
             var spaceIndex = tagName.indexOf( " " );
             if (spaceIndex & gt; 0 ) {
                tagName = tagName.substr( 0 , spaceIndex);
            }
             return tagName.toLowerCase();
        }

         // 输出当前标记
         function outputToken() {
            output.push(token_value);
        }
         // 输出新行
         function outputLine() {
            output.push( " \n " );
        }
         // 输出缩进
         function outputIndent() {
             for ( var i = 0 ; i & lt; indent_level; i ++ ) {
                output.push(that.indent_value);
            }
        }

         // parse的主体函数，循环获取下一个Token
         while ( true ) {
            nextToken();

             // 当前Token为结束标记
             if (token_type === " END " ) {
                 break ;
            }

             switch (token_type) {
                 case " START_TAG " :
                     // 我们对缩进的控制非常简单，开始标签后缩进一个单位
                    outputLine();
                    outputIndent();
                    outputToken();
                    indent_level ++ ;
                     break ;
                 case " END_TAG " :
                     // 结束标签前减少一个单位缩进
                    indent_level -- ;
                    outputLine();
                    outputIndent();
                    outputToken();
                     break ;
                 case " SINGLE_TAG " :
                    outputLine();
                    outputIndent();
                    outputToken();
                     break ;
                 case " CONTENT " :
                    outputLine();
                     if (is_format_content) {
                        outputIndent();
                    }
                    outputToken();
                     break ;
            }
        }
         // 去除最前面的"\n"
         this .result = output.join( "" ).substr( 1 );
    };

    $( function () {
        $( " #format " ).click( function () {

             // 实例化HtmlBeautify，传递需要解析的HTML片段和缩进字符串
             var beautify = new HtmlBeautify($( " #content " ).val(), "      " );
            $( " #content " ).val(beautify.result);

        });
    });

此时的代码已经有点复杂了，而对于更加复杂的环境，比如对JavaScript的格式化如果还是这样手工解析势必会更加复杂，或许有更好的解决办法。

weixin_33989780

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
自己实现HTML-Beautify - 2

文章截图 - 更好的排版源代码下载上一篇文章，我们按照 http://jsbeautifier.org/ 的基本框架，用我们自己的语言实现了对Html简单的格式化功能。今天我们来完善这段JavaScript脚本，主要添加如下功能：删除多余的空格，回车换行以及制表符保持script和style标签里面的内容格式不变化注释部分的格式不变化标签属性赋值等号前后不能有空格（type="text/j...
复制链接

扫一扫