FreeMarker源码分析（十一）

最新推荐文章于 2024-08-24 16:44:42 发布
weixin_45769314
最新推荐文章于 2024-08-24 16:44:42 发布
阅读量751
点赞数
分类专栏：软件工程应用与实践文章标签： java 开发语言后端
本文链接：https://blog.csdn.net/weixin_45769314/article/details/122021999
版权
2021SC@SDUSC
StringUtil.java
源码分析
TemplateModelUtils.java
源码分析
OptimizerUtil.java
源码分析
StringUtil.java

源码分析

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package freemarker.template.utility;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import freemarker.core.Environment;
import freemarker.core.ParseException;
import freemarker.ext.dom._ExtDomApi;
import freemarker.template.Version;

/**
 *  Some text related utilities.
 */
public class StringUtil {
    
    //用于查找编码较低的字符是否需要转义，但注意'='的结果不好

//如果在'['之后，那就很重要了。
    
    private static final char[] ESCAPES = createEscapes();
    
    private static final char[] LT = new char[] { '&', 'l', 't', ';' };
    private static final char[] GT = new char[] { '&', 'g', 't', ';' };
    private static final char[] AMP = new char[] { '&', 'a', 'm', 'p', ';' };
    private static final char[] QUOT = new char[] { '&', 'q', 'u', 'o', 't', ';' };
    private static final char[] HTML_APOS = new char[] { '&', '#', '3', '9', ';' };
    private static final char[] XML_APOS = new char[] { '&', 'a', 'p', 'o', 's', ';' };

    /*
     *  For better performance most methods are folded down. Don't you scream... :)
     */

    /**
     *  HTML encoding (does not convert line breaks and apostrophe-quote).
     *  Replaces all '&gt;' '&lt;' '&amp;' and '"' with entity reference, but not "'" (apostrophe-quote).
     *  The last is not escaped as back then when this was written some user agents didn't understood 
     *  "&amp;apos;" nor "&amp;#39;".
     *    
     *  @deprecated Use {@link #XHTMLEnc(String)} instead, because it escapes apostrophe-quote too.
     */
    @Deprecated
    public static String HTMLEnc(String s) {
        return XMLEncNA(s);
    }

    /**
     *  XML Encoding.
     *  用实体引用替换所有的'>' '<' '&'， ' ' '和' ' '
     */
    public static String XMLEnc(String s) {
        return XMLOrHTMLEnc(s, true, true, XML_APOS);
    }

    /**
     * Like {@link #XMLEnc(String)}, but writes the result into a {@link Writer}.
     * 
     * @since 2.3.24
     */
    public static void XMLEnc(String s, Writer out) throws IOException {
        XMLOrHTMLEnc(s, XML_APOS, out);
    }
    
    
   // 用实体引用替换所有的'>' '<' '&'， ' ' '和' ' '适用于普通用户代理(包括遗留的)的XHTML解码
    public static String XHTMLEnc(String s) {
        return XMLOrHTMLEnc(s, true, true, HTML_APOS);
    }

    /**
     * Like {@link #XHTMLEnc(String)}, but writes the result into a {@link Writer}.
     * 
     * @since 2.3.24
     */
    public static void XHTMLEnc(String s, Writer out) throws IOException {
        XMLOrHTMLEnc(s, HTML_APOS, out);
    }
    
    private static String XMLOrHTMLEnc(String s, boolean escGT, boolean escQuot, char[] apos) {
        final int ln = s.length();
        
        // 首先，我们知道是否需要转义，如果需要，输出的长度是多少
        int firstEscIdx = -1;
        int lastEscIdx = 0;
        int plusOutLn = 0;
        for (int i = 0; i < ln; i++) {
            escape: do {
                final char c = s.charAt(i);
                switch (c) {
                case '<':
                    plusOutLn += LT.length - 1;
                    break;
                case '>':
                    if (!(escGT || maybeCDataEndGT(s, i))) {
                        break escape;
                    }
                    plusOutLn += GT.length - 1;
                    break;
                case '&':
                    plusOutLn += AMP.length - 1;
                    break;
                case '"':
                    if (!escQuot) {
                        break escape;
                    }
                    plusOutLn += QUOT.length - 1;
                    break;
                case '\'': // apos
                    if (apos == null) {
                        break escape;
                    }
                    plusOutLn += apos.length - 1;
                    break;
                default:
                    break escape;
                }
                
                if (firstEscIdx == -1) {
                    firstEscIdx = i;
                }
                lastEscIdx = i;
            } while (false);
        }
        
        if (firstEscIdx == -1) {
            return s; // Nothing to escape
        } else {
            final char[] esced = new char[ln + plusOutLn];
            if (firstEscIdx != 0) {
                s.getChars(0, firstEscIdx, esced, 0);
            }
            int dst = firstEscIdx;
            scan: for (int i = firstEscIdx; i <= lastEscIdx; i++) {
                final char c = s.charAt(i);
                switch (c) {
                case '<':
                    dst = shortArrayCopy(LT, esced, dst);
                    continue scan;
                case '>':
                    if (!(escGT || maybeCDataEndGT(s, i))) {
                        break;
                    }
                    dst = shortArrayCopy(GT, esced, dst);
                    continue scan;
                case '&':
                    dst = shortArrayCopy(AMP, esced, dst);
                    continue scan;
                case '"':
                    if (!escQuot) {
                        break;
                    }
                    dst = shortArrayCopy(QUOT, esced, dst);
                    continue scan;
                case '\'': // apos
                    if (apos == null) {
                        break;
                    }
                    dst = shortArrayCopy(apos, esced, dst);
                    continue scan;
                }
                esced[dst++] = c;
            }
            if (lastEscIdx != ln - 1) {
                s.getChars(lastEscIdx + 1, ln, esced, dst);
            }
            
            return String.valueOf(esced);
        }
    }
    
    private static boolean maybeCDataEndGT(String s, int i) {
        if (i == 0) return true;
        if (s.charAt(i - 1) != ']') return false;
        if (i == 1 || s.charAt(i - 2) == ']') return true;
        return false;
    }

    private static void XMLOrHTMLEnc(String s, char[] apos, Writer out) throws IOException {
        int writtenEnd = 0;  // exclusive end
        int ln = s.length();
        for (int i = 0; i < ln; i++) {
            char c = s.charAt(i);
            if (c == '<' || c == '>' || c == '&' || c == '"' || c == '\'') {
                int flushLn = i - writtenEnd;
                if (flushLn != 0) {
                    out.write(s, writtenEnd, flushLn);
                }
                writtenEnd = i + 1;
                
                switch (c) {
                case '<': out.write(LT); break;
                case '>': out.write(GT); break;
                case '&': out.write(AMP); break;
                case '"': out.write(QUOT); break;
                default: out.write(apos); break;
                }
            }
        }
        if (writtenEnd < ln) {
            out.write(s, writtenEnd, ln - writtenEnd);
        }
    }
    
    /**
     * 用于有效地复制非常短的字符数组.
     */
    private static int shortArrayCopy(char[] src, char[] dst, int dstOffset) {
        int ln = src.length;
        for (int i = 0; i < ln; i++) {
            dst[dstOffset++] = src[i];
        }
        return dstOffset;
    }
    
    /**
     *  不替换撇号的XML编码。
     *  @see #XMLEnc(String)
     */
    public static String XMLEncNA(String s) {
        return XMLOrHTMLEnc(s, true, true, null);
    }

    /**
     *  XML encoding for attribute values quoted with <tt>"</tt> (not with <tt>'</tt>!).
     *  Also can be used for HTML attributes that are quoted with <tt>"</tt>.
     *  @see #XMLEnc(String)
     */
    public static String XMLEncQAttr(String s) {
        return XMLOrHTMLEnc(s, false, true, null);
    }

    /**
     *  XML encoding without replacing apostrophes and quotation marks and
     *  greater-thans (except in {@code ]]>}).
     *  @see #XMLEnc(String)
     */
    public static String XMLEncNQG(String s) {
        return XMLOrHTMLEnc(s, false, false, null);
    }
    
    /**
     *  Rich Text Format encoding (does not replace line breaks).
     *  Escapes all '\' '{' '}'.
     */
    public static String RTFEnc(String s) {
        int ln = s.length();
        
        // 首先，我们知道是否需要转义，如果需要，输出的长度是多少:
        int firstEscIdx = -1;
        int lastEscIdx = 0;
        int plusOutLn = 0;
        for (int i = 0; i < ln; i++) {
            char c = s.charAt(i);
            if (c == '{' || c == '}' || c == '\\') {
                if (firstEscIdx == -1) {
                    firstEscIdx = i;
                }
                lastEscIdx = i;
                plusOutLn++;
            }
        }
        
        if (firstEscIdx == -1) {
            return s; // Nothing to escape
        } else {
            char[] esced = new char[ln + plusOutLn];
            if (firstEscIdx != 0) {
                s.getChars(0, firstEscIdx, esced, 0);
            }
            int dst = firstEscIdx;
            for (int i = firstEscIdx; i <= lastEscIdx; i++) {
                char c = s.charAt(i);
                if (c == '{' || c == '}' || c == '\\') {
                    esced[dst++] = '\\';
                }
                esced[dst++] = c;
            }
            if (lastEscIdx != ln - 1) {
                s.getChars(lastEscIdx + 1, ln, esced, dst);
            }
            
            return String.valueOf(esced);
        }
    }
    
    /**
     * Like {@link #RTFEnc(String)}, but writes the result into a {@link Writer}.
     * 
     * @since 2.3.24
     */
    public static void RTFEnc(String s, Writer out) throws IOException {
        int writtenEnd = 0;  // exclusive end
        int ln = s.length();
        for (int i = 0; i < ln; i++) {
            char c = s.charAt(i);
            if (c == '{' || c == '}' || c == '\\') {
                int flushLn = i - writtenEnd;
                if (flushLn != 0) {
                    out.write(s, writtenEnd, flushLn);
                }
                out.write('\\');
                writtenEnd = i; // Not i + 1, so c will be written out later
            }
        }
        if (writtenEnd < ln) {
            out.write(s, writtenEnd, ln - writtenEnd);
        }
    }
    

    /**
     * URL编码(像%20this)查询参数值，路径段，片段;这个编码在任何地方保留的字符。
     */
    public static String URLEnc(String s, String charset) throws UnsupportedEncodingException {
        return URLEnc(s, charset, false);
    }
    
    /**
     * Like {@link #URLEnc(String, String)} but doesn't escape the slash character ({@code /}).
     * This can be used to encode a path only if you know that no folder or file name will contain {@code /}
     * character (not in the path, but in the name itself), which usually stands, as the commonly used OS-es don't
     * allow that.
     * 
     * @since 2.3.21
     */
    public static String URLPathEnc(String s, String charset) throws UnsupportedEncodingException {
        return URLEnc(s, charset, true);
    }
    
    private static String URLEnc(String s, String charset, boolean keepSlash)
            throws UnsupportedEncodingException {
        int ln = s.length();
        int i;
        for (i = 0; i < ln; i++) {
            char c = s.charAt(i);
            if (!safeInURL(c, keepSlash)) {
                break;
            }
        }
        if (i == ln) {
            // Nothing to escape
            return s;
        }

        StringBuilder b = new StringBuilder(ln + ln / 3 + 2);
        b.append(s.substring(0, i));

        int encStart = i;
        for (i++; i < ln; i++) {
            char c = s.charAt(i);
            if (safeInURL(c, keepSlash)) {
                if (encStart != -1) {
                    byte[] o = s.substring(encStart, i).getBytes(charset);
                    for (int j = 0; j < o.length; j++) {
                        b.append('%');
                        byte bc = o[j];
                        int c1 = bc & 0x0F;
                        int c2 = (bc >> 4) & 0x0F;
                        b.append((char) (c2 < 10 ? c2 + '0' : c2 - 10 + 'A'));
                        b.append((char) (c1 < 10 ? c1 + '0' : c1 - 10 + 'A'));
                    }
                    encStart = -1;
                }
                b.append(c);
            } else {
                if (encStart == -1) {
                    encStart = i;
                }
            }
        }
        if (encStart != -1) {
            byte[] o = s.substring(encStart, i).getBytes(charset);
            for (int j = 0; j < o.length; j++) {
                b.append('%');
                byte bc = o[j];
                int c1 = bc & 0x0F;
                int c2 = (bc >> 4) & 0x0F;
                b.append((char) (c2 < 10 ? c2 + '0' : c2 - 10 + 'A'));
                b.append((char) (c1 < 10 ? c1 + '0' : c1 - 10 + 'A'));
            }
        }
        
        return b.toString();
    }

    private static boolean safeInURL(char c, boolean keepSlash) {
        return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
                || c >= '0' && c <= '9'
                || c == '_' || c == '-' || c == '.' || c == '!' || c == '~'
                || c >= '\'' && c <= '*'
                || keepSlash && c == '/';
    }
    
    private static char[] createEscapes() {
        char[] escapes = new char['\\' + 1];
        for (int i = 0; i < 32; ++i) {
            escapes[i] = 1;
        }
        escapes['\\'] = '\\';
        escapes['\''] = '\'';
        escapes['"'] = '"';
        escapes['<'] = 'l';
        // As '=' is only escaped if it's after '[', we can't handle it here
        escapes['>'] = 'g';
        escapes['&'] = 'a';
        escapes['\b'] = 'b';
        escapes['\t'] = 't';
        escapes['\n'] = 'n';
        escapes['\f'] = 'f';
        escapes['\r'] = 'r';
        return escapes;
    }

    
     // 根据FTL字符串转义规则转义一个字符串，假设该字符串用引号括起来
    
    public static String FTLStringLiteralEnc(String s, char quotation) {
        return FTLStringLiteralEnc(s, quotation, false);
    }
    
    /**
     * Escapes a string according the FTL string literal escaping rules; it doesn't add the quotation marks. As this
     * method doesn't know if the string literal is quoted with reuglar quotation marks or apostrophe quute, it will
     * escape both.
     * 
     * @see #FTLStringLiteralEnc(String, char)
     */
    public static String FTLStringLiteralEnc(String s) {
        return FTLStringLiteralEnc(s, (char) 0, false);
    }

    private static String FTLStringLiteralEnc(String s, char quotation, boolean addQuotation) {
        final int ln = s.length();
        
        final char otherQuotation;
        if (quotation == 0) {
            otherQuotation = 0;
        } else if (quotation == '"') {
            otherQuotation = '\'';
        } else if (quotation == '\'') {
            otherQuotation = '"';
        } else {
            throw new IllegalArgumentException("Unsupported quotation character: " + quotation);
        }
        
        final int escLn = ESCAPES.length;
        StringBuilder buf = null;
        for (int i = 0; i < ln; i++) {
            char c = s.charAt(i);
            char escape;
            if (c == '=') {
                escape = i > 0 && s.charAt(i - 1) == '[' ? '=' : 0;
            } else if (c < escLn) {
                escape = ESCAPES[c]; //
            } else if (c == '{' && i > 0 && isInterpolationStart(s.charAt(i - 1))) {
                escape = '{';
            } else {
                escape = 0;
            }
            if (escape == 0 || escape == otherQuotation) {
                if (buf != null) {
                    buf.append(c);
                }
            } else {
                if (buf == null) {
                    buf = new StringBuilder(s.length() + 4 + (addQuotation ? 2 : 0));
                    if (addQuotation) {
                        buf.append(quotation);
                    }
                    buf.append(s.substring(0, i));
                }
                if (escape == 1) {
                    // hex encoding for characters below 0x20
                    // that have no other escape representation
                    buf.append("\\x00");
                    int c2 = (c >> 4) & 0x0F;
                    c = (char) (c & 0x0F);
                    buf.append((char) (c2 < 10 ? c2 + '0' : c2 - 10 + 'A'));
                    buf.append((char) (c < 10 ? c + '0' : c - 10 + 'A'));
                } else {
                    buf.append('\\');
                    buf.append(escape);
                }
            }
        }
        
        if (buf == null) {
            return addQuotation ? quotation + s + quotation : s;
        } else {
            if (addQuotation) {
                buf.append(quotation);
            }
            return buf.toString();
        }
    }

    private static boolean isInterpolationStart(char c) {
        return c == '$' || c == '#';
    }

    /**
     * FTL string literal decoding.
     *
     * \\, \", \', \n, \t, \r, \b and \f will be replaced according to
     * Java rules. In additional, it knows \g, \l, \a and \{ which are
     * replaced with &lt;, &gt;, &amp; and { respectively.
     * \x works as hexadecimal character code escape. The character
     * codes are interpreted according to UCS basic plane (Unicode).
     * "f\x006Fo", "f\x06Fo" and "f\x6Fo" will be "foo".
     * "f\x006F123" will be "foo123" as the maximum number of digits is 4.
     *
     * All other \X (where X is any character not mentioned above or End-of-string)
     * will cause a ParseException.
     *
     * @param s String literal <em>without</em> the surrounding quotation marks
     * @return String with all escape sequences resolved
     * @throws ParseException if there string contains illegal escapes
     */
    public static String FTLStringLiteralDec(String s) throws ParseException {

        int idx = s.indexOf('\\');
        if (idx == -1) {
            return s;
        }

        int lidx = s.length() - 1;
        int bidx = 0;
        StringBuilder buf = new StringBuilder(lidx);
        do {
            buf.append(s.substring(bidx, idx));
            if (idx >= lidx) {
                throw new ParseException("The last character of string literal is backslash", 0,0);
            }
            char c = s.charAt(idx + 1);
            switch (c) {
                case '"':
                    buf.append('"');