2021SC@SDUSC
目录
StringUtil.java
源码分析
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package freemarker.template.utility;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import freemarker.core.Environment;
import freemarker.core.ParseException;
import freemarker.ext.dom._ExtDomApi;
import freemarker.template.Version;
/**
* Some text related utilities.
*/
public class StringUtil {
// Used to look up whether a character with a low code needs to be escaped in an FTL string literal.
// Note that the table can't give a usable answer for '=', because for that character it matters
// whether it comes right after '['.
private static final char[] ESCAPES = createEscapes();
// Entity references that the XML/HTML encoders write in place of the corresponding characters.
private static final char[] LT = new char[] { '&', 'l', 't', ';' };
private static final char[] GT = new char[] { '&', 'g', 't', ';' };
private static final char[] AMP = new char[] { '&', 'a', 'm', 'p', ';' };
private static final char[] QUOT = new char[] { '&', 'q', 'u', 'o', 't', ';' };
// Two alternative replacements for the apostrophe: a numeric reference and the named XML entity.
private static final char[] HTML_APOS = new char[] { '&', '#', '3', '9', ';' };
private static final char[] XML_APOS = new char[] { '&', 'a', 'p', 'o', 's', ';' };
/*
* For better performance most methods are folded down. Don't you scream... :)
*/
/**
* HTML encoding (does not convert line breaks and apostrophe-quote).
* Replaces all '>' '<' '&' and '"' with entity references, but not "'" (apostrophe-quote).
* The latter is not escaped because, back when this was written, some user agents didn't understand
* {@code "&apos;"} nor {@code "&#39;"}.
*
* @param s the text to encode
* @return the result of {@link #XMLEncNA(String)} for {@code s}
*
* @deprecated Use {@link #XHTMLEnc(String)} instead, because it escapes apostrophe-quote too.
*/
@Deprecated
public static String HTMLEnc(String s) {
return XMLEncNA(s);
}
/**
* XML encoding.
* Replaces all {@code '>'}, {@code '<'}, {@code '&'}, {@code "'"} and {@code '"'} with entity references;
* the apostrophe is written as {@code &apos;}.
*
* @param s the text to encode
* @return the encoded text
*/
public static String XMLEnc(String s) {
return XMLOrHTMLEnc(s, true, true, XML_APOS);
}
/**
* Like {@link #XMLEnc(String)}, but writes the result into a {@link Writer}.
*
* @param s the text to encode
* @param out the writer that receives the encoded text
* @throws IOException propagated from writing to {@code out}
*
* @since 2.3.24
*/
public static void XMLEnc(String s, Writer out) throws IOException {
XMLOrHTMLEnc(s, XML_APOS, out);
}
/**
* XHTML encoding.
* Replaces all {@code '>'}, {@code '<'}, {@code '&'}, {@code "'"} and {@code '"'} with entity references
* that are suitable for XHTML decoding by common user agents (including legacy ones); the apostrophe is
* written as the numeric reference {@code &#39;}.
*
* @param s the text to encode
* @return the encoded text
*/
public static String XHTMLEnc(String s) {
return XMLOrHTMLEnc(s, true, true, HTML_APOS);
}
/**
* Like {@link #XHTMLEnc(String)}, but writes the result into a {@link Writer}.
*
* @param s the text to encode
* @param out the writer that receives the encoded text
* @throws IOException propagated from writing to {@code out}
*
* @since 2.3.24
*/
public static void XHTMLEnc(String s, Writer out) throws IOException {
XMLOrHTMLEnc(s, HTML_APOS, out);
}
/**
 * Escapes the XML/HTML special characters of a string with entity references.
 *
 * <p>The work is done in two passes: the first one finds the first and the last character that has to
 * be escaped and how much longer the output will be, the second one fills an exactly sized buffer.
 *
 * @param s the text to encode
 * @param escGT whether every {@code '>'} is escaped; when {@code false}, a {@code '>'} is only escaped
 *        where {@code maybeCDataEndGT} says that it may be the end of a {@code ]]>}
 * @param escQuot whether {@code '"'} is escaped
 * @param apos the replacement written for {@code '\''}, or {@code null} to leave apostrophes untouched
 * @return {@code s} itself if nothing had to be escaped, otherwise a new string with the replacements
 */
private static String XMLOrHTMLEnc(String s, boolean escGT, boolean escQuot, char[] apos) {
    final int length = s.length();

    // Pass 1: find out whether we need to escape at all, and if so, how much the text will grow.
    int firstHit = -1;
    int lastHit = 0;
    int growth = 0;
    for (int pos = 0; pos < length; pos++) {
        switch (s.charAt(pos)) {
        case '<':
            growth += LT.length - 1;
            break;
        case '>':
            if (!escGT && !maybeCDataEndGT(s, pos)) {
                continue;
            }
            growth += GT.length - 1;
            break;
        case '&':
            growth += AMP.length - 1;
            break;
        case '"':
            if (!escQuot) {
                continue;
            }
            growth += QUOT.length - 1;
            break;
        case '\'':
            if (apos == null) {
                continue;
            }
            growth += apos.length - 1;
            break;
        default:
            // Not a character that is ever escaped
            continue;
        }
        // Only reached for characters that will really be replaced
        if (firstHit == -1) {
            firstHit = pos;
        }
        lastHit = pos;
    }

    if (firstHit == -1) {
        // Nothing to escape
        return s;
    }

    // Pass 2: copy the untouched head, translate the middle part, copy the untouched tail.
    final char[] result = new char[length + growth];
    if (firstHit > 0) {
        s.getChars(0, firstHit, result, 0);
    }
    int dst = firstHit;
    for (int pos = firstHit; pos <= lastHit; pos++) {
        final char c = s.charAt(pos);
        char[] entity = null; // stays null if c has to be copied as is
        switch (c) {
        case '<':
            entity = LT;
            break;
        case '>':
            if (escGT || maybeCDataEndGT(s, pos)) {
                entity = GT;
            }
            break;
        case '&':
            entity = AMP;
            break;
        case '"':
            if (escQuot) {
                entity = QUOT;
            }
            break;
        case '\'':
            entity = apos;
            break;
        default:
            break;
        }
        if (entity == null) {
            result[dst++] = c;
        } else {
            dst = shortArrayCopy(entity, result, dst);
        }
    }
    if (lastHit + 1 != length) {
        s.getChars(lastHit + 1, length, result, dst);
    }

    return new String(result);
}
/**
 * Tells if the {@code '>'} at index {@code i} may be the last character of a {@code ]]>}.
 *
 * <p>The answer is {@code true} when the character is at the very beginning of the string, when it's
 * at index 1 and is preceded by {@code ']'}, or when the two preceding characters are {@code "]]"}.
 *
 * @param s the text that is being encoded
 * @param i the index of the character to check in {@code s}
 * @return whether the character may close a {@code ]]>}
 */
private static boolean maybeCDataEndGT(String s, int i) {
    return i == 0
            || (s.charAt(i - 1) == ']' && (i == 1 || s.charAt(i - 2) == ']'));
}
/**
 * Writes a string into a {@link Writer}, replacing {@code '<'}, {@code '>'}, {@code '&'}, {@code '"'}
 * and {@code '\''} with entity references.
 *
 * <p>Runs of characters that need no escaping are written out with a single call.
 *
 * @param s the text to encode
 * @param apos the replacement written for {@code '\''}
 * @param out the writer that receives the encoded text
 * @throws IOException propagated from writing to {@code out}
 */
private static void XMLOrHTMLEnc(String s, char[] apos, Writer out) throws IOException {
    final int length = s.length();
    int pendingStart = 0; // start of the part of s that wasn't written out yet
    for (int pos = 0; pos < length; pos++) {
        final char[] entity;
        switch (s.charAt(pos)) {
        case '<':
            entity = LT;
            break;
        case '>':
            entity = GT;
            break;
        case '&':
            entity = AMP;
            break;
        case '"':
            entity = QUOT;
            break;
        case '\'':
            entity = apos;
            break;
        default:
            // Ordinary character; it will be flushed together with its neighbours
            continue;
        }

        if (pos > pendingStart) {
            out.write(s, pendingStart, pos - pendingStart);
        }
        out.write(entity);
        pendingStart = pos + 1;
    }
    if (pendingStart < length) {
        out.write(s, pendingStart, length - pendingStart);
    }
}
/**
 * For efficiently copying very short char arrays.
 *
 * @param src the characters to copy
 * @param dst the array to copy into
 * @param dstOffset the index in {@code dst} where the first character goes
 * @return the index in {@code dst} right after the last copied character
 */
private static int shortArrayCopy(char[] src, char[] dst, int dstOffset) {
    int next = dstOffset;
    for (char ch : src) {
        dst[next++] = ch;
    }
    return next;
}
/**
* XML encoding without replacing apostrophes.
*
* @param s the text to encode
* @return the encoded text
* @see #XMLEnc(String)
*/
public static String XMLEncNA(String s) {
return XMLOrHTMLEnc(s, true, true, null);
}
/**
* XML encoding for attribute values quoted with {@code "} (not with {@code '}!).
* Also can be used for HTML attributes that are quoted with {@code "}.
* Apostrophes are not replaced, and a {@code '>'} is only replaced where it may be the end of a
* {@code ]]>}.
*
* @param s the text to encode
* @return the encoded text
* @see #XMLEnc(String)
*/
public static String XMLEncQAttr(String s) {
return XMLOrHTMLEnc(s, false, true, null);
}
/**
* XML encoding without replacing apostrophes and quotation marks and
* greater-thans (except in {@code ]]>}).
*
* @param s the text to encode
* @return the encoded text
* @see #XMLEnc(String)
*/
public static String XMLEncNQG(String s) {
return XMLOrHTMLEnc(s, false, false, null);
}
/**
 * Rich Text Format encoding (does not replace line breaks).
 * Puts a backslash before every {@code '\'}, <code>'{'</code> and <code>'}'</code>.
 *
 * @param s the text to encode
 * @return {@code s} itself if it contains none of the above characters, otherwise the escaped text
 */
public static String RTFEnc(String s) {
    final int length = s.length();

    // First we find out if we need to escape, and if so, how long the output will be.
    int firstHit = -1;
    int lastHit = 0;
    int hits = 0;
    for (int pos = 0; pos < length; pos++) {
        switch (s.charAt(pos)) {
        case '{':
        case '}':
        case '\\':
            if (firstHit == -1) {
                firstHit = pos;
            }
            lastHit = pos;
            hits++;
            break;
        default:
            break;
        }
    }

    if (firstHit == -1) {
        // Nothing to escape
        return s;
    }

    // Each escaped character makes the output exactly one character longer.
    final StringBuilder escaped = new StringBuilder(length + hits);
    escaped.append(s, 0, firstHit);
    for (int pos = firstHit; pos <= lastHit; pos++) {
        final char c = s.charAt(pos);
        switch (c) {
        case '{':
        case '}':
        case '\\':
            escaped.append('\\');
            break;
        default:
            break;
        }
        escaped.append(c);
    }
    escaped.append(s, lastHit + 1, length);
    return escaped.toString();
}
/**
 * Like {@link #RTFEnc(String)}, but writes the result into a {@link Writer}.
 *
 * @param s the text to encode
 * @param out the writer that receives the encoded text
 * @throws IOException propagated from writing to {@code out}
 *
 * @since 2.3.24
 */
public static void RTFEnc(String s, Writer out) throws IOException {
    final int length = s.length();
    int pendingStart = 0; // start of the part of s that wasn't written out yet
    for (int pos = 0; pos < length; pos++) {
        switch (s.charAt(pos)) {
        case '{':
        case '}':
        case '\\':
            if (pos > pendingStart) {
                out.write(s, pendingStart, pos - pendingStart);
            }
            out.write('\\');
            // The escaped character itself stays pending, so it's written out with the next flush.
            pendingStart = pos;
            break;
        default:
            break;
        }
    }
    if (pendingStart < length) {
        out.write(s, pendingStart, length - pendingStart);
    }
}
/**
* URL encoding (like%20this) for query parameter values, path <em>segments</em>, fragments; this encodes
* all characters that are reserved anywhere.
*
* @param s the text to encode
* @param charset the name of the charset used to convert the characters to be escaped to bytes
* @return the encoded text
* @throws UnsupportedEncodingException if {@code charset} is not supported
*/
public static String URLEnc(String s, String charset) throws UnsupportedEncodingException {
return URLEnc(s, charset, false);
}
/**
* Like {@link #URLEnc(String, String)} but doesn't escape the slash character ({@code /}).
* This can be used to encode a path only if you know that no folder or file name will contain the {@code /}
* character (not in the path, but in the name itself), which usually holds, as the commonly used OS-es don't
* allow that.
*
* @param s the path to encode
* @param charset the name of the charset used to convert the characters to be escaped to bytes
* @return the encoded path
* @throws UnsupportedEncodingException if {@code charset} is not supported
*
* @since 2.3.21
*/
public static String URLPathEnc(String s, String charset) throws UnsupportedEncodingException {
return URLEnc(s, charset, true);
}
/**
 * Percent-encodes every character of a string that isn't safe in an URL.
 *
 * <p>Consecutive unsafe characters are converted to bytes together, then each byte is written as
 * {@code %} followed by two upper case hexadecimal digits.
 *
 * @param s the text to encode
 * @param charset the name of the charset used to convert the unsafe characters to bytes
 * @param keepSlash whether {@code '/'} counts as safe and so is left as is
 * @return {@code s} itself if all of its characters are safe, otherwise the encoded text
 * @throws UnsupportedEncodingException if {@code charset} is not supported
 */
private static String URLEnc(String s, String charset, boolean keepSlash)
        throws UnsupportedEncodingException {
    final int ln = s.length();

    // Skip the leading safe characters; if there's nothing else, the input can be returned as is.
    int i = 0;
    while (i < ln && safeInURL(s.charAt(i), keepSlash)) {
        i++;
    }
    if (i == ln) {
        // Nothing to escape
        return s;
    }

    StringBuilder b = new StringBuilder(ln + ln / 3 + 2);
    b.append(s, 0, i);

    // Start of the current run of unsafe characters, or -1 if we are not inside such a run.
    // The character at i is known to be unsafe at this point.
    int encStart = i;
    for (i++; i < ln; i++) {
        char c = s.charAt(i);
        if (safeInURL(c, keepSlash)) {
            if (encStart != -1) {
                appendURLEncodedBytes(b, s.substring(encStart, i), charset);
                encStart = -1;
            }
            b.append(c);
        } else if (encStart == -1) {
            encStart = i;
        }
    }
    if (encStart != -1) {
        // The input ends with unsafe characters
        appendURLEncodedBytes(b, s.substring(encStart, ln), charset);
    }

    return b.toString();
}

/**
 * Appends the bytes of {@code unsafe} in the given charset, each as {@code %} plus two upper case
 * hexadecimal digits (high nibble first).
 */
private static void appendURLEncodedBytes(StringBuilder out, String unsafe, String charset)
        throws UnsupportedEncodingException {
    for (byte bt : unsafe.getBytes(charset)) {
        int hi = (bt >> 4) & 0x0F;
        int lo = bt & 0x0F;
        out.append('%');
        out.append((char) (hi < 10 ? hi + '0' : hi - 10 + 'A'));
        out.append((char) (lo < 10 ? lo + '0' : lo - 10 + 'A'));
    }
}

/**
 * Tells if a character is written as is by the URL encoding: ASCII letters and digits, the
 * characters {@code _ - . ! ~ ' ( ) *}, and also {@code /} if {@code keepSlash} is {@code true}.
 */
private static boolean safeInURL(char c, boolean keepSlash) {
    return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
            || c >= '0' && c <= '9'
            || c == '_' || c == '-' || c == '.' || c == '!' || c == '~'
            || c >= '\'' && c <= '*'
            || keepSlash && c == '/';
}
/**
 * Builds the table that maps a character code to the character that stands after the backslash in
 * its escaped form. The table covers the codes up to {@code '\\'}; 0 means that the character isn't
 * escaped, and 1 means that it has no dedicated escape.
 *
 * @return the newly created table
 */
private static char[] createEscapes() {
    final char[] table = new char['\\' + 1];

    // Control characters have no dedicated escape by default...
    java.util.Arrays.fill(table, 0, 32, (char) 1);
    // ...except these ones:
    table['\b'] = 'b';
    table['\t'] = 't';
    table['\n'] = 'n';
    table['\f'] = 'f';
    table['\r'] = 'r';

    // Characters that are escaped with themselves
    table['\\'] = '\\';
    table['\''] = '\'';
    table['"'] = '"';

    // Characters that are escaped with a letter.
    // As '=' is only escaped if it's after '[', we can't handle it here.
    table['<'] = 'l';
    table['>'] = 'g';
    table['&'] = 'a';

    return table;
}
/**
* Escapes a string according to the FTL string literal escaping rules, assuming that the literal is
* quoted with {@code quotation}; it doesn't add the quotation marks themselves.
*
* @param s the text to escape
* @param quotation either {@code '"'} or {@code '\''}, the quotation mark that the literal is assumed
*        to be enclosed in; the other kind of quotation mark will not be escaped in the result
* @return the escaped text
*/
public static String FTLStringLiteralEnc(String s, char quotation) {
return FTLStringLiteralEnc(s, quotation, false);
}
/**
* Escapes a string according to the FTL string literal escaping rules; it doesn't add the quotation marks.
* As this method doesn't know if the string literal is quoted with regular quotation marks or with
* apostrophe quotes, it will escape both.
*
* @param s the text to escape
* @return the escaped text
* @see #FTLStringLiteralEnc(String, char)
*/
public static String FTLStringLiteralEnc(String s) {
return FTLStringLiteralEnc(s, (char) 0, false);
}
/**
 * Escapes a string according to the FTL string literal escaping rules.
 *
 * <p>A character is escaped if the {@code ESCAPES} table says so, if it's a {@code '='} right after
 * a {@code '['}, or if it's a <code>'{'</code> right after an interpolation start character. The
 * quotation mark that differs from {@code quotation} is left unescaped.
 *
 * @param s the text to escape
 * @param quotation {@code '"'} or {@code '\''} if it's known what the literal is quoted with, or 0 if
 *        it isn't known, in which case both kinds of quotation marks are escaped
 * @param addQuotation whether the result is enclosed in {@code quotation} characters
 * @return {@code s} itself if nothing had to be escaped and no quotation is added, otherwise a new
 *         string
 * @throws IllegalArgumentException if {@code quotation} is none of the supported values
 */
private static String FTLStringLiteralEnc(String s, char quotation, boolean addQuotation) {
    final int length = s.length();

    final char otherQuotation;
    switch (quotation) {
    case 0:
        otherQuotation = 0;
        break;
    case '"':
        otherQuotation = '\'';
        break;
    case '\'':
        otherQuotation = '"';
        break;
    default:
        throw new IllegalArgumentException("Unsupported quotation character: " + quotation);
    }

    final int tableSize = ESCAPES.length;
    StringBuilder out = null; // created lazily, when the first character to escape is found
    for (int pos = 0; pos < length; pos++) {
        char c = s.charAt(pos);

        // What to put after the backslash; 0 means no escaping, 1 means hexadecimal escape
        char escape = 0;
        if (c == '=') {
            if (pos > 0 && s.charAt(pos - 1) == '[') {
                escape = '=';
            }
        } else if (c < tableSize) {
            escape = ESCAPES[c];
        } else if (c == '{' && pos > 0 && isInterpolationStart(s.charAt(pos - 1))) {
            escape = '{';
        }

        if (escape == 0 || escape == otherQuotation) {
            // Copied as is, but only if we have already started to build a new string
            if (out != null) {
                out.append(c);
            }
            continue;
        }

        if (out == null) {
            out = new StringBuilder(s.length() + 4 + (addQuotation ? 2 : 0));
            if (addQuotation) {
                out.append(quotation);
            }
            out.append(s, 0, pos);
        }
        if (escape == 1) {
            // Hex encoding for characters below 0x20 that have no other escape representation
            final int hi = (c >> 4) & 0x0F;
            final int lo = c & 0x0F;
            out.append("\\x00");
            out.append((char) (hi < 10 ? hi + '0' : hi - 10 + 'A'));
            out.append((char) (lo < 10 ? lo + '0' : lo - 10 + 'A'));
        } else {
            out.append('\\');
            out.append(escape);
        }
    }

    if (out != null) {
        if (addQuotation) {
            out.append(quotation);
        }
        return out.toString();
    }
    return addQuotation ? quotation + s + quotation : s;
}
/**
 * Tells if the character is one of those that start an interpolation when followed by
 * <code>'{'</code>, that is, {@code '$'} or {@code '#'}.
 */
private static boolean isInterpolationStart(char c) {
    switch (c) {
    case '$':
    case '#':
        return true;
    default:
        return false;
    }
}
/**
* FTL string literal decoding.
*
* \\, \", \', \n, \t, \r, \b and \f will be replaced according to
* Java rules. In additional, it knows \g, \l, \a and \{ which are
* replaced with <, >, & and { respectively.
* \x works as hexadecimal character code escape. The character
* codes are interpreted according to UCS basic plane (Unicode).
* "f\x006Fo", "f\x06Fo" and "f\x6Fo" will be "foo".
* "f\x006F123" will be "foo123" as the maximum number of digits is 4.
*
* All other \X (where X is any character not mentioned above or End-of-string)
* will cause a ParseException.
*
* @param s String literal <em>without</em> the surrounding quotation marks
* @return String with all escape sequences resolved
* @throws ParseException if there string contains illegal escapes
*/
public static String FTLStringLiteralDec(String s) throws ParseException {
int idx = s.indexOf('\\');
if (idx == -1) {
return s;
}
int lidx = s.length() - 1;
int bidx = 0;
StringBuilder buf = new StringBuilder(lidx);
do {
buf.append(s.substring(bidx, idx));
if (idx >= lidx) {
throw new ParseException("The last character of string literal is backslash", 0,0);
}
char c = s.charAt(idx + 1);
switch (c) {
case '"':
buf.append('"');