XSS过滤 XssFilter
组件说明
跨站脚本攻击,为不和层叠样式表的缩写混淆,故将跨站脚本攻击缩写为XSS。恶意攻击者往Web页面里插入恶意html代码,当用户浏览该页之时,嵌入其中Web里面的html代码会被执行,从而达到恶意攻击用户的特殊目的
实现原理
通过注册过滤器,通过HTMLFilter工具类,对于前端有可能产生安全问题的标签进行替换操作。
HTMLFilter核心源码
public final class HTMLFilter {
/** regex flag union representing /si modifiers in php **/
private static final int REGEX_FLAGS_SI = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;
private static final Pattern P_COMMENTS = Pattern.compile("<!--(.*?)-->", Pattern.DOTALL);
private static final Pattern P_COMMENT = Pattern.compile("^!--(.*)--$", REGEX_FLAGS_SI);
private static final Pattern P_TAGS = Pattern.compile("<(.*?)>", Pattern.DOTALL);
private static final Pattern P_END_TAG = Pattern.compile("^/([a-z0-9]+)", REGEX_FLAGS_SI);
private static final Pattern P_START_TAG = Pattern.compile("^([a-z0-9]+)(.*?)(/?)$", REGEX_FLAGS_SI);
private static final Pattern P_QUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)=([\"'])(.*?)\\2", REGEX_FLAGS_SI);
private static final Pattern P_UNQUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)(=)([^\"\\s']+)", REGEX_FLAGS_SI);
private static final Pattern P_PROTOCOL = Pattern.compile("^([^:]+):", REGEX_FLAGS_SI);
private static final Pattern P_ENTITY = Pattern.compile("&#(\\d+);?");
private static final Pattern P_ENTITY_UNICODE = Pattern.compile("&#x([0-9a-f]+);?");
private static final Pattern P_ENCODE = Pattern.compile("%([0-9a-f]{2});?");
private static final Pattern P_VALID_ENTITIES = Pattern.compile("&([^&;]*)(?=(;|&|$))");
private static final Pattern P_VALID_QUOTES = Pattern.compile("(>|^)([^<]+?)(<|$)", Pattern.DOTALL);
private static final Pattern P_END_ARROW = Pattern.compile("^>");
private static final Pattern P_BODY_TO_END = Pattern.compile("<([^>]*?)(?=<|$)");
private static final Pattern P_XML_CONTENT = Pattern.compile("(^|>)([^<]*?)(?=>)");
private static final Pattern P_STRAY_LEFT_ARROW = Pattern.compile("<([^>]*?)(?=<|$)");
private static final Pattern P_STRAY_RIGHT_ARROW = Pattern.compile("(^|>)([^<]*?)(?=>)");
private static final Pattern P_AMP = Pattern.compile("&");
private static final Pattern P_QUOTE = Pattern.compile("<");
private static final Pattern P_LEFT_ARROW = Pattern.compile("<");
private static final Pattern P_RIGHT_ARROW = Pattern.compile(">");
private static final Pattern P_BOTH_ARROWS = Pattern.compile("<>");
// @xxx could grow large... maybe use sesat's ReferenceMap
private static final ConcurrentMap<String,Pattern> P_REMOVE_PAIR_BLANKS = new ConcurrentHashMap<String, Pattern>();
private static final ConcurrentMap<String,Pattern> P_REMOVE_SELF_BLANKS = new ConcurrentHashMap<String, Pattern>();
/** set of allowed html elements, along with allowed attributes for each element **/
private final Map<String, List<String>> vAllowed;
/** counts of open tags for each (allowable) html element **/
private final Map<String, Integer> vTagCounts = new HashMap<String, Integer>();
/** html elements which must always be self-closing (e.g. "<img />") **/
private final String[] vSelfClosingTags;
/** html elements which must always have separate opening and closing tags (e.g. "<b></b>") **/
private final String[] vNeedClosingTags;
/** set of disallowed html elements **/
private final String[] vDisallowed;
/** attributes which should be checked for valid protocols **/
private final String[] vProtocolAtts;
/** allowed protocols **/
private final String[] vAllowedProtocols;
/** tags which should be removed if they contain no content (e.g. "<b></b>" or "<b />") **/
private final String[] vRemoveBlanks;
/** entities allowed within html markup **/
private final String[] vAllowedEntities;
/** flag determining whether comments are allowed in input String. */
private final boolean stripComment;
private final boolean encodeQuotes;
private boolean vDebug = false;
/**
* flag determining whether to try to make tags when presented with "unbalanced"
* angle brackets (e.g. "<b text </b>" becomes "<b> text </b>"). If set to false,
* unbalanced angle brackets will be html escaped.
*/
private final boolean alwaysMakeTags;
/** Default constructor.
*
*/
public HTMLFilter() {
vAllowed = new HashMap<>();
final ArrayList<String> a_atts = new ArrayList<String>();
a_atts.add("href");
a_atts.add("target");
vAllowed.put("a", a_atts);
final ArrayList<String> img_atts = new ArrayList<String>();
img_atts.add("src");
img_atts.add("width");
img_atts.add("height");
img_atts.add("alt");
vAllowed.put("img", img_atts);
final ArrayList<String> no_atts = new ArrayList<String>();
vAllowed.put("b", no_atts);
vAllowed.put("strong", no_atts);
vAllowed.put("i"