XSS 防护框架 AntiSamy

本文介绍了如何在Java项目中使用antisamy框架来防御XSS攻击。通过添加依赖、编写过滤器及配置文件,实现了对请求参数的安全过滤。
摘要由CSDN通过智能技术生成

首先在依赖中加入


        <dependency>
            <groupId>org.owasp.antisamy</groupId>
            <artifactId>antisamy</artifactId>
            <version>1.5.3</version>
        </dependency>

然后写filter


import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
/**
 * Servlet filter that wraps each HTTP request in a {@link RequestWrapper} so that
 * parameters, headers and the query string are sanitized before downstream
 * components read them.
 */
public class XssFilter implements Filter {

    /** Configuration supplied by the container in {@link #init}; cleared in {@link #destroy}. */
    private FilterConfig filterConfig;

    /**
     * Stores the container-supplied configuration.
     *
     * @param filterConfig the filter configuration passed in by the container
     * @throws ServletException never thrown here; declared by the {@code Filter} contract
     */
    @Override
    public void init(FilterConfig filterConfig) throws ServletException {
        // The original assigned the field to itself because the parameter was named arg0.
        this.filterConfig = filterConfig;
    }

    /**
     * Passes the request down the chain, wrapped in a sanitizing {@link RequestWrapper}
     * when it is an HTTP request.
     *
     * @param request  the incoming request
     * @param response the outgoing response, forwarded untouched
     * @param chain    the remaining filter chain
     * @throws IOException      propagated from the chain
     * @throws ServletException propagated from the chain
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
            throws IOException, ServletException {
        if (request instanceof HttpServletRequest) {
            chain.doFilter(new RequestWrapper((HttpServletRequest) request), response);
        } else {
            // Non-HTTP requests cannot be wrapped; forward them unchanged instead of
            // failing with a ClassCastException.
            chain.doFilter(request, response);
        }
    }

    /** Releases the reference to the filter configuration. */
    @Override
    public void destroy() {
        this.filterConfig = null;
    }

}

然后写 RequestWrapper


/**
 * Request wrapper that runs parameter values, header values and the query string
 * through AntiSamy before handing them to the application.
 *
 * <p>The policy file is parsed once when the class is loaded. If the policy cannot be
 * loaded, or a scan fails, the value is HTML-escaped instead of being returned raw, so
 * a failure never lets unfiltered input through.
 */
public class RequestWrapper extends HttpServletRequestWrapper {

    private static final Logger log = Logger.getLogger(RequestWrapper.class);

    /** Classpath name of the AntiSamy policy file. */
    private static final String POLICY_RESOURCE = "antisamy-myspace-1.4.4.xml";

    /**
     * Policy parsed once at class load; {@code null} when loading failed.
     * NOTE(review): sharing one Policy across request threads assumes Policy is
     * immutable in the AntiSamy version in use (1.5.x) - confirm.
     */
    private static final Policy POLICY = loadPolicy();

    public RequestWrapper(HttpServletRequest request) {
        super(request);
    }

    /**
     * Returns a read-only copy of the parameter map with every value sanitized.
     * The wrapped request's own map and arrays are left untouched.
     *
     * @return unmodifiable map of parameter name to sanitized values
     */
    @Override
    public Map<String, String[]> getParameterMap() {
        Map<String, String[]> source = super.getParameterMap();
        Map<String, String[]> cleaned = new java.util.LinkedHashMap<String, String[]>();
        for (Map.Entry<String, String[]> entry : source.entrySet()) {
            cleaned.put(entry.getKey(), cleanAll(entry.getValue()));
        }
        return java.util.Collections.unmodifiableMap(cleaned);
    }

    /**
     * @param name parameter name
     * @return sanitized copies of all values for {@code name}, or {@code null} if absent
     */
    @Override
    public String[] getParameterValues(String name) {
        return cleanAll(super.getParameterValues(name));
    }

    /**
     * @param name header name
     * @return the sanitized header value, or {@code null} if the header is absent
     */
    @Override
    public String getHeader(String name) {
        return xssClean(super.getHeader(name));
    }

    /**
     * NOTE(review): the query string is scanned as HTML like any other value; this may
     * rewrite characters such as {@code &} - confirm callers do not re-parse the result.
     *
     * @return the sanitized query string, or {@code null} if there is none
     */
    @Override
    public String getQueryString() {
        return xssClean(super.getQueryString());
    }

    /**
     * @param name parameter name
     * @return the sanitized parameter value, or {@code null} if absent
     */
    @Override
    public String getParameter(String name) {
        return xssClean(super.getParameter(name));
    }

    /** Loads the policy from the classpath; logs and returns {@code null} on failure. */
    private static Policy loadPolicy() {
        // Pass the URL itself rather than URL.getFile(): the latter yields a URL-encoded
        // path and does not work for resources packaged inside a jar.
        java.net.URL location = RequestWrapper.class.getClassLoader().getResource(POLICY_RESOURCE);
        if (location == null) {
            log.error("AntiSamy policy not found on classpath: " + POLICY_RESOURCE);
            return null;
        }
        try {
            return Policy.getInstance(location);
        } catch (PolicyException e) {
            log.error("Failed to load AntiSamy policy: " + POLICY_RESOURCE, e);
            return null;
        }
    }

    /** Returns a new array holding the sanitized form of each element; {@code null} in, {@code null} out. */
    private String[] cleanAll(String[] values) {
        if (values == null) {
            return null;
        }
        String[] result = new String[values.length];
        for (int i = 0; i < values.length; i++) {
            result[i] = xssClean(values[i]);
        }
        return result;
    }

    /**
     * Sanitizes one value with AntiSamy.
     *
     * @param value raw input, may be {@code null}
     * @return the cleaned HTML; the HTML-escaped input if scanning is not possible;
     *         {@code null} if {@code value} is {@code null}
     */
    private String xssClean(String value) {
        if (value == null) {
            return null;
        }
        if (POLICY == null) {
            return escapeHtml(value);
        }
        try {
            CleanResults results = new AntiSamy().scan(value, POLICY);
            return results.getCleanHTML();
        } catch (ScanException e) {
            log.error("AntiSamy scan failed; falling back to HTML escaping", e);
        } catch (PolicyException e) {
            log.error("AntiSamy policy error; falling back to HTML escaping", e);
        }
        // Fail closed: returning the raw value here would let any input that makes the
        // scan throw pass through unfiltered.
        return escapeHtml(value);
    }

    /** Escapes the five HTML-significant characters so the text renders inertly. */
    private static String escapeHtml(String value) {
        StringBuilder out = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                case '\'':
                    out.append("&#39;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }
}

最后在 classpath 下加入配置文件 antisamy-myspace-1.4.4.xml(文件名需与代码中加载的名称一致),搞定。

<?xml version="1.0" encoding="ISO-8859-1" ?>


<!--
W3C rules retrieved from:
http://www.w3.org/TR/html401/struct/global.html
-->


<anti-samy-rules xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                 xsi:noNamespaceSchemaLocation="antisamy.xsd">

    <directives>
        <directive name="omitXmlDeclaration" value="true"/>
        <directive name="omitDoctypeDeclaration" value="true"/>
        <directive name="maxInputSize" value="15000"/>
        <directive name="useXHTML" value="true"/>
        <directive name="formatOutput" value="true"/>

        <!--
        remember, this won't work for relative URIs - AntiSamy doesn't
        know anything about the URL or your web structure
        -->
        <directive name="embedStyleSheets" value="false"/>
        <directive name="connectionTimeout" value="5000"/>
        <directive name="maxStyleSheetImports" value="3"/>

    </directives>

    <common-regexps>

        <!--
        From W3C:
        This attribute assigns a class name or set of class names to an
        element. Any number of elements may be assigned the same class
        name or names. Multiple class names must be separated by white
        space characters.
        -->

        <!-- The 16 colors defined by the HTML Spec (also used by the CSS Spec) -->
        <regexp name="colorName" value="(aqua|black|blue|fuchsia|gray|grey|green|lime|maroon|navy|olive|purple|red|silver|teal|white|yellow)"/>

        <!-- HTML/CSS Spec allows 3 or 6 digit hex to specify color -->
        <regexp name="colorCode" value="(#([0-9a-fA-F]{6}|[0-9a-fA-F]{3}))"/>

        <regexp name="anything" value=".*"/>
        <regexp name="numberOrPercent" value="(\d)+(%{0,1})"/>
        <regexp name="paragraph" value="([\p{L}\p{N},'\.\s\-_\(\)]|&amp;[0-9]{2};)*"/>
        <regexp name="htmlId" value="[a-zA-Z0-9\:\-_\.]+"/>
        <regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
        <regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

        <regexp name="onsiteURL" value="([\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]+|\#(\w)+)"/>
        <regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

        <regexp name="boolean" value="(true|false)"/>
        <regexp name="singlePrintable" value="[a-zA-Z0-9]{1}"/> <!-- \w allows the '_' character -->

        <!-- This is for elements (ex: elemName { ... }) -->
        <regexp name="cssElementSelector" value="[a-zA-Z0-9\-_]+|\*"/>

        <!--  This is to list out any element names that are *not* valid -->
        <regexp name="cssElementExclusion" value=""/>

        <!--  This if for classes (ex: .className { ... }) -->
        <regexp name="cssClassSelector" value="\.[a-zA-Z0-9\-_]+"/>

        <!--  This is to list out any class names that are *not* valid -->
        <regexp name="cssClassExclusion" value=""/>

        <!--  This is for ID selectors (ex: #myId { ... } -->
        <regexp name="cssIDSelector" value="#[a-zA-Z0-9\-_]+"/>

        <!--  This is to list out any IDs that are *not* valid - FIXME: What should the default be to avoid div hijacking? *? -->
        <regexp name="cssIDExclusion" value=""/>

        <!--  This is for pseudo-element selector (ex. foo:pseudo-element { ... } -->
        <regexp name="cssPseudoElementSelector" value=":[a-zA-Z0-9\-_]+"/>

        <!--  This is to list out any psuedo-element names that are *not* valid -->
        <regexp name="cssPsuedoElementExclusion" value=""/>

        <!--  This is for attribute selectors (ex. foo[attr=value] { ... } -->
        <regexp name="cssAttributeSelector" value="\[[a-zA-Z0-9\-_]+((=|~=|\|=){1}[a-zA-Z0-9\-_]+){1}\]"/>

        <!--  This is to list out any attribute names that are *not* valid -->
        <regexp name="cssAttributeExclusion" value=""/>

        <!--  This is for resources referenced from CSS (such as background images and other imported stylesheets) -->
        <regexp name="cssOnsiteUri" value="url\(([\p{L}\p{N}\\/\.\?=\#&amp;;\-_~]+|\#(\w)+)\)"/>
        <regexp name="cssOffsiteUri" value="url\((\s)*((ht|f)tp(s?)://)[\p{L}\p{N}]+[~\p{L}\p{N}\p{Zs}\-_\.@#$%&amp;;:,\?=/\+!]*(\s)*\)"/>

        <!--  This if for CSS Identifiers -->
        <regexp name="cssIdentifier" value="[a-zA-Z0-9\-_]+"/>

        <!--  This is for comments within CSS (ex. /* comment */) -->
        <regexp name="cssCommentText" value="[\p{L}\p{N}\-_,\/\\\.\s\(\)!\?\=\$#%\^&amp;:&quot;']+"/>

        <regexp name="integer" value="(-|\+)?[0-9]+"/>
        <regexp name="positiveInteger" value="(\+)?[0-9]+"/>
        <regexp name="number" value="(-|\+)?([0-9]+(\.[0-9]+)?)"/>
        <regexp name="angle" value="(-|\+)?([0-9]+(\.[0-9]+)?)(deg|grads|rad)"/>
        <regexp name="time" value="([0-9]+(\.[0-9]+)?)(ms|s)"/>
        <regexp name="frequency" value="([0-9]+(\.[0-9]+)?)(hz|khz)"/>
        <regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
        <regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
        <regexp name="percentage" value="(-|\+)?([0-9]+(\.[0-9]+)?)%"/>
        <regexp name="positivePercentage" value="(\+)?([0-9]+(\.[0-9]+)?)%"/>

        <regexp name="absolute-size" value="(xx-small|x-small|small|medium|large|x-large|xx-large)"/>
        <regexp name="relative-size" value="(larger|smaller)"/>

        <!-- Used for CSS Color specifications (complex regexp expresses integer values of 0-255) -->
        <regexp name="rgbCode" value="rgb\(([1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]),([1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]),([1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])\)"/>

        <!-- CSS2 Allowed System Color Values -->
        <regexp name="systemColor" value="(activeborder|activecaption|appworkspace|background|buttonface|buttonhighlight|buttonshadow|buttontext|captiontext|graytext|highlight|highlighttext|inactiveborder|inactivecaption|inactivecaptiontext|infobackground|infotext|menu|menutext|scrollbar|threeddarkshadow|threedface|threedhighlight|threedlightshadow|threedshadow|window|windowframe|windowtext)"/>

    </common-regexps>

    <!--
    Tag.name = a, b, div, body, etc.
    Tag.action = filter: remove tags, but keep content, validate: keep content as long as it passes rules, remove: remove tag and contents
    Attribute.name = id, class, href, align, width, etc.
    Attribute.onInvalid = what to do when the attribute is invalid, e.g., remove the tag (removeTag), remove the attribute (removeAttribute), filter the tag (filterTag)
    Attribute.description = What rules in English you want to tell the users they can have for this attribute. Include helpful things so they'll be able to tune their HTML
     -->

    <!--
    Some attributes are common to all (or most) HTML tags. There aren't many that qualify for this. You have to make sure there's no
    collisions between any of these attribute names with attribute names of other tags that are for different purposes.
    -->

    <common-attributes>


        <!-- Common to all HTML tags  -->

        <attribute name="id" description="The 'id' of any HTML attribute should not contain anything besides letters and numbers">
            <regexp-list>
                <regexp name="htmlId"/>
            </regexp-list>
        </attribute>

        <attribute name="class" description="The 'class' of any HTML attribute is usually a single word, but it can also be a list of class names separated by spaces">
            <regexp-list>
                <regexp name="htmlClass"/>
            </regexp-list>
        </attribute>

        <attribute name="lang" description="The 'lang' attribute tells the browser what language the element's attribute values and content are written in">
            <regexp-list>
                <regexp value="[a-zA-Z]{2,20}"/>
            </regexp-list>
        </attribute>
        <attribute name="title" description="The 'title' attribute provides text that shows up in a 'tooltip' when a user hovers their mouse over the element">
            <regexp-list>
                <regexp name="htmlTitle"/>
            </regexp-list>
        </attribute>

        <attribute name="alt" description="The 'alt' attribute provides alternative text to users when its visual representation is not available">
            <regexp-list>
                <regexp name="paragraph"/>
            </regexp-list>
        </attribute>


        <!-- the "style" attribute will be validated by an inline stylesheet scanner, so no need to define anything here - i hate having to special case this but no other choice -->
        <attribute name="style" description="The 'style' attribute provides the ability for users to change many attributes of the tag's contents using a strict syntax"/>

        <attribute name="media">
            <regexp-list>
                <regexp value="[a-zA-Z0-9,\-\s]+"/>
            </regexp-list>

            <literal-list>
                <literal value="screen"/>
                <literal value="tty"/>
                <literal value="tv"/>
                <literal value="projection"/>
                <literal value="handheld"/>
                <literal value="print"/>
                <literal value="braille"/>
                <literal value="aural"/>
                <literal value="all"/>
            </literal-list>
        </attribute>


        <!-- Anchor related -->

        <!--  onInvalid="filterTag" has been removed as per suggestion at OWASP SJ 2007 - just "name" is valid -->
        <attribute name="href">
            <regexp-list>
                <regexp name="onsiteURL"/>
                <regexp name="offsiteURL"/>
            </regexp-list>
        </attribute>

        <attribute name="name">
            <regexp-list>

                <regexp value="[a-zA-Z0-9\-_\$]+"/>

                <!--
                have to allow the $ for .NET controls - although,
                will users be supplying input that has server-generated
                .NET control names? methinks not, but i want to pass my
                test cases
                -->

            </regexp-list>
        </attribute>


        <attribute name="shape" description="The 'shape' attribute defines the shape of the selectable area">
            <literal-list>
                <literal value="default"/>
                <literal value="rect"/>
                <literal value="circle"/>
                <literal value="poly"/>
            </literal-list>
        </attribute>



        <!--  Table attributes  -->

        <attribute name="border">
            <regexp-list>
                <regexp name="number"/>
            </regexp-list>
        </attribute>

        <attribute name="cellpadding">
            <regexp-list>
                <regexp name="number"/>
            </regexp-list>
        </attribute>

        <attribute name="cellspacing">
            <regexp-list>
                <regexp name="number"/>
            </regexp-list>
        </attribute>

        <attribute name="colspan">
            <regexp-list>
                <regexp name="number"/>
            </regexp-list>
        </attribute>

        <attribute name="rowspan">
            <regexp-list>
                <regexp name="number"/>
            </regexp-list>
        </attribute>

        <attribute name="background">
            <regexp-list>
                <regexp name="onsiteURL"/>
            </regexp-list>
        </attribute>

        <attribute name="bgcolor">
            <regexp-list>
                <regexp name="colorName"/>
                <regexp name="colorCode"/>
            </regexp-list>
        </attribute>

        <attribute name="abbr">
            <regexp-list>
                <regexp name="paragraph"/>
            </regexp-list>
        </attribute>

        <attribute name="headers" description="The 'headers' attribute is a space-separated list of cell IDs">
            <regexp-list>
                <regexp value="[a-zA-Z0-9\s*]*"/>
            </regexp-list>
        </attribute>

        <!-- References the shared "numberOrPercent" pattern by name; with value= the text would be treated as an inline regex matching only the literal string. -->
        <attribute name="charoff">
            <regexp-list>
                <regexp name="numberOrPercent"/>
            </regexp-list>
        </attribute>

        <attribute name="char">
            <regexp-list>
                <regexp value=".{0,1}"/>
            </regexp-list>
        </attribute>


        <!-- Description corrected to name the 'axis' attribute (it previously said 'headers'). -->
        <attribute name="axis" description="The 'axis' attribute is a comma-separated list of related header cells">
            <regexp-list>
                <regexp value="[a-zA-Z0-9\s*,]*"/>
            </regexp-list>
        </attribute>

        <attribute name="nowrap" description="The 'nowrap' attribute tells the browser not to wrap text that goes over one line">
            <regexp-list>
                <regexp name="anything"/>
                <!-- <regexp value="(nowrap){0,1}"/>  -->
            </regexp-list>
        </attribute>


        <!--  Common positioning attributes  -->

        <attribute name="width">
            <regexp-list>
                <regexp name="numberOrPercent"/>
            </regexp-list>
        </attribute>

        <attribute name="height">
            <regexp-list>
                <regexp name="numberOrPercent"/>
            </regexp-list>
        </attribute>

        <attribute name="align" description="The 'align' attribute of an HTML element is a direction word, like 'left', 'right' or 'center'">
            <literal-list>
                <literal value="center"/>
                <literal value="middle"/>
                <literal value="left"/>
                <literal value="right"/>
                <literal value="justify"/>
                <literal value="char"/>
            </literal-list>
        </attribute>

        <attribute name="valign" description="The 'valign' attribute of an HTML attribute is a direction word, like 'baseline','bottom','middle' or 'top'">
            <literal-list>
                <literal value="baseline"/>
                <literal value="bottom"/>
                <literal value="middle"/>
                <literal value="top"/>
            </literal-list>
        </attribute>



        <!-- Intrinsic JavaScript Events -->

        <attribute name="onFocus" description="The 'onFocus' event is executed when the control associated with the tag gains focus">
            <literal-list>
                <literal value="javascript:void(0)"/>
                <literal value="javascript:history.go(-1)"/>
            </literal-list>
        </attribute>

        <attribute name="onBlur" description="The 'onBlur' event is executed when the control associated with the tag loses focus">
            <literal-list>
                <literal value="javascript:void(0)"/>
                <literal value="javascript:history.go(-1)"/>
            </literal-list>
        </attribute>

        <attribute name="onClick" description="The 'onClick' event is executed when the control associated with the tag is clicked">
            <literal-list>
                <literal value="javascript:void(0)"/>
                <literal value="javascript:history.go(-1)"/>
            </literal-list>
        </attribute>

        <attribute name="onDblClick" description="The 'onDblClick' event is executed when the control associated with the tag is clicked twice immediately">
            <literal-list>
                <literal value="javascript:void(0)"/>
                <literal value="javascript:history.go(-1)"/>
            </literal-list>
        </attribute>

        <attribute name="onMouseDown" description="The 'onMouseDown' event is executed when the control associated with the tag is clicked but not yet released">
            <literal-list>
                <literal value="javascript:void(0)"/>
                <literal value="javascript:history.go(-1)"/>
            </literal-list>
        </attribute>

        <attribute name="onMouseUp" description="The 'onMouseUp' event is executed when the control associated with the tag is clicked after the button is released">
            <literal-list>
                <literal value="javascript:void(0)"/>
                <literal value="javascript:history.go(-1)"/>
            </literal-list>
        </attribute>

        <attribute name="onMouseOver" description="The 'onMouseOver' event is executed when the user's mouse hovers over the control associated with the tag">
            <literal-list>
                <literal value="javascript:void(0)"/>
                <literal value="javascript:history.go(-1)"/>
            </literal-list>
        </attribute>

        <attribute name="scope" description="The 'scope' attribute defines what's covered by the header cells">
            <literal-list>
                <literal value="row"/>
                <literal value="col"/>
                <literal value="rowgroup"/>
                <literal value="colgroup"/>
            </literal-list>
        </attribute>



        <!-- If you want users to be able to mess with tabindex, uncomment this -->
        <!--
        <attribute name="tabindex" description="...">
            <regexp-list>
                <regexp name="number"/>
            </regexp-list>
        </attribute>
         -->


        <!-- Input/form related common attributes -->

        <attribute name="disabled">
            <regexp-list>
                <regexp name="anything"/>
            </regexp-list>
        </attribute>

      
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值