导入依赖
<!-- xss -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.4</version>
</dependency>
<!-- Jsoup 如果使用Jsoup过滤,则需要添加该依赖,否则不需要 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
工具类
JsoupUtil.java
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Whitelist;
/**
 * Filters and escapes sensitive characters in HTML tags and attributes using Jsoup.
 */
@Slf4j
public class JsoupUtil {
    /**
     * Tag whitelist, based on Jsoup's relaxed whitelist and customized in the static
     * initializer below.
     */
    public static Whitelist WHITELIST = Whitelist.relaxed();

    /**
     * Output settings with pretty-printing disabled so that cleaning does not reformat
     * (re-indent / insert line breaks into) the cleaned text.
     */
    static Document.OutputSettings OUTPUT_SETTINGS = new Document.OutputSettings().prettyPrint(false);

    /*
     * Customize the whitelist:
     * addAttributes(":all", "style") keeps the style attribute on every allowed tag.
     * preserveRelativeLinks(true) asks Jsoup to keep relative links in URL attributes
     * instead of converting them to absolute links.
     */
    static {
        WHITELIST.addAttributes(":all", "style");
        WHITELIST.preserveRelativeLinks(true);
    }

    /**
     * Removes every tag and attribute that is not on {@link #WHITELIST}.
     *
     * <p>The input and output are logged at DEBUG level only, because they may contain
     * complete request bodies.
     *
     * @param s the HTML (or HTML-bearing text) to clean; may be {@code null}
     * @return the cleaned text, or {@code null} when {@code s} is {@code null}
     */
    public static String clean(String s) {
        if (s == null) {
            return null;
        }
        log.debug("[xss过滤标签和属性] [原字符串为] : {}", s);
        // Pass OUTPUT_SETTINGS explicitly; the two-argument overload pretty-prints the result.
        String cleaned = Jsoup.clean(s, "", WHITELIST, OUTPUT_SETTINGS);
        log.debug("[xss过滤标签和属性] [过滤后的字符串为] : {}", cleaned);
        return cleaned;
    }

    /**
     * Cleans a JSON document whose string values may contain HTML.
     *
     * @param s the raw JSON text; may be {@code null}
     * @return the JSON text after quote normalization and whitelist cleaning,
     *         or {@code null} when {@code s} is {@code null}
     */
    public static String cleanJson(String s) {
        // Normalize nested double quotes first so that HTML attribute quotes inside a
        // value are not confused with the quotes that delimit the JSON string.
        return clean(jsonStringConvert(s));
    }

    /**
     * Replaces double quotes nested inside JSON string values with single quotes.
     *
     * <p>This is a character-level heuristic, not a JSON parser: a value is recognized
     * only when it starts with {@code :"} (no whitespace after the colon) and is treated
     * as finished at the first double quote that is directly followed by {@code ,} or
     * {@code }}, or that is the last character of the input.
     *
     * @param s the raw JSON text; may be {@code null}
     * @return the converted text, or {@code null} when {@code s} is {@code null}
     */
    public static String jsonStringConvert(String s) {
        if (s == null) {
            return null;
        }
        log.debug("[处理JSON字符串] [将嵌套的双引号转成单引号] [原JSON] :{}", s);
        char[] chars = s.toCharArray();
        int n = chars.length;
        // "i + 1 < n" guards the look-ahead so a trailing ':' cannot read past the array.
        for (int i = 0; i + 1 < n; i++) {
            if (chars[i] != ':' || chars[i + 1] != '"') {
                continue;
            }
            for (int j = i + 2; j < n; j++) {
                if (chars[j] != '"') {
                    continue;
                }
                boolean closesValue = j + 1 >= n || chars[j + 1] == ',' || chars[j + 1] == '}';
                if (closesValue) {
                    // Resume after the closing quote so the value just processed is not
                    // rescanned (a value ending in ':' would otherwise be mistaken for
                    // the start of a new value).
                    i = j;
                    break;
                }
                // A quote inside the value: turn it into a single quote.
                chars[j] = '\'';
            }
        }
        String converted = new String(chars);
        log.debug("[处理JSON字符串] [将嵌套的双引号转成单引号] [处理后的JSON] :{}", converted);
        return converted;
    }
}
HtmlText.java
import java.util.regex.Pattern;
/**
 * Strips HTML markup from text using regular expressions.
 */
public class HtmlText {
    /** Matches a complete script element, tolerating whitespace inside the tag delimiters. */
    private static final Pattern SCRIPT_BLOCK = Pattern.compile(
            "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>", Pattern.CASE_INSENSITIVE);

    /** Matches a complete style element, tolerating whitespace inside the tag delimiters. */
    private static final Pattern STYLE_BLOCK = Pattern.compile(
            "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>", Pattern.CASE_INSENSITIVE);

    /** Matches any complete tag. */
    private static final Pattern CLOSED_TAG = Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /** Matches an unterminated tag: a '<' followed by at least one character other than '>'. */
    private static final Pattern OPEN_TAG = Pattern.compile("<[^>]+", Pattern.CASE_INSENSITIVE);

    /**
     * Removes script elements, style elements and all remaining tags from the input.
     *
     * <p>Text following an unmatched {@code <} is removed as well, because it is treated
     * as an unterminated tag.
     *
     * @param inputString the text to strip; may be {@code null}
     * @return the text without markup; an empty string when {@code inputString} is {@code null}
     */
    public static String Html2Text(String inputString) {
        if (inputString == null) {
            return "";
        }
        // Order matters: script/style elements are removed together with their content
        // before the generic tag patterns run.
        String text = SCRIPT_BLOCK.matcher(inputString).replaceAll("");
        text = STYLE_BLOCK.matcher(text).replaceAll("");
        text = CLOSED_TAG.matcher(text).replaceAll("");
        text = OPEN_TAG.matcher(text).replaceAll("");
        return text;
    }
}
XssAndSqlHttpServletRequestWrapper.java
import org.apache.commons.lang3.StringUtils;
import javax.servlet.ReadListener;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
/**
 * Request wrapper that strips HTML from parameter values and cleans the request body.
 *
 * <p>Only {@code getParameter}, {@code getParameterValues} and {@code getInputStream} are
 * filtered. Headers, {@code getParameterMap} and {@code getReader} are passed through
 * untouched.
 */
public class XssAndSqlHttpServletRequestWrapper extends HttpServletRequestWrapper {
    /** Charset used both to decode the original body and to re-encode the cleaned body. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    private HttpServletRequest request;

    /** True when the request is a multipart upload; upload bodies are not filtered. */
    boolean isUpData = false;

    /**
     * Wraps the given request and records whether it is a multipart upload.
     *
     * @param request the request to wrap
     */
    public XssAndSqlHttpServletRequestWrapper(HttpServletRequest request) {
        super(request);
        this.request = request;
        String contentType = request.getContentType();
        if (null != contentType) {
            isUpData = contentType.startsWith("multipart");
        }
    }

    /**
     * Returns the named parameter with all HTML markup stripped from its value.
     *
     * @param name the parameter name
     * @return the stripped value; {@code null} or empty values are returned unchanged
     */
    @Override
    public String getParameter(String name) {
        String value = request.getParameter(name);
        if (!StringUtils.isEmpty(value)) {
            value = HtmlText.Html2Text(value);
        }
        return value;
    }

    /**
     * Returns all values of the named parameter with HTML markup stripped from each.
     *
     * @param name the parameter name
     * @return a new array of stripped values, or {@code null} when the parameter is absent
     */
    @Override
    public String[] getParameterValues(String name) {
        String[] rawValues = super.getParameterValues(name);
        if (rawValues == null) {
            return null;
        }
        // Fill a fresh array instead of writing into the one handed out by the wrapped request.
        String[] cleanedValues = new String[rawValues.length];
        for (int i = 0; i < rawValues.length; i++) {
            cleanedValues[i] = HtmlText.Html2Text(rawValues[i]);
        }
        return cleanedValues;
    }

    // getHeader is intentionally not overridden: header values are returned unfiltered.
    // If headers ever need filtering, getHeaders/getHeaderNames may need the same treatment.

    /**
     * Returns the request body as a stream. Multipart bodies are returned as-is; any other
     * body is read completely, cleaned by {@link #inputHandlers(ServletInputStream)} and
     * served from memory.
     *
     * @return a stream over the (possibly cleaned) request body
     * @throws IOException if the underlying stream cannot be obtained
     */
    @Override
    public ServletInputStream getInputStream() throws IOException {
        if (isUpData) {
            return super.getInputStream();
        }
        // Re-encode with the same charset that inputHandlers used for decoding.
        byte[] bytes = inputHandlers(super.getInputStream()).getBytes(UTF_8);
        final ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        return new ServletInputStream() {
            @Override
            public int read() throws IOException {
                return bais.read();
            }

            @Override
            public int read(byte[] b, int off, int len) throws IOException {
                return bais.read(b, off, len);
            }

            @Override
            public boolean isFinished() {
                // The body is fully buffered, so it is finished once the buffer is drained.
                return bais.available() == 0;
            }

            @Override
            public boolean isReady() {
                // Reads from the in-memory buffer never block.
                return true;
            }

            @Override
            public void setReadListener(ReadListener readListener) {
                // Listener-based (non-blocking) reads are not supported; the listener is ignored.
            }
        };
    }

    /**
     * Reads the whole stream as UTF-8 text, closes it, and returns the text after
     * {@code JsoupUtil.cleanJson} has been applied.
     *
     * <p>The body is read line by line and the line terminators are dropped. An
     * {@link IOException} while reading is reported and whatever was read up to that point
     * is still cleaned and returned.
     *
     * @param servletInputStream the original request body stream
     * @return the cleaned body text
     */
    public String inputHandlers(ServletInputStream servletInputStream) {
        StringBuilder sb = new StringBuilder();
        // Closing the reader also closes the wrapped servlet stream.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(servletInputStream, UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
        } catch (IOException e) {
            // Best effort: continue with the partial body rather than failing the request.
            e.printStackTrace();
        }
        return JsoupUtil.cleanJson(sb.toString());
    }
}
XssFilter.java
这里要注意:filterName 可能会与项目中已有的过滤器重名,如有冲突请修改为其他名称。
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.http.converter.json.Jackson2ObjectMapperBuilder;
import org.springframework.stereotype.Component;
import javax.servlet.*;
import javax.servlet.annotation.WebFilter;
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Servlet filter that wraps selected requests in {@link XssAndSqlHttpServletRequestWrapper}
 * to guard against XSS, and exposes the primary Jackson {@link ObjectMapper} that strips
 * HTML from serialized strings.
 *
 * <p>NOTE(review): the class carries {@code @WebFilter}, {@code @Configuration} and
 * {@code @Component}. {@code @Component} looks redundant next to {@code @Configuration},
 * and the filter may be registered twice if servlet component scanning is also enabled
 * — confirm against the application setup.
 */
@WebFilter(filterName = "xssFilter1", urlPatterns = "/*", asyncSupported = true)
@Slf4j
@Configuration
@Component
public class XssFilter implements Filter {
    /**
     * Upper-case keywords. A request is filtered when its upper-cased request URI contains
     * at least one of them; all other requests pass through unwrapped.
     */
    private static final List<String> MATCH_WORD = new ArrayList<>();

    static {
        // Stored in upper case because the request URI is upper-cased before matching.
        MATCH_WORD.add("ADD");
        MATCH_WORD.add("EDIT");
        MATCH_WORD.add("REMOVE");
        MATCH_WORD.add("LIST");
        MATCH_WORD.add("VERIFYCARMI");
        MATCH_WORD.add("UPDATA");
        MATCH_WORD.add("GET");
        MATCH_WORD.add("CHECK");
    }

    @Override
    public void destroy() {
    }

    /**
     * Wraps the request in the XSS wrapper when its URI contains one of the keywords,
     * then continues the filter chain.
     *
     * @param request the incoming request
     * @param response the outgoing response
     * @param chain the remaining filter chain
     * @throws IOException if a downstream filter or servlet fails with an I/O error
     * @throws ServletException if a downstream filter or servlet fails
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
            throws IOException, ServletException {
        if (request instanceof HttpServletRequest) {
            HttpServletRequest httpRequest = (HttpServletRequest) request;
            // Match on the path only (not scheme/host) and case-insensitively; Locale.ROOT
            // keeps the upper-casing independent of the server's default locale.
            String upperCasePath = httpRequest.getRequestURI().toUpperCase(java.util.Locale.ROOT);
            boolean needsFiltering = MATCH_WORD.stream().anyMatch(upperCasePath::contains);
            if (needsFiltering) {
                request = new XssAndSqlHttpServletRequestWrapper(httpRequest);
            }
        }
        chain.doFilter(request, response);
    }

    @Override
    public void init(FilterConfig arg0) throws ServletException {
    }

    /**
     * Builds the primary {@link ObjectMapper}, registering {@link XssStringJsonSerializer}
     * so that every serialized String has its HTML markup stripped.
     *
     * @param builder the Spring-provided mapper builder
     * @return the configured mapper
     */
    @Bean
    @Primary
    public ObjectMapper xssObjectMapper(Jackson2ObjectMapperBuilder builder) {
        ObjectMapper objectMapper = builder.createXmlMapper(false).build();
        // Register the XSS string serializer.
        SimpleModule xssModule = new SimpleModule("XssStringJsonSerializer");
        xssModule.addSerializer(new XssStringJsonSerializer());
        objectMapper.registerModule(xssModule);
        // Serialization inclusion applies to every object serialized by this mapper:
        //   Include.ALWAYS      - default
        //   Include.NON_DEFAULT - properties holding their default value are skipped
        //   Include.NON_EMPTY   - properties that are empty ("") or null are skipped
        //   Include.NON_NULL    - properties that are null are skipped
        objectMapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
        objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
        // Accept unquoted control characters inside incoming JSON strings.
        objectMapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
        // Accept single-quoted strings in incoming JSON.
        objectMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
        return objectMapper;
    }
}
XssStringJsonSerializer.java
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import java.io.IOException;
/**
 * Jackson serializer that strips HTML markup from every String written to JSON.
 */
public class XssStringJsonSerializer extends JsonSerializer<String> {
    @Override
    public Class<String> handledType() {
        return String.class;
    }

    /**
     * Writes the value with HTML markup removed by {@code HtmlText.Html2Text}.
     *
     * @param value the string to serialize
     * @param jsonGenerator the generator to write to
     * @param serializerProvider the active serializer provider (unused)
     * @throws IOException if the generator fails to write
     */
    @Override
    public void serialize(String value, JsonGenerator jsonGenerator, SerializerProvider serializerProvider)
            throws IOException {
        if (value == null) {
            // Always emit a token: writing nothing would leave the generator without a value.
            jsonGenerator.writeNull();
            return;
        }
        jsonGenerator.writeString(HtmlText.Html2Text(value));
    }
}
前端在输入框中输入标签时,标签就会被过滤掉,例如:
<img data-v-0947769e="" src="https://11111111111" class="user-logo">
这个标签会被过滤成
<img>