java 停顿词过滤示例_好记性不如烂笔头31-java应用中的敏感词过滤实现(3)

敏感词过滤,国内混的同学看到这个都会会心一笑。其实敏感词过滤,在几乎所有国家都是存在的,只是表现的形式并不完全相同而已。

既然这个功能叫做关键词过滤,那么做在过滤器中,应该是一个好主意。

1、利用过滤器实现敏感信息过滤(Java 实现)

过滤器的JAVA代码:

package com.filter;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import javax.servlet.http.HttpServletResponse;

/**

*一个简单的敏感词过滤器,这里针对从GET的方式做了转码,如果用POST方式,将ISO8859-1字符转换为UTF-8下面一段要注释掉

*@author范芳铭

*/

public class EasyDirtyFilter implementsFilter{

private FilterConfig config = null;

@Override

public void init(FilterConfig filterConfig) throws ServletException {

System.out.println("----过滤器初始化----");

this.config = filterConfig;

}

//过滤器功能在这里实现

@Override

public void doFilter(ServletRequest req, ServletResponse resp,

FilterChain chain) throwsIOException, ServletException {

HttpServletRequest request = (HttpServletRequest) req;

HttpServletResponse response = (HttpServletResponse) resp;

String charset = "UTF-8";

request.setCharacterEncoding(charset);

response.setCharacterEncoding(charset);

response.setContentType("text/html;charset="+charset);

DirtyRequest dirtyreq = new DirtyRequest(request);

chain.doFilter(dirtyreq, response);

}

@Override

public void destroy() {

System.out.println("----过滤器销毁----");

}

private List getDirtyWords(){

List dirtyWords = new ArrayList();

String dirtyWordPath = config.getInitParameter("dirtyword");

InputStream inputStream =config.getServletContext().getResourceAsStream(dirtyWordPath);

InputStreamReader is = null;

try {

is = newInputStreamReader(inputStream,"UTF-8");

} catch (UnsupportedEncodingException e2) {

e2.printStackTrace();

}

BufferedReader reader = new BufferedReader(is);

String line;

try {

while ((line =reader.readLine())!= null) {//如果 line为空说明读完了

dirtyWords.add(line);

}

} catch (IOException e) {

e.printStackTrace();

}

return dirtyWords;

}

//使用Decorator模式包装request对象,实现敏感字符过滤功能

class DirtyRequest extends HttpServletRequestWrapper{

private List dirtyWords = getDirtyWords();

private HttpServletRequest request;

public DirtyRequest(HttpServletRequest request) {

super(request);

this.request = request;

}

//重写getParameter方法,实现对敏感字符的过滤

@Override

public String getParameter(String name) {

String value =this.request.getParameter(name);

//如果get的方式提交表单,通过request.setCharacterEncoding("UTF-8");这种方式是解决不了中文乱码问题

//参考:http://blog.csdn.net/ffm83/article/details/43229819

if(value==null){

return null;

}

//将ISO8859-1字符转换为UTF-8

try {

value=new String(value.getBytes("ISO8859-1"),"UTF-8") ;

}catch (UnsupportedEncodingException e) {

//TODO Auto-generated catch block

e.printStackTrace();

}

for(String dirtyWord : dirtyWords){

if(value.contains(dirtyWord)){

System.out.println("内容中包含敏感词:"+dirtyWord+",将会被替换成****");

//替换敏感字符

value =value.replace(dirtyWord, "****");

}

}

return value;

}

}

}

2、将过滤器添加到Web.xml

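<filter>
    <filter-name>easyFilter</filter-name>
    <filter-class>com.filter.EasyDirtyFilter</filter-class>
    <init-param>
        <param-name>dirtyword</param-name>
        <param-value>/WEB-INF/dirtyword.txt</param-value>
    </init-param>
</filter>

<filter-mapping>
    <filter-name>easyFilter</filter-name>
    <url-pattern>/*</url-pattern>
</filter-mapping>

<servlet>
    <servlet-name>dirty</servlet-name>
    <servlet-class>com.servlet.RequestDirty</servlet-class>
</servlet>

<servlet-mapping>
    <servlet-name>dirty</servlet-name>
    <url-pattern>/dirty</url-pattern>
</servlet-mapping>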

3、测试用的servlet源代码

package com.servlet;

import java.io.IOException;

import java.io.PrintWriter;

import javax.servlet.ServletException;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

/**

*从外部获取信息,如果有敏感词,那么需要过滤

*@author范芳铭

*/

public class RequestDirty extendsHttpServlet {

publicvoid doGet(HttpServletRequest request, HttpServletResponse response)

throwsServletException, IOException {

Stringinfo = request.getParameter("info");

//在过滤器里进行了转码,这里就不要再进行转码

//info=new String(info.getBytes("ISO8859-1"),"UTF-8") ;

System.out.println(info);

PrintWriterout = response.getWriter();

out.write("获得信息如下:" + info);

}

publicvoid doPost(HttpServletRequest request, HttpServletResponse response)

throwsServletException, IOException {

doGet(request,response);

}

}

4、其他

一个关键词文件,dirtyword.txt放在WEB-INF下。

关键词文件如下:(仅供示例,无任何含义)

粗话

黑人

黑鬼

5、测试

在浏览器输入(主机、端口和应用上下文以实际部署为准):

http://localhost:8080/应用名/dirty?info=黑人是美国无产阶级的成员

页面输出结果:获得信息如下:****是美国无产阶级的成员

后台输出情况:

内容中包含敏感词:黑人,将会被替换成****

****是美国无产阶级的成员

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值