前言
本章讲述如何使用 256 叉树（按字节分支的字典树）过滤脏字，
提供 C++ 和 Golang 两个版本。
代码最早写在 https://github.com/progtesttes ，
这里稍作优化。
1:c++ code
dirtywords.h
#if !defined DIRTY_WORDS_H_
#define DIRTY_WORDS_H_
//#include<stdio.h>
#include<string.h>
/// Byte-wise dirty-word filter backed by a 256-way trie (one child slot per
/// possible byte value). The single instance is obtained through
/// GetInstance() and destroyed through ReleaseByOwner().
class CFilterDirtyWords
{
private:
    // Construction and destruction are private so the only way to obtain or
    // destroy the object is GetInstance() / ReleaseByOwner().
    CFilterDirtyWords();
    ~CFilterDirtyWords();
    // The object owns the trie through a raw pointer; a shallow copy would
    // free the same nodes twice, so copying is disabled.
    CFilterDirtyWords(const CFilterDirtyWords&) = delete;
    CFilterDirtyWords& operator=(const CFilterDirtyWords&) = delete;
public:
    /// Returns the shared instance, creating it on first use.
    static CFilterDirtyWords* GetInstance();
    /// Destroys this object. The singleton pointer is cleared first so that a
    /// later GetInstance() creates a fresh instance instead of returning a
    /// dangling pointer.
    void ReleaseByOwner() {
        if (pFilterDirtyWords == this) {
            pFilterDirtyWords = nullptr;
        }
        delete this;
    }
private:
    /// One trie node: `bend` marks the end of an inserted word, `subtree`
    /// holds one child pointer per byte value (null when absent).
    typedef struct _dirtytree
    {
        bool bend;
        struct _dirtytree * subtree[256];
        _dirtytree() {
            bend = false;
            memset(subtree, 0, sizeof(subtree));
        }
    }DIRTYTREE, *PDIRTYTREE;
    PDIRTYTREE m_phead;                           // root of the trie, null until a word list is loaded
    static CFilterDirtyWords* pFilterDirtyWords;  // the singleton instance
private:
    // Internal workers operating on an explicit trie root.
    bool loaddirtywords(const char* filepath);
    bool hasdirtywords(const PDIRTYTREE pHead, const char * pstring);
    void filterdirtywords(const PDIRTYTREE pHead, char * pstring);
    void insertdirtywords(PDIRTYTREE& pHead, const char * pstring);
    void releasedirtytree(PDIRTYTREE pHead);
public:
    /// Loads the word list (one word per line). A null path selects the
    /// default file name defined by the implementation.
    bool LoadDirtyFile(const char* filepath=nullptr);
    /// Returns true when the text contains at least one loaded word.
    bool HasDirtyWords(const char* lpstr);
    /// Overwrites every occurrence of a loaded word in the text with '*'.
    void FilterDirtyWords(char * pstring);
};
#endif
dirtywords.cpp
#include <stdio.h>
#include<ctype.h>
#include "dirtywords.h"
// Word-list file opened by loaddirtywords() when no explicit path is given.
#define CONFIG_DIRTY_WORDS "dirtywords.txt"
// Storage for the singleton pointer; it stays NULL until GetInstance()
// allocates the instance.
CFilterDirtyWords* CFilterDirtyWords::pFilterDirtyWords = NULL;
// Creates a filter with no trie attached; a word list has to be loaded
// before any lookup can match.
CFilterDirtyWords::CFilterDirtyWords()
    : m_phead(NULL)
{
}
// Frees every node of the trie owned by this object (a null root is fine).
CFilterDirtyWords::~CFilterDirtyWords()
{
    PDIRTYTREE root = m_phead;
    releasedirtytree(root);
}
// Returns the shared filter, allocating it on the first call.
// No locking is done here, so the first call must not race with another.
CFilterDirtyWords* CFilterDirtyWords::GetInstance()
{
    if (pFilterDirtyWords != NULL) {
        return pFilterDirtyWords;
    }
    pFilterDirtyWords = new CFilterDirtyWords();
    return pFilterDirtyWords;
}
// Public entry point for loading a word list; forwards to loaddirtywords().
// Returns whatever loaddirtywords() reports for the given path.
bool CFilterDirtyWords::LoadDirtyFile(const char* filepath)
{
    const bool loaded = loaddirtywords(filepath);
    return loaded;
}
bool CFilterDirtyWords::loaddirtywords(const char* filepath)
{
FILE * f = fopen(filepath== nullptr? CONFIG_DIRTY_WORDS : filepath, "r");
if (NULL == f) {
return false;
}
char szbuf[256];
PDIRTYTREE phead = NULL;
while (NULL != fgets(szbuf, 256, f)) {
insertdirtywords(phead, szbuf);
}
fclose(f);
m_phead = phead;
if (NULL == m_phead) {
printf("CFilterDirtyWords::loaddirtywords is NULL"); return false;
}
return true;
// return m_phead?true:false ;
}
void CFilterDirtyWords::filterdirtywords(const PDIRTYTREE pHead, char * pstring)
{
if (!pHead) return;
PDIRTYTREE pTree = pHead;
unsigned char ch = '\0';
int pos = 0;
char * pTemp = pstring;
bool bBegin = false;
while (*pTemp != '\0')
{
ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp;
if (pTree->subtree[ch]) {
if (!bBegin) {
bBegin = true; pos = pTemp - pstring;
}
pTree = pTree->subtree[ch];
if (pTree->bend) {
while (pos <= pTemp - pstring) *(pstring + pos++) = '*';
}
}
else if (bBegin && pHead->subtree[ch]) {
pos = pTemp - pstring; pTree = pHead->subtree[ch];
if (pTree->bend) {
while (pos <= pTemp - pstring) *(pstring + pos++) = '*';
}
}
else {
pTree = pHead; bBegin = false;
}
++pTemp;
}
}
// Adds one word to the trie, lower-casing ASCII letters on the way.
//
// pHead:   trie root; allocated here when it is still null and the word is
//          not empty.
// pstring: the word, terminated by NUL, '\r' or '\n' (so a raw line read
//          from a file can be passed directly).
//
// An empty word is ignored: previously it allocated a root and flagged the
// root itself as a word end.
void CFilterDirtyWords::insertdirtywords(PDIRTYTREE& pHead, const char * pstring)
{
    if (!pstring) return;
    const char * cursor = pstring;
    if (*cursor == '\0' || *cursor == '\r' || *cursor == '\n') return;
    if (!pHead) pHead = new DIRTYTREE;
    PDIRTYTREE node = pHead;
    for (; *cursor != '\0' && *cursor != '\r' && *cursor != '\n'; ++cursor) {
        // Cast through unsigned char: <ctype.h> functions have undefined
        // behaviour for negative char values (bytes >= 0x80).
        const unsigned char ch = static_cast<unsigned char>(
            tolower(static_cast<unsigned char>(*cursor)));
        if (!node->subtree[ch]) node->subtree[ch] = new DIRTYTREE;
        node = node->subtree[ch];
    }
    node->bend = true;
}
// Recursively deletes the node and everything below it; a null pointer is a
// no-op, which also terminates the recursion at absent children.
void CFilterDirtyWords::releasedirtytree(PDIRTYTREE pHead)
{
    if (pHead == NULL) {
        return;
    }
    for (PDIRTYTREE child : pHead->subtree) {
        releasedirtytree(child);
    }
    delete pHead;
}
bool CFilterDirtyWords::hasdirtywords(const PDIRTYTREE pHead, const char * pstring)
{
if (!pHead) return false;
PDIRTYTREE pTree = pHead;
unsigned char ch = '\0';
char * pTemp = (char*)pstring;
while (*pTemp != '\0')
{
ch = isupper(*pTemp) ? _tolower(*pTemp) : *pTemp;
if (pTree->subtree[ch]) {
pTree = pTree->subtree[ch];
if (pTree->bend) {
return true;
}
}
else {
pTree = pHead;
}
++pTemp;
}
return false;
}
// Public lookup: checks the text against the currently loaded trie.
bool CFilterDirtyWords::HasDirtyWords(const char * pstring)
{
    const bool found = hasdirtywords(m_phead, pstring);
    return found;
}
void CFilterDirtyWords::FilterDirtyWords(char * pstring)
{
filterdirtywords(m_phead, pstring);
}
main.cpp
#include "dirtywords.h"
#include <stdio.h>
int main() {
if (CFilterDirtyWords::GetInstance()->LoadDirtyFile()) {
printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("123")); //1
printf("%d \n", CFilterDirtyWords::GetInstance()->HasDirtyWords("12")); //0
}
CFilterDirtyWords::GetInstance()->ReleaseByOwner() ;
return 0;
}
/*
dirtywords.txt 内容如下
132
123
121
1221
1121
*/
运行结果
2:golang code
dirtyword.go
package dityword
import (
"bufio"
"io"
"log"
"os"
"strings"
)
// dirtytree is one node of a 256-way trie: subtree has one slot per possible
// byte value, and bend is set on the node where an inserted word ends.
type dirtytree struct {
bend bool
subtree [256]*dirtytree
}
// dirtyhead is the root of the active trie. It stays nil until
// loaddirtywords succeeds, and hasdirtywords reports false for a nil root.
var (
dirtyhead *dirtytree = nil
)
// loaddirtywords reads filename (one word per line), builds a fresh trie from
// it and installs that trie as dirtyhead.
//
// It returns false when the file cannot be opened or a read error occurs; in
// both cases the previously installed trie is left untouched. Empty lines are
// skipped and words are capped at 256 bytes.
func loaddirtywords(filename string) bool {
	fi, err := os.Open(filename)
	if err != nil {
		log.Printf("filename=%v Error: %s\n", filename, err)
		return false
	}
	defer fi.Close()

	phead := new(dirtytree)
	br := bufio.NewReader(fi)
	// skipping is true while ReadLine is still delivering the remainder of a
	// line that did not fit its buffer; those fragments must not be inserted
	// as words of their own.
	skipping := false
	for {
		line, isPrefix, err := br.ReadLine()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Any other error would repeat forever if we kept looping.
			log.Printf("filename=%v Error: %s\n", filename, err)
			return false
		}
		continuation := skipping
		skipping = isPrefix
		if continuation || len(line) < 1 {
			continue
		}
		if len(line) > 256 {
			line = line[:256]
		}
		insertdirtywords(phead, line)
	}
	dirtyhead = phead
	return true
}
// hasdirtywords reports whether str contains any word stored in the trie
// rooted at phead. A nil root or an empty string yields false.
//
// The text is lower-cased with strings.ToLower (the same transform used on
// insertion) and then matched byte by byte. Every start offset is tried: the
// earlier version reset to the root on a mismatch but skipped the mismatching
// byte, so a word directly following a partial match was missed.
//
// NOTE(review): strings.ToLower assumes UTF-8 text; callers passing bytes in
// another encoding (e.g. GBK) should confirm it leaves their data intact.
func hasdirtywords(phead *dirtytree, str string) bool {
	if phead == nil {
		return false
	}
	lowered := strings.ToLower(str)
	for start := 0; start < len(lowered); start++ {
		node := phead
		for i := start; i < len(lowered); i++ {
			node = node.subtree[lowered[i]]
			if node == nil {
				break
			}
			if node.bend {
				return true
			}
		}
	}
	return false
}
//func filterdirtywords(phead *dirtytree,str string) {
//
//}
// insertdirtywords adds one word to the trie rooted at phead. The word is
// lower-cased with strings.ToLower first so lookups can be case-insensitive.
//
// A nil root or an empty word is ignored: the root is passed by value, so a
// root allocated here could never reach the caller.
//
// Fix: the walk now descends on every byte. Previously the descent happened
// only when a child had just been created, so a word sharing a prefix with an
// earlier word was attached to the wrong node (after "fyou", inserting "fky"
// stored "ky" at the root).
//
// NOTE(review): strings.ToLower assumes UTF-8 text; confirm the word list is
// not in another encoding such as GBK.
func insertdirtywords(phead *dirtytree, str []byte) {
	if phead == nil || len(str) < 1 {
		return
	}
	lowered := strings.ToLower(string(str))
	node := phead
	for i := 0; i < len(lowered); i++ {
		ch := lowered[i]
		if node.subtree[ch] == nil {
			node.subtree[ch] = new(dirtytree)
		}
		node = node.subtree[ch]
	}
	node.bend = true
}
//func releasedirtytree(phead *dirtytree) {
//}
//api
// LoadDirtyWordsFile loads the word list stored in filename and reports
// whether loading succeeded.
func LoadDirtyWordsFile(filename string) bool {
	ok := loaddirtywords(filename)
	return ok
}
// HasDirtyWords reports whether chstr contains a word from the currently
// loaded list.
func HasDirtyWords(chstr string) bool {
	found := hasdirtywords(dirtyhead, chstr)
	return found
}
//func FilterDirtyWords(filterstr string) {
//
//}
main.go
package main
import (
"bytes"
"dirtywords/dityword"
"fmt"
"github.com/henrylee2cn/mahonia"
"log"
"os"
"path"
"regexp"
"unicode/utf8"
)
// sqlInjectionPattern matches a single quote, a "--" comment marker, a
// /* ... */ block comment, or one of the listed SQL keywords as a whole word.
//
// It is written as a raw string on purpose: inside an interpreted Go string
// "\b" is the backspace character, so the previous pattern required literal
// backspaces around the keywords and never matched them. The misspelt
// keyword "trancate" is corrected to "truncate".
var sqlInjectionPattern = regexp.MustCompile(
	`(?:')|(?:--)|(/\*(?:.|[\n\r])*?\*/)|(\b(select|update|and|or|delete|insert|truncate|char|chr|into|substr|ascii|declare|exec|count|master|drop|execute)\b)`)

// check reports whether src contains something that looks like SQL injection
// according to sqlInjectionPattern. The result is also printed for debugging.
func check(src string) bool {
	b := sqlInjectionPattern.MatchString(src)
	fmt.Println("lllll", b)
	return b
}
// main loads ditylist.txt from the current working directory and then checks
// each token read from standard input against it.
//
// The loop ends on end of input or on an empty line. Previously the Scanln
// error was ignored, so the loop spun forever once stdin was closed.
func main() {
	// 1. Locate and load the word list.
	cfgpath, err := os.Getwd()
	if err != nil {
		log.Printf("cannot determine working directory: %v\n", err)
		os.Exit(1)
	}
	filename := path.Join(cfgpath, "ditylist.txt")
	if !dityword.LoadDirtyWordsFile(filename) {
		os.Exit(1)
	}

	// 2. Check every input token.
	for {
		var input string
		if n, err := fmt.Scanln(&input); n == 0 && err != nil {
			// Nothing was read: end of input or an empty line.
			break
		}
		log.Printf("input=%v len=%v \n", input, len(input))
		if !utf8.ValidString(input) {
			continue
		}
		// The word list is matched byte-wise, so the UTF-8 input is converted
		// to GBK before the lookup.
		enc := mahonia.NewEncoder("gbk")
		gbkstr := enc.ConvertString(input)
		log.Printf("gbkstr=%v \n", []byte(gbkstr))
		b := dityword.HasDirtyWords(gbkstr)
		usrc := bytes.Runes([]byte(input))
		log.Printf("check b=%v uscr=%#v %v\n", b, usrc, len(usrc))
		// Sample output recorded by the author:
		// 2018/05/26 00:02:12 input=日 len=3
		// 2018/05/26 00:02:12 gbkstr=[200 213]
		// 2018/05/26 00:02:12 check b=true uscr=[]int32{26085} 1
	}
}
/*
ditylist.txt 内容如下
fyou
fky
fyou1
*/
目录结构及运行结果
3:完整工程如有需要,后续会上传
如果觉得有用,麻烦点个赞,加个收藏