思路:
- 将敏感词都存到数据库表中
- 定时读取数据到内存中,构建敏感词前缀树
- 写工具方法,使用内存中的前缀树判断消息中是否包含敏感词或完成替换操作
1. 建立敏感词数据库表
-- bad_words holds the sensitive-word list; the loader reads the rows whose
-- status is 0 and uses each row's `content` as one word.
-- NOTE(review): in MySQL the `utf8` charset is the 3-byte utf8mb3 encoding and
-- cannot store 4-byte characters (e.g. emoji); consider utf8mb4 if such
-- characters may appear in `content`.
CREATE TABLE `bad_words` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '主键id',
  `content` text COMMENT '敏感词内容',
  `create_user_id` bigint(20) unsigned COMMENT '创建用户id',
  `extra` text COMMENT '扩展信息',
  `create_time` timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP(3) COMMENT '创建时间',
  `modify_time` timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP(3) ON UPDATE CURRENT_TIMESTAMP(3) COMMENT '修改时间',
  -- Soft-delete flag: 0 = active, 1 = deleted.
  `status` int DEFAULT 0 COMMENT '0存在,1删除',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB CHARSET=utf8 COLLATE=utf8_general_ci;
2. 定时任务,读数据并建立敏感词树
项目启动的时候,开始执行这一块。
2.1 开启定时任务
// Package-level state used by the bad-word loader.
var (
// ctx is set to context.Background() by InitCronLoader.
ctx context.Context
// BadWordsTrie is the root of the in-memory bad-word prefix tree. It is
// replaced wholesale by getBadWordsAndInitBadWordsTrie on every reload.
// NOTE(review): the reload job writes this variable while lookup helpers
// read it, and no synchronization is visible here — confirm whether a
// mutex or atomic pointer is needed.
BadWordsTrie *BadWordsTrieNode
)
// InitCronLoader performs the initial bad-word load and then schedules a
// reload every 3 minutes.
//
// It panics if the initial load returns an error. The gocron scheduler is
// started from a background goroutine, which then blocks on the channel
// returned by gocron.Start.
func InitCronLoader() {
	ctx = context.Background()

	// Build the trie once, synchronously, before any periodic reload runs.
	if err := getBadWordsAndInitBadWordsTrie(); err != nil {
		panic(err)
	}

	gocron.Every(3).Minutes().Do(getBadWordsAndInitBadWordsTrie)

	go func() {
		stopped := gocron.Start()
		<-stopped
	}()
}
2.2 读数据并建立敏感词树
// getBadWordsAndInitBadWordsTrie reloads the active bad words from the
// database and replaces BadWordsTrie with a freshly built trie.
//
// If the query fails, the error is returned and BadWordsTrie is left
// untouched, so a transient database failure does not swap in an empty trie.
func getBadWordsAndInitBadWordsTrie() error {
	badWordsList, err := queryBadWords()
	if err != nil {
		return err
	}
	BadWordsTrie = initTrie(badWordsList)
	return nil
}

// getBadWordsFromDB returns the non-empty contents of all rows with status=0.
//
// The signature cannot report failures; on a query error it returns an empty
// list. Use queryBadWords when the error matters.
func getBadWordsFromDB() []string {
	badWordsList, err := queryBadWords()
	if err != nil {
		return make([]string, 0)
	}
	return badWordsList
}

// queryBadWords loads the rows with status=0 and returns their non-empty
// contents, or the error reported by the query.
//
// NOTE(review): reading the result's Error field assumes caller.LyhTestDB is
// a GORM handle — confirm against its declaration.
func queryBadWords() ([]string, error) {
	badWords := make([]*models.BadWord, 0)
	err := caller.LyhTestDB.Debug().
		Table(models.TableNameBadWord).
		Where("status=0").
		Find(&badWords).Error
	if err != nil {
		return nil, err
	}

	badWordsList := make([]string, 0, len(badWords))
	for _, item := range badWords {
		// Skip rows with empty content; they would add nothing to the trie.
		if item.Content != "" {
			badWordsList = append(badWordsList, item.Content)
		}
	}
	return badWordsList, nil
}
// initTrie builds a new prefix tree containing every word in badWordsList
// and returns its root. An empty list yields a root with no children.
func initTrie(badWordsList []string) *BadWordsTrieNode {
	trie := new(BadWordsTrieNode)
	for i := range badWordsList {
		addWord(trie, badWordsList[i])
	}
	return trie
}
// BadWordsTrieNode is one node of the bad-word prefix tree, keyed by rune.
//
// There is no explicit end-of-word flag: a node whose Children is nil is a
// leaf, and the lookup helpers treat reaching a leaf as a match.
// NOTE(review): because of this, a word that is a strict prefix of another
// stored word (e.g. "ab" alongside "abc") ends on a non-leaf node and is not
// matched on its own.
type BadWordsTrieNode struct {
	// Children maps the next rune to its child node; nil on leaf nodes.
	Children map[rune]*BadWordsTrieNode
}

// addWord inserts word into the trie rooted at root.
//
// The word is lower-cased with strings.ToLower before insertion, matching the
// lower-casing that the lookup helpers apply to messages; without this, a
// stored word containing upper-case letters could never match. A nil root or
// an empty word is a no-op.
func addWord(root *BadWordsTrieNode, word string) {
	if root == nil {
		return
	}
	node := root
	for _, char := range strings.ToLower(word) {
		// Allocate the map lazily so the final node keeps Children == nil,
		// which is what marks it as a leaf.
		if node.Children == nil {
			node.Children = make(map[rune]*BadWordsTrieNode)
		}
		child, ok := node.Children[char]
		if !ok {
			child = &BadWordsTrieNode{}
			node.Children[char] = child
		}
		node = child
	}
}
3. 使用
下面的函数可以放到工具包中,通过 cronloader.BadWordsTrie 访问内存中的敏感词前缀树。
// CheckBadWords reports whether message contains a bad word, by running
// isContain against the current cronloader.BadWordsTrie.
func CheckBadWords(message string) bool {
	found := isContain(message, cronloader.BadWordsTrie)
	return found
}
// 将敏感词字符替换为*
func CheckAndReplaceBadWords(message string) string {
return replaceBadWords(message, '*', cronloader.BadWordsTrie)
}
// CheckAndReplaceBadWordsWithSep returns message with every rune of each
// matched bad word replaced by the caller-supplied rune sep, using the
// current cronloader.BadWordsTrie.
func CheckAndReplaceBadWordsWithSep(message string, sep rune) string {
	masked := replaceBadWords(message, sep, cronloader.BadWordsTrie)
	return masked
}
// isContain reports whether the lower-cased message contains, starting at any
// rune position, a path through the trie that ends on a leaf node (a node
// whose Children is nil).
//
// A nil root or a root without children never matches. Previously an empty
// trie made every non-empty message report true, because the root itself
// satisfied the leaf check without consuming any rune.
func isContain(message string, root *cronloader.BadWordsTrieNode) bool {
	if root == nil || len(root.Children) == 0 {
		return false
	}

	runes := []rune(strings.ToLower(message))
	for i := range runes {
		node := root
		for _, char := range runes[i:] {
			next, ok := node.Children[char]
			if !ok {
				break
			}
			node = next
			// Reaching a leaf means a complete stored word was consumed.
			if node.Children == nil {
				return true
			}
		}
	}
	return false
}
// replaceBadWords returns message with every rune of each matched bad word
// replaced by sep.
//
// Matching is done on a lower-cased copy of the message, while replacements
// are written into the original runes, so text outside the matched words
// keeps its original case (previously the whole message came back
// lower-cased). Because matching reads the untouched copy, a replacement
// made for one match cannot hide a later, overlapping match.
//
// A nil root or a root without children returns message unchanged.
func replaceBadWords(message string, sep rune, root *cronloader.BadWordsTrieNode) string {
	if root == nil || len(root.Children) == 0 {
		return message
	}

	out := []rune(message)
	folded := []rune(strings.ToLower(message))
	if len(folded) != len(out) {
		// Lower-casing maps rune to rune, so the lengths are expected to
		// agree; if they ever do not, fall back to masking the lower-cased
		// text rather than risk indexing out of range.
		out = folded
	}

	for i := range folded {
		node := root
		end := i // exclusive end of the match starting at i; i means "no match"
		for j := i; j < len(folded); j++ {
			next, ok := node.Children[folded[j]]
			if !ok {
				break
			}
			node = next
			// A leaf marks the end of a stored word.
			if node.Children == nil {
				end = j + 1
				break
			}
		}
		for k := i; k < end; k++ {
			out[k] = sep
		}
	}
	return string(out)
}