iOS 敏感词过滤
在 app 中发布内容时, 经常需要过滤一些敏感词. 对比了 Java 版 DFA 算法和 OC 版 DFA 实现后, 发现 OC 版本相比 Java 版本存在 2 个问题:
1. 不能进行最大限度匹配
比如敏感词有"abc","ab", OC 版本只能匹配ab, 匹配不了abc
2. 不能进行最小字数判断
比如只需要过滤长度不小于 3 的敏感词 (对应下文的 minMatchLenth = 3).
对OC 版本进行了升级, 代码如下:
.h
//
// YQSensitiveWord.h
//
//
// Created by yaoqiang on 2016/11/29.
// Copyright © 2016年 Newestage,lnc. All rights reserved.
//
#import <Foundation/Foundation.h>
/// Strategy used when several dictionary words start at the same position.
typedef NS_ENUM(NSInteger, YQMatchType) {
    /// Stop at the first word whose length reaches the configured minimum.
    YQMatchTypeMin = 0,
    /// Keep extending and use the longest word found at that position.
    YQMatchTypeMax = 1,
};
/**
 Sensitive-word filter backed by a character tree built from a word list.
 Lengths are counted in NSString length units (one tree level per unit).
 */
@interface YQSensitiveWord : NSObject
/**
 Matching strategy. YQMatchTypeMax keeps extending to the longest word at a
 position; any other value stops at the first word whose length reaches
 minMatchLenth.
 default YQMatchTypeMax
 */
@property (nonatomic, assign) YQMatchType matchType;
/**
 Minimum length a matched word must have before it is masked or reported.
 default 1
 */
@property (nonatomic, assign) NSUInteger minMatchLenth;
/**
 stop filter
 While YES, filter: returns its input unchanged and getFilterKeys: reports no
 keys. loadWordsWithArray: sets this to YES while it rebuilds the word tree
 and back to NO when it finishes.
 */
@property (nonatomic, assign, getter=isStop) BOOL stop;
/**
 Returns the shared filter instance, created once on first use.
 */
+ (instancetype)sharedInstance;
/**
 Replaces the loaded dictionary with the NSString elements of array.
 Elements that are not NSString are ignored. A nil or empty array is ignored
 and leaves the currently loaded dictionary untouched.
 */
- (void)loadWordsWithArray:(NSArray *)array;
/**
 Returns str with each matched sensitive word replaced by one "*" per matched
 unit. Returns @"" when str is nil, and str itself when filtering is stopped
 or no words have been loaded.
 */
- (NSString *)filter:(NSString *)str;
/**
 Returns the sensitive words found in str, in order of appearance, one entry
 per occurrence. No keys are reported when str is nil, filtering is stopped
 or no words have been loaded; the result may be nil in the latter two cases,
 so test count rather than nil-ness.
 */
- (NSArray *)getFilterKeys:(NSString *)str;
/**
 Discards the loaded word tree. Until words are loaded again, filter: returns
 its input unchanged.
 */
- (void)freeFilter;
@end
.m
//
// YQSensitiveWord.m
//
//
// Created by yaoqiang on 2016/11/29.
// Copyright © 2016年 Newestage,lnc. All rights reserved.
//
#import "YQSensitiveWord.h"
/// Tree key that marks "a dictionary word ends at this node".
/// Child keys are always one-unit strings, so this multi-character key can never collide with them.
static NSString * const KYQ_IS_EXIST = @"isExists";
// Private state of the filter.
@interface YQSensitiveWord()
/**
 Root of the word tree. Each key is a one-unit NSString that maps to a child
 NSMutableDictionary; a node that ends a dictionary word additionally stores
 the KYQ_IS_EXIST key. nil until words are loaded and after freeFilter.
 */
@property (nonatomic, strong) NSMutableDictionary *root;
@end
@implementation YQSensitiveWord

#pragma mark - Lifecycle

/// Returns the shared filter instance, created exactly once on first use.
+ (instancetype)sharedInstance {
    static YQSensitiveWord *instance;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        instance = [[self alloc] init];
    });
    return instance;
}

/// Creates a filter with no words loaded, longest-match mode and a minimum match length of 1.
- (instancetype)init {
    self = [super init];
    if (self) {
        // Assign ivars directly so no accessor runs on a partially initialised object.
        // _root and _stop keep the nil / NO values that +alloc already provides.
        _matchType = YQMatchTypeMax;
        _minMatchLenth = 1;
    }
    return self;
}

#pragma mark - Loading words

/// Replaces the loaded dictionary with the NSString elements of `array`.
/// A nil or empty array is ignored and leaves the current dictionary untouched.
/// Elements that are not NSString (and empty strings) are skipped.
- (void)loadWordsWithArray:(NSArray *)array {
    if (array.count == 0) return;

    // Disable filtering while the tree is rebuilt, then drop the old data.
    self.stop = YES;
    [self freeFilter];
    self.root = [NSMutableDictionary dictionary];

    for (id candidate in array) {
        if ([candidate isKindOfClass:[NSString class]]) {
            [self insertWords:candidate];
        }
    }

    self.stop = NO;
}

/// Adds one word to the tree rooted at `self.root`.
///
/// Resulting layout for the word "ab":
///
///     { a = { b = { isExists = 1 } } }
///
/// Empty words are ignored so the end-of-word flag is never written onto the root itself.
- (void)insertWords:(NSString *)words {
    NSUInteger length = words.length;
    if (length == 0) return;

    NSMutableDictionary *node = self.root;
    for (NSUInteger idx = 0; idx < length; idx++) {
        NSString *unit = [words substringWithRange:NSMakeRange(idx, 1)];
        NSMutableDictionary *child = node[unit];
        if (child == nil) {
            child = [NSMutableDictionary dictionary];
            node[unit] = child;
        }
        node = child;
    }
    node[KYQ_IS_EXIST] = @1;
}

#pragma mark - Matching

/// Scans `str` from left to right and calls `block` with the range of every accepted match.
///
/// At each start position the tree is walked as far as the text allows:
/// - in YQMatchTypeMax mode the last (longest) word seen is the candidate;
/// - in any other mode the walk stops at the first word whose length reaches `minMatchLenth`,
///   and scanning resumes right after that word.
/// A candidate is reported only if a word was actually found and its length is at least
/// `minMatchLenth`. In YQMatchTypeMax mode scanning always resumes at the next position, so
/// reported ranges may overlap.
- (void)yq_enumerateMatchesInString:(NSString *)str usingBlock:(void (^)(NSRange matchRange))block {
    // Read the configuration and the tree once; the tree is only read here, so no copy is needed.
    NSDictionary *root = self.root;
    BOOL longestMatch = (self.matchType == YQMatchTypeMax);
    NSUInteger minLength = self.minMatchLenth;
    NSUInteger total = str.length;

    @autoreleasepool {
        NSUInteger start = 0;
        while (start < total) {
            NSDictionary *node = root;
            NSUInteger matched = 0;      // length of the last word seen from `start`, 0 if none
            BOOL stoppedAtWord = NO;     // YES when the walk ended on an accepted shortest match

            for (NSUInteger pos = start; pos < total; pos++) {
                NSDictionary *child = node[[str substringWithRange:NSMakeRange(pos, 1)]];
                if (child == nil) break;
                node = child;

                if ([node[KYQ_IS_EXIST] integerValue] != 1) continue;

                matched = pos - start + 1;
                if (!longestMatch && matched >= minLength) {
                    stoppedAtWord = YES;
                    break;
                }
            }

            // `matched > 0` keeps "no word found" from being reported when minMatchLenth is 0.
            if (matched > 0 && matched >= minLength) {
                block(NSMakeRange(start, matched));
            }

            start += stoppedAtWord ? matched : 1;
        }
    }
}

// MARK: - filter

/// Returns `str` with each matched sensitive word replaced by one "*" per matched unit.
/// Returns @"" when `str` is nil, and `str` itself when filtering is stopped or no words
/// have been loaded.
- (NSString *)filter:(NSString *)str {
    if (str == nil) return @"";
    if (self.isStop || !self.root) return str;

    NSMutableString *masked = [str mutableCopy];
    [self yq_enumerateMatchesInString:str usingBlock:^(NSRange matchRange) {
        // The mask has the same length as the match, so ranges computed on `str` stay valid.
        NSString *mask = [@"" stringByPaddingToLength:matchRange.length
                                           withString:@"*"
                                      startingAtIndex:0];
        [masked replaceCharactersInRange:matchRange withString:mask];
    }];
    return [masked copy];
}

/// Returns the sensitive words found in `str`, in order of appearance, one entry per occurrence.
/// Returns an empty array when `str` is nil, filtering is stopped or no words have been loaded.
- (NSArray *)getFilterKeys:(NSString *)str {
    if (str == nil) return @[];
    if (self.isStop || !self.root) return @[];

    NSMutableArray *foundWords = [NSMutableArray array];
    [self yq_enumerateMatchesInString:str usingBlock:^(NSRange matchRange) {
        [foundWords addObject:[str substringWithRange:matchRange]];
    }];
    return [foundWords copy];
}

/// Discards the loaded word tree; filter: returns its input unchanged until words are loaded again.
- (void)freeFilter {
    self.root = nil;
}

@end
使用方法:
// Load the dictionary once on the shared filter.
YQSensitiveWord *instance = [YQSensitiveWord sharedInstance];
NSArray *sensitiveWords = @[@"abc",@"abcd",@"pli",@"ytl",@"ab"];
[instance loadWordsWithArray:sensitiveWords];

NSString *sampleText = @"fadfabcijfableplifaljefytlvaabcdidea";

{
    // Longest match (assuming minMatchLenth is still at its default of 1).
    // Expected text: fadf***ijf**le***faljef***va****idea
    // Expected keys: abc, ab, pli, ytl, abcd
    instance.matchType = YQMatchTypeMax;
    NSString *maskedText = [instance filter:sampleText];
    NSArray *foundKeys = [instance getFilterKeys:sampleText];
    NSLog(@"%@",maskedText);
    NSLog(@"%@",foundKeys);
}

{
    // Shortest match, ignoring words shorter than 3.
    // Expected text: fadf***ijfable***faljef***va***didea
    // Expected keys: abc, pli, ytl, abc
    instance.matchType = YQMatchTypeMin;
    instance.minMatchLenth = 3;
    NSString *maskedText = [instance filter:sampleText];
    NSArray *foundKeys = [instance getFilterKeys:sampleText];
    NSLog(@"%@",maskedText);
    NSLog(@"%@",foundKeys);
}