iOS 敏感词过滤

iOS 敏感词过滤

在 App 发布内容时, 经常需要过滤一些敏感词。对比了 Java 版的 DFA 算法实现和已有的 OC 版 DFA 实现后, 发现 OC 版本相比 Java 版本存在 2 个问题.

1. 不能进行最大限度匹配

比如敏感词有 "abc"、"ab", 原 OC 版本只能匹配到 "ab", 匹配不到更长的 "abc".

2. 不能进行最小字数判断

比如只需要匹配长度不小于 3 的敏感词 (长度小于 3 的命中会被忽略).

对OC 版本进行了升级, 代码如下:

.h

//
//  YQSensitiveWord.h
//  
//
//  Created by yaoqiang on 2016/11/29.
//  Copyright © 2016年 Newestage,lnc. All rights reserved.
//

#import <Foundation/Foundation.h>

/**
 Strategy used when more than one sensitive word starts at the same position
 of the text being scanned.
 */
typedef NS_ENUM(NSInteger, YQMatchType) {
    /// Stop at the first (shortest) word that is at least `minMatchLenth` long.
    YQMatchTypeMin = 0,
    /// Keep scanning and use the longest word found at that position.
    YQMatchTypeMax,
};


/**
 Sensitive-word filter backed by a dictionary trie.

 Load a word list with -loadWordsWithArray:, then use -filter: to mask the
 words in a string or -getFilterKeys: to list the words that were found.
 */
@interface YQSensitiveWord : NSObject

/**
 How to choose between several words that start at the same position.
 Defaults to YQMatchTypeMax.
 */
@property (nonatomic, assign) YQMatchType matchType;

/**
 Minimum length a matched word must have to be masked or reported; shorter
 matches are ignored. Measured in NSString length units. Defaults to 1.
 */
@property (nonatomic, assign) NSUInteger minMatchLenth;

/**
 While YES, -filter: returns its input unchanged and -getFilterKeys: returns nil.
 -loadWordsWithArray: sets this to YES while it rebuilds the word list and
 resets it to NO when it finishes.
 */
@property (nonatomic, assign, getter=isStop) BOOL stop;


/// Returns the shared filter instance, created once on first use.
+ (instancetype)sharedInstance;

/**
 Replaces the loaded word list with the strings in `array`.
 Elements that are not NSString are skipped. Does nothing (the previous list
 is kept) when `array` is nil or empty.
 */
- (void)loadWordsWithArray:(NSArray *)array;

/**
 Returns `str` with every matched sensitive word replaced by the same number
 of '*' characters.
 Returns @"" when `str` is nil, and `str` itself when the filter is stopped or
 no word list is loaded.
 */
- (NSString *)filter:(NSString *)str;

/**
 Returns the sensitive words found in `str`, in order of appearance.
 Returns an empty array when `str` is nil, and nil when the filter is stopped
 or no word list is loaded.
 */
- (NSArray *)getFilterKeys:(NSString *)str;

/// Discards the loaded word list; filtering is a no-op until words are loaded again.
- (void)freeFilter;

@end

.m

//
//  YQSensitiveWord.m
//  
//
//  Created by yaoqiang on 2016/11/29.
//  Copyright © 2016年 Newestage,lnc. All rights reserved.
//

#import "YQSensitiveWord.h"

// Key stored in a trie node to mark that the path from the root to this node
// spells a complete sensitive word. Child keys are always one character long,
// so this multi-character key cannot clash with them.
#define KYQ_IS_EXIST @"isExists"

@interface YQSensitiveWord()
// Root of the word trie: each key is a one-character string mapping to the
// child node (another mutable dictionary). nil until -loadWordsWithArray: has
// run, and again after -freeFilter.
@property (nonatomic, strong) NSMutableDictionary *root;
@end

@implementation YQSensitiveWord

#pragma mark - Lifecycle

/// Returns the shared filter instance, creating it exactly once.
+ (instancetype)sharedInstance {
    static YQSensitiveWord *instance;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        instance = [[self alloc] init];
    });
    return instance;
}

/// Creates a filter with no words loaded, longest-match mode and a minimum match length of 1.
- (instancetype)init {
    self = [super init];
    if (self) {
        // `root` (nil) and `stop` (NO) keep the zero values alloc already gave them.
        _matchType = YQMatchTypeMax;
        _minMatchLenth = 1;
    }
    return self;
}

#pragma mark - Loading words

/// Replaces the loaded word list with the NSString elements of `array`.
/// Does nothing (the previous list stays loaded) when `array` is nil or empty.
- (void)loadWordsWithArray:(NSArray *)array {
    if (array.count == 0) return;

    // Make filter calls pass through while the trie is being rebuilt, then drop the old data.
    self.stop = YES;
    [self freeFilter];
    self.root = [NSMutableDictionary dictionary];

    for (id candidate in array) {
        // Anything that is not a string is silently skipped.
        if ([candidate isKindOfClass:[NSString class]]) {
            [self insertWords:candidate];
        }
    }
    self.stop = NO;
}

/// Adds one sensitive word to the trie, one character per level.
///
/// After inserting "ab" the trie looks like:
///
///     { a = { b = { isExists = 1 } } }
- (void)insertWords:(NSString *)words {
    // An empty word would put the terminal marker on the root node itself, where
    // it can never be matched; skip it so the root only holds real children.
    if (words.length == 0) return;

    NSMutableDictionary *node = self.root;
    for (NSUInteger i = 0; i < words.length; i++) {
        NSString *key = [words substringWithRange:NSMakeRange(i, 1)];
        NSMutableDictionary *child = node[key];
        if (child == nil) {
            child = [NSMutableDictionary dictionary];
            node[key] = child;
        }
        node = child;
    }
    node[KYQ_IS_EXIST] = @1;
}

#pragma mark - Matching

/// Walks `str` from left to right and calls `block` with the range of every
/// sensitive word that satisfies `matchType` and `minMatchLenth`.
///
/// In longest-match mode the scan restarts at the very next character after a
/// hit, so words that overlap an earlier hit are reported too. In any other
/// mode the scan resumes after the end of the reported word.
- (void)enumerateMatchesInString:(NSString *)str usingBlock:(void (^)(NSRange matchRange))block {
    // Read the configuration once so a single pass is internally consistent, and
    // hold the trie strongly so it outlives a concurrent -freeFilter.
    NSDictionary *trie = self.root;
    BOOL stopsAtFirstMatch = (self.matchType != YQMatchTypeMax);
    NSUInteger minLength = self.minMatchLenth;
    NSUInteger length = str.length;

    for (NSUInteger start = 0; start < length; start++) {
        // Drain the per-character key strings on every iteration to keep peak memory flat.
        @autoreleasepool {
            NSDictionary *node = trie;
            NSUInteger matched = 0;

            for (NSUInteger pos = start; pos < length; pos++) {
                node = node[[str substringWithRange:NSMakeRange(pos, 1)]];
                if (node == nil) break;

                // Only nodes carrying the marker end a complete word.
                if ([node[KYQ_IS_EXIST] integerValue] != 1) continue;

                matched = pos - start + 1;
                if (stopsAtFirstMatch && matched >= minLength) break;
            }

            // `matched > 0` keeps a minimum length of 0 from reporting empty matches.
            if (matched > 0 && matched >= minLength) {
                block(NSMakeRange(start, matched));
                if (stopsAtFirstMatch) {
                    // Resume after the reported word; the loop's own ++ adds the last step.
                    start += matched - 1;
                }
            }
        }
    }
}

#pragma mark - Public filtering API

/// Returns `str` with every matched sensitive word replaced by '*' characters
/// of the same length.
///
/// Returns @"" when `str` is nil, and `str` itself when the filter is stopped
/// or no word list is loaded.
- (NSString *)filter:(NSString *)str {
    if (str == nil) return @"";
    if (self.isStop || !self.root) return str;

    NSMutableString *result = [str mutableCopy];
    // The mask has the same length as the word, so ranges in `str` stay valid in `result`.
    [self enumerateMatchesInString:str usingBlock:^(NSRange matchRange) {
        NSString *mask = [@"" stringByPaddingToLength:matchRange.length
                                           withString:@"*"
                                      startingAtIndex:0];
        [result replaceCharactersInRange:matchRange withString:mask];
    }];
    return [result copy];
}

/// Returns the sensitive words found in `str`, in order of appearance.
///
/// Returns an empty array when `str` is nil, and nil when the filter is
/// stopped or no word list is loaded.
- (NSArray *)getFilterKeys:(NSString *)str {
    if (str == nil) return @[];
    if (self.isStop || !self.root) return nil;

    NSMutableArray<NSString *> *keys = [NSMutableArray array];
    [self enumerateMatchesInString:str usingBlock:^(NSRange matchRange) {
        [keys addObject:[str substringWithRange:matchRange]];
    }];
    return [keys copy];
}

/// Discards the loaded word list. Until new words are loaded, -filter: returns
/// its input unchanged and -getFilterKeys: returns nil.
- (void)freeFilter {
    self.root = nil;
}

@end

使用方法:

    // Load the word list once; the shared instance keeps it until the next load or -freeFilter.
    YQSensitiveWord *wordFilter = [YQSensitiveWord sharedInstance];
    NSArray *sensitiveWords = @[@"abc",@"abcd",@"pli",@"ytl",@"ab"];
    [wordFilter loadWordsWithArray:sensitiveWords];
    
    NSString *sample = @"fadfabcijfableplifaljefytlvaabcdidea";
    
    // Longest match, default minimum length of 1.
    wordFilter.matchType = YQMatchTypeMax;
    NSString *longestMasked = [wordFilter filter:sample];
    NSArray *longestKeys = [wordFilter getFilterKeys:sample];
    NSLog(@"%@",longestMasked);   // fadf***ijf**le***faljef***va****idea
    NSLog(@"%@",longestKeys);     // abc, ab, pli, ytl, abcd
    
    // Shortest match that is at least 3 characters long ("ab" is now ignored).
    wordFilter.matchType = YQMatchTypeMin;
    wordFilter.minMatchLenth = 3;
    NSString *shortestMasked = [wordFilter filter:sample];
    NSArray *shortestKeys = [wordFilter getFilterKeys:sample];
    NSLog(@"%@",shortestMasked);  // fadf***ijfable***faljef***va***didea
    NSLog(@"%@",shortestKeys);    // abc, pli, ytl, abc

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值