using System;
using System.Collections.Generic;
using System.Text;

namespace Tup.AhoCorasick
{
    /// <summary>
    /// Multi-keyword text search built on an Aho-Corasick automaton.
    /// </summary>
    /// <remarks>
    /// Nominal search time: O(n + m + z), where z is the number of pattern occurrences.
    /// <see cref="Build"/> must be called before any search or replace operation.
    /// </remarks>
    public class AhoCorasickSearch
    {
        /// <summary>
        /// Root of the keyword automaton; null until <see cref="Build"/> has been called.
        /// </summary>
        private Node TreeRoot = null;

        /// <summary>
        /// Searches <paramref name="text"/> for all keywords and substitutes
        /// <paramref name="newWord"/> for the matches.
        /// </summary>
        /// <remarks>
        /// Every match reported by <see cref="SearchAll(string)"/> appends one copy of
        /// <paramref name="newWord"/>, including matches that overlap an earlier one.
        /// Text between matches is copied only when it lies beyond the end of the
        /// previously processed match.
        /// </remarks>
        /// <param name="text">Text to process. Null or empty input is returned unchanged.</param>
        /// <param name="newWord">Replacement text; null is treated as an empty string.</param>
        /// <returns>The rewritten text, or <paramref name="text"/> itself when nothing matched.</returns>
        public string Replace(string text, string newWord)
        {
            if (string.IsNullOrEmpty(text))
                return text;

            var result = this.SearchAll(text);
            if (result == null || result.Length <= 0)
                return text;

            if (newWord == null)
                newWord = "";

            var startIndex = 0;
            var resContent = new StringBuilder();
            foreach (var v in result)
            {
                // Copy the untouched text that precedes this match, if any is left.
                if (v.Index > startIndex)
                    resContent.Append(text, startIndex, v.Index - startIndex);

                resContent.Append(newWord);
                startIndex = v.Index + v.Match.Length;
            }

            // Copy whatever follows the last match.
            if (startIndex < text.Length)
                resContent.Append(text, startIndex, text.Length - startIndex);

            return resContent.ToString();
        }

        /// <summary>
        /// Finds every keyword occurrence in <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Text to search; must be non-null and non-empty.</param>
        /// <returns>All matches, in the order the scan encounters them.</returns>
        public virtual SearchResult[] SearchAll(string text)
        {
            return SearchAll(text, 0, int.MaxValue);
        }

        /// <summary>
        /// Finds every keyword occurrence in <paramref name="text"/>, scanning from
        /// position <paramref name="start"/>.
        /// </summary>
        /// <param name="text">Text to search; must be non-null and non-empty.</param>
        /// <param name="start">Zero-based position in <paramref name="text"/> where scanning begins.</param>
        /// <returns>All matches, in the order the scan encounters them.</returns>
        public virtual SearchResult[] SearchAll(string text, int start)
        {
            return SearchAll(text, start, int.MaxValue);
        }

        /// <summary>
        /// Finds the first keyword occurrence in <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Text to search; must be non-null and non-empty.</param>
        /// <returns>The first match, or <see cref="SearchResult.Empty"/> when there is none.</returns>
        public virtual SearchResult SearchFirst(string text)
        {
            return SearchFirst(text, 0);
        }

        /// <summary>
        /// Finds at most <paramref name="count"/> keyword occurrences in
        /// <paramref name="text"/>, scanning from position <paramref name="start"/>.
        /// </summary>
        /// <param name="text">Text to search; must be non-null and non-empty.</param>
        /// <param name="start">Zero-based position in <paramref name="text"/> where scanning begins.</param>
        /// <param name="count">Maximum number of matches to return; must be positive.</param>
        /// <returns>The matches found, in the order the scan encounters them.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null, or <see cref="Build"/> has not been called.</exception>
        /// <exception cref="ArgumentException"><paramref name="text"/> is empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="start"/> or <paramref name="count"/> is out of range.</exception>
        public SearchResult[] SearchAll(string text, int start, int count)
        {
            CheckArguments(text, start, count);

            // Do not trust a caller-supplied count as a capacity: a huge value
            // would force a huge up-front allocation even for a short text.
            List<SearchResult> results = count == int.MaxValue
                ? new List<SearchResult>()
                : new List<SearchResult>(Math.Min(count, text.Length));

            foreach (SearchResult result in SearchIterator(text, start))
            {
                results.Add(result);
                if (results.Count == count)
                    break;
            }

            return results.ToArray();
        }

        /// <summary>
        /// Finds the first keyword occurrence in <paramref name="text"/>, scanning
        /// from position <paramref name="start"/>.
        /// </summary>
        /// <param name="text">Text to search; must be non-null and non-empty.</param>
        /// <param name="start">Zero-based position in <paramref name="text"/> where scanning begins.</param>
        /// <returns>The first match, or <see cref="SearchResult.Empty"/> when there is none.</returns>
        public SearchResult SearchFirst(string text, int start)
        {
            CheckArguments(text, start, int.MaxValue);

            // foreach disposes the enumerator, which a bare GetEnumerator() call would not.
            foreach (SearchResult result in SearchIterator(text, start))
                return result;

            return SearchResult.Empty;
        }

        /// <summary>
        /// Lazily walks the automaton over <paramref name="text"/> and yields each match.
        /// </summary>
        /// <remarks>
        /// Reported indices are positions in <paramref name="text"/> itself, regardless
        /// of <paramref name="start"/>. Because this is an iterator, the "not built"
        /// exception is raised on first enumeration, not on the call.
        /// </remarks>
        /// <param name="text">Text to scan.</param>
        /// <param name="start">Position where scanning begins; values below zero are treated as zero.</param>
        /// <returns>A lazy sequence of matches.</returns>
        /// <exception cref="ArgumentNullException"><see cref="Build"/> has not been called.</exception>
        protected IEnumerable<SearchResult> SearchIterator(string text, int start)
        {
            var root = this.TreeRoot;
            if (root == null)
                throw new ArgumentNullException("root", "need search.Build()");

            var ptr = root;
            for (int index = start > 0 ? start : 0; index < text.Length; index++)
            {
                char c = text[index];

                // Follow failure links until some state accepts the character
                // or the root is reached.
                Node trans = ptr.GetTransition(c);
                while (trans == null && ptr != root)
                {
                    ptr = ptr.Failure;
                    trans = ptr.GetTransition(c);
                }

                if (trans != null)
                    ptr = trans;

                if (ptr.Outputs != null)
                {
                    // A keyword ending at 'index' starts found.Length - 1 characters earlier.
                    foreach (string found in ptr.Outputs)
                        yield return new SearchResult(index - found.Length + 1, found);
                }
            }
        }

        /// <summary>
        /// Builds the search automaton for the given keywords, replacing any previous one.
        /// </summary>
        /// <param name="keywords">Keywords to search for; none may be null or empty.</param>
        /// <returns>Always true.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="keywords"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="keywords"/> is empty or contains a null or empty entry.</exception>
        public bool Build(params string[] keywords)
        {
            CheckKeywords(keywords);
            this.TreeRoot = BuildTree(keywords);
            return true;
        }

        /// <summary>
        /// Builds the keyword trie and then computes failure links and merged outputs.
        /// </summary>
        /// <param name="keywords">Keywords to insert.</param>
        /// <returns>The root node of the finished automaton.</returns>
        protected static Node BuildTree(string[] keywords)
        {
            Node root = new Node(null, ' ');

            #region build trie tree
            foreach (string keyword in keywords)
            {
                // Walk (and extend where necessary) the path spelling this keyword.
                Node cNode = root;
                foreach (char c in keyword)
                {
                    Node newNode = cNode.GetTransition(c);
                    if (newNode == null)
                    {
                        newNode = new Node(cNode, c);
                        cNode.AddTransition(newNode);
                    }
                    cNode = newNode;
                }
                cNode.AddResult(keyword);
            }
            #endregion

            // Find failure functions.
            var nodesQueue = new Queue<Node>();

            // Level 1 nodes fail to the root.
            foreach (Node levelOne in root.Transition.Values)
            {
                levelOne.Failure = root;
                QueueAddRange(nodesQueue, levelOne.Transition.Values);
            }

            // Other nodes are processed breadth-first.
            while (nodesQueue.Count != 0)
            {
                Node cNode = nodesQueue.Dequeue();
                Node nNode = null;

                // root.Failure is still null here, so this walk stops after the root.
                Node r = cNode.Parent.Failure;
                while (r != null && (nNode = r.GetTransition(cNode.Char)) == null)
                    r = r.Failure;

                if (r == null)
                {
                    cNode.Failure = root;
                }
                else
                {
                    cNode.Failure = nNode;
                    // Keywords ending at the failure target also end here.
                    cNode.AddResults(cNode.Failure.Outputs);
                }

                // Add child nodes to the BFS queue.
                QueueAddRange(nodesQueue, cNode.Transition.Values);
            }

            // Set only after the BFS: the walk above relies on the null sentinel.
            root.Failure = root;
            return root;
        }

        /// <summary>
        /// One state of the automaton.
        /// </summary>
        protected class Node
        {
            /// <summary>Character on the edge leading into this node.</summary>
            internal char Char;

            /// <summary>Parent node; null for the root.</summary>
            internal Node Parent;

            /// <summary>Failure link followed when no transition exists for the next character.</summary>
            internal Node Failure;

            /// <summary>Keywords that end at this node; null when there are none.</summary>
            internal HashSet<string> Outputs;

            /// <summary>Child nodes keyed by their edge character.</summary>
            internal Dictionary<char, Node> Transition;

            /// <summary>
            /// Creates a node with no children and no outputs.
            /// </summary>
            /// <param name="parent">Parent node, or null for the root.</param>
            /// <param name="c">Character on the edge leading into this node.</param>
            public Node(Node parent, char c)
            {
                Char = c;
                Parent = parent;
                Transition = new Dictionary<char, Node>();
            }

            /// <summary>
            /// Records a keyword ending at this node; null or empty values are ignored.
            /// </summary>
            /// <param name="result">Keyword to record.</param>
            public void AddResult(string result)
            {
                if (string.IsNullOrEmpty(result))
                    return;

                if (Outputs == null)
                    Outputs = new HashSet<string>();

                Outputs.Add(result);
            }

            /// <summary>
            /// Records several keywords ending at this node; a null sequence is ignored.
            /// </summary>
            /// <param name="results">Keywords to record.</param>
            public void AddResults(IEnumerable<string> results)
            {
                if (results == null)
                    return;

                if (Outputs == null)
                    Outputs = new HashSet<string>();

                foreach (var result in results)
                {
                    Outputs.Add(result);
                }
            }

            /// <summary>
            /// Registers <paramref name="node"/> as a child under its own character.
            /// </summary>
            /// <param name="node">Child node to add.</param>
            public void AddTransition(Node node)
            {
                Transition.Add(node.Char, node);
            }

            /// <summary>
            /// Looks up the child reached by character <paramref name="c"/>.
            /// </summary>
            /// <param name="c">Edge character.</param>
            /// <returns>The child node, or null when there is no such transition.</returns>
            public Node GetTransition(char c)
            {
                Node node = null;
                if (Transition.TryGetValue(c, out node))
                    return node;

                return null;
            }
        }

        /// <summary>
        /// Enqueues every item of <paramref name="collection"/>; does nothing when either argument is null.
        /// </summary>
        /// <param name="queue">Destination queue.</param>
        /// <param name="collection">Items to enqueue.</param>
        private static void QueueAddRange(Queue<Node> queue, IEnumerable<Node> collection)
        {
            if (queue == null || collection == null)
                return;

            foreach (var item in collection)
            {
                queue.Enqueue(item);
            }
        }

        /// <summary>
        /// Validates a keyword set before it is compiled into an automaton.
        /// </summary>
        /// <param name="keywords">Keywords to validate.</param>
        /// <exception cref="ArgumentNullException"><paramref name="keywords"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="keywords"/> is empty or contains a null or empty entry.</exception>
        [System.Diagnostics.DebuggerHidden]
        protected static void CheckKeywords(params string[] keywords)
        {
            if (keywords == null)
                throw new ArgumentNullException("keywords");
            if (keywords.Length == 0)
                throw new ArgumentException("keywords");

            foreach (string keyword in keywords)
            {
                if (string.IsNullOrEmpty(keyword))
                    throw new ArgumentException("The keyword set cannot contain null references or empty strings.");
            }
        }

        /// <summary>
        /// Validates the arguments shared by the search entry points.
        /// </summary>
        /// <param name="text">Text to search.</param>
        /// <param name="start">Scan start position.</param>
        /// <param name="count">Maximum number of matches.</param>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="text"/> is empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="start"/> is negative or not less than the text length, or
        /// <paramref name="count"/> is not positive.
        /// </exception>
        [System.Diagnostics.DebuggerHidden]
        protected static void CheckArguments(string text, int start, int count)
        {
            if (text == null)
                throw new ArgumentNullException("text");
            if (text.Length == 0)
                throw new ArgumentException("text");
            if (start < 0)
                throw new ArgumentOutOfRangeException("start");
            if (start >= text.Length)
                throw new ArgumentOutOfRangeException("start");
            if (count <= 0)
                throw new ArgumentOutOfRangeException("count");
        }
    }

    /// <summary>
    /// Container for a single keyword match.
    /// </summary>
    public struct SearchResult : IEquatable<SearchResult>
    {
        /// <summary>
        /// Sentinel meaning "no match": index -1 and a null keyword.
        /// </summary>
        public static readonly SearchResult Empty = new SearchResult(-1, null);

        /// <summary>
        /// Initializes a new instance of the <see cref="SearchResult"/> struct.
        /// </summary>
        /// <param name="index">Position of the first matched character.</param>
        /// <param name="match">The matched keyword; may be null for the empty sentinel.</param>
        internal SearchResult(int index, string match)
            : this()
        {
            Index = index;
            Match = match;
            // Keep Length consistent with Match; it was previously left at 0.
            Length = match == null ? 0 : match.Length;
        }

        /// <summary>
        /// Gets the position of the first matched character.
        /// </summary>
        /// <value>The index.</value>
        public int Index { get; internal set; }

        /// <summary>
        /// Gets the length of the matched keyword.
        /// </summary>
        /// <value>The length.</value>
        public int Length { get; internal set; }

        /// <summary>
        /// Gets the matched keyword.
        /// </summary>
        /// <value>The matched keyword.</value>
        public string Match { get; internal set; }

        /// <summary>
        /// Returns a diagnostic string containing index, length and keyword.
        /// </summary>
        /// <returns>The formatted description.</returns>
        public override string ToString()
        {
            return string.Format("[SearchResult Index:{0}, Length:{1}, Match:{2}]", this.Index, this.Length, this.Match);
        }

        /// <summary>
        /// Determines whether the specified <see cref="System.Object"/> is equal to this instance.
        /// </summary>
        /// <param name="obj">The <see cref="System.Object"/> to compare with this instance.</param>
        /// <returns>
        /// <c>true</c> if <paramref name="obj"/> is a <see cref="SearchResult"/> with the same
        /// index and keyword; otherwise, <c>false</c>.
        /// </returns>
        public override bool Equals(object obj)
        {
            // Also rejects null; an unchecked cast would throw for foreign types.
            if (!(obj is SearchResult))
                return false;

            return Equals((SearchResult)obj);
        }

        /// <summary>
        /// Determines whether <paramref name="other"/> has the same index and keyword.
        /// </summary>
        /// <param name="other">The result to compare with.</param>
        /// <returns><c>true</c> when both index and keyword are equal.</returns>
        public bool Equals(SearchResult other)
        {
            return Index == other.Index && Match == other.Match;
        }

        /// <summary>
        /// Returns a hash code for this instance.
        /// </summary>
        /// <returns>
        /// A hash code consistent with <see cref="Equals(SearchResult)"/>.
        /// </returns>
        public override int GetHashCode()
        {
            // Match is null for Empty, so it must not be dereferenced blindly.
            return Index.GetHashCode() ^ (Match == null ? 0 : Match.GetHashCode());
        }

        /// <summary>
        /// Implements the operator ==.
        /// </summary>
        /// <param name="sr1">A SearchResult.</param>
        /// <param name="sr2">A SearchResult.</param>
        /// <returns>The result of the comparison.</returns>
        public static bool operator ==(SearchResult sr1, SearchResult sr2)
        {
            return sr1.Equals(sr2);
        }

        /// <summary>
        /// Implements the operator !=.
        /// </summary>
        /// <param name="sr1">A SearchResult.</param>
        /// <param name="sr2">A SearchResult.</param>
        /// <returns>The result of the comparison.</returns>
        public static bool operator !=(SearchResult sr1, SearchResult sr2)
        {
            return !sr1.Equals(sr2);
        }
    }
}
// ---- File: Program.cs ----
using System;
using System.Text;

namespace Tup.AhoCorasick
{
    /// <summary>
    /// Console demo that exercises <see cref="AhoCorasickSearch"/> with a few
    /// English and Chinese keyword sets and prints the results.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the search demo and the three replace demos, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var search = new AhoCorasickSearch();
            search.Build("he", "she", "his", "hers");

            searchTest(search);
            replaceTest(search);
            replaceTest2();
            replaceTest3();

            Console.Read();
        }

        /// <summary>
        /// Prints the result array itself followed by each match found in "ushers".
        /// </summary>
        /// <param name="search">A searcher whose keywords have already been built.</param>
        private static void searchTest(AhoCorasickSearch search)
        {
            SearchResult[] hits = search.SearchAll("ushers");
            Console.WriteLine(hits);

            foreach (SearchResult hit in hits)
            {
                Console.WriteLine(hit.ToString());
                Console.WriteLine("Match=" + hit.Match);
            }
        }

        /// <summary>
        /// Prints the "-" replacement result for several English sample texts.
        /// </summary>
        /// <param name="search">A searcher whose keywords have already been built.</param>
        private static void replaceTest(AhoCorasickSearch search)
        {
            printReplacements(search, "ushers", "shersx", "her", "she");
        }

        /// <summary>
        /// Builds a Chinese keyword set and prints the replacement result for several texts.
        /// </summary>
        private static void replaceTest2()
        {
            var search = new AhoCorasickSearch();
            search.Build("伟大", "特色主义", "公园");

            printReplacements(
                search,
                "从这里建设伟大的特色主义主题公园",
                "主题公园",
                "伟大的特色主义主题公园",
                "伟大特色主义公园");
        }

        /// <summary>
        /// Builds a second Chinese keyword set and prints the replacement result for two texts.
        /// </summary>
        private static void replaceTest3()
        {
            var search = new AhoCorasickSearch();
            search.Build("一边", "刀锋", "烽火");

            printReplacements(search, "一边烽火, 一边烽火天", "一边刀锋很犀利");
        }

        /// <summary>
        /// Replaces every keyword match in each sample with "-" and writes the
        /// result to the console, one line per sample, in the order given.
        /// </summary>
        /// <param name="search">A searcher whose keywords have already been built.</param>
        /// <param name="samples">Texts to process.</param>
        private static void printReplacements(AhoCorasickSearch search, params string[] samples)
        {
            foreach (string sample in samples)
            {
                Console.WriteLine(search.Replace(sample, "-"));
            }
        }
    }
}