Speech Patterns (string)

最新推荐文章于 2023-08-05 01:22:48 发布

weixin_30627381

最新推荐文章于 2023-08-05 01:22:48 发布

阅读量113

点赞数

原文链接：http://www.cnblogs.com/xiaoyesoso/p/4235161.html

版权

People often have a preference among synonyms of the same word. For example, some may prefer "the police", while others may prefer "the cops". Analyzing such patterns can help to narrow down a speaker's identity, which is useful when validating, for example, whether it's still the same person behind an online avatar.

Now given a paragraph of text sampled from someone's speech, can you find the person's most commonly used word?

Input Specification:

Each input file contains one test case. For each case, there is one line of text no more than 1048576 characters in length, terminated by a newline character '\n'. The input contains at least one alphanumerical character, i.e., one character from the set [0-9 A-Z a-z].

Output Specification:

For each test case, print in one line the most commonly occurring word in the input text, followed by a space and the number of times it has occurred in the input. If there is more than one such word, print the lexicographically smallest one. The word should be printed in all lower case. Here a "word" is defined as a continuous sequence of alphanumerical characters separated by non-alphanumerical characters or the line beginning/end.

Note that words are case insensitive.

Sample Input:

Can1: "Can a can can a can? It can!"

Sample Output:

can 5

例子是个坑，一开始认为格式是can1:”_______” ______为要检查的内容。

其实题目说的是“one character from the set [0-9 A-Z a-z]”，也就是说数字同样是单词的组成部分，所以“can1”是一个独立的单词，有别于“can”。

还有就是容易超时，后来想了一个只需线性扫描一遍输入来分词（再用 map 统计次数）的算法，如下：

#include <cctype>
#include <iostream>
#include <map>
#include <string>

using namespace std;
  8 
  9  
 10 
 11 struct word
 12 
 13 {
 14 
 15    int time;
 16 
 17    int len;
 18 
 19 };
 20 
 21  
 22 
 23  map<string,word>  mm;
 24 
 25  
 26 
 27 void fun1(string ss)
 28 
 29 {
 30 
 31    int i=0;string tem="";
 32 
 33    while(i<ss.length())
 34 
 35    {
 36 
 37         while(i<ss.length())
 38 
 39     {
 40 
 41       if((ss[i]>='A'&&ss[i]<='Z')||(ss[i]>='a'&&ss[i]<='z')||(ss[i]>='0'&&ss[i]<='9'))
 42 
 43         {
 44 
 45               if(ss[i]>='A'&&ss[i]<='Z')
 46 
 47                     ss[i]=ss[i]-'A'+'a';
 48 
 49               tem+=ss[i];
 50 
 51               ++i;
 52 
 53         }
 54 
 55         else break;
 56 
 57     }
 58 
 59      ++mm[tem].time;
 60 
 61       mm[tem].len=i;
 62 
 63       tem="";
 64 
 65       while(i<ss.length())
 66 
 67      {
 68 
 69       if((ss[i]>='A'&&ss[i]<='Z')||(ss[i]>='a'&&ss[i]<='z')||(ss[i]>='0'&&ss[i]<='9'))
 70 
 71         {
 72 
 73               break;
 74 
 75         }
 76 
 77         else i++;
 78 
 79      }
 80 
 81    }
 82 
 83  
 84 
 85  
 86 
 87 }
 88 
 89  
 90 
 91 void fun2(string ss)
 92 
 93 {
 94 
 95      int i=0;string tem="";
 96 
 97                 while(i<ss.length())
 98 
 99        {
100 
101                 while(i<ss.length())
102 
103             {
104 
105                if((ss[i]>='A'&&ss[i]<='Z')||(ss[i]>='a'&&ss[i]<='z')||(ss[i]>='0'&&ss[i]<='9'))
106 
107                      break;
108 
109                   else i++;
110 
111            }
112 
113              while(i<ss.length())
114 
115            {
116 
117               if((ss[i]>='A'&&ss[i]<='Z')||(ss[i]>='a'&&ss[i]<='z')||(ss[i]>='0'&&ss[i]<='9'))
118 
119                {
120 
121                 if(ss[i]>='A'&&ss[i]<='Z')
122 
123                       ss[i]=ss[i]-'A'+'a';
124 
125                    tem+=ss[i];
126 
127                     ++i;
128 
129                 }
130 
131                else break;
132 
133             }
134 
135            ++mm[tem].time;
136 
137                mm[tem].len=i;
138 
139             tem="";  
140 
141          }
142 
143    }
144 
145  
146 
147 int main()
148 
149 {
150 
151       string ss;
152 
153       int i,j;int c1,c2;
154 
155      while(getline(cin,ss))
156 
157       {
158 
159             mm.clear();
160 
161             
162 
163            
164 
165             if((ss[0]>='A'&&ss[0]<='Z')||(ss[0]>='a'&&ss[0]<='z')||(ss[0]>='0'&&ss[0]<='9'))
166 
167                   fun1(ss);
168 
169             else fun2(ss);
170 
171        
172 
173         int Max=0;int Min=1048576;
174 
175                   string most;
176 
177               map<string,word>::iterator it;
178 
179               for(it=mm.begin();it!=mm.end();it++)
180 
181               {                    if((it->second).time>Max||((it->second).time==Max&&(it->second).len<Min))
182 
183                   {
184 
185                      Max=(it->second).time;
186 
187                      Min=(it->second).len;
188 
189                most=it->first;
190 
191                   }
192 
193               }
194 
195         cout<<most<<" "<<Max<<endl;
196 
197       }
198 
199  
200 
201    return 0;
202 
203 }