Time Limit: 10000ms
Case Time Limit: 3000ms
Memory Limit: 256MB
Description
In a running system, there're many logs produced within a short period of time, we'd like to know the count of the most frequent logs.
Logs are produced by a few non-empty format strings. The number of logs is N (1 <= N <= 20000), and the maximum length of each log is 256.
Here we consider a log to be the same as another when their edit distance (see note) is <= 5.
We are also given that a) all logs produced by a given format string are the same as each other, and b) any two format strings have an edit distance > 5 from each other.
Your program will be dealing with lots of logs, so please try to keep the time cost close to O(nl), where n is the number of logs, and l is the average log length.
Note: edit distance is the minimum number of operations (insert/delete/replace a character) required to transform one string into the other; please refer to http://en.wikipedia.org/wiki/Edit_distance for more details.
Input
Multiple lines of non-empty strings.
Output
The count of the most frequent logs.
Sample In
Logging started for id:1
Module ABC has completed its job
Module XYZ has completed its job
Logging started for id:10
Module ? has completed its job
Sample Out
3
以下为个人给出的一个解,计算编辑距离使用了动态规划
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <vector>
using namespace std;
/**
 * Computes the edit (Levenshtein) distance between two NUL-terminated strings
 * using dynamic programming.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and replacements needed to turn `a` into `b`.
 *
 * @param a first string; must be a valid NUL-terminated string (not null)
 * @param b second string; must be a valid NUL-terminated string (not null)
 * @return the edit distance between a and b (0 when they are equal)
 *
 * Time is O(strlen(a) * strlen(b)); memory is O(strlen(b)) because only the
 * previous and the current row of the DP table are kept.
 */
int calEditDis(const char* a, const char* b){
    const std::size_t lenA = std::strlen(a);
    const std::size_t lenB = std::strlen(b);

    // prevRow[j] holds the distance between the first (i-1) chars of a and the
    // first j chars of b; curRow[j] is the same for the first i chars of a.
    std::vector<int> prevRow(lenB + 1);
    std::vector<int> curRow(lenB + 1);

    // Row 0: turning the empty prefix of a into b[0..j) takes j insertions.
    for (std::size_t j = 0; j <= lenB; ++j)
        prevRow[j] = static_cast<int>(j);

    for (std::size_t i = 1; i <= lenA; ++i) {
        // Column 0: turning a[0..i) into the empty string takes i deletions.
        curRow[0] = static_cast<int>(i);
        for (std::size_t j = 1; j <= lenB; ++j) {
            // Diagonal step is free when the characters already match.
            const int replaceCost = prevRow[j - 1] + (a[i - 1] == b[j - 1] ? 0 : 1);
            const int insertCost = curRow[j - 1] + 1;
            const int deleteCost = prevRow[j] + 1;
            curRow[j] = std::min(replaceCost, std::min(insertCost, deleteCost));
        }
        // The row just filled becomes the "previous" row of the next pass.
        prevRow.swap(curRow);
    }
    return prevRow[lenB];
}
// Capacity limits used when sizing the log buffer and the group table.
#define LOGNUM (20000)   // maximum number of log lines that are read
#define LOGLEN (256+1)   // 256 characters per log plus the terminating NUL

// Bookkeeping entry for one group of logs that are considered the same.
struct RECORDER
{
    bool useful;   // set to true when the entry is created
    int index;     // position in the log buffer of the group's first log
    int num;       // how many logs have been counted in this group
};
int main(){
char (*ptr)[LOGLEN];
ptr = new char[LOGNUM][LOGLEN];
int dis=0;
bool newLog =false;
int result = 0;
//record[LOGNUM] 数组中记录了 出现次数 依次从多到少的相关信息,在整个过程中都维护该数据结构
RECORDER record[LOGNUM];
int count = 0;
memset(record,0,sizeof(record));
fstream fstr("testCase.txt");
for(int i= 0,j=0,k=0;i< LOGNUM;i++){
newLog = true;
if(fstr.eof())
break;
fstr.getline(ptr[i],LOGLEN);
if(strlen(ptr[i])==0)
continue;
for(j=0;j<count;j++){
dis = calEditDis(ptr[(record[j].index)],ptr[i]);
if(dis <=5){
record[j].num++;
newLog = false;
break;
}
}
if(newLog == true){
count++;
record[count-1].index=i;
record[count-1].useful = true;
record[count-1].num = 1;
}
else if(j!=0){
if(j>=1 && record[j].num > record[j-1].num){
k=j;
while(k>=1 && record[k].num > record[k-1].num)
k--;
{
RECORDER tmp = record[j];
record[j] = record[k];
record[k] = tmp;
}
}
}
}
cout<<record[0].num<<endl;
cout<<ptr[record[0].index]<<endl;
delete[] ptr;
}