IOI'98
The cows have developed a new interest in scanning the universe outside their farm with radiotelescopes. Recently, they noticed a very curious microwave pulsing emission sent right from the centre of the galaxy. They wish to know if the emission is transmitted by some extraterrestrial form of intelligent life or if it is nothing but the usual heartbeat of the stars.
Help the cows to find the Truth by providing a tool to analyze bit patterns in the files they record. They are seeking bit patterns of length A through B inclusive (1 <= A <= B <= 12) that repeat themselves most often in each day's data file. They are looking for the patterns that repeat themselves most often. An input limit tells how many of the most frequent patterns to output.
Pattern occurrences may overlap, and only patterns that occur at least once are taken into account.
PROGRAM NAME: contact
INPUT FORMAT
Line 1: | Three space-separated integers: A, B, N; (1 <= N < 50) |
Lines 2 and beyond: | A sequence of as many as 200,000 characters, all 0 or 1; the characters are presented 80 per line, except potentially the last line. |
SAMPLE INPUT (file contact.in)
2 4 10
01010010010001000111101100001010011001111000010010011110010000000
In this example, pattern 100 occurs 12 times, and pattern 1000 occurs 5 times. The most frequent pattern is 00, with 23 occurrences.
OUTPUT FORMAT
Lines that list the N highest frequencies (in descending order of frequency) along with the patterns that occur in those frequencies. Order those patterns by shortest-to-longest and increasing binary number for those of the same frequency. If fewer than N highest frequencies are available, print only those that are.
Print the frequency alone by itself on a line. Then print the actual patterns space separated, six to a line (unless fewer than six remain).
SAMPLE OUTPUT (file contact.out)
23
00
15
01 10
12
100
11
11 000 001
10
010
8
0100
7
0010 1001
6
111 0000
5
011 110 1000
4
0001 0011 1100
思路:用AC自动机匹配所有子串的个数,然后排序,按要求输出。自动机里的插入用深搜枚举题目要求的长度。
然后匹配,再深搜一遍把结果存到类数组中,排序输出即可。
失误点:原先居然用KMP,毫无疑问死了,很坑人的是此题中有不够的输出就结束,并且个数0的不输出,结果WA了一次
效率还是很好的,
Test 1: TEST OK [0.000 secs, 3888 KB]
Test 2: TEST OK [0.000 secs, 3888 KB]
Test 3: TEST OK [0.032 secs, 4020 KB]
Test 4: TEST OK [0.000 secs, 3888 KB]
Test 5: TEST OK [0.065 secs, 4020 KB]
Test 6: TEST OK [0.097 secs, 3888 KB]
Test 7: TEST OK [0.162 secs, 4020 KB]
All tests OK.
具体看代码:
/*
ID:nealgav1
LANG:C++
PROG:contact
*/
#include<fstream>
#include<cstring>
#include<algorithm>
#include<queue>
using namespace std;
ifstream cin("contact.in");
ofstream cout("contact.out");
char s[201000];
const int mm=16384;///最大的个数
class node
{public:
node*fail;
node*next[2];
int num;
node(){fail=NULL;memset(next,NULL,sizeof(next));num=0;}
};
class AC
{
public:
char s[14];
int num;
}ans[mm];
int pos;
int _min,_max;
/**
void insert(node*root,int len)
{ node*p=root;
for(int i=0;i<len;i++)
{
int index=0;
if(p->next[index]==NULL)
p->next[index]=new node();
p=p->next[index];i++;
}
p->num++;
}*/
/**使用深搜插入所有可能的情况,其实就是建个字典树啦*/
void insert(node*root,int num,int dep)
{
node*p=root;
if(dep>_max)return;
if(p->next[num]==NULL)
p->next[num]=new node();
//if(dep>=_min)p->num++;
insert(p->next[num],0,dep+1);
insert(p->next[num],1,dep+1);
}
///构造失败指针
void AC_anto(node*root)
{
node*temp,*p;
root->fail=NULL;
queue<node*>Q;
Q.push(root);
while(!Q.empty())
{
temp=Q.front();Q.pop();
for(int i=0;i<2;i++)
{
if(temp->next[i]!=NULL)
{
p=temp->fail;
while(p!=NULL)
{
if(p->next[i]!=NULL)
{
temp->next[i]->fail=p->next[i];break;
}p=p->fail;
}
if(p==NULL)
temp->next[i]->fail=root;
Q.push(temp->next[i]);
}
}
}
}
///算出所有匹配情况的个数
int query(node*root,char s[])
{ AC_anto(root);
int i=0;
int cnt=0;
node*p=root;
while(s[i]!='\0')
{
int index=s[i]-'0';
while(p->next[index]==NULL&&p!=root)
p=p->fail;
p=p->next[index];
if(p==NULL)p=root;
node*temp=p;
while(temp!=root)
{
temp->num++;
temp=temp->fail;
}
i++;
}
return cnt;
}
///从字典树中转化为类数组
void _data(node*root,int num,int dep)
{
node*p=root;
s[dep-1]=num+'0';
//if(p->next[num]==NULL)
//p->next[num]=new node();
if(dep>_max)return;
if(dep>=_min)
{
//cout<<p->next[num]->num<<"\n";
s[dep]='\0';
//cout<<s<<"\n";
///为什么是p->next[num]->num?不解释
ans[pos].num=p->next[num]->num;
strcpy(ans[pos].s,s);
pos++;
}
_data(p->next[num],0,dep+1);
_data(p->next[num],1,dep+1);
}
bool cmp(AC a,AC b)
{ if(a.num==b.num)
{ int c=strlen(a.s),d=strlen(b.s);
if(c==d)
return strcmp(a.s,b.s)<0;
else return c<d;
}
return a.num>b.num;
}
int main()
{ int m;
cin>>_min>>_max>>m;
char kk[88];
int lens=0;
while(cin>>kk)
{
strcpy(s+lens,kk);
lens=strlen(s);
}
pos=0;
node*root=new node();
insert(root,0,1);insert(root,1,1);
query(root,s);
_data(root,0,1);_data(root,1,1);
sort(ans,ans+pos,cmp);
cout<<ans[0].num<<"\n"<<ans[0].s;
int len=0;
int past=1;///每行保证输出6个
for(int i=1;i<pos&&ans[i].num/**个数需要大于0才能输出*/;i++)
{
if(ans[i].num==ans[i-1].num)
{ if(past%6==0)cout<<"\n"<<ans[i].s;
else
cout<<" "<<ans[i].s;
++past;
}
else
{ ++len;if(len==m)break;
cout<<"\n"<<ans[i].num<<"\n"<<ans[i].s;past=1;
}
}
cout<<"\n";
}
Russ Cox
For this problem, we keep track of every bit sequence we see. We could use the bit sequence itself as an index into a table of frequencies, but that would not distinguish between the 2-bit sequence "10" and the 4-bit sequence "0010". To solve this, we always add a 1 to the beginning of the number, so "10" becomes "110" and "0010" becomes "10010".
After reading the entire bit string, we sort the frequency table and walk through it to print out the top sequences.
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <assert.h> #define MAXBITS 12 #define MAXSEQ (1<<(MAXBITS+1)) typedef struct Seq Seq; struct Seq { unsigned bits; int count; }; Seq seq[MAXSEQ]; /* increment the count for the n-bit sequence "bits" */ void addseq(unsigned bits, int n) { bits &= (1<<n)-1; bits |= 1<<n; assert(seq[bits].bits == bits); seq[bits].count++; } /* print the bit sequence, decoding the 1<<n stuff */ /* recurse to print the bits most significant bit first */ void printbits(FILE *fout, unsigned bits) { assert(bits >= 1); if(bits == 1) /* zero-bit sequence */ return; printbits(fout, bits>>1); fprintf(fout, "%d", bits&1); } int seqcmp(const void *va, const void *vb) { Seq *a, *b; a = (Seq*)va; b = (Seq*)vb; /* big counts first */ if(a->count < b->count) return 1; if(a->count > b->count) return -1; /* same count: small numbers first */ if(a->bits < b->bits) return -1; if(a->bits > b->bits) return 1; return 0; } void main(void) { FILE *fin, *fout; int i, a, b, n, nbit, c, j, k; unsigned bit; char *sep; fin = fopen("contact.in", "r"); fout = fopen("contact.out", "w"); assert(fin != NULL && fout != NULL); nbit = 0; bit = 0; for(i=0; i<=MAXBITS; i++) for(j=0; j<(1<<i); j++) seq[(1<<i) | j].bits = (1<<i) | j; fscanf(fin, "%d %d %d", &a, &b, &n); while((c = getc(fin)) != EOF) { if(c != '0' && c != '1') continue; bit <<= 1; if(c == '1') bit |= 1; if(nbit < b) nbit++; for(i=a; i<=nbit; i++) addseq(bit, i); } qsort(seq, MAXSEQ, sizeof(Seq), seqcmp); /* print top n frequencies for number of bits between a and b */ j = 0; for(i=0; i<n && j < MAXSEQ; i++) { if(seq[j].count == 0) break; c = seq[j].count; fprintf(fout, "%d\n", c); /* print all entries with frequency c */ sep = ""; for(k=0; seq[j].count == c; j++, k++) { fprintf(fout, sep); printbits(fout, seq[j].bits); if(k%6 == 5) sep = "\n"; else sep = " "; } fprintf(fout, "\n"); } exit(0); }