Your task is to write a program that given the size, N, of the substring, the number of different characters that may occur in the text, NC, and the text itself, determines the number of different substrings of size N that appear in the text.
As an example, consider N=3, NC=4 and the text "daababac". The different substrings of size 3 that can be found in this text are: "daa"; "aab"; "aba"; "bab"; "bac". Therefore, the answer should be 5.
输入
The first line of input consists of two numbers, N and NC, separated by exactly one space. This is followed by the text where the search takes place. You may assume that the maximum number of substrings formed by the possible set of characters does not exceed 16 million.
输出
The program should output just an integer corresponding to the number of different substrings of size N found in the given text.
样例输入
3 4
daababac
样例输出
5
解答:
// Counts the different substrings of length n in a text whose characters come
// from an alphabet of nc symbols.
// History: this listing used to print 6 instead of 5 for the sample
// (n=3, nc=4, text "daababac"). A newly found substring was recorded with
// `hash[num] = true` rather than `hash[sum] = true`, so its own code was never
// marked and the second "aba" was counted again. The code itself is marked now.
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
// Input text. Kept global: an array this large as a local overflows the stack
// (error 0xC00000FD on Windows).
char a[16000010];
// seen[v] is true once a substring whose code is v has been counted. Named
// `seen` rather than `hash` so that it can never be confused with std::hash.
bool seen[16000010] = {0};
int main()
{
    int n, nc;
    // Nothing to count unless both numbers and the text were read.
    if (!(std::cin >> n >> nc) || scanf("%s", a) != 1)
        return 0;
    const int len = static_cast<int>(strlen(a));

    // Give every distinct character a digit 0, 1, 2, ... in order of first
    // appearance.
    bool known[256] = {0};
    int digit[256] = {0};
    int next_digit = 0;
    for (int i = 0; i < len; i++)
    {
        // char may be signed; convert first so the index is never negative.
        const unsigned char ch = static_cast<unsigned char>(a[i]);
        if (!known[ch])
        {
            known[ch] = true;
            digit[ch] = next_digit++;
        }
    }

    // Read each window of n characters as a base-nc number and count the
    // numbers that have not been met before. The problem statement bounds the
    // number of possible substrings by 16 million, which is what keeps `code`
    // inside `seen`.
    int ans = 0;
    for (int i = 0; i + n <= len; i++)
    {
        int code = 0;
        for (int j = i; j < i + n; j++)
            code = code * nc + digit[static_cast<unsigned char>(a[j])];
        if (!seen[code])
        {
            seen[code] = true;   // mark the substring's own code
            ans++;
        }
    }
    printf("%d\n", ans);
    return 0;
}
///
// Working version: counts the different substrings of length n in a text
// whose characters come from an alphabet of nc symbols.
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
// seen[v] is true once a substring whose code is v has been counted. Named
// `seen` rather than `hash` so that it can never be confused with std::hash.
bool seen[16000010] = {0};
// Input text; global because of its size.
char s[16000010];
int main()
{
    int n, nc;
    // Nothing to count unless both numbers and the text were read.
    if (!(std::cin >> n >> nc) || scanf("%s", s) != 1)
        return 0;
    const int len = static_cast<int>(strlen(s));

    // Map every distinct character to a digit 0, 1, 2, ... in order of first
    // appearance.
    bool known[256] = {0};
    int digit[256] = {0};
    int next_digit = 0;
    for (const char *p = s; *p != '\0'; ++p)
    {
        // char may be signed; convert first so the index is never negative.
        const unsigned char ch = static_cast<unsigned char>(*p);
        if (known[ch])
            continue;
        known[ch] = true;
        digit[ch] = next_digit++;
    }

    // Each window of n characters is read as a base-nc number; a window is
    // counted the first time its number shows up. The problem statement
    // bounds the number of possible substrings by 16 million, which is what
    // keeps `code` inside `seen`.
    int ans = 0;
    for (int first = 0; first + n <= len; first++)
    {
        int code = 0;
        for (int k = 0; k < n; k++)
            code = code * nc + digit[static_cast<unsigned char>(s[first + k])];
        if (seen[code])
            continue;
        seen[code] = true;
        ans++;
    }
    printf("%d\n", ans);
    return 0;
}
///
// Hashing: every substring is mapped to its own key, so the lookup table may need a lot of memory.
#include <stdio.h>
#include <memory>
#include <cmath>
using namespace std ;
// Number of slots in the Exit table, i.e. the exclusive upper bound on the substring codes it can record.
const int NUM = 16000010 ;
// Exit[v] becomes true once a substring whose code is v has been counted.
bool Exit [NUM ] = { false } ;
int hash [ 250 ]; // hash[c] is the digit assigned to the character with code c; -1 until that character has been seen
char input [ 20000000 ]; // holds the input text; the buffer may need to be larger still
int N , NC ; // N: substring length; NC: base used when a substring is encoded as a number
// Fills hash from the characters of input.
void initial ();
// Returns how many length-N windows of input had a code not seen before.
int Ans ();
// Program entry point: reads N, NC and the text from standard input, builds
// the character-to-digit table with initial(), and prints the value returned
// by Ans().
int main ()
{
    // Produce no output unless all three fields were read.
    if ( scanf ( "%d%d%s" , &N , &NC , input ) != 3 )
        return 0 ;
    initial ();
    // Print the bare integer and a newline; the blanks that used to surround
    // "\n" in the format string were extra output.
    printf ( "%d\n" , Ans ());
    return 0 ;
}
void initial ()
{
memset (hash , - 1 , sizeof (hash ));
int i , counter = 0 , size = strlen (input );
for (i = 0 ; i < size ; i ++)
if (hash [ int (input [i ])] == - 1 ) //input[i]如果尚未出现
{
hash [ int (input [i ])] = counter ;
counter ++;
if (counter == NC )
break ;
}
}
int Ans ()
{
int ans = 0 ;
int size = strlen (input );
int temp ;
int i , end = size - N , j , k ;
for (i = 0 ; i <= end ; i ++)
{
temp = 0 ;
for (k = 0 ; k < N ; k ++)
{
j = i + k ;
temp = temp * NC + hash [input [j ]];
}
if (!Exit [temp ])
{
Exit [temp ] = true ;
ans ++;
}
}
return ans ;
}
///
#include <stdio.h>
#include <memory>
#include <cmath>
using namespace std ;
// Number of slots in the Exit table, i.e. the exclusive upper bound on the substring codes it can record.
const int NUM = 16000010 ;
// Exit[v] becomes true once a substring whose code is v has been counted.
bool Exit [NUM ] = { false } ;
int hash [ 250 ]; // hash[c] is the digit assigned to the character with code c; -1 until that character has been seen
char input [ 20000000 ]; // holds the input text; the buffer may need to be larger still
int N , NC ; // N: substring length; NC: base used when a substring is encoded as a number
// Fills hash from the characters of input.
void initial ();
// Returns how many length-N windows of input had a code not seen before.
int Ans ();
// Program entry point: reads N, NC and the text from standard input, builds
// the character-to-digit table with initial(), and prints the value returned
// by Ans().
int main ()
{
    // Produce no output unless all three fields were read.
    if ( scanf ( "%d%d%s" , &N , &NC , input ) != 3 )
        return 0 ;
    initial ();
    // Print the bare integer and a newline; the blanks that used to surround
    // "\n" in the format string were extra output.
    printf ( "%d\n" , Ans ());
    return 0 ;
}
void initial ()
{
memset (hash , - 1 , sizeof (hash ));
int i , counter = 0 , size = strlen (input );
for (i = 0 ; i < size ; i ++)
if (hash [ int (input [i ])] == - 1 ) //input[i]如果尚未出现
{
hash [ int (input [i ])] = counter ;
counter ++;
if (counter == NC )
break ;
}
}
int Ans ()
{
int ans = 0 ;
int size = strlen (input );
int temp ;
int i , end = size - N , j , k ;
for (i = 0 ; i <= end ; i ++)
{
temp = 0 ;
for (k = 0 ; k < N ; k ++)
{
j = i + k ;
temp = temp * NC + hash [input [j ]];
}
if (!Exit [temp ])
{
Exit [temp ] = true ;
ans ++;
}
}
return ans ;
}
// Counts the distinct substrings of length n in a text with a chained hash
// table whose nodes live in a preallocated array.
#include <stdlib.h>
#include <memory.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
using namespace std ;
#define MAX 5000000
#define P 3313
// A static (array-backed) linked list is used in place of a dynamic one.
struct Hash {
    int hash ;    // fingerprint of the stored substring, in [0, MAX)
    int start ;   // offset in x of the occurrence that created this node
    int next ;    // index of the next node in the same bucket; 0 ends the chain
} ;
Hash h [MAX ];    // node pool; slot 0 is never used so that 0 can mean "no node"
int c [MAX ];     // c[b] is the first node of bucket b; 0 means the bucket is empty
char x [MAX ];    // the input text
int main ()
{
    int n ,nc ;   // nc is part of the input format but is not needed by this method
    // Stop at end of input and also on anything that is not two integers
    // (comparing against EOF alone would spin forever on malformed input).
    while ( scanf ( "%d%d" ,&n ,&nc ) == 2 )
    {
        cin.width (MAX );            // never store more characters than x can hold
        if (!(cin >> x ))
            break ;
        if (n < 1 ) {                // a window shorter than one character selects nothing
            printf ( "0\n" );
            continue ;
        }
        // Only the bucket heads need clearing: a node is always written in
        // full before anything can reach it through c[] or a next link.
        memset (c , 0 , sizeof (c ));
        int ptr = 1 ,res = 0 ;
        const int len = static_cast<int>(strlen (x )) - n + 1 ;   // number of windows
        for ( int i = 0 ; i < len ; i ++) {
            // Two polynomial hashes of the window: one picks the bucket, the
            // other is a cheap fingerprint. Unsigned arithmetic wraps around
            // on overflow, so neither abs() nor signed overflow is involved.
            unsigned bucket = 0 ,print = 0 ;
            for ( int j = 0 ; j < n ; j ++) {
                const unsigned ch = static_cast<unsigned char>(x [i + j ]);
                bucket = bucket * 26u + (ch - 'a' );
                print = print * P + (ch - 'a' );
            }
            bucket %= MAX ;
            const int tag = static_cast<int>(print % MAX );
            // Walk the bucket. A matching fingerprint is only a hint; the
            // characters themselves decide whether the substring is known.
            int zz = c [bucket ];
            while (zz != 0 &&
                   !(h [zz ].hash == tag && memcmp (x + h [zz ].start , x + i , n ) == 0 ))
                zz = h [zz ].next ;
            if (zz == 0 ) {
                // First occurrence: put a new node at the head of the bucket.
                res ++;
                h [ptr ].hash = tag ;
                h [ptr ].start = i ;
                h [ptr ].next = c [bucket ];
                c [bucket ] = ptr ++;
            }
        }
        printf ( "%d\n" ,res );
    }
    return 0 ;
}
//
// Counts the distinct substrings of length n in a text with a chained hash
// table whose nodes live in a preallocated array.
#include <memory.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
using namespace std ;
#define MAX 5000000
#define P 3313
// A static (array-backed) linked list is used in place of a dynamic one.
struct Hash {
    int hash ;    // fingerprint of the stored substring, in [0, MAX)
    int start ;   // offset in x of the occurrence that created this node
    int next ;    // index of the next node in the same bucket; 0 ends the chain
} ;
Hash h [MAX ];    // node pool; slot 0 is never used so that 0 can mean "no node"
int c [MAX ];     // c[b] is the first node of bucket b; 0 means the bucket is empty
char x [MAX ];    // the input text
int main ()
{
    int n ,nc ;   // nc is part of the input format but is not needed by this method
    // Stop at end of input and also on anything that is not two integers
    // (comparing against EOF alone would spin forever on malformed input).
    while ( scanf ( "%d%d" ,&n ,&nc ) == 2 )
    {
        cin.width (MAX );            // never store more characters than x can hold
        if (!(cin >> x ))
            break ;
        if (n < 1 ) {                // a window shorter than one character selects nothing
            printf ( "0\n" );
            continue ;
        }
        // Only the bucket heads need clearing: a node is always written in
        // full before anything can reach it through c[] or a next link.
        memset (c , 0 , sizeof (c ));
        int ptr = 1 ,res = 0 ;
        const int len = static_cast<int>(strlen (x )) - n + 1 ;   // number of windows
        for ( int i = 0 ; i < len ; i ++) {
            // Two polynomial hashes of the window: one picks the bucket, the
            // other is a cheap fingerprint. Unsigned arithmetic wraps around
            // on overflow, so neither abs() nor signed overflow is involved.
            unsigned bucket = 0 ,print = 0 ;
            for ( int j = 0 ; j < n ; j ++) {
                const unsigned ch = static_cast<unsigned char>(x [i + j ]);
                bucket = bucket * 26u + (ch - 'a' );
                print = print * P + (ch - 'a' );
            }
            bucket %= MAX ;
            const int tag = static_cast<int>(print % MAX );
            // Walk the bucket. A matching fingerprint is only a hint; the
            // characters themselves decide whether the substring is known.
            int zz = c [bucket ];
            while (zz != 0 &&
                   !(h [zz ].hash == tag && memcmp (x + h [zz ].start , x + i , n ) == 0 ))
                zz = h [zz ].next ;
            if (zz == 0 ) {
                // First occurrence: put a new node at the head of the bucket.
                res ++;
                h [ptr ].hash = tag ;
                h [ptr ].start = i ;
                h [ptr ].next = c [bucket ];
                c [bucket ] = ptr ++;
            }
        }
        printf ( "%d\n" ,res );
    }
    return 0 ;
}
// String hashing.
// This is the Rabin-Karp idea (covered in "Introduction to Algorithms"): every
// substring of length n is turned into one integer that identifies it, and
// the distinct integers are counted. Each different character of the text
// gets a digit 0, 1, 2, ...; a substring is then read as a base-nc number made
// of those digits.
#include <stdio.h>
#include <string.h>
int n ,nc ;
char str [ 20000000 ];   // input text
int asca [ 256 ];        // asca[c]: digit of the character with code c (one slot per unsigned char value)
int hash [ 16000005 ];   // hash[v]: number of the latest test case in which code v was met; 0 = never
int main ()
{
    int round = 0 ;   // 1-based number of the test case being processed
    // Stop at end of input and also on malformed input (comparing against EOF
    // alone would spin forever when a number cannot be parsed).
    while ( scanf ( "%d%d" ,&n ,&nc ) == 2 && scanf ( "%s" ,str ) == 1 )
    {
        round ++;
        const int len = strlen (str );

        // Start every test case with a fresh digit table. `assigned` tells an
        // unseen character apart from the one that owns digit 0.
        bool assigned [ 256 ] = { false };
        memset (asca , 0 , sizeof (asca ));
        int key = 0 ;
        for ( int i = 0 ; i < len ; i ++)
        {
            // char may be signed; convert first so the index is never negative.
            const unsigned char ch = (unsigned char ) str [i ];
            if (assigned [ch ]) continue ;
            assigned [ch ] = true ;
            asca [ch ] = key ++;
            if (key == nc ) break ;   // all nc digits are in use
        }

        int cnt = 0 ;
        for ( int i = 0 ; i + n - 1 < len ; i ++)
        {
            int sum = 0 ;
            for ( int j = i ; j <= i + n - 1 ; j ++)
                sum = sum * nc + asca [(unsigned char ) str [j ]];
            // Stamping with the test-case number makes marks left by earlier
            // test cases count as "not seen" without clearing the whole table.
            if (hash [sum ] != round )
            { hash [sum ] = round ; cnt ++; }
        }
        printf ( "%d\n" ,cnt );
    }
    return 0 ;
}
// This is the Rabin-Karp idea (covered in "Introduction to Algorithms"): every
// substring of length n is turned into one integer that identifies it, and
// the distinct integers are counted. Each different character of the text
// gets a digit 0, 1, 2, ...; a substring is then read as a base-nc number made
// of those digits.
#include <stdio.h>
#include <string.h>
int n ,nc ;
char str [ 20000000 ];   // input text
int asca [ 256 ];        // asca[c]: digit of the character with code c (one slot per unsigned char value)
int hash [ 16000005 ];   // hash[v]: number of the latest test case in which code v was met; 0 = never
int main ()
{
    int round = 0 ;   // 1-based number of the test case being processed
    // Stop at end of input and also on malformed input (comparing against EOF
    // alone would spin forever when a number cannot be parsed).
    while ( scanf ( "%d%d" ,&n ,&nc ) == 2 && scanf ( "%s" ,str ) == 1 )
    {
        round ++;
        const int len = strlen (str );

        // Start every test case with a fresh digit table. `assigned` tells an
        // unseen character apart from the one that owns digit 0.
        bool assigned [ 256 ] = { false };
        memset (asca , 0 , sizeof (asca ));
        int key = 0 ;
        for ( int i = 0 ; i < len ; i ++)
        {
            // char may be signed; convert first so the index is never negative.
            const unsigned char ch = (unsigned char ) str [i ];
            if (assigned [ch ]) continue ;
            assigned [ch ] = true ;
            asca [ch ] = key ++;
            if (key == nc ) break ;   // all nc digits are in use
        }

        int cnt = 0 ;
        for ( int i = 0 ; i + n - 1 < len ; i ++)
        {
            int sum = 0 ;
            for ( int j = i ; j <= i + n - 1 ; j ++)
                sum = sum * nc + asca [(unsigned char ) str [j ]];
            // Stamping with the test-case number makes marks left by earlier
            // test cases count as "not seen" without clearing the whole table.
            if (hash [sum ] != round )
            { hash [sum ] = round ; cnt ++; }
        }
        printf ( "%d\n" ,cnt );
    }
    return 0 ;
}