Wildcard
TimeLimit: 2000/1000 MS (Java/Others) Memory Limit: 125536/65536 K(Java/Others)Total Submission(s): 563 Accepted Submission(s):135
Problem Description
When specifying file names (or paths) in DOS, Microsoft Windows and Unix-like operating systems,
the asterisk character ("*") substitutes for any zero or more characters, and the
question mark ("?") substitutes for any one character.
Given a text and a pattern, you should judge whether the pattern matches the text or not.
Given a text and a pattern, you should judge whether the pattern matches the text or not.
Input
There are several cases. Each case consists of exactly two lines. The first line contains a text, which contains only lowercase letters. The second line contains a pattern, which consists of lowercase letters and the two wildcards ("*", "?").
The text is a non-empty string whose length is less than 100,000, and so is the pattern.
We ensure that the number of "?" and the number of "*" in the pattern are both no more than 10.
The text is a non-empty string whose length is less than 100,000, and so is the pattern.
We ensure that the number of "?" and the number of "*" in the pattern are both no more than 10.
Output
Output "YES" if the pattern matches the text, otherwise "NO".
Sample Input
abcdef
a*c*f
Sample Output
YES
Source
2011 Multi-University Training Contest 7 - Host by ECNU
给两个长度不超过100000的字符串, 一个是带有通配符?和*的模式串, 问能否匹配. 通配符不超过10个.
HDU 3901 Wildcard
题目大意:
给两个长度不超过100000的字符串, 一个是带有通配符?和*的模式串, 问能否匹配. 通配符不超过10个.
这题弄了差不多一天= =···不过还算是有收获吧!
方法是我自己YY出来的,代码老长==···希望能有更好的方法,望各位神牛指教!!
我先把字符串处理成s1-s2-s3这样,就是两头没有'?'和'*'(其中'*'直接忽视,计算'?'的数目,然后对文本串减去该数目的长度),以方便下面的处理。
然后分别对子串进行kmp,找出所有匹配。
再然后对'?'两边进行合并,使匹配串都是s1*s2*s3这样,然后就可以进行贪心了。
不过具体的实现我觉得有挺多细节的···具体代码有详细注释:
#include <cstdio>
#include <cstring>
static const int maxn = 100100;
// Index 0 of each row of these arrays holds the number of entries stored in
// that row; the entries themselves live at indices 1..count.
// The first four arrays store start or end positions of substring matches:
// start/end are filled per segment by kmp(), s/e hold the lists left after
// main() has merged segments that are joined only by '?'.
// temp1/temp2 are scratch lists (starts / ends) used while merging.
static int start[25][maxn], end[25][maxn], s[25][maxn], e[25][maxn], temp1[maxn], temp2[maxn];
// front and back are the numbers of '?' immediately before / after the
// substring currently being matched; kmp() subtracts front from each recorded
// start, adds back to each recorded end, and then resets both to 0.
int front, back;
// KMP failure table for the pattern most recently passed to kmp().
int fail[maxn];
/*
 * Find every occurrence of pat in str with Knuth-Morris-Pratt and record it
 * in row `num` of the global start/end tables.
 *
 *   str - NUL-terminated text to search
 *   pat - NUL-terminated literal pattern (no wildcards)
 *   num - row of start[] / end[] to fill; entry 0 of the row is the count
 *
 * For an occurrence covering str[a..b] the recorded values are
 *   start = a - front   and   end = b + back,
 * i.e. the span is widened by the '?' counts the caller left in the globals
 * front and back.  Both globals are reset to 0 before returning.
 *
 * Only the first strlen(pat) entries of fail[] are (re)initialised, so the
 * cost of a call depends on the inputs rather than on the full table size.
 * An empty pattern records no matches and never reads past its terminator.
 */
void kmp(char* str, char* pat, int num)
{
    int matches = 0;

    start[num][0] = 0;
    end[num][0] = 0;

    if( pat[0] != '\0' )
    {
        // Failure table: fail[i] is the last index of the longest proper
        // border of pat[0..i], or -1 when there is none.
        fail[0] = -1;
        for( int i = 1; pat[i]; ++i )
        {
            int k = fail[i-1];
            while( k >= 0 && pat[i] != pat[k+1] )
                k = fail[k];
            fail[i] = ( pat[k+1] == pat[i] ) ? k + 1 : -1;
        }

        int i = 0, j = 0;
        while( str[i] && pat[j] )
        {
            if( str[i] == pat[j] )
            {
                ++i, ++j;
                if( pat[j] == '\0' )
                {
                    // Full match ending at i-1; widen it by the pending '?'.
                    ++matches;
                    start[num][matches] = i - j - front;
                    end[num][matches] = i - 1 + back;
                    // Fall back so overlapping occurrences are found too.
                    j = fail[j-1] + 1;
                }
            }
            else if( j == 0 )
                ++i;
            else
                j = fail[j-1] + 1;
        }
    }

    start[num][0] = matches;
    end[num][0] = matches;
    front = back = 0;
}
// str: the text, p: the pattern, t: scratch buffer for the literal segment
// currently being collected from p.
static char str[maxn], p[maxn], t[maxn];
// st[k] describes the break that follows segment k: 1 when the segment is
// joined to the next one only by '?', 0 when a '*' separates them.
static bool st[maxn];
int main()
{
int i, j, k, ll, rr, mid;
int len, cnt1, cnt2;
bool ok;
while( scanf("%s %s", str, p) != EOF )
{
ok = 1;
front = back = 0;
//先处理一下p的头尾,并相应的对str做出改变
//使之变成统一的s1-s2-s3-s4形式,两头没有'?'or'*',方便下面的处理
len = strlen(p);
for( i = cnt1 = 0; i < len && (p[i] == '*' || p[i] == '?'); ++i )
if( p[i] == '?' )
++cnt1;
for( j = len-1, cnt2 = 0; j >= 0 && (p[j] == '*' || p[j] == '?'); --j )
if( p[j] == '?' )
++cnt2;
if( i > j )
{
if( cnt1 <= strlen(str) )
printf("YES 0 0\n");
else
printf("NO\n");
continue;
}
p[j+1] = '\0';
str[strlen(str)-cnt2] = '\0';
strcpy(p, p+i);
strcpy(str, str+cnt1);
//对每个子串求它的匹配
len = strlen(p);
for( i = j = k = 0; i < len; ++i )
{
if( p[i] == '*' )
{
if( !j )
continue;
t[j] = '\0';
st[k] = 0;
kmp(str, t, k++);
j = 0;
}
else if( p[i] == '?' )
{
if( !j )
{
//例子: aa*??b
//??应该算到b中,就是后一个子串
front = 1;
while( p[i+1] == '?' )
{
++front;
++i;
}
continue;
}
back = 1;
while( p[i+1] == '?' )
{
//例子: aa???
++back;
++i;
}
//例子: aa???*b 与 aa???b
st[k] = p[i+1] == '*' ? 0 : 1;
t[j] = '\0';
kmp(str, t, k++);
j = 0;
}
else
t[j++] = p[i];
}
t[j] = '\0';
kmp(str, t, k);
//检查每个子串是否都出现了
for( i = 0; ok && i <= k; ++i )
if( !start[i][0] )
ok = 0;
//对状态为1,就是'?'两边的子串合并,使p串成为统一由'*'分隔的,以进行贪心
if( ok )
{
len = 0;
for( i = 0; ok && i < k; ++i )
{
//临时数组,储存合并后的始点和终点
temp1[0] = temp2[0] = 0;
if( st[i] )
{
//枚举左边的子串的始点,然后对右边的终点进行二分查找
for( j = 1; j <= end[i][0]; ++j )
{
ll = 1, rr = start[i+1][0]+1;
while( rr - ll > 1 )
{
mid = (ll+rr)/2;
if( start[i+1][mid] <= end[i][j] + 1 )
ll = mid;
else
rr = mid;
}
if( start[i+1][ll] == end[i][j] + 1 )
{
temp1[++temp1[0]] = start[i][j];
temp2[++temp2[0]] = end[i+1][ll];
}
}
//找不到符合的,匹配失败
if( !temp1[0] )
ok = 0;
else
{
//把temp中的复制去i+1
memcpy(start[i+1], temp1, sizeof(temp1));
memcpy(end[i+1], temp2, sizeof(temp2));
}
}
else
{
for( s[len][0] = e[len][0] = 0, j = 1; j <= end[i][0]; ++j )
{
//s, e储存合并后的子串
s[len][++s[len][0]] = start[i][j];
e[len][++e[len][0]] = end[i][j];
}
++len;
}
}
//不要忘了最后一个
for( s[len][0] = e[len][0] = 0, j = 1; j <= end[k][0]; ++j )
{
s[len][++s[len][0]] = start[k][j];
e[len][++e[len][0]] = end[k][j];
}
++len;
}
//贪心验证
if( ok )
{
k = -1;
for( i = 0; ok && i < len; ++i )
{
for( j = 1; j <= s[i][0]; ++j )
if( s[i][j] > k )
{
k = e[i][j]; // 取第一种结果,即匹配的主串片断中长度最小的
break;
}
if( j > s[i][0] )
ok = 0;
}
}
if( ok )
{
printf("(suffix = include ? : not *) \nYES\nfront = %d\nback = k + cnt1 + cnt2= %d\ns=\n", s[0][1], k + cnt1 + cnt2);
for (int i = 0; i < len; i++)
{
printf("%3d: ", s[i][0]);
for(int j = 1; j <= s[i][0]; j++)
printf("%3d ", s[i][j]);
printf("\n");
}
printf("YES k = %d cnt1 = %d cnt2 = %d\ne=\n", k, cnt1, cnt2);
for ( i = 0; i < len; i++)
{
printf("%3d: ", e[i][0]);
for(int j = 1; j <= e[i][0]; j++)
printf("%3d ", e[i][j]);
printf("\n");
}
}
else
printf("NO k = %d\n", k);
printf("///\n");
}
return 0;
}
再给出几组我debug中比较有价值的数据:
abababcdababcdecdabefcda
*ab??cd??ef*
abcdebcdde
*abcd?e*
babbbabab
ab?b?bab
abcdef
a*b*c*d*e*f
isdjkasd
i*s*d*j*k*a*s*d
hellokugou
hello*??gou
dfjijijiugnmlok
??*f?ij*ug?ml?k
dfjijijiugnmlok
?*f?ij*ug?ml?k
abcdefghijklmnopqrstuvwx
ab*?*ef?h?jk*qr??*u??x?z
sodfmkkoasa
s?df?k?o?*a
sodfmkkoas
s?df?k?o?*
求更简单的方法···
http://blog.sina.com.cn/s/blog_7da04dd30100vlcp.html