【dp】正则表达式匹配问题_exchange+, extra+,hardware+, mouse-,network--CSDN博客

本文链接：https://blog.csdn.net/Nameless_05/article/details/6737362

问题描述：	许多操作系统采用正则表达式实现文件匹配功能。一种简单的正则表达式由英文字母、数字及通配符“*”和“？”组成。“？”代表任意一个字符，“*”则可以代表任意多个（包括零个）字符。现要用正则表达式对部分文件进行操作。试设计一个算法，找出一个正则表达式，使其能匹配的待操作文件最多，但不能匹配任何不进行操作的文件。在此前提下，所找出的正则表达式的长度还应是最短的。
编程任务：	对于给定的待操作文件，找出一个能匹配最多待操作文件的正则表达式。
数据输入：	由文件 input.txt 提供输入数据。文件由 n（1≤n≤250）行组成。每行给出一个文件名。文件名由英文字母和数字组成。英文字符要区分大小写，文件名长度不超过8个字符。文件名后是一个空格符和一个字符“+”或“-”。“+”表示要对该行给出的文件进行操作，“-”表示不进行操作。
结果输出：	程序运行结束时，将计算出的最多文件匹配数和最优正则表达式输出到文件 output.txt 中。文件的第 1 行中的数是计算出的最多文件匹配数，文件的第 2 行是最优正则表达式。
样例：
	输入（input.txt，每行一个文件）：
		EXCHANGE +
		EXTRA +
		HARDWARE +
		MOUSE -
		NETWORK -
	输出（output.txt）：
		3
		*A*
核心思想：	先讨论*再讨论？再讨论其他

var
 len,t1,t2,t3,max,lc,i,k,j,l,n:integer;{ n = number of files read; len = length of the best pattern found so far; max = match count of the best pattern found so far; lc = number of distinct characters seen; t1/t2/t3 are counters and i/j/k loop indices shared by all procedures }
 s:array[0..8,1..250,0..8] of boolean;{ s[p,f,k] is true when the first p pattern symbols can match the first k characters of file f }
 y:array[1..250] of boolean;{ y[f] is true for a '+' file and false for a '-' file }
 t:boolean;{ scratch flag shared by init and search }
 add,m:array[1..62] of integer;{ add = occurrence count of each character within the file currently being read; m = running total over all files ('+' files add, '-' files subtract) }
 lenst:array[1..250] of integer;{ length of each file name }
 ch:char;
 cut,c1:array['0'..'z'] of integer;{ c1[x] = 1 when character x occurs in the file currently being read; cut[x] = number of '+' files containing x }
 d,ost:array[1..8] of char;{ d = pattern being built by search; ost = best pattern recorded so far }
 time:longint;{ not referenced anywhere in the visible program }
 st:array[1..250] of string[8];{ the file names }
 c:array[1..62] of char;{ c[i] is the character whose counts are kept in add[i] and m[i] }
procedure init;
{ Reads the file list from the standard input text file and fills the global
  tables used by the search.

  Each input line is "<name> <sign>": the name is read character by
  character up to the first space, then the sign character is read with
  readln. For every file this records st[n], lenst[n] and y[n].

  Character statistics:
    c[1..lc] - the distinct characters, in order of first appearance;
    m[i]     - occurrences of c[i] in '+' files minus occurrences in '-' files;
    cut[x]   - number of '+' files that contain character x at least once.
  Finally c/m are sorted by descending m so that search tries the characters
  with the highest net count first. }
begin
  n:=0;
  lc:=0;
  fillchar(m,sizeof(m),0);
  fillchar(cut,sizeof(cut),0);
  repeat
    inc(n);
    lenst[n]:=0;
    st[n]:='';
    { add and c1 are per-file scratch tables, reset for every line }
    fillchar(add,sizeof(add),0);
    fillchar(c1,sizeof(c1),0);
    repeat
      read(ch);
      if ch<>' ' then
        begin
          c1[ch]:=1;
          t:=true;
          inc(lenst[n]);
          st[n]:=st[n]+ch;
          { look the character up among those already known }
          for i:=1 to lc do
            if c[i]=ch then
              begin
                t:=false;
                add[i]:=add[i]+1;
                break;
              end;
          { first time this character is seen: append it to the table }
          if t then
            begin
              inc(lc);
              add[lc]:=1;
              c[lc]:=ch;
            end;
        end;
    until ch=' ';
    { the character after the space is the sign; readln also consumes the rest of the line }
    readln(ch);
    y[n]:=ch<>'-';
    if y[n] then
      begin
        for i:=1 to 62 do m[i]:=m[i]+add[i];
        for ch:='0' to 'z' do cut[ch]:=cut[ch]+c1[ch];
      end
    else
      for i:=1 to 62 do m[i]:=m[i]-add[i];
  until seekeof;
  { exchange sort of the character table by descending net count }
  for i:=1 to lc-1 do
    for j:=i+1 to lc do
      if m[i]<m[j] then
        begin
          k:=m[i];
          m[i]:=m[j];
          m[j]:=k;
          ch:=c[i];
          c[i]:=c[j];
          c[j]:=ch;
        end;
end;
procedure search(s1:integer);
{ Depth-first search over patterns, choosing the symbol at position s1.

  On entry d[1..s1-1] holds the pattern prefix, and s[s1-1,f,k] tells whether
  that prefix can match the first k characters of file f. For each candidate
  symbol the procedure fills s[s1], counts the files involved, possibly
  records a new best pattern in max/len/ost, possibly recurses to s1+1, and
  then clears s[s1] again before trying the next symbol.

  Candidates are tried in this order: '*', then '?', then every known
  character c[1..lc].

  Counters (globals, recomputed for every candidate):
    t1 - '+' files fully matched by the pattern ending at s1;
    t2 - '-' files fully matched by the pattern ending at s1;
    t3 - '+' files that still have at least one reachable state, i.e. an
         upper bound on what any extension of this pattern can match.
  A pattern is accepted only when t2=0, and replaces the best one when it
  matches more '+' files or the same number with a shorter length.
  The search goes deeper only while the bound can still beat or tie (with a
  shorter pattern) the current best and the length limit of 8 is not reached. }
var
  s2:integer; { index into c[] for the literal-character candidates }
begin
  { Candidate '*'. Skipped right after '?' or '*': this branch never builds
    two consecutive wildcards that end in '*'. }
  if (s1=1) or ((d[s1-1]<>'?') and (d[s1-1]<>'*')) then
    begin
      t1:=0;
      t2:=0;
      for i:=1 to n do
        begin
          { find the smallest prefix length j reachable so far for file i }
          t:=true;
          for j:=0 to lenst[i] do
            if s[s1-1,i,j] then
              begin
                t:=false;
                break;
              end;
          if not t then
            begin
              { '*' can absorb any number of characters, so every length
                from j upwards becomes reachable, including the full name }
              for k:=j to lenst[i] do s[s1,i,k]:=true;
              if y[i] then inc(t1)
              else inc(t2);
            end;
        end;
      if t2=0 then
        begin
          if (t1>max) or ((t1=max) and (len>s1)) then
            begin
              max:=t1;
              ost:=d;
              ost[s1]:='*';
              len:=s1;
            end;
        end;
      { with a trailing '*' the match count t1 is also the bound for extensions }
      if ((t1>max) or (t1=max) and (s1<len)) and (s1<>8) then
        begin
          d[s1]:='*';
          search(s1+1);
        end;
      fillchar(s[s1],sizeof(s[s1]),false);
    end;

  { Candidate '?': every reachable length j advances to j+1. }
  t1:=0;
  t2:=0;
  t3:=0;
  for i:=1 to n do
    begin
      t:=true;
      for j:=lenst[i]-1 downto 0 do
        if s[s1-1,i,j] then
          begin
            t:=false;
            s[s1,i,j+1]:=true;
          end;
      if not t and y[i] then inc(t3);
      if s[s1,i,lenst[i]] then
        if y[i] then inc(t1)
        else inc(t2);
    end;
  if t2=0 then
    begin
      if (t1>max) or ((t1=max) and (len>s1)) then
        begin
          max:=t1;
          ost:=d;
          ost[s1]:='?';
          len:=s1;
        end;
    end;
  if ((t3>max) or (t3=max) and (s1<len)) and (s1<>8) then
    begin
      d[s1]:='?';
      search(s1+1);
    end;
  fillchar(s[s1],sizeof(s[s1]),false);

  { Candidate literal characters. A character contained in fewer than max
    '+' files cannot reach the current best count, so it is skipped. }
  for s2:=1 to lc do
    if cut[c[s2]]>=max then
      begin
        t1:=0;
        t2:=0;
        t3:=0;
        for i:=1 to n do
          begin
            t:=true;
            for j:=lenst[i]-1 downto 0 do
              if (s[s1-1,i,j]) and (st[i,j+1]=c[s2]) then
                begin
                  t:=false;
                  s[s1,i,j+1]:=true;
                end;
            if not t and y[i] then inc(t3);
            if s[s1,i,lenst[i]] then
              if y[i] then inc(t1) else inc(t2);
          end;
        if t2=0 then
          begin
            if (t1>max) or ((t1=max) and (len>s1)) then
              begin
                max:=t1;
                ost:=d;
                ost[s1]:=c[s2];
                len:=s1;
              end;
          end;
        if ((t3>max) or (t3=max) and (s1<len)) and (s1<>8) then
          begin
            d[s1]:=c[s2];
            search(s1+1);
          end;
        fillchar(s[s1],sizeof(s[s1]),false);
      end;
end;
procedure solving;
{ Runs the search and writes the result to the standard output text file.

  Initial state: the empty pattern matches the empty prefix of every file
  (s[0,f,0] = true), no pattern has been found yet (max = 0) and len starts
  at the maximum pattern length 8. After the search, the first output line
  is the best match count and the second is the best pattern ost[1..len]. }
begin
  fillchar(s,sizeof(s),false);
  for j:=1 to n do s[0,j,0]:=true;
  max:=0;
  len:=8;
  search(1);
  writeln(max);
  for i:=1 to len do write(ost[i]);
  writeln;
end;
begin
  { Redirect the standard text files to the on-disk data files.
    NOTE(review): the problem statement names input.txt / output.txt, while
    this program uses p313.in / p313.out - confirm which names the judge expects. }
  assign(input,'p313.in');
  reset(input);
  assign(output,'p313.out');
  rewrite(output);

  { Read the file list, then search and print the answer. }
  init;
  solving;

  close(input);
  close(output);
end.

题目来源：《算法设计与分析》第三章动态规划