[Algorithm&Datastructure]Suffix Trie 后缀树代码实现

  1. Give a linear-time algorithm to determine whether a text T is a cyclic rotation of
    another string T'. For example, add is a cyclic rotation of dad.
    n = p = # length of T

    k = # matched chars

    Set x = 0; k = 0;

    T = T + T //copy of T

    while(x < 2n-p+1) {

    start from position x

    1. match T with P at position x;

    2. Update k = #matched chars;

    3. if ( k == p ) output “match at x” ;

    4. Update x = x + k - π(k) ;

    5. Update k = π(k) ;

    }

    codes here:

    def get_k(T,P):
        """Count how many leading characters of P match T at the first
        occurrence of P[0] in T.

        Only the first index i with T[i] == P[0] is tried; later
        occurrences are never examined.

        Args:
            T: text string to search in.
            P: pattern string.

        Returns:
            The number of characters of P matched before the first mismatch
            (len(P) on a complete match). Returns 0 when P is empty, when
            P[0] does not occur in T, or when P would run past the end of T
            from that first occurrence.
        """
        # An empty pattern has no first character to anchor on.
        if not P:
            return 0
        start = T.find(P[0])
        # No anchor, or the pattern would overrun the text ("oversize").
        if start < 0 or start + len(P) > len(T):
            return 0
        k = 0
        for offset, ch in enumerate(P):
            if T[start + offset] != ch:
                break
            k += 1
        return k
    
    # Demo: T is a string concatenated with itself and P is a rotation of
    # that string, so all 31 characters of P match.
    T = 'NDAJNFVJASKNVKASLDNVJAKSDNVKSAJNDAJNFVJASKNVKASLDNVJAKSDNVKSAJ'
    P = 'VJASKNVKASLDNVJAKSDNVKSAJNDAJNF'
    print(get_k(T,P))
    # output: 31
    
    def find_r(P,k):
        """Return the shift r = k - pi(k) for the first k characters of P.

        pi(k) is the length of the longest proper prefix of P[:k] that is
        also a suffix of P[:k]. Equivalently, r is the smallest i in
        [1, k) such that P[i:k] == P[:k - i]; when no such i exists, r is k.

        Args:
            P: pattern string; must have at least k characters, because
                P[0] .. P[k-1] are indexed.
            k: number of matched characters.

        Returns:
            k itself when k <= 1, otherwise the shift described above.
        """
        # k == 0 -> 0 and k == 1 -> 1: nothing shorter to fall back on.
        if k <= 1:
            return k
        # border[i] = length of the longest proper border of P[:i + 1],
        # built with the usual failure-function recurrence in O(k) time.
        border = [0] * k
        length = 0
        for i in range(1, k):
            while length and P[i] != P[length]:
                length = border[length - 1]
            if P[i] == P[length]:
                length += 1
            border[i] = length
        return k - border[k - 1]
    
    # Demo: the first four characters 'CCAC' have the border 'C', so the
    # shift is 4 - 1 = 3. The result is printed so the script shows it.
    P = 'CCACC'
    print(find_r(P,4))
    # output: 3
    
    def cyclic_rotation(L,L2):
        """Print whether L2 occurs in L + L, i.e. whether the two strings
        are cyclic rotations of each other.

        Example:
            L  = 'NDAJNFVJASKNVKASLDNVJAKSDNVKSAJ'
            L2 = 'VJASKNVKASLDNVJAKSDNVKSAJNDAJNF'
            L + L is searched for L2.

        Args:
            L: the text T.
            L2: the candidate rotation T'.

        Returns:
            None. Prints "wrong input" when the lengths differ, otherwise
            one of the two verdict messages.
        """
        n = len(L)
        if n != len(L2):
            print("wrong input")
            return

        # Any rotation of L is a substring of L doubled.
        L1 = L + L

        # First position in L where L2 could start; -1 when L2[0] is absent.
        # For two empty strings find('') yields 0 and the loop below reports
        # a (trivial) match.
        x = L.find(L2[:1])

        k = 0
        while 0 <= x <= n:
            # NOTE: the slice copies the tail of L1 on every iteration.
            k = get_k(L1[x:], L2)
            if k == n:
                break
            r = find_r(L2, k)
            # r == 0 only when nothing matched; no further shift is possible.
            if r == 0:
                break
            x = x + r

        if k == n:
            print("T is a cyclic rotation of T'")
        else:
            print("T is not a cyclic rotation of T'")
    ​
    """
    TRUE
    input T:VJASKNVKASLDNVJAKSDNVKSAJNDAJNF
    input P:NDAJNFVJASKNVKASLDNVJAKSDNVKSAJ
    """
    """
    WRONG
    input T:VJASKNVKASLDNVJAKSDNVKSAJNDAJNF
    input P:NDAJNFVJASKNVKASLDYVJAKSDNVKSAJ
    """
    import datetime
    # Read both strings first so the timer does not include typing time.
    L = input("input T:")
    L2 = input("input P:")
    starttime = datetime.datetime.now()
    cyclic_rotation(L,L2)
    endtime = datetime.datetime.now()
    # print is a function in Python 3; total_seconds() keeps the fractional
    # part, whereas .seconds is only the whole-second component.
    print((endtime - starttime).total_seconds())

     

     

  2. (a)ababaa$

    1: ababaa$ 8: $ 7: a$ 6: aa$ 4: abaa$ 2: ababaa$ 5: baa$ 3: babaa$

     

     

    (b)

     

     

  3. (a)

    ababaa$baabab# [0]
    # [14]
    $baabab# [7]
    a [-1]
    $baabab# [6]
    a [-1]
    $baabab# [5]
    bab# [9]
    b [-1]
    # [12]
    a [-1]
    a$baabab# [3]
    b [-1]
    # [10]
    aa$baabab# [1]
    b [-1]
    # [13]
    a [-1]
    a [-1]
    $baabab# [4]
    bab# [8]
    b [-1]
    # [11]
    aa$baabab# [2]
    # [14]
    $baabab# [7]
    a [-1]
    $baabab# [6]
    a [-1]
    $baabab# [5]
    bab# [9]
    b [-1]
    # [12]
    a [-1]
    a$baabab# [3]
    b [-1]
    # [10]
    aa$baabab# [1]
    b [-1]
    # [13]
    a [-1]
    a [-1]
    $baabab# [4]
    bab# [8]
    b [-1]
    # [11]
    aa$baabab# [2]

     

    (b)

    i. Maximum unique match s is the substring of both S1 and S2.

    path(v) = s

    S1 and S2 have common trie structure(v), so v is a parent with two children, one child is a leaf corresponding to a suffix of S1, and the other is a leaf corresponding to a suffix of S2 .

    If there is a suffix link pointing to v from some node u, then path(v) is path(u) with its first character removed.

    In that case there is a longer string path(u) that extends s by one character on the left, and u rather than v is the candidate for the maximum unique match. Conversely, if v is the maximum unique match, there is no suffix link pointing to v.

    ii. If two strings are of size M and N, then Generalized Suffix Tree construction takes O(M+N) and LCS finding is a DFS on tree which is again O(M+N).

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值