Moderate 加入空格使得可辨别单词数量最多 @CareerCup

递归题目,注意结合了memo的方法和trie的应用


package Moderate;

import java.util.Hashtable;

import CtCILibrary.AssortedMethods;
import CtCILibrary.Trie;


/**
 *  Oh, no! You have just completed  a lengthy  document when you have an  unfortu-
nate Find/Replace mishap.  You have accidentally  removed all spaces, punctuation,
and  capitalization  in  the document. A sentence like  "I reset  the  computer.  It  still
didn't boot!" would become "iresetthecomputeritstilldidntboot".  You figure that you
can add back in the punctation and capitalization later, once you get the individual
words properly separated. Most of the words will be in a dictionary,  but some strings,
like proper names, will  not.
Given a dictionary  (a list of words), design an algorithm  to find the optimal way of
"unconcatenating" a sequence of words. In this case, "optimal" is defined  to be the
parsing  which  minimizes  the number  of unrecognized sequences of characters.
For example, the string "jesslookedjustliketimherbrother"  would be optimally parsed
as  "JESS looked just like TIM her brother". This parsing  has seven unrecognized  char-
acters, which  we have capitalized  for  clarity.

给一个string,把string内的所有标点,空格都去掉。然后要求找到把空格加回去使得不可辨别的
单词数量达到最少的方法(判断是否可以辨别是通过提供一个字典来判断)

 *
 */
public class S17_14 {

	public static String sentence;
    public static Trie dictionary;
    
    /* incomplete code */
    public static Result parse(int wordStart, int wordEnd, Hashtable<Integer, Result> cache) {
            if (wordEnd >= sentence.length()) {
                    return new Result(wordEnd - wordStart, sentence.substring(wordStart).toUpperCase());
            }
            if (cache.containsKey(wordStart)) {
                    return cache.get(wordStart).clone();
            }
            String currentWord = sentence.substring(wordStart, wordEnd + 1);
            boolean validPartial = dictionary.contains(currentWord, false);
            boolean validExact = validPartial && dictionary.contains(currentWord, true);
            
            /* break current word */
            Result bestExact = parse(wordEnd + 1, wordEnd + 1, cache);
            if (validExact) {
                    bestExact.parsed = currentWord + " " + bestExact.parsed;
            } else {
                    bestExact.invalid += currentWord.length();
                    bestExact.parsed = currentWord.toUpperCase() + " " + bestExact.parsed;
            } 
            
            /* extend current word */
            Result bestExtend = null;
            if (validPartial) {
                    bestExtend = parse(wordStart, wordEnd + 1, cache);
            }
            
            /* find best */
            Result best = Result.min(bestExact, bestExtend);
            cache.put(wordStart, best.clone());
            return best;
    }        
    
    public static int parseOptimized(int wordStart, int wordEnd, Hashtable<Integer, Integer> cache) {
            if (wordEnd >= sentence.length()) {
                    return wordEnd - wordStart;
            }
            if (cache.containsKey(wordStart)) {
                    return cache.get(wordStart);
            }                
            
            String currentWord = sentence.substring(wordStart, wordEnd + 1);
            boolean validPartial = dictionary.contains(currentWord, false);
            
            /* break current word */
            int bestExact = parseOptimized(wordEnd + 1, wordEnd + 1, cache);
            if (!validPartial || !dictionary.contains(currentWord, true)) {
                    bestExact += currentWord.length();
            }
            
            /* extend current word */
            int bestExtend = Integer.MAX_VALUE;
            if (validPartial) {
                    bestExtend = parseOptimized(wordStart, wordEnd + 1, cache);
            }
            
            /* find best */
            int min = Math.min(bestExact, bestExtend);
            cache.put(wordStart, min);
            return min;
    }
    
    public static int parseSimple(int wordStart, int wordEnd) {
            if (wordEnd >= sentence.length()) {
                    return wordEnd - wordStart;
            }
            
            String word = sentence.substring(wordStart, wordEnd + 1);
            
            /* break current word */
            int bestExact = parseSimple(wordEnd + 1, wordEnd + 1);
            if (!dictionary.contains(word, true)) {
                    bestExact += word.length();
            }
            
            /* extend current word */
            int bestExtend = parseSimple(wordStart, wordEnd + 1);
            
            /* find best */
            return Math.min(bestExact, bestExtend);
    }        
    
    public static String clean(String str) {
            char[] punctuation = {',', '"', '!', '.', '\'', '?', ','};
            for (char c : punctuation) {
                    str = str.replace(c, ' ');
            }
            return str.replace(" ", "").toLowerCase();
    }
    
    public static void main(String[] args) {
            dictionary = AssortedMethods.getTrieDictionary();
            sentence = "As one of the top companies in the world, Google will surely attract the attention of computer gurus. This does not, however, mean the company is for everyone.";
            sentence = clean(sentence);
            System.out.println(sentence);
            //Result v = parse(0, 0, new Hashtable<Integer, Result>());
            //System.out.println(v.parsed);
            int v = parseOptimized(0, 0, new Hashtable<Integer, Integer>());
            System.out.println(v);
    }
    
    
    static class Result {
        public int invalid = Integer.MAX_VALUE;
        public String parsed = "";
        public Result(int inv, String p) {
                invalid = inv;
                parsed = p;
        }
        
        public Result clone() {
                return new Result(this.invalid, this.parsed);
        }
        
        public static Result min(Result r1, Result r2) {
                if (r1 == null) {
                        return r2;
                } else if (r2 == null) {
                        return r1;
                } 
                
                return r2.invalid < r1.invalid ? r2 : r1;
        }        
    }

}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值