import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
/**
 * Extracts the shortest run of consecutive words from a text that contains
 * every one of a given set of keywords (a "minimum window" search over words).
 */
public class ExtractSummary {

    /**
     * Demo entry point: prints the shortest summary of a small sample text.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println(extractSummary("a b c d c a", new String[] { "a", "c" }));
    }

    /**
     * Returns the shortest sequence of consecutive words of {@code description}
     * that contains every distinct keyword at least once.
     *
     * <p>The description is tokenized on runs of whitespace (spaces, tabs, line
     * breaks), so repeated or mixed whitespace never produces empty "words" and
     * never inflates the length of a candidate window. Keywords are matched
     * against whole words, case-sensitively; a keyword that itself contains
     * whitespace can therefore never match. When several windows share the
     * minimum length, the one that occurs first in the text is returned.
     *
     * @param description the text to search; may be {@code null}
     * @param keywords    the words that must all appear in the summary;
     *                    duplicates are ignored; may be {@code null}
     * @return the words of the shortest matching window joined by single
     *         spaces, or an empty string when either argument is {@code null},
     *         the description is blank, no keywords are given, or no window
     *         contains all keywords
     */
    public static String extractSummary(String description, String[] keywords) {
        if (description == null || keywords == null) {
            return "";
        }
        // trim() first so that split("\\s+") cannot yield a leading empty token.
        String trimmed = description.trim();
        if (trimmed.isEmpty()) {
            return "";
        }
        HashSet<String> keywordSet = new HashSet<String>(Arrays.asList(keywords));
        if (keywordSet.isEmpty()) {
            return "";
        }
        String[] words = trimmed.split("\\s+");

        // Occurrence count of each keyword inside the current window [start, end].
        HashMap<String, Integer> counts = new HashMap<String, Integer>();
        int bestStart = -1; // inclusive; -1 means "no window found yet"
        int bestEnd = -1;   // exclusive
        int start = 0;

        for (int end = 0; end < words.length; end++) {
            String word = words[end];
            if (!keywordSet.contains(word)) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
            if (counts.size() < keywordSet.size()) {
                // Some keyword is still missing from the window; keep expanding.
                continue;
            }

            // The window covers every keyword. Drop leading words that are not
            // needed: non-keywords and keywords that occur again further right.
            // This stops at the latest, because words[end] is itself a keyword.
            while (true) {
                Integer count = counts.get(words[start]);
                if (count == null) {
                    start++;
                } else if (count > 1) {
                    counts.put(words[start], count - 1);
                    start++;
                } else {
                    break;
                }
            }

            // Strict comparison keeps the earliest window among equal lengths.
            int length = end - start + 1;
            if (bestStart < 0 || length < bestEnd - bestStart) {
                bestStart = start;
                bestEnd = end + 1;
            }

            // Give up the leftmost keyword so the search continues for a
            // different (possibly shorter) window further to the right.
            counts.remove(words[start]);
            start++;
        }

        if (bestStart < 0) {
            return "";
        }
        return joinWords(words, bestStart, bestEnd);
    }

    /** Joins {@code words[from..to)} with single spaces; requires {@code from < to}. */
    private static String joinWords(String[] words, int from, int to) {
        StringBuilder sb = new StringBuilder(words[from]);
        for (int i = from + 1; i < to; i++) {
            sb.append(' ').append(words[i]);
        }
        return sb.toString();
    }
}
Extract the shortest summary that contains all the keywords from an article
最新推荐文章于 2022-03-04 16:21:09 发布