JAVA
需求:文章内容按标点符号断句,每句字数需设置上限,不能超过 50 个字。
/**
* @param content 内容
* @param chunkSize 字数限制
* @return
*/
public static List<String> contentDiv(String content, int chunkSize) {
if (StringUtils.isEmpty(content)) return new ArrayList<>();
LinkedList<String> result = new LinkedList<>();
Pattern pattern = Pattern.compile("([^,。]{1," + chunkSize + "})([,。]?)");
Matcher matcher = pattern.matcher(content);
while (matcher.find()) {
String sentence = matcher.group(1) + matcher.group(2);
if (sentence.endsWith(",") || sentence.endsWith("。")) {
result.add(sentence);
} else if (!result.isEmpty()) {
String previousSentence = result.get(result.size() - 1);
if (!previousSentence.endsWith(",") && !previousSentence.endsWith("。")) {
result.set(result.size() - 1, previousSentence + sentence);
} else {
result.add(sentence);
}
} else {
result.add(sentence);
}