示例:
"<p class=\"MsoNormal\">
<span style=\"text-decoration:underline;\">链接:
<a href=\"链接1\" title=\"标题1\" target=\"_self\">标题1</a>
<a href=\"链接2\" title=\"\" target=\"_self\">标题2</a>
<a href=\"链接3\">标题3</a>
</span>
</p>"
提取每个链接的href和title;若title为空,则使用链接的显示文本作为标题。
/** Matches one anchor element: group 1 is the raw attribute text, group 2 the inner content. */
private static final Pattern ATTACHMENT_ANCHOR_PATTERN =
        Pattern.compile("<a\\s+([^>]*)>(.*?)</a\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/** Finds the double-quoted {@code href} value inside an anchor's attribute text. */
private static final Pattern ATTACHMENT_HREF_PATTERN =
        Pattern.compile("(?:^|\\s)href\\s*=\\s*\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);

/** Finds the double-quoted {@code title} value inside an anchor's attribute text. */
private static final Pattern ATTACHMENT_TITLE_PATTERN =
        Pattern.compile("(?:^|\\s)title\\s*=\\s*\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);

/**
 * Extracts attachment links from an HTML fragment.
 *
 * <p>Every {@code <a ...>...</a>} element with a non-blank, double-quoted {@code href}
 * yields one map with two entries:
 * <ul>
 *   <li>{@code "url"} - the trimmed {@code href}; {@code https://} is prepended unless the
 *       value already starts with {@code http://} or {@code https://} (protocol-relative
 *       {@code //host/...} values get {@code https:} prepended instead);</li>
 *   <li>{@code "title"} - the {@code title} attribute; when it is missing or blank, the
 *       trimmed inner content of the anchor; when that is blank too, {@code "未命名"}.</li>
 * </ul>
 *
 * <p>Attributes may appear in any order and {@code title} is optional. Anchors without a
 * usable {@code href} are skipped, so every returned map contains both keys.
 *
 * @param content HTML text to scan; {@code null} is treated as empty
 * @return a mutable list of attachment maps in document order, never {@code null}
 */
public static List<Map<String, String>> getAttachmentFromContent(String content) {
    List<Map<String, String>> attachmentList = new ArrayList<>();
    if (content == null) {
        return attachmentList;
    }

    Matcher anchor = ATTACHMENT_ANCHOR_PATTERN.matcher(content);
    while (anchor.find()) {
        String attributes = anchor.group(1);

        String url = findAttachmentAttribute(ATTACHMENT_HREF_PATTERN, attributes);
        if (!hasVisibleText(url)) {
            // Nothing to link to; prefixing a scheme here would fabricate a bare "https://".
            continue;
        }

        String fileName = findAttachmentAttribute(ATTACHMENT_TITLE_PATTERN, attributes);
        if (!hasVisibleText(fileName)) {
            // No usable title attribute: fall back to the text shown for the link.
            fileName = anchor.group(2).trim();
        }
        if (!hasVisibleText(fileName)) {
            // Neither a title nor link text is available: use the default file name.
            fileName = "未命名";
        }

        Map<String, String> attachmentMap = new HashMap<>();
        attachmentMap.put("url", normalizeAttachmentUrl(url.trim()));
        attachmentMap.put("title", fileName);
        attachmentList.add(attachmentMap);
    }
    return attachmentList;
}

/** Returns group 1 of the first match of {@code attribute} in {@code attributes}, or {@code null}. */
private static String findAttachmentAttribute(Pattern attribute, String attributes) {
    Matcher m = attribute.matcher(attributes);
    return m.find() ? m.group(1) : null;
}

/** Ensures the URL carries an http(s) scheme, checking the prefix rather than any substring. */
private static String normalizeAttachmentUrl(String url) {
    if (url.regionMatches(true, 0, "http://", 0, 7)
            || url.regionMatches(true, 0, "https://", 0, 8)) {
        return url;
    }
    if (url.startsWith("//")) {
        return "https:" + url;
    }
    return "https://" + url;
}

/** Returns {@code true} when the value is non-null and has at least one non-whitespace character. */
private static boolean hasVisibleText(String value) {
    if (value == null) {
        return false;
    }
    for (int i = 0; i < value.length(); i++) {
        if (!Character.isWhitespace(value.charAt(i))) {
            return true;
        }
    }
    return false;
}