Java 去除 emoji 表情等特殊字符,保留 HTML 标签
package test;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Removes emoji and pictographic symbols from text while leaving every other
 * character (including HTML markup and CJK text) untouched.
 */
public class Emoji {

    /**
     * Matches one emoji code point plus any variation selectors / joiners that
     * directly follow it.
     *
     * <p>Code point ranges removed:
     * <ul>
     *   <li>U+1F000–U+1F7FF: the two supplementary ranges the original pattern covered</li>
     *   <li>U+1F900–U+1FAFF: Supplemental Symbols and Pictographs through
     *       Symbols and Pictographs Extended-A (newer emoji such as U+1F914)</li>
     *   <li>U+2600–U+27FF: Miscellaneous Symbols, Dingbats and neighbours</li>
     * </ul>
     *
     * <p>U+FE0F (variation selector-16) and U+200D (zero width joiner) are only
     * consumed when they trail a removed emoji, so that no invisible residue is
     * left behind; a joiner used elsewhere in ordinary text is not touched.
     *
     * <p>Compiled once: {@link Pattern} is immutable and safe to share.
     */
    private static final Pattern EMOJI = Pattern.compile(
            "[\\x{1F000}-\\x{1F7FF}\\x{1F900}-\\x{1FAFF}\\x{2600}-\\x{27FF}]"
                    + "[\\x{FE0F}\\x{200D}]*");

    /**
     * Small demo: prints a sample HTML fragment with its emoji stripped.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "<html><body><p>😋测试去除emoji💦你好</p><p>文字文字</p><body></html>";
        String filterEmoji = filterEmoji(str);
        System.out.println(filterEmoji);
    }

    /**
     * Returns {@code source} with all emoji matched by {@link #EMOJI} removed.
     *
     * @param source text to clean; may be {@code null}
     * @return the cleaned text, or {@code null} when {@code source} is {@code null}
     */
    public static String filterEmoji(String source) {
        if (source == null) {
            return null;
        }
        // replaceAll already handles the "no match" case, so no separate find() is needed.
        return EMOJI.matcher(source).replaceAll("");
    }
}
结果: