业务需求:在工作中需要从一个 XML 格式的字符串中解析出自己需要的属性值,代码如下:
// Path expression handed to JSONObject.getByPath in main; it names the nested
// xml_result -> read_sentence -> rec_paper -> read_sentence element of the sample XML.
private static final String path = "[xml_result].[read_sentence].[rec_paper].[read_sentence]";
/**
 * Demo entry point: converts a sample XML scoring result into a Hutool
 * {@code JSONObject}, navigates to the nested {@code read_sentence} node with
 * {@code getByPath(path)} and prints that node's {@code total_score} attribute.
 *
 * @param args unused
 * @throws IllegalStateException if the path does not resolve to a JSON object
 *                               or the node carries no {@code total_score}
 */
public static void main(String[] args) {
    String str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
            " <xml_result>\n" +
            " <read_sentence lan=\"cn\" type=\"study\" version=\"7,0,0,1024\">\n" +
            " <rec_paper>\n" +
            " <read_sentence accuracy_score=\"0.000000\" beg_pos=\"0\" content=\"今天天气怎么样。\" emotion_score=\"0.000000\" end_pos=\"236\" except_info=\"0\" fluency_score=\"85.277153\" integrity_score=\"100.000000\" is_rejected=\"false\" phone_score=\"78.571426\" time_len=\"236\" tone_score=\"100.000000\" total_score=\"62.654514\">\n" +
            " <sentence beg_pos=\"0\" content=\"今天天气怎么样\" end_pos=\"236\" fluency_score=\"0.000000\" phone_score=\"78.571426\" time_len=\"236\" tone_score=\"100.000000\" total_score=\"71.654205\">\n" +
            " <word beg_pos=\"0\" content=\"今\" end_pos=\"102\" symbol=\"jin1\" time_len=\"102\">\n" +
            " <syll beg_pos=\"0\" content=\"sil\" dp_message=\"0\" end_pos=\"70\" rec_node_type=\"sil\" time_len=\"70\">\n" +
            " <phone beg_pos=\"0\" content=\"sil\" dp_message=\"0\" end_pos=\"70\" rec_node_type=\"sil\" time_len=\"70\"></phone>\n" +
            " </syll>\n" +
            " <syll beg_pos=\"70\" content=\"fil\" dp_message=\"32\" end_pos=\"83\" rec_node_type=\"fil\" time_len=\"13\">\n" +
            " <phone beg_pos=\"70\" content=\"fil\" dp_message=\"32\" end_pos=\"83\" rec_node_type=\"fil\" time_len=\"13\"></phone>\n" +
            " </syll>\n" +
            " <syll beg_pos=\"83\" content=\"今\" dp_message=\"0\" end_pos=\"102\" rec_node_type=\"paper\" symbol=\"jin1\" time_len=\"19\">\n" +
            " <phone beg_pos=\"83\" content=\"j\" dp_message=\"0\" end_pos=\"90\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"7\"></phone>\n" +
            " <phone beg_pos=\"90\" content=\"in\" dp_message=\"0\" end_pos=\"102\" is_yun=\"1\" mono_tone=\"TONE1\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"12\"></phone>\n" +
            " </syll>\n" +
            " </word>\n" +
            " <word beg_pos=\"102\" content=\"天\" end_pos=\"126\" symbol=\"tian1\" time_len=\"24\">\n" +
            " <syll beg_pos=\"102\" content=\"天\" dp_message=\"0\" end_pos=\"126\" rec_node_type=\"paper\" symbol=\"tian1\" time_len=\"24\">\n" +
            " <phone beg_pos=\"102\" content=\"t\" dp_message=\"0\" end_pos=\"108\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"6\"></phone>\n" +
            " <phone beg_pos=\"108\" content=\"ian\" dp_message=\"0\" end_pos=\"126\" is_yun=\"1\" mono_tone=\"TONE1\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"18\"></phone>\n" +
            " </syll>\n" +
            " </word>\n" +
            " <word beg_pos=\"126\" content=\"天\" end_pos=\"152\" symbol=\"tian1\" time_len=\"26\">\n" +
            " <syll beg_pos=\"126\" content=\"天\" dp_message=\"0\" end_pos=\"152\" rec_node_type=\"paper\" symbol=\"tian1\" time_len=\"26\">\n" +
            " <phone beg_pos=\"126\" content=\"t\" dp_message=\"0\" end_pos=\"136\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"10\"></phone>\n" +
            " <phone beg_pos=\"136\" content=\"ian\" dp_message=\"0\" end_pos=\"152\" is_yun=\"1\" mono_tone=\"TONE1\" perr_level_msg=\"2\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"16\"></phone>\n" +
            " </syll>\n" +
            " </word>\n" +
            " <word beg_pos=\"152\" content=\"气\" end_pos=\"174\" symbol=\"qi9\" time_len=\"22\">\n" +
            " <syll beg_pos=\"152\" content=\"气\" dp_message=\"0\" end_pos=\"174\" rec_node_type=\"paper\" symbol=\"qi0\" time_len=\"22\">\n" +
            " <phone beg_pos=\"152\" content=\"q\" dp_message=\"0\" end_pos=\"161\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"9\"></phone>\n" +
            " <phone beg_pos=\"161\" content=\"i\" dp_message=\"0\" end_pos=\"174\" is_yun=\"1\" mono_tone=\"TONE0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"13\"></phone>\n" +
            " </syll>\n" +
            " </word>\n" +
            " <word beg_pos=\"174\" content=\"怎\" end_pos=\"186\" symbol=\"zen3\" time_len=\"12\">\n" +
            " <syll beg_pos=\"174\" content=\"怎\" dp_message=\"0\" end_pos=\"186\" rec_node_type=\"paper\" symbol=\"zen3\" time_len=\"12\">\n" +
            " <phone beg_pos=\"174\" content=\"z\" dp_message=\"0\" end_pos=\"180\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"6\"></phone>\n" +
            " <phone beg_pos=\"180\" content=\"en\" dp_message=\"0\" end_pos=\"186\" is_yun=\"1\" mono_tone=\"TONE3\" perr_level_msg=\"3\" perr_msg=\"1\" rec_node_type=\"paper\" time_len=\"6\"></phone>\n" +
            " </syll>\n" +
            " </word>\n" +
            " <word beg_pos=\"186\" content=\"么\" end_pos=\"197\" symbol=\"me5\" time_len=\"11\">\n" +
            " <syll beg_pos=\"186\" content=\"么\" dp_message=\"0\" end_pos=\"197\" rec_node_type=\"paper\" symbol=\"me0\" time_len=\"11\">\n" +
            " <phone beg_pos=\"186\" content=\"m\" dp_message=\"0\" end_pos=\"189\" is_yun=\"0\" perr_level_msg=\"3\" perr_msg=\"1\" rec_node_type=\"paper\" time_len=\"3\"></phone>\n" +
            " <phone beg_pos=\"189\" content=\"e\" dp_message=\"0\" end_pos=\"197\" is_yun=\"1\" mono_tone=\"TONE0\" perr_level_msg=\"3\" perr_msg=\"1\" rec_node_type=\"paper\" time_len=\"8\"></phone>\n" +
            " </syll>\n" +
            " </word>\n" +
            " <word beg_pos=\"197\" content=\"样\" end_pos=\"236\" symbol=\"yang4\" time_len=\"39\">\n" +
            " <syll beg_pos=\"197\" content=\"样\" dp_message=\"0\" end_pos=\"217\" rec_node_type=\"paper\" symbol=\"yang4\" time_len=\"20\">\n" +
            " <phone beg_pos=\"197\" content=\"_i\" dp_message=\"0\" end_pos=\"206\" is_yun=\"0\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"9\"></phone>\n" +
            " <phone beg_pos=\"206\" content=\"iang\" dp_message=\"0\" end_pos=\"217\" is_yun=\"1\" mono_tone=\"TONE4\" perr_level_msg=\"1\" perr_msg=\"0\" rec_node_type=\"paper\" time_len=\"11\"></phone>\n" +
            " </syll>\n" +
            " <syll beg_pos=\"217\" content=\"fil\" dp_message=\"32\" end_pos=\"236\" rec_node_type=\"fil\" time_len=\"19\">\n" +
            " <phone beg_pos=\"217\" content=\"fil\" end_pos=\"236\" time_len=\"19\"></phone>\n" +
            " </syll>\n" +
            " </word>\n" +
            " </sentence>\n" +
            " </read_sentence>\n" +
            " </rec_paper>\n" +
            " </read_sentence>\n" +
            " </xml_result>";
    JSONObject jsonObject = XML.toJSONObject(str);

    // getByPath yields null when the path does not resolve; fail with a clear
    // message instead of a bare NullPointerException / ClassCastException.
    Object node = jsonObject.getByPath(path);
    if (!(node instanceof JSONObject)) {
        throw new IllegalStateException("No JSON object found at path: " + path);
    }
    JSONObject readSentence = (JSONObject) node;

    // Use the typed getter rather than a hard cast: the numeric type chosen by
    // the XML-to-JSON conversion is an implementation detail of the library.
    BigDecimal totalScore = readSentence.getBigDecimal("total_score");
    if (totalScore == null) {
        throw new IllegalStateException("Attribute 'total_score' is missing at path: " + path);
    }
    System.out.println(totalScore);
}
踩坑过程:先将 XML 字符串解析成 hutool 的 JSONObject 对象。由于想要的数据在第四层,逐层调用 JSONObject.get(Object key) 显然不太合适,于是在猜测之下使用了 JSONObject.getByPath(String expression) 方法。一开始 expression 参数写的是 "/xml_result/read_sentence/rec_paper/read_sentence" 和 "\\xml_result\\read_sentence\\rec_paper\\read_sentence",发现返回的都是 null;后来去查了 hutool 官网的 API 文档,也没有找到详细说明。于是翻看了源码,其中解析路径表达式的逻辑如下:
// Characters that init(String) treats as delimiters between path segments.
private static final char[] EXP_CHARS = new char[]{'.', '[', ']'};
/**
 * Splits a path expression into its segments and stores them, as an
 * unmodifiable list, in {@code patternParts}.
 *
 * <p>Every character in {@code EXP_CHARS} ('.', '[' and ']') ends the current
 * segment; any other character is appended to it. A '$' at index 0 is not part
 * of any segment and only sets {@code isStartWith}. Each non-empty segment is
 * passed through {@code unWrapIfPossible} before being stored.
 *
 * @param expression the path expression to parse
 * @throws IllegalArgumentException if a ']' appears without a preceding '[',
 *                                  or if a '[' is followed by '.', another '['
 *                                  or the end of the expression before its ']'
 */
private void init(String expression) {
    List<String> localPatternParts = new ArrayList<>();
    int length = expression.length();
    StrBuilder builder = StrUtil.strBuilder();
    boolean isNumStart = false; // true while inside an unclosed '['

    for (int i = 0; i < length; ++i) {
        char c = expression.charAt(i);

        if (0 == i && '$' == c) {
            this.isStartWith = true;
            continue;
        }
        if (!ArrayUtil.contains(EXP_CHARS, c)) {
            // Ordinary character: extend the segment being built.
            builder.append(c);
            continue;
        }

        // Delimiter: validate bracket pairing first.
        if (']' == c) {
            if (!isNumStart) {
                throw new IllegalArgumentException(StrUtil.format("Bad expression '{}':{}, we find ']' but no '[' !", expression, i));
            }
            isNumStart = false;
        } else {
            if (isNumStart) {
                throw new IllegalArgumentException(StrUtil.format("Bad expression '{}':{}, we find '[' but no ']' !", expression, i));
            }
            if ('[' == c) {
                isNumStart = true;
            }
        }

        // Then flush whatever segment was accumulated before the delimiter.
        if (builder.length() > 0) {
            localPatternParts.add(unWrapIfPossible(builder));
        }
        builder.reset();
    }

    if (isNumStart) {
        throw new IllegalArgumentException(StrUtil.format("Bad expression '{}':{}, we find '[' but no ']' !", expression, length - 1));
    }
    // Trailing segment that was not followed by a delimiter.
    if (builder.length() > 0) {
        localPatternParts.add(unWrapIfPossible(builder));
    }
    this.patternParts = Collections.unmodifiableList(localPatternParts);
}
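结果:根据上面的源码,把路径改成了 "[xml_result].[read_sentence].[rec_paper].[read_sentence]" 的写法,就成功取到数据了。从解析逻辑也可以看出,'.'、'['、']' 都会被当作分隔符,因此直接写成 "xml_result.read_sentence.rec_paper.read_sentence" 按理也应当可行。因为网上关于这个方法的介绍很少,就发个帖子供大家参考;如果有小伙伴发现了其他用法,也欢迎在评论区讨论。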