1 提取元素值的示例代码 搬运自官方培训
2.9.12
1.1 xml文件
<?xml version="1.0"?>
<story>
<storyinfo>
<author>John Fleck</author>
<datewritten>June 2, 2002</datewritten>
<keyword>example keyword</keyword>
</storyinfo>
<body>
<headline>This is the headline</headline>
<para>This is the body text.</para>
</body>
</story>
1.2 libxml2示例代码 按xml层次逐层解析
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
/*
 * Print the text content of every <keyword> element that is a direct child
 * of `cur`.
 *
 * doc: the parsed document that owns `cur`; handed to xmlNodeListGetString.
 * cur: the parent element whose children are scanned (the caller in this
 *      file passes the <storyinfo> element).
 *
 * An empty element such as <keyword/> has no child nodes, in which case
 * xmlNodeListGetString returns NULL; an empty value is printed for it
 * instead of passing NULL to printf("%s").
 */
void
parseStory (xmlDocPtr doc, xmlNodePtr cur) {
    xmlChar *key;

    for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
        if (xmlStrcmp(cur->name, (const xmlChar *)"keyword") != 0) {
            continue;
        }

        /* Collect the element's value from the doc tree as one string. */
        key = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
        printf("keyword: %s\n", key != NULL ? (const char *)key : "");

        /* The returned string is owned by the caller and must be released. */
        if (key != NULL) {
            xmlFree(key);
        }
    }
}
/*
 * Read the XML file `docname` into a doc tree and print the keyword values
 * found under every <storyinfo> child of the <story> root.
 *
 * Problems (unparsable file, empty document, unexpected root element) are
 * reported on stderr and the function simply returns.
 */
static void
parseDoc(char *docname) {
    xmlDocPtr doc;
    xmlNodePtr root;
    xmlNodePtr child;

    /* XML file -> doc tree. */
    doc = xmlParseFile(docname);
    if (doc == NULL) {
        fprintf(stderr,"Document not parsed successfully. \n");
        return;
    }

    /* Root element of the doc tree. */
    root = xmlDocGetRootElement(doc);
    if (root == NULL) {
        fprintf(stderr,"empty document\n");
        goto done;
    }

    /* Validate the root: it must be <story>. */
    if (xmlStrcmp(root->name, (const xmlChar *) "story") != 0) {
        fprintf(stderr,"document of the wrong type, root node != story");
        goto done;
    }

    /* Hand every <storyinfo> child to parseStory for keyword extraction. */
    for (child = root->xmlChildrenNode; child != NULL; child = child->next) {
        if (xmlStrcmp(child->name, (const xmlChar *)"storyinfo") == 0) {
            parseStory (doc, child);
        }
    }

done:
    /* The doc tree must always be released once it has been created. */
    xmlFreeDoc(doc);
}
/*
 * Entry point: expects the XML file name as the first argument and passes
 * it to parseDoc.
 *
 * Exit status: EXIT_FAILURE when no file name is given, EXIT_SUCCESS once
 * parseDoc has returned. parseDoc returns void, so the errors it reports on
 * stderr are not reflected in the exit status.
 */
int
main(int argc, char **argv) {
    if (argc <= 1) {
        /* argv[0] is only guaranteed to be usable when argc > 0. */
        fprintf(stderr, "Usage: %s docname\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    parseDoc (argv[1]);
    return EXIT_SUCCESS;
}
1.3 编译命令
gcc -o test test.c `xml2-config --cflags --libs`
1.4 拓展获取元素的文本内容 递归进行
使用接口 xmlNodeGetContent (),返回xmlChar *msg指针;
这个msg在使用完后,需要自行xmlFree(msg)
也可以自行编写一个递归提取节点文本内容的函数,参考第3节的递归实现
2 提取元素属性的示例代码 搬运自官方培训
2.9.12
2.1 xml文件
<?xml version="1.0"?>
<story>
<storyinfo>
<author>John Fleck</author>
<datewritten>June 2, 2002</datewritten>
<keyword>example keyword</keyword>
</storyinfo>
<body>
<headline>This is the headline</headline>
<para>This is the body text.</para>
</body>
<reference uri="www.baidu.com"/>
</story>
2.2 libxml2示例代码 xmlGetProp接口完成xml特殊字符的反转义
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
/*
 * Print the "uri" attribute of every <reference> element that is a direct
 * child of `cur`.
 *
 * doc: the parsed document; not used by this function, kept so the
 *      signature stays the same for existing callers.
 * cur: the parent element whose children are scanned (the caller in this
 *      file passes the <story> root).
 *
 * xmlGetProp returns NULL when the attribute is absent; that case is
 * reported on stderr instead of passing NULL to printf("%s").
 */
void
getReference (xmlDocPtr doc, xmlNodePtr cur) {
    xmlChar *uri;

    (void)doc;

    for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
        if (xmlStrcmp(cur->name, (const xmlChar *)"reference") != 0) {
            continue;
        }

        uri = xmlGetProp(cur, (const xmlChar *)"uri");
        if (uri == NULL) {
            fprintf(stderr, "reference element has no uri attribute\n");
            continue;
        }

        printf("uri: %s\n", uri);
        /* The attribute value is a copy owned by the caller; release it. */
        xmlFree(uri);
    }
}
/*
 * Parse the XML file `docname`, verify that its root element is <story>,
 * and let getReference print the reference attributes found below the root.
 *
 * Problems (unparsable file, empty document, unexpected root element) are
 * reported on stderr and the function simply returns.
 */
void
parseDoc(char *docname) {
    xmlDocPtr doc = xmlParseFile(docname);
    xmlNodePtr root;

    if (doc == NULL) {
        fprintf(stderr,"Document not parsed successfully. \n");
        return;
    }

    root = xmlDocGetRootElement(doc);
    if (root == NULL) {
        fprintf(stderr,"empty document\n");
    } else if (xmlStrcmp(root->name, (const xmlChar *) "story") != 0) {
        fprintf(stderr,"document of the wrong type, root node != story");
    } else {
        getReference (doc, root);
    }

    /* Every path that obtained a doc tree releases it here. */
    xmlFreeDoc(doc);
}
/*
 * Entry point: expects the XML file name as the first argument and passes
 * it to parseDoc.
 *
 * Exit status: EXIT_FAILURE when no file name is given, EXIT_SUCCESS once
 * parseDoc has returned. parseDoc returns void, so the errors it reports on
 * stderr are not reflected in the exit status.
 */
int
main(int argc, char **argv) {
    if (argc <= 1) {
        /* argv[0] is only guaranteed to be usable when argc > 0. */
        fprintf(stderr, "Usage: %s docname\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    parseDoc (argv[1]);
    return EXIT_SUCCESS;
}
2.3 xmlGetProp接口说明
- 如果没有该属性,那么接口返回NULL
- 如果有该属性,那么接口返回该属性的值:字符串
- 空字符串,“”,即字符串长度为0,只有\0
- 非空字符串,即字符串长度非0,以\0结束
- Go 的xml,获取属性,也能达到类似效果
3 遍历xml文件-转化特定xml节点为json数据示例代码 参考官方示例的2次修改
参考转json代码
2.9.12
3.1 xml文件
<Envelope xmlns="http://schemas.xmlsoap.org/soap/envelope/">
<Body xmlns="http://schemas.xmlsoap.org/soap/envelope/">
<portInfo xmlns="http://wsserver.ips.navigator.com">
<status>0</status>
</portInfo>
</Body>
</Envelope>
3.2 libxml2示例代码 参考tree1.c
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <jansson.h>
#ifdef LIBXML_TREE_ENABLED
/*
 * To compile this file using gcc you can type
 * gcc -g -o example example.c `xml2-config --cflags --libs` -ljansson
 */
/*
 * Convert the sibling list that starts at `node` into a newly created JSON
 * object.
 *
 * - An element node becomes a member keyed by the element name; its value is
 *   the object built recursively from the element's children.
 * - A text node becomes a member keyed by the text node's own name (this is
 *   what the original code uses, even though the log line labels it
 *   "parent name"); its value is the text content as a JSON string. Text
 *   that json_string() rejects is skipped.
 * - Other node types are ignored.
 * - Members are stored with json_object_set_new, so siblings that share a
 *   name end up as a single member.
 *
 * node:     first node of the sibling list; NULL yields an empty object.
 * out_json: receives the new object on success (the caller releases it with
 *           json_decref) and NULL on failure.
 *
 * Returns 0 on success, -1 when `out_json` is NULL, when a JSON value cannot
 * be allocated, or when a member cannot be stored.
 */
static int
transfer_json(xmlNode *node, json_t **out_json)
{
    xmlNode *cur_node = NULL;
    json_t *new_obj = NULL;
    json_t *new_value = NULL;
    const char *name = NULL;

    if (out_json == NULL) {
        return -1;
    }
    *out_json = NULL;

    new_obj = json_object();
    if (new_obj == NULL) {
        return -1;
    }

    /* Walk the siblings; nesting is handled by recursing into children. */
    for (cur_node = node; cur_node; cur_node = cur_node->next) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("node type: Element, name: %s\n", cur_node->name);
            name = (const char *)cur_node->name;
            new_value = NULL;
            if (transfer_json(cur_node->children, &new_value) != 0) {
                goto fail;
            }
            /* json_object_set_new takes over the reference to new_value. */
            if (json_object_set_new(new_obj, name, new_value) != 0) {
                goto fail;
            }
        }
        else if ( cur_node->type == XML_TEXT_NODE ) {
            printf("node type: TEXT, parent name: %s value: %s\n",
                   cur_node->name,
                   cur_node->content);
            name = (const char *)cur_node->name;
            new_value = json_string((const char *)cur_node->content);
            if ( NULL == new_value )
                continue;
            if (json_object_set_new(new_obj, name, new_value) != 0) {
                goto fail;
            }
        }
    }

    *out_json = new_obj;
    return 0;

fail:
    /* Drop the partially built object so nothing leaks on failure. */
    json_decref(new_obj);
    return -1;
}
/*
 * Depth-first search for the first element whose name is exactly `name`,
 * starting at `a_node` and covering its following siblings and all of their
 * descendants.
 *
 * a_node:     first node to examine; NULL means there is nothing to search.
 * name:       element name to look for. The comparison is an exact match;
 *             the earlier prefix comparison (strncmp over strlen(name))
 *             also accepted longer names such as "portInfoExtra".
 * out_node:   receives the matching node when one is found.
 * found_flag: set to 1 when a match is found; the caller initialises it to 0.
 *             It is also what stops the recursion early.
 */
static void
find_given_element_with_name(xmlNode * a_node, char *name, xmlNode ** out_node, int *found_flag)
{
    xmlNode *cur_node = NULL;

    if (name == NULL || out_node == NULL || found_flag == NULL) {
        return ;
    }

    for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("node type: Element, name: %s\n", cur_node->name);
            if ( 0 == strcmp((const char *)cur_node->name, name) ) {
                *out_node = cur_node;
                *found_flag = 1;
                printf("found the node type: Element, name: %s\n", cur_node->name);
                printf("---------------------------------\n");
                return ; /* found: stop immediately */
            }
        }

        find_given_element_with_name(cur_node->children, name, out_node, found_flag);
        if ( 1 == *found_flag ) {
            break; /* a descendant matched: unwind without visiting more siblings */
        }
    }
    return ;
}
/**
 * print_element_names:
 * @a_node: the first xml node to visit.
 *
 * Walks @a_node, its following siblings and all of their descendants.
 * For each element node the element name is printed; for each text node
 * the name of its parent and the text content are printed. Nodes of any
 * other type produce no output, but their children are still visited.
 */
static void
print_element_names(xmlNode * a_node)
{
    xmlNode *node = a_node;

    while (node != NULL) {
        switch (node->type) {
        case XML_ELEMENT_NODE:
            printf("node type: Element, name: %s\n", node->name);
            break;
        case XML_TEXT_NODE:
            printf("node type: TEXT, parent name: %s value: %s\n",
                   node->parent->name,
                   node->content);
            break;
        default:
            break;
        }

        /* Descend before moving on to the next sibling. */
        print_element_names(node->children);
        node = node->next;
    }
}
/**
* Simple example to parse a file called "file.xml",
* walk down the DOM, and print the name of the
* xml elements nodes.
*/
int
main(int argc, char **argv)
{
xmlDoc *doc = NULL;
xmlNode *root_element = NULL;
xmlNode *params = NULL;
json_t *params_j = NULL;
int found_flag = 0;
char *out_s = NULL;
if (argc != 2)
return(1);
/*
* this initialize the library and check potential ABI mismatches
* between the version it was compiled for and the actual shared
* library used.
*/
LIBXML_TEST_VERSION
/*parse the file and get the DOM */
//doc = xmlReadFile(argv[1], NULL, 0);
doc = xmlReadFile(argv[1], NULL, 256);//设置为256,去掉空节点elements
if (doc == NULL) {
printf("error: could not parse file %s\n", argv[1]);
}
/*Get the root element node */
root_element = xmlDocGetRootElement(doc);
//遍历-打印当前xml的全部节点elements
//print_element_names(root_element);
#if 1
find_given_element_with_name(root_element, "portInfo", ¶ms, &found_flag);
if (1 == found_flag) {
transfer_json(params->children, ¶ms_j);
printf("---------------------------------\n");
//out_s = json_dumps(params_j, JSON_INDENT(4));
out_s = json_dumps(params_j, JSON_COMPACT);//压缩形式将json结构转为字符串,即encoding序列化
printf("resutl:%s\n",out_s);
printf("---------------------------------\n");
free(out_s);//注意释放相关的结构
json_decref(params_j);
}
#endif
/*free the document */
xmlFreeDoc(doc);
return 0;
}
#else
/* Fallback entry point used when libxml2 was built without tree support. */
int main(void) {
    fputs("Tree support not compiled in\n", stderr);
    return 0;
}
#endif
3.3 编译命令
gcc -g -o example example.c `xml2-config --cflags --libs` -ljansson
4 常用头文件说明
- parser.h:xml的解析器,提供xmlParseFile/xmlParseMemory等接口
- xmlmemory.h:内存操作
- tree.h:操作树形结构doc,xmlNode/xmlNodePtr定义
5 libxml2库安装
- ubuntu 下,直接执行sudo apt install libxml2-dev
- 也可以从源码包编译安装
- 下载安装libxml2
- 上述示例代码,可在libxml2 2.9.12版本运行
- libxml2 2.9版本为最近几年的新版本,比较稳定
6 xml树形结构的说明
6.1 举例xml文件
<Envelope xmlns="http://schemas.xmlsoap.org/soap/envelope/">
<Body xmlns="http://schemas.xmlsoap.org/soap/envelope/">
<portInfo xmlns="http://wsserver.ips.navigator.com">
<status>0</status>
</portInfo>
</Body>
</Envelope>
6.2 xml树形结构
- xml树形结构,由元素(即node)组成
- 元素类型
- 大部分为 XML_ELEMENT_NODE
- 文本为 XML_TEXT_NODE
- 还有其他类型,如CDATA、DTD、DECL等
- 简单认为,树形结构为ELEMENT-TEXT组成
- TEXT为叶子
- ELEMENT非叶子
- 默认解析情况下:
Enum xmlParserOption = 0
- ELEMENT元素的组成:DTD - ELE - DECL - DTD
- TEXT元素的组成:TEXT
- Envelope 为根
- TEXT:"\n" //DTD
- Body //ELE
- TEXT:"\n"
- portInfo //ELE
- TEXT:"\n" //DTD
- status //ELE
- TEXT:"0" //TEXT
- status //DECL
- TEXT:"\n" //DTD
- portInfo //DECL
- TEXT:"\n"
- Body //DECL
- TEXT:"\n" //DTD
- 去掉空节点:
Enum xmlParserOption = 256(即 XML_PARSE_NOBLANKS,1<<8)
- 去掉TEXT:“\n”
- 但也去掉元素值为空的节点,比如status的值若为空也会去掉
- Envelope 为根
- Body
- portInfo
- status
- TEXT:"0"
7 常见错误
7.1 xmlReadFile 报错
- 正常调用xmlReadFile 加载指定的xml文件
- xml报错:I/O warning : failed to load external entity “xxxxx”
- 一般是文件路径有误,导致无法找到或打开该文件
- 提供正确的文件路径,能ls找到或fopen即可修复
- 参考linux读xml文件问题