libxml2库的学习

1 提取元素值的示例代码 搬运自官方培训 2.9.12

1.1 xml文件
<?xml version="1.0"?>
<story>
  <storyinfo>
    <author>John Fleck</author>
    <datewritten>June 2, 2002</datewritten>
    <keyword>example keyword</keyword>
  </storyinfo>
  <body>
    <headline>This is the headline</headline>
    <para>This is the body text.</para>
  </body>
</story>
1.2 libxml2示例代码 按xml层次逐层解析
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>

/*
 * Print the text content of every <keyword> element found among the
 * direct children of `cur`.
 *
 * doc: parsed document that `cur` belongs to; passed on to
 *      xmlNodeListGetString().
 * cur: element whose children are scanned. A NULL `cur` is ignored.
 *
 * A <keyword> element without any text is printed with an empty value.
 */
void
parseStory (xmlDocPtr doc, xmlNodePtr cur) {

	xmlNodePtr child;

	if (cur == NULL) {
		return;
	}

	for (child = cur->xmlChildrenNode; child != NULL; child = child->next) {
		xmlChar *key;

		if (xmlStrcmp(child->name, (const xmlChar *)"keyword") != 0) {
			continue;
		}

		/*
		 * Build the element's text from its child node list. The result
		 * can be NULL (for instance when the element has no children), so
		 * it must not be handed to printf("%s") unchecked.
		 */
		key = xmlNodeListGetString(doc, child->xmlChildrenNode, 1);
		if (key == NULL) {
			printf("keyword: %s\n", "");
			continue;
		}

		printf("keyword: %s\n", (const char *)key);
		/* The returned string is owned by the caller and must be released. */
		xmlFree(key);
	}
}

/*
 * Read the XML file `docname` into a document tree, check that the root
 * element is <story>, and pass every <storyinfo> child of the root to
 * parseStory(). Problems are reported on stderr; nothing is returned.
 */
static void
parseDoc(char *docname) {

	xmlDocPtr tree;
	xmlNodePtr root;
	xmlNodePtr child;

	/* Parse the file into an in-memory document tree. */
	tree = xmlParseFile(docname);
	if (tree == NULL) {
		fprintf(stderr,"Document not parsed successfully. \n");
		return;
	}

	/* Fetch the root element of the tree. */
	root = xmlDocGetRootElement(tree);
	if (root == NULL) {
		fprintf(stderr,"empty document\n");
		goto done;
	}

	/* Only documents whose root element is <story> are accepted. */
	if (xmlStrcmp(root->name, (const xmlChar *) "story") != 0) {
		fprintf(stderr,"document of the wrong type, root node != story");
		goto done;
	}

	/* Extract the keyword values held inside each <storyinfo> element. */
	for (child = root->xmlChildrenNode; child != NULL; child = child->next) {
		if (xmlStrcmp(child->name, (const xmlChar *)"storyinfo") == 0) {
			parseStory (tree, child);
		}
	}

done:
	/* Every path after a successful parse releases the document tree here. */
	xmlFreeDoc(tree);
}

/*
 * Entry point: expects the path of the XML file as the first argument.
 *
 * Returns EXIT_FAILURE after printing a usage line when no file name is
 * given, and EXIT_SUCCESS otherwise. parseDoc() returns no status, so the
 * exit status does not reflect parse errors.
 */
int
main(int argc, char **argv) {

	if (argc <= 1) {
		/* argv[0] may be missing when argc is 0, so fall back to a fixed name. */
		fprintf(stderr, "Usage: %s docname\n", argc > 0 ? argv[0] : "test");
		return EXIT_FAILURE;
	}

	parseDoc (argv[1]);

	/* Zero tells the shell the run succeeded. */
	return EXIT_SUCCESS;
}
1.3 编译命令

gcc -o test test.c `xml2-config --cflags --libs`

1.4 拓展获取元素的文本内容 递归进行

使用接口 xmlNodeGetContent (),返回xmlChar *msg指针;
这个msg在使用完后,需要自行xmlFree(msg)
也可以自行编写一个递归提取节点文本内容的函数,参考第3节的递归实现

2 提取元素属性的示例代码 搬运自官方培训 2.9.12

2.1 xml文件
<?xml version="1.0"?>
<story>
  <storyinfo>
    <author>John Fleck</author>
    <datewritten>June 2, 2002</datewritten>
    <keyword>example keyword</keyword>
  </storyinfo>
  <body>
    <headline>This is the headline</headline>
    <para>This is the body text.</para>
  </body>
  <reference uri="www.baidu.com"/>
</story>
2.2 libxml2示例代码 xmlGetProp接口完成xml特殊字符的反转义
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>

/*
 * Print the value of the "uri" attribute of every <reference> element
 * found among the direct children of `cur`.
 *
 * doc: document that `cur` belongs to; not used by this function.
 * cur: element whose children are scanned. A NULL `cur` is ignored.
 *
 * A <reference> element without a "uri" attribute is reported on stderr
 * and skipped.
 */
void
getReference (xmlDocPtr doc, xmlNodePtr cur) {

	xmlNodePtr child;

	(void)doc;

	if (cur == NULL) {
		return;
	}

	for (child = cur->xmlChildrenNode; child != NULL; child = child->next) {
		xmlChar *uri;

		if (xmlStrcmp(child->name, (const xmlChar *)"reference") != 0) {
			continue;
		}

		/* xmlGetProp returns NULL when the element has no such attribute. */
		uri = xmlGetProp(child, (const xmlChar *)"uri");
		if (uri == NULL) {
			fprintf(stderr, "reference element has no uri attribute\n");
			continue;
		}

		printf("uri: %s\n", (const char *)uri);
		/* The attribute value is a copy owned by the caller. */
		xmlFree(uri);
	}
}


/*
 * Read the XML file `docname`, check that its root element is <story>,
 * and let getReference() print the reference attributes found under the
 * root. Problems are reported on stderr; nothing is returned.
 */
void
parseDoc(char *docname) {

	xmlDocPtr parsed = xmlParseFile(docname);
	xmlNodePtr top;

	if (parsed == NULL) {
		fprintf(stderr,"Document not parsed successfully. \n");
		return;
	}

	top = xmlDocGetRootElement(parsed);

	if (top == NULL) {
		fprintf(stderr,"empty document\n");
	} else if (xmlStrcmp(top->name, (const xmlChar *) "story") != 0) {
		fprintf(stderr,"document of the wrong type, root node != story");
	} else {
		getReference (parsed, top);
	}

	/* Reached on every path once the file has been parsed. */
	xmlFreeDoc(parsed);
}

/*
 * Entry point: expects the path of the XML file as the first argument.
 *
 * Returns EXIT_FAILURE after printing a usage line when no file name is
 * given, and EXIT_SUCCESS otherwise. parseDoc() returns no status, so the
 * exit status does not reflect parse errors.
 */
int
main(int argc, char **argv) {

	if (argc <= 1) {
		/* argv[0] may be missing when argc is 0, so fall back to a fixed name. */
		fprintf(stderr, "Usage: %s docname\n", argc > 0 ? argv[0] : "test");
		return EXIT_FAILURE;
	}

	parseDoc (argv[1]);

	/* Zero tells the shell the run succeeded. */
	return EXIT_SUCCESS;
}
2.3 xmlGetProp接口说明
  • 如果没有该属性,那么接口返回NULL
  • 如果有该属性,那么接口返回该属性的值:字符串
    • 空字符串,“”,即字符串长度为0,只有\0
    • 非空字符串,即字符串长度非0,以\0结束
  • Go 的xml,获取属性,也能达到类似效果

3 遍历xml文件-转化特定xml节点为json数据示例代码 参考官方示例的2次修改 参考转json代码 2.9.12

3.1 xml文件
<Envelope xmlns="http://schemas.xmlsoap.org/soap/envelope/">
    <Body xmlns="http://schemas.xmlsoap.org/soap/envelope/">
        <portInfo xmlns="http://wsserver.ips.navigator.com">
            <status>0</status>
        </portInfo>
    </Body>
</Envelope>
3.2 libxml2示例代码 参考tree1.c
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <jansson.h>

#ifdef LIBXML_TREE_ENABLED

/*
 *To compile this file using gcc you can type
 *gcc -g -o example example.c `xml2-config --cflags --libs` -ljansson
 */

/*
 * Convert the sibling list that starts at `node` into a new JSON object.
 *
 * Every element node becomes a member keyed by the element name; its value
 * is the object built recursively from the element's children. Every text
 * node becomes a string member keyed by the text node's own name. Nodes of
 * any other type are skipped, as are text nodes whose content cannot be
 * turned into a JSON string.
 *
 * node:     first node of the sibling list; NULL yields an empty object.
 * out_json: receives the new object on success (the caller releases it
 *           with json_decref) and NULL on failure.
 *
 * Returns 0 on success, -1 when a JSON value could not be created or
 * stored.
 */
static int
transfer_json(xmlNode *node, json_t **out_json)
{
    xmlNode *cur_node = NULL;
    json_t *new_obj   = NULL;

    *out_json = NULL;

    new_obj = json_object();
    if ( NULL == new_obj )
        return -1;

    /* Walk the siblings; recursion handles the nesting below each element. */
    for (cur_node = node; cur_node; cur_node = cur_node->next) {
        const char *name  = (const char *)cur_node->name;
        json_t *new_value = NULL;

        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("node type: Element, name: %s\n", name);
            if ( 0 != transfer_json(cur_node->children, &new_value) )
                goto fail;
        }
        else if ( cur_node->type == XML_TEXT_NODE ) {
            /* The value printed after "parent name" is the text node's own name. */
            printf("node type: TEXT, parent name: %s value: %s\n",
                name,
                cur_node->content ? (const char *)cur_node->content : "");
            new_value = json_string((const char *)cur_node->content);
            if ( NULL == new_value )
                continue;
        }
        else {
            continue;
        }

        /*
         * json_object_set_new steals the reference to new_value, so the
         * value is not released here whether or not the call succeeds.
         */
        if ( 0 != json_object_set_new(new_obj, name, new_value) )
            goto fail;
    }

    *out_json = new_obj;
    return 0;

fail:
    json_decref(new_obj);
    return -1;
}

/*
 * Depth-first search for the first element whose name is exactly `name`.
 *
 * a_node:     first node of the sibling list to search; may be NULL.
 * name:       element name to look for; a NULL name matches nothing.
 * out_node:   set to the matching element when one is found.
 * found_flag: set to 1 when a match is found; the caller initialises it
 *             to 0. It is also what stops the recursion early.
 */
static void
find_given_element_with_name(xmlNode * a_node, const char *name, xmlNode ** out_node, int *found_flag)
{
    xmlNode *cur_node = NULL;

    if ( NULL == name ) {
        return ;
    }

    for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("node type: Element, name: %s\n", cur_node->name);
            /*
             * Compare the whole name: a comparison limited to strlen(name)
             * bytes would also accept longer names that merely start
             * with `name`.
             */
            if ( 0 == strcmp((const char *)cur_node->name, name) ) {
                *out_node = cur_node;
                *found_flag = 1;
                printf("found the node type: Element, name: %s\n", cur_node->name);
                printf("---------------------------------\n");
                return ;
            }
        }

        find_given_element_with_name(cur_node->children, name, out_node, found_flag);
        if ( 1 == *found_flag ) {
            /* A match below this node ends the search; skip the remaining siblings. */
            break;
        }
    }

    return ;
}

/**
 * print_element_names:
 * @a_node: first node of the sibling list to print.
 *
 * Walks @a_node, its following siblings and all of their descendants.
 * Prints the name of each element node and, for each text node, the name
 * of its parent together with the text content.
 */
static void
print_element_names(xmlNode * a_node)
{
    xmlNode *walker = a_node;

    while (walker != NULL) {
        switch (walker->type) {
        case XML_ELEMENT_NODE:
            printf("node type: Element, name: %s\n", walker->name);
            break;
        case XML_TEXT_NODE:
            printf("node type: TEXT, parent name: %s value: %s\n", 
                walker->parent->name,
                walker->content);
            break;
        default:
            /* Other node types print nothing but are still descended into. */
            break;
        }

        print_element_names(walker->children);
        walker = walker->next;
    }
}


/**
 * Simple example to parse a file called "file.xml", 
 * walk down the DOM, and print the name of the 
 * xml elements nodes.
 */
int
main(int argc, char **argv)
{
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;
    xmlNode *params = NULL;
    json_t *params_j = NULL;
    int found_flag = 0;
    char *out_s = NULL;

    if (argc != 2)
        return(1);

    /*
     * this initialize the library and check potential ABI mismatches
     * between the version it was compiled for and the actual shared
     * library used.
     */
    LIBXML_TEST_VERSION

    /*parse the file and get the DOM */
    //doc = xmlReadFile(argv[1], NULL, 0);
    doc = xmlReadFile(argv[1], NULL, 256);//设置为256,去掉空节点elements

    if (doc == NULL) {
        printf("error: could not parse file %s\n", argv[1]);
    }

    /*Get the root element node */
    root_element = xmlDocGetRootElement(doc);

	//遍历-打印当前xml的全部节点elements
    //print_element_names(root_element);
    #if 1
    find_given_element_with_name(root_element, "portInfo", &params, &found_flag);
    if (1 == found_flag) {
        transfer_json(params->children, &params_j);
        printf("---------------------------------\n");
        //out_s = json_dumps(params_j, JSON_INDENT(4));
        out_s = json_dumps(params_j, JSON_COMPACT);//压缩形式将json结构转为字符串,即encoding序列化
        printf("resutl:%s\n",out_s);
        printf("---------------------------------\n");
        free(out_s);//注意释放相关的结构
        json_decref(params_j);
    }
    #endif

    /*free the document */
    xmlFreeDoc(doc);

    return 0;
}
#else
/* Fallback entry point used when LIBXML_TREE_ENABLED is not defined. */
int main(void) {
    fputs("Tree support not compiled in\n", stderr);
    return 0;
}
#endif

3.3 编译命令
gcc -g -o example example.c `xml2-config --cflags --libs` -ljansson

4 常用头文件说明

  • parser.h:xml的解析器,parsefile/parsememory等
  • xmlmemory.h:内存操作
  • tree.h:操作树形结构doc,xmlNode/xmlNodePtr定义

5 libxml2库安装

  • ubuntu 下,直接执行sudo apt install libxml2-dev
  • 也可以从源码包编译安装
  • 下载并安装libxml2
  • 上述示例代码,可在libxml2 2.9.12版本运行
  • libxml2 2.9版本为最近几年的新版本,比较稳定

6 xml树形结构的说明

6.1 举例xml文件
<Envelope xmlns="http://schemas.xmlsoap.org/soap/envelope/">
    <Body xmlns="http://schemas.xmlsoap.org/soap/envelope/">
        <portInfo xmlns="http://wsserver.ips.navigator.com">
            <status>0</status>
        </portInfo>
    </Body>
</Envelope>
6.2 xml树形结构
  • xml树形结构,由元素即node组成
    • 元素类型
      • 大部分为XML_ELEMENT_NODE
      • 文本为XML_TEXT_NODE
      • 还有其他类型,如CDATA、DTD、DECL等
    • 简单认为,树形结构为ELEMENT-TEXT组成
      • TEXT为叶子
      • ELEMENT非叶子
  • 默认解析情况下:Enum xmlParserOption = 0
    • ELEMENT元素的组成:DTD - ELE - DECL - DTD
    • TEXT元素的组成:TEXT
- Envelope 为根
	- TEXT:"\n" //DTD
	- Body //ELE
		- TEXT:"\n"
		- portInfo //ELE
			- TEXT:"\n" //DTD
			- status //ELE
				- TEXT:"0" //TEXT
			- status //DECL
			- TEXT:"\n" //DTD
	    - portInfo //DECL	
		- TEXT:"\n"
	- Body //DECL
	- TEXT:"\n" //DTD
  • 去掉空节点:Enum xmlParserOption = 256
    • 去掉TEXT:“\n”
    • 但也去掉元素值为空的节点,比如status的值若为空也会去掉
- Envelope 为根
	- Body
		- portInfo
			- status
				- TEXT:"0"

7 常见错误

7.1 xmlReadFile 报错

  • 正常调用xmlReadFile 加载指定的xml文件
  • xml报错:I/O warning : failed to load external entity “xxxxx”
  • 一般是文件路径有误,导致无法找到或打开该文件
  • 提供正确的文件路径,能ls找到或fopen即可修复
  • 参考linux读xml文件问题

8 参考资料

  • 18
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值