[Linux C]利用libxml2解析xml文件

最新推荐文章于 2024-06-22 22:32:54 发布

gnnulzy

最新推荐文章于 2024-06-22 22:32:54 发布

阅读量1.8k

点赞数

分类专栏： linux应用

linux应用专栏收录该内容

17 篇文章 0 订阅

订阅专栏

为了解析xml，可以使用Linux下默认安装的libxml2。

[cpp]view plaincopy
/* 
    a.c 
    功能：利用libxml2解析xml文件 
*/  
  
#include <stdio.h>  
#include <stdlib.h>  
#include <string.h>  
#include <unistd.h>  
#include <libgen.h>  
#include <libxml/xmlmemory.h>  
#include <libxml/parser.h>  
#include <libxml/xpath.h>  
  
/*
 * Get the full path of the currently running executable.
 *
 * Resolves the "/proc/self/exe" symbolic link and copies its target into
 * lpOut as a NUL-terminated string.
 *
 * lpOut: caller-supplied output buffer. Its size is not passed in, so it must
 *        be able to hold at least BUFSIZ bytes (the longest string written
 *        here is BUFSIZ - 1 characters plus the terminator).
 *
 * Returns 0 on success. On failure "." is stored in lpOut and -1 is returned.
 */
int GetCurFilePath(char *lpOut)
{
    char chPath[BUFSIZ];

    /* readlink() never appends a NUL and silently truncates to the given
     * size, so reserve one byte and terminate the result explicitly. */
    ssize_t nLen = readlink("/proc/self/exe", chPath, sizeof(chPath) - 1);
    if (nLen < 0)
    {
        strcpy(lpOut, ".");
        return -1;
    }

    chPath[nLen] = '\0';
    strcpy(lpOut, chPath);
    return 0;
}
  
/*
 * Get the directory that contains the currently running executable.
 *
 * lpOut: caller-supplied output buffer; its size is not passed in, so it must
 *        be able to hold at least BUFSIZ bytes.
 *
 * Returns 0 on success. If the executable path cannot be determined, "." is
 * stored in lpOut (so the buffer is always a valid string) and -1 is returned.
 */
int GetCurDir(char *lpOut)
{
    char chPath[BUFSIZ] = { 0 };

    if (GetCurFilePath(chPath) < 0)
    {
        strcpy(lpOut, ".");
        return -1;
    }

    /* dirname() may either modify its argument in place or return a pointer
     * to separate storage, so the returned pointer is the only reliable
     * result; copy from it rather than from chPath. */
    strcpy(lpOut, dirname(chPath));

    return 0;
}
  
/*
 * Parse the XML file named docname and return the resulting document pointer.
 * Prints a message to stderr and returns NULL when parsing fails.
 */
xmlDocPtr getdoc(char *docname)
{
    xmlDocPtr parsed = xmlParseFile(docname);

    if (parsed != NULL)
        return parsed;

    fprintf(stderr, "Document not parsed successfully.\n");
    return NULL;
}
  
/*
 * Evaluate the XPath expression xpath against doc and return the result
 * object. Returns NULL (after printing a message) when the XPath context
 * cannot be created, when evaluation fails, or when the resulting node set is
 * empty. A non-NULL result must be released with xmlXPathFreeObject().
 */
xmlXPathObjectPtr getnodeset(xmlDocPtr doc, xmlChar *xpath)
{
    xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
    if (!ctx)
    {
        printf("Error in xmlXPathNewContent\n");
        return NULL;
    }

    /* The context is only needed for the evaluation itself. */
    xmlXPathObjectPtr found = xmlXPathEvalExpression(xpath, ctx);
    xmlXPathFreeContext(ctx);

    if (!found)
    {
        printf("Error in xmlXPathEvalExpression\n");
        return NULL;
    }

    if (!xmlXPathNodeSetIsEmpty(found->nodesetval))
        return found;

    /* The expression matched nothing: drop the empty result object. */
    xmlXPathFreeObject(found);
    printf("No result\n");
    return NULL;
}
  
// 解析xmlPath路径的结点  
void testReadXmlDoc(char *filepath, char *xmlPath)  
{  
    xmlDocPtr doc = getdoc(filepath);  
    if(NULL == doc)  
        return ;  
  
    xmlChar *xpath = (xmlChar*) xmlPath;  
    xmlXPathObjectPtr result = getnodeset(doc, xpath);          // 获取结果集  
    if(result)  
    {  
        xmlNodeSetPtr nodeset = result->nodesetval;  
        xmlChar *name, *value;  
        printf("nodeset->nodeNr = %d\n", nodeset->nodeNr);        // 打印结果集中结点个数  
        for(int i = 0; i < nodeset->nodeNr; i++)  
        {  
            xmlNodePtr cur = nodeset->nodeTab[i];                // products  
            printf("cur->name = %s\n", cur->name);  
            cur = cur->xmlChildrenNode;  
            while(cur)  
            {  
                if(xmlStrcmp(cur->name, (const xmlChar*) "text"))        // cur->name不为"text"  
                {  
                    printf("cur->name = %s\t", cur->name);  
                    name = xmlGetProp(cur, (const xmlChar*) "name");    // 获取属性值  
                    value = xmlGetProp(cur, (const xmlChar*) "value");  
                    printf("name = %s, value = %s\n", name, value);  
                    xmlFree(name);  
                    xmlFree(value);  
                }  
                cur = cur->next;  
            }  
            printf("\n");  
        }  
        xmlXPathFreeObject(result);  
    }  
    xmlFreeDoc(doc);  
    xmlCleanupParser();  
}  
  
/*
 * Entry point: builds the path "<directory of the executable>/dprod.xml" and
 * prints every /allproducts/products node found in that file.
 *
 * Returns EXIT_SUCCESS normally, EXIT_FAILURE if the path does not fit in the
 * local buffer.
 */
int main(void)
{
    /* GetCurDir() copies from a BUFSIZ-sized buffer without a length check,
     * so the destination has to be at least that large. */
    char curDir[BUFSIZ] = {0};
    char docname[BUFSIZ + sizeof("/dprod.xml")] = {0};

    /* Fall back to the current working directory when the executable's
     * location cannot be determined. */
    if (GetCurDir(curDir) < 0)
        strcpy(curDir, ".");

    int nLen = snprintf(docname, sizeof(docname), "%s/dprod.xml", curDir);
    if (nLen < 0 || (size_t) nLen >= sizeof(docname))
    {
        fprintf(stderr, "Path of dprod.xml is too long.\n");
        return EXIT_FAILURE;
    }

    testReadXmlDoc(docname, "/allproducts/products");

    return EXIT_SUCCESS;
}

makefile文件：

[python]view plaincopy
# Builds the single-file program $(BIN) from $(BIN).c and links it with libxml2.
# INC is the directory passed to -I so that <libxml/...> headers are found.
CC=gcc  
CFLAGS=  
BIN=a  
INC=/usr/include/libxml2  
  
# NOTE(review): make requires recipe lines to start with a TAB; the 4-space
# indentation below looks like a copy/paste artifact of the web page — confirm
# before using this file as-is.
# NOTE: the build log in this article shows -std=c99 producing an
# "implicit declaration of readlink" warning; the author reports that
# -std=gnu99 makes it go away.
$(BIN): $(BIN).c  
    $(CC) $(CFLAGS) -o $(BIN) $(BIN).c -I$(INC) -lxml2 -std=c99  
  
# Remove object files and the built binary.
clean:  
    rm -f *.o $(BIN)  

xml文件(dprod.xml)内容：

[html]view plaincopy
<?xml version="1.0"?>  
<allproducts>  
  <products>  
    <product name="name11" value="value11" />  
    <product name="name12" value="value12" />  
    <product name="name13" value="value13" />  
    <product name="name14" value="value14" />  
  </products>  
  <products>  
    <product name="name21" value="value21" />  
    <product name="name22" value="value22" />  
    <product name="name23" value="value23" />  
  </products>  
  <products>  
    <product name="name31" value="value31" />  
    <product name="name32" value="value32" />  
  </products>  
</allproducts>  

编译运行：

[plain]view plaincopy
[zcm@tmp #115]$make  
gcc  -o a a.c -I/usr/include/libxml2 -lxml2 -std=c99  
a.c: 在函数‘GetCurFilePath’中:  
a.c:18: 警告：隐式声明函数‘readlink’  
[zcm@tmp #116]$./a  
nodeset->nodeNr = 3  
cur->name = products  
cur->name = product  name = name11, value = value11  
cur->name = product  name = name12, value = value12  
cur->name = product  name = name13, value = value13  
cur->name = product  name = name14, value = value14  
  
cur->name = products  
cur->name = product  name = name21, value = value21  
cur->name = product  name = name22, value = value22  
cur->name = product  name = name23, value = value23  
  
cur->name = products  
cur->name = product  name = name31, value = value31  
cur->name = product  name = name32, value = value32  
  
[zcm@tmp #117]$  

说明：对于编译中出现的“a.c:18: 警告：隐式声明函数‘readlink’”错误，实在不能明白。我查了下手册，这个函数在unistd.h中，而且我也已经#include了，为什么还会出现这个错误呢？

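后来突然想到，可能是-std=c99的原因：在严格的ISO C模式下，glibc默认不会启用POSIX特性宏，<unistd.h>也就不会声明readlink()这类POSIX函数。将它改为-std=gnu99（或者保留-std=c99，但在包含任何头文件之前加上 #define _POSIX_C_SOURCE 200112L）后，这个警告就没有了!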

--------------------------------------------------------------------------------------------------------------------------------------------------

修改了xml文件和上面源码中的testReadXmlDoc()后，发现结果相当神奇，看来对libxml2的理解还是比较缺乏。

1. 修改xml文件内容：

[html]view plaincopy
<?xml version="1.0"?>  
<allproducts>  
  <products>h1  
    <product name="name11" value="value11" />h2  
    <product name="name12" value="value12" />h3  
    <product name="name13" value="value13" />h4  
    <product name="name14" value="value14" />h5  
  </products>  
  <products>  
    <product name="name21" value="value21" />  
    <product name="name22" value="value22" />  
    <product name="name23" value="value23" />  
  </products>  
  <products>  
    <product name="name31" value="value31" />g1  
    <product name="name32" value="value32" />  
g2</products>  
</allproducts>  

2. 修改testReadXmlDoc()

[cpp]view plaincopy
// 解析xmlPath路径的结点  
void testReadXmlDoc(char *filepath, char *xmlPath)  
{  
    xmlDocPtr doc = getdoc(filepath);  
    if(NULL == doc)  
        return ;  
  
    xmlChar *xpath = (xmlChar*) xmlPath;  
    xmlXPathObjectPtr result = getnodeset(doc, xpath);          // 获取结果集  
    if(result)  
    {  
        xmlNodeSetPtr nodeset = result->nodesetval;  
        xmlChar *name, *value;  
        printf("nodeset->nodeNr = %d\n", nodeset->nodeNr);        // 打印结果集中结点个数  
        for(int i = 0; i < nodeset->nodeNr; i++)  
        {  
            xmlNodePtr cur = nodeset->nodeTab[i];                // products  
            printf("cur->name = %s\n", cur->name);  
            cur = cur->xmlChildrenNode;  
            int ctext = 0;  
            while(cur)  
            {  
                if(xmlStrcmp(cur->name, (const xmlChar*) "text"))        // cur->name不为"text"  
                {  
                    printf("cur->name = %s\t", cur->name);  
                    name = xmlGetProp(cur, (const xmlChar*) "name");    // 获取属性值  
                    value = xmlGetProp(cur, (const xmlChar*) "value");  
                    printf("name = %s, value = %s\n", name, value);  
                    xmlFree(name);  
                    xmlFree(value);  
                }  
                else  
                {  
                    ctext++;  
                    xmlChar *v = xmlNodeListGetString(doc, cur, 1);  
                    printf("cur->content = [%s], v = [%s]", cur->content, v); // cur->content获取cur的内容  
                    xmlFree(v);  
                }  
                cur = cur->next;  
            }  
            printf("ctext = %d\n", ctext);  
            printf("\n");  
        }  
        xmlXPathFreeObject(result);  
    }  
    xmlFreeDoc(doc);  
    xmlCleanupParser();  
}  

运行结果：

[plain]view plaincopy
[zcm@tmp #168]$make  
gcc  -o a a.c -I/usr/include/libxml2 -lxml2 -std=gnu99  
[zcm@tmp #169]$./a  
nodeset->nodeNr = 3  
cur->name = products  
cur->content = [h1  
    ], v = [h1  
    h2  
    h3  
    h4  
    h5  
  ]cur->name = product   name = name11, value = value11  
cur->content = [h2  
    ], v = [h2  
    h3  
    h4  
    h5  
  ]cur->name = product   name = name12, value = value12  
cur->content = [h3  
    ], v = [h3  
    h4  
    h5  
  ]cur->name = product   name = name13, value = value13  
cur->content = [h4  
    ], v = [h4  
    h5  
  ]cur->name = product   name = name14, value = value14  
cur->content = [h5  
  ], v = [h5  
  ]ctext = 5  
  
cur->name = products  
cur->content = [  
    ], v = [  
      
      
      
  ]cur->name = product   name = name21, value = value21  
cur->content = [  
    ], v = [  
      
      
  ]cur->name = product   name = name22, value = value22  
cur->content = [  
    ], v = [  
      
  ]cur->name = product   name = name23, value = value23  
cur->content = [  
  ], v = [  
  ]ctext = 4  
  
cur->name = products  
cur->content = [  
    ], v = [  
    g1  
      
g2]cur->name = product   name = name31, value = value31  
cur->content = [g1  
    ], v = [g1  
      
g2]cur->name = product   name = name32, value = value32  
cur->content = [  
g2], v = [  
g2]ctext = 3  
  
[zcm@tmp #170]$  

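由此可见，cur->content只是当前文本结点自身的内容，而xmlNodeListGetString(doc, cur, 1)会把从cur开始、其后所有兄弟文本结点的内容拼接在一起返回（中间的元素结点被跳过）。所以一般情况下，我们用的比较多的可能会是cur->content这个东西了!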

补充：

在网上看到一个人的写法，可以在解析xml文件时，直接忽略掉结点之间的无效空白。对于本文，就是将：

doc = xmlParseFile(docname);　－－－＞修改为：doc = xmlReadFile(docname, "UTF-8", XML_PARSE_NOBLANKS); // 第3个参数是关键（注意：xmlParseFile()只接受文件名一个参数，能同时指定编码和解析选项的是xmlReadFile()）

gnnulzy

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
[Linux C]利用libxml2解析xml文件

为了解析xml，可以使用Linux下默认安装的libxml2。[cpp] view plaincopy/* a.c 功能：利用libxml2解析xml文件 */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #...
复制链接

扫一扫

专栏目录