xerces-c编码示例

xerces-c编码示例 2008-03-17 15:17:40

分类: C/C++

上个星期所做的项目中涉及到xml文件的解析,所以首先需要选择一个合适的xml解析器,我以前用过libxml2,经过评估觉得它对dom和sax支持得不够全。最后选择了xerces-c这个开源的xml解析器。关于xerces-c的历史及具体细节,可以在对其维护的网站上查看。 http://xerces.apache.org
  编译安装好后,可以在samples目录下查看示例代码,但是很多API接口描述得并不清晰。故现将代码整理一下,方便以后自己查阅,同时也希望对其他人有些帮助。
  我将一些常用的接口封装成了一个类,包括查找结点、取结点值、更新结点值、删除结点。其中还包括从内存块中读取xml格式的内容并解析,以及将dom树中的内容输出为字节流。
 
XercesParserXml.h
 

#ifndef XERCES_PARSER_XML_H__
#define XERCES_PARSER_XML_H__

#include <string>

/**
 * Stateless convenience wrapper around a Xerces-C DOM parser.
 *
 * Every operation parses the XML document passed in as a byte string and
 * addresses one element through a dot-separated path such as
 * "root.child.grandson".
 */
class XercesParserXml {
public:
    /// Returns true when the element addressed by `node` is found in `srcByte`.
    bool has(const std::string& srcByte, const std::string& node) const;

    /// Returns the text content of the element addressed by `node`,
    /// or an empty string when it is not found.
    std::string get(const std::string& srcByte, const std::string& node) const;

    /// Sets the text content of the element addressed by `node` to `value`.
    /// Returns true when the re-serialized document was written back to `srcByte`.
    bool set(std::string& srcByte, const std::string& node, const std::string& value);

    /// Removes the element addressed by `node`.
    /// Returns true when the re-serialized document was written back to `srcByte`.
    bool del(std::string& srcByte, const std::string& node);
};

#endif

 

XercesParserXml.cpp

 

#include "XercesParserXml.h"

#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>

#if defined(XERCES_NEW_IOSTREAMS)
#include <iostream>
#else
#include <iostream.h>
#endif
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>

XERCES_CPP_NAMESPACE_USE

    DOMNode* getNode(DOMNode* node, const char* nodeName) {
        //char* ptr = (char *)nodeName;
        char ptr[4096] = {0};
        sprintf(ptr, "%s", nodeName);
        if (NULL == ptr)
            return NULL;
        char* d = ".";
        char* p = NULL;
        p = strtok(ptr, d);
        std::vector<std::string> vec;
        while (p) {
            //printf("name: %s\n", p);
            vec.push_back(p);
            p = strtok(NULL, d);
        }
        DOMNode* child;
        DOMNode* curNode = node;

        for (unsigned int i = 1; i < vec.size(); i++) {
            //std::cout << vec[i] << std::endl;
            if (0 == curNode) {
                return NULL;
            }
            for (child = curNode->getFirstChild(); child != 0; child = child->getNextSibling()) { // have no child ???
                char *name = XMLString::transcode(child->getNodeName());
                if (vec[i] == name) {
                    //printf("vec[%d]: %s\tname: %s\n", i, vec[i].c_str(), name);
                    XMLString::release(&name);
                    curNode = child;
                    break;
                }
                XMLString::release(&name);
                if (child == curNode->getLastChild()) {
                    std::cout << "such node isn't exist" << std::endl;
                    return NULL;
                }
            }
        }

        return curNode;
    }


class XStr
{
public :
    // -----------------------------------------------------------------------
    // Constructors and Destructor
    // -----------------------------------------------------------------------
    XStr(const char* const toTranscode)
    {
        // Call the private transcoding method
        fUnicodeForm = XMLString::transcode(toTranscode);
    }

    ~XStr()
    {
        XMLString::release(&fUnicodeForm);
    }


    // -----------------------------------------------------------------------
    // Getter methods
    // -----------------------------------------------------------------------
    const XMLCh* unicodeForm() const
    {
        return fUnicodeForm;
    }

private :
    // -----------------------------------------------------------------------
    // Private data members
    //
    // fUnicodeForm
    // This is the Unicode XMLCh format of the string.
    // -----------------------------------------------------------------------
    XMLCh* fUnicodeForm;
};


#define X(str) XStr(str).unicodeForm()

bool XercesParserXml::has(const std::string& srcBytes, const std::string& node) const
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return false;
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return false;
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return false;
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    // release source
    delete parser;
    delete errHandler;
    delete pInputSource;
    //doc->release();
    if (testNode == NULL) {
        return false;
    }
    else {
        return true;
    }
}


std::string XercesParserXml::get(const std::string& srcBytes, const std::string& node) const
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return "";
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return "";
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return "";
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return "";
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    // get content
    std::string result;
    char* nodeValue = NULL;
    if (testNode) {
        nodeValue = XMLString::transcode(testNode->getTextContent());
        result = nodeValue;
        XMLString::release(&nodeValue);
    }

    // release source
    delete parser;
    delete errHandler;
    delete pInputSource;
    //doc->release();

    return result;
}



bool XercesParserXml::set(std::string& srcBytes, const std::string& node, const std::string& value)
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return false;
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return false;
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return false;
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    // 4. set new content
    if (testNode) {
        testNode->setTextContent(X(value.c_str()));
    }

    // 5. serialize DOM tree and save it
    char* result;
    DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(X("Core"));
    if (NULL == impl) {
        XERCES_STD_QUALIFIER cerr << "Requested implementation is not supported" << XERCES_STD_QUALIFIER endl;
        XMLPlatformUtils::Terminate();
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    DOMWriter* theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter();
    if (NULL == theSerializer) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true);
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true);
    XMLCh* unicodeStr = theSerializer->writeToString(*rootElem);
    result = XMLString::transcode(unicodeStr);
    srcBytes = result;
    //printf("result: %s\n", result);
    XMLString::release(&unicodeStr);
    XMLString::release(&result);

    // release source
    theSerializer->release();
    delete parser;
    delete errHandler;
    delete pInputSource;

    return true;
}



bool XercesParserXml::del(std::string& srcBytes, const std::string& node)
{
    // init
    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return false;
    }
    // 1. load the right xml bytes from signal tree
    XercesDOMParser* parser = new XercesDOMParser();
    if (NULL == parser) {
        return false;
    }
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true); // optional
    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);
    InputSource* pInputSource = new MemBufInputSource((XMLByte *)srcBytes.c_str(), srcBytes.size(), X("GUID"));
    if (NULL == pInputSource) {
        delete parser;
        delete errHandler;
        return false;
    }
    // 2. parser the xml bytes to a DOM tree
    parser->parse(*pInputSource);
    DOMDocument* doc = parser->getDocument();
    if (NULL == doc) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 3. find node of parameter in DOM tree
    DOMElement* rootElem = NULL;
    rootElem = doc->getDocumentElement();
    DOMNode* testNode = NULL;
    testNode = getNode(rootElem, node.c_str());
    if (NULL == testNode) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    // 4. remove identifier node from DOM tree
    DOMNode* parentNode = testNode->getParentNode();
    DOMNode* oldNode = parentNode->removeChild(testNode);
    oldNode->release();

    // 5. serialize DOM tree and save it
    char* result;
    DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(X("Core"));
    if (NULL == impl) {
        XERCES_STD_QUALIFIER cerr << "Requested implementation is not supported" << XERCES_STD_QUALIFIER endl;
        XMLPlatformUtils::Terminate();
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    DOMWriter* theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter();
    if (NULL == theSerializer) {
        delete parser;
        delete errHandler;
        delete pInputSource;
        return false;
    }
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, true);
    if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true))
        theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, true);
    XMLCh* unicodeStr = theSerializer->writeToString(*rootElem);
    result = XMLString::transcode(unicodeStr);
    srcBytes = result;
    //printf("result: %s\n", result);
    XMLString::release(&unicodeStr);
    XMLString::release(&result);


    // release source
    theSerializer->release();
    delete parser;
    delete errHandler;
    delete pInputSource;

    return true;
}



int main()
{
        char* xmlFile = "x2.xml";

        char buf[4096] = {0};
        FILE* fp = fopen(xmlFile, "rb");
        if (!fp) {
            perror(xmlFile);
            exit(1);
        }
        size_t size;
        size = fread(buf, 1, sizeof(buf), fp);
        fclose(fp);
        std::cout << "size to be parser: " << size << std::endl;
        std::string srcDocBytes;
        srcDocBytes.assign(buf, size);
        std::string backup = srcDocBytes;

        XercesParserXml xml;
        bool flag = false;
        char testNode[1000] = {0};
        sprintf(testNode, "%s", "SendRoutingInfoRes.imsi"); //其中各结点以“.”分隔,格式如下:root.child.grandson
        std::string result;
        for (int i = 0; i < 1; i++) {
            srcDocBytes = backup;
            printf("\nfind node test ... \n");
            flag = xml.has(srcDocBytes, testNode);
            printf("\nget node test ...\n");
            result = xml.get(srcDocBytes, testNode);
            printf("get value: %s\n", result.c_str());
            printf("\nset node test ...\n");
            xml.set(srcDocBytes, testNode, "gnu means gnu not unix");
            printf("new content: %s\n", srcDocBytes.c_str());
            printf("\nremove test ...\n");
            xml.del(srcDocBytes, testNode);
            printf("after del operation: %s\n", srcDocBytes.c_str());
        }
        if (flag) {
            printf("%s has found\n", testNode);
        }
        else {
            printf("%s hasn't found\n", testNode);
        }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
如果您的 Xerces-C 库在进行大文件校验时崩溃,可能是因为 DOM 解析器(例如 `XercesDOMParser`)会把整个文档构建成一棵驻留内存的 DOM 树,导致内存不足。为了解决这个问题,您可以使用 Xerces-C 的 SAX 接口来处理 XML 文件。SAX 解析器在读取文件的同时以事件回调的方式逐个节点通知处理程序,而不会在内存中保留整棵文档树。

以下是使用 SAX 接口进行 XSD 校验的示例代码:

```c++
#include <iostream>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/sax/AttributeList.hpp>
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/XMLValidator.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/XMLString.hpp>

using namespace xercesc;

class XSDValidator : public HandlerBase {
public:
    void startElement(const XMLCh* const name, AttributeList& attrs) {
        // Add your validation logic here
    }
};

int main() {
    XMLPlatformUtils::Initialize();
    {
        // 所有 Xerces 对象必须在 Terminate() 之前销毁,因此放在独立的作用域中
        SAXParser parser;
        parser.setValidationScheme(SAXParser::Val_Always);
        parser.setDoNamespaces(true);
        parser.setDoSchema(true);

        XSDValidator handler;
        parser.setErrorHandler(&handler);
        parser.setDocumentHandler(&handler);

        XMLCh* path = XMLString::transcode("your_file.xml");
        LocalFileInputSource source(path);
        XMLString::release(&path);

        parser.parse(source);
    }
    XMLPlatformUtils::Terminate();
    return 0;
}
```

在上面的示例中,我们使用 Xerces-C 的 SAX 接口来处理 XML 文件。我们创建了一个名为 `XSDValidator` 的处理程序,它继承自 `HandlerBase` 并重写了 `startElement` 方法来添加自己的验证逻辑。在 `main` 函数中,我们创建了一个 `SAXParser` 对象,将其配置为始终进行验证,并通过 `setDoSchema(true)` 启用 XML Schema(XSD)处理。然后,我们将 `XSDValidator` 处理程序分配给 `parser` 对象,并使用 `LocalFileInputSource` 对象来指定要验证的 XML 文件。最后,我们调用 `parse` 方法来开始解析文件。

请注意,在上面的示例中,解析器不会在内存中构建整棵文档树,而是通过 SAX 接口逐个节点处理 XML 文件,从而避免了 DOM 方式带来的内存不足问题。另外,`HandlerBase` 默认会在遇到致命错误时抛出 `SAXParseException`,实际使用时应在 `parse` 调用外层加上相应的 `try`/`catch`。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值