iOS: libxml2 xpath 函数调用示例, 网页解析

在 XPath 中使用 contains() 函数

#import <Foundation/Foundation.h>

#import <libxml/HTMLparser.h>
#import <libxml/HTMLtree.h>
#import <libxml/xpath.h>
#import <libxml/xpathInternals.h>


// Reads the content at a URL and decodes it into a string.
//   sURL: absolute URL string to load.
//   enc:  encoding used to decode the loaded bytes.
// Returns the decoded content, or nil when sURL is nil/empty/malformed or
// when loading or decoding fails (the failure reason is logged).
// Note: the load is synchronous and blocks the calling thread.
NSString * ReadURLContent(NSString *sURL, NSStringEncoding enc)
{
    // Messaging nil yields 0, so this covers both nil and the empty string.
    // Guarding here avoids handing a nil string to +URLWithString:.
    if (sURL.length == 0) {
        NSLog(@"ReadURLContent: empty URL string");
        return nil;
    }

    NSURL *url = [NSURL URLWithString:sURL];
    if (url == nil) {
        NSLog(@"ReadURLContent: malformed URL: %@", sURL);
        return nil;
    }

    // Check the return value, not the error pointer, to detect failure.
    NSError *error = nil;
    NSString *sResponse = [NSString stringWithContentsOfURL:url encoding:enc error:&error];
    if (sResponse == nil) {
        NSLog(@"ReadURLContent: failed to load %@: %@", sURL, error);
    }
    return sResponse;
}

// Demo: downloads the Baidu home page, parses it with the libxml2 HTML parser,
// prints the root element name, then uses an XPath contains() query to print
// the src attribute of every <script> element whose src contains ".js".
// Takes no parameters and returns nothing; all results go to stdout.
void TestParseBaidu()
{
    // Download the page content.
    NSString *sURL = @"http://www.baidu.com";
    NSStringEncoding enc = NSUTF8StringEncoding;
    NSString *sContent = ReadURLContent(sURL, enc);

    // Parse the page. A failed download (nil content) or a failed conversion
    // yields NULL bytes, which are never handed to the parser; both cases
    // fall through to the same "cannot parse" report below.
    xmlDocPtr doc = NULL;
    const char *pContent = [sContent cStringUsingEncoding:enc];
    if (pContent != NULL) {
        // The bytes were just produced as UTF-8 (enc), so state that explicitly
        // instead of letting the parser guess from the page's own declarations.
        doc = htmlReadDoc((const xmlChar *)pContent, NULL, "UTF-8", HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR);
    }
    if (doc == NULL) {
        printf("无法解析网页%s\n", [sURL cStringUsingEncoding: NSUTF8StringEncoding]);
        return;
    }

    // Print the root element; an empty document has no root element.
    xmlNodePtr node = xmlDocGetRootElement(doc);
    if (node != NULL) {
        printf("根节点名称: %s\n", (const char *)node->name);
    }

    // Use an XPath query to find the <script> nodes that link a .js file.
    const xmlChar *sScriptTag = (const xmlChar *)"src";
    const xmlChar *sXpath = (const xmlChar *)"//script [contains(@src,'.js')]";
    xmlXPathContextPtr context = xmlXPathNewContext(doc);
    xmlXPathObjectPtr result = NULL;
    if (context != NULL) {
        result = xmlXPathEvalExpression(sXpath, context);
    }

    if (result == NULL || xmlXPathNodeSetIsEmpty(result->nodesetval)) {
        // Covers evaluation failure as well as an empty (possibly NULL) node
        // set; the node set must not be dereferenced in either case.
        printf("没有选择结果\n");
    } else {
        // Print each linked script file.
        for (int i = 0; i < result->nodesetval->nodeNr; i++) {
            node = result->nodesetval->nodeTab[i];
            // xmlGetProp returns a copy that the caller must release with xmlFree.
            xmlChar *sScriptFile = xmlGetProp(node, sScriptTag);
            printf("%s: %s\n", (const char *)node->name,
                   sScriptFile != NULL ? (const char *)sScriptFile : "");
            xmlFree(sScriptFile);
        }
    }

    // Release libxml2 resources.
    if (result != NULL) {
        xmlXPathFreeObject(result);
    }
    if (context != NULL) {
        xmlXPathFreeContext(context);
    }
    xmlFreeDoc(doc);
    // NOTE(review): xmlCleanupParser tears down library-wide state; it is kept
    // here because main calls this function once, right before exiting. Move it
    // to program shutdown if this function is ever called more than once.
    xmlCleanupParser();
}

// Program entry point: runs the Baidu parsing demo once and exits with status 0.
// Command-line arguments are not used.
int main(int argc, const char * argv[])
{
    int exitCode = 0;

    // The pool scopes the autoreleased objects created while the demo runs.
    @autoreleasepool {
        TestParseBaidu();
    }

    return exitCode;
}


阅读更多
想对作者说点什么?

博主推荐

换一批

没有更多推荐了,返回首页