iOS开发之html解析

使用XPath解析html
可以从此处 https://github.com/topfunky/hpple下载工程,将TFHpple.h,TFHpple.m,TFHppleElement.h,TFHppleElement.m,XPathQuery.h,XPathQuery.m加到自己的项目中,在Frameworks中导入libxml2.x
iOS开发之html解析

在项目中找到Other Linker Flags,加入-lxml2
iOS开发之html解析

在项目中找到Header Search Paths,加入/usr/include/libxml2
代码如下:

// Downloads the news index page, converts it from GBK to UTF-8, and collects
// the title text and absolute link of a fixed window of <a> elements into
// currentNewsArr / currentHrefArr (both declared outside this snippet).
// This snippet uses manual reference counting (explicit -release).
NSString *urlString = @"http://www.xiyou.edu.cn/new/lm.jsp?urltype=tree.TreeTempUrl&wbtreeid=724";

// NOTE: -initWithContentsOfURL: is synchronous and blocks the calling thread.
NSData *htmlData = [[NSData alloc] initWithContentsOfURL:[NSURL URLWithString:urlString]];

// Re-encode the page as UTF-8 before handing it to the parser.
NSData *toHtmlData = [self toUTF8:htmlData];

TFHpple *xpathParser = [[TFHpple alloc] initWithHTMLData:toHtmlData];

// Every <a> element in the document, in document order.
NSArray *aArray = [xpathParser searchWithXPathQuery:@"//a"];

// Take up to 15 anchors at indices 87..101. The upper bound is clamped to the
// number of anchors actually found so a shorter page cannot raise NSRangeException.
NSUInteger firstIndex = 87;
NSUInteger endIndex = MIN((NSUInteger)102, [aArray count]);

for (NSUInteger i = firstIndex; i < endIndex; i++) {
    TFHppleElement *aElement = [aArray objectAtIndex:i];

    // The title text sits three levels below the anchor. -firstChild returns
    // nil for an element without children, and messaging nil yields nil, so a
    // differently shaped anchor produces aStr == nil instead of crashing.
    TFHppleElement *textNode = [[[aElement firstChild] firstChild] firstChild];
    NSString *aStr = [textNode content];
    NSLog(@"aStr:%@",aStr);

    NSDictionary *aAttributeDict = [aElement attributes];
    NSLog(@"aAttributeDict:%@",aAttributeDict);

    // Read the href attribute of the anchor.
    NSString *href = [aAttributeDict objectForKey:@"href"];
    if (aStr == nil || href == nil) {
        // -addObject: throws on nil; skip this anchor in both arrays so that
        // titles and links stay index-aligned.
        continue;
    }

    // The site uses root-relative links, so prefix the host.
    NSString *hrefStr = [NSString stringWithFormat:@"http://www.xiyou.edu.cn%@",href];
    NSLog(@"hrefStr:%@",hrefStr);

    [currentNewsArr addObject:aStr];
    [currentHrefArr addObject:hrefStr];
}

// Released unconditionally: both objects were alloc'ed above whether or not
// any anchors were found.
[htmlData release];
[xpathParser release];


// If the page being parsed is not UTF-8 encoded (for example GBK), convert it
// to UTF-8 first and parse the converted data.

/// Re-encodes GB 18030 / GBK page data as UTF-8.
///
/// The bytes are decoded as GB 18030, the page's GBK charset declaration is
/// rewritten to UTF-8 so the declaration matches the new encoding, and the
/// result is encoded as UTF-8.
///
/// @param sourceData Raw page bytes, expected to be GB 18030 / GBK encoded.
/// @return Autoreleased UTF-8 data, or nil when sourceData is nil or cannot
///         be decoded as GB 18030.
-(NSData *) toUTF8:(NSData *)sourceData {
    if (sourceData == nil) {
        return nil;
    }

    CFStringRef gbkStr = CFStringCreateWithBytes(NULL, [sourceData bytes], [sourceData length], kCFStringEncodingGB_18030_2000, false);
    if (gbkStr == NULL) {
        return nil;
    }

    // Toll-free bridged view of the CF string; ownership stays with gbkStr.
    NSString *gbkString = (NSString *)gbkStr;

    // Adjust to match the charset declaration in the page source; here GBK is
    // rewritten to UTF-8. The inner double quotes must be escaped.
    NSString *utf8_String = [gbkString stringByReplacingOccurrencesOfString:@"META http-equiv=\"Content-Type\" content=\"text/html; charset=GBK\""
                                                                 withString:@"META http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\""];

    NSData *utf8Data = [utf8_String dataUsingEncoding:NSUTF8StringEncoding];

    // CFStringCreateWithBytes follows the Create Rule, so we own gbkStr and
    // must release it. Done only after the last use of the bridged string.
    CFRelease(gbkStr);

    return utf8Data;
}

iphone:解析html的第三方库hpple初试

使用hpple可以较为便利地利用XPath解析html。

做法:http://lwxshow.com/ios-iphone-development-teaches-you-how-to-use-the-objective-c-parsing-html-lwxshow-com

(相关:

    http://stackoverflow.com/questions/405749/parsing-html-on-the-iphone 

           http://stackoverflow.com/questions/9746745/xpath-attributes-selection)

它里面说的挺详细的:就是引用 https://github.com/topfunky/hpple 上的hpple库,再结合libxml,就可以使用xpath搜索html了。

关于xpath的可以参考:w3school的教程 http://www.w3school.com.cn/xpath/index.asp

相关配置好了之后就可以直接使用:

复制代码
/// Demo: loads a dictionary page, parses it with hpple and logs the content,
/// tag name and href attribute of the first <title> element.
- (void)viewDidLoad
{
    [super viewDidLoad];

    NSError *error = nil;
    NSURL *url = [NSURL URLWithString:@"http://dict.youdao.com/m/search?keyfrom=dict.mindex&vendor=&q=apple"];

    // NOTE: this load is synchronous and runs on the thread calling viewDidLoad.
    NSString *html = [NSString stringWithContentsOfURL:url
                                              encoding:NSASCIIStringEncoding
                                                 error:&error];
    if (html == nil) {
        // Check the return value, not the error pointer, to detect failure.
        NSLog(@"load failed: %@", error);
        return;
    }

    NSData *htmlData = [html dataUsingEncoding:NSUTF8StringEncoding];
    TFHpple *xpathParser = [[TFHpple alloc] initWithHTMLData:htmlData];
    NSArray *elements  = [xpathParser searchWithXPathQuery:@"//title"]; // get the title

    // -count returns NSUInteger; cast and use %lu so the format matches on
    // both 32-bit and 64-bit targets.
    NSLog(@"%lu",(unsigned long)[elements count]);

    // Guard against a page without <title>: objectAtIndex:0 on an empty array
    // raises NSRangeException.
    if ([elements count] > 0) {
        TFHppleElement *element = [elements objectAtIndex:0];

        NSString *content = [element content];
        NSString *tagname = [element tagName];
        NSString *attr = [element objectForKey:@"href"];
        NSLog(@"content = %@",content);
        NSLog(@"tagname = %@",tagname);
        NSLog(@"attr is = %@",attr);
    }

#if !__has_feature(objc_arc)
    // Without ARC the alloc/init above must be balanced here.
    [xpathParser release];
#endif
}
  //
  // TFHppleElement.m
  // Hpple
  //
  // Created by Geoffrey Grosenbach on 1/31/09.
  //
  // Copyright (c) 2009 Topfunky Corporation, http://topfunky.com
  //
  // MIT LICENSE
  //
  // Permission is hereby granted, free of charge, to any person obtaining
  // a copy of this software and associated documentation files (the
  // "Software"), to deal in the Software without restriction, including
  // without limitation the rights to use, copy, modify, merge, publish,
  // distribute, sublicense, and/or sell copies of the Software, and to
  // permit persons to whom the Software is furnished to do so, subject to
  // the following conditions:
  //
  // The above copyright notice and this permission notice shall be
  // included in all copies or substantial portions of the Software.
  //
  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #import "TFHppleElement.h"

  // Keys of the node dictionary that backs each element.
  static NSString * const TFHppleNodeContentKey = @"nodeContent";
  static NSString * const TFHppleNodeNameKey = @"nodeName";
  static NSString * const TFHppleNodeChildrenKey = @"nodeChildArray";
  static NSString * const TFHppleNodeAttributeArrayKey = @"nodeAttributeArray";
  static NSString * const TFHppleNodeAttributeNameKey = @"attributeName";

  // Private redeclaration so the implementation can assign `parent`.
  @interface TFHppleElement ()
  @property (nonatomic, retain, readwrite) TFHppleElement *parent;
  @end

  // Wrapper around one node dictionary. Written for manual reference counting.
  @implementation TFHppleElement

  @synthesize parent;

  - (void) dealloc
  {
      [node release];
      [parent release];
      [super dealloc];
  }

  // Stores (and retains) the node dictionary this element wraps.
  - (id) initWithNode:(NSDictionary *) theNode
  {
      if (!(self = [super init]))
          return nil;

      [theNode retain];
      node = theNode;

      return self;
  }

  // Convenience constructor returning an autoreleased element.
  + (TFHppleElement *) hppleElementWithNode:(NSDictionary *) theNode {
      return [[[[self class] alloc] initWithNode:theNode] autorelease];
  }

  #pragma mark -

  // Value stored under the "nodeContent" key, or nil if absent.
  - (NSString *) content
  {
      return [node objectForKey:TFHppleNodeContentKey];
  }

  // Value stored under the "nodeName" key, or nil if absent.
  - (NSString *) tagName
  {
      return [node objectForKey:TFHppleNodeNameKey];
  }

  // Builds a fresh array of child elements on every call; each child gets
  // its `parent` set to the receiver (which the child retains).
  - (NSArray *) children
  {
      NSMutableArray *children = [NSMutableArray array];
      for (NSDictionary *child in [node objectForKey:TFHppleNodeChildrenKey]) {
          TFHppleElement *element = [TFHppleElement hppleElementWithNode:child];
          element.parent = self;
          [children addObject:element];
      }
      return children;
  }

  // First child element, or nil when the node has no children.
  - (TFHppleElement *) firstChild
  {
      NSArray * children = self.children;
      if (children.count)
          return [children objectAtIndex:0];
      return nil;
  }

  // Flattens the node's attribute array into a name -> content dictionary.
  - (NSDictionary *) attributes
  {
      NSMutableDictionary * translatedAttributes = [NSMutableDictionary dictionary];
      for (NSDictionary * attributeDict in [node objectForKey:TFHppleNodeAttributeArrayKey]) {
          id attributeValue = [attributeDict objectForKey:TFHppleNodeContentKey];
          id attributeName = [attributeDict objectForKey:TFHppleNodeAttributeNameKey];
          // -setObject:forKey: raises NSInvalidArgumentException when either
          // argument is nil, so skip attributes missing a name or content.
          if (attributeValue == nil || attributeName == nil) {
              continue;
          }
          [translatedAttributes setObject:attributeValue forKey:attributeName];
      }
      return translatedAttributes;
  }

  // Attribute lookup by name; nil when the attribute is not present.
  - (NSString *) objectForKey:(NSString *) theKey
  {
      return [[self attributes] objectForKey:theKey];
  }

  // Debug description: that of the underlying node dictionary.
  - (id) description
  {
      return [node description];
  }

  @end

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值