// Download a page, normalise it to UTF-8, and query every <a> element with hpple.
// Manual reference counting: objects created with alloc are released below.
NSString *urlString = @"http://www.xiyou.edu.cn/new/lm.jsp?urltype=tree.TreeTempUrl&wbtreeid=724";
NSData *htmlData = [[NSData alloc] initWithContentsOfURL:[NSURL URLWithString:urlString]];
NSData *toHtmlData = [self toUTF8:htmlData];
TFHpple *xpathParser = [[TFHpple alloc] initWithHTMLData:toHtmlData];
NSArray *aArray = [xpathParser searchWithXPathQuery:@"//a"];
// The original tested an undeclared `span`; the query result is `aArray`.
if ([aArray count] > 0) {
    // Matched <a> elements are available in aArray here.
}
// Release unconditionally: the original only released inside the `if`,
// leaking both objects whenever the query matched nothing.
[htmlData release];
[xpathParser release];
// If the page being parsed is not UTF-8 encoded (e.g. it is GBK), convert it
// to UTF-8 before handing it to the parser.
//
// Returns sourceData unchanged when it is nil, empty, already valid UTF-8, or
// cannot be decoded as GB18030 (a superset of GBK); otherwise returns the
// text re-encoded as UTF-8.
// NOTE(review): a <meta charset=...> declaration inside the HTML is not
// rewritten here — confirm whether the parser honours it over the byte encoding.
-(NSData *) toUTF8:(NSData *)sourceData {
    if ([sourceData length] == 0) {
        return sourceData;
    }

    // Bytes that decode cleanly as UTF-8 need no conversion.
    NSString *asUTF8 = [[[NSString alloc] initWithData:sourceData
                                              encoding:NSUTF8StringEncoding] autorelease];
    if (asUTF8 != nil) {
        return sourceData;
    }

    NSStringEncoding gbEncoding =
        CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingGB_18030_2000);
    NSString *decoded = [[[NSString alloc] initWithData:sourceData
                                               encoding:gbEncoding] autorelease];
    if (decoded == nil) {
        // Unknown encoding: fall back to the raw bytes rather than returning nothing.
        return sourceData;
    }
    return [decoded dataUsingEncoding:NSUTF8StringEncoding];
}
iPhone:解析 HTML 的第三方库 hpple 初试
使用 hpple 可以较为便利地利用 XPath 解析 HTML。
(相关:
http://stackoverflow.com/questions/405749/parsing-html-on-the-iphone
http://stackoverflow.com/questions/9746745/xpath-attributes-selection
)
上面的链接里说得挺详细:引用 https://github.com/topfunky/hpple 上的 hpple 库,再结合 libxml,就可以使用 XPath 搜索 HTML 了。
关于xpath的可以参考:w3school的教程 http://www.w3school.com.cn/xpath/index.asp
相关配置好了之后就可以直接使用:
// Loads a dictionary page, parses it with hpple and logs the <title> element's
// content, tag name and "href" attribute.
// NOTE(review): the download is synchronous and runs wherever viewDidLoad is
// called — consider moving it off the main thread.
- (void)viewDidLoad {
    [super viewDidLoad];

    NSURL *url = [NSURL URLWithString:@"http://dict.youdao.com/m/search?keyfrom=dict.mindex&vendor=&q=apple"];

    // Fetch the raw bytes directly. The original decoded the page as ASCII and
    // re-encoded it as UTF-8, which mangles or drops any non-ASCII text.
    NSError *error = nil;
    NSData *htmlData = [NSData dataWithContentsOfURL:url options:0 error:&error];
    if (htmlData == nil) {
        NSLog(@"failed to load %@: %@", url, error);
        return;
    }

    TFHpple *xpathParser = [[TFHpple alloc] initWithHTMLData:htmlData];
    NSArray *elements = [xpathParser searchWithXPathQuery:@"//title"]; // get the title
    NSLog(@"%lu", (unsigned long)[elements count]);

    // Guard against pages without a <title>; objectAtIndex:0 throws on an empty array.
    if ([elements count] > 0) {
        TFHppleElement *element = [elements objectAtIndex:0];
        NSString *content = [element content];
        NSString *tagname = [element tagName];
        NSString *attr = [element objectForKey:@"href"];
        NSLog(@"content = %@",content);
        NSLog(@"tagname = %@",tagname);
        NSLog(@"attr is = %@",attr);
    }

    // Manual reference counting: balance the alloc above.
    [xpathParser release];
}
//
// TFHppleElement.m
// Hpple
//
// Created by Geoffrey Grosenbach on 1/31/09.
//
// Copyright (c) 2009 Topfunky Corporation, http://topfunky.com
//
// MIT LICENSE
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#import "TFHppleElement.h"
-
-
// Keys used to read the node dictionary that backs a TFHppleElement.
// (Stray "-" lines between these declarations were paste artifacts and have
// been removed; they are syntax errors at file scope.)

// Text content of a node or of an attribute entry.
static NSString * const TFHppleNodeContentKey = @"nodeContent";
// Tag name of a node.
static NSString * const TFHppleNodeNameKey = @"nodeName";
// Array of child node dictionaries.
static NSString * const TFHppleNodeChildrenKey = @"nodeChildArray";
// Array of attribute dictionaries belonging to a node.
static NSString * const TFHppleNodeAttributeArrayKey = @"nodeAttributeArray";
// Name of an attribute inside an attribute dictionary.
static NSString * const TFHppleNodeAttributeNameKey = @"attributeName";

// Class extension: makes `parent` writable inside this file so that an element
// can record its parent when it creates child elements.
// (Stray "-" lines from the paste were removed; inside an @interface they
// would be parsed as the start of method declarations.)
@interface TFHppleElement ()

// Element this one was created from; retained, and released in -dealloc.
@property (nonatomic, retain, readwrite) TFHppleElement *parent;

@end

// Wraps one node dictionary and exposes its content, tag name, children and
// attributes. Uses manual reference counting.
// (Stray "-" lines interleaved with the code by the paste were removed.)
@implementation TFHppleElement

@synthesize parent;

// Releases the node dictionary retained in -initWithNode: and the parent.
- (void) dealloc
{
  [node release];
  [parent release];
  [super dealloc];
}

// Designated initializer. Retains theNode as the backing dictionary.
- (id) initWithNode:(NSDictionary *) theNode
{
  if (!(self = [super init]))
    return nil;

  node = [theNode retain];

  return self;
}

// Convenience constructor returning an autoreleased element for theNode.
+ (TFHppleElement *) hppleElementWithNode:(NSDictionary *) theNode {
  return [[[[self class] alloc] initWithNode:theNode] autorelease];
}

#pragma mark -

// Returns the value stored under "nodeContent", or nil if absent.
- (NSString *) content
{
  return [node objectForKey:TFHppleNodeContentKey];
}

// Returns the value stored under "nodeName", or nil if absent.
- (NSString *) tagName
{
  return [node objectForKey:TFHppleNodeNameKey];
}

// Builds a new array of elements, one per dictionary under "nodeChildArray".
// Each child gets its `parent` set to the receiver. New element objects are
// created on every call.
- (NSArray *) children
{
  NSMutableArray *children = [NSMutableArray array];
  for (NSDictionary *child in [node objectForKey:TFHppleNodeChildrenKey]) {
    TFHppleElement *element = [TFHppleElement hppleElementWithNode:child];
    element.parent = self;
    [children addObject:element];
  }
  return children;
}

// Returns the first child element, or nil when there are no children.
- (TFHppleElement *) firstChild
{
  NSArray * children = self.children;
  if (children.count)
    return [children objectAtIndex:0];
  return nil;
}

// Returns a dictionary mapping attribute name -> attribute content, built from
// the entries under "nodeAttributeArray".
- (NSDictionary *) attributes
{
  NSMutableDictionary * translatedAttributes = [NSMutableDictionary dictionary];
  for (NSDictionary * attributeDict in [node objectForKey:TFHppleNodeAttributeArrayKey]) {
    id value = [attributeDict objectForKey:TFHppleNodeContentKey];
    id name = [attributeDict objectForKey:TFHppleNodeAttributeNameKey];
    // -setObject:forKey: raises NSInvalidArgumentException for a nil object or
    // key, so skip entries that lack either (e.g. an attribute with no value).
    if (value && name) {
      [translatedAttributes setObject:value forKey:name];
    }
  }
  return translatedAttributes;
}

// Returns the content of the attribute named theKey, or nil if not present.
- (NSString *) objectForKey:(NSString *) theKey
{
  return [[self attributes] objectForKey:theKey];
}

// Describes the element using the backing dictionary's own description.
- (id) description
{
  return [node description];
}

@end