//
//  SogouMusicParser.h
//
//  Created by scott.8an@gmail.com on 12-3-13.
//  Copyright (c) 2012 littleworn llc. All rights reserved.
//

#import <Foundation/Foundation.h>
#import "ASIHTTPRequest.h"
#import "ASINetworkQueue.h"
#import "TFHpple.h"
#import "XPathQuery.h"
#import "TFHppleElement.h"
#import "DataType.h"

// Builds the address of one Sogou search-results page.
// s_songName   - the query string (an object, formatted with %@)
// i_pageNumber - the page number (an int, formatted with %i)
#define kRequestURL(s_songName,i_pageNumber) \
[NSString stringWithFormat:@"http://mp3.sogou.com/music.so?pf=mp3&query=%@&page=%i&w=02009900&dr=1",s_songName,i_pageNumber]

// Names of the notifications posted by
// -runToGetSongInfoFromSogouWithKeyword:pageNumber:. They are defined in the
// implementation file; declaring them here lets observers use the constants
// instead of retyping the raw strings.
extern NSString *const SogouMusicParseSuccessNotification;
extern NSString *const SogouMusicParseFailedNotification;

@interface SogouMusicParser : NSObject

/// Returns the shared parser instance.
+ (SogouMusicParser*)shareInstance;

/**
 Searches Sogou music for the given keyword and reports the outcome through
 NSNotificationCenter. The implementation uses synchronous requests, so the
 call blocks until all pages have been fetched and parsed.

 On success SogouMusicParseSuccessNotification is posted; its userInfo holds
 an array under the key "songsInfo" with this structure:

 [
   {
     "song"="save me",
     "artist"="Queen",
     "size"="4.38M",
     "firstChoose_url"="http://ziyuan1.myi.cn/film/YINYUE/fir/firT1563.mp3",
     "other_link"=
     [
       "http://mul1.tximg.cn/music/group/bbs/mp3/7/090528/1243462409315.mp3",
       "http://api.ning.com/files/SRIkTa2lzwqL1jt26257Yzj3TCFZ6jAJD2HaYhYB*N469JhRrd01xDttiHqjJD7K9WkWr69u6LnXChFfA2Wid0jWUfiIB33m/f.i.r._.mp3"
     ]
   },...
 ]

 On failure SogouMusicParseFailedNotification is posted; its userInfo holds a
 human-readable message under the key "msg".

 @param kw    The search keyword. An empty or nil keyword produces the failure
              notification.
 @param pgNum The number of the results page to fetch.
 **/
- (void)runToGetSongInfoFromSogouWithKeyword:(NSString*)kw pageNumber:(NSInteger)pgNum;

@end
//
//  SogouMusicParser.m
//
//  Created by scott.8an@gmail.com on 12-3-13.
//  Copyright (c) 2012 littleworn llc. All rights reserved.
//

#import "SogouMusicParser.h"

NSString *const SogouMusicParseSuccessNotification = @"SogouMusicParseSuccessNotification";
NSString *const SogouMusicParseFailedNotification = @"SogouMusicParseFailedNotification";

// Upper bound on how often a failing request is repeated. The previous
// implementation retried through unbounded recursion, which never terminated
// while the network was unavailable.
static const NSUInteger kSogouMaxRequestAttempts = 3;

@interface SogouMusicParser ()

/**
 Performs a synchronous GET for urlString, repeating it up to maxAttempts
 times until the request yields non-nil response data.

 @return The request that produced data, or nil when urlString is empty, is
         not a valid URL, or every attempt came back without data.
 **/
- (ASIHTTPRequest*)completedRequestForURLString:(NSString*)urlString maxAttempts:(NSUInteger)maxAttempts;

/**
 Fetches one search-results page and extracts the addresses of the per-song
 download pages, e.g.

 [
   "http://mp3.sogou.com/down.so?gid=11950F9C68D7EDE7&globalId=1f940cf0056326c8&query=%CE%D2%C",
   "http://mp3.sogou.com/down.so?gid=11950F9C68D7EDE7&globalId=1f940cf0056326c8&query=%CE%D2%C",
   ...
 ]

 @return A non-empty array of URL strings, or nil when nothing could be
         fetched or parsed. The request is made once; retrying is left to the
         caller.
 **/
- (NSArray*)getJumpToDownloadPageURLsWithKeyword:(NSString*)kw pageNumber:(int)pgNum;

/**
 Fetches a download page and extracts the song description:

 {
   "song"="save me",
   "artist"="Queen",
   "size"="4.38M",
   "firstChoose_url"="http://ziyuan1.myi.cn/film/YINYUE/fir/firT1563.mp3",
   "other_link"=
   [
     "http://mul1.tximg.cn/music/group/bbs/mp3/7/090528/1243462409315.mp3",
     "http://api.ning.com/files/SRIkTa2lzwqL1jt26257Yzj3TCFZ6jAJD2HaYhYB*N469JhRrd01xDttiHqjJD7K9WkWr69u6LnXChFfA2Wid0jWUfiIB33m/f.i.r._.mp3"
   ]
 }

 @return The dictionary above, or nil when the page could not be fetched or
         contains no .mp3 link.
 **/
- (NSDictionary*)getMusicInfoByJumpToUrl:(NSString*)jumpToUrl;

@end

@implementation SogouMusicParser

+ (SogouMusicParser*)shareInstance{
    static SogouMusicParser *parser_ = nil;
    static dispatch_once_t onceToken;
    // dispatch_once guarantees a single instance even when several threads
    // ask for the shared parser at the same time.
    dispatch_once(&onceToken, ^{
        parser_ = [[self alloc] init];
    });
    return parser_;
}

- (ASIHTTPRequest*)completedRequestForURLString:(NSString*)urlString maxAttempts:(NSUInteger)maxAttempts{
    if ([urlString length] == 0) {
        return nil;
    }
    NSURL *url = [NSURL URLWithString:urlString];
    if (!url) {
        return nil;
    }
    for (NSUInteger attempt = 0; attempt < maxAttempts; attempt++) {
        ASIHTTPRequest *request = [ASIHTTPRequest requestWithURL:url];
        [request startSynchronous];
        // Same success criterion as before: any non-nil response data.
        if ([request responseData]) {
            return request;
        }
    }
    return nil;
}

- (NSArray*)getJumpToDownloadPageURLsWithKeyword:(NSString*)kw pageNumber:(int)pgNum{
    if ([kw length] == 0) {
        return nil;
    }

    // Trim surrounding whitespace, then join the search terms with '+'.
    NSString *pureKW = [kw stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    NSString *neededKw = [[pureKW componentsSeparatedByString:@" "] componentsJoinedByString:@"+"];

    // Percent-escape the URL using the GB18030 encoding.
    NSString *requestURLStr = kRequestURL(neededKw,pgNum);
    NSString *requestURLStrEnc = [requestURLStr stringByAddingPercentEscapesUsingEncoding:CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingGB_18030_2000)];

    // A single attempt: -runToGetSongInfoFromSogouWithKeyword:pageNumber:
    // already repeats this call when it yields nothing.
    ASIHTTPRequest *request = [self completedRequestForURLString:requestURLStrEnc maxAttempts:1];
    NSData *responseData = [request responseData];
    if (!responseData) {
        return nil;
    }

    // Locate the song list and take the children of its first child
    // (the original code refers to that child as the tbody node).
    TFHpple *help = [TFHpple hppleWithHTMLData:responseData];
    TFHppleElement *tableNode = [help peekAtSearchWithXPathQuery:@"//*[@id='songlist']"];
    TFHppleElement *tBodyNode = tableNode.firstChild;
    NSArray *rowNodes = tBodyNode.children;

    NSMutableArray *urlsArr = [NSMutableArray arrayWithCapacity:0];

    // Start at index 1 to skip the first row. The array returned by
    // -children is not mutated (the old code cast it to NSMutableArray and
    // removed element 0).
    for (NSUInteger rowIndex = 1; rowIndex < [rowNodes count]; rowIndex++) {
        TFHppleElement *rowNode = [rowNodes objectAtIndex:rowIndex];
        for (TFHppleElement *tdEle in rowNode.children) {
            TFHppleElement *aNode = tdEle.firstChild;
            if (![[aNode objectForKey:@"action"] isEqualToString:@"down"]) {
                continue;
            }
            // The onclick attribute looks like:
            //   window.open('/down.so?gid=1A6F5F0BA3CDD9C8&globalId=137c9db3dc0bf7bf&query=...&ac=0&c',' ','width=431,height=495,scrollbars=no'); uigsPB('consume=music_down&music_down=28');return(false);
            // so the relative address is the text between the first pair of
            // single quotes, i.e. component 1 after splitting on "'".
            NSString *urlThatMixed = [aNode objectForKey:@"onclick"];
            NSArray *urlParts = [urlThatMixed componentsSeparatedByString:@"'"];
            if ([urlParts count] < 2) {
                // No quoted part: objectAtIndex:1 would raise a range exception.
                continue;
            }
            NSString *uri = [urlParts objectAtIndex:1];
            if ([uri length]) {
                [urlsArr addObject:[NSString stringWithFormat:@"http://mp3.sogou.com%@",uri]];
            }
        }
    }

    return [urlsArr count] ? urlsArr : nil;
}

- (NSDictionary*)getMusicInfoByJumpToUrl:(NSString*)jumpToUrl{
    if ([jumpToUrl length] == 0) {
        return nil;
    }

    // The address is used as-is: it was taken from the results page and is
    // already percent-escaped.
    ASIHTTPRequest *request = [self completedRequestForURLString:jumpToUrl maxAttempts:kSogouMaxRequestAttempts];
    NSData *responseData = [request responseData];
    NSString *responseStr = [request responseString];
    // responseStr is checked as well so that a nil string is never handed to
    // NSRegularExpression below.
    if (!responseData || [responseStr length] == 0) {
        return nil;
    }

    NSMutableDictionary *dictionary = [NSMutableDictionary dictionaryWithCapacity:0];

    /** 1. Collect the download addresses **/
    // The capture group holds the quoted href value. [^"]+ keeps one match
    // inside a single attribute and \. requires a literal dot; the previous
    // pattern (href="..+.mp3") was greedy and could span several links on
    // one line, yielding an unrelated address.
    NSRegularExpression *exp = [NSRegularExpression regularExpressionWithPattern:@"href=\"([^\"]+\\.mp3)\""
                                                                         options:NSRegularExpressionCaseInsensitive
                                                                           error:nil];
    NSArray *metchedArr = [exp matchesInString:responseStr
                                       options:0
                                         range:NSMakeRange(0, [responseStr length])];
    if ([metchedArr count]) {
        NSMutableArray *otherLinksArr = [NSMutableArray arrayWithCapacity:0];
        for (NSTextCheckingResult *result in metchedArr) {
            NSRange linkRange = [result rangeAtIndex:1];
            if (linkRange.location == NSNotFound || linkRange.length == 0) {
                continue;
            }
            NSString *musicURL = [responseStr substringWithRange:linkRange];
            NSString *songURL = [dictionary objectForKey:@"firstChoose_url"];
            if ([songURL length] < 1) {
                // The first address found becomes the preferred one.
                [dictionary setObject:musicURL forKey:@"firstChoose_url"];
            }else if (![musicURL isEqualToString:songURL] && ![otherLinksArr containsObject:musicURL]) {
                // Every further distinct address is kept as an alternative.
                [otherLinksArr addObject:musicURL];
            }
        }
        [dictionary setObject:otherLinksArr forKey:@"other_link"];
    }

    // Without a download address the remaining details are of no use.
    if ([[dictionary objectForKey:@"firstChoose_url"] length] == 0) {
        return nil;
    }

    TFHpple *help = [TFHpple hppleWithHTMLData:responseData];

    /** 2. Song name: title attribute of the first child of the "info1" node **/
    TFHppleElement *info1Node = [help peekAtSearchWithXPathQuery:@"//*[@class=\"info1\"]"];
    NSString *songName = [info1Node.firstChild objectForKey:@"title"];
    [dictionary setObject:([songName length] ? songName : @"Unknown") forKey:@"song"];

    /** 3. Artist: first <a> with a title after the first child of "info1" **/
    NSArray *info1Children = info1Node.children;
    for (NSUInteger childIndex = 1; childIndex < [info1Children count]; childIndex++) {
        TFHppleElement *node = [info1Children objectAtIndex:childIndex];
        if (![node.tagName isEqualToString:@"a"]) {
            continue;
        }
        NSString *artist = [node objectForKey:@"title"];
        if ([artist length]) {
            [dictionary setObject:artist forKey:@"artist"];
            // Stop at the first hit. The old flag was reset to YES on every
            // iteration, so a later anchor silently replaced the artist.
            break;
        }
    }
    if ([[dictionary objectForKey:@"artist"] length] < 1) {
        [dictionary setObject:@"Unknown" forKey:@"artist"];
    }

    /** 4. Size: content of the <strong> children of the "info2" node **/
    TFHppleElement *info2Node = [help peekAtSearchWithXPathQuery:@"//*[@class=\"info2\"]"];
    for (TFHppleElement *node in info2Node.children) {
        if ([node.tagName isEqualToString:@"strong"]) {
            NSString *sizeDesc = node.content;
            if ([sizeDesc length]) {
                // As before, a later non-empty <strong> overrides an earlier one.
                [dictionary setObject:sizeDesc forKey:@"size"];
            }
        }
    }
    if ([[dictionary objectForKey:@"size"] length] < 1) {
        [dictionary setObject:@"Unknown" forKey:@"size"];
    }

    return dictionary;
}

/**
 Searches for kw on results page pgNum, resolves every hit to its song
 description and posts SogouMusicParseSuccessNotification (userInfo key
 "songsInfo") or SogouMusicParseFailedNotification (userInfo key "msg").
 All requests are synchronous, so this call blocks until the work is done.
 **/
- (void)runToGetSongInfoFromSogouWithKeyword:(NSString*)kw pageNumber:(NSInteger)pgNum{
    NSNotificationCenter *center = [NSNotificationCenter defaultCenter];

    if ([kw length] == 0) {
        [center postNotificationName:SogouMusicParseFailedNotification
                              object:nil
                            userInfo:[NSDictionary dictionaryWithObject:@"Check what you've been input" forKey:@"msg"]];
        return;
    }

    // Repeat the search only while it has produced no addresses. The old
    // code tested songsInfoArr here, which is always empty at this point, so
    // the results page was downloaded three times on every call.
    NSArray *songsURLsArr = nil;
    for (NSUInteger attempt = 0; attempt < kSogouMaxRequestAttempts && [songsURLsArr count] == 0; attempt++) {
        songsURLsArr = [self getJumpToDownloadPageURLsWithKeyword:kw pageNumber:(int)pgNum];
    }

    NSMutableArray *songsInfoArr = [NSMutableArray arrayWithCapacity:0];
    for (NSString *url in songsURLsArr) {
        NSDictionary *songInfo = [self getMusicInfoByJumpToUrl:url];
        if ([songInfo count]) {
            [songsInfoArr addObject:songInfo];
        }
    }

    if ([songsInfoArr count]) {
        [center postNotificationName:SogouMusicParseSuccessNotification
                              object:nil
                            userInfo:[NSDictionary dictionaryWithObject:songsInfoArr forKey:@"songsInfo"]];
    }else{
        [center postNotificationName:SogouMusicParseFailedNotification
                              object:nil
                            userInfo:[NSDictionary dictionaryWithObject:@"This page does not exist" forKey:@"msg"]];
    }
}

@end