http://www.lexun.cn/thread-6030948-1-1.html
// Fetch the resource at urlStr and decode its bytes as GB18030 text.
// NOTE(review): +dataWithContentsOfURL: is a synchronous call; keep it off the main
// thread when urlStr points at a network resource.
NSURL *url = [NSURL URLWithString:urlStr];
NSData *data = [NSData dataWithContentsOfURL:url];

// Map the CoreFoundation GB18030 constant onto the matching NSStringEncoding value.
NSStringEncoding enc = CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingGB_18030_2000);

// data is nil when the URL could not be built or the download failed. In that case
// skip decoding (the initializer's data parameter is declared non-null) and leave
// retStr nil. retStr is also nil when the bytes are not valid in the chosen encoding.
NSString *retStr = nil;
if (data != nil) {
    retStr = [[NSString alloc] initWithData:data encoding:enc];
}
http://www.lexun.cn/thread-6030948-1-1.html
#import <Foundation/Foundation.h> int main() { const char *bytes = "图片_6.png"; NSString *str = [[NSString alloc] initWithCString: bytes encoding: NSASCIIStringEncoding]; NSLog(@"str: %@", str); [str release]; return 0; } (用 GBK 编码保存) 这个程序的执行结果就是输出开头那段乱码,原来 NSURLResponse 把 Content-Disposition 中的 filename 当成 ASCII 处理了,怪不得会乱码。 可是也不能怪 NSURLResponse,毕竟服务器没有提供任何编码的信息,而 RFC 2183 中也明确说明,不应该在 filename 中使用任何 ASCII 以外的字符,用了就是后果自负了。 那假如我们要写一个自己的客户端 (或者尝试修正 Safari 的错误行为),该怎么修正已经被按照 ASCII 错误解码的 NSString 呢? 因为 NSString 本身是按照 UTF-16 编码的,所以如果逐个字符地观察这个错误解码后的 NSString: int max = [str length]; int i; for (i = 0; i < max; i++) { unichar ch = [str characterAtIndex: i]; printf("%x ", ch); } 我们可以得到: cd bc c6 ac 5f 36 2e 70 6e 67 这样一串输出,5f 36 2e 70 6e 67 就是 _6.png,比较好认,前面的 cd bc c6 ac 是什么呢?一查,原来是“图”和“片”这两个字的 GBK 编码。 这就好理解了:NSString 一开始把一段 GBK 编码的字节流逐个字节地按照 8bit-ASCII 处理了,原本 10 个字节对应的是 7 个字符,结果被错误地解码为了 10 个字符,所以我们要把它转换回去,首先是要还原回原来的那段字节流: int max = [str length]; char *nbytes = malloc(max + 1); int i; for (i = 0; i < max; i++) { unichar ch = [str characterAtIndex: i]; nbytes[i] = (char) ch; } nbytes[max] = '\0'; 然后再将这段字节流按照正确的编码 (GB18030) 处理: NSStringEncoding enc = CFStringConvertEncodingToNSStringEncoding( kCFStringEncodingGB_18030_2000); NSLog(@"nstr: %@", [NSString stringWithCString: nbytes encoding: enc]); 结果果然得到了正确的输出: 2008-02-17 06:09:46.408 test[14095:10b] nstr: 图片_6.png
http://www.cocoachina.com/bbs/simple/?t829.html
怎么把一个字符串中的html特殊符号解析出来
比如一个字符串是 "test test test &copy;",怎么把 &copy; 转换成 © 呢?
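@"&amp;", @"&lt;", @"&gt;", @"&quot;",
@"&nbsp;", @"&iexcl;", @"&cent;", @"&pound;", @"&curren;", @"&yen;", @"&brvbar;",
@"&sect;", @"&uml;", @"&copy;", @"&ordf;", @"&laquo;", @"&not;", @"&shy;", @"&reg;",
@"&macr;", @"&deg;", @"&plusmn;", @"&sup2;", @"&sup3;", @"&acute;", @"&micro;",
@"&para;", @"&middot;", @"&cedil;", @"&sup1;", @"&ordm;", @"&raquo;", @"&frac14;",
@"&frac12;", @"&frac34;", @"&iquest;", @"&Agrave;", @"&Aacute;", @"&Acirc;",
@"&Atilde;", @"&Auml;", @"&Aring;", @"&AElig;", @"&Ccedil;", @"&Egrave;",
@"&Eacute;", @"&Ecirc;", @"&Euml;", @"&Igrave;", @"&Iacute;", @"&Icirc;", @"&Iuml;",
@"&ETH;", @"&Ntilde;", @"&Ograve;", @"&Oacute;", @"&Ocirc;", @"&Otilde;", @"&Ouml;",
@"&times;", @"&Oslash;", @"&Ugrave;", @"&Uacute;", @"&Ucirc;", @"&Uuml;", @"&Yacute;",
@"&THORN;", @"&szlig;", @"&agrave;", @"&aacute;", @"&acirc;", @"&atilde;", @"&auml;",
@"&aring;", @"&aelig;", @"&ccedil;", @"&egrave;", @"&eacute;", @"&ecirc;", @"&euml;",
@"&igrave;", @"&iacute;", @"&icirc;", @"&iuml;", @"&eth;", @"&ntilde;", @"&ograve;",
@"&oacute;", @"&ocirc;", @"&otilde;", @"&ouml;", @"&divide;", @"&oslash;", @"&ugrave;",
@"&uacute;", @"&ucirc;", @"&uuml;", @"&yacute;", @"&thorn;", @"&yuml;"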
搜到一个替换的代码:
/**
 * Decodes HTML character references found in a string.
 *
 * Recognised forms:
 *   - the markup entities &amp; &lt; &gt; &quot;
 *   - the named Latin-1 entities &nbsp; ... &yuml; (U+00A0 through U+00FF)
 *   - decimal references such as &#169;
 *   - hexadecimal references such as &#xA9; or &#XA9;
 *
 * The input is scanned once from left to right, so text produced by decoding one
 * reference is never decoded a second time ("&amp;lt;" yields "&lt;", not "<").
 * Anything that is not a well-formed, recognised reference is copied through
 * unchanged, including a bare "&".
 *
 * @param source The text to decode. May be nil.
 * @return nil when source is nil; source itself when it contains no "&";
 *         otherwise a new string with the references replaced.
 */
+ (NSString *)decodeCharacterEntitiesIn:(NSString *)source {
    if (!source) {
        return nil;
    }
    if ([source rangeOfString:@"&"].location == NSNotFound) {
        return source;
    }

    // Entity names for U+00A0..U+00FF in code point order: entry i decodes to 160 + i.
    // The four markup entities are handled separately so they cannot shift this mapping.
    static NSString * const latin1Names[] = {
        @"nbsp",   @"iexcl",  @"cent",   @"pound",  @"curren", @"yen",    @"brvbar", @"sect",
        @"uml",    @"copy",   @"ordf",   @"laquo",  @"not",    @"shy",    @"reg",    @"macr",
        @"deg",    @"plusmn", @"sup2",   @"sup3",   @"acute",  @"micro",  @"para",   @"middot",
        @"cedil",  @"sup1",   @"ordm",   @"raquo",  @"frac14", @"frac12", @"frac34", @"iquest",
        @"Agrave", @"Aacute", @"Acirc",  @"Atilde", @"Auml",   @"Aring",  @"AElig",  @"Ccedil",
        @"Egrave", @"Eacute", @"Ecirc",  @"Euml",   @"Igrave", @"Iacute", @"Icirc",  @"Iuml",
        @"ETH",    @"Ntilde", @"Ograve", @"Oacute", @"Ocirc",  @"Otilde", @"Ouml",   @"times",
        @"Oslash", @"Ugrave", @"Uacute", @"Ucirc",  @"Uuml",   @"Yacute", @"THORN",  @"szlig",
        @"agrave", @"aacute", @"acirc",  @"atilde", @"auml",   @"aring",  @"aelig",  @"ccedil",
        @"egrave", @"eacute", @"ecirc",  @"euml",   @"igrave", @"iacute", @"icirc",  @"iuml",
        @"eth",    @"ntilde", @"ograve", @"oacute", @"ocirc",  @"otilde", @"ouml",   @"divide",
        @"oslash", @"ugrave", @"uacute", @"ucirc",  @"uuml",   @"yacute", @"thorn",  @"yuml"
    };
    static const NSUInteger latin1Count = sizeof(latin1Names) / sizeof(latin1Names[0]);

    // Upper bound on the characters inspected after "&" while looking for ";".
    // Keeps the scan linear on text that contains many stray ampersands.
    static const NSUInteger maxReferenceBody = 32;

    NSUInteger length = [source length];
    NSMutableString *decoded = [NSMutableString stringWithCapacity:length];
    NSUInteger position = 0;

    while (position < length) {
        NSRange ampersand = [source rangeOfString:@"&"
                                          options:NSLiteralSearch
                                            range:NSMakeRange(position, length - position)];
        if (ampersand.location == NSNotFound) {
            // No more references: copy the tail and stop.
            [decoded appendString:[source substringFromIndex:position]];
            break;
        }

        // Copy the literal text that precedes the ampersand.
        [decoded appendString:[source substringWithRange:
                                  NSMakeRange(position, ampersand.location - position)]];

        NSUInteger bodyStart = ampersand.location + 1;
        NSUInteger window = length - bodyStart;
        if (window > maxReferenceBody) {
            window = maxReferenceBody;
        }
        NSRange semicolon = [source rangeOfString:@";"
                                          options:NSLiteralSearch
                                            range:NSMakeRange(bodyStart, window)];

        uint32_t codePoint = 0;
        BOOL recognised = NO;

        if (semicolon.location != NSNotFound && semicolon.location > bodyStart) {
            NSString *body = [source substringWithRange:
                                 NSMakeRange(bodyStart, semicolon.location - bodyStart)];
            NSUInteger bodyLength = [body length];

            if ([body characterAtIndex:0] == '#') {
                // Numeric reference: "#" digits, or "#x"/"#X" hex digits.
                uint32_t radix = 10;
                NSUInteger digitsStart = 1;
                if (bodyLength > 1) {
                    unichar marker = [body characterAtIndex:1];
                    if (marker == 'x' || marker == 'X') {
                        radix = 16;
                        digitsStart = 2;
                    }
                }

                BOOL valid = (bodyLength > digitsStart);
                uint32_t value = 0;
                NSUInteger k;
                for (k = digitsStart; valid && k < bodyLength; k++) {
                    unichar c = [body characterAtIndex:k];
                    uint32_t digit = 0;
                    if (c >= '0' && c <= '9') {
                        digit = (uint32_t)(c - '0');
                    } else if (radix == 16 && c >= 'a' && c <= 'f') {
                        digit = (uint32_t)(c - 'a') + 10;
                    } else if (radix == 16 && c >= 'A' && c <= 'F') {
                        digit = (uint32_t)(c - 'A') + 10;
                    } else {
                        valid = NO;
                        break;
                    }
                    value = value * radix + digit;
                    if (value > 0x10FFFF) {
                        // Beyond the Unicode range; stopping here also rules out overflow.
                        valid = NO;
                    }
                }

                // NUL and lone surrogate values are not usable characters; leave such
                // references in the output as written.
                BOOL isSurrogate = (value >= 0xD800 && value <= 0xDFFF);
                if (valid && value != 0 && !isSurrogate) {
                    codePoint = value;
                    recognised = YES;
                }
            } else if ([body isEqualToString:@"amp"]) {
                codePoint = '&';
                recognised = YES;
            } else if ([body isEqualToString:@"lt"]) {
                codePoint = '<';
                recognised = YES;
            } else if ([body isEqualToString:@"gt"]) {
                codePoint = '>';
                recognised = YES;
            } else if ([body isEqualToString:@"quot"]) {
                codePoint = '"';
                recognised = YES;
            } else {
                // Entity names are case-sensitive ("Agrave" and "agrave" differ).
                NSUInteger n;
                for (n = 0; n < latin1Count; n++) {
                    if ([body isEqualToString:latin1Names[n]]) {
                        codePoint = (uint32_t)(160 + n);
                        recognised = YES;
                        break;
                    }
                }
            }
        }

        if (!recognised) {
            // Not a reference: keep the ampersand and continue right after it.
            [decoded appendString:@"&"];
            position = bodyStart;
            continue;
        }

        if (codePoint > 0xFFFF) {
            // NSString stores UTF-16, so supplementary code points need a surrogate pair.
            uint32_t offset = codePoint - 0x10000;
            unichar pair[2];
            pair[0] = (unichar)(0xD800 + (offset >> 10));
            pair[1] = (unichar)(0xDC00 + (offset & 0x3FF));
            [decoded appendString:[NSString stringWithCharacters:pair length:2]];
        } else {
            unichar unit = (unichar)codePoint;
            [decoded appendString:[NSString stringWithCharacters:&unit length:1]];
        }
        position = semicolon.location + 1;
    }

    return decoded;
}
答:
如果不想自己替换,还有一个比较那啥的办法……用 WebKit 来帮忙
/**
 * Decodes HTML character references (e.g. "&copy;", "&amp;", "&#169;") in a string
 * by letting the browser's HTML parser do the work.
 *
 * @param {string} str Text that may contain HTML character references.
 * @returns {string} The text with the references replaced by their characters.
 */
function htmlDecode(str) {
    // A <textarea> is used as the scratch element instead of a <div>: its content is
    // parsed as text with character references, so tags in `str` are kept as plain
    // text rather than being turned into live elements.
    var scratch = document.createElement("textarea");
    scratch.innerHTML = str;
    // Read the decoded text through `value`. Reading `innerHTML` back would serialize
    // the content again and re-escape "&", "<", ">" and non-breaking spaces.
    return scratch.value;
}
http://www.cocoachina.com/index.php/archives/cocoachina_241.html
http://www.cocoachina.com/index.php/archives/cocoachina_15.html