1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#import "NSString+Extensions.h"

#import <CommonCrypto/CommonDigest.h>
@implementation NSString (Extensions)
#pragma mark - String case conversion
/**
 Returns a copy of the receiver with its first character lowercased.

 The first character is taken to be the composed character sequence at index 0,
 so a leading surrogate pair is converted as one unit instead of being split.

 @return The converted string. An empty receiver is returned as an (empty) copy.
 */
- (NSString *)lowercaseFirstCharacter {
    // An empty receiver has no first character; taking a one-character prefix of it
    // would raise NSRangeException.
    if ([self length] == 0) {
        return [self copy];
    }
    NSRange firstRange = [self rangeOfComposedCharacterSequenceAtIndex:0];
    NSString *lowered = [[self substringWithRange:firstRange] lowercaseString];
    return [self stringByReplacingCharactersInRange:firstRange withString:lowered];
}
/**
 Returns a copy of the receiver with its first character uppercased.

 The first character is taken to be the composed character sequence at index 0,
 so a leading surrogate pair is converted as one unit instead of being split.
 The replacement may be longer than the original character, because whatever
 -uppercaseString produces for it is spliced in.

 @return The converted string. An empty receiver is returned as an (empty) copy.
 */
- (NSString *)uppercaseFirstCharacter {
    // An empty receiver has no first character; taking a one-character prefix of it
    // would raise NSRangeException.
    if ([self length] == 0) {
        return [self copy];
    }
    NSRange firstRange = [self rangeOfComposedCharacterSequenceAtIndex:0];
    NSString *raised = [[self substringWithRange:firstRange] uppercaseString];
    return [self stringByReplacingCharactersInRange:firstRange withString:raised];
}
#pragma mark - trim string
/// Returns the receiver with whitespace and newline characters stripped from both ends.
- (NSString *)trim {
    NSCharacterSet *edgeCharacters = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *stripped = [self stringByTrimmingCharactersInSet:edgeCharacters];
    return stripped;
}
/**
 Collapses every run of whitespace characters into a single space and drops
 leading and trailing whitespace.

 Splitting uses +[NSCharacterSet whitespaceCharacterSet], so newlines are not
 treated as separators and stay inside their components.

 @return The components that pass the non-empty predicate, joined by one space each.
 */
- (NSString *)trimTheExtraSpaces {
    NSMutableArray *pieces =
        [[self componentsSeparatedByCharactersInSet:[NSCharacterSet whitespaceCharacterSet]] mutableCopy];
    // Adjacent separators yield empty components; filter them out before re-joining.
    [pieces filterUsingPredicate:[NSPredicate predicateWithFormat:@"SELF != ''"]];
    return [pieces componentsJoinedByString:@" "];
}
/// Whether the receiver is empty: YES when nothing remains after trimming
/// whitespace and newline characters from both ends.
- (BOOL)isEmpty {
    NSString *stripped =
        [self stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    return [stripped length] == 0;
}
/**
 Escapes the characters that are significant in HTML markup.

 Replaces & < > " ' with &amp; &lt; &gt; &quot; &#39; respectively.

 @return A new string with the five characters replaced by entity references.
 */
- (NSString *)escapeHTML {
    NSMutableString *escaped = [self mutableCopy];
    // "&" must be handled first; otherwise the ampersands introduced by the
    // other replacements would be escaped a second time.
    NSArray *replacements = @[
        @[@"&", @"&amp;"],
        @[@"<", @"&lt;"],
        @[@">", @"&gt;"],
        @[@"\"", @"&quot;"],
        @[@"'", @"&#39;"],
    ];
    for (NSArray *pair in replacements) {
        [escaped replaceOccurrencesOfString:pair[0]
                                 withString:pair[1]
                                    options:NSLiteralSearch
                                      range:NSMakeRange(0, [escaped length])];
    }
    return [escaped copy];
}
// Implementation by Daniel Dickison and Walty:
// http://stackoverflow.com/questions/1105169/html-character-decoding-in-objective-c-cocoa-touch
/**
 Decodes XML character references in the receiver.

 Handles the five predefined entities (&amp; &apos; &quot; &lt; &gt;) and numeric
 references in decimal (&#65;) or hexadecimal (&#x41;) form. Numeric references
 above U+FFFF are emitted as a UTF-16 surrogate pair; values that are not valid
 Unicode scalar values (surrogates, or anything above U+10FFFF) become U+FFFD.
 An ampersand that does not start a recognised reference is copied through
 unchanged, and a malformed numeric reference is copied through and logged.

 @return The decoded string, or the receiver itself when it contains no "&".
 */
- (NSString *)stringByDecodingXMLEntities {
    // Short-circuit if there are no ampersands: nothing can need decoding.
    if ([self rangeOfString:@"&" options:NSLiteralSearch].location == NSNotFound) {
        return self;
    }

    NSMutableString *result = [NSMutableString stringWithCapacity:[self length]];
    NSScanner *scanner = [NSScanner scannerWithString:self];
    // Whitespace is content here, so the scanner must not skip anything.
    [scanner setCharactersToBeSkipped:nil];
    NSCharacterSet *boundaryCharacterSet =
        [NSCharacterSet characterSetWithCharactersInString:@" \t\n\r;"];

    while (![scanner isAtEnd]) {
        // Copy everything up to the next "&" (or the end of the string) verbatim.
        NSString *nonEntityString = nil;
        if ([scanner scanUpToString:@"&" intoString:&nonEntityString]) {
            [result appendString:nonEntityString];
        }
        if ([scanner isAtEnd]) {
            break;
        }

        // The scanner now sits on "&": try the named entities, then numeric references.
        if ([scanner scanString:@"&amp;" intoString:NULL]) {
            [result appendString:@"&"];
        } else if ([scanner scanString:@"&apos;" intoString:NULL]) {
            [result appendString:@"'"];
        } else if ([scanner scanString:@"&quot;" intoString:NULL]) {
            [result appendString:@"\""];
        } else if ([scanner scanString:@"&lt;" intoString:NULL]) {
            [result appendString:@"<"];
        } else if ([scanner scanString:@"&gt;" intoString:NULL]) {
            [result appendString:@">"];
        } else if ([scanner scanString:@"&#" intoString:NULL]) {
            BOOL gotNumber = NO;
            unsigned int codePoint = 0;
            NSString *xForHex = @"";

            // Is it hex or decimal?
            if ([scanner scanString:@"x" intoString:&xForHex]) {
                gotNumber = [scanner scanHexInt:&codePoint];
            } else {
                int decimalValue = 0;
                gotNumber = [scanner scanInt:&decimalValue];
                // A negative value wraps far above U+10FFFF and is rejected below.
                codePoint = (unsigned int)decimalValue;
            }

            if (gotNumber) {
                unichar units[2] = {0, 0};
                NSUInteger unitCount = 1;
                if (codePoint > 0x10FFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
                    // Not a Unicode scalar value: substitute the replacement character.
                    units[0] = 0xFFFD;
                } else if (codePoint > 0xFFFF) {
                    // Outside the BMP: encode as a UTF-16 surrogate pair.
                    unsigned int offset = codePoint - 0x10000;
                    units[0] = (unichar)(0xD800 + (offset >> 10));
                    units[1] = (unichar)(0xDC00 + (offset & 0x3FF));
                    unitCount = 2;
                } else {
                    units[0] = (unichar)codePoint;
                }
                [result appendString:[NSString stringWithCharacters:units length:unitCount]];
                // The terminating ";" is optional here; consume it when present.
                [scanner scanString:@";" intoString:NULL];
            } else {
                // Malformed numeric reference: copy it through up to the next boundary.
                NSString *unknownEntity = @"";
                [scanner scanUpToCharactersFromSet:boundaryCharacterSet intoString:&unknownEntity];
                [result appendFormat:@"&#%@%@", xForHex, unknownEntity];
                NSLog(@"Expected numeric character entity but got &#%@%@;", xForHex, unknownEntity);
            }
        } else {
            // An isolated "&" that starts no recognised reference: keep it literally.
            [scanner scanString:@"&" intoString:NULL];
            [result appendString:@"&"];
        }
    }
    return result;
}
/**
 Computes the MD5 digest of the receiver's UTF-8 bytes.

 The bytes come from an NSData encoding rather than a C string, so an embedded
 NUL character does not cut the hashed input short.

 NOTE: MD5 is not collision-resistant; do not use it for security purposes.

 @return The digest as 32 lowercase hexadecimal characters, or nil if the
         receiver cannot be encoded as UTF-8.
 */
- (NSString *)md5 {
    NSData *utf8Data = [self dataUsingEncoding:NSUTF8StringEncoding];
    if (utf8Data == nil) {
        return nil;
    }
    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5([utf8Data bytes], (CC_LONG)[utf8Data length], digest);

    NSMutableString *hex = [NSMutableString stringWithCapacity:CC_MD5_DIGEST_LENGTH * 2];
    for (NSUInteger i = 0; i < CC_MD5_DIGEST_LENGTH; i++) {
        [hex appendFormat:@"%02x", digest[i]];
    }
    return [hex copy];
}
/**
 Computes the MD5 digest of the receiver encoded as little-endian UTF-16.

 @return The digest as 32 lowercase hexadecimal characters.
 */
- (NSString *)md5ForUTF16 {
    NSData *encoded = [self dataUsingEncoding:NSUTF16LittleEndianStringEncoding];
    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5((UInt8 *)[encoded bytes], (CC_LONG)[encoded length], digest);

    // Render each digest byte as two lowercase hex digits.
    NSMutableString *hex = [NSMutableString stringWithCapacity:CC_MD5_DIGEST_LENGTH * 2];
    for (int index = 0; index < CC_MD5_DIGEST_LENGTH; index++) {
        [hex appendFormat:@"%02x", digest[index]];
    }
    return [NSString stringWithString:hex];
}
/**
 Adapts the font size so the text fits: starting at the point size of `font`,
 shrinks in 1-point steps until the receiver, word-wrapped to `size.width`,
 is no taller than `size.height`.

 Shrinking stops once the size is about to reach zero, so the returned value
 stays positive even when the text cannot be made to fit.

 @param font The starting font; its fontName is reused for every trial size.
 @param size The box the text should fit in. Only the height is enforced; the
             width is used as the wrapping width.
 @return The point size that was settled on.
 */
- (CGFloat)fontSizeWithFont:(UIFont *)font constrainedToSize:(CGSize)size {
    CGSize wrapBounds = CGSizeMake(size.width, FLT_MAX);
    CGFloat fontSize = [font pointSize];
    CGFloat height = [self sizeWithFont:font
                      constrainedToSize:wrapBounds
                          lineBreakMode:UILineBreakModeWordWrap].height;

    // Reduce the font size while the text is too tall. Stop on a zero height
    // (empty string) and before the size can drop to zero or below, because
    // UIFont documents that the size passed to +fontWithName:size: must be > 0.
    while (height > size.height && height != 0 && fontSize > 1) {
        fontSize--;
        UIFont *smallerFont = [UIFont fontWithName:font.fontName size:fontSize];
        height = [self sizeWithFont:smallerFont
                  constrainedToSize:wrapBounds
                      lineBreakMode:UILineBreakModeWordWrap].height;
    }

    // Disabled: per-word width fitting.
    // Loop through words in string and resize to fit
    // for (NSString *word in [self componentsSeparatedByString:@" "]) {
    // CGFloat width = [word sizeWithFont:newFont].width;
    // while (width > size.width && width != 0) {
    // fontSize--;
    // newFont = [UIFont fontWithName:font.fontName size:fontSize];
    // width = [word sizeWithFont:newFont].width;
    // }
    // }
    return fontSize;
}
#pragma mark - tokenization string
/**
 Splits the receiver into substrings according to the given enumeration options.

 The three most commonly used options are:
   NSStringEnumerationByComposedCharacterSequences - split by character
   NSStringEnumerationByWords                      - split by word
   NSStringEnumerationBySentences                  - split by sentence

 @param opts Options passed through to -enumerateSubstringsInRange:options:usingBlock:.
 @return A mutable array with the enumerated substrings, in enumeration order.
 */
- (NSMutableArray *)tokenizationStringByNSStringEnumerationOptions:(NSStringEnumerationOptions)opts {
    NSMutableArray *splitArray = [NSMutableArray array];
    NSRange range = NSMakeRange(0, [self length]);
    [self enumerateSubstringsInRange:range
                             options:opts
                          usingBlock:^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
        // With NSStringEnumerationSubstringNotRequired the block receives nil;
        // rebuild the token from its range so -addObject: never sees nil.
        NSString *token = substring ?: [self substringWithRange:substringRange];
        [splitArray addObject:token];
    }];
    return splitArray;
}
/**
 Detects the language of the receiver. Detection is relatively accurate for
 sentences and less so for single words.

 At most the first 400 characters are handed to the tokenizer.

 @return Whatever CFStringTokenizerCopyBestStringLanguage reports, or nil if it
         returns NULL.
 */
- (NSString *)languageForString {
    CFRange sampleRange = CFRangeMake(0, MIN(self.length, 400));
    // A CF "Copy" function returns a +1 reference; __bridge_transfer hands that
    // ownership to ARC so the string is released instead of leaking.
    return (__bridge_transfer NSString *)CFStringTokenizerCopyBestStringLanguage((__bridge CFStringRef)self, sampleRange);
}
/**
 Analyses the part of speech of each word in the receiver.

 Runs an NSLinguisticTagger with the NSLinguisticTagSchemeNameTypeOrLexicalClass
 scheme over the whole string, omitting whitespace and punctuation.

 @return A mutable array of dictionaries, one per tagged token, each with the
         keys @"word" (the token text) and @"tag" (the tag reported for it).
         Tokens for which the tagger reports no tag are skipped.
 */
- (NSMutableArray *)analyseTextOfSentences {
    NSMutableArray *analyseArray = [NSMutableArray array];
    // This range contains the entire string, since we want to parse it completely
    NSRange stringRange = NSMakeRange(0, self.length);

    // First approach: an explicit tagger instance.
    NSLinguisticTagger *tagger =
        [[NSLinguisticTagger alloc] initWithTagSchemes:@[NSLinguisticTagSchemeNameTypeOrLexicalClass]
                                               options:0];
    [tagger setString:self];
    [tagger enumerateTagsInRange:stringRange
                          scheme:NSLinguisticTagSchemeNameTypeOrLexicalClass
                         options:NSLinguisticTaggerOmitWhitespace | NSLinguisticTaggerOmitPunctuation
                      usingBlock:^(NSString *tag, NSRange tokenRange, NSRange sentenceRange, BOOL *stop) {
        // The tag is nullable; a nil value inside a dictionary literal would throw.
        if (tag == nil) {
            return;
        }
        [analyseArray addObject:@{@"word" : [self substringWithRange:tokenRange], @"tag" : tag}];
    }];

    // Second approach (disabled): enumerate directly on the string with an explicit orthography.
    // NSArray *language = [NSArray arrayWithObjects:@"en",@"de",@"fr",@"ko",@"ja",nil];
    // NSDictionary* languageMap = [NSDictionary dictionaryWithObject:language forKey:@"Latn"];
    //
    // [self enumerateLinguisticTagsInRange:stringRange
    // scheme:NSLinguisticTagSchemeNameTypeOrLexicalClass
    // options:NSLinguisticTaggerOmitWhitespace | NSLinguisticTaggerOmitPunctuation
    // orthography:[NSOrthography orthographyWithDominantScript:@"Latn" languageMap:languageMap]
    // usingBlock:^(NSString *tag, NSRange tokenRange, NSRange sentenceRange, BOOL *stop) {
    // [analyseArray addObject:@{@"word":[self substringWithRange:tokenRange],@"tag" : tag}];
    // }];
    return analyseArray;
}
@end
|