1.字符串分隔
package worddetection;
import java.util.StringTokenizer;
/**
 * Demonstrates splitting a sentence into tokens with {@link StringTokenizer}.
 */
public class WordDetection {

    /**
     * Runs the tokenizer demo on a fixed sample sentence.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String input = "\"Let's get this vis-a-vis\", he said, \"these boys' marks are really that well?\"";
        new WordDetection().useTokenizer(input);
    }

    /**
     * Prints the header line {@code Tokenizer} followed by every token of
     * {@code input}, one token per line.
     *
     * <p>The tokenizer is created with its default delimiter set, so only
     * whitespace separates tokens and punctuation stays attached to the
     * neighbouring word.
     *
     * @param input the text to split
     */
    public void useTokenizer(String input) {
        System.out.println("Tokenizer");
        for (StringTokenizer parts = new StringTokenizer(input); parts.hasMoreTokens();) {
            System.out.println(parts.nextToken());
        }
    }
}
输出效果:
Tokenizer
"Let's
get
this
vis-a-vis",
he
said,
"these
boys'
marks
are
really
that
well?"
2.分词(BreakIterator)
package worddetection;
import java.text.BreakIterator;
/**
 * Demonstrates word segmentation with {@link BreakIterator}.
 */
public class WordDetection {

    /**
     * Runs the break-iterator demo on a fixed sample sentence.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String input = "\"Let's get this vis-a-vis\", he said, \"these boys' marks are really that well?\"";
        WordDetection wordDetection = new WordDetection();
        wordDetection.useBreakIterator(input);
    }

    /**
     * Prints the header line {@code Break Iterator} followed by every word and
     * punctuation segment of {@code input}, one segment per line.
     *
     * <p>Segments come from {@link BreakIterator#getWordInstance()}, which also
     * reports the whitespace runs between words as segments of their own. Those
     * whitespace-only segments are skipped so that only words and punctuation
     * marks are printed.
     *
     * @param input the text to segment
     */
    public void useBreakIterator(String input) {
        System.out.println("Break Iterator");
        BreakIterator boundaries = BreakIterator.getWordInstance();
        boundaries.setText(input);
        int start = boundaries.first();
        for (int end = boundaries.next();
                end != BreakIterator.DONE;
                start = end, end = boundaries.next()) {
            String segment = input.substring(start, end);
            // The gaps between words are boundaries too; do not print them as "words".
            if (!isWhitespaceOnly(segment)) {
                System.out.println(segment);
            }
        }
    }

    /** Returns {@code true} when every character of {@code segment} is whitespace. */
    private static boolean isWhitespaceOnly(String segment) {
        for (int i = 0; i < segment.length(); i++) {
            if (!Character.isWhitespace(segment.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}
输出效果:
Break Iterator
"
Let's
get
this
vis-a-vis
"
,
he
said
,
"
these
boys
'
marks
are
really
that
well
?
"
3.正则表达式
package worddetection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates word extraction with a regular expression.
 */
public class WordDetection {

    /**
     * Matches one word: a word character, then any number of word characters or
     * hyphens, then an optional apostrophe suffix (as in {@code Let's} or
     * {@code boys'}). The {@code *} quantifier lets one-letter words such as
     * {@code I} or {@code a} match as well.
     */
    private static final Pattern WORD = Pattern.compile("\\w[\\w-]*('\\w*)?");

    /**
     * Runs the regular-expression demo on a fixed sample sentence.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String input = "\"Let's get this vis-a-vis\", he said, \"these boys' marks are really that well?\"";
        WordDetection wordDetection = new WordDetection();
        wordDetection.useRegEx(input);
    }

    /**
     * Prints the header line {@code Regular Expression} followed by every word
     * found in {@code input}, one word per line. Punctuation and whitespace
     * between words are not printed.
     *
     * @param input the text to scan for words
     */
    public void useRegEx(String input) {
        System.out.println("Regular Expression");
        Matcher matcher = WORD.matcher(input);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}
输出效果:
Regular Expression
Let's
get
this
vis-a-vis
he
said
these
boys'
marks
are
really
that
well