import java.nio.charset.StandardCharsets;
import java.util.Locale;

import org.apache.lucene.analysis.util.CharArraySet; //导入方法依赖的package包/类
/**
 * Wraps {@code stream} in a {@link StopFilter}, choosing the stop-word source as follows.
 *
 * <ol>
 *   <li>If {@code metadataStopWords} is non-null and non-empty, those words are used
 *       (the set is built with {@code ignoreCase = false}).</li>
 *   <li>Otherwise the resource {@code <lang lower-cased>.stopwords} is looked up through the
 *       system class loader. Each line contributes the text that precedes the first
 *       {@code '|'} (or the whole line when there is none), trimmed; empty results are
 *       skipped. That set is built with {@code ignoreCase = true}.</li>
 * </ol>
 *
 * This is best-effort: when the resource is missing a warning is logged, and when anything
 * fails while loading it the error is logged; in both cases {@code stream} is returned
 * without a stop filter.
 *
 * @param lang language code used to derive the resource name
 * @param metadataStopWords explicit stop words; may be {@code null} or empty
 * @param stream the token stream to wrap
 * @return a {@link StopFilter} over {@code stream}, or {@code stream} itself when no
 *     stop words could be obtained
 */
private TokenStream getStopFilter(String lang, Set metadataStopWords, TokenStream stream) {
    if (metadataStopWords != null && !metadataStopWords.isEmpty()) {
        return new StopFilter(stream, new CharArraySet(metadataStopWords, false));
    }
    try {
        // Locale.ROOT keeps the resource name independent of the JVM default locale
        // (e.g. under a Turkish locale "IT".toLowerCase() yields a dotless i).
        InputStream in =
                ClassLoader.getSystemResourceAsStream(lang.toLowerCase(Locale.ROOT) + ".stopwords");
        if (in == null) {
            logger.warn("No stop words found for lang={}", lang);
            return stream;
        }
        logger.debug("Loading Stop words for lang={}", lang);
        CharArraySet stopWords = new CharArraySet(30, true);
        // Decode explicitly as UTF-8 rather than the platform default so the loaded words do
        // not depend on the host configuration.
        // NOTE(review): assumes the bundled *.stopwords files are UTF-8 encoded - confirm.
        try (BufferedReader bin =
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = bin.readLine()) != null) {
                // Keep only the text before the first '|'. indexOf/substring is used instead
                // of split(): split() returns an empty array for a line made only of '|'
                // characters, and indexing element 0 of it aborted the whole load.
                int pipeAt = line.indexOf('|');
                String word = (pipeAt >= 0 ? line.substring(0, pipeAt) : line).trim();
                if (!word.isEmpty()) {
                    stopWords.add(word);
                }
            }
        }
        return new StopFilter(stream, stopWords);
    } catch (Exception e) {
        // Deliberately broad: any failure (including a null lang) falls back to the
        // unfiltered stream instead of propagating to the caller.
        logger.error("Error creating stop filter for lang={}", lang, e);
    }
    return stream;
}