根据文章内容自动抽取标签

	/**
	 * Extracts up to five keywords (tags) from the given text.
	 *
	 * <p>Works in two passes:
	 * <ol>
	 *   <li>Tokenizes {@code text} with {@code IKAnalyzer} and counts how often each
	 *       sufficiently long token occurs.</li>
	 *   <li>Indexes {@code text} as a single document in an in-memory index, queries that
	 *       index once per candidate token, and weights each token as
	 *       {@code maxScore * occurrenceCount}.</li>
	 * </ol>
	 * The weighted candidates are ordered by {@code getSortedHashtableByValue} and the
	 * first five are returned.
	 *
	 * <p>Failures during the indexing/scoring pass are reported via
	 * {@code printStackTrace()} and do not propagate; whatever keywords were collected
	 * up to that point (possibly none) are returned.
	 *
	 * @param text the article body to analyse; when {@code Common.empty(text)} is true
	 *             an empty list is returned
	 * @return a mutable list containing at most five keywords, never {@code null}
	 * @throws IOException if tokenizing {@code text} or closing the token stream fails
	 */
	private List<String> getKeyWord(String text) throws IOException {
		final int maxKeywords = 5;
		List<String> keywords = new ArrayList<String>();
		if (Common.empty(text)) {
			return keywords;
		}

		// NOTE(review): the boolean flag presumably selects IK's "smart" segmentation mode -- confirm.
		Analyzer analyzer = new IKAnalyzer(true);

		// Pass 1: count occurrences of every candidate token.
		Map<String, Integer> words = new HashMap<String, Integer>();
		TokenStream tokenStream = analyzer.tokenStream("*", new StringReader(text));
		try {
			TermAttribute termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
			while (tokenStream.incrementToken()) {
				String word = termAtt.term();
				// Skip very short tokens. NOTE(review): Common.strlen presumably returns a
				// byte/display width so that two-letter ASCII tokens are rejected -- confirm.
				if (word.length() > 1 && Common.strlen(word) > 2) {
					Integer count = words.get(word);
					words.put(word, count == null ? 1 : count + 1);
				}
			}
		} finally {
			// The token stream (and the reader it wraps) was previously never released.
			tokenStream.close();
		}
		if (words.isEmpty()) {
			return keywords;
		}

		// Pass 2: score each candidate against an in-memory index of the text itself.
		Directory dir = null;
		IndexWriter writer = null;
		IndexSearcher searcher = null;
		try {
			String fieldName = "text";
			dir = new RAMDirectory();
			writer = new IndexWriter(dir, analyzer, true,
					IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
			writer.addDocument(doc);
			writer.close();
			writer = null; // closed successfully; nothing left for the finally block to do

			searcher = new IndexSearcher(dir);
			searcher.setSimilarity(new IKSimilarity());
			Map<String, Float> temps = new HashMap<String, Float>();
			for (Entry<String, Integer> candidate : words.entrySet()) {
				String key = candidate.getKey();
				int count = candidate.getValue();
				Query query = IKQueryParser.parse(fieldName, key);
				TopDocs topDocs = searcher.search(query, 1);
				if (topDocs.totalHits > 0) {
					// Weight = relevance score of the token scaled by how often it occurs.
					temps.put(key, topDocs.getMaxScore() * count);
				}
			}

			// NOTE(review): assumes getSortedHashtableByValue orders entries best-first -- confirm.
			Entry<String, Float>[] keywordEntry = getSortedHashtableByValue(temps);
			for (Entry<String, Float> entry : keywordEntry) {
				if (keywords.size() >= maxKeywords) {
					break;
				}
				keywords.add(entry.getKey());
			}
		} catch (Exception e) {
			// Best effort: keyword extraction must not fail the caller.
			e.printStackTrace();
		} finally {
			// Each resource is null-checked: if construction failed part-way, an unguarded
			// close() would throw a NullPointerException out of this finally block, hiding
			// the original error and skipping the remaining cleanup.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (dir != null) {
				try {
					dir.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return keywords;
	}
 
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值