在用TextRank算法提取主题词和主题句时,过程中有词语之间和句子之间相互投票的过程。求大神帮忙解析一下这部分代码的含义。明天答辩需要!两个投票过程都涉及到了Map和HashMap,这两种映射结构的运用也是一个理解上的难点。求会的大神帮帮忙。下面第一个是提取关键句时迭代投票的代码:
private void solve() { int cnt = 0; for (List<String> sentence : docs) { double[] scores = bm25.simAll(sentence); // System.out.println(Arrays.toString(scores)); weight[cnt] = scores; weight_sum[cnt] = sum(scores) - scores[cnt]; // 减掉自己,自己跟自己肯定最相似 vertex[cnt] = 1.0; ++cnt; } for (int _ = 0; _ < max_iter; ++_) { double[] m = new double[D];//给m申请一个大小为D的新空间 double max_diff = 0; for (int i = 0; i < D; ++i) { m[i] = 1 - d; for (int j = 0; j < D; ++j) { if (j == i || weight_sum[j] == 0) continue; m[i] += (d * weight[j][i] / weight_sum[j] * vertex[j]); } double diff = Math.abs(m[i] - vertex[i]); if (diff > max_diff) { max_diff = diff; } } vertex = m; if (max_diff <= min_diff) break; } // 我们来排个序吧 for (int i = 0; i < D; ++i) { top.put(vertex[i], i); } }
第二个是提取关键词时迭代投票的代码:
// Iteratively scores the keys of `words` by letting the entries of each key's set
// vote for it, then returns the resulting key -> score map.
// NOTE(review): the enclosing method header and the declarations of `words`, `d`,
// `max_iter` and `min_diff` are outside this excerpt; `words` presumably maps each
// word to the set of words that co-occur with it — confirm against the full file.

// Scores from the previous pass. Empty before the first pass, so lookups below
// treat a missing entry as 0.
Map<String, Float> score = new HashMap<String, Float>();
for (int i = 0; i < max_iter; ++i) {
    // Scores computed in this pass; kept in a separate map so that every update
    // in the pass reads only the previous pass's values from `score`.
    Map<String, Float> m = new HashMap<String, Float>();
    // Largest absolute change of any key's score during this pass.
    float max_diff = 0;
    for (Map.Entry<String, Set<String>> entry : words.entrySet()) {
        String key = entry.getKey();
        Set<String> value = entry.getValue();
        // Every key starts the pass from the base value 1 - d.
        m.put(key, 1 - d);
        for (String element : value) {
            // Size of the voter's own set; its score is split evenly across that many entries.
            int size = words.get(element).size();
            // Skip a key voting for itself, and voters with an empty set (size is the divisor below).
            if (key.equals(element) || size == 0) continue;
            // Add the voter's share: d / size * previous score of `element` (0 if it has none yet).
            m.put(key, m.get(key) + d / size * (score.get(element) == null ? 0 : score.get(element)));
        }
        // Track how far this key moved from its previous score (0 if it had none).
        max_diff = Math.max(max_diff, Math.abs(m.get(key) - (score.get(key) == null ? 0 : score.get(key))));
    }
    // The new scores become the input of the next pass.
    score = m;
    // Stop early once no score changed by more than min_diff.
    if (max_diff <= min_diff) break;
}
return score;
} // closes the enclosing method, whose header is not part of this excerpt