WordNet
WordNet.java
问题主要是判断G是不是只有一个根的有向无环图(rooted DAG)。无环可以通过BFS或DFS去判断,这里使用内置的类DirectedCycle去判断;不难看出一个点是根的条件是它的出度为0。在无环的前提下,从任意一点出发沿出边一直走,由于不会重复经过同一个点,必然在有限步内停在某个出度为0的点上,因此每个(弱)连通分量必然至少有一个出度为0的点。所以只要判断出图中只有一个出度为0的点,就说明所有点都能到达这同一个点,则既能确定该有向图为(弱)连通图,又能确定该图只有一个根。
import edu.princeton.cs.algs4.Digraph;
import edu.princeton.cs.algs4.DirectedCycle;
import edu.princeton.cs.algs4.In;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * WordNet lexicon built from a synsets file and a hypernyms file.
 *
 * <p>Each synset id is a vertex of a digraph and each hypernym relation is an
 * edge from the synset to its hypernym. The graph must be acyclic and have
 * exactly one vertex with out-degree zero (the root); otherwise construction
 * fails with {@link IllegalArgumentException}.
 */
public class WordNet {
    /** Shortest-ancestral-path engine over the hypernym graph. */
    private final SAP sap;
    /** Synset id -> synset text (second field of a synsets line). */
    private final Map<Integer, String> ids;
    /** Noun -> ids of every synset that contains it. */
    private final Map<String, Set<Integer>> nouns;

    /**
     * Builds the WordNet from the two input files.
     *
     * @param synsets   name of the synsets file (comma-separated: id, synset, ...)
     * @param hypernyms name of the hypernyms file (comma-separated: id, hypernym ids...)
     * @throws IllegalArgumentException if an argument is null, a line is
     *         malformed, or the resulting graph is not a single-rooted DAG
     */
    public WordNet(String synsets, String hypernyms) {
        if (synsets == null || hypernyms == null) {
            throw new IllegalArgumentException("file names must not be null");
        }
        ids = new HashMap<>();
        nouns = new HashMap<>();

        // Read and index the synsets.
        readSynsets(synsets);

        // Read the hypernyms and build the graph (one vertex per synset).
        Digraph G = readHypernyms(hypernyms, ids.size());

        // Reject graphs that contain a cycle or do not have exactly one root.
        if (!isRootedDag(G)) {
            throw new IllegalArgumentException("hypernym graph is not a rooted DAG");
        }

        // Build the SAP structure for the graph.
        sap = new SAP(G);
    }

    // Parses the synsets file into the id -> synset and noun -> ids maps.
    private void readSynsets(String synsets) {
        In in = new In(synsets);
        try {
            while (in.hasNextLine()) {
                String line = in.readLine();
                if (line == null || line.trim().isEmpty()) {
                    continue; // a blank line carries no synset
                }
                String[] fields = line.split(",");
                if (fields.length < 2) {
                    throw new IllegalArgumentException("malformed synset line: " + line);
                }
                int id = Integer.parseInt(fields[0]);
                ids.put(id, fields[1]);
                // Nouns inside one synset are separated by single spaces.
                for (String noun : fields[1].split(" ")) {
                    Set<Integer> synsetIds = nouns.get(noun);
                    if (synsetIds == null) {
                        synsetIds = new HashSet<>();
                        nouns.put(noun, synsetIds);
                    }
                    synsetIds.add(id);
                }
            }
        } finally {
            // Release the reader even when a line fails to parse.
            in.close();
        }
    }

    // Parses the hypernyms file into a digraph with the given vertex count.
    private static Digraph readHypernyms(String hypernyms, int vertexCount) {
        Digraph G = new Digraph(vertexCount);
        In in = new In(hypernyms);
        try {
            while (in.hasNextLine()) {
                String line = in.readLine();
                if (line == null || line.trim().isEmpty()) {
                    continue; // a blank line carries no relation
                }
                String[] fields = line.split(",");
                int id = Integer.parseInt(fields[0]);
                // Fields after the first are the hypernym ids of this synset.
                for (int i = 1; i < fields.length; i++) {
                    G.addEdge(id, Integer.parseInt(fields[i]));
                }
            }
        } finally {
            in.close();
        }
        return G;
    }

    // True when the graph has no directed cycle and exactly one vertex of out-degree zero.
    private static boolean isRootedDag(Digraph G) {
        if (new DirectedCycle(G).hasCycle()) {
            return false;
        }
        int rootNum = 0;
        for (int v = 0; v < G.V(); v++) {
            if (G.outdegree(v) == 0) {
                rootNum++;
            }
        }
        return rootNum == 1;
    }

    /**
     * Returns all WordNet nouns.
     *
     * @return a read-only view of the nouns
     */
    public Iterable<String> nouns() {
        // Read-only view so callers cannot remove entries from the index.
        return Collections.unmodifiableSet(nouns.keySet());
    }

    /**
     * Is the word a WordNet noun?
     *
     * @param word the word to look up
     * @return true if the word appears in some synset
     * @throws IllegalArgumentException if {@code word} is null
     */
    public boolean isNoun(String word) {
        if (word == null) {
            throw new IllegalArgumentException("word must not be null");
        }
        return nouns.containsKey(word);
    }

    /**
     * Distance between nounA and nounB: the length of the shortest ancestral
     * path between the synsets containing them.
     *
     * @throws IllegalArgumentException if either argument is null or not a WordNet noun
     */
    public int distance(String nounA, String nounB) {
        requireNouns(nounA, nounB);
        return sap.length(nouns.get(nounA), nouns.get(nounB));
    }

    /**
     * A synset (second field of the synsets file) that is the common ancestor
     * of nounA and nounB in a shortest ancestral path.
     *
     * @throws IllegalArgumentException if either argument is null or not a WordNet noun
     */
    public String sap(String nounA, String nounB) {
        requireNouns(nounA, nounB);
        return ids.get(sap.ancestor(nouns.get(nounA), nouns.get(nounB)));
    }

    // Shared argument check for the two-noun queries.
    private void requireNouns(String nounA, String nounB) {
        if (nounA == null || nounB == null || !isNoun(nounA) || !isNoun(nounB)) {
            throw new IllegalArgumentException("arguments must be WordNet nouns");
        }
    }

    // do unit testing of this class
    public static void main(String[] args) {
        WordNet wn = new WordNet("synsets.txt", "hypernyms.txt");
        System.out.println(wn.distance("1750s", "1790s"));
    }
}
SAP.java
length(int v, int w):
两次BFS先求出图中v到其余点的最短距离、w到其余点的最短距离,再遍历图中所有点,判断v和w是否都有路径到该点i,如果是则判断v->i和w->i路径之和是否小于先前所求最小路径和。
length(Iterable v, Iterable w):
问题关键在于如何求出图中一个点集合到其余所有点的最短距离,其余只要稍微修改一下BFS的实现即可:初始化队列时,将点集合中所有的点入队后再进行BFS。剩余步骤与整数参数版本的方法一致。
为了代码简洁使用了提供的BreadthFirstDirectedPaths类,具体实现可以直接阅读源码。
import edu.princeton.cs.algs4.BreadthFirstDirectedPaths;
import edu.princeton.cs.algs4.Digraph;
import edu.princeton.cs.algs4.In;
import edu.princeton.cs.algs4.StdIn;
import edu.princeton.cs.algs4.StdOut;
/**
 * Shortest ancestral path (SAP) queries on a digraph.
 *
 * <p>An ancestral path between v and w is a directed path from v to a common
 * ancestor x together with a directed path from w to x. Each query runs a
 * breadth-first search from each side and picks the vertex reachable from both
 * with the smallest summed distance.
 *
 * <p>The class keeps no per-query state: every query computes and returns its
 * own result.
 */
public class SAP {
    // Slots of the {length, ancestor} pair returned by the private searches.
    private static final int LENGTH = 0;
    private static final int ANCESTOR = 1;

    private final Digraph G;

    /**
     * Constructor takes a digraph (not necessarily a DAG).
     *
     * @throws IllegalArgumentException if {@code G} is null
     */
    public SAP(Digraph G) {
        if (G == null) {
            throw new IllegalArgumentException("digraph must not be null");
        }
        // Copy G instead of keeping the reference, so later changes made by
        // the caller cannot affect this object.
        this.G = new Digraph(G);
    }

    /**
     * Length of shortest ancestral path between v and w; -1 if no such path.
     *
     * @throws IllegalArgumentException if a vertex is outside {@code [0, V)}
     */
    public int length(int v, int w) {
        return search(v, w)[LENGTH];
    }

    /**
     * A common ancestor of v and w that participates in a shortest ancestral
     * path; -1 if no such path.
     *
     * @throws IllegalArgumentException if a vertex is outside {@code [0, V)}
     */
    public int ancestor(int v, int w) {
        return search(v, w)[ANCESTOR];
    }

    /**
     * Length of shortest ancestral path between any vertex in v and any vertex
     * in w; -1 if no such path (including when either set is empty).
     *
     * @throws IllegalArgumentException if an argument is null, contains a null
     *         element, or contains a vertex outside {@code [0, V)}
     */
    public int length(Iterable<Integer> v, Iterable<Integer> w) {
        return search(v, w)[LENGTH];
    }

    /**
     * A common ancestor that participates in a shortest ancestral path between
     * the two vertex sets; -1 if no such path (including when either set is empty).
     *
     * @throws IllegalArgumentException if an argument is null, contains a null
     *         element, or contains a vertex outside {@code [0, V)}
     */
    public int ancestor(Iterable<Integer> v, Iterable<Integer> w) {
        return search(v, w)[ANCESTOR];
    }

    // Single-source query: validates both vertices, then combines two BFS runs.
    private int[] search(int v, int w) {
        if (v < 0 || v >= G.V() || w < 0 || w >= G.V()) {
            throw new IllegalArgumentException("vertex out of range");
        }
        return closest(new BreadthFirstDirectedPaths(G, v),
                       new BreadthFirstDirectedPaths(G, w));
    }

    // Multi-source query: validates both sets, then combines two BFS runs.
    private int[] search(Iterable<Integer> v, Iterable<Integer> w) {
        if (v == null || w == null) {
            throw new IllegalArgumentException("vertex sets must not be null");
        }
        // Validate both sets completely before deciding anything else.
        boolean vNonEmpty = validate(v);
        boolean wNonEmpty = validate(w);
        if (!vNonEmpty || !wNonEmpty) {
            // No source on one side means no ancestral path. Answer here rather
            // than relying on how the BFS class treats an empty source set
            // (NOTE(review): newer algs4 releases appear to reject it - confirm).
            return new int[] {-1, -1};
        }
        return closest(new BreadthFirstDirectedPaths(G, v),
                       new BreadthFirstDirectedPaths(G, w));
    }

    // Checks every element of the set; returns whether the set has any element.
    private boolean validate(Iterable<Integer> vertices) {
        boolean nonEmpty = false;
        // Elements are boxed Integers, so an element may itself be null.
        for (Integer x : vertices) {
            if (x == null || x < 0 || x >= G.V()) {
                throw new IllegalArgumentException("invalid vertex: " + x);
            }
            nonEmpty = true;
        }
        return nonEmpty;
    }

    // Scans all vertices for the one reachable from both searches with the
    // smallest summed distance; returns {length, ancestor}, or {-1, -1} if none.
    private int[] closest(BreadthFirstDirectedPaths fromV, BreadthFirstDirectedPaths fromW) {
        int minLength = -1;
        int bestAncestor = -1;
        for (int x = 0; x < G.V(); x++) {
            if (fromV.hasPathTo(x) && fromW.hasPathTo(x)) {
                int total = fromV.distTo(x) + fromW.distTo(x);
                // Strict '<' keeps the lowest-numbered vertex among ties.
                if (minLength == -1 || total < minLength) {
                    minLength = total;
                    bestAncestor = x;
                }
            }
        }
        return new int[] {minLength, bestAncestor};
    }

    // do unit testing of this class
    public static void main(String[] args) {
        In in = new In(args[0]);
        Digraph G;
        try {
            G = new Digraph(in);
        } finally {
            in.close();
        }
        SAP sap = new SAP(G);
        while (!StdIn.isEmpty()) {
            int v = StdIn.readInt();
            int w = StdIn.readInt();
            int length = sap.length(v, w);
            int ancestor = sap.ancestor(v, w);
            StdOut.printf("length = %d, ancestor = %d\n", length, ancestor);
        }
    }
}
Outcast.java
直接按照定义写即可。
import edu.princeton.cs.algs4.In;
import edu.princeton.cs.algs4.StdOut;
/**
 * Finds the outcast of a list of WordNet nouns: the noun whose summed
 * distance to all the other nouns is largest.
 */
public class Outcast {
    private final WordNet wordNet;

    /**
     * Constructor takes a WordNet object.
     *
     * @throws IllegalArgumentException if {@code wordnet} is null
     */
    public Outcast(WordNet wordnet) {
        if (wordnet == null) {
            throw new IllegalArgumentException("wordnet must not be null");
        }
        this.wordNet = wordnet;
    }

    /**
     * Given an array of WordNet nouns, return an outcast.
     *
     * @param nouns the candidate nouns
     * @return the first noun with the largest total distance to the others,
     *         or null if the array is empty
     * @throws IllegalArgumentException if {@code nouns} is null (null or
     *         unknown elements are rejected by {@code WordNet.distance})
     */
    public String outcast(String[] nouns) {
        if (nouns == null) {
            throw new IllegalArgumentException("nouns must not be null");
        }
        // Each unordered pair contributes the same distance to both of its
        // members, so query it once and credit both totals.
        int[] totals = new int[nouns.length];
        for (int i = 0; i < nouns.length; i++) {
            for (int j = i + 1; j < nouns.length; j++) {
                int dis = wordNet.distance(nouns[i], nouns[j]);
                totals[i] += dis;
                totals[j] += dis;
            }
        }
        int maxDis = -1;
        String outcast = null;
        for (int i = 0; i < nouns.length; i++) {
            // Strict '>' keeps the earliest noun among ties.
            if (totals[i] > maxDis) {
                maxDis = totals[i];
                outcast = nouns[i];
            }
        }
        return outcast;
    }

    // Test client: args[0] = synsets file, args[1] = hypernyms file,
    // remaining args = files of nouns; prints the outcast of each file.
    public static void main(String[] args) {
        WordNet wordnet = new WordNet(args[0], args[1]);
        Outcast outcast = new Outcast(wordnet);
        for (int t = 2; t < args.length; t++) {
            In in = new In(args[t]);
            String[] nouns;
            try {
                nouns = in.readAllStrings();
            } finally {
                // One reader is opened per file; release each before moving on.
                in.close();
            }
            StdOut.println(args[t] + ": " + outcast.outcast(nouns));
        }
    }
}