这是我遵循的步骤(并非唯一的解决方案):
1. 通过 parseHtml 方法,配合一个伪 URL 来解析 HTML 字符串;
2. 通过 XPath 获取第二张表;
3. 使用两层嵌套循环进行迭代(外层用 for-each 遍历行,内层用迭代器遍历单元格,以便正确地添加分隔符)。
ExtractTableData:
import java.net.URL;
import com.gargoylesoftware.htmlunit.StringWebResponse;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HTMLParser;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow.CellIterator;
/**
 * Example that parses an in-memory HTML string with HtmlUnit and prints the
 * rows of the second table found in the document.
 *
 * <p>Each table row is written to standard output on its own line, with the
 * text of its cells joined by a ":" separator.
 */
public class ExtractTableData {
/**
 * Builds an {@code HtmlPage} from a hard-coded HTML string via
 * {@code HTMLParser.parseHtml}, using a placeholder URL and the current
 * window of a freshly created {@code WebClient}, then prints the second
 * table row by row.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception declared so that any failure while building the URL or
 *     parsing the page propagates to the caller
 */
public static void main(String[] args) throws Exception {
// NOTE(review): the HTML literal below appears to have lost its markup
// (the tags) during extraction and is not a valid Java string as shown;
// restore the original HTML text before compiling.
String html = "
\n" + "\n"+ "
+ "
\n"+ "
\n"+ "
+ "
Welcome!
\n" + "\n" + "\n"
+ "Welcome to the Apache ActiveMQ Console of localhost (ID:TOOLCONTROLPJX526-524666-65544585445-2:3)\n"
+ "
\n" + "\n" + "\n"
+ "You can find more information about Apache ActiveMQ on the Apache ActiveMQ Site\n"
+ "
\n" + "\n" + "Broker
\n" + "\n" + "\n" + "+ "
Name\n" + " localhost\n" + " \n" + " \n"+ "
Version\n" + " 5.13.3\n" + " \n" + " \n"+ "
ID\n" + " ID:TOOLCONTROLPJX526-524666-65544585445-2:3\n"+ "
\n" + " \n" + " Uptime\n"+ "
17 days 13 hours\n" + " \n" + " \n"+ "
Store percent used\n" + " 19\n" + " \n"+ "
\n" + " Memory percent used\n" + " 0\n"+ "
\n" + "";WebClient webClient = new WebClient();
HtmlPage page = HTMLParser.parseHtml(new StringWebResponse(html, new URL("http://dummy.url.for.parsing.com/")),
webClient.getCurrentWindow());
// NOTE(review): webClient is not closed anywhere in this method; confirm
// whether it should be released once parsing is done.
// The XPath selects every table element in the document; index 1 picks the
// second match, so the page is expected to contain at least two tables.
final HtmlTable table = (HtmlTable) page.getByXPath("//table").get(1);
// Outer loop: one output line per table row.
for (final HtmlTableRow row : table.getRows()) {
CellIterator cellIterator = row.getCellIterator();
// Print the first cell bare, then prefix every following cell with ":",
// so a row never starts or ends with the separator.
if (cellIterator.hasNext()) {
System.out.print(cellIterator.next().asText());
while (cellIterator.hasNext()) {
System.out.print(":" + cellIterator.next().asText());
}
}
// Terminate the current row's line (a row without cells yields an empty line).
System.out.println();
}
}
}
输出:
Name:localhost
Version:5.13.3
ID:ID:TOOLCONTROLPJX526-524666-65544585445-2:3
Uptime:17 days 13 hours
Store percent used:19
Memory percent used:0
Temp percent used:0