Maven 依赖：
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<!-- jsoup provides the org.jsoup.* classes (Jsoup, Document, Element, Elements) imported by the Java example. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
要解析的 HTML 内容（我主要解析 table 部分，所以这里只给出 table 部分的内容）：
<table class="logview">
<tr class="header">
<td colspan=5>HT2 cat localhost.localdomain 192.168.104.165 Cat-ConfigSyncTask 167 Cat-ConfigSyncTask-2 cat-c0a868a5-429415-1544 null null </td>
</tr>
<tr class="odd">
<td>t15:25:04.453</td>
<td>TimerSync</td>
<td>resource-config</td>
<td></td>
<td></td>
</tr>
<tr class="even">
<td> t15:25:04.453</td>
<td>SQL</td>
<td>config.findByName</td>
<td></td>
<td></td>
</tr>
<tr class="odd">
<td> E15:25:04.702</td>
<td>SQL.Method</td>
<td>SELECT</td>
<td> </td>
<td>["resource-config"]</td>
</tr>
<tr class="even">
<td> E15:25:04.702</td>
<td>SQL.Database</td>
<td>jdbc:mysql://192.168.104.101:3306/cat?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&socketTimeout=120000</td>
<td> </td>
<td></td>
</tr>
<tr class="odd">
<td> T15:25:04.702</td>
<td>SQL</td>
<td>config.findByName</td>
<td> </td>
<td>249ms SELECT c.id,c.`name`,c.content,c.creation_date,c.modify_date FROM config c WHERE c.name = ?</td>
</tr>
<tr class="even">
<td>T15:25:04.702</td>
<td>TimerSync</td>
<td>resource-config</td>
<td> </td>
<td>249ms </td>
</tr>
</table>
Java 代码
API 参考：
https://www.cnblogs.com/boy1025/p/5040495.html
http://www.open-open.com/jsoup/load-document-from-url.htm
本例的代码
package com.flyer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Fetches an HTML page over HTTP and prints the text of every table cell found
 * inside elements with the CSS class {@code logview}, one row at a time.
 *
 * <p>Rows that carry the CSS class {@code header} are skipped.
 *
 * <p>flyer 2018.12.28
 */
public class Test {

    /** Address of the page whose {@code logview} table is printed. */
    private static final String LOGVIEW_URL =
            "http://192.168.104.164:8080/cat/r/m/cat-c0a868a5-429353-669?domain=cat";

    /**
     * Downloads {@link #LOGVIEW_URL}, walks every {@code tr} below the
     * {@code logview} elements and prints each {@code td} text prefixed with
     * {@code *******}, followed by a separator line after every row.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Document doc = Jsoup.connect(LOGVIEW_URL).get();
            Elements rows = doc.getElementsByClass("logview").select("tr");
            for (Element row : rows) {
                // hasClass inspects only the row itself. getElementsByClass would also
                // match descendants, so a data row containing a nested element with
                // class "header" would be skipped by mistake.
                if (row.hasClass("header")) {
                    continue;
                }
                for (Element cell : row.select("td")) {
                    System.out.println("*******" + cell.text());
                }
                System.out.println("开始下一个tr:");
                System.out.println("");
            }
        } catch (IOException e) {
            // Say which request failed before dumping the trace.
            System.err.println("Failed to fetch " + LOGVIEW_URL);
            e.printStackTrace();
        }
    }
}
输出（实际运行时抓取的页面与上面的示例 HTML 不是同一条消息，所以时间戳等内容不同，但表格结构一致）：
"C:\Program Files\Java\jdk1.8.0_152\bin\java.exe" "-javaagent:D:\program Files\JetBrains\IntelliJ IDEA 2018.3.2\lib\idea_rt.jar=60158:D:\program Files\JetBrains\IntelliJ IDEA 2018.3.2\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_152\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_152\jre\lib\resources.jar;C:\Program 
Files\Java\jdk1.8.0_152\jre\lib\rt.jar;D:\work\code\test2018\target\classes;C:\Users\fei.jiang\.m2\repository\org\apache\httpcomponents\httpclient\4.5.2\httpclient-4.5.2.jar;C:\Users\fei.jiang\.m2\repository\org\apache\httpcomponents\httpcore\4.4.4\httpcore-4.4.4.jar;C:\Users\fei.jiang\.m2\repository\commons-logging\commons-logging\1.2\commons-logging-1.2.jar;C:\Users\fei.jiang\.m2\repository\commons-codec\commons-codec\1.9\commons-codec-1.9.jar;C:\Users\fei.jiang\.m2\repository\org\apache\httpcomponents\httpclient-cache\4.5\httpclient-cache-4.5.jar;C:\Users\fei.jiang\.m2\repository\org\apache\httpcomponents\httpmime\4.3.2\httpmime-4.3.2.jar;C:\Users\fei.jiang\.m2\repository\org\jsoup\jsoup\1.11.3\jsoup-1.11.3.jar" com.flyer.Test
*******t01:11:03.993
*******TimerSync
*******server-config
*******
*******
开始下一个tr:
*******t01:11:03.994
*******SQL
*******config.findByName
*******
*******
开始下一个tr:
*******E01:11:04.270
*******SQL.Method
*******SELECT
*******
*******["server-config"]
开始下一个tr:
*******E01:11:04.270
*******SQL.Database
*******jdbc:mysql://192.168.104.101:3306/cat?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&socketTimeout=120000
*******
*******
开始下一个tr:
*******T01:11:04.271
*******SQL
*******config.findByName
*******
*******277ms SELECT c.id,c.`name`,c.content,c.creation_date,c.modify_date FROM config c WHERE c.name = ?
开始下一个tr:
*******T01:11:04.270
*******TimerSync
*******server-config
*******
*******277ms
开始下一个tr:
Process finished with exit code 0