package cn.luke.tool;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Command-line scraper built on jsoup.
 *
 * <p>It loads the index page at {@link #BASE_URL}, follows every anchor inside a {@code <ul>}
 * whose {@code href} starts with {@link #DETAIL_LINK_PREFIX}, and stores a local copy of each
 * linked page together with the scripts and stylesheets the page references. A plain-text log of
 * the visited titles and hrefs is written to {@code a.txt} in the output directory.
 */
public class digger {

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36";

    /** Connect/read timeout for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Index page of the site being mirrored; always ends with a slash. */
    // NOTE(review): the host name looks garbled - confirm it against the real site.
    private static final String BASE_URL = "http://serv.icem.e.d.calcs.med.live.cn/web/";

    /** Only anchors whose href starts with this prefix are mirrored. */
    // NOTE(review): this literal looks garbled (presumably "show.php?") - confirm before relying on it.
    private static final String DETAIL_LINK_PREFIX = "sh...ow.php?";

    /** Path prefix used by the remote site for page assets. */
    private static final String REMOTE_ASSET_PREFIX = "calculate/";

    /** File-name prefix that replaces {@link #REMOTE_ASSET_PREFIX} in the local copy. */
    private static final String LOCAL_ASSET_PREFIX = "tools_";

    /** Sub-directory of the output directory that receives the pages and their assets. */
    private static final String PAGE_DIR = "tool";

    /**
     * Runs the scraper. I/O failures are printed and do not propagate: a failure on one page or
     * asset does not stop the remaining ones.
     *
     * @param args unused
     * @throws IOException declared for compatibility; I/O errors are handled internally
     */
    public static void main(String[] args) throws IOException {
        String base = "F:\\logs\\";
        String path = base + "a.txt";
        // try-with-resources: the log is flushed and closed even when a request fails.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path), StandardCharsets.UTF_8))) {
            bw.write(BASE_URL);
            bw.newLine();

            Document document = fetchDocument(BASE_URL);
            Elements h3 = document.select("h3");
            Elements h4 = document.select("h4");
            Elements lists = document.select("ul");

            if (!h3.isEmpty()) {
                System.out.println(h3.get(0).text());
            }
            for (int i = 0; i < lists.size(); i++) {
                // The page may contain fewer <h4> headings than <ul> lists.
                String heading = i < h4.size() ? h4.get(i).text() : "";
                System.err.println(heading + "---" + lists.get(i).childNodeSize());
            }

            // "ul a" yields each anchor once, even when <ul> elements are nested.
            Elements anchors = document.select("ul a");
            for (int i = 0; i < anchors.size(); i++) {
                String href = anchors.get(i).attr("href");
                if (!href.startsWith(DETAIL_LINK_PREFIX)) {
                    continue;
                }
                String title = anchors.get(i).select("li").text();
                System.out.println("href." + href);
                System.out.println("title" + title);
                try {
                    bw.newLine();
                    bw.write(title);
                    bw.newLine();
                    bw.write(href);
                    bw.newLine();
                    mirrorPage(base, href);
                } catch (IOException e) {
                    // Best effort: report the failure and carry on with the next link.
                    e.printStackTrace();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Fetches {@code url} and parses the response as an HTML document. */
    private static Document fetchDocument(String url) throws IOException {
        return Jsoup.connect(url)
                .timeout(TIMEOUT_MILLIS)
                .ignoreContentType(true)
                .userAgent(USER_AGENT)
                .get();
    }

    /** Fetches {@code url} and returns the raw response body without HTML parsing. */
    private static byte[] fetchBytes(String url) throws IOException {
        return Jsoup.connect(url)
                .timeout(TIMEOUT_MILLIS)
                .ignoreContentType(true)
                .userAgent(USER_AGENT)
                .execute()
                .bodyAsBytes();
    }

    /** Resolves a site-relative reference against {@link #BASE_URL} without doubling the slash. */
    private static String toAbsoluteUrl(String ref) {
        return BASE_URL + (ref.startsWith("/") ? ref.substring(1) : ref);
    }

    /** Downloads one linked page plus its scripts and stylesheets and saves the rewritten HTML. */
    private static void mirrorPage(String base, String href) throws IOException {
        String htmlName = href.substring(href.indexOf('=') + 1);
        // The name comes from remote markup; refuse anything that could climb out of the output directory.
        if (htmlName.contains("..")) {
            throw new IOException("Unsafe page name derived from href: " + href);
        }

        Document page = fetchDocument(toAbsoluteUrl(href));
        String html = page.html();

        Elements scripts = page.select("script");
        for (int i = 0; i < scripts.size(); i++) {
            if (!scripts.get(i).attr("type").isEmpty()) {
                // Scripts that declare a type attribute are stripped from the saved page.
                html = html.replace(scripts.get(i).outerHtml(), "");
            } else {
                downloadAsset(base, scripts.get(i).attr("src"));
            }
        }

        Elements links = page.select("link");
        for (int i = 0; i < links.size(); i++) {
            downloadAsset(base, links.get(i).attr("href"));
        }

        // Rewrite asset references once, after stripping: rewriting earlier would change the
        // text that the outerHtml() matches above rely on.
        html = html.replace(REMOTE_ASSET_PREFIX, LOCAL_ASSET_PREFIX);

        String htmlPath = base + PAGE_DIR + File.separator + htmlName + ".html";
        writeFile(htmlPath, base, (html + System.lineSeparator()).getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Downloads one script or stylesheet next to the saved pages. References that are empty
     * (inline scripts), absolute, protocol-relative, {@code data:} URIs or contain {@code ..}
     * are skipped. Failures are printed and otherwise ignored.
     */
    private static void downloadAsset(String base, String ref) {
        if (ref.isEmpty() || ref.startsWith("//") || ref.startsWith("data:")
                || ref.contains("://") || ref.contains("..")) {
            return;
        }
        String savePath = localAssetPath(base, ref);
        System.out.println(ref);
        System.out.println(savePath);
        try {
            // Fetch first so that a failed request does not leave an empty file behind.
            writeFile(savePath, base, fetchBytes(toAbsoluteUrl(ref)));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Maps a remote asset reference to its local file. The mapping mirrors the rewrite applied
     * to the HTML, relative to the page directory, so references in the saved page resolve.
     */
    private static String localAssetPath(String base, String ref) {
        String relative = ref.replace(REMOTE_ASSET_PREFIX, LOCAL_ASSET_PREFIX)
                .replace("/", File.separator);
        return base + PAGE_DIR + File.separator + relative;
    }

    /** Writes {@code content} to {@code destPath}, creating the file and its parent directories if needed. */
    private static void writeFile(String destPath, String base, byte[] content) throws IOException {
        File target = new File(destPath);
        if (!target.exists()) {
            createFile(destPath, base);
        }
        try (FileOutputStream out = new FileOutputStream(target)) {
            out.write(content);
        }
    }

    /**
     * Creates an empty file, creating missing parent directories first.
     *
     * @param destFileName path of the file to create
     * @param base         output root; a {@code destFileName} equal to it, or one ending with the
     *                     file separator, is treated as a directory and reported as success
     *                     without creating anything
     * @return {@code true} if the file was created (or the path was treated as a directory);
     *         {@code false} if the file already exists or could not be created
     */
    public static boolean createFile(String destFileName, String base) {
        if (destFileName.equals(base) || destFileName.endsWith(File.separator)) {
            return true;
        }
        File file = new File(destFileName);
        if (file.exists()) {
            System.out.println("创建单个文件" + destFileName + "失败,目标文件已存在!");
            return false;
        }
        // Make sure the parent directory exists; a bare file name has no parent.
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            System.out.println("目标文件所在目录不存在,准备创建它!");
            if (!parent.mkdirs()) {
                System.out.println("创建目标文件所在目录失败!");
                return false;
            }
        }
        try {
            if (file.createNewFile()) {
                System.out.println("创建单个文件" + destFileName + "成功!");
                return true;
            }
            System.out.println("创建单个文件" + destFileName + "失败!");
            return false;
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("创建单个文件" + destFileName + "失败!" + e.getMessage());
            return false;
        }
    }
}
/*
 * Required Maven dependency (belongs in pom.xml, not in this Java source file):
 *
 * <dependency>
 *     <groupId>org.jsoup</groupId>
 *     <artifactId>jsoup</artifactId>
 *     <version>1.11.3</version>
 * </dependency>
 */