// Build dependency (declare in build.gradle, not in this file): implementation 'org.jsoup:jsoup:1.13.1'
package cy.main.mytest;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import static org.junit.Assert.*;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
/**
 * Chapter-by-chapter web scraper packaged as a local JUnit test.
 *
 * <p>Starting at {@link #url}, every pass downloads one page with jsoup, appends the chapter
 * title and body text to the output file opened by {@link #createFile()}, and then follows the
 * anchor labelled "下一章" (next chapter). The run stops when that anchor no longer points at a
 * {@code .htm} page, or when the page has no such anchor at all.
 *
 * <p>All state is held in static fields, so the class is not safe for concurrent use.
 *
 * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
 */
public class ExampleUnitTest {
    /** Site root that the hrefs found on a page are appended to. */
    private static final String SITE_ROOT = "http://www.ibiqu.org";

    /** Not read anywhere in this class; kept because it is package-visible. */
    int m = 1;

    /** Loop flag: cleared once the last chapter has been written. */
    static boolean isRun = true;

    // 13000

    /** Page to download on the next pass; advanced by {@link #parse(String)}. */
    static String url = "http://www.ibiqu.org/book/123189/188178476.htm";

    /** Output writer; {@code null} until {@link #createFile()} succeeds and after it is closed. */
    static BufferedWriter writer;

    /**
     * Entry point: opens the output file and scrapes pages until {@link #isRun} is cleared.
     *
     * <p>Exceptions raised while scraping are printed rather than rethrown, and the writer is
     * always closed on the way out.
     */
    @Test
    public void addition_isCorrect() {
        createFile();
        if (writer == null) {
            // createFile() already reported why the file could not be opened; scraping without
            // an output file would be pointless.
            return;
        }
        try {
            while (isRun) {
                start();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // No-op when endTxt() already closed the writer.
            closeWriter();
        }
    }

    /** Processes the page currently stored in {@link #url}. */
    public static void start() {
        parse(url);
    }

    /**
     * Downloads one page, writes its title and content, and advances {@link #url}.
     *
     * <p>If the download fails, the method sleeps ten seconds and returns with {@link #url}
     * unchanged, so the caller's loop retries the same page.
     *
     * @param serverString address of the page to download
     * @throws RuntimeException if the retry sleep is interrupted; the interrupt flag is restored
     *     before throwing
     */
    private static void parse(String serverString) {
        System.out.println(serverString);
        Document document = fetch(serverString);
        if (document == null) {
            System.err.println("链接错误 正在重试。。。");
            try {
                Thread.sleep(10000);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to code further up the stack.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
            return;
        }

        // One query serves all three passes below.
        Elements divs = document.select("div");
        writeTitle(divs);
        writeContent(divs);

        String href = findNextChapterHref(divs);
        if (href == null) {
            // Without a next-chapter link the same page would be downloaded and appended
            // forever, so treat this as the end of the run.
            System.err.println("No next-chapter link found on " + serverString);
            endTxt();
            isRun = false;
            return;
        }
        System.out.println(href);
        if (href.contains(".htm")) {
            url = SITE_ROOT + href;
        } else {
            // The link no longer targets a chapter page: the last chapter has been written.
            endTxt();
            isRun = false;
        }
    }

    /**
     * Fetches a page with jsoup, using a ten-second timeout and a desktop browser User-Agent.
     *
     * @return the parsed document, or {@code null} if the request failed (the exception message
     *     is printed to stderr)
     */
    private static Document fetch(String pageUrl) {
        try {
            Connection conn = Jsoup.connect(pageUrl).timeout(10000);
            conn.header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0");
            return conn.get();
        } catch (Exception e) {
            System.err.println(e.getMessage());
            return null;
        }
    }

    /** Writes the {@code h1} text of every div whose class attribute is exactly "bookname". */
    private static void writeTitle(Elements divs) {
        for (Element div : divs) {
            if (div.attr("class").equals("bookname")) {
                String heading = "\r\n" + div.select("h1").text();
                getTxt(heading);
                System.out.println(heading);
            }
        }
    }

    /** Writes the text of every div with id "content", one space-separated piece per line. */
    private static void writeContent(Elements divs) {
        for (Element div : divs) {
            if (div.attr("id").equals("content")) {
                String body = div.text();
                System.out.println(body.length());
                for (String piece : body.split(" ")) {
                    getTxt(piece);
                }
            }
        }
    }

    /**
     * Looks inside divs whose class attribute is exactly "bottem2" for the first anchor whose
     * text is "下一章".
     *
     * @return that anchor's {@code href} attribute, or {@code null} if there is no such anchor
     */
    private static String findNextChapterHref(Elements divs) {
        for (Element div : divs) {
            if (!div.attr("class").equals("bottem2")) {
                continue;
            }
            for (Element anchor : div.select("a")) {
                if (anchor.text().equals("下一章")) {
                    return anchor.attr("href");
                }
            }
        }
        return null;
    }

    /**
     * Removes every URL from {@code msg} and appends the result, followed by CRLF, to the output.
     *
     * <p>NOTE(review): {@code Patterns} is not declared or imported in this file; presumably it
     * is {@code android.util.Patterns} or a class in this package. Confirm that it is available
     * where this test runs.
     *
     * @param msg text to clean and write
     */
    public static void getTxt(String msg) {
        Matcher matcher = Patterns.WEB_URL.matcher(msg);
        // replaceAll strips every match, not only occurrences of the first one found.
        String cleaned = matcher.replaceAll("");
        saveTxt(cleaned + "\r\n");
    }

    /** Announces the end of the run and closes the output writer. */
    public static void endTxt() {
        System.out.println("任务结束");
        closeWriter();
    }

    /**
     * Opens {@code D:\txt\2.txt} in append mode as UTF-8 and stores the writer in
     * {@link #writer}, creating the parent directory first if it is missing.
     *
     * <p>On failure the stack trace is printed and {@link #writer} is left {@code null}.
     */
    public static void createFile() {
        // Release a writer left over from an earlier call so it is not leaked.
        closeWriter();
        File f = new File("D:\\txt\\2.txt");
        File dir = f.getParentFile();
        if (dir != null && !dir.exists()) {
            // A failure here surfaces as FileNotFoundException just below.
            dir.mkdirs();
        }
        FileOutputStream writerStream = null;
        try {
            writerStream = new FileOutputStream(f, true);
            writer = new BufferedWriter(new OutputStreamWriter(writerStream, "UTF-8"));
        } catch (FileNotFoundException | UnsupportedEncodingException e) {
            e.printStackTrace();
            if (writerStream != null) {
                try {
                    writerStream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done; the open failure was already reported.
                }
            }
        }
    }

    /**
     * Appends {@code msg} to the output file and flushes immediately.
     *
     * @param msg text to write
     * @throws IllegalStateException if no output file is open
     */
    public static void saveTxt(String msg) {
        if (writer == null) {
            throw new IllegalStateException("Output file is not open; call createFile() first");
        }
        try {
            writer.write(msg);
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes {@link #writer} if it is open and resets it to {@code null}. */
    private static void closeWriter() {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writer = null;
        }
    }
}