package com.jby.resource.service.impl;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import java.io.*;
/**
-
txt小说下载
-
@author Administrator
*/
public class MyTxt {static String url = “http://www.zzzcn.org/8_8187/3899817.html”;
@Test
public void addition_isCorrect() {
createFile();
start();
}public static void start() {
parse(url);
}private static void parse(String serverString) {
System.out.println(serverString);
// 可以使用Jsoup自带的网络请求方式:
Document document = null;
try {
Connection conn = Jsoup.connect(serverString).timeout(3000);
conn.header(“User-Agent”, “Mozilla/5.0 (Windows NT 6.1; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0”);
document = conn.get();
} catch (Exception e) {
// e.printStackTrace();
System.err.println(e.getMessage());
}
// String string = document.toString();
// System.out.println(“document:” + string);// 解析xml // document = (Document) Jsoup.parse(serverString); if (document == null) { System.err.println("链接错误 正在重试。。。"); try { Thread.sleep(3000); } catch (InterruptedException e) { throw new RuntimeException(e); } start(); return; } Elements title = document.select("div");// 得到table标签中的内容 for (Element item : title) { String name = item.attr("class"); if (name.equals("bookname")) { Elements h1 = item.select("h1"); String txt = "\r\n" + h1.text(); getTxt(txt); System.out.println(txt); } } Elements div = document.select("div");// 得到table标签中的内容 for (Element item : div) { // System.out.println("--------------------------"); // System.out.println(item); String name = item.attr("id"); if (name.equals("content")) { System.out.println(item.text().length()); String[] line = item.text().split(" "); int n = line.length; for (int i = 0; i < n; i++) { getTxt(line[i]); } } } Elements div1 = document.select("div");// 得到table标签中的内容 for (Element item : div1) { String name = item.attr("class"); if (name.equals("bottem2")) { Elements a = item.select("a"); for (Element item1 : a) { String name1 = item1.text(); if (name1.equals("下一章")) { String href = item1.attr("href"); System.out.println(href); if (!href.contains(".html")) { endTxt(); } else { url = "http://www.zzzcn.org" + href; start(); return; } } } } }
}
public static void getTxt(String msg) {
String t = msg;
saveTxt(t + “\r\n”);
}public static void endTxt() {
try {
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}static BufferedWriter writer;
public static void createFile() {
File f = new File(“D:\txt\2.txt”);
FileOutputStream writerStream = null;
try {
writerStream = new FileOutputStream(f, true);
writer = new BufferedWriter(new OutputStreamWriter(writerStream, “UTF-8”));
} catch (FileNotFoundException | UnsupportedEncodingException e) {
e.printStackTrace();
}
}public static void saveTxt(String msg) {
try {
// System.out.println(msg);
writer.write(msg);
writer.flush();
} catch (IOException e) {
e.printStackTrace();
}
}
}