java 爬小说

package com.jby.resource.service.impl;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;

import java.io.*;

/**

  • txt小说下载

  • @author Administrator
    */
    public class MyTxt {

    static String url = “http://www.zzzcn.org/8_8187/3899817.html”;

    @Test
    public void addition_isCorrect() {
    createFile();
    start();
    }

    public static void start() {
    parse(url);
    }

    private static void parse(String serverString) {
    System.out.println(serverString);
    // 可以使用Jsoup自带的网络请求方式:
    Document document = null;
    try {
    Connection conn = Jsoup.connect(serverString).timeout(3000);
    conn.header(“User-Agent”, “Mozilla/5.0 (Windows NT 6.1; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0”);
    document = conn.get();
    } catch (Exception e) {
    // e.printStackTrace();
    System.err.println(e.getMessage());
    }
    // String string = document.toString();
    // System.out.println(“document:” + string);

     // 解析xml
     // document = (Document) Jsoup.parse(serverString);
     if (document == null) {
         System.err.println("链接错误  正在重试。。。");
         try {
             Thread.sleep(3000);
         } catch (InterruptedException e) {
             throw new RuntimeException(e);
         }
         start();
         return;
     }
    
     Elements title = document.select("div");// 得到table标签中的内容
     for (Element item : title) {
         String name = item.attr("class");
         if (name.equals("bookname")) {
             Elements h1 = item.select("h1");
             String txt = "\r\n" + h1.text();
             getTxt(txt);
             System.out.println(txt);
         }
     }
    
     Elements div = document.select("div");// 得到table标签中的内容
     for (Element item : div) {
         // System.out.println("--------------------------");
         // System.out.println(item);
    
         String name = item.attr("id");
         if (name.equals("content")) {
             System.out.println(item.text().length());
             String[] line = item.text().split(" ");
             int n = line.length;
             for (int i = 0; i < n; i++) {
                 getTxt(line[i]);
             }
         }
     }
    
     Elements div1 = document.select("div");// 得到table标签中的内容
     for (Element item : div1) {
         String name = item.attr("class");
         if (name.equals("bottem2")) {
    
             Elements a = item.select("a");
             for (Element item1 : a) {
                 String name1 = item1.text();
                 if (name1.equals("下一章")) {
                     String href = item1.attr("href");
                     System.out.println(href);
    
                     if (!href.contains(".html")) {
                         endTxt();
                     } else {
                         url = "http://www.zzzcn.org" + href;
                         start();
                         return;
                     }
                 }
             }
         }
     }
    

    }

    public static void getTxt(String msg) {
    String t = msg;
    saveTxt(t + “\r\n”);
    }

    public static void endTxt() {
    try {
    writer.close();
    } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
    }
    }

    static BufferedWriter writer;

    public static void createFile() {
    File f = new File(“D:\txt\2.txt”);
    FileOutputStream writerStream = null;
    try {
    writerStream = new FileOutputStream(f, true);
    writer = new BufferedWriter(new OutputStreamWriter(writerStream, “UTF-8”));
    } catch (FileNotFoundException | UnsupportedEncodingException e) {
    e.printStackTrace();
    }
    }

    public static void saveTxt(String msg) {
    try {
    // System.out.println(msg);
    writer.write(msg);
    writer.flush();
    } catch (IOException e) {
    e.printStackTrace();
    }
    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值