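// Notes:
// 1. Java's File.separator is the platform-dependent path separator (slash or backslash).
// 2. When parsing tags with jsoup, Element.text() directly returns the text between an element's opening and closing tags.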
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches a single chapter page, prints its "previous chapter" / "next chapter"
 * links, and appends the text of the element with id {@code content} to a local
 * file, one space-separated fragment per line.
 */
public class Test {

    /** Address of the chapter page that is downloaded. */
    private static final String CHAPTER_URL = "http://www.biquge5.com/2_2975/1388243.html";

    /** Directory that holds the output file. */
    private static final String BOOK_DIR = "F:" + File.separator + "book";

    /** Name of the file, inside {@link #BOOK_DIR}, that the text is appended to. */
    private static final String BOOK_FILE_NAME = "text.txt";

    /**
     * Downloads the chapter page, prints the navigation links and appends the
     * chapter text to the output file.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched, or if it contains no
     *         element with id {@code content}
     */
    public static void main(String[] args) throws Exception {
        Document doc = Jsoup.connect(CHAPTER_URL).get();

        // Print the absolute target of every link labelled "previous chapter" or "next chapter".
        Elements links = doc.select("a[href]");
        for (Element link : links) {
            String label = link.text();
            if (label.contentEquals("上一章") || label.contentEquals("下一章")) {
                System.out.println(link.attr("abs:href").trim() + "---" + label);
            }
        }

        Element content = doc.getElementById("content");
        if (content == null) {
            // getElementById returns null when no element matches; fail with a clear message
            // instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "No element with id \"content\" found at " + CHAPTER_URL);
        }

        String[] sentences = content.text().split(" ");
        appendLines(sentences);
    }

    /**
     * Appends each fragment, trimmed and terminated by CRLF, to the output file,
     * creating the output directory first when it does not exist.
     *
     * <p>The file is opened once in append mode and closed automatically, even
     * when a write fails. I/O errors are reported on standard error and stop the
     * remaining writes; they are not rethrown.
     *
     * @param sentences the text fragments to write, in order
     */
    private static void appendLines(String[] sentences) {
        File dir = new File(BOOK_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
            System.out.println("小说" + BOOK_DIR + "目录下");
        }

        File file = new File(dir, BOOK_FILE_NAME);
        try (FileOutputStream os = new FileOutputStream(file, true)) {
            for (String sen : sentences) {
                String line = sen.trim() + "\r\n";
                // Encode explicitly as UTF-8 so the output does not depend on the platform charset.
                os.write(line.getBytes(StandardCharsets.UTF_8));
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open " + file + " for appending");
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Failed while writing to " + file);
            e.printStackTrace();
        }
    }
}