1.目标文件
路径:http://www.shicimingju.com/book/sanguoyanyi
2.代码思路
(1)客户端发起一个url请求,获取连接对象。
new URL();
通过URL的openStream方法获取URL对象所表示的资源字节输入流
(2)服务器解析url,并且将指定的资源返回一个输入流给客户。
(3)建立存储的目录以及保存的文件名。
(4)通过输出流写数据。
(5)关闭输入流和输出流。
3.代码实现
/**
 * Downloads chapters 1 through 120 of the novel and saves each chapter to its own text file.
 *
 * <p>Each chapter page is read line by line as UTF-8. The first line containing a
 * {@code <title>} element determines the output file name and is written as the heading;
 * every {@code <p>...</p>} match found from that line onwards is written as one line of text.
 * Paragraphs that appear before the title are skipped because no output file exists yet.
 *
 * <p>A failure in one chapter is reported on the console and does not stop the remaining
 * chapters. Output files are opened in append mode, so re-running adds to existing files.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    // Patterns are applied per line, so an element spanning several lines is not matched.
    Pattern contentPattern = Pattern.compile("<p.*?>(.*?)</p>");
    Pattern titlePattern = Pattern.compile("<title>(.*?)</title>");

    for (int i = 1; i <= 120; i++) {
        System.out.println("第" + i + "章开始下载。。。");
        // Scoped to the iteration so a failed chapter can never write to, or leave open,
        // the writer of a previous chapter.
        BufferedWriter writer = null;
        try {
            // NOTE(review): the path ends in "<i>/.html"; the slash before ".html" looks
            // unintended — confirm against the site's real chapter URLs.
            URL url = new URL("http://www.shicimingju.com/book/sanguoyanyi/"+i+"/.html");
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(url.openStream(), "utf-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (writer == null) {
                        Matcher titleMatcher = titlePattern.matcher(line);
                        if (!titleMatcher.find()) {
                            // No output file yet, so there is nowhere to write this line.
                            continue;
                        }
                        String title = titleMatcher.group(1);
                        // The file name uses only the first whitespace-separated token of
                        // the title; a whitespace-only title yields an empty token array.
                        String[] split = title.split("\\s+");
                        String shortTitle = split.length > 0 ? split[0] : "";
                        File file = new File("C:\\Users\\asus\\Desktop\\text2\\"+"第" + i + "章:"+shortTitle+".txt");
                        File parent = file.getParentFile();
                        if (parent != null) {
                            // Return value ignored on purpose: if the directory cannot be
                            // created, opening the file below fails with a clear error.
                            parent.mkdirs();
                        }
                        // Write with the same charset the page was decoded with instead of
                        // the platform default.
                        writer = new BufferedWriter(
                                new OutputStreamWriter(new FileOutputStream(file, true), "utf-8"));
                        writer.write("第" + i + "章:" + title + "\n");
                    }
                    Matcher contentMatcher = contentPattern.matcher(line);
                    while (contentMatcher.find()) {
                        // Group 1 is the text between the tags, so opening tags that carry
                        // attributes are dropped as well.
                        // NOTE(review): spaces and '?' are removed as in the original;
                        // presumably they are placeholders for undecodable characters — confirm.
                        String content = contentMatcher.group(1)
                                .replace("<p>", "").replace(" ", "").replace("?", "");
                        writer.write(content+"\n");
                    }
                }
            }
            if (writer == null) {
                throw new IllegalStateException("no <title> found on the page of chapter " + i);
            }
            writer.write("\n\n");
            // Close before reporting success so a failed flush counts as a failed download.
            writer.close();
            System.out.println("第" + i + "章下载完成.........");
        } catch (Exception e) {
            // Per-chapter boundary: report the failure and continue with the next chapter.
            System.out.println("下载失败");
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    // No-op after a successful close above; releases the file on error paths.
                    writer.close();
                } catch (Exception ignored) {
                    // The primary failure has already been reported by the catch block.
                }
            }
        }
    }
}