使用 Java 读取网页内容并保存到本地文件。参数为 URL 链接。
使用到的jar文件:
- commons-logging-1.2.jar
- httpclient-4.5.1.jar
- httpcore-4.4.3.jar
package com.crawler;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
/**
 * Minimal web-crawler demo: downloads a single page over HTTP and saves its text to a local file.
 *
 * <p>Usage: {@code WebCrawler [url [outputFile]]}. Both arguments are optional; when they are
 * omitted the built-in default URL and output path are used.
 *
 * @author Administrator
 * @since 2016-11-04
 */
public class WebCrawler {

    /** Page fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_URL =
            "http://www.zysj.com.cn/lilunshuji/jichulilun/index.html";

    /** File written when no output path is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_FILE = "f:/中医基础理论.html";

    /** Charset name used to decode the response body and to encode the saved file. */
    private static final String CHARSET = "utf-8";

    /**
     * Entry point: fetches a page and writes it to disk.
     *
     * @param args optional arguments: {@code args[0]} is the URL to fetch and {@code args[1]} is
     *             the output file path; missing arguments fall back to the defaults
     */
    public static void main(String[] args) {
        String url = DEFAULT_URL;
        String outputFile = DEFAULT_OUTPUT_FILE;
        if (args != null && args.length > 0) {
            url = args[0];
        }
        if (args != null && args.length > 1) {
            outputFile = args[1];
        }

        try {
            String page = fetch(url);
            if (page == null) {
                System.err.println("No response body received from " + url);
                return;
            }
            savaFile(outputFile, page, CHARSET);
        } catch (IllegalArgumentException e) {
            // HttpGet rejects strings that are not valid URIs.
            System.err.println("Invalid URL: " + url);
            e.printStackTrace();
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Executes an HTTP GET for {@code url} and returns the response body as text.
     *
     * <p>Line terminators in the body are normalized: every line read is followed by a single
     * {@code '\n'} in the returned string.
     *
     * @param url the address to request
     * @return the decoded body, or {@code null} when the response carries no entity
     * @throws IOException if the request fails or the body cannot be read
     */
    private static String fetch(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.addHeader("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");

        // The client is closed on every path so its connection resources are not leaked.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpResponse response = httpClient.execute(httpGet);
            HttpEntity httpEntity = response.getEntity();
            if (httpEntity == null) {
                return null;
            }
            // Closing the reader also closes the entity's content stream.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(httpEntity.getContent(), CHARSET), 8 * 1024)) {
                StringBuilder body = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    body.append(line).append('\n');
                }
                return body.toString();
            }
        }
    }

    /**
     * Writes {@code content} to a file, replacing any existing file of the same name.
     *
     * <p>I/O failures (including an unsupported charset name) are printed to standard error
     * and are not propagated to the caller.
     *
     * @param fileName path of the file to write (the original author intended an absolute path)
     * @param content  the text to save
     * @param format   name of the charset used to encode the text, e.g. {@code "UTF-8"}
     */
    public static void savaFile(String fileName, String content, String format) {
        File file = new File(fileName);
        // The stream is declared as its own resource so it is closed even if the writer
        // cannot be constructed (e.g. unsupported charset name).
        try (OutputStream out = new FileOutputStream(file);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, format))) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code content} to a file using a {@link FileWriter}.
     *
     * <p>NOTE: despite the method name, the writer is opened with {@code append == false}, so an
     * existing file is overwritten rather than appended to. The text is encoded with the
     * encoding {@link FileWriter} selects by default. I/O failures are printed to standard
     * error and are not propagated to the caller.
     *
     * @param fileName path of the file to write
     * @param content  the text to write
     */
    public static void appendMethodB(String fileName, String content) {
        // The second constructor argument selects append mode when true; false is passed here,
        // so the file is truncated before writing.
        try (FileWriter writer = new FileWriter(fileName, false)) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}