近期做的一个项目由于客户需求,需要将网站的首页静态化。因为自己从未接触过静态化的相关知识,所以只好硬着头皮到处找资料,焦头烂额。最后想到一种解决方案:用爬虫技术抓取自己的首页,然后用抓取下来的整个页面替换掉原来的首页,这样用户访问的就是一个静态资源了。百度了一下,发现果然有这样的案例,果断改改拿来用了,写此博客来记录一下。
上代码:(参考自http://www.2cto.com/kf/201306/221657.html)
package com.evcipa.comutil;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.output.FileWriterWithEncoding;
import org.apache.log4j.Logger;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Generates static HTML snapshots of dynamically rendered pages.
 *
 * <p>Two strategies are offered:
 * <ul>
 *   <li>{@link #createHtmlPage(String, String)} downloads the raw HTTP response with
 *       Commons HttpClient, rewrites relative {@code ../} prefixes and stores the result;</li>
 *   <li>{@link #getHtmlPage(String, String)} loads the page in HtmlUnit, waits for
 *       background JavaScript and stores the resulting DOM as XML.</li>
 * </ul>
 *
 * <p>All per-request state is kept in local variables; the only instance state is
 * {@link #webappname}.
 *
 * @Commpany BK
 * @author ZhangAn
 */
public class HtmlGenerator {

    private static final Logger logger = Logger.getLogger(HtmlGenerator.class);

    /** Character encoding used for every read and write performed by this class. */
    private static final String ENCODING = "UTF-8";

    /** HTTP status code that signals a successful response. */
    private static final int HTTP_OK = 200;

    /** HtmlUnit connection/read timeout in milliseconds. */
    private static final int PAGE_TIMEOUT_MILLIS = 50000;

    /** Maximum time, in milliseconds, to wait for background JavaScript to finish. */
    private static final long JAVASCRIPT_WAIT_MILLIS = 20000;

    /**
     * Prefix substituted for relative {@code ../} path segments by
     * {@link #formatPage(String)}. When {@code null}, pages are stored without rewriting.
     */
    String webappname;

    /** Creates a generator that does not rewrite relative paths. */
    public HtmlGenerator() {
        this(null);
    }

    /**
     * Creates a generator that rewrites relative paths to start with the given prefix.
     *
     * @param webappname prefix that replaces leading {@code ../} segments; may be {@code null}
     */
    public HtmlGenerator(String webappname) {
        this.webappname = webappname;
    }

    /**
     * Downloads {@code url} and stores the (path-rewritten) response body in a file.
     *
     * @param url          address of the page to snapshot
     * @param htmlFileName path of the file to create or overwrite
     * @return {@code true} if the page was fetched with status 200 and written;
     *         {@code false} on any other status or on any error (which is logged)
     */
    public boolean createHtmlPage(String url, String htmlFileName) {
        GetMethod getMethod = null;
        try {
            HttpClient httpClient = new HttpClient();
            httpClient.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, ENCODING);
            getMethod = new GetMethod(url);
            getMethod.getParams().setParameter(
                    HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
            getMethod.addRequestHeader("Content-Type", "text/html;charset=UTF-8");

            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HTTP_OK) {
                logger.error("静态页面引擎在解析" + url + "产生静态页面" + htmlFileName + "时出错!");
                return false;
            }

            String page = formatPage(readBody(getMethod.getResponseBodyAsStream()));
            writeHtml(htmlFileName, page);
            return true;
        } catch (Exception ex) {
            // Pass the exception itself so the stack trace is not lost.
            logger.error("静态页面引擎在解析" + url + "产生静态页面" + htmlFileName + "时出错:"
                    + ex.getMessage(), ex);
            return false;
        } finally {
            // getMethod is still null when its construction failed (e.g. malformed URL).
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
    }

    /**
     * Reads the whole stream as UTF-8 text, terminating every line with {@code '\n'}.
     *
     * @param in response body stream; {@code null} is treated as an empty body
     * @return the decoded text, never {@code null}
     * @throws IOException if reading fails
     */
    private static String readBody(InputStream in) throws IOException {
        if (in == null) {
            return "";
        }
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
        }
        return body.toString();
    }

    /**
     * Writes {@code content} to {@code htmlFileName} as UTF-8, replacing any existing file.
     *
     * @param htmlFileName path of the file to write
     * @param content      text to store
     * @throws IOException if the file cannot be opened or written
     */
    private synchronized void writeHtml(String htmlFileName, String content) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(htmlFileName), ENCODING))) {
            writer.write(content);
        }
    }

    /**
     * Replaces relative {@code ../} path prefixes (up to three levels per occurrence)
     * with {@code webappname + "/"}.
     *
     * @param page HTML text to rewrite
     * @return the rewritten text, or {@code page} unchanged when no prefix is configured
     */
    private String formatPage(String page) {
        if (webappname == null) {
            // Without a prefix the rewrite would inject the literal text "null/".
            return page;
        }
        String prefix = webappname + "/";
        // Literal replacement: '$' and '\' in the prefix must not be treated as regex syntax.
        // Longest pattern first so that "../../../" collapses into a single prefix.
        page = page.replace("../../../", prefix);
        page = page.replace("../../", prefix);
        page = page.replace("../", prefix);
        return page;
    }

    /**
     * Renders {@code url} in HtmlUnit (JavaScript and CSS enabled), waits for background
     * JavaScript and stores the resulting DOM as XML.
     *
     * @param url      address of the page to snapshot
     * @param rootPath path of the file to create or overwrite
     * @return {@code true} if the page was loaded and written, {@code false} otherwise
     */
    public static boolean getHtmlPage(String url, String rootPath) {
        WebClient webClient = new WebClient(BrowserVersion.CHROME);
        try {
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(true);
            webClient.setAjaxController(new NicelyResynchronizingAjaxController());
            webClient.getOptions().setTimeout(PAGE_TIMEOUT_MILLIS);

            HtmlPage htmlPage = webClient.getPage(url);
            webClient.waitForBackgroundJavaScript(JAVASCRIPT_WAIT_MILLIS);
            return stringToFile(htmlPage.asXml(), rootPath);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            return false;
        } finally {
            webClient.closeAllWindows();
        }
    }

    /**
     * Writes HTML text to a file using UTF-8.
     *
     * @param html     text to store
     * @param rootPath path of the file to create or overwrite
     * @return {@code true} on success, {@code false} if an I/O error occurred (logged)
     * @Commpany BK
     * @author ZhangAn
     */
    public static boolean stringToFile(String html, String rootPath) {
        try (FileWriterWithEncoding fileWriter = new FileWriterWithEncoding(rootPath, "utf-8")) {
            fileWriter.write(html);
            fileWriter.flush();
            return true;
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            return false;
        }
    }

    /**
     * Performs an HTTP GET and returns the response body decoded as UTF-8.
     *
     * <p>Line terminators are dropped: the lines of the response are concatenated
     * without any separator.
     *
     * @param url address to request
     * @return the response text; whatever was read before a failure (possibly the empty
     *         string) when an error occurs, in which case the error is logged
     */
    public static String sendGet(String url) {
        StringBuilder result = new StringBuilder();
        java.net.HttpURLConnection httpConn = null;
        try {
            java.net.URL connURL = new java.net.URL(url);
            httpConn = (java.net.HttpURLConnection) connURL.openConnection();
            httpConn.setRequestProperty("Accept", "*/*");
            httpConn.setRequestProperty("Connection", "Keep-Alive");
            httpConn.setRequestProperty("User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)");
            httpConn.connect();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(httpConn.getInputStream(), ENCODING))) {
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
            }
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        } finally {
            if (httpConn != null) {
                httpConn.disconnect();
            }
        }
        return result.toString();
    }

    /**
     * Demo entry point: snapshots the local index page to {@code D:/a.html}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HtmlGenerator h = new HtmlGenerator("webappname");
        h.createHtmlPage("http://localhost:8080/evcipa/views/index.jsp", "D:/a.html");
        System.out.println("静态页面已经生成到D:/a.html");
    }
}
后续处理
由于首页变成了静态页面,内容更新后不会像动态页面那样自动重新生成,所以需要一个定时任务定期重新生成静态页面,以同步动态内容。下一篇博客中我将介绍 Spring MVC 的定时任务:task。