最近去看了下phantomjs官网,建议深入使用phantomjs的同学查看官网例子
总结更新:可设置超时时间(写在命令行参数或者js文件中)
phantomjs不方便支持多线程(每打开一个phantomjs内核会占用内存等)
支持js语法可使用window.open()设置反馈到服务器等。
phantomjs支持IO读取文件
如何安装:
下载phantomjs安装文件,直接解压到相关目录,解包:tar xvf FileName.tar
创建软连接方便调用:(如果报错使用 ln -sf 强制执行)
ln -s /root/satanbox/phantomjs/phantomjs-1.9.7/bin/phantomjs /usr/bin/phantomjs
安装相关库 :yum install freetype-devel fontconfig-devel
截图中文乱码:安装编码
在centos中执行:yum install bitmap-fonts bitmap-fonts-cjk
在ubuntu中执行:sudo apt-get install xfonts-wqy
测试
phantomjs /home/satanbox/phantomjs/phantomjs-1.9.7/examples/rasterize.js http://www.baidu.com /home/satanbox/test/a.png
Java代码调用phantomjs进行截图:
代码说明:phantomjs对多线程支持不好,建议使用单线程。调用PrintscreenUtil中的getImage方法进行截图(内部通过ProcessUtils中的createIndexImage方法启动phantomjs),超过三分钟仍无法完成截图则杀死进程
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Captures a screenshot of a web page by running PhantomJS as an external
 * process (started through {@code ProcessUtils.createIndexImage}) and reading
 * the image file that process writes.
 */
public class PrintscreenUtil {
    private static final Logger logger = LoggerFactory.getLogger(PrintscreenUtil.class);

    /** Pause between two checks of whether the PhantomJS process has finished. */
    private static final long POLL_INTERVAL_MILLIS = 3 * 1000L;

    /** Maximum number of checks: 60 polls x 3 seconds = the documented three-minute limit. */
    private static final int MAX_POLLS = 60;

    // PIDs of work in progress, in insertion order. Nothing in this class reads or
    // writes the queue; it is kept because it is part of the public surface.
    public static ConcurrentLinkedQueue<Long> websiteRunning = new ConcurrentLinkedQueue<Long>();

    /**
     * Takes a screenshot of {@code url} with PhantomJS and returns the image bytes.
     * <p>
     * The call blocks, polling every three seconds, until the PhantomJS process
     * has exited or three minutes have passed. On timeout the process is killed.
     *
     * @param url       page to capture; {@code http://} is prepended when the value
     *                  does not already start with an http/https scheme
     * @param imagePath file that PhantomJS should write the screenshot to
     * @return the content of {@code imagePath}, or {@code null} when an argument is
     *         null, the process could not be started, the wait was interrupted,
     *         the capture timed out, or the image file could not be read
     */
    public static byte[] getImage(String url, String imagePath) {
        if (url == null || imagePath == null) {
            logger.error("截图参数不能为空, url=" + url + ", imagePath=" + imagePath);
            return null;
        }
        // Test the scheme as a prefix: a plain contains("http") is fooled by URLs
        // that merely embed "http" somewhere (e.g. in a query string).
        boolean hasScheme = url.regionMatches(true, 0, "http://", 0, 7)
                || url.regionMatches(true, 0, "https://", 0, 8);
        if (!hasScheme) {
            url = "http://" + url;
        }

        Process process = ProcessUtils.createIndexImage(url + " " + imagePath);
        if (process == null) {
            return null;
        }
        // Drain both pipes so the child cannot block on a full output buffer.
        new Thread(new ClearStream(process.getInputStream())).start();
        new Thread(new ClearStream(process.getErrorStream())).start();

        // May be null when the PID cannot be resolved on this platform/JDK.
        Integer currentPid = ProcessUtils.getProPid(process);

        for (int count = 1; count <= MAX_POLLS; count++) {
            try {
                logger.info("将在休眠后第" + count + "次保存图片");
                Thread.sleep(POLL_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                logger.error("保存首页截图休眠时出错", e);
                // Restore the flag so callers further up can see the interruption.
                Thread.currentThread().interrupt();
                return null;
            }
            if (!isStillRunning(process, currentPid)) {
                // PhantomJS has exited; the screenshot (if any) is on disk now.
                return getImageByte(imagePath);
            }
        }

        // Time limit reached: give up and kill the process.
        logger.info("获取图片失败,杀死进程" + imagePath);
        if (currentPid != null) {
            ProcessUtils.killProcessByPid(currentPid);
        } else {
            process.destroy();
        }
        return null;
    }

    /**
     * Tells whether the PhantomJS process is still alive. The state is evaluated
     * from scratch on every call, so a PID seen in an earlier poll never leaks
     * into a later one.
     */
    private static boolean isStillRunning(Process process, Integer pid) {
        if (pid == null) {
            // Without a PID, ask the process handle directly.
            try {
                process.exitValue();
                return false;
            } catch (IllegalThreadStateException stillRunning) {
                return true;
            }
        }
        List<Integer> pidList = ProcessUtils.getProcessPidByName("phantomjs");
        if (pidList == null) {
            return false;
        }
        for (Integer candidate : pidList) {
            if (pid.equals(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads the whole file at {@code imagePath} into a byte array.
     *
     * @param imagePath path of the file to read
     * @return the file content, or {@code null} when the file does not exist or
     *         cannot be read
     */
    public static byte[] getImageByte(String imagePath) {
        File file = new File(imagePath);
        FileInputStream fin = null;
        try {
            byte[] imageByte = new byte[(int) file.length()];
            fin = new FileInputStream(file);
            // A single read() may return fewer bytes than requested, so loop
            // until the buffer is full or the stream ends.
            int offset = 0;
            while (offset < imageByte.length) {
                int read = fin.read(imageByte, offset, imageByte.length - offset);
                if (read < 0) {
                    // File is shorter than its reported length: return what was read.
                    return java.util.Arrays.copyOf(imageByte, offset);
                }
                offset += read;
            }
            return imageByte;
        } catch (FileNotFoundException e) {
            logger.error("截图文件不存在: " + imagePath, e);
        } catch (IOException e) {
            logger.error("读取截图文件失败: " + imagePath, e);
        } finally {
            if (fin != null) {
                try {
                    fin.close();
                } catch (IOException e) {
                    logger.warn("关闭截图文件失败: " + imagePath, e);
                }
            }
        }
        return null;
    }

    /**
     * Reads a stream to its end and discards everything, then closes it.
     */
    private static class ClearStream implements Runnable {
        private final InputStream inputStream;

        public ClearStream(InputStream inputStream) {
            this.inputStream = inputStream;
        }

        public void run() {
            BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
            try {
                while (br.readLine() != null) {
                    // Output is intentionally thrown away; only draining matters.
                }
            } catch (IOException e) {
                logger.warn("清空进程输出流时出错", e);
            } finally {
                try {
                    // Closing the reader also closes the wrapped stream.
                    br.close();
                } catch (IOException e) {
                    logger.warn("关闭进程输出流时出错", e);
                }
            }
        }
    }
}
代码分割
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.sun.jna.Library;
import com.sun.jna.Native;
import com.sun.jna.Platform;
/**
* @类名: ProcessUtils
* @描述: 进程帮组类
*/
/**
 * Helpers for working with operating-system processes: looking up PIDs by
 * process name, resolving the PID of a {@link Process}, killing a process by
 * PID, and starting the PhantomJS / wget commands.
 */
public class ProcessUtils {
    private static final Logger logger = LoggerFactory.getLogger(ProcessUtils.class);

    /**
     * Lists the PIDs of all running processes with the given name, using
     * {@code tasklist} on Windows and {@code pidof} on Linux.
     *
     * @param processName executable name without extension, e.g. {@code phantomjs}
     * @return the PIDs found (possibly empty), or {@code null} when the platform
     *         is neither Windows nor Linux
     */
    public static List<Integer> getProcessPidByName(String processName) {
        List<Integer> pidList = new ArrayList<Integer>();
        if (Platform.isWindows()) {
            if (processName == null || processName.length() == 0) {
                return pidList;
            }
            // tasklist rows look like "<image name>   <pid> ...". The name is quoted
            // so that '.' and other regex metacharacters are matched literally, and
            // the pattern is compiled once rather than once per output line.
            Pattern rowPattern = Pattern.compile(Pattern.quote(processName + ".exe") + "[ ]+([0-9]+)");
            for (String row : readCommandOutput(new String[] {"cmd.exe", "/c", "tasklist"})) {
                Matcher matcher = rowPattern.matcher(row);
                while (matcher.find()) {
                    addPid(pidList, matcher.group(1));
                }
            }
        } else if (Platform.isLinux()) {
            if (processName == null || processName.length() == 0) {
                return pidList;
            }
            // pidof prints the PIDs separated by blanks on a single line.
            for (String row : readCommandOutput(new String[] {"pidof", processName})) {
                String[] ids = row.trim().split("\\s+");
                for (String id : ids) {
                    addPid(pidList, id);
                }
            }
        } else {
            return null;
        }
        return pidList;
    }

    /**
     * Runs a command and returns the lines it wrote to standard output. The
     * reader and the child process are always released, so repeated polling
     * does not pile up open pipes.
     */
    private static List<String> readCommandOutput(String[] command) {
        List<String> lines = new ArrayList<String>();
        Process process = null;
        BufferedReader reader = null;
        try {
            process = Runtime.getRuntime().exec(command);
            reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            logger.error("获取系统pid出现异常", e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    logger.warn("关闭命令输出流时出错", e);
                }
            }
            if (process != null) {
                // Output has been read to the end; this releases the remaining pipes.
                process.destroy();
            }
        }
        return lines;
    }

    /** Parses one PID token and appends it; blank or non-numeric tokens are skipped. */
    private static void addPid(List<Integer> pidList, String text) {
        if (text == null) {
            return;
        }
        String trimmed = text.trim();
        if (trimmed.length() == 0) {
            return;
        }
        try {
            pidList.add(Integer.valueOf(trimmed));
        } catch (NumberFormatException e) {
            logger.warn("忽略无法解析的pid: " + trimmed, e);
        }
    }

    /**
     * Resolves the operating-system PID of a process started from this JVM.
     * <p>
     * The public {@code Process.pid()} method (Java 9+) is tried first through
     * reflection, so the class still compiles on older JDKs. If it is missing,
     * the PID is read from the private fields of the platform {@code Process}
     * implementation ({@code handle} on Windows, {@code pid} on Linux).
     *
     * @param process the process to inspect
     * @return the PID, or {@code null} when it cannot be determined
     */
    public static Integer getProPid(Process process) {
        if (process == null) {
            return null;
        }
        try {
            Object pid = Process.class.getMethod("pid").invoke(process);
            return Integer.valueOf(((Number) pid).intValue());
        } catch (NoSuchMethodException olderJdk) {
            // Pre-Java-9 runtime: fall through to the field-based lookup.
        } catch (Exception e) {
            logger.warn("通过Process.pid()获取进程pid失败", e);
        }

        try {
            if (Platform.isWindows()) {
                Field handleField = process.getClass().getDeclaredField("handle");
                handleField.setAccessible(true);
                // The field holds a native process handle; kernel32 maps it to a PID.
                int pid = Kernel32.INSTANCE.GetProcessId((Long) handleField.get(process));
                return Integer.valueOf(pid);
            } else if (Platform.isLinux()) {
                Field pidField = process.getClass().getDeclaredField("pid");
                pidField.setAccessible(true);
                return (Integer) pidField.get(process);
            }
        } catch (Exception ex) {
            logger.error("获取进程pid出现异常", ex);
        }
        return null;
    }

    /**
     * Forcefully kills the process with the given PID ({@code taskkill /f} on
     * Windows, {@code kill -9} on Linux). Does nothing for a {@code null} PID or
     * on any other platform.
     *
     * @param pid PID of the process to kill; may be {@code null}
     */
    public static void killProcessByPid(Integer pid) {
        if (pid == null) {
            return;
        }
        String[] command;
        if (Platform.isWindows()) {
            command = new String[] {"cmd.exe", "/c", "taskkill", "/f", "/pid", String.valueOf(pid)};
        } else if (Platform.isLinux()) {
            command = new String[] {"kill", "-9", String.valueOf(pid)};
        } else {
            return;
        }
        Process killer = null;
        try {
            killer = Runtime.getRuntime().exec(command);
            // Wait for the helper to finish so its pipes can be released right away.
            killer.waitFor();
        } catch (IOException e) {
            logger.error("杀进程" + pid + "时出现异常", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } finally {
            if (killer != null) {
                killer.destroy();
            }
        }
    }

    /**
     * Starts PhantomJS to render a page into an image file.
     * <p>
     * The executed command is
     * {@code phantomjs <PropertiesInfo.PHANTOMJS_DIR> <imagePath> 1024px*768px}.
     * Only Linux is supported; on other platforms nothing is started.
     * <p>
     * NOTE(review): the command is passed to {@code Runtime.exec(String)}, which
     * splits it on whitespace, so a URL or path containing blanks is broken into
     * several arguments.
     *
     * @param imagePath despite the name, the visible caller passes the target URL
     *                  and the output path joined by one space
     * @return the started process, or {@code null} when the platform is not Linux
     *         or the process could not be started
     */
    public static Process createIndexImage(String imagePath) {
        if (!Platform.isLinux()) {
            return null;
        }
        String command = "phantomjs " + PropertiesInfo.PHANTOMJS_DIR + " " + imagePath + " 1024px*768px";
        try {
            return Runtime.getRuntime().exec(command);
        } catch (IOException e) {
            logger.error("获取网站截图是异常" + imagePath, e);
        }
        return null;
    }

    /**
     * Executes the given command line (intended for wget).
     *
     * @param command full command line to run
     * @return the started process, or {@code null} when the command is null or
     *         blank, or the process could not be started
     */
    public static Process execWgetCommand(String command) {
        if (command == null || command.trim().length() == 0) {
            logger.error("执行wget出现异常 command:" + command);
            return null;
        }
        try {
            return Runtime.getRuntime().exec(command);
        } catch (IOException e) {
            logger.error("执行wget出现异常 command:" + command, e);
        }
        return null;
    }

    /**
     * JNA binding to the Windows {@code kernel32} library, used to turn a process
     * handle into a PID.
     */
    static interface Kernel32 extends Library {
        public static Kernel32 INSTANCE = (Kernel32) Native.loadLibrary("kernel32", Kernel32.class);

        public int GetProcessId(Long hProcess);
    }
}
另外增加一个单线程控制
/**
 * Keeps the worker held in {@code thread} going, based on its current state:
 * a thread that was never started is started, a terminated one is replaced by
 * a new thread running {@code SaveImageResultRunable}, a runnable one is left
 * alone, and in every other state {@code notify()} is called on the thread
 * object. The resulting state is logged at the end.
 * <p>
 * NOTE(review): {@code notify()} only wakes a thread that is waiting on this
 * thread object's monitor — confirm the worker waits on it.
 */
public void getImageResult() {
    switch (thread.getState()) {
    case NEW:
        // Created but never started.
        thread.start();
        break;
    case TERMINATED:
        // A finished thread cannot be restarted, so a new one takes its place.
        logger.error("当前线程已挂掉,重启一个");
        thread = new Thread(new SaveImageResultRunable());
        thread.start();
        break;
    case RUNNABLE:
        // Already running; nothing to do.
        break;
    default:
        // Blocked, waiting, or timed-waiting.
        logger.info("开始唤醒当前线程,当前状态为:" + thread.getState());
        synchronized (thread) {
            thread.notify();
        }
        break;
    }
    logger.info("当前线程状态:" + thread.getState());
}
研究CasperJS截图方式