最近做了自己的第一个微服务,过程中参考了很多人的文章,所以也想把它记录下来:一是方便自己日后查看,二是希望能帮到有需要的人。这里用到了 PhantomJS,它是一个无头浏览器,可以用来对网页进行截图。
下面是一个截图的示例。大家可以先去官网下载 PhantomJS;我本地是 Windows 系统,所以下载的是 phantomjs.exe。我用的是 Spring Boot 框架,配置非常简单。
直接附上调用的主程序:
/**
 * Takes web page screenshots by launching the external phantomjs executable
 * with a rendering script.
 *
 * The command line is: {@code <phantomjs> <jsPath> <url> <picBasePath + imgpath> <title>}.
 */
public class Screenshot {

    // Location of the phantomjs executable, resolved against the JVM working directory.
    // Windows variant: System.getProperty("user.dir") + "\\target\\phantomjs.exe"
    private static final String cmdPath = System.getProperty("user.dir") + "/phantomjs"; // linux

    /**
     * Renders {@code url} to an image without highlighting any result title.
     *
     * @param url     address of the page to capture
     * @param imgpath image path relative to {@code ScreenConstant.picBasePath}, e.g. {@code /img/2222.png}
     * @param jsPath  path of the phantomjs script to run
     */
    public static void screenshot(String url, String imgpath, String jsPath) {
        screenshot(url, imgpath, jsPath, "");
    }

    /**
     * Renders {@code url} to an image, passing {@code title} to the script as its last argument.
     *
     * The call blocks until phantomjs exits. Failures are reported on stderr and are
     * not propagated to the caller.
     *
     * @param url     address of the page to capture
     * @param imgpath image path relative to {@code ScreenConstant.picBasePath}
     * @param jsPath  path of the phantomjs script to run
     * @param title   extra argument handed to the script; {@code null} is treated as ""
     */
    public static void screenshot(String url, String imgpath, String jsPath, String title) {
        System.out.println("start......" + url);

        // A null element makes process creation throw NullPointerException.
        String safeTitle = (title == null) ? "" : title;
        String[] cmd = new String[] { cmdPath, jsPath, url, getPicSavePath(imgpath), safeTitle };
        for (String arg : cmd) {
            System.out.println(arg);
        }

        Process process = null;
        try {
            ProcessBuilder builder = new ProcessBuilder(cmd);
            // Merge stderr into stdout so one reader drains both; an undrained pipe
            // can fill up and block the child process forever.
            builder.redirectErrorStream(true);
            process = builder.start();

            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line).append(System.lineSeparator());
                }
            }

            int exitCode = process.waitFor();
            if (exitCode == 0) {
                System.out.println("图片存放" + ScreenConstant.picBasePath + "目录");
            } else {
                System.err.println("phantomjs exited with code " + exitCode + " for " + url
                        + System.lineSeparator() + output);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the calling thread pool can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            if (process != null) {
                // Releases the process handles; also kills the child if we bailed out early.
                process.destroy();
            }
        }
    }

    /** Prefixes the relative image path with the configured base directory. */
    private static String getPicSavePath(String imgpath) {
        return ScreenConstant.picBasePath + imgpath;
    }
}
核心是用 PhantomJS 执行事先写好的 js 脚本,下面附上针对百度搜索结果页的 js:
"use strict";
/*
 * PhantomJS script: opens a URL, optionally outlines the Baidu search result
 * whose title equals the given text, and renders the page to a file.
 *
 * Invocation (from Java): phantomjs baidu.js <url> <outputPath> [title|size] [zoom]
 *   system.args[0] is this script, [1] the URL, [2] the output file.
 *   system.args[3] is passed to the page as the title to highlight. It is ALSO
 *   interpreted as a paper size when the output is a .pdf, or as a viewport size
 *   when it looks like "1920px" / "800px*600px" (kept from rasterize.js).
 *
 * Written in ES5 on purpose: the PhantomJS engine does not support ES2015+ syntax.
 */
var page = require('webpage').create();
var system = require('system');
var address;
var output;
var title;
var size;
var pageWidth;
var pageHeight;
var sizeSpec;
var zoom;

// Viewport specs accepted for image output: "1920px" or "800px*600px" ("800*600px" also works).
var SINGLE_PX = /^\d+px$/;
var PAIR_PX = /^\d+(?:px)?\*\d+px$/;

/**
 * Percent-encodes characters that are not legal in a URL without touching
 * escapes that are already present. Plain encodeURI() turns an existing
 * "%E4" into "%25E4", which makes the loaded URL differ from the one passed in.
 */
function normalizeAddress(raw) {
    return encodeURI(raw).replace(/%25([0-9a-fA-F]{2})/g, '%$1');
}

page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36';
page.settings.resourceTimeout = 15000; // 15 seconds

if (system.args.length < 3 || system.args.length > 5) {
    console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
    console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
    console.log(' image (png/jpg output) examples: "1920px" entire page, window width 1920px');
    console.log(' "800px*600px" window, clipped to 800x600');
    phantom.exit(1);
} else {
    address = normalizeAddress(system.args[1]); // URL to capture
    output = system.args[2];                    // where the rendering is saved
    title = system.args[3];                     // result title to outline in red (may be empty/undefined)
    sizeSpec = system.args.length > 3 ? system.args[3] : '';

    page.viewportSize = { width: 1920, height: 1080 }; // default that suits most pages

    if (sizeSpec && output.substr(-4) === ".pdf") {
        // NOTE: for .pdf output a non-empty args[3] is always taken as the paper size,
        // so a title cannot be combined with PDF rendering.
        size = sizeSpec.split('*');
        page.paperSize = size.length === 2 ? { width: size[0], height: size[1], margin: '0px' }
            : { format: sizeSpec, orientation: 'portrait', margin: '1cm' };
    } else if (PAIR_PX.test(sizeSpec)) {
        size = sizeSpec.split('*');
        pageWidth = parseInt(size[0], 10);
        pageHeight = parseInt(size[1], 10);
        page.viewportSize = { width: pageWidth, height: pageHeight };
        page.clipRect = { top: 0, left: 0, width: pageWidth, height: pageHeight };
    } else if (SINGLE_PX.test(sizeSpec)) {
        // Only a width was given: assume a 4:3 window, capture the entire page.
        console.log("size:", sizeSpec);
        pageWidth = parseInt(sizeSpec, 10);
        pageHeight = Math.floor(pageWidth * 3 / 4);
        console.log ("pageHeight:",pageHeight);
        page.viewportSize = { width: pageWidth, height: pageHeight };
    }
    // Any other args[3] (e.g. a title that merely ends in "px") leaves the viewport alone.

    if (system.args.length > 4) {
        zoom = parseFloat(system.args[4]);
        if (isFinite(zoom) && zoom > 0) {
            page.zoomFactor = zoom;
        }
    }

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('Unable to load the address!');
            phantom.exit(1);
        } else {
            // Runs inside the page (cannot be debugged from here); `tt` is the title argument.
            page.evaluate(function (tt) {
                document.body.bgColor = 'white'; // otherwise transparent areas show up in the image
                window.scrollTo(0, 10000);       // scroll to the bottom
                if (!tt) {
                    return; // nothing to highlight
                }
                var container = document.getElementById('content_left');
                if (!container) {
                    return; // not a Baidu result page
                }
                var results = container.getElementsByClassName('result c-container');
                for (var i = 0; i < results.length; i++) {
                    var heading = results[i].getElementsByTagName('h3')[0];
                    var link = heading ? heading.getElementsByTagName('a')[0] : null;
                    if (link && link.innerText === tt) {
                        results[i].style.border = "2px solid red";
                    }
                }
            }, title);
            // Give the page a moment to repaint before rendering.
            window.setTimeout(function () {
                page.render(output);
                phantom.exit();
            }, 1000);
        }
    });
}
以上就是主要的 js 文件,之后在服务中把它的路径传给 Screenshot 调用即可,非常简单。
/**
 * Screenshot service for Baidu pages: runs the shared Screenshot runner with
 * the baidu.js rendering script.
 */
@Service
public class BaiduService {

    /**
     * Captures {@code url} into {@code imgpath}, passing {@code title} through to the script.
     * Annotated {@code @Async}.
     *
     * @param url     page address; "%20" and "%22" escapes are turned back into
     *                a space and a double quote before it is handed on
     * @param imgpath image path handed to {@code Screenshot.screenshot}
     * @param title   title handed to {@code Screenshot.screenshot}
     */
    @Async
    public void screenshot(String url, String imgpath, String title) {
        final String unescapedUrl = url
                .replace("%20", " ")
                .replace("%22", "\"");
        // Linux layout; on Windows the script lived at user.dir + "\\target\\js\\baidu.js".
        final String scriptLocation = System.getProperty("user.dir") + "/js/baidu.js";
        Screenshot.screenshot(unescapedUrl, imgpath, scriptLocation, title);
    }
}
因为调用 PhantomJS 很耗内存,我先用 Redis 缓存需要截图的 url,再定时从 Redis 中取出 url 进行截图:
package com.zy.screenshot.service;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.ListOperations;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.core.SetOperations;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.zy.screenshot.model.Screen;
/**
 * Drains queued screenshot jobs from Redis and dispatches each one to the
 * service matching the search engine in its URL.
 *
 * Jobs are JSON strings popped from the list {@code phantomjsList}. Each job that
 * has been seen is recorded in the set {@code phantomjsSetHistroy} so it is
 * processed at most once.
 */
@Component
public class ControlTimeService {

    /** Redis list holding pending jobs (JSON-serialized {@code Screen}). */
    private static final String QUEUE_KEY = "phantomjsList";
    /** Redis set of jobs already handled. The misspelling is the live key name; do not "fix" it. */
    private static final String HISTORY_KEY = "phantomjsSetHistroy";
    /** Upper bound of jobs taken per invocation. */
    private static final int MAX_JOBS_PER_RUN = 100;

    @Autowired
    RedisTemplate redisTemplate;
    @Autowired
    private BaiduService baiduService;
    @Autowired
    private SoService soService;
    @Autowired
    private SogouService sogouService;
    protected static Logger logger = LoggerFactory.getLogger(ControlTimeService.class);

    /**
     * Pops up to {@code MAX_JOBS_PER_RUN} jobs from the queue and dispatches every
     * job that is not already in the history set. Stops early once the queue is empty.
     */
    @Async("phantomjsAsync")
    public void getDateFromRedis() {
        logger.info("............................................................................................");
        logger.info(".......................................【开始截图】............................................");
        ListOperations<String, Object> list = redisTemplate.opsForList();
        SetOperations<String, Object> set = redisTemplate.opsForSet();
        for (int i = 0; i < MAX_JOBS_PER_RUN; i++) {
            Object popped = list.leftPop(QUEUE_KEY);
            if (popped == null) {
                // Queue is drained; further pops would only be wasted round trips.
                break;
            }
            if (!(popped instanceof String)) {
                logger.warn("Skipping non-string queue entry: {}", popped);
                continue;
            }
            String json = (String) popped;
            if ("".equals(json)) {
                continue;
            }
            // SADD reports how many members were actually added, so a single call both
            // checks and records the job. This avoids the isMember-then-add race where
            // two workers could process the same job.
            Long added = set.add(HISTORY_KEY, json);
            if (added != null && added == 0L) {
                continue;
            }
            try {
                screen(json);
            } catch (RuntimeException e) {
                // One malformed job must not abort the rest of the batch.
                logger.error("Failed to dispatch screenshot job: {}", json, e);
            }
        }
        logger.info("............................................................................................");
        logger.info(".......................................【完成截图】............................................");
    }

    /**
     * Parses one job and forwards it to the matching screenshot service.
     * Jobs without a URL or image path are ignored. URLs that match no known
     * engine fall back to the Baidu service.
     *
     * @param json JSON-serialized {@code Screen}
     */
    private void screen(String json) {
        Screen screen = JSONObject.parseObject(json, Screen.class);
        if (screen == null) {
            // parseObject can yield null (e.g. for the literal "null").
            return;
        }
        String url = screen.getUrl();
        String imgPath = screen.getImgPath();
        if (url == null || "".equals(url) || imgPath == null || "".equals(imgPath)) {
            return;
        }
        String title = screen.getTitle();
        if (title == null) {
            title = "";
        }
        if (url.contains("baidu.com")) {
            baiduService.screenshot(url, imgPath, title);
        } else if (url.contains("so.com")) {
            soService.screenshot(url, imgPath, title);
        } else if (url.contains("sogou.com")) {
            sogouService.screenshot(url, imgPath, title);
        } else {
            baiduService.screenshot(url, imgPath, title);
        }
    }
}
因为我用的是虚拟机,配置不高,所以控制在大约每分钟处理 100 条,以保证内存不会爆掉。
谢谢!有疑问可以加微信:18932009560