前言
在数据采集中,对于时效性敏感的数据(比如微博热搜这类“过了这个村就没有这个店”的数据),为了证明当时数据采集的准确性与完整性,最直接有效的办法就是保留当时采集的网页页面。
本文主要交流的问题是:使用Appium模拟操作截屏且需要滑动页面时,如何得到一张无缝的完整长图。
基本思路:图片截取->计算截取后图片的差异值->取最小差异值的截取位置进行截取->对截取图片进行拼接。因为微博热搜有所谓的“页眉”,所以会有一个去“页眉”的截取步骤。
成果展示
原始图片
——————————————————图片分隔符————————————————————
——————————————————图片分隔符————————————————————
截取图片
——————————————————图片分隔符————————————————————
——————————————————图片分隔符————————————————————
拼接图片
Maven依赖
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv</artifactId>
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<version>1.3.3</version>
</dependency>
核心代码
类Pic
/**
 * Image helpers used to stitch scrolled screenshots: PNG cropping, vertical
 * merging, and OpenCV-based difference measurement between two images.
 *
 * <p>Native {@code Mat} objects created inside these methods are released
 * explicitly; {@code Mat} instances returned to the caller must be released
 * by the caller.
 */
public class Pic {
    public static final Logger LOGGER = LoggerFactory.getLogger(Pic.class);

    /** Number of rows at the bottom of an image searched by {@link #interceptBarHeight}. */
    private static final int BAR_SEARCH_HEIGHT = 240;

    /** Grey-level difference above which a pixel counts as "different". */
    private static final int DIFF_THRESHOLD = 24;

    /**
     * Crops a rectangular region out of a PNG stream and writes it as PNG.
     *
     * <p>The region is handed to {@link ImageReadParam#setSourceRegion}, so a
     * non-positive width/height or a negative x/y is rejected by the reader.
     * Neither {@code input} nor {@code out} is closed here; both remain owned
     * by the caller.
     *
     * @param input  PNG data to read
     * @param out    destination for the cropped PNG
     * @param x      left edge of the region
     * @param y      top edge of the region
     * @param width  region width
     * @param height region height
     * @throws Exception if no PNG reader is available, or reading/writing fails
     */
    public void cut(InputStream input, OutputStream out,
                    int x, int y, int width, int height) throws Exception {
        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName("png");
        if (!readers.hasNext()) {
            throw new Exception("未找到PNG图片读取器!");
        }
        ImageReader reader = readers.next();
        try (ImageInputStream imageStream = ImageIO.createImageInputStream(input)) {
            if (imageStream == null) {
                throw new Exception("无法创建图片输入流!");
            }
            reader.setInput(imageStream, true);
            ImageReadParam param = reader.getDefaultReadParam();
            param.setSourceRegion(new Rectangle(x, y, width, height));
            BufferedImage region = reader.read(0, param);
            if (!ImageIO.write(region, "png", out)) {
                throw new Exception("PNG图片写入失败!");
            }
        } finally {
            // The reader holds resources of its own; release them even on failure.
            reader.dispose();
        }
    }

    /**
     * Stacks the given images vertically, in list order, into one image file.
     *
     * <p>The output is as wide as the widest input and as tall as the sum of
     * all input heights. Each image is placed at x = 0; the output is a
     * {@code TYPE_INT_RGB} image.
     *
     * @param picList paths of the images to stack, top to bottom
     * @param pic2    path of the file to write
     * @param type    ImageIO format name of the output, e.g. {@code "png"}
     * @throws Exception if the list is null/empty, an input cannot be decoded,
     *                   or no writer exists for {@code type}
     */
    public void merge(List<String> picList, String pic2, String type) throws Exception {
        if (picList == null || picList.isEmpty()) {
            throw new Exception("无待合并图片!");
        }
        BufferedImage[] images = new BufferedImage[picList.size()];
        int dstWidth = 0;
        int dstHeight = 0;
        for (int i = 0; i < images.length; i++) {
            String path = picList.get(i);
            BufferedImage image = ImageIO.read(new File(path));
            // ImageIO.read returns null (rather than throwing) when no reader can decode the file.
            if (image == null) {
                throw new Exception("无法读取图片:" + path);
            }
            images[i] = image;
            dstWidth = Math.max(dstWidth, image.getWidth());
            dstHeight += image.getHeight();
        }
        if (dstHeight <= 0) {
            throw new Exception("图片合并高度为0!");
        }
        BufferedImage imageNew = new BufferedImage(dstWidth, dstHeight, BufferedImage.TYPE_INT_RGB);
        int offsetY = 0;
        for (BufferedImage image : images) {
            int width = image.getWidth();
            int height = image.getHeight();
            // Copy one image at a time so only a single pixel buffer is alive at once.
            int[] pixels = image.getRGB(0, 0, width, height, null, 0, width);
            imageNew.setRGB(0, offsetY, width, height, pixels, 0, width);
            offsetY += height;
        }
        // ImageIO.write reports an unknown format by returning false, not by throwing.
        if (!ImageIO.write(imageNew, type, new File(pic2))) {
            throw new Exception("不支持的图片输出格式:" + type);
        }
    }

    /**
     * Measures how different two image files are.
     *
     * <p>Both images must have the same width. If their heights differ, the
     * taller one is first reduced to the shorter one's height with
     * {@link #dealLong}.
     *
     * @param targetImageUrl path of the first image
     * @param baseImageUrl   path of the second image
     * @return percentage (0-100) of pixels flagged as different by
     *         {@link #compare(opencv_core.Mat, opencv_core.Mat)}
     * @throws Exception if an image cannot be loaded or the widths differ
     */
    public double compare(String targetImageUrl, String baseImageUrl) throws Exception {
        opencv_core.Mat targetImage = imread(targetImageUrl);
        opencv_core.Mat baseImage = null;
        try {
            baseImage = imread(baseImageUrl);
            // imread signals failure with an empty Mat instead of an exception.
            if (targetImage.empty()) {
                throw new Exception("无法读取图片:" + targetImageUrl);
            }
            if (baseImage.empty()) {
                throw new Exception("无法读取图片:" + baseImageUrl);
            }
            if (targetImage.cols() != baseImage.cols()) {
                throw new Exception("图片比对时,图片宽度不一致!");
            }
            if (targetImage.rows() > baseImage.rows()) {
                opencv_core.Mat trimmed = dealLong(targetImage, baseImage);
                targetImage.release();
                targetImage = trimmed;
            } else if (targetImage.rows() < baseImage.rows()) {
                opencv_core.Mat trimmed = dealLong(baseImage, targetImage);
                baseImage.release();
                baseImage = trimmed;
            }
            opencv_core.Mat imageDiff = compare(targetImage, baseImage);
            try {
                return 100 * (double) countNonZero(imageDiff) / (imageDiff.rows() * imageDiff.cols());
            } finally {
                imageDiff.release();
            }
        } finally {
            if (baseImage != null) {
                baseImage.release();
            }
            targetImage.release();
        }
    }

    /**
     * Compares the bottom {@value #BAR_SEARCH_HEIGHT} rows of two images and,
     * scanning upward from the last row, locates the first row that contains
     * a differing pixel.
     *
     * @param longImage  the taller image
     * @param shortImage the shorter image (same width as {@code longImage})
     * @return {@code 240 - row} for the first differing row found (row index
     *         counted from the top of the 240-row strip), or 240 if the two
     *         strips show no difference
     * @throws IllegalArgumentException if either image has fewer than 240 rows
     */
    public int interceptBarHeight(opencv_core.Mat longImage, opencv_core.Mat shortImage) {
        int imageSearchMaxHeight = BAR_SEARCH_HEIGHT;
        if (longImage.rows() < imageSearchMaxHeight || shortImage.rows() < imageSearchMaxHeight) {
            // Without this check the ROI below would start at a negative y.
            throw new IllegalArgumentException("图片高度小于" + imageSearchMaxHeight
                    + ":" + longImage.rows() + "," + shortImage.rows());
        }
        opencv_core.Mat subImageLong = new opencv_core.Mat(longImage,
                new opencv_core.Rect(0, longImage.rows() - imageSearchMaxHeight,
                        longImage.cols(), imageSearchMaxHeight));
        opencv_core.Mat subImageShort = new opencv_core.Mat(shortImage,
                new opencv_core.Rect(0, shortImage.rows() - imageSearchMaxHeight,
                        shortImage.cols(), imageSearchMaxHeight));
        opencv_core.Mat imageDiff = null;
        try {
            imageDiff = compare(subImageLong, subImageShort);
            int cols = imageDiff.cols();
            for (int row = imageDiff.rows() - 1; row > -1; row--) {
                // One pointer per row instead of one per pixel; the diff is single-channel 8-bit.
                BytePointer rowPointer = imageDiff.ptr(row);
                for (int col = 0; col < cols; col++) {
                    if (rowPointer.get(col) != 0) {
                        // NOTE(review): this counts the differing row itself as part of the
                        // bar (identical rows below it number 239 - row) - confirm intended.
                        return imageSearchMaxHeight - row;
                    }
                }
            }
            return imageSearchMaxHeight;
        } finally {
            if (imageDiff != null) {
                imageDiff.release();
            }
            subImageShort.release();
            subImageLong.release();
        }
    }

    /**
     * Shrinks {@code longImage} to the height of {@code shortImage} by keeping
     * its top {@code shortHeight - barHeight} rows and its bottom
     * {@code barHeight} rows, where {@code barHeight} comes from
     * {@link #interceptBarHeight}.
     *
     * @param longImage  the taller image; not modified
     * @param shortImage the shorter image; not modified
     * @return a newly allocated Mat; the caller is responsible for releasing it
     */
    public opencv_core.Mat dealLong(opencv_core.Mat longImage, opencv_core.Mat shortImage) {
        int barHeight = interceptBarHeight(longImage, shortImage);
        opencv_core.Mat topPart = new opencv_core.Mat(longImage,
                new opencv_core.Rect(0, 0, longImage.cols(), shortImage.rows() - barHeight));
        opencv_core.Mat imageBar = new opencv_core.Mat(longImage,
                new opencv_core.Rect(0, longImage.rows() - barHeight, longImage.cols(), barHeight));
        opencv_core.Mat dealedLongImage = new opencv_core.Mat();
        try {
            vconcat(topPart, imageBar, dealedLongImage);
        } finally {
            // These are views onto longImage; releasing them only drops the extra reference.
            imageBar.release();
            topPart.release();
        }
        return dealedLongImage;
    }

    /**
     * Builds a binary difference mask of two equally sized BGR images.
     *
     * <p>Both images are converted to greyscale, their absolute difference is
     * thresholded, and the mask is eroded then dilated to drop isolated pixels.
     *
     * @param targetImage first image; not modified
     * @param baseImage   second image, same size as {@code targetImage}; not modified
     * @return a single-channel mask with 255 where the images differ and 0
     *         elsewhere; the caller is responsible for releasing it
     */
    public opencv_core.Mat compare(opencv_core.Mat targetImage, opencv_core.Mat baseImage) {
        opencv_core.Mat targetGray = new opencv_core.Mat();
        opencv_core.Mat baseGray = new opencv_core.Mat();
        // Empty kernel: per the OpenCV docs, erode/dilate then use the default 3x3 element.
        opencv_core.Mat kernel = new opencv_core.Mat();
        opencv_core.Mat imgDiff = new opencv_core.Mat();
        try {
            // Convert both images to greyscale.
            cvtColor(targetImage, targetGray, COLOR_BGR2GRAY);
            cvtColor(baseImage, baseGray, COLOR_BGR2GRAY);
            // |target - base| per pixel; same result as adding the two saturated
            // one-directional subtractions on 8-bit data.
            opencv_core.absdiff(targetGray, baseGray, imgDiff);
            // Binarize: differences greater than 24 become 255, everything else 0.
            threshold(imgDiff, imgDiff, DIFF_THRESHOLD, 255, THRESH_BINARY);
            erode(imgDiff, imgDiff, kernel);
            dilate(imgDiff, imgDiff, kernel);
            return imgDiff;
        } catch (RuntimeException e) {
            imgDiff.release();
            throw e;
        } finally {
            kernel.release();
            baseGray.release();
            targetGray.release();
        }
    }
}
类WeiboPic
/**
 * Stitches a sequence of scrolled screenshots into one long PNG.
 *
 * <p>All region coordinates are hard-wired for 1080x1920 screenshots with a
 * 360-row page header. Intermediate files are written next to each input,
 * with names derived by replacing {@code ".png"} with {@code "_tmp.png"} or
 * {@code "_snapshot.png"}; they are not deleted afterwards.
 */
public class WeiboPic extends Pic {
    public static final Logger LOGGER = LoggerFactory.getLogger(WeiboPic.class);

    /** Screenshot width in pixels. */
    private static final int SCREEN_WIDTH = 1080;
    /** Screenshot height in pixels; also used as an "until the bottom" region height. */
    private static final int SCREEN_HEIGHT = 1920;
    /** Rows at the top of every screenshot that are skipped on all but the first one. */
    private static final int HEADER_HEIGHT = 360;
    /** Height of the bottom strip compared to detect that scrolling no longer moved the page. */
    private static final int TAIL_STRIP_HEIGHT = 360;
    /** First (largest) y tried when searching for the seam between two screenshots. */
    private static final int SEAM_SEARCH_START_Y = SCREEN_HEIGHT - 400;

    public String picSnapshotPath = PropKit.get("path.pic.snapshot");

    /**
     * Merges consecutive screenshots into a single image written to {@code pic2}.
     *
     * <p>For each adjacent pair, the upper screenshot is cut at the y offset
     * whose remainder best matches the header-less lower screenshot, and the
     * part above that offset is queued for merging. Processing stops early
     * when the bottom strips of the next two screenshots are identical.
     *
     * @param picList screenshot paths in scroll order
     * @param pic2    path of the stitched PNG to write
     * @throws Exception if the list is null/empty or any image step fails
     */
    public void process(List<String> picList, String pic2) throws Exception {
        if (picList == null || picList.size() <= 0) {
            throw new Exception("无待处理图片!");
        }
        if (picList.size() == 1) {
            merge(picList, pic2, "png");
            return;
        }
        List<String> picMergeList = new ArrayList<>();
        boolean isNext = true;
        for (int i = 1; i < picList.size(); i++) {
            String picOne = picList.get(i - 1);
            String picTwo = picList.get(i);
            String picOneTmp = picOne.replace(".png", "_tmp.png");
            String picOneSnapshot = picOne.replace(".png", "_snapshot.png");
            String picTwoTmp = picTwo.replace(".png", "_tmp.png");
            if (i < picList.size() - 1) {
                String picThree = picList.get(i + 1);
                String picThreeTmp = picThree.replace(".png", "_tmp.png");
                // Compare the bottom strips of the next two screenshots; if nothing
                // changed there, this pair is treated as the last one.
                cutFile(picTwo, picTwoTmp,
                        0, SCREEN_HEIGHT - TAIL_STRIP_HEIGHT, SCREEN_WIDTH, SCREEN_HEIGHT);
                cutFile(picThree, picThreeTmp,
                        0, SCREEN_HEIGHT - TAIL_STRIP_HEIGHT, SCREEN_WIDTH, SCREEN_HEIGHT);
                if (compare(picTwoTmp, picThreeTmp) <= 0.0) {
                    isNext = false;
                }
            }
            // Lower screenshot without its header; this overwrites the strip written above.
            cutFile(picTwo, picTwoTmp, 0, HEADER_HEIGHT, SCREEN_WIDTH, SCREEN_HEIGHT);
            int yMin = findSeamY(picOne, picOneTmp, picTwoTmp);

            // The first screenshot keeps its header; later ones start below it.
            int snapshotTop = (i == 1) ? 0 : HEADER_HEIGHT;
            int snapshotHeight = yMin - snapshotTop;
            // A seam right under the header leaves nothing to keep from picOne;
            // an empty region would be rejected by the image reader.
            if (snapshotHeight > 0) {
                cutFile(picOne, picOneSnapshot, 0, snapshotTop, SCREEN_WIDTH, snapshotHeight);
                picMergeList.add(picOneSnapshot);
            }
            if (!isNext || i == picList.size() - 1) {
                picMergeList.add(picTwoTmp);
                break;
            }
        }
        merge(picMergeList, pic2, "png");
    }

    /**
     * Searches, from {@link #SEAM_SEARCH_START_Y} up to {@link #HEADER_HEIGHT},
     * for the y offset at which the rest of {@code picOne} differs least from
     * {@code picTwoTmp}. Returns immediately on a zero-difference offset.
     */
    private int findSeamY(String picOne, String picOneTmp, String picTwoTmp) throws Exception {
        double nonZeroPercentMin = 0.0;
        int yMin = 0;
        for (int y = SEAM_SEARCH_START_Y; y >= HEADER_HEIGHT; y--) {
            LOGGER.info("当前处理图片:{},Y轴:{}", picOne, y);
            cutFile(picOne, picOneTmp, 0, y, SCREEN_WIDTH, SCREEN_HEIGHT);
            double nonZeroPercent = compare(picOneTmp, picTwoTmp);
            if (nonZeroPercent <= 0.0) {
                return y;
            }
            // 0.0 doubles as "no candidate yet": a real candidate is always > 0 here.
            if (nonZeroPercentMin <= 0.0 || nonZeroPercentMin > nonZeroPercent) {
                nonZeroPercentMin = nonZeroPercent;
                yMin = y;
            }
        }
        return yMin;
    }

    /** Crops a region of the PNG file {@code src} into {@code dst}, closing both files afterwards. */
    private void cutFile(String src, String dst, int x, int y, int width, int height)
            throws Exception {
        try (FileInputStream in = new FileInputStream(src);
             FileOutputStream out = new FileOutputStream(dst)) {
            cut(in, out, x, y, width, height);
        }
    }

    /** Manual entry point: stitches three fixed sample screenshots into the configured snapshot directory. */
    public static void main(String[] args) throws Exception {
        InitService initService = new InitService();
        initService.initProp();
        WeiboPic weiboPic = new WeiboPic();
        List<String> hotwordScrFilePathList = new ArrayList<>();
        hotwordScrFilePathList.add("D:\\crawler\\hot-list\\tmp\\pic\\1664505521_184302e4-7ffb-42e9-8131-a60a91cf9586.png");
        hotwordScrFilePathList.add("D:\\crawler\\hot-list\\tmp\\pic\\1664505526_6795fd01-a5af-4536-81ce-4ab96905d163.png");
        hotwordScrFilePathList.add("D:\\crawler\\hot-list\\tmp\\pic\\1664505531_6c00ecc8-9c01-4b53-a9e2-00f3bc443964.png");
        String hotwordScrFilePath2 = weiboPic.picSnapshotPath + File.separator
                + (int) (System.currentTimeMillis() / 1000) + "_" + UUID.randomUUID().toString() + ".png";
        weiboPic.process(hotwordScrFilePathList, hotwordScrFilePath2);
    }
}