一、前言
本文将基于 Spring Boot 2.4.0
实现html对比
差异
新增数据
删除数据
修改数据
对调数据
移动数据
二、效果图
html1:
html2:
对比结果:
三、部分代码
@Slf4j
public class MyHtmlDiffPlus {
/**
 * Compares two HTML fragments and assembles the complete diff result.
 *
 * <p>Pipeline: resolve the enabled diff types, build the raw diff segments, classify each
 * segment for display, drop the segments flagged for removal, detect moved text, collect
 * statistics and finally render the annotated HTML.
 *
 * @param params
 *            comparison request; supplies the old/new HTML and the diff-type selection
 * @return the rendered diff HTML together with the per-segment display data and statistics
 * @author : zhengqing
 * @date : 2020/10/22 13:35
 */
public DiffBO handleDiffData(DiffTextInfoHandleBO params) {
    // 1. Which of insert / delete / update / swap / move should be highlighted.
    DiffConditionBO enabledTypes = this.handleCondition(params);

    // Blank input (per StringUtils.isBlank) is compared as the empty string.
    String oldHtml = params.getHtmlOld();
    String newHtml = params.getHtmlNew();
    oldHtml = StringUtils.isBlank(oldHtml) ? "" : oldHtml;
    newHtml = StringUtils.isBlank(newHtml) ? "" : newHtml;

    // 2 + 3. Raw diff segments, then per-segment display classification.
    List<DiffShowBO> segments = this.handleDiffTypeData(this.getDiffDataList(oldHtml, newHtml), enabledTypes);

    // 4. Discard the segments that the classification step flagged for removal.
    segments.removeIf(DiffShowBO::isIfRemoveCurrentElement);

    // 5. Pair up equal deleted/inserted texts as moves (mutates the segments in place).
    this.handleMoveData(segments, enabledTypes.getHasMove());

    // 6. Statistics are gathered before rendering, then everything is packed into the result.
    DiffBO result = new DiffBO();
    result.setDiffTextStatisticsInfo(this.handleDiffTextStatisticsData(segments));
    result.setDiffContentHtml(this.addTagForText(segments));
    result.setDiffShowList(segments);
    return result;
}
/**
 * Resolves which diff types the caller wants highlighted.
 *
 * <p>When the "all types" flag is true every type is enabled. Otherwise only the types listed
 * in the checked-type list are enabled; with an empty or missing list nothing is enabled.
 *
 * @param params
 *            comparison request carrying the checked-type list and the "all types" flag
 * @return the resolved insert / delete / update / swap / move switches
 * @author : zhengqing
 * @date : 2020/11/5 14:30
 */
private DiffConditionBO handleCondition(DiffTextInfoHandleBO params) {
    List<Integer> selectedTypes = params.getCheckedDiffTypeList();
    boolean allTypes = Boolean.TRUE.equals(params.getIfAllDiffType());

    // Every switch starts out as "all selected?"; the list can only turn switches on.
    boolean insertOn = allTypes;
    boolean deleteOn = allTypes;
    boolean updateOn = allTypes;
    boolean exchangeOn = allTypes;
    boolean moveOn = allTypes;

    if (!allTypes && !CollectionUtils.isEmpty(selectedTypes)) {
        for (Integer typeCode : selectedTypes) {
            if (DiffTypeEnum.增加.getType().equals(typeCode)) {
                insertOn = true;
            } else if (DiffTypeEnum.删除.getType().equals(typeCode)) {
                deleteOn = true;
            } else if (DiffTypeEnum.修改.getType().equals(typeCode)) {
                updateOn = true;
            } else if (DiffTypeEnum.对调.getType().equals(typeCode)) {
                exchangeOn = true;
            } else if (DiffTypeEnum.移动.getType().equals(typeCode)) {
                moveOn = true;
            }
        }
    }

    DiffConditionBO condition = new DiffConditionBO();
    condition.setHasInsert(insertOn);
    condition.setHasDelete(deleteOn);
    condition.setHasUpdate(updateOn);
    condition.setHasExChange(exchangeOn);
    condition.setHasMove(moveOn);
    return condition;
}
/**
 * Runs the HTML diff and splits its output into an ordered list of segments.
 *
 * <p>Every region matched by {@code Constants.DIFF_PATTERN_CONTENT} becomes an insert segment
 * (match starts with {@code <ins}) or a delete segment (match starts with {@code <del}) whose
 * text is capture group 2. Everything between matches, and everything after the last match,
 * becomes an unchanged segment, so concatenating the segments covers the whole diff output.
 *
 * @param htmlOld
 *            old HTML
 * @param htmlNew
 *            new HTML
 * @return the diff segments in document order
 * @author : zhengqing
 * @date : 2020/12/4 17:16
 */
private List<DiffDataBO> getDiffDataList(String htmlOld, String htmlNew) {
    String diffHtml = new HtmlDiff(htmlOld, htmlNew).Build();
    List<DiffDataBO> diffDataBOList = Lists.newArrayList();
    Matcher diffMatcher = Constants.DIFF_PATTERN_CONTENT.matcher(diffHtml);
    // Start of the text that has not yet been assigned to any segment.
    int unMatchStartIndex = 0;
    while (diffMatcher.find()) {
        int startIndex = diffMatcher.start();
        // Text between the previous match and this one is unchanged content.
        if (unMatchStartIndex != startIndex) {
            diffDataBOList.add(new DiffDataBO(DiffTypeEnum.不变, diffHtml.substring(unMatchStartIndex, startIndex)));
        }
        unMatchStartIndex = diffMatcher.end();

        String diffContent = diffMatcher.group(0);
        String diffPlainText = diffMatcher.group(2);
        if (diffContent.startsWith("<ins")) {
            diffDataBOList.add(new DiffDataBO(DiffTypeEnum.增加, diffPlainText));
        } else if (diffContent.startsWith("<del")) {
            diffDataBOList.add(new DiffDataBO(DiffTypeEnum.删除, diffPlainText));
        }
    }
    // Fix: text after the last <ins>/<del> (or the whole output when nothing matched) used to be
    // silently dropped; keep it as a trailing unchanged segment.
    if (unMatchStartIndex < diffHtml.length()) {
        diffDataBOList.add(new DiffDataBO(DiffTypeEnum.不变, diffHtml.substring(unMatchStartIndex)));
    }
    return diffDataBOList;
}
/**
 * Wraps every raw diff segment in a DiffShowBO and decides its display type and whether it is
 * shown, looking back at the one or two preceding segments to recognise modifications
 * (delete followed by insert) and swaps.
 *
 * <p>Entries already added to the result list may be mutated when a later segment completes a
 * pattern (for example a preceding delete is hidden and flagged for removal once the following
 * insert turns the pair into a modification). The move switch is not read here.
 *
 * @param diffList
 *            raw diff segments in document order
 * @param conditionInfo
 *            which diff types are enabled
 * @return one DiffShowBO per input segment, in the same order, each carrying its list position
 *         as index
 * @author : zhengqing
 * @date : 2020/11/5 14:35
 */
private List<DiffShowBO> handleDiffTypeData(List<DiffDataBO> diffList, DiffConditionBO conditionInfo) {
boolean hasInsert = conditionInfo.getHasInsert();
boolean hasDelete = conditionInfo.getHasDelete();
boolean hasUpdate = conditionInfo.getHasUpdate();
boolean hasExChange = conditionInfo.getHasExChange();
// Result list; entry k corresponds to diffList.get(k).
List<DiffShowBO> diffShowList = Lists.newArrayList();
for (int i = 0; i < diffList.size(); i++) {
DiffDataBO diffItemNow = diffList.get(i);
String diffTextNow = diffItemNow.getText();
DiffTypeEnum diffTypeEnumNow = diffItemNow.getTypeEnum();
DiffShowBO diffShowNow = new DiffShowBO();
diffShowNow.setDiff(diffItemNow);
diffShowNow.setDiffText(diffTextNow);
boolean isInsert = diffTypeEnumNow.equals(DiffTypeEnum.增加);
boolean isDelete = diffTypeEnumNow.equals(DiffTypeEnum.删除);
boolean isEqual = diffTypeEnumNow.equals(DiffTypeEnum.不变);
// The display type starts as the raw type; it may be overridden below.
if (isInsert) {
diffShowNow.setDiffTypeEnum(DiffTypeEnum.增加);
}
if (isDelete) {
diffShowNow.setDiffTypeEnum(DiffTypeEnum.删除);
}
if (isEqual) {
diffShowNow.setDiffTypeEnum(DiffTypeEnum.不变);
}
if (i == 0) {
// First segment has no predecessor: show unchanged text, or an insert/delete whose type is enabled.
boolean isShow = isEqual || (hasDelete && isDelete) || (hasInsert && isInsert);
diffShowNow.setIfShow(isShow);
} else {
DiffDataBO diffItemBefore = diffList.get(i - 1);
// Text and raw type of the previous segment.
String diffTextBefore = diffItemBefore.getText();
DiffTypeEnum beforeTypeEnum = diffItemBefore.getTypeEnum();
// Display entry of the previous segment (may already have been reclassified).
DiffShowBO diffShowDataBefore = diffShowList.get(i - 1);
String beforeDiffText = diffShowDataBefore.getDiffText();
// A delete immediately followed by an insert is treated as a modification.
boolean isUpdate = beforeTypeEnum.equals(DiffTypeEnum.删除) && diffTypeEnumNow.equals(DiffTypeEnum.增加);
if (isUpdate) {
if (hasUpdate || hasExChange) {
// Within a modification, additionally check for a swap: the two halves of the text traded
// places, e.g. "ABCDEF" -> "DEFABC".
int diffTextLength = diffTextNow.length();
if (diffTextLength % 2 == 0) {
int diffTextBeforeLength = diffTextBefore.length();
// Swap detection is only attempted when the new text has an even length.
// Front and back half of the new text.
int diffTextNowCenterLength = diffTextLength / 2;
String diffTextFrontPartNow = diffTextNow.substring(0, diffTextNowCenterLength);
String diffTextLatterPartNow = diffTextNow.substring(diffTextNowCenterLength);
// Front and back half of the previous (deleted) text.
int diffTextBeforeCenterLength = diffTextBeforeLength / 2;
String diffTextFrontPartBefore = diffTextBefore.substring(0, diffTextBeforeCenterLength);
String diffTextLatterPartBefore = diffTextBefore.substring(diffTextBeforeCenterLength);
// Swap when new front == old back and new back == old front; otherwise a plain modification.
if (diffTextFrontPartNow.equals(diffTextLatterPartBefore)
&& diffTextLatterPartNow.equals(diffTextFrontPartBefore)) {
diffShowNow.setDiffTypeEnum(DiffTypeEnum.对调);
} else {
diffShowNow.setDiffTypeEnum(DiffTypeEnum.修改);
}
} else {
diffShowNow.setDiffTypeEnum(DiffTypeEnum.修改);
}
// (1) Show the current segment only if its resulting type is enabled.
DiffTypeEnum diffTypeEnum = diffShowNow.getDiffTypeEnum();
// NOTE(review): the type was just set to modify or swap above, so this insert check looks unreachable.
if (hasInsert && diffTypeEnum.equals(DiffTypeEnum.增加)) {
diffShowNow.setIfShow(true);
}
if (hasUpdate && diffTypeEnum.equals(DiffTypeEnum.修改)) {
diffShowNow.setIfShow(true);
}
if (hasExChange && diffTypeEnum.equals(DiffTypeEnum.对调)) {
diffShowNow.setIfShow(true);
}
// (2) Hide the preceding delete and flag it for removal; its text is kept on the current entry
// as the "before" text (the original author notes it is used as a `data-title` hover hint).
diffShowDataBefore.setIfShow(false);
diffShowDataBefore.setIfRemoveCurrentElement(true);
diffShowNow.setDiffTextBefore(beforeDiffText);
} else {
// Neither modify nor swap is enabled.
// NOTE(review): isUpdate implies the current segment is an insert, so isDelete is false here
// and this delete check looks unreachable.
if (hasDelete && isDelete) {
diffShowNow.setIfShow(true);
}
// The pair counts as a modification, not as a separate delete and insert, so neither is shown.
if (isInsert) {
// (1) Do not show the current insert.
diffShowNow.setIfShow(false);
// (2) Do not show the preceding delete.
diffShowDataBefore.setIfShow(false);
}
}
} else {
// Not a delete+insert pair: show unchanged text, or an insert/delete whose type is enabled.
boolean isShow = (hasDelete && isDelete) || (hasInsert && isInsert) || isEqual;
diffShowNow.setIfShow(isShow);
}
// Swap across three segments: insert + unchanged + delete where the inserted and deleted texts
// are equal, e.g. "ABCDEF" -> "DEFABC" reported as insert "DEF", unchanged "ABC", delete "DEF".
if (hasExChange && i > 1) {
// Entry two positions back; its current display type (not its raw type) is tested.
DiffShowBO diffShowDataExChangeAfter = diffShowList.get(i - 2);
DiffTypeEnum exChangeAfterDiffTypeEnum = diffShowDataExChangeAfter.getDiffTypeEnum();
String exChangeAfterDiffText = diffShowDataExChangeAfter.getDiffText();
// insert + unchanged + delete = swap
boolean isExChange = exChangeAfterDiffTypeEnum.equals(DiffTypeEnum.增加)
&& beforeTypeEnum.equals(DiffTypeEnum.不变) && diffTypeEnumNow.equals(DiffTypeEnum.删除);
if (isExChange && diffTextNow.equals(exChangeAfterDiffText)) {
// Mark the insert and the unchanged segment as one swap pair, each remembering the other's
// text as its "before" text, and flag the current delete for removal.
diffShowDataExChangeAfter.setDiffTextBefore(beforeDiffText);
diffShowDataExChangeAfter.setDiffTypeEnum(DiffTypeEnum.对调);
diffShowDataExChangeAfter.setIfRemoveCurrentElement(false);
diffShowDataBefore.setIfShow(true);
diffShowDataBefore.setDiffTextBefore(exChangeAfterDiffText);
diffShowDataBefore.setDiffTypeEnum(DiffTypeEnum.对调);
diffShowDataBefore.setIfRemoveCurrentElement(false);
diffShowNow.setIfRemoveCurrentElement(true);
}
}
}
// The index is the segment's position in the input list.
diffShowNow.setIndex(i);
diffShowList.add(diffShowNow);
}
return diffShowList;
}
/**
 * Detects moved text: a deleted segment and an inserted segment with the same text.
 *
 * <p>Only delete/insert segments whose text is non-blank and at least 3 characters long are
 * considered. Texts are compared after being reduced to the last match of
 * {@code Constants.TEXT_PATTERN_CENTER_CONTENT_LINE_FEED} (according to the original author's
 * note this strips leading and trailing line breaks). Each insert is paired with at most one
 * delete. Both halves of a pair are retyped as move, shown according to {@code hasMove}, and
 * the insert takes over the delete's index so the two can be matched up by consumers.
 *
 * @param diffShowList
 *            display segments; mutated in place
 * @param hasMove
 *            whether moved text should be shown
 * @return the same list instance that was passed in
 * @author : zhengqing
 * @date : 2020/11/4 10:22
 */
private List<DiffShowBO> handleMoveData(List<DiffShowBO> diffShowList, boolean hasMove) {
    // Candidate pools: long-enough deleted and inserted segments.
    List<DiffShowBO> deletedCandidates = Lists.newArrayList();
    List<DiffShowBO> insertedCandidates = Lists.newArrayList();
    for (DiffShowBO segment : diffShowList) {
        DiffTypeEnum segmentType = segment.getDiffTypeEnum();
        String segmentText = segment.getDiffText();
        boolean longEnough = !StringUtils.isBlank(segmentText) && segmentText.length() >= 3;
        if (longEnough) {
            if (segmentType.equals(DiffTypeEnum.删除)) {
                deletedCandidates.add(segment);
            }
            if (segmentType.equals(DiffTypeEnum.增加)) {
                insertedCandidates.add(segment);
            }
        }
    }
    if (CollectionUtils.isEmpty(deletedCandidates) || CollectionUtils.isEmpty(insertedCandidates)) {
        return diffShowList;
    }

    // Positions (within insertedCandidates) that are already paired with a delete.
    Set<Integer> pairedInsertPositions = Sets.newHashSet();
    for (DiffShowBO deleted : deletedCandidates) {
        String deletedText = deleted.getDiffText();
        boolean paired = false;
        Integer deletedIndex = null;
        Integer insertedIndex = null;

        int insertPos = 0;
        while (insertPos < insertedCandidates.size()) {
            DiffShowBO inserted = insertedCandidates.get(insertPos);
            String insertedText = inserted.getDiffText();

            // Normalise both texts: keep the last region matched by the pattern, if any.
            Matcher deletedMatcher = Constants.TEXT_PATTERN_CENTER_CONTENT_LINE_FEED.matcher(deletedText);
            while (deletedMatcher.find()) {
                deletedText = deletedMatcher.group();
            }
            Matcher insertedMatcher = Constants.TEXT_PATTERN_CENTER_CONTENT_LINE_FEED.matcher(insertedText);
            while (insertedMatcher.find()) {
                insertedText = insertedMatcher.group();
            }

            // Same text and the insert is still free -> this is a move.
            if (deletedText.equals(insertedText) && !pairedInsertPositions.contains(insertPos)) {
                pairedInsertPositions.add(insertPos);
                paired = true;
                deletedIndex = deleted.getIndex();
                insertedIndex = inserted.getIndex();
                break;
            }
            insertPos++;
        }

        if (!paired) {
            continue;
        }
        for (DiffShowBO segment : diffShowList) {
            Integer segmentIndex = segment.getIndex();
            boolean isDeletedHalf = segmentIndex.equals(deletedIndex);
            boolean isInsertedHalf = segmentIndex.equals(insertedIndex);
            if (!isDeletedHalf && !isInsertedHalf) {
                continue;
            }
            segment.setDiffTypeEnum(DiffTypeEnum.移动);
            segment.setIfShow(hasMove);
            // The inserted half adopts the delete's index so both halves share one pair id.
            if (isInsertedHalf) {
                segment.setIndex(deletedIndex);
            }
        }
    }
    return diffShowList;
}
/**
 * Counts the shown, changed segments per diff type and collects their texts.
 *
 * <p>Only segments that are shown and whose type is not "unchanged" are counted. Modify and
 * swap items also carry the text they replaced. The move count is halved because every move
 * consists of two segments (the deleted half and the inserted half).
 *
 * <p>When the input is null or empty the returned object is left untouched (no counts or
 * lists are set on it).
 *
 * @param diffShowList
 *            display segments
 * @return per-type counts and item lists
 * @author : zhengqing
 * @date : 2020/11/9 15:08
 */
private DiffTextStatisticsInfoBO handleDiffTextStatisticsData(List<DiffShowBO> diffShowList) {
    DiffTextStatisticsInfoBO diffTextInfo = new DiffTextStatisticsInfoBO();
    if (CollectionUtils.isEmpty(diffShowList)) {
        return diffTextInfo;
    }

    int addNum = 0;
    int deleteNum = 0;
    int updateNum = 0;
    int exchangeNum = 0;
    int moveNum = 0;
    List<DiffTextInfoItem> textInfoAddList = Lists.newArrayList();
    List<DiffTextInfoItem> textInfoDeleteList = Lists.newArrayList();
    List<DiffTextInfoItem> textInfoUpdateList = Lists.newArrayList();
    List<DiffTextInfoItem> textInfoExchangeList = Lists.newArrayList();
    List<DiffTextInfoItem> textInfoMoveList = Lists.newArrayList();

    for (DiffShowBO e : diffShowList) {
        DiffTypeEnum diffTypeEnum = e.getDiffTypeEnum();
        boolean ifShow = e.isIfShow();
        // Unchanged or hidden segments do not take part in the statistics.
        if (diffTypeEnum.equals(DiffTypeEnum.不变) || !ifShow) {
            continue;
        }
        DiffTextInfoItem diffTextInfoItem = new DiffTextInfoItem();
        diffTextInfoItem.setText(e.getDiffText());
        diffTextInfoItem.setType(diffTypeEnum.getType());
        diffTextInfoItem.setTypeName(diffTypeEnum.getDesc());

        switch (diffTypeEnum) {
            case 增加:
                addNum++;
                textInfoAddList.add(diffTextInfoItem);
                break;
            case 删除:
                deleteNum++;
                textInfoDeleteList.add(diffTextInfoItem);
                break;
            case 修改:
                updateNum++;
                diffTextInfoItem.setTextBefore(e.getDiffTextBefore());
                textInfoUpdateList.add(diffTextInfoItem);
                break;
            case 对调:
                exchangeNum++;
                diffTextInfoItem.setTextBefore(e.getDiffTextBefore());
                textInfoExchangeList.add(diffTextInfoItem);
                // Fix: this break was missing, so every swap fell through and was also counted
                // and listed as a move.
                break;
            case 移动:
                moveNum++;
                textInfoMoveList.add(diffTextInfoItem);
                break;
            default:
                break;
        }
    }

    diffTextInfo.setAddNum(addNum);
    diffTextInfo.setDeleteNum(deleteNum);
    diffTextInfo.setUpdateNum(updateNum);
    diffTextInfo.setExchangeNum(exchangeNum);
    // Two segments (delete half + insert half) make up one move.
    diffTextInfo.setMoveNum(moveNum / 2);
    diffTextInfo.setTextInfoAddList(textInfoAddList);
    diffTextInfo.setTextInfoDeleteList(textInfoDeleteList);
    diffTextInfo.setTextInfoUpdateList(textInfoUpdateList);
    diffTextInfo.setTextInfoExchangeList(textInfoExchangeList);
    diffTextInfo.setTextInfoMoveList(textInfoMoveList);
    return diffTextInfo;
}
/**
 * Renders the shown segments into one HTML string, wrapping each changed segment in the
 * markup template for its diff type, and then attaches the stylesheet link.
 *
 * <p>Hidden segments are skipped. The literal {@code "%s"} arguments passed to
 * {@code String.format} re-emit {@code %s} into the output, i.e. those template slots stay
 * as placeholders in the generated markup.
 *
 * @param diffShowList
 *            display segments
 * @return the annotated diff HTML document
 * @author : zhengqing
 * @date : 2020/10/30 13:59
 */
private String addTagForText(List<DiffShowBO> diffShowList) {
    StringBuilder html = new StringBuilder();
    for (DiffShowBO segment : diffShowList) {
        if (!segment.isIfShow()) {
            continue;
        }
        Integer pairIndex = segment.getIndex();
        String rendered = segment.getDiffText();
        DiffTypeEnum displayType = segment.getDiffTypeEnum();
        // Raw type of the underlying segment; tells the two halves of a swap/move apart.
        DiffTypeEnum rawType = segment.getDiff().getTypeEnum();
        // The replaced text is HTML-escaped before it is embedded into the template.
        String previousText = escapeHtml(segment.getDiffTextBefore());

        switch (displayType) {
            case 增加:
                rendered = String.format(Constants.TEXT_DIFF_HTML_CODE_TEXT_INSERT, "%s", "%s", rendered);
                break;
            case 删除:
                rendered = String.format(Constants.TEXT_DIFF_HTML_CODE_TEXT_DELETE, "%s", "%s", rendered);
                break;
            case 修改:
                rendered = String.format(Constants.TEXT_DIFF_HTML_CODE_TEXT_UPDATE, "%s", "%s", previousText,
                    rendered);
                break;
            case 对调:
                if (DiffTypeEnum.增加 == rawType) {
                    rendered = String.format(Constants.TEXT_DIFF_HTML_CODE_TEXT_EXCHANGE_BEFORE, "%s", "%s",
                        previousText, rendered);
                } else if (DiffTypeEnum.不变 == rawType) {
                    rendered = String.format(Constants.TEXT_DIFF_HTML_CODE_TEXT_EXCHANGE_AFTER, "%s", "%s",
                        previousText, rendered);
                }
                break;
            case 移动:
                if (DiffTypeEnum.删除 == rawType) {
                    rendered = String.format(Constants.TEXT_DIFF_HTML_CODE_TEXT_MOVE_BEFORE, pairIndex, "%s", "%s",
                        rendered);
                } else if (DiffTypeEnum.增加 == rawType) {
                    rendered = String.format(Constants.TEXT_DIFF_HTML_CODE_TEXT_MOVE_AFTER, pairIndex, "%s", "%s",
                        rendered);
                }
                break;
            default:
                // Unchanged text is emitted as is.
                break;
        }
        html.append(rendered);
    }
    return this.addCssForText(html.toString());
}
/**
 * Parses the diff markup as a body fragment and adds a {@code diff.css} stylesheet link to
 * the head of the resulting document.
 *
 * @param diffHtml
 *            annotated diff markup
 * @return the HTML of the resulting document
 * @author : zhengqing
 * @date : 2020/12/7 13:42
 */
private String addCssForText(String diffHtml) {
    Document document = Jsoup.parseBodyFragment(diffHtml);
    document.head().append("<link href=\"diff.css\" rel=\"stylesheet\" type=\"text/css\">");
    return document.html();
}
/**
* html转义处理
*
* @param html:
* 富文本
* @return: 处理过后的html
* @author : zhengqing
* @date : 2020/10/27 11:42
*/
private String escapeHtml(String html) {
if (StringUtils.isBlank(html)) {
return html;
}
// html转义处理
html = html.replaceAll("\"", """);
html = html.replaceAll("<", "<");
html = html.replaceAll(">", ">");
return html;
}
}
四、项目地址
https://gitee.com/zhengqingya/html-diff-util
今日分享语句:
- 对过去,要淡;
- 对现在,要惜;
- 对未来,要信;