C#统计文本修改字数
背景: 需要对用户两次修改的字数进行统计和存储。参照网上的标准最长公共子序列(LCS)匹配方法,并进行了修改。
1.创建矩阵
/// <summary>
/// Builds the longest-common-subsequence length table for two strings.
/// </summary>
/// <param name="s1">First string; its characters map to the table's first dimension.</param>
/// <param name="s2">Second string; its characters map to the table's second dimension.</param>
/// <returns>
/// A (s1.Length + 1) x (s2.Length + 1) table where entry [i, j] is the LCS length
/// of the first i characters of s1 and the first j characters of s2.
/// Row 0 and column 0 stay 0 (empty prefix).
/// </returns>
public static int[,] LCSmethod(string s1, string s2)
{
    int rows = s1.Length;
    int cols = s2.Length;
    int[,] table = new int[rows + 1, cols + 1];

    for (int r = 0; r < rows; r++)
    {
        char current = s1[r];
        for (int k = 0; k < cols; k++)
        {
            // Equal characters extend the diagonal; otherwise carry over the
            // better of "drop the s1 character" and "drop the s2 character".
            table[r + 1, k + 1] = current == s2[k]
                ? table[r, k] + 1
                : System.Math.Max(table[r, k + 1], table[r + 1, k]);
        }
    }

    return table;
}
2.打印文本修改记录(显示修改内容,便于观察)
/// <summary>
/// Renders the edit script that turns the first i characters of s1 into the
/// first j characters of s2, by walking the LCS table back from (i, j).
/// Unchanged characters are emitted as-is, characters only in s2 are prefixed
/// with "+", and characters only in s1 are prefixed with "-".
/// </summary>
/// <param name="c">LCS length table for s1 and s2, indexed [s1 prefix length, s2 prefix length].</param>
/// <param name="s1">Old text.</param>
/// <param name="s2">New text.</param>
/// <param name="i">Number of leading s1 characters to consider (normally s1.Length).</param>
/// <param name="j">Number of leading s2 characters to consider (normally s2.Length).</param>
/// <returns>The edit script in left-to-right text order; empty when i and j are both 0.</returns>
public static string PrintDiff(int[,] c, string s1, string s2, int i, int j)
{
    // Iterative traceback: the recursive form needed one stack frame per
    // character (depth i + j) and overflowed the stack on long texts.
    // Tokens are discovered from the end of the text towards the start.
    var tokens = new System.Collections.Generic.List<string>();

    while (i > 0 || j > 0)
    {
        if (i > 0 && j > 0 && s1[i - 1] == s2[j - 1])
        {
            // Common character: part of the LCS.
            tokens.Add(s1[i - 1].ToString());
            i--;
            j--;
        }
        else if (j > 0 && (i == 0 || c[i, j - 1] > c[i - 1, j]))
        {
            // Character exists only in the new text.
            tokens.Add("+" + s2[j - 1]);
            j--;
        }
        else
        {
            // Remaining case: i > 0 and (j == 0 or c[i, j - 1] <= c[i - 1, j]),
            // i.e. the character exists only in the old text.
            tokens.Add("-" + s1[i - 1]);
            i--;
        }
    }

    // Emit the tokens in reverse discovery order to restore text order.
    var result = new System.Text.StringBuilder();
    for (int k = tokens.Count - 1; k >= 0; k--)
    {
        result.Append(tokens[k]);
    }
    return result.ToString();
}
3.计算修改字数
/// <summary>
/// Counts the changed characters between the first i characters of s1 and
/// the first j characters of s2, by walking the LCS table back from (i, j).
/// Every character present only in s1 (deleted) or only in s2 (added) counts as 1.
/// </summary>
/// <param name="c">LCS length table for s1 and s2, indexed [s1 prefix length, s2 prefix length].</param>
/// <param name="s1">Old text.</param>
/// <param name="s2">New text.</param>
/// <param name="i">Number of leading s1 characters to consider (normally s1.Length).</param>
/// <param name="j">Number of leading s2 characters to consider (normally s2.Length).</param>
/// <returns>Number of added plus deleted characters; 0 when i and j are both 0.</returns>
public static int CountDiff(int[,] c, string s1, string s2, int i, int j)
{
    // Iterative traceback: the recursive form needed one stack frame per
    // character (depth i + j) and overflowed the stack on long texts.
    int changes = 0;

    while (i > 0 || j > 0)
    {
        if (i > 0 && j > 0 && s1[i - 1] == s2[j - 1])
        {
            // Common character: not a modification.
            i--;
            j--;
        }
        else if (j > 0 && (i == 0 || c[i, j - 1] > c[i - 1, j]))
        {
            // Character added in the new text.
            changes++;
            j--;
        }
        else
        {
            // Remaining case: i > 0 and (j == 0 or c[i, j - 1] <= c[i - 1, j]),
            // i.e. a character deleted from the old text.
            changes++;
            i--;
        }
    }

    return changes;
}
4.使用案例:
// Demo entry point: prints both sample texts, then the number of changed
// characters and the "+"/"-" edit script between them, and waits for a key press.
static void Main(string[] args)
{
    string s1 = "diff / patch如何工作?当我第一次开始研究这个问题时,我不知道所涉及的数学的复杂性和缺乏C#的例子。事实证明,找到合适的资源可以让生活变得更轻松,最后维基百科条目和这个截屏视频做得很好,让我理解并在C#中实现了一个小的差异算法。在这篇文章中,我将尝试轻松解释diff应用程序面临的问题并发布一个简单的实现。如果您真的想了解这一点,我强烈推荐截屏视频。";
    string s2 = "diff / patch如何工作?当我第一次开始研究这eee222vfsdss啊啊她她她,我不知道所涉及的数学的复杂性和缺乏C#的例子。事实证明,找355kskjdnamq12mdsa;lmd活变得更轻松,最后维基百科条目和这个截屏视频做得很好,让我理解并在C#中实现了一个小的差异算法。在这篇文章中,我将尝试轻松解释diff应用程序面临的问题并发布一个。如果您真的想了解这一点,我强烈推荐截屏视频。";
    Console.WriteLine("老字串:" + s1);
    Console.WriteLine("新字串:" + s2);
    Console.WriteLine();

    // Build the LCS table once; both reports walk the same table, and
    // building it costs O(s1.Length * s2.Length) time and memory.
    int[,] table = LCS.LCSmethod(s1, s2);
    Console.WriteLine("修改的字数:" + LCS.CountDiff(table, s1, s2, s1.Length, s2.Length));
    Console.WriteLine("修改记录:" + LCS.PrintDiff(table, s1, s2, s1.Length, s2.Length));
    Console.ReadKey();
}
结果图:
"+"后边的字符代表新增字符,"-"后边的字符代表删除字符。
顺带附上前端Js的相同计算
以下代码可放置在一个JS文件内:
/**
 * One entry of the LCS score table.
 *
 * @param {number} row - Row index of the entry in the table.
 * @param {number} col - Column index of the entry in the table.
 */
function cell(row, col) {
    this.row = row;
    this.col = col;
    // Explicit defaults: a bare `this.score;` statement is a no-op and never
    // creates the property, so the fields were undefined until assigned later.
    this.score = 0;
    this.prevCell = null;
}
/**
 * One element of a comparison result: a piece of text and its change state.
 *
 * @param {string} text - The text of this element.
 * @param {number} state - Change marker: -1 = deleted, 0 = unchanged, 1 = added.
 */
function compareText(text, state) {
    // The text this entry carries.
    this.text = text;
    // Change marker; see the state codes documented above.
    this.state = state;
}
/**
 * Character-level diff of two strings driven by a longest-common-subsequence
 * (LCS) score table.
 *
 * Table layout: rows index characters of newValue, columns index characters
 * of oldValue; row 0 and column 0 stand for the empty prefix. Each entry
 * stores the LCS length of the two prefixes (score) and the neighbouring
 * entry that score was derived from (prevCell).
 *
 * Relies on the `cell` and `compareText` constructors being in scope.
 *
 * @param {string} oldValue - Text before the edit; null/undefined are treated as "".
 * @param {string} newValue - Text after the edit; null/undefined are treated as "".
 */
function longestCommonSequence(oldValue, newValue) {
    var toText = function (value) {
        return value == null ? "" : String(value);
    };

    this.oldValue = toText(oldValue);
    this.newValue = toText(newValue);
    this.scoreTable = [];

    /**
     * (Re)creates the score table with every entry at score 0 and no predecessor.
     */
    this.initialize = function () {
        // Drop rows left over from a previous run so a shorter newValue
        // cannot leave stale rows at the end of the table.
        this.scoreTable.length = 0;
        for (var row = 0; row <= this.newValue.length; row++) {
            this.scoreTable[row] = [];
            for (var col = 0; col <= this.oldValue.length; col++) {
                var entry = new cell(row, col);
                entry.score = 0;
                entry.prevCell = null;
                this.scoreTable[row][col] = entry;
            }
        }
    };

    /**
     * Fills every entry outside row 0 / column 0, row by row, so that the
     * three neighbours each entry depends on are already final.
     */
    this.fillIn = function () {
        for (var row = 1; row < this.scoreTable.length; row++) {
            for (var col = 1; col < this.scoreTable[row].length; col++) {
                this.fillInCell(
                    this.scoreTable[row][col],
                    this.scoreTable[row - 1][col],
                    this.scoreTable[row][col - 1],
                    this.scoreTable[row - 1][col - 1]
                );
            }
        }
    };

    /**
     * Computes score and prevCell of one entry from its three neighbours.
     *
     * Tie-breaking: the diagonal wins when it is at least as good as both
     * other neighbours; otherwise the entry above wins over the entry to
     * the left on equal scores.
     *
     * @param {cell} currentCell - Entry to fill.
     * @param {cell} cellAbove - Entry at (row - 1, col).
     * @param {cell} cellToLeft - Entry at (row, col - 1).
     * @param {cell} cellAboveLeft - Entry at (row - 1, col - 1).
     */
    this.fillInCell = function (currentCell, cellAbove, cellToLeft, cellAboveLeft) {
        var aboveScore = cellAbove.score;
        var leftScore = cellToLeft.score;
        var isMatch = this.oldValue.charAt(currentCell.col - 1) === this.newValue.charAt(currentCell.row - 1);
        // On a mismatch the diagonal carries its score over unchanged. This
        // must read `.score`: there is no `getScore` property on a cell, and
        // reading it produced undefined, which silently disabled diagonal
        // (substitution) steps.
        var matchScore = isMatch ? cellAboveLeft.score + 1 : cellAboveLeft.score;

        if (matchScore >= aboveScore && matchScore >= leftScore) {
            currentCell.score = matchScore;
            currentCell.prevCell = cellAboveLeft;
        } else if (aboveScore > matchScore && aboveScore >= leftScore) {
            currentCell.score = aboveScore;
            currentCell.prevCell = cellAbove;
        } else {
            currentCell.score = leftScore;
            currentCell.prevCell = cellToLeft;
        }
    };

    /**
     * Returns one longest common subsequence of oldValue and newValue.
     *
     * @returns {string} The common subsequence; "" when nothing is shared.
     */
    this.getTrack = function () {
        // Build the table here as well, so the method also works when it is
        // called before getCompareTextArray (the table starts out empty).
        this.initialize();
        this.fillIn();

        var chars = [];
        var current = this.scoreTable[this.scoreTable.length - 1][this.scoreTable[0].length - 1];
        while (current.score > 0) {
            var prev = current.prevCell;
            var isDiagonal = current.row - prev.row === 1 && current.col - prev.col === 1;
            if (isDiagonal && current.score === prev.score + 1) {
                chars.unshift(this.oldValue.charAt(current.col - 1));
            }
            current = prev;
        }
        return chars.join("");
    };

    /**
     * Computes the character-level diff between oldValue and newValue.
     *
     * @returns {compareText[]} Entries in text order. State 0 entries spell
     *   the common subsequence, entries with state <= 0 spell oldValue and
     *   entries with state >= 0 spell newValue. Where a character was
     *   replaced, the added character precedes the deleted one.
     */
    this.getCompareTextArray = function () {
        this.initialize();
        this.fillIn();

        var result = [];
        var current = this.scoreTable[this.scoreTable.length - 1][this.scoreTable[0].length - 1];

        // Walk the predecessor chain back towards the origin; entries are
        // found back-to-front, hence unshift.
        while (current.score > 0) {
            var prev = current.prevCell;
            var movedRow = current.row !== prev.row;
            var movedCol = current.col !== prev.col;

            if (movedRow && movedCol) {
                if (current.score === prev.score + 1) {
                    // Common character.
                    result.unshift(new compareText(this.oldValue.charAt(current.col - 1), 0));
                } else {
                    // Diagonal step without a match: old character replaced by new one.
                    result.unshift(new compareText(this.oldValue.charAt(current.col - 1), -1));
                    result.unshift(new compareText(this.newValue.charAt(current.row - 1), 1));
                }
            } else if (movedCol) {
                // Same row: a character of oldValue was deleted.
                result.unshift(new compareText(this.oldValue.charAt(current.col - 1), -1));
            } else {
                // Same column: a character of newValue was added.
                result.unshift(new compareText(this.newValue.charAt(current.row - 1), 1));
            }
            current = prev;
        }

        // The walk stops at the first entry with score 0. Whatever prefix is
        // left shares nothing: all of its old characters are deletions and
        // all of its new characters are additions.
        for (var col = current.col; col >= 1; col--) {
            result.unshift(new compareText(this.oldValue.charAt(col - 1), -1));
        }
        for (var row = current.row; row >= 1; row--) {
            result.unshift(new compareText(this.newValue.charAt(row - 1), 1));
        }
        return result;
    };
}
使用演示:
/**
 * Renders the character-level difference between two texts as an HTML string.
 *
 * Every character is wrapped in its own <font> element: unchanged text is
 * black, added text is green and underlined, deleted text is red and struck
 * through. Characters are HTML-escaped, so the result can be inserted as
 * markup without the compared text being interpreted as HTML.
 *
 * Relies on the `longestCommonSequence` constructor being in scope.
 *
 * @param {number} index - When loosely equal to 1, change highlighting is
 *   switched off and every character is rendered black.
 * @param {string} oldValue - Text before the edit.
 * @param {string} newValue - Text after the edit.
 * @returns {string} HTML markup for the whole comparison; "" when there is nothing to show.
 */
function getResultHistory(index, oldValue, newValue) {
    var UNCHANGED_TAG = "<font style=\"color:black;\">";
    var ADDED_TAG = "<font style=\" text-decoration:underline; color:green;\">";
    var DELETED_TAG = "<font style=\" text-decoration:line-through; color:red;\">";
    var HTML_ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;", "'": "&#39;" };

    // The compared text comes from users; escape it before it becomes markup.
    var escapeHtml = function (text) {
        return String(text).replace(/[&<>"']/g, function (ch) {
            return HTML_ENTITIES[ch];
        });
    };

    var array = [];
    var lcs = new longestCommonSequence(oldValue, newValue);
    var compareArray = lcs.getCompareTextArray();
    for (var i = 0; i < compareArray.length; i++) {
        var openTag = UNCHANGED_TAG;
        // Loose comparison kept on purpose so a string "1" still switches
        // highlighting off, as it did before.
        if (!(index == 1)) {
            switch (compareArray[i].state) {
                case 1:
                    openTag = ADDED_TAG;
                    break;
                case -1:
                    openTag = DELETED_TAG;
                    break;
                default:
                    // State 0 and any unknown state render as unchanged text.
                    break;
            }
        }
        array.push(openTag);
        array.push(escapeHtml(compareArray[i].text));
        array.push("</font>");
    }
    return array.join("");
}
compareArray[i].state说明:
状态 | 含义 |
---|---|
0 | 无修改 |
1 | 新增字符 |
-1 | 删除字符 |
HTML显示效果(将getResultHistory返回的串显示出来):
适用于查看文本修改记录的场景(将switch内"1"和"-1"两个分支的逻辑改成计数器自增(例如count++),即可达到统计修改字数的效果)。