最近遇到了一个问题,有两列数据要找出它们的差异。这两列数据中,如果仅英文字母大小写不同,或是全角、半角字符不同,要视作是同一个条目。因此我想了一个办法对之进行比较。
测试数据如下:
先分别对两列数据进行排序。我使用的Office版本是Office 2010,在“开始”选项卡中找到“排序和筛选”,两列都按升序排列。
在随后弹出的提醒窗口中,要选择“以当前选定区域排序”。
排序完毕后,将这两列数据分别存到两个txt文件中(column1.txt和column2.txt)
创建一个C#命令行应用程序,输入如下代码:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ItemChecker
{
class Program
{
/// <summary>
/// Compares the items of two text files (one item per line) and prints three
/// sections to the console: items only in file 1, items only in file 2, and
/// items present in both. Lines are normalized before comparison, so entries
/// that differ only by letter case, full-width/half-width form, spaces or
/// asterisks are treated as the same item.
/// </summary>
/// <param name="args">
/// Optional overrides: args[0] is the path of file 1 and args[1] the path of
/// file 2. When an argument is missing, the original desktop path is used.
/// </param>
static void Main(string[] args)
{
    string file1Path = args.Length > 0 ? args[0] : "C:\\Users\\Administrator\\Desktop\\column1.txt";
    string file2Path = args.Length > 1 ? args[1] : "C:\\Users\\Administrator\\Desktop\\column2.txt";

    string[] itemsInFile1 = ReadNormalizedItems(file1Path);
    string[] itemsInFile2 = ReadNormalizedItems(file2Path);

    // Hash sets give O(1) membership tests; ordinal comparison matches the
    // semantics of the string == operator.
    HashSet<string> lookup1 = new HashSet<string>(itemsInFile1, StringComparer.Ordinal);
    HashSet<string> lookup2 = new HashSet<string>(itemsInFile2, StringComparer.Ordinal);

    // Materialize each result once so counting and printing do not re-run the query.
    List<string> onlyLeft = itemsInFile1.Where(item => !lookup2.Contains(item)).ToList();
    List<string> onlyRight = itemsInFile2.Where(item => !lookup1.Contains(item)).ToList();
    // Each entry of file 1 that also occurs in file 2 is reported once, no matter
    // how many times it is repeated in file 2.
    List<string> bothOk = itemsInFile1.Where(item => lookup2.Contains(item)).ToList();

    PrintSection("文件1有而文件2没有的条目", onlyLeft);
    PrintBlankLines(5);
    PrintSection("文件2有而文件1没有的条目", onlyRight);
    PrintBlankLines(5);
    PrintSection("二者都有的条目", bothOk);
}

/// <summary>
/// Reads a file line by line, normalizes every line, drops lines that are
/// empty after normalization, and returns the remaining items sorted.
/// </summary>
/// <param name="path">Path of the text file to read.</param>
/// <returns>The normalized, non-empty items in ascending current-culture order.</returns>
private static string[] ReadNormalizedItems(string path)
{
    List<string> items = new List<string>();

    // File.ReadLines closes the underlying reader when enumeration ends.
    foreach (string rawLine in File.ReadLines(path, Encoding.Default))
    {
        string item = ToDBC(rawLine);      // full-width -> half-width
        item = item.Replace("*", "");      // remove asterisks
        item = item.Replace(" ", "");      // remove spaces
        item = item.ToLowerInvariant();    // case-insensitive, independent of the current culture

        // Lines that are blank after normalization take no part in the comparison.
        if (string.IsNullOrWhiteSpace(item))
        {
            continue;
        }

        items.Add(item);
    }

    string[] sorted = items.ToArray();
    // Same ordering rule as string.Compare(a, b), but actually applied.
    Array.Sort(sorted, StringComparer.CurrentCulture);
    return sorted;
}

/// <summary>
/// Prints a heading with the item count, a separator line, and one item per line.
/// </summary>
/// <param name="title">Heading text placed before the count.</param>
/// <param name="items">Items to list.</param>
private static void PrintSection(string title, ICollection<string> items)
{
    Console.WriteLine(title + "(" + items.Count + "个)");
    Console.WriteLine("--------------------------");
    foreach (string item in items)
    {
        Console.WriteLine(item);
    }
}

/// <summary>
/// Writes the given number of empty lines to the console.
/// </summary>
/// <param name="count">Number of empty lines to write.</param>
private static void PrintBlankLines(int count)
{
    for (int i = 0; i < count; i++)
    {
        Console.WriteLine();
    }
}
/// <summary>
/// Converts full-width characters to their half-width equivalents.
/// The ideographic space (U+3000) becomes an ASCII space, and every character
/// in U+FF01..U+FF5E is shifted down by 0xFEE0 to its ASCII counterpart.
/// All other characters are copied unchanged.
/// </summary>
/// <param name="input">Text to convert.</param>
/// <returns>A new string of the same length with full-width characters replaced.</returns>
public static string ToDBC(string input)
{
    const char IdeographicSpace = '\u3000';
    const char FirstFullWidth = '\uFF01';
    const char LastFullWidth = '\uFF5E';
    const int FullWidthOffset = 0xFEE0;

    StringBuilder halfWidth = new StringBuilder(input.Length);
    foreach (char ch in input)
    {
        if (ch == IdeographicSpace)
        {
            halfWidth.Append(' ');
        }
        else if (ch >= FirstFullWidth && ch <= LastFullWidth)
        {
            halfWidth.Append((char)(ch - FullWidthOffset));
        }
        else
        {
            halfWidth.Append(ch);
        }
    }
    return halfWidth.ToString();
}
}
}
全角字符与半角字符的转换,参考了这篇博客:
http://blog.csdn.net/a497785609/article/details/52689171
程序运行结果如下:
END