看到群里有个需求,2组1500万个整数(目前是2个文本文件),怎么快速的比较2组的差别?
算了一下,1500万个整数,量不是很大,直接可以内存处理。从建立工程到写完,18分钟,测试正确性5分钟,一共才23分钟,平均1分钟写10多行。还几乎一次写对。
感谢平时搬的很多砖。这个,很遗憾,很多人觉得没啥,但就没有几个写得好的,群里面参与的,除了我师父对技术执着之外,更加多是理论派。最后我想说,下面的代码,优化空间还很大,会比下面的代码提高6--8倍(整个速度,包括读文件),真正写一次,老老实实和别人的比较一次,会结果会刷新认知。N年前是,现在也是。
下图第二个结果,会吓倒很多人。不是同一个程序,我就不说,追求技术的人必然能够写出来甚至超越它。
// FindDiffNumber.cpp : 此檔案包含 'main' 函式。程式會於該處開始執行及結束執行。
//
#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <stdio.h>
//---------------------------------------------------------------------------
#define MALLOC(x) HeapAlloc(GetProcessHeap(), 0, (x))
#define FREE(x) HeapFree(GetProcessHeap(), 0, (x))
//---------------------------------------------------------------------------
int uintegers_compare(const void *p1, const void *p2)
{
return(*(unsigned int *)p1 - *(unsigned int *)p2);
}
int wmain(int argc, WCHAR *argv[])
{
unsigned int *values0;
unsigned int *values1;
char line[256];
char *p;
FILE *fp;
FILE *fp0;
FILE *fp1;
unsigned int l;
unsigned int count0;
unsigned int count1;
unsigned int i;
unsigned int j;
DWORD tickcount0, tickcount1;
tickcount0 = GetTickCount();
if (argc > 2)
{
values0 = NULL;
// 因为条件说了是1500万行,不是很大,可以申请内存,申请够2000万个
values1 = (unsigned int *)MALLOC(sizeof(unsigned int) * 20000000);
count0 = 0;
count1 = 0;
if (values1)
{
fp = _wfopen(argv[1], L"rb");
if (fp)
{
while (!feof(fp))
{
if (fgets(line, sizeof(line), fp) != NULL)
{
l = strlen(line);
if (l > 0 && line[l - 1] == '\n')
{
line[l - 1] = '\0';
l--;
}
if (l > 0 && line[l - 1] == '\r')
{
line[l - 1] = '\0';
l--;
}
if (l > 0 && line[l - 1] == '\n')
{
line[l - 1] = '\0';
l--;
}
if (l > 0 && line[l - 1] == '\r')
{
line[l - 1] = '\0';
l--;
}
if (l)
{
values1[count0++] = atoi(line);
}
}
}
fclose(fp);
}
}
if (count0)
{
values0 = (unsigned int *)MALLOC(sizeof(unsigned int) * count0);
if (values0)
{
memcpy(values0, values1, sizeof(unsigned int) * count0);
fp = _wfopen(argv[2], L"rb");
if (fp)
{
while (!feof(fp))
{
if (fgets(line, sizeof(line), fp) != NULL)
{
l = strlen(line);
if (l > 0 && line[l - 1] == '\n')
{
line[l - 1] = '\0';
l--;
}
if (l > 0 && line[l - 1] == '\r')
{
line[l - 1] = '\0';
l--;
}
if (l > 0 && line[l - 1] == '\n')
{
line[l - 1] = '\0';
l--;
}
if (l > 0 && line[l - 1] == '\r')
{
line[l - 1] = '\0';
l--;
}
if (l)
{
values1[count1++] = atoi(line);
}
}
}
fclose(fp);
}
}
tickcount1 = GetTickCount();
printf("Load text cost %d ms.\r\n", tickcount1 - tickcount0);
tickcount0 = tickcount1;
if (count1)
{
fp0 = _wfopen(L"r0.txt", L"wb");
fp1 = _wfopen(L"r1.txt", L"wb");
if (fp0 && fp1)
{
qsort(values0, count0, sizeof(unsigned int), uintegers_compare);
qsort(values1, count1, sizeof(unsigned int), uintegers_compare);
tickcount1 = GetTickCount();
printf("Prepare text cost %d ms.\r\n", tickcount1 - tickcount0);
tickcount0 = tickcount1;
i = 0;
j = 0;
while (i < count0 && j < count1)
{
if (values0[i] == values1[j])
{
i++;
j++;
}
else
{
if (values0[i] < values1[j])
{
_itoa(values0[i++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp0);
}
else
{
_itoa(values1[j++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp1);
}
}
}
while (i < count0)
{
_itoa(values0[i++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp0);
}
while (j < count1)
{
_itoa(values1[j++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp1);
}
}
if (fp0)
{
fclose(fp0);
}
if (fp1)
{
fclose(fp1);
}
}
}
if (values0)
{
FREE(values0);
}
if (values1)
{
FREE(values1);
}
}
tickcount1 = GetTickCount();
printf("Cost %d ms.\r\n", tickcount1 - tickcount0);
return(0);
}
后来又简单优化了一下,
// FindDiffNumber.cpp : 此檔案包含 'main' 函式。程式會於該處開始執行及結束執行。
//
#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <stdio.h>
//---------------------------------------------------------------------------
#define MALLOC(x) HeapAlloc(GetProcessHeap(), 0, (x))
#define FREE(x) HeapFree(GetProcessHeap(), 0, (x))
//---------------------------------------------------------------------------
int uintegers_compare(const void *p1, const void *p2)
{
return(*(unsigned int *)p1 - *(unsigned int *)p2);
}
int wmain(int argc, WCHAR *argv[])
{
unsigned int *values0;
unsigned int *values1;
char line[20];
char *p;
char *p0;
char *p1;
HANDLE hfile;
HANDLE hfile0;
HANDLE hfile1;
FILE *fp0;
FILE *fp1;
LARGE_INTEGER li;
DWORD numberofbytes;
unsigned int l;
unsigned int count0;
unsigned int count1;
unsigned int i;
unsigned int j;
DWORD tickcount0, tickcount1;
tickcount0 = GetTickCount();
if (argc > 2)
{
values0 = NULL;
values1 = NULL;
count0 = 0;
count1 = 0;
hfile = CreateFile(argv[1], GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hfile != INVALID_HANDLE_VALUE)
{
if (GetFileSizeEx(hfile, &li))
{
p = (char *)MALLOC(sizeof(unsigned int) * 10 + li.QuadPart + 1);
if (p)
{
if (ReadFile(hfile, p + sizeof(unsigned int) * 10, li.QuadPart, &numberofbytes, NULL) && numberofbytes == li.QuadPart)
{
numberofbytes -= 3;
memmove(p + sizeof(unsigned int) * 10, p + sizeof(unsigned int) * 10 + 3, numberofbytes);
p[sizeof(unsigned int) * 10 + numberofbytes] = '\0';
values0 = (unsigned int *)p;
p = NULL;
}
if (p)
{
FREE(p);
}
}
}
CloseHandle(hfile);
}
hfile = CreateFile(argv[2], GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hfile != INVALID_HANDLE_VALUE)
{
if (GetFileSizeEx(hfile, &li))
{
p = (char *)MALLOC(sizeof(unsigned int) * 10 + li.QuadPart + 1);
if (p)
{
if (ReadFile(hfile, p + sizeof(unsigned int) * 10, li.QuadPart, &numberofbytes, NULL) && numberofbytes == li.QuadPart)
{
numberofbytes -= 3;
memmove(p + sizeof(unsigned int) * 10, p + sizeof(unsigned int) * 10 + 3, numberofbytes);
p[sizeof(unsigned int) * 10 + numberofbytes] = '\0';
values1 = (unsigned int *)p;
p = NULL;
}
if (p)
{
FREE(p);
}
}
}
CloseHandle(hfile);
}
tickcount1 = GetTickCount();
printf("Load text cost %d ms.\r\n", tickcount1 - tickcount0);
tickcount0 = tickcount1;
if (values0 && values1)
{
l = 0;
p = (char *)values0;
p += sizeof(unsigned int) * 10;
p0 = p;
p1 = p;
while (*p1)
{
if (*p1 == '\r' || *p1 == '\n')
{
*p1 = '\0';
if (p0 < p1)
{
values0[count0++] = atoi(p0);
}
p0 = p1 + 1;
}
p1++;
}
if (p0 < p1)
{
values0[count0++] = atoi(p0);
}
p = (char *)values1;
p += sizeof(unsigned int) * 10;
p0 = p;
p1 = p;
while (*p1)
{
if (*p1 == '\r' || *p1 == '\n')
{
*p1 = '\0';
if (p0 < p1)
{
values1[count1++] = atoi(p0);
}
p0 = p1 + 1;
}
p1++;
}
if (p0 < p1)
{
values1[count1++] = atoi(p0);
}
if (count0 && count1)
{
fp0 = _wfopen(L"r0.txt", L"wb");
fp1 = _wfopen(L"r1.txt", L"wb");
if (fp0 && fp1)
{
qsort(values0, count0, sizeof(unsigned int), uintegers_compare);
qsort(values1, count1, sizeof(unsigned int), uintegers_compare);
tickcount1 = GetTickCount();
printf("Prepare text cost %d ms.\r\n", tickcount1 - tickcount0);
tickcount0 = tickcount1;
i = 0;
j = 0;
while (i < count0 && j < count1)
{
if (values0[i] == values1[j])
{
i++;
j++;
}
else
{
if (values0[i] < values1[j])
{
_itoa(values0[i++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp0);
}
else
{
_itoa(values1[j++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp1);
}
}
}
while (i < count0)
{
_itoa(values0[i++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp0);
}
while (j < count1)
{
_itoa(values1[j++], line, 10);
l = strlen(line);
line[l++] = '\r';
line[l++] = '\n';
fwrite(line, l, 1, fp1);
}
}
if (fp0)
{
fclose(fp0);
}
if (fp1)
{
fclose(fp1);
}
}
}
if (values0)
{
FREE(values0);
}
if (values1)
{
FREE(values1);
}
}
tickcount1 = GetTickCount();
printf("Cost %d ms.\r\n", tickcount1 - tickcount0);
return(0);
}