文本文件大数据查询

大数据查询的步骤:

//读入到内存、排序、写入到文件、创建索引、二分查找

下面的代码是听课时写的,有点乱,可以按上面的步骤来实现。

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<Windows.h>

// Number of lines the program assumes; it sizes every array and bounds every loop in this file.
#define LINE 10000000

// Pipeline: read into memory, sort, write to file, build the index, binary search.

char **g_pp;	// array of LINE string pointers, one per line; allocated and filled by initmem()
// Source data file; presumably the unsorted input (only referenced from commented-out calls) - confirm.
char *sorpath = "E:\\qqok.txt";
// Data file that searchstr() and the index lookups read lines from.
char *despath = "E:\\qsort.txt";
// Binary index file: written by initindex(), read by qucik() and searchstr().
char *indexpath = "E:\\qindex.txt";

// Line index: for each line, the byte offset at which it starts in the data file.
struct index
{
	int *pindex;		// start offset of each line (cumulative byte length of all preceding lines)
	int length;			// number of entries allocated for pindex
}allindex;

// Counts the lines in the file at `path`.
//
// A line is a run of bytes terminated by '\n'; a final run without a
// terminating '\n' also counts as one line. An empty file has 0 lines.
// Lines longer than the internal buffer are still counted once.
//
// Returns the line count, or 0 (after printing a message) if the file
// cannot be opened.
int getLine(char *path)
{
	FILE *pfr = fopen(path, "rb");
	if (pfr == NULL)
	{
		printf("文件打开失败!\n");
		return 0;
	}

	int line = 0;
	int partial = 0;	// non-zero while inside a line whose '\n' has not been seen yet
	char str[256];
	// Drive the loop from fgets' result: testing feof() before reading would
	// run one extra iteration after the last line and over-count by one.
	while (fgets(str, sizeof str, pfr) != NULL)
	{
		size_t len = strlen(str);
		if (len > 0 && str[len - 1] == '\n')
		{
			line++;
			partial = 0;
		}
		else
		{
			// Either a chunk of an over-long line or an unterminated last line.
			partial = 1;
		}
	}
	if (partial)
	{
		line++;
	}

	fclose(pfr);
	return line;
}

// Overwrites every '-' in `str` with a NUL byte, in place.
// The string therefore ends at its first '-'; later dashes (up to the
// original terminator) are zeroed as well.
void eatq(char *str)
{
	char *cursor = str;
	while (*cursor != '\0')
	{
		if (*cursor == '-')
		{
			*cursor = '\0';
		}
		// Advance past the byte just examined; scanning continues with the
		// following byte even when this one was just zeroed.
		cursor++;
	}
}

// Strips line endings: overwrites every '\r' and '\n' in `str` with a NUL
// byte, in place, so the string ends at its first line-ending character.
void eatN(char *str)
{
	for (char *p = str; *p != '\0'; p++)
	{
		switch (*p)
		{
		case '\r':
		case '\n':
			*p = '\0';
			break;
		default:
			break;
		}
	}
}


// Reads the first LINE lines of the file at `path` into memory.
//
// Allocates the global pointer array g_pp (LINE entries) and, for each
// line, a heap copy with its trailing "\r\n" / "\n" removed. If the file
// has fewer than LINE lines, the remaining entries are empty strings.
// Lines longer than 255 bytes are split across several entries.
//
// On failure to open the file or to allocate g_pp, a message is printed
// and g_pp is left untouched. The allocations are never freed here; they
// live in g_pp for the rest of the program.
void initmem(char *path)
{
	printf("\n开始读取...\n");
	FILE *pfr = fopen(path, "rb");
	if (pfr == NULL)
	{
		printf("文件打开失败!\n");
		return ;
	}

	g_pp = calloc(LINE, sizeof(char *));
	if (g_pp == NULL)
	{
		printf("内存分配失败!\n");
		fclose(pfr);
		return ;
	}

	for (int i = 0; i < LINE; i++)
	{
		char str[256] = { 0 };
		if (fgets(str, sizeof str, pfr) == NULL)
		{
			// End of file or read error: store an empty line so every slot
			// holds a valid string for the later sort and write stages.
			str[0] = '\0';
		}

		size_t len = strlen(str);
		g_pp[i] = calloc(len + 1, sizeof(char));
		if (g_pp[i] == NULL)
		{
			printf("内存分配失败!\n");
			break;
		}
		// Plain byte copy. The line must never be passed as a printf-style
		// format string: any '%' in the data would be undefined behaviour.
		memcpy(g_pp[i], str, len + 1);
		eatN(g_pp[i]);	// drop the line ending; writetofile() adds '\n' back
	}

	fclose(pfr);
	printf("\n结束读取...\n");
}

// qsort comparator for an array of `char *`: orders the pointed-to strings
// with strcmp. `p1` and `p2` each point at an array element (a char *).
// A NULL element is treated as an empty string.
//
// Returns <0, 0 or >0 as the first string sorts before, equal to, or after
// the second.
int com(const void *p1, const void *p2)
{
	// The parameters are const void * because that is the exact comparator
	// type qsort requires.
	const char *lhs = *(const char *const *)p1;
	const char *rhs = *(const char *const *)p2;

	return strcmp(lhs ? lhs : "", rhs ? rhs : "");
}

// Sorts the LINE string pointers held in g_pp in place, using com() as the
// ordering.
void sort()
{
	const size_t count = LINE;
	const size_t width = sizeof *g_pp;	// one element is a char *

	qsort(g_pp, count, width, com);
}



// Writes the LINE strings held in g_pp to the file at `path`, one per line.
//
// Each string is followed by a single '\n', because initmem() stripped the
// original line endings. A NULL entry is written as an empty line so the
// output always has exactly LINE lines. g_pp itself must already be
// allocated.
//
// Prints a message and returns without writing if the file cannot be opened.
void writetofile(char *path)
{
	FILE *pfw = fopen(path, "wb");
	if (pfw == NULL)
	{
		printf("文件打开失败!\n");
		return ;
	}

	for (int i = 0; i < LINE; i++)
	{
		// Write straight from the stored string: staging it in a fixed-size
		// buffer would overflow on lines longer than that buffer.
		if (g_pp[i] != NULL)
		{
			fputs(g_pp[i], pfw);
		}
		fputc('\n', pfw);
	}

	fclose(pfw);
}

// Prints all LINE strings held in g_pp to stdout, one per line.
void show()
{
	int row = 0;
	while (row < LINE)
	{
		char *text = g_pp[row];
		printf("%s\n", text);
		++row;
	}
}

// Builds the line-offset index for the data file at `path` and writes it
// to the index file (indexpath).
//
// The index is an array of LINE+1 ints: entry i is the byte offset at which
// the i-th fgets chunk (normally the i-th line) starts. Unused entries are 0.
// The whole array is written to the index file in binary form and the
// in-memory copy is released before returning.
//
// Limitations kept from the on-disk format: offsets are `int`, so data
// files larger than INT_MAX bytes cannot be indexed; lines longer than 255
// bytes occupy more than one entry.
//
// On allocation or open failure a message is printed, everything acquired
// so far is released, and no index is written.
void initindex(char *path)
{
	printf("索引数组开始分配...\n");
	allindex.length = LINE+1;
	allindex.pindex = calloc(allindex.length, sizeof(int));
	if (allindex.pindex == NULL)
	{
		printf("内存分配失败!\n");
		return;
	}
	printf("索引数组完成分配。\n");

	printf("开始读取...\n");
	FILE *pfr = fopen(path, "rb");
	FILE *pfw = fopen(indexpath, "wb");	// index file
	if (pfr == NULL || pfw == NULL)
	{
		printf("文件打开失败!\n");
		// One of the two may have opened; close it rather than leak it.
		if (pfr != NULL)
		{
			fclose(pfr);
		}
		if (pfw != NULL)
		{
			fclose(pfw);
		}
		free(allindex.pindex);
		allindex.pindex = NULL;
		return;
	}

	int alllength = 0;	// running byte offset into the data file
	int i = 0;
	char str[256];
	// Bounded by the array size so a file with more lines than expected
	// cannot write past the end of pindex.
	while (i < allindex.length && fgets(str, sizeof str, pfr) != NULL)
	{
		// Record where this line starts, then advance by its length.
		allindex.pindex[i] = alllength;
		alllength += (int)strlen(str);
		i++;
	}
	if (i < allindex.length)
	{
		// Also record the end-of-data offset in the next slot.
		allindex.pindex[i] = alllength;
	}
	fclose(pfr);
	printf("结束读取...\n");

	// Write the index to its file.
	printf("索引写入...\n");
	if (fwrite(allindex.pindex, sizeof(int), allindex.length, pfw) != (size_t)allindex.length)
	{
		printf("索引写入失败!\n");
	}

	fclose(pfw);
	printf("索引写入结束。\n");

	// Release the in-memory copy; clear the pointer so it cannot dangle.
	free(allindex.pindex);
	allindex.pindex = NULL;

}

// Fast start-up path: once the index file exists, load it straight into
// allindex instead of rebuilding it.
//
// Allocates allindex.pindex with LINE entries and fills it from the index
// file (indexpath). NOTE: the `path` parameter is not used; the file opened
// is always indexpath.
//
// On allocation failure allindex.length is set to 0 and nothing is read.
// If the index file cannot be opened the array stays allocated and zeroed.
// A short read is reported but whatever was read is kept.
void qucik(char *path)
{
	(void)path;	// unused, kept for interface compatibility

	printf("索引数组开始分配...\n");
	allindex.length = LINE;
	allindex.pindex = calloc(allindex.length, sizeof(int));
	if (allindex.pindex == NULL)
	{
		printf("内存分配失败!\n");
		allindex.length = 0;
		return;
	}
	printf("索引数组完成分配。\n");

	printf("开始读取...\n");
	FILE *pfr = fopen(indexpath, "rb");
	if (pfr == NULL)
	{
		printf("文件打开失败!\n");
		return;
	}
	size_t got = fread(allindex.pindex, sizeof(int), allindex.length, pfr);
	fclose(pfr);
	if (got != (size_t)allindex.length)
	{
		printf("索引文件不完整!\n");
	}

	printf("结束读取...\n");
}

// Driver for the in-memory index lookup: load the index into memory, then
// answer line-number queries by seeking in the data file.
// In its current state every pipeline stage is commented out; only the two
// show() calls and the final pause execute.
// NOTE(review): with initmem() disabled nothing on this path allocates g_pp,
// so show() would read through an unset g_pp - confirm which stages are
// meant to be enabled before running this.
void main内存索引查询()
{
	// Stage 1 (disabled): count the lines and read the source file into memory.
	//printf("%d\n", getLine(sorpath));
	//initmem(sorpath);
	show();
	// Stage 2 (disabled): sort, then show the sorted data.
	//printf("\n排序后:\n");
	//sort();
	show();

	// Stage 3 (disabled): write the sorted data out and count its lines.
	//writetofile(despath);
	//printf("%d\n", getLine(despath));
	// Stage 4 (disabled): build the index.
	//initindex(despath);
	// Stage 5 (disabled): load the index and serve line-number queries in a loop.
	/*qucik(indexpath);
	FILE *pfr = fopen(despath, "rb");

	while (1)
	{
		printf("请输入要查询的行号:");
		int num = 0;
		scanf("%d", &num);
		fseek(pfr, allindex.pindex[num], SEEK_SET);
		
		char str[256] = { 0 };
		fgets(str, 256, pfr);
		printf("%s", str);

	}
	fclose(pfr);*/



	system("pause");
}

//文件索引,即打开二个文件,先查询索引文件的内容,
//然后根据索引的内容来查询相应的行
void main索引文件查询()
{
	FILE *pfindex = fopen(indexpath, "rb");
	FILE *pfsortq = fopen(despath, "rb");

	while (1)
	{
		printf("请输入要查询的行数:");
		int num = 0;
		scanf("%d", &num);
		fseek(pfindex, num*sizeof(int), SEEK_SET);
		int data;
		fread(&data, sizeof(int), 1, pfindex);
		//printf("%d", data);
		char str[256] = { 0 };
		fseek(pfsortq, sizeof(char)*data, SEEK_SET);
		fgets(str, 256, pfsortq);
		printf("%s", str);

	}

	fclose(pfindex);
	fclose(pfsortq);



	system("pause");
}

// Binary-searches the sorted data file for a line whose key equals `str`.
//
// The key of a line is the text before its first '-' (after the line ending
// is stripped). Lines are located through the index file: entry `mid` gives
// the byte offset of line `mid` in the data file. The search range is lines
// 0 .. LINE-1, so the data file is expected to hold LINE sorted lines.
//
// Prints the full matching line followed by "find", or "not find" when no
// line matches or an I/O error interrupts the search.
void searchstr(char *str)
{
	FILE *pfindex = fopen(indexpath, "rb");
	FILE *pfdes = fopen(despath, "rb");
	if (pfindex == NULL || pfdes == NULL)
	{
		printf("cannot file !\n");
		// One of the two may have opened; close it rather than leak it.
		if (pfindex != NULL)
		{
			fclose(pfindex);
		}
		if (pfdes != NULL)
		{
			fclose(pfdes);
		}
		return;
	}

	int flag = 0;
	int start = 0;
	int end = LINE - 1;
	while (start <= end)
	{
		int mid = start + (end - start) / 2;

		// Fetch the offset of line `mid` from the index file.
		int data = 0;
		if (fseek(pfindex, (long)mid * (long)sizeof(int), SEEK_SET) != 0
			|| fread(&data, sizeof data, 1, pfindex) != 1)
		{
			break;
		}

		// Read that line from the data file.
		char pstr[256] = { 0 };
		if (fseek(pfdes, data, SEEK_SET) != 0
			|| fgets(pstr, sizeof pstr, pfdes) == NULL)
		{
			break;
		}
		eatN(pstr);	// drop the line ending

		// Extract the key on a copy: truncating pstr itself would lose the
		// rest of the line, which is printed in full on a match.
		char ptemp[256] = { 0 };
		strcpy(ptemp, pstr);
		eatq(ptemp);

		// strcmp only guarantees the sign of its result, not the values
		// 1 / -1, so branch on the sign.
		int res = strcmp(ptemp, str);
		if (res == 0)
		{
			flag = 1;
			printf("%s", pstr);
			break;
		}
		else if (res > 0)
		{
			end = mid - 1;
		}
		else
		{
			start = mid + 1;
		}
	}
	if (flag)
	{
		printf("\nfind\n");
	}
	else
	{
		printf("\nnot find");
	}

	fclose(pfindex);
	fclose(pfdes);
}



// Entry point: interactive binary-search lookup.
// Repeatedly prompts for a QQ number and looks it up with searchstr().
// The loop ends when no more input can be read (end of input).
int main(void)
{
	while (1)
	{
		char str[256];
		printf("请输入要查询的QQ号:");
		// Width-limited so an over-long token cannot overflow str; stop when
		// nothing was read instead of re-running the search forever.
		if (scanf("%255s", str) != 1)
		{
			break;
		}

		searchstr(str);
	}

	system("pause");
	return 0;
}


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值