查找文件中相同的行

最新推荐文章于 2021-05-25 10:54:49 发布

xl-xulei

最新推荐文章于 2021-05-25 10:54:49 发布

阅读量1.1k

点赞数

分类专栏： C/C++ 数据结构和算法 VC 文章标签：链表查找文件相同行

本文链接：https://blog.csdn.net/xulei364132789/article/details/8821328

版权

C/C++ 同时被 3 个专栏收录

33 篇文章 0 订阅

订阅专栏

16 篇文章 0 订阅

订阅专栏

数据结构和算法

9 篇文章 0 订阅

订阅专栏

今天测试部的同事叫我帮他们弄个小程序, 实现从文件中查找相同的行, 并去掉相同的行,

想想也不难, 花了点时间将它敲出来了。下面的代码用两种方法实现; 还有第三种方法只是提了一下, 懒得去实现了, 有兴趣的朋友可以自己去实现一下。

三种方法都不难, 在此贴出来, 方便其它朋友,

/************************************************
 *	2013年4月18日 21:51:42 xulei				*
 * 功能: 查找文件中的相同行,					*
 * 并从文件中删除相同的行后输出到tmp文件中		*
 ************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"	// Linux kernel list implementation; the author has posted a copy of this file on his blog
#include <malloc.h>
// One line of the input file, linked into two lists at the same time.
// This layout is simple, but the per-line storage is a fixed size: short
// lines waste space and a long line may not fit in a single node.
struct list_node
{
	struct list_head link_to;	// membership in the main list (every line read)
	struct list_head link_to2;	// membership in the temporary list; duplicate nodes are unlinked from this list
	char line[512];	// NUL-terminated text of the line, as returned by fgets
};

// Space-saving variant of the node: compute the length of each line first
// (line_len), then allocate sizeof(struct list_node_s) + line_len bytes so
// that the text is stored inline right after the header (add one more byte
// for the terminating NUL if line_len does not already include it).
// The code that uses this variant has not been written.
struct list_node_s
{
	struct list_head link_to;	// presumably the main-list link, as in struct list_node
	struct list_head link_to2;	// presumably the temporary-list link, as in struct list_node
	int line_len;			// number of bytes stored in line[]
	char line[];			// C99 flexible array member (replaces the GNU-only line[0])
};

/*
 * Approach 1: load every line into a linked list and compare the nodes in
 * memory.  More code than re-scanning the file for each line, but faster.
 *
 * Usage: <program> FileName
 *
 * Every line of FileName is stored in a node that is linked into two lists:
 * the main list (all lines) and a temporary list (lines still to be kept).
 * For each line of the main list the temporary list is scanned; the first
 * matching node is kept and every further match is printed to stdout and
 * unlinked from the temporary list.  What remains in the temporary list
 * (first occurrences, in their original order) is written to "tmp.txt".
 *
 * Lines are compared exactly as fgets returns them, newline included.
 * A physical line longer than sizeof(node->line) - 1 bytes is split by
 * fgets into several chunks, and each chunk is treated as its own line.
 *
 * Returns 0 on success, -1 on a usage, open, read, write or allocation
 * failure.
 */
int main(int argc, char **argv)
{
	FILE *f = NULL, *tmp = NULL;
	int flag = 0;
	int rc = -1;
	struct list_head line_list_head = LIST_HEAD_INIT(line_list_head);
	struct list_head line_list_head_tmp = LIST_HEAD_INIT(line_list_head_tmp);
	struct list_head *pos, *pos2, *next, *next2;
	struct list_node *list_node_tmp = NULL;
	struct list_node *list_node_next = NULL;

	if (argc != 2)
	{
		printf("Use As: %s FileName\n", argv[0]);
		return -1;
	}

	// Open the input read-only: an append mode would silently create a
	// missing input file.  Only touch tmp.txt once the input is known to
	// be readable, so a bad file name does not truncate an older result.
	f = fopen(argv[1], "rb");
	if (f != NULL)
		tmp = fopen("tmp.txt", "wb");

	if (f == NULL || tmp == NULL)
	{
		printf("open file err!\n");
		goto out;
	}

	// Read each line straight into a zero-filled node and link the node
	// into both lists.
	for (;;)
	{
		struct list_node *list_node = calloc(1, sizeof *list_node);
		if (list_node == NULL)
		{
			printf("malloc mem err!\n");
			goto out;
		}
		if (fgets(list_node->line, sizeof list_node->line, f) == NULL)
		{
			free(list_node);	// EOF or read error: this node was never linked
			break;
		}
		list_add_tail(&list_node->link_to, &line_list_head);
		list_add_tail(&list_node->link_to2, &line_list_head_tmp);
	}
	if (ferror(f))
	{
		printf("read file err!\n");
		goto out;
	}

	list_for_each(pos, &line_list_head)
	{
		list_node_tmp = list_entry(pos, struct list_node, link_to);
		flag = 0;
		list_for_each_safe(pos2, next2, &line_list_head_tmp)
		{
			list_node_next = list_entry(pos2, struct list_node, link_to2);
			if (strcmp(list_node_tmp->line, list_node_next->line) != 0)
				continue;

			if (flag == 0)
			{
				// First occurrence in the temporary list: keep it.
				flag = 1;
				continue;
			}

			// Later occurrence: report it and drop it from the output.
			// The node stays on the main list, so it is still freed below.
			printf("%s\n", list_node_next->line);
			list_del(&list_node_next->link_to2);
		}
	}

	list_for_each(pos2, &line_list_head_tmp)
	{
		list_node_next = list_entry(pos2, struct list_node, link_to2);
		if (fputs(list_node_next->line, tmp) == EOF)
		{
			printf("write file err!\n");
			goto out;
		}
	}
	rc = 0;

out:
	// Every node is on the main list, so walking it releases all of them.
	list_for_each_safe(pos, next, &line_list_head)
	{
		list_node_tmp = list_entry(pos, struct list_node, link_to);
		list_del(pos);
		free(list_node_tmp);
	}
	if (f != NULL)
		fclose(f);
	// fclose flushes buffered output, so a failure here means lost data.
	if (tmp != NULL && fclose(tmp) != 0)
	{
		printf("write file err!\n");
		rc = -1;
	}
	return rc;
}

/*
	// 直接从文件中读取一行来比较, 比较慢, 但是简单
int main(int argc, char **argv)
{
	FILE *f, *tmp;
	char line1[1024], line2[1024];
	int flag = 0, offset = 0;
	
	if (argc != 2)
	{
		printf("Use As: %s FileName\n", argv[0]);
		return -1;
	}
	//f = fopen(argv[1], "a+b");

	f = fopen("test.txt", "a+b");
	tmp = fopen("tmp.txt", "w+b");
	if (f == NULL || tmp == NULL)
	{
		printf("open file err!\n");
		return -1;
	}
	while(1)
	{
		if (fgets(line1, 1024, f) == NULL)
			break;
		strcpy(line2, line1);
		offset += strlen(line2);
		fseek(f, 0, SEEK_SET);
		while(1)
		{
			if (fgets(line1, 1024, f) == NULL)
				break;
			if (strcmp(line1, line2) == 0)
				flag ++;
			if (flag > 1)
			{
				printf("%s\n", line1);
				break;
			}
		}
		if (flag == 1)
			fputs(line2, tmp);
		fseek(f, offset, SEEK_SET);
		flag = 0;
	}
	fclose(f);
	fclose(tmp);
	return 0;
}
*/