C语言文件读写操作实践(二)
查找重复的英文名字
- 用最原始最简单的办法,用读出的字符串在数组中进行比较,如果存在相同的字符串,返回结果1,否则返回结果0!!!
- 如果为0,则计数,如果为1则不计数
- 继续读下一个字符串,如此循环至文件结束
- 所有英文名字排序输出,因为原英文名字文件中将男名和女名分为两部分,重新排序后,整合为一体!!!
代码如下:
/* filename: fe.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* compile : gcc fe.c -o fe
run : ./fe */
/*
 * Report whether `word` already occurs among the first `len` rows of `buff`.
 *
 * buff : table of NUL-terminated names, one name per 32-byte row
 * len  : number of rows at the start of buff that hold valid names
 * word : NUL-terminated string to look for
 *
 * Returns 1 when some row compares equal to word (strcmp), 0 otherwise.
 * An empty table (len == 0) always yields 0.
 */
int
name_inbuff (char buff[1024][32], unsigned int len, const char *word)
{
  /* Every stored row is examined, including the len == 1 case, so a word
     that repeats the very first entry is reported as a duplicate. */
  for (unsigned int j = 0; j < len; j++)
    {
      if (0 == strcmp (word, buff[j]))
        return 1;
    }
  return 0;
}
/*
 * qsort comparator for tables whose elements are the strings themselves
 * (for example the rows of a char[N][32] array): x and y are taken as the
 * addresses of two NUL-terminated strings and ordered with strcmp.
 * Returns a negative, zero or positive value exactly as strcmp does.
 */
int
namecmp (const void *x, const void *y)
{
  const char *lhs = x;
  const char *rhs = y;

  return strcmp (lhs, rhs);
}
/*
 * Read whitespace-separated names from name.txt, drop duplicates, sort the
 * remaining names and print them one per line, followed by their count.
 * All output goes to stderr.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE when name.txt cannot be
 * opened.
 */
int
main (int argc, char *argv[])
{
  char buff[1024][32] = { 0 };
  const int capacity = (int) (sizeof buff / sizeof buff[0]);
  FILE *fpi;
  int i = 0;

  (void) argc;
  (void) argv;

  fpi = fopen ("name.txt", "r");
  if (fpi == NULL)
    {
      fprintf (stderr, "Open file name.txt error!\n");
      return EXIT_FAILURE;
    }

  /* Read straight into the next free row.  The row is only "committed"
     (i advanced) when the word is new; otherwise the next read overwrites
     it.  A row is used only after fscanf reports one successful
     conversion, so hitting EOF never adds a phantom empty entry.
     "%31s" keeps a token within the 32-byte row; longer tokens are split. */
  while (i < capacity && fscanf (fpi, "%31s", buff[i]) == 1)
    {
      if (0 == name_inbuff (buff, i, buff[i]))
        i = i + 1;
    }
  if (i == capacity)
    fprintf (stderr,
             "Warning: name table is full (%d entries); "
             "remaining input, if any, was ignored.\n", capacity);
  fclose (fpi);

  /* Each element is one whole 32-byte row, so rows are moved as units. */
  qsort (buff, (size_t) i, sizeof buff[0], namecmp);

  for (int j = 0; j < i; j++)
    fprintf (stderr, "%s\n", buff[j]);
  fprintf (stderr, "count : %d\n", i);

  return EXIT_SUCCESS;
}
/* -- end -- */
编译运行,结果如下:
songvm@ubuntu:~/works/xdn/foo$ gcc fe.c -o fe
songvm@ubuntu:~/works/xdn/foo$ ./fe
Aaron
Abby
Abel
Abelard
Abigail
......
Zelda
Zoe
Zoey
Zora
count : 626
songvm@ubuntu:~/works/xdn/foo$
将上面的输出结果写入目标文件
- 目标文件名:name_v.txt
代码如下:
/* filename: ff.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* compile : gcc ff.c -o ff
run : ./ff */
/*
 * Linear search for `word` in the first `len` rows of `buff`.
 *
 * buff : table of NUL-terminated names, one name per 32-byte row
 * len  : how many leading rows of buff contain valid names
 * word : NUL-terminated string to search for
 *
 * Returns 1 if any of those rows equals word (strcmp == 0), else 0.
 * With len == 0 nothing is compared and the result is 0.
 */
int
name_inbuff (char buff[1024][32], unsigned int len, const char *word)
{
  unsigned int j = 0;

  /* No special case for small tables: a single stored name must still be
     compared, otherwise a repeat of the first name would go unnoticed. */
  while (j < len)
    {
      if (strcmp (word, buff[j]) == 0)
        return 1;
      j++;
    }
  return 0;
}
/*
 * Comparator passed to qsort when the sorted elements are fixed-width
 * character rows: both arguments are interpreted directly as
 * NUL-terminated strings and compared with strcmp, whose result
 * (<0, 0, >0) is returned unchanged.
 */
int
namecmp (const void *x, const void *y)
{
  const char *left = (const char *) x;
  const char *right = (const char *) y;
  int order = strcmp (left, right);

  return order;
}
/*
 * Read whitespace-separated names from name.txt, drop duplicates, sort the
 * remaining names and write them one per line to name_v.txt.  The number of
 * unique names and a completion message are printed to stderr.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE when a file cannot be
 * opened or the output cannot be written.
 */
int
main (int argc, char *argv[])
{
  char buff[1024][32] = { 0 };
  const int capacity = (int) (sizeof buff / sizeof buff[0]);
  FILE *fpi, *fpo;
  int i = 0, write_failed;

  (void) argc;
  (void) argv;

  fpi = fopen ("name.txt", "r");
  if (fpi == NULL)
    {
      fprintf (stderr, "Open file name.txt error!\n");
      return EXIT_FAILURE;
    }
  fpo = fopen ("name_v.txt", "w+");
  if (fpo == NULL)
    {
      fprintf (stderr, "Open file name_v.txt error!\n");
      fclose (fpi);             /* do not leak the already opened input */
      return EXIT_FAILURE;
    }

  /* Read into the next free row and keep it only when the word is new.
     The row is examined only after fscanf reports a successful
     conversion, so EOF never contributes an empty entry.  "%31s" keeps a
     token within the 32-byte row; longer tokens are split. */
  while (i < capacity && fscanf (fpi, "%31s", buff[i]) == 1)
    {
      if (0 == name_inbuff (buff, i, buff[i]))
        i = i + 1;
    }
  if (i == capacity)
    fprintf (stderr,
             "Warning: name table is full (%d entries); "
             "remaining input, if any, was ignored.\n", capacity);
  fclose (fpi);

  /* Each element is one whole 32-byte row, so rows are moved as units. */
  qsort (buff, (size_t) i, sizeof buff[0], namecmp);

  for (int j = 0; j < i; j++)
    fprintf (fpo, "%s\n", buff[j]);

  /* Buffered data is flushed by fclose, so a write error may only show
     up there; check both the stream state and the close result. */
  write_failed = ferror (fpo);
  if (fclose (fpo) != 0)
    write_failed = 1;
  if (write_failed)
    {
      fprintf (stderr, "Write file name_v.txt error!\n");
      return EXIT_FAILURE;
    }

  fprintf (stderr, "count : %d\n", i);
  fprintf (stderr, "File operate ok!\n");
  return EXIT_SUCCESS;
}
/* -- end -- */
编译运行,结果如下:
- 打开name_v.txt文件查看,会发现达到预期目标!!!
songvm@ubuntu:~/works/xdn/foo$ gcc ff.c -o ff
songvm@ubuntu:~/works/xdn/foo$ ./ff
count : 626
File operate ok!
songvm@ubuntu:~/works/xdn/foo$
数组的局限,分配内存的必要性
- name_inbuff函数的第一个参数是char buff[1024][32],这不是一个理想的参数,应该是char **buff!!!
- 如果name.txt文件内容更多,所占用的空间更多的话,这个1024的值就要变得更大!!!
- 采用动态分配内存的办法则会更好的解决这一问题!!!
- 首先定义一个宏PAGESIZE,值为128,也就是一次分配128个字符指针来存贮读入的字符串!!!
- 定义变量ps(页数)初值为1,当已保存的字符指针数量等于 ps × PAGESIZE(即当前容量已满)时,ps = ps + 1,再用realloc扩展内存!!!
- 每次分配32字节的内存,长度足以保存读入的英文人名,地址赋给上面分配的指针,完成char **buff的内存分配!!!
- 以上操作保证了动态存贮字符指针,不再是固定长度的数组!!!
- 程序结束前释放所有分配的内存!!!
代码如下:
/* filename: fg.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* compile : gcc fg.c -o fg
run : ./fg */
#define PAGESIZE 128
/*
 * Report whether `word` already occurs among the first `len` strings
 * referenced by `buff`.
 *
 * buff : array of pointers to NUL-terminated names
 * len  : number of leading entries of buff that are valid
 * word : NUL-terminated string to look for
 *
 * Returns 1 when some entry compares equal to word (strcmp), 0 otherwise.
 * With len == 0 no entry is read and the result is 0.
 */
int
name_inbuff (char **buff, unsigned int len, const char *word)
{
  /* Every stored entry is examined, including the len == 1 case, so a
     word that repeats the very first entry is reported as a duplicate. */
  for (unsigned int j = 0; j < len; j++)
    {
      if (0 == strcmp (word, buff[j]))
        return 1;
    }
  return 0;
}
/*
 * String comparator with the qsort callback signature.  Both arguments
 * are interpreted directly as NUL-terminated strings, which fits tables
 * whose elements are the character data itself; it does not dereference
 * an extra level, so it is not suitable for an array of char * entries.
 * Returns the strcmp result (<0, 0, >0).
 */
int
namecmp (const void *x, const void *y)
{
  const char *lhs = x;
  const char *rhs = y;

  return strcmp (lhs, rhs);
}
/*
 * Read whitespace-separated names from name.txt into a dynamically grown
 * array of heap-allocated strings, skipping duplicates, then print the
 * number of unique names to stderr and release all memory.
 * Sorting and writing a result file are not performed in this version.
 *
 * The pointer array holds ps * PAGESIZE slots and grows by one page
 * (PAGESIZE slots) whenever it is full.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE when name.txt cannot be
 * opened or memory runs out.
 */
int
main (int argc, char *argv[])
{
  FILE *fpi;
  char **buff = NULL, buf[32] = { 0 };
  int i = 0, ps = 1, rc = EXIT_FAILURE;

  (void) argc;
  (void) argv;

  fpi = fopen ("name.txt", "r");
  if (fpi == NULL)
    {
      fprintf (stderr, "Open file name.txt error!\n");
      return EXIT_FAILURE;
    }

  /* Initial pointer array: pages * page length * pointer size. */
  buff = malloc ((size_t) ps * PAGESIZE * sizeof *buff);
  if (buff == NULL)
    {
      fprintf (stderr, "Out of memory!\n");
      goto cleanup;
    }

  /* buf is used only after fscanf reports one successful conversion, so
     reaching EOF never stores a phantom empty name.  "%31s" keeps a token
     within the 32-byte buffer; longer tokens are split. */
  while (fscanf (fpi, "%31s", buf) == 1)
    {
      if (0 != name_inbuff (buff, i, buf))
        continue;               /* duplicate: nothing to store */

      if (i == ps * PAGESIZE)   /* array full: add one page */
        {
          /* Keep the old pointer until realloc succeeds so the existing
             entries can still be freed on failure. */
          char **grown = realloc (buff,
                                  (size_t) (ps + 1) * PAGESIZE * sizeof *buff);
          if (grown == NULL)
            {
              fprintf (stderr, "Out of memory!\n");
              goto cleanup;
            }
          buff = grown;
          ps = ps + 1;
        }

      buff[i] = malloc (sizeof buf);
      if (buff[i] == NULL)
        {
          fprintf (stderr, "Out of memory!\n");
          goto cleanup;
        }
      strcpy (buff[i], buf);
      i = i + 1;
    }

  fprintf (stderr, "count : %d\n", i);
  fprintf (stderr, "File operate ok!\n");
  rc = EXIT_SUCCESS;

cleanup:
  fclose (fpi);
  /* Entries 0 .. i-1 are exactly the strings that were allocated. */
  for (int k = 0; k < i; k++)
    free (buff[k]);
  free (buff);
  return rc;
}
/* -- end -- */
编译运行,结果如预期,再用valgrind测试一下内存分配情况
结果如下:
songvm@ubuntu:~/works/xdn/foo$ gcc fg.c -o fg
songvm@ubuntu:~/works/xdn/foo$ ./fg
count : 626
File operate ok!
songvm@ubuntu:~/works/xdn/foo$ valgrind --leak-check=yes ./fg
==3574== Memcheck, a memory error detector
==3574== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==3574== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==3574== Command: ./fg
==3574==
count : 626
File operate ok!
==3574==
==3574== HEAP SUMMARY:
==3574== in use at exit: 0 bytes in 0 blocks
==3574== total heap usage: 633 allocs, 633 frees, 40,040 bytes allocated
==3574==
==3574== All heap blocks were freed -- no leaks are possible
==3574==
==3574== For counts of detected and suppressed errors, rerun with: -v
==3574== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
重新完成写文件功能,加注释!!!
- name_inbuff 函数要改一下参数!!!
- namecmp 也要改一下:现在排序的元素是 char* 指针,qsort 传给比较函数的是指向这些指针的地址,所以要按 char** 解引用后再比较,否则会出错!!!
- qsort对初学者不友好,低版本的还会出问题,后面研究写出自己的qsort!!!
/* filename: fh.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* compile : gcc fh.c -o fh
run : ./fh */
#define PAGESIZE 128 //定义一次分存的长度
#define BUFSIZE 32 //定义字符缓冲区的长度
/*
 * Linear search for `word` among the first `len` strings referenced by
 * `buff`.
 *
 * buff : array of pointers to NUL-terminated names
 * len  : how many leading entries of buff are valid
 * word : NUL-terminated string to search for
 *
 * Returns 1 if any of those entries equals word (strcmp == 0), else 0.
 * With len == 0 no entry is read and the result is 0.
 */
int
name_inbuff (char **buff, unsigned int len, const char *word)
{
  unsigned int j = 0;

  /* No special case for small tables: a single stored name must still be
     compared, otherwise a repeat of the first name would go unnoticed. */
  while (j < len)
    {
      if (strcmp (word, buff[j]) == 0)
        return 1;
      j++;
    }
  return 0;
}
/*
 * qsort comparator for an array of string pointers.  x and y are the
 * addresses of two array elements, i.e. pointers to char * values; each is
 * dereferenced once to reach the string, and the two strings are ordered
 * with strcmp.  Returns <0, 0 or >0 as strcmp does.
 */
int
namecmp (const void *x, const void *y)
{
  const char *const *lhs = x;
  const char *const *rhs = y;

  return strcmp (*lhs, *rhs);
}
/*
 * Read whitespace-separated names from name.txt into a dynamically grown
 * array of heap-allocated strings, skipping duplicates; sort the names and
 * write them one per line to name_x.txt.  The number of unique names and a
 * completion message are printed to stderr, and all memory is released.
 *
 * The pointer array holds ps * PAGESIZE slots and grows by one page
 * (PAGESIZE slots) whenever it is full.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE when a file cannot be
 * opened, memory runs out, or the output cannot be written.
 */
int
main (int argc, char *argv[])
{
  FILE *fpi, *fpo = NULL;
  char **buff = NULL, buf[BUFSIZE] = { 0 };
  char fmt[16];
  int i = 0, ps = 1, rc = EXIT_FAILURE, write_failed;

  (void) argc;
  (void) argv;

  fpi = fopen ("name.txt", "r");        /* input */
  if (fpi == NULL)
    {
      fprintf (stderr, "Open file name.txt error!\n");
      return EXIT_FAILURE;
    }
  fpo = fopen ("name_x.txt", "w+");     /* output */
  if (fpo == NULL)
    {
      fprintf (stderr, "Open file name_x.txt error!\n");
      goto cleanup;
    }

  /* Initial pointer array: pages * page length * pointer size. */
  buff = malloc ((size_t) ps * PAGESIZE * sizeof *buff);
  if (buff == NULL)
    {
      fprintf (stderr, "Out of memory!\n");
      goto cleanup;
    }

  /* Build "%<N>s" with N = sizeof buf - 1 so the field width always
     matches the buffer, whatever BUFSIZE is set to. */
  snprintf (fmt, sizeof fmt, "%%%ds", (int) (sizeof buf - 1));

  /* buf is used only after fscanf reports one successful conversion, so
     reaching EOF never stores a phantom empty name.  Tokens longer than
     the field width are split. */
  while (fscanf (fpi, fmt, buf) == 1)
    {
      if (0 != name_inbuff (buff, i, buf))
        continue;               /* duplicate: nothing to store */

      if (i == ps * PAGESIZE)   /* array full: add one page */
        {
          /* Keep the old pointer until realloc succeeds so the existing
             entries can still be freed on failure. */
          char **grown = realloc (buff,
                                  (size_t) (ps + 1) * PAGESIZE * sizeof *buff);
          if (grown == NULL)
            {
              fprintf (stderr, "Out of memory!\n");
              goto cleanup;
            }
          buff = grown;
          ps = ps + 1;
        }

      buff[i] = malloc (sizeof buf);
      if (buff[i] == NULL)
        {
          fprintf (stderr, "Out of memory!\n");
          goto cleanup;
        }
      strcpy (buff[i], buf);
      i = i + 1;
    }

  /* The elements being sorted are the char * pointers themselves. */
  qsort (buff, (size_t) i, sizeof *buff, namecmp);

  for (int j = 0; j < i; j++)
    fprintf (fpo, "%s\n", buff[j]);

  /* Buffered data is flushed by fclose, so a write error may only show
     up there; check both the stream state and the close result. */
  write_failed = ferror (fpo);
  if (fclose (fpo) != 0)
    write_failed = 1;
  fpo = NULL;
  if (write_failed)
    {
      fprintf (stderr, "Write file name_x.txt error!\n");
      goto cleanup;
    }

  fprintf (stderr, "count : %d\n", i);
  fprintf (stderr, "File operate ok!\n");
  rc = EXIT_SUCCESS;

cleanup:
  /* Entries 0 .. i-1 are exactly the strings that were allocated; when
     buff is still NULL, i is 0 and the loop does not run. */
  for (int k = 0; k < i; k++)
    free (buff[k]);
  free (buff);
  if (fpo != NULL)
    fclose (fpo);
  fclose (fpi);
  return rc;
}
/* -- end -- */
编译运行,再查看一下内存分配释放情况,结果如预期
songvm@ubuntu:~/works/xdn/foo$ gcc fh.c -o fh
songvm@ubuntu:~/works/xdn/foo$ ./fh
count : 626
File operate ok!
songvm@ubuntu:~/works/xdn/foo$ valgrind --leak-check=yes ./fh
==3732== Memcheck, a memory error detector
==3732== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==3732== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==3732== Command: ./fh
==3732==
count : 626
File operate ok!
==3732==
==3732== HEAP SUMMARY:
==3732== in use at exit: 0 bytes in 0 blocks
==3732== total heap usage: 636 allocs, 636 frees, 49,696 bytes allocated
==3732==
==3732== All heap blocks were freed -- no leaks are possible
==3732==
==3732== For counts of detected and suppressed errors, rerun with: -v
==3732== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
songvm@ubuntu:~/works/xdn/foo$
查看一下name_x.txt文件,达到预期!!!
下一步,研究一下自动释放已分配内存的问题。