coreutils4.5源码expand.c分析

woshiyilitongdouzi

于 2020-09-05 18:00:23 发布

阅读量238

点赞数

分类专栏： coreutils 读后感

本文链接：https://blog.csdn.net/woshiyilitongdouzi/article/details/108422714

版权

读后感同时被 2 个专栏收录

165 篇文章 0 订阅

订阅专栏

coreutils

95 篇文章 1 订阅

订阅专栏

coreutils4.5源码expand.c分析
感觉这个代码真复杂。本来以为很简单的一个命令，就是把tab转为空格。但你看。
echo -e "ab\td\tx" | ./expand -t 2 | od -cb
0000000 a b d x \n
141 142 040 040 144 040 170 012
0000010
第二个tab只转为了1个空格。
我以为是我看错了，又执行几次：
echo -e "ab\td\tx" | ./expand -t 1 | od -cb
0000000 a b d x \n
141 142 040 144 040 170 012
0000007
yang@DESKTOP-V9HS3B6:/mnt/c/read_source/coreutils-4.5.1/src$ echo -e "ab\td\tx" | ./expand -t 2 | od -cb
0000000 a b d x \n
141 142 040 040 144 040 170 012
0000010
yang@DESKTOP-V9HS3B6:/mnt/c/read_source/coreutils-4.5.1/src$ echo -e "ab\td\tx" | ./expand -t 3 | od -cb
0000000 a b d x \n
141 142 040 144 040 040 170 012
0000010
yang@DESKTOP-V9HS3B6:/mnt/c/read_source/coreutils-4.5.1/src$ echo -e "ab\td\tx" | ./expand -t 4 | od -cb
0000000 a b d x \n
141 142 040 040 144 040 040 040 170 012
0000012
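乍一看都不太整齐。其实这是对的：tab并不是换成固定个数的空格，而是补空格直到下一个制表位（列号是-t参数的整数倍）。例如-t 2时，"ab"占第0、1列，第一个tab从第2列补到第4列，是2个空格；"d"在第4列，第二个tab从第5列补到第6列，所以只有1个空格。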
再读文档，分析-i选项。
echo -e "ab\td\tx" | ./expand -i | od -cb
0000000 a b \t d \t x \n
141 142 011 144 011 170 012
0000007
果然是非空格后的\t不转换。
感觉这个程序就复杂了。就有些摸不着头绪了。
硬着头皮向下冲。
/* Add tab stop TABVAL to the end of `tab_list', except
   if TABVAL is -1, do nothing.

   `tab_list' is grown by TABLIST_BLOCK entries each time the number
   of stored stops reaches a multiple of TABLIST_BLOCK (including 0,
   i.e. the very first call).  */

static void
add_tabstop (int tabval)
{
  if (tabval == -1)
    return;
  if (first_free_tab % TABLIST_BLOCK == 0)
    /* The size passed to xrealloc is in bytes, so the whole new entry
       count (old entries + one more block) must be scaled by the
       element size, not just the increment.  */
    tab_list = (int *) xrealloc ((char *) tab_list,
                                 ((first_free_tab + TABLIST_BLOCK)
                                  * sizeof (tab_list[0])));
  tab_list[first_free_tab++] = tabval;
}
我学聪明了，先读英文注释，再看代码。我英语不好，就安装了欧路词典，进行翻译。
这个函数的意思是，把tabval添加到数组tab_list的末尾。
其中当空间不够时，重新分配空间。
/* Parse STOPS, a list of tab stops separated by commas or blanks,
   and append every stop found to the list of tabstops.
   Any character that is neither a digit nor a separator is a
   fatal error.  */

static void
parse_tabstops (char *stops)
{
  /* Value of the number being accumulated; -1 while no digit has been
     seen since the last separator (add_tabstop ignores -1).  */
  int pending = -1;
  char *p;

  for (p = stops; *p != '\0'; p++)
    {
      if (*p == ',' || ISBLANK (*p))
        {
          /* Separator: flush whatever number was collected.  */
          add_tabstop (pending);
          pending = -1;
          continue;
        }

      if (ISDIGIT (*p))
        {
          int base = (pending == -1) ? 0 : pending;
          pending = base * 10 + *p - '0';
          continue;
        }

      error (EXIT_FAILURE, 0, _("tab size contains an invalid character"));
    }

  /* Flush the final number, which has no trailing separator.  */
  add_tabstop (pending);
}
这个函数就看不懂了。
其中stops是个字符串，逐个字符扫描：如果是数字，就把连续的数字字符累加成一个整数tabval；如果是','或空白，就调用add_tabstop把已累加的tabval存进数组，并把tabval重置为-1（tabval为-1时add_tabstop什么也不做，所以连续的分隔符不会存入任何值）；遇到其他字符则报错退出。
这样做是为什么服务呢？从底向上读，就是不知道函数的使用场景。
从顶向下读，又容易流于形式。再接着啃。
/* Verify the ENTRIES tab stops stored in TABS: each one must be
   nonzero and strictly greater than the one before it.  Exit with
   a diagnostic on the first violation.  */

static void
validate_tabstops (int *tabs, int entries)
{
  int last_seen = 0;   /* previous stop; 0 before the first entry */
  int idx = 0;

  while (idx < entries)
    {
      int stop = tabs[idx++];

      if (stop == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (stop <= last_seen)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
      last_seen = stop;
    }
}
这里，是对数组tabs[]进行检查，要求每个tabs[i]!=0，且tabs[i-1]<tabs[i]（第一个元素要求tabs[0]>0）。
还是不知道使用场景。
接着摸黑向前走。
/* Close the old stream pointer FP if it is non-NULL,
and return a new one opened to read the next input file.
Open a filename of `-' as the standard input.
Return NULL if there are no more input files. */
结合注释和源代码，这个函数的意思是：先关闭当前使用的文件，再打开并返回下一个文件，以用于把tab转空格；没有更多文件时返回NULL。
static FILE *
next_file (FILE *fp)
{
static char *prev_file;
char *file;

if (fp) //这里明显是在关闭文件。
{
if (ferror (fp))
   {
   error (0, errno, "%s", prev_file);
   exit_status = 1;
   }
if (fp == stdin)
   clearerr (fp);       /* Also clear EOF. */
else if (fclose (fp) == EOF)
   {
   error (0, errno, "%s", prev_file);
   exit_status = 1;
   }
}

while ((file = *file_list++) != NULL)
{
if (file[0] == '-' && file[1] == '\0')
   {
   have_read_stdin = 1;
   prev_file = file;
   return stdin;
   }
fp = fopen (file, "r");
if (fp)
   {
   prev_file = file;
   return
fp;//这里是把打开的文件句柄返回，并把前一文件保存，以便关闭。
   }
error (0, errno, "%s", file);
exit_status = 1;
}
return NULL;
}
/* Change tabs to spaces, writing to stdout.
Read each file in `file_list', in order. */