流程
输入两个文件 --> 比较 --> 输出可视化差异
逐行分析源码
1.准备阶段
正则表达式列表的结构体。
正则表达式(regular expression)描述了一种字符串匹配的模式(pattern),可以用来检查一个串是否含有某种子串、将匹配的子串替换或者从某个串中取出符合某个条件的子串等。
- 定义静态函数
在函数的返回类型前加上关键字static,函数就被定义成为静态函数。
函数的定义和声明默认情况下是extern的,但静态函数只是在声明他的文件当中可见,不能被其他文件所用。
定义静态函数的好处:
<1> 其他文件中可以定义相同名字的函数,不会发生冲突
<2> 静态函数不能被其他文件所用。 存储说明符auto,register,extern,static,对应两种存储期:自动存储期和静态存储期。 auto和register对应自动存储期。具有自动存储期的变量在进入声明该变量的程序块时被建立,它在该程序块活动时存在,退出该程序块时撤销。
关键字extern和static用来说明具有静态存储期的变量和函数。用static声明的局部变量具有静态存储持续期(static storage duration),或静态范围(static extent)。虽然它的值在函数调用之间保持有效,但是其名字的可见性仍限制在其局部作用域内。注意:在C语言中,静态局部对象只在程序启动之前初始化一次,且初始值必须是常量表达式;“程序执行到该对象的声明处时才首次初始化”是C++的规则,不适用于C。
- 有的函数的参数类型带有const,例如char const *:它是指向常量的指针,不能通过该指针修改所指向的值,但指针变量本身仍然可以改为指向别处。
- static void add_regexp (struct regexp_list *, char const *);静态函数add_regexp,参数1是指向regexp_list结构体的指针,参数2是指向常量字符的指针(char const *与const char *等价)。
- enum是枚举类型。
- 结构体comparison
331 struct comparison
332 {
333 struct file_data file[2];
334 struct comparison const *parent; /* parent, if a recursive comparison */
335 };
在diff.h文件中定义,成员有两个:struct file_data类型的数组file[2](对应被比较的两个文件),以及指向常量struct comparison的指针parent(递归比较时指向父级比较)。
接上边77行代码:
79 /* If comparing directories, compare their common subdirectories
80 recursively. */
81 static bool recursive;
82
83 /* In context diffs, show previous lines that match these regexps. */
84 static struct regexp_list function_regexp_list;
85
86 /* Ignore changes affecting only lines that match these regexps. */
87 static struct regexp_list ignore_regexp_list;
88
89 #if O_BINARY //条件编译:O_BINARY非零时编译#if分支,否则编译#else分支
90 /* Use binary I/O when reading and writing data (--binary).
91 On POSIX hosts, this has no effect. */
92 static bool binary;
93 #else
94 enum { binary = true };
95 #endif
96
97 /* If one file is missing, treat it as present but empty (-N). */
98 static bool new_file;
99
100 /* If the first file is missing, treat it as present but empty
101 (--unidirectional-new-file). */
102 static bool unidirectional_new_file;
103
104 /* Report files compared that are the same (-s).
105 Normally nothing is output when that happens. */
106 static bool report_identical_files;
107 ^L
108 static char const shortopts[] =
109 "0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";
静态变量:全局变量
在全局变量之前加上关键字static,全局变量就被定义成为一个全局静态变量。
1)内存中的位置:静态存储区(静态存储区在整个程序运行期间都存在)
2)初始化:未经显式初始化的全局静态变量会被程序自动初始化为0(自动对象的值是不确定的,除非它被显式初始化)
3)作用域:全局静态变量在声明它的文件之外是不可见的。准确地讲,其作用域从定义之处开始,到文件结尾为止。
111 /* Values for long options that do not have single-letter equivalents. */
112 enum
113 {
114 BINARY_OPTION = CHAR_MAX + 1,
115 FROM_FILE_OPTION,
116 HELP_OPTION,
117 HORIZON_LINES_OPTION,
118 IGNORE_FILE_NAME_CASE_OPTION,
119 INHIBIT_HUNK_MERGE_OPTION,
120 LEFT_COLUMN_OPTION,
121 LINE_FORMAT_OPTION,
122 NO_DEREFERENCE_OPTION,
123 NO_IGNORE_FILE_NAME_CASE_OPTION,
124 NORMAL_OPTION,
125 SDIFF_MERGE_ASSIST_OPTION,
126 STRIP_TRAILING_CR_OPTION,
127 SUPPRESS_BLANK_EMPTY_OPTION,
128 SUPPRESS_COMMON_LINES_OPTION,
129 TABSIZE_OPTION,
130 TO_FILE_OPTION,
131
132 /* These options must be in sequence. */
133 UNCHANGED_LINE_FORMAT_OPTION,
134 OLD_LINE_FORMAT_OPTION,
135 NEW_LINE_FORMAT_OPTION,
136
137 /* These options must be in sequence. */
138 UNCHANGED_GROUP_FORMAT_OPTION,
139 OLD_GROUP_FORMAT_OPTION,
140 NEW_GROUP_FORMAT_OPTION,
141 CHANGED_GROUP_FORMAT_OPTION,
142
143 COLOR_OPTION,
144 COLOR_PALETTE_OPTION,
145
146 PRESUME_OUTPUT_TTY_OPTION,
147 };
该枚举为没有等价单字母形式(比如-y、-W这类)的长选项定义取值。取值从CHAR_MAX + 1开始,因此不会与任何单字符选项的值冲突。
代码149-229行都是定义一些选项的名称。
代码239-270行都是对选项的操作。
2. main函数
271 int
272 main (int argc, char **argv)
273 {
274 int exit_status = EXIT_SUCCESS; //文件相同,status=EXIT_SUCCESS
275 int c;
276 int i;
277 int prev = -1;
278 lin ocontext = -1;
279 bool explicit_context = false; //是否通过-C/-U显式指定了上下文行数
280 size_t width = 0;
281 bool show_c_function = false;
282 char const *from_file = NULL;
283 char const *to_file = NULL;
284 uintmax_t numval;
285 char *numend;
286 //进行初始化
287 /* Do our initializations. */
288 exit_failure = EXIT_TROUBLE; //EXIT_TROUBLE=2(system.h定义)
289 initialize_main (&argc, &argv);
290 set_program_name (argv[0]);
291 setlocale (LC_ALL, "");
292 bindtextdomain (PACKAGE, LOCALEDIR);
293 textdomain (PACKAGE);
294 c_stack_action (0);
295 function_regexp_list.buf = &function_regexp;
296 ignore_regexp_list.buf = &ignore_regexp;
297 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
298 excluded = new_exclude ();
299 presume_output_tty = false;
300
301 /* Decode the options. */ //解析(解码)命令行选项
302
303 while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
304 {
305 switch (c)
306 {
307 case 0:
308 break;
309
310 case '0':
311 case '1':
312 case '2':
313 case '3':
314 case '4':
315 case '5':
316 case '6':
317 case '7':
318 case '8':
319 case '9':
320 ocontext = (! ISDIGIT (prev)
321 ? c - '0'
322 : (ocontext - (c - '0' <= CONTEXT_MAX % 10)
323 < CONTEXT_MAX / 10)
324 ? 10 * ocontext + (c - '0')
325 : CONTEXT_MAX);
326 break;
327
328 case 'a':
329 text = true;
330 break;
331
332 case 'b':
333 if (ignore_white_space < IGNORE_SPACE_CHANGE)
334 ignore_white_space = IGNORE_SPACE_CHANGE;
335 break;
336
337 case 'Z':
338 if (ignore_white_space < IGNORE_SPACE_CHANGE)
339 ignore_white_space |= IGNORE_TRAILING_SPACE;
340 break;
341
342 case 'B':
343 ignore_blank_lines = true;
344 break;
345
346 case 'C':
347 case 'U':
348 {
349 if (optarg)
350 {
351 numval = strtoumax (optarg, &numend, 10);
352 if (*numend)
353 try_help ("invalid context length '%s'", optarg);
354 if (CONTEXT_MAX < numval)
355 numval = CONTEXT_MAX;
356 }
357 else
358 numval = 3;
359
360 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
361 if (context < numval)
362 context = numval;
363 explicit_context = true;
364 }
365 break;
366
367 case 'c':
368 specify_style (OUTPUT_CONTEXT);
369 if (context < 3)
370 context = 3;
371 break;
372
373 case 'd':
374 minimal = true;
375 break;
376
377 case 'D':
378 specify_style (OUTPUT_IFDEF);
379 {
380 static char const C_ifdef_group_formats[] =
381 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */ \n";
382 char *b = xmalloc (sizeof C_ifdef_group_formats
383 + 7 * strlen (optarg) - 14 /* 7*"%s" */
384 - 8 /* 5*"%%" + 3*"%c" */);
385 sprintf (b, C_ifdef_group_formats,
386 0,
387 optarg, optarg, 0,
388 optarg, optarg, 0,
389 optarg, optarg, optarg);
390 for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++)
391 {
392 specify_value (&group_format[i], b, "-D");
393 b += strlen (b) + 1;
394 }
395 }
396 break;
397
398 case 'e':
399 specify_style (OUTPUT_ED);
400 break;
401
402 case 'E':
403 if (ignore_white_space < IGNORE_SPACE_CHANGE)
404 ignore_white_space |= IGNORE_TAB_EXPANSION;
405 break;
406
407 case 'f':
408 specify_style (OUTPUT_FORWARD_ED);
409 break;
410
411 case 'F':
412 add_regexp (&function_regexp_list, optarg);
413 break;
414
415 case 'h':
416 /* Split the files into chunks for faster processing.
417 Usually does not change the result.
418
419 This currently has no effect. */
420 break;
421
422 case 'H':
423 speed_large_files = true;
424 break;
425
426 case 'i':
427 ignore_case = true;
428 break;
429
430 case 'I':
431 add_regexp (&ignore_regexp_list, optarg);
432 break;
433
434 case 'l':
435 if (!pr_program[0])
436 try_help ("pagination not supported on this host", NULL);
437 paginate = true;
438 #ifdef SIGCHLD
439 /* Pagination requires forking and waiting, and
440 System V fork+wait does not work if SIGCHLD is ignored. */
441 signal (SIGCHLD, SIG_DFL);
442 #endif
443 break;
444
445 case 'L':
446 if (!file_label[0])
447 file_label[0] = optarg;
448 else if (!file_label[1])
449 file_label[1] = optarg;
450 else
451 fatal ("too many file label options");
452 break;
453
454 case 'n':
455 specify_style (OUTPUT_RCS);
456 break;
457
458 case 'N':
459 new_file = true;
460 break;
461
462 case 'p':
463 show_c_function = true;
464 add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
465 break;
466
467 case 'P':
468 unidirectional_new_file = true;
469 break;
470
471 case 'q':
472 brief = true;
473 break;
474
475 case 'r':
476 recursive = true;
477 break;
478
479 case 's':
480 report_identical_files = true;
481 break;
482
483 case 'S':
484 specify_value (&starting_file, optarg, "-S");
485 break;
486
487 case 't':
488 expand_tabs = true;
489 break;
490
491 case 'T':
492 initial_tab = true;
493 break;
494
495 case 'u':
496 specify_style (OUTPUT_UNIFIED);
497 if (context < 3)
498 context = 3;
499 break;
500
501 case 'v':
502 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
503 AUTHORS, (char *) NULL);
504 check_stdout ();
505 return EXIT_SUCCESS;
506
507 case 'w':
508 ignore_white_space = IGNORE_ALL_SPACE;
509 break;
510
511 case 'x':
512 add_exclude (excluded, optarg, exclude_options ());
513 break;
514
515 case 'X':
516 if (add_exclude_file (add_exclude, excluded, optarg,
517 exclude_options (), '\n'))
518 pfatal_with_name (optarg);
519 break;
520
521 case 'y':
522 specify_style (OUTPUT_SDIFF);
523 break;
524
525 case 'W':
526 numval = strtoumax (optarg, &numend, 10);
527 if (! (0 < numval && numval <= SIZE_MAX) || *numend)
528 try_help ("invalid width '%s'", optarg);
529 if (width != numval)
530 {
531 if (width)
532 fatal ("conflicting width options");
533 width = numval;
534 }
535 break;
536
537 case BINARY_OPTION:
538 #if O_BINARY
539 binary = true;
540 if (! isatty (STDOUT_FILENO))
541 set_binary_mode (STDOUT_FILENO, O_BINARY);
542 #endif
543 break;
544
545 case FROM_FILE_OPTION:
546 specify_value (&from_file, optarg, "--from-file");
547 break;
548
549 case HELP_OPTION:
550 usage ();
551 check_stdout ();
552 return EXIT_SUCCESS;
553
554 case HORIZON_LINES_OPTION:
555 numval = strtoumax (optarg, &numend, 10);
556 if (*numend)
557 try_help ("invalid horizon length '%s'", optarg);
558 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
559 break;
560
561 case IGNORE_FILE_NAME_CASE_OPTION:
562 ignore_file_name_case = true;
563 break;
564
565 case INHIBIT_HUNK_MERGE_OPTION:
566 /* This option is obsolete, but accept it for backward
567 compatibility. */
568 break;
569
570 case LEFT_COLUMN_OPTION:
571 left_column = true;
572 break;
573
574 case LINE_FORMAT_OPTION:
575 specify_style (OUTPUT_IFDEF);
576 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
577 specify_value (&line_format[i], optarg, "--line-format");
578 break;
579
580 case NO_DEREFERENCE_OPTION:
581 no_dereference_symlinks = true;
582 break;
583
584 case NO_IGNORE_FILE_NAME_CASE_OPTION:
585 ignore_file_name_case = false;
586 break;
587
588 case NORMAL_OPTION:
589 specify_style (OUTPUT_NORMAL);
590 break;
591
592 case SDIFF_MERGE_ASSIST_OPTION:
593 specify_style (OUTPUT_SDIFF);
594 sdiff_merge_assist = true;
595 break;
596
597 case STRIP_TRAILING_CR_OPTION:
598 strip_trailing_cr = true;
599 break;
600
601 case SUPPRESS_BLANK_EMPTY_OPTION:
602 suppress_blank_empty = true;
603 break;
604
605 case SUPPRESS_COMMON_LINES_OPTION:
606 suppress_common_lines = true;
607 break;
608
609 case TABSIZE_OPTION:
610 numval = strtoumax (optarg, &numend, 10);
611 if (! (0 < numval && numval <= SIZE_MAX - GUTTER_WIDTH_MINIMUM)
612 || *numend)
613 try_help ("invalid tabsize '%s'", optarg);
614 if (tabsize != numval)
615 {
616 if (tabsize)
617 fatal ("conflicting tabsize options");
618 tabsize = numval;
619 }
620 break;
621
622 case TO_FILE_OPTION:
623 specify_value (&to_file, optarg, "--to-file");
624 break;
625
626 case UNCHANGED_LINE_FORMAT_OPTION:
627 case OLD_LINE_FORMAT_OPTION:
628 case NEW_LINE_FORMAT_OPTION:
629 specify_style (OUTPUT_IFDEF);
630 c -= UNCHANGED_LINE_FORMAT_OPTION;
631 specify_value (&line_format[c], optarg, line_format_option[c]);
632 break;
633
634 case UNCHANGED_GROUP_FORMAT_OPTION:
635 case OLD_GROUP_FORMAT_OPTION:
636 case NEW_GROUP_FORMAT_OPTION:
637 case CHANGED_GROUP_FORMAT_OPTION:
638 specify_style (OUTPUT_IFDEF);
639 c -= UNCHANGED_GROUP_FORMAT_OPTION;
640 specify_value (&group_format[c], optarg, group_format_option[c]);
641 break;
642
643 case COLOR_OPTION:
644 specify_colors_style (optarg);
645 break;
646
647 case COLOR_PALETTE_OPTION:
648 set_color_palette (optarg);
649 break;
650
651 case PRESUME_OUTPUT_TTY_OPTION:
652 presume_output_tty = true;
653 break;
654
655 default:
656 try_help (NULL, NULL);
657 }
658 prev = c;
659 }
660
661 if (colors_style == AUTO)
662 {
663 char const *t = getenv ("TERM");
664 if (t && STREQ (t, "dumb"))
665 colors_style = NEVER;
666 }
667
668 if (output_style == OUTPUT_UNSPECIFIED)
669 {
670 if (show_c_function)
671 {
672 specify_style (OUTPUT_CONTEXT);
673 if (ocontext < 0)
674 context = 3;
675 }
676 else
677 specify_style (OUTPUT_NORMAL);
678 }
679
680 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
681 {
682 #if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \
683 || defined HAVE_STRUCT_STAT_ST_SPARE1)
684 time_format = "%Y-%m-%d %H:%M:%S.%N %z";
685 #else
686 time_format = "%Y-%m-%d %H:%M:%S %z";
687 #endif
688 }
689 else
690 {
691 /* See POSIX 1003.1-2001 for this format. */
692 time_format = "%a %b %e %T %Y";
693 }
694
695 if (0 <= ocontext
696 && (output_style == OUTPUT_CONTEXT
697 || output_style == OUTPUT_UNIFIED)
698 && (context < ocontext
699 || (ocontext < context && ! explicit_context)))
700 context = ocontext;
701
702 if (! tabsize)
703 tabsize = 8;
704 if (! width)
705 width = 130;
706
707 {
708 /* Maximize first the half line width, and then the gutter width,
709 according to the following constraints:
710
711 1. Two half lines plus a gutter must fit in a line.
712 2. If the half line width is nonzero:
713 a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
714 b. If tabs are not expanded to spaces,
715 a half line plus a gutter is an integral number of tabs,
716 so that tabs in the right column line up. */
717
718 size_t t = expand_tabs ? 1 : tabsize;
719 size_t w = width;
720 size_t t_plus_g = t + GUTTER_WIDTH_MINIMUM;
721 size_t unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1);
722 size_t off = unaligned_off - unaligned_off % t;
723 sdiff_half_width = (off <= GUTTER_WIDTH_MINIMUM || w <= off
724 ? 0
725 : MIN (off - GUTTER_WIDTH_MINIMUM, w - off));
726 sdiff_column2_offset = sdiff_half_width ? off : w;
727 }
728
729 /* Make the horizon at least as large as the context, so that
730 shift_boundaries has more freedom to shift the first and last hunks. */
731 if (horizon_lines < context)
732 horizon_lines = context;
733
734 summarize_regexp_list (&function_regexp_list);
735 summarize_regexp_list (&ignore_regexp_list);
736
737 if (output_style == OUTPUT_IFDEF)
738 {
739 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
740 if (!line_format[i])
741 line_format[i] = "%l\n";
742 if (!group_format[OLD])
743 group_format[OLD]
744 = group_format[CHANGED] ? group_format[CHANGED] : "%<";
745 if (!group_format[NEW])
746 group_format[NEW]
747 = group_format[CHANGED] ? group_format[CHANGED] : "%>";
748 if (!group_format[UNCHANGED])
749 group_format[UNCHANGED] = "%=";
750 if (!group_format[CHANGED])
751 group_format[CHANGED] = concat (group_format[OLD],
752 group_format[NEW], "");
753 }
754
755 no_diff_means_no_output =
756 (output_style == OUTPUT_IFDEF ?
757 (!*group_format[UNCHANGED]
758 || (STREQ (group_format[UNCHANGED], "%=")
759 && !*line_format[UNCHANGED]))
760 : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
761
762 files_can_be_treated_as_binary =
763 (brief & binary
764 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
765 | (ignore_regexp_list.regexps || ignore_white_space)));
766
767 switch_string = option_list (argv + 1, optind - 1);
768
769 if (from_file)
770 {
771 if (to_file)
772 fatal ("--from-file and --to-file both specified");
773 else
774 for (; optind < argc; optind++)
775 {
776 int status = compare_files (NULL, from_file, argv[optind]);
777 if (exit_status < status)
778 exit_status = status;
779 }
780 }
781 else
782 {
783 if (to_file)
784 for (; optind < argc; optind++)
785 {
786 int status = compare_files (NULL, argv[optind], to_file);
787 if (exit_status < status)
788 exit_status = status;
789 }
790 else
791 {
792 if (argc - optind != 2)
793 {
794 if (argc - optind < 2)
795 try_help ("missing operand after '%s'", argv[argc - 1]);
796 else
797 try_help ("extra operand '%s'", argv[optind + 2]);
798 }
799
800 exit_status = compare_files (NULL, argv[optind], argv[optind + 1]);
801 }
802 }
803
804 /* Print any messages that were saved up for last. */
805 print_message_queue ();
806
807 check_stdout ();
808 exit (exit_status);
809 return exit_status;
810 }
比较文件
1081 /* Compare two files (or dirs) with parent comparison PARENT
1082 and names NAME0 and NAME1.
1083 (If PARENT is null, then the first name is just NAME0, etc.)
1084 This is self-contained; it opens the files and closes them.
1085
1086 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1087 different, EXIT_TROUBLE if there is a problem opening them. */
1088
1089 static int
1090 compare_files (struct comparison const *parent,
1091 char const *name0,
1092 char const *name1)
1093 {
1094 struct comparison cmp;
1095 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1096 register int f;
1097 int status = EXIT_SUCCESS;
1098 bool same_files;
1099 char *free0;
1100 char *free1;
1101
1102 /* If this is directory comparison, perhaps we have a file
1103 that exists only in one of the directories.
1104 If so, just print a message to that effect. */
1105
1106 if (! ((name0 && name1)
1107 || (unidirectional_new_file && name1)
1108 || new_file))
1109 {
1110 char const *name = name0 ? name0 : name1;
1111 char const *dir = parent->file[!name0].name;
1112
1113 /* See POSIX 1003.1-2001 for this format. */
1114 message ("Only in %s: %s\n", dir, name);
1115
1116 /* Return EXIT_FAILURE so that diff_dirs will return
1117 EXIT_FAILURE ("some files differ"). */
1118 return EXIT_FAILURE;
1119 }
1120
1121 memset (cmp.file, 0, sizeof cmp.file);
1122 cmp.parent = parent;
1123
1124 /* cmp.file[f].desc markers */
1125 #define NONEXISTENT (-1) /* nonexistent file */
1126 #define UNOPENED (-2) /* unopened file (e.g. directory) */
1127 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1128
1129 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1130
1131 cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
1132 cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;
1133
1134 /* Now record the full name of each file, including nonexistent ones. */
1135
1136 if (!name0)
1137 name0 = name1;
1138 if (!name1)
1139 name1 = name0;
1140
1141 if (!parent)
1142 {
1143 free0 = NULL;
1144 free1 = NULL;
1145 cmp.file[0].name = name0;
1146 cmp.file[1].name = name1;
1147 }
1148 else
1149 {
1150 cmp.file[0].name = free0
1151 = file_name_concat (parent->file[0].name, name0, NULL);
1152 cmp.file[1].name = free1
1153 = file_name_concat (parent->file[1].name, name1, NULL);
1154 }
1155
1156 /* Stat the files. */
1157
1158 for (f = 0; f < 2; f++)
1159 {
1160 if (cmp.file[f].desc != NONEXISTENT)
1161 {
1162 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1163 {
1164 cmp.file[f].desc = cmp.file[0].desc;
1165 cmp.file[f].stat = cmp.file[0].stat;
1166 }
1167 else if (STREQ (cmp.file[f].name, "-"))
1168 {
1169 cmp.file[f].desc = STDIN_FILENO;
1170 if (binary && ! isatty (STDIN_FILENO))
1171 set_binary_mode (STDIN_FILENO, O_BINARY);
1172 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1173 cmp.file[f].desc = ERRNO_ENCODE (errno);
1174 else
1175 {
1176 if (S_ISREG (cmp.file[f].stat.st_mode))
1177 {
1178 off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR);
1179 if (pos < 0)
1180 cmp.file[f].desc = ERRNO_ENCODE (errno);
1181 else
1182 cmp.file[f].stat.st_size =
1183 MAX (0, cmp.file[f].stat.st_size - pos);
1184 }
1185
1186 /* POSIX 1003.1-2001 requires current time for
1187 stdin. */
1188 set_mtime_to_now (&cmp.file[f].stat);
1189 }
1190 }
1191 else if ((no_dereference_symlinks
1192 ? lstat (cmp.file[f].name, &cmp.file[f].stat)
1193 : stat (cmp.file[f].name, &cmp.file[f].stat))
1194 != 0)
1195 cmp.file[f].desc = ERRNO_ENCODE (errno);
1196 }
1197 }
1198
1199 /* Mark files as nonexistent as needed for -N and -P, if they are
1200 inaccessible empty regular files (the kind of files that 'patch'
1201 creates to indicate nonexistent backups), or if they are
1202 top-level files that do not exist but their counterparts do
1203 exist. */
1204 for (f = 0; f < 2; f++)
1205 if ((new_file || (f == 0 && unidirectional_new_file))
1206 && (cmp.file[f].desc == UNOPENED
1207 ? (S_ISREG (cmp.file[f].stat.st_mode)
1208 && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
1209 && cmp.file[f].stat.st_size == 0)
1210 : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
1211 || cmp.file[f].desc == ERRNO_ENCODE (EBADF))
1212 && ! parent
1213 && (cmp.file[1 - f].desc == UNOPENED
1214 || cmp.file[1 - f].desc == STDIN_FILENO))))
1215 cmp.file[f].desc = NONEXISTENT;
1216
1217 for (f = 0; f < 2; f++)
1218 if (cmp.file[f].desc == NONEXISTENT)
1219 {
1220 memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
1221 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1222 }
1223
1224 for (f = 0; f < 2; f++)
1225 {
1226 int e = ERRNO_DECODE (cmp.file[f].desc);
1227 if (0 <= e)
1228 {
1229 errno = e;
1230 perror_with_name (cmp.file[f].name);
1231 status = EXIT_TROUBLE;
1232 }
1233 }
1234
1235 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1236 {
1237 /* If one is a directory, and it was specified in the command line,
1238 use the file in that dir with the other file's basename. */
1239
1240 int fnm_arg = DIR_P (0);
1241 int dir_arg = 1 - fnm_arg;
1242 char const *fnm = cmp.file[fnm_arg].name;
1243 char const *dir = cmp.file[dir_arg].name;
1244 char const *filename = cmp.file[dir_arg].name = free0
1245 = find_dir_file_pathname (dir, last_component (fnm));
1246
1247 if (STREQ (fnm, "-"))
1248 fatal ("cannot compare '-' to a directory");
1249
1250 if ((no_dereference_symlinks
1251 ? lstat (filename, &cmp.file[dir_arg].stat)
1252 : stat (filename, &cmp.file[dir_arg].stat))
1253 != 0)
1254 {
1255 perror_with_name (filename);
1256 status = EXIT_TROUBLE;
1257 }
1258 }
1259
1260 if (status != EXIT_SUCCESS)
1261 {
1262 /* One of the files should exist but does not. */
1263 }
1264 else if (cmp.file[0].desc == NONEXISTENT
1265 && cmp.file[1].desc == NONEXISTENT)
1266 {
1267 /* Neither file "exists", so there's nothing to compare. */
1268 }
1269 else if ((same_files
1270 = (cmp.file[0].desc != NONEXISTENT
1271 && cmp.file[1].desc != NONEXISTENT
1272 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1273 && same_file_attributes (&cmp.file[0].stat,
1274 &cmp.file[1].stat)))
1275 && no_diff_means_no_output)
1276 {
1277 /* The two named files are actually the same physical file.
1278 We know they are identical without actually reading them. */
1279 }
1280 else if (DIR_P (0) & DIR_P (1))
1281 {
1282 if (output_style == OUTPUT_IFDEF)
1283 fatal ("-D option not supported with directories");
1284
1285 /* If both are directories, compare the files in them. */
1286
1287 if (parent && !recursive)
1288 {
1289 /* But don't compare dir contents one level down
1290 unless -r was specified.
1291 See POSIX 1003.1-2001 for this format. */
1292 message ("Common subdirectories: %s and %s\n",
1293 cmp.file[0].name, cmp.file[1].name);
1294 }
1295 else
1296 status = diff_dirs (&cmp, compare_files);
1297 }
1298 else if ((DIR_P (0) | DIR_P (1))
1299 || (parent
1300 && !((S_ISREG (cmp.file[0].stat.st_mode)
1301 || S_ISLNK (cmp.file[0].stat.st_mode))
1302 && (S_ISREG (cmp.file[1].stat.st_mode)
1303 || S_ISLNK (cmp.file[1].stat.st_mode)))))
1304 {
1305 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1306 {
1307 /* We have a subdirectory that exists only in one directory. */
1308
1309 if ((DIR_P (0) | DIR_P (1))
1310 && recursive
1311 && (new_file
1312 || (unidirectional_new_file
1313 && cmp.file[0].desc == NONEXISTENT)))
1314 status = diff_dirs (&cmp, compare_files);
1315 else
1316 {
1317 char const *dir;
1318
1319 /* PARENT must be non-NULL here. */
1320 assert (parent);
1321 dir = parent->file[cmp.file[0].desc == NONEXISTENT].name;
1322
1323 /* See POSIX 1003.1-2001 for this format. */
1324 message ("Only in %s: %s\n", dir, name0);
1325
1326 status = EXIT_FAILURE;
1327 }
1328 }
1329 else
1330 {
1331 /* We have two files that are not to be compared. */
1332
1333 /* See POSIX 1003.1-2001 for this format. */
1334 message5 ("File %s is a %s while file %s is a %s\n",
1335 file_label[0] ? file_label[0] : cmp.file[0].name,
1336 file_type (&cmp.file[0].stat),
1337 file_label[1] ? file_label[1] : cmp.file[1].name,
1338 file_type (&cmp.file[1].stat));
1339
1340 /* This is a difference. */
1341 status = EXIT_FAILURE;
1342 }
1343 }
1344 else if (S_ISLNK (cmp.file[0].stat.st_mode)
1345 || S_ISLNK (cmp.file[1].stat.st_mode))
1346 {
1347 /* We get here only if we use lstat(), not stat(). */
1348 assert (no_dereference_symlinks);
1349
1350 if (S_ISLNK (cmp.file[0].stat.st_mode)
1351 && S_ISLNK (cmp.file[1].stat.st_mode))
1352 {
1353 /* Compare the values of the symbolic links. */
1354 char *link_value[2] = { NULL, NULL };
1355
1356 for (f = 0; f < 2; f++)
1357 {
1358 link_value[f] = xreadlink (cmp.file[f].name);
1359 if (link_value[f] == NULL)
1360 {
1361 perror_with_name (cmp.file[f].name);
1362 status = EXIT_TROUBLE;
1363 break;
1364 }
1365 }
1366 if (status == EXIT_SUCCESS)
1367 {
1368 if ( ! STREQ (link_value[0], link_value[1]))
1369 {
1370 message ("Symbolic links %s and %s differ\n",
1371 cmp.file[0].name, cmp.file[1].name);
1372 /* This is a difference. */
1373 status = EXIT_FAILURE;
1374 }
1375 }
1376 for (f = 0; f < 2; f++)
1377 free (link_value[f]);
1378 }
1379 else
1380 {
1381 /* We have two files that are not to be compared, because
1382 one of them is a symbolic link and the other one is not. */
1383
1384 message5 ("File %s is a %s while file %s is a %s\n",
1385 file_label[0] ? file_label[0] : cmp.file[0].name,
1386 file_type (&cmp.file[0].stat),
1387 file_label[1] ? file_label[1] : cmp.file[1].name,
1388 file_type (&cmp.file[1].stat));
1389
1390 /* This is a difference. */
1391 status = EXIT_FAILURE;
1392 }
1393 }
1394 else if (files_can_be_treated_as_binary
1395 && S_ISREG (cmp.file[0].stat.st_mode)
1396 && S_ISREG (cmp.file[1].stat.st_mode)
1397 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
1398 && 0 < cmp.file[0].stat.st_size
1399 && 0 < cmp.file[1].stat.st_size)
1400 {
1401 message ("Files %s and %s differ\n",
1402 file_label[0] ? file_label[0] : cmp.file[0].name,
1403 file_label[1] ? file_label[1] : cmp.file[1].name);
1404 status = EXIT_FAILURE;
1405 }
1406 else
1407 {
1408 /* Both exist and neither is a directory. */
1409
1410 /* Open the files and record their descriptors. */
1411
1412 int oflags = O_RDONLY | (binary ? O_BINARY : 0);
1413
1414 if (cmp.file[0].desc == UNOPENED)
1415 if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0)
1416 {
1417 perror_with_name (cmp.file[0].name);
1418 status = EXIT_TROUBLE;
1419 }
1420 if (cmp.file[1].desc == UNOPENED)
1421 {
1422 if (same_files)
1423 cmp.file[1].desc = cmp.file[0].desc;
1424 else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0)
1425 {
1426 perror_with_name (cmp.file[1].name);
1427 status = EXIT_TROUBLE;
1428 }
1429 }
1430
1431 /* Compare the files, if no error was found. */
1432
1433 if (status == EXIT_SUCCESS)
1434 status = diff_2_files (&cmp);
1435
1436 /* Close the file descriptors. */
1437
1438 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1439 {
1440 perror_with_name (cmp.file[0].name);
1441 status = EXIT_TROUBLE;
1442 }
1443 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1444 && close (cmp.file[1].desc) != 0)
1445 {
1446 perror_with_name (cmp.file[1].name);
1447 status = EXIT_TROUBLE;
1448 }
1449 }
1450
1451 /* Now the comparison has been done, if no error prevented it,
1452 and STATUS is the value this function will return. */
1453
1454 if (status == EXIT_SUCCESS)
1455 {
1456 if (report_identical_files && !DIR_P (0))
1457 message ("Files %s and %s are identical\n",
1458 file_label[0] ? file_label[0] : cmp.file[0].name,
1459 file_label[1] ? file_label[1] : cmp.file[1].name);
1460 }
1461 else
1462 {
1463 /* Flush stdout so that the user sees differences immediately.
1464 This can hurt performance, unfortunately. */
1465 if (fflush (stdout) != 0)
1466 pfatal_with_name (_("standard output"));
1467 }
1468
1469 free (free0);
1470 free (free1);
1471
1472 return status;
1473 }