4.1.3.1.2.1.2. Read in PCH content in intermediate form
Earlier we saw that some variables are declared with GTY((…)). The GC (garbage collection) tool parses a specified list of files (GTY((…)) is not processed in files outside this list, where it causes a compile error – undefined identifier) and writes the result into a file of the same name with the prefix “gt-”, which becomes the real input for building the compiler. So the variables involved below are the ones actually used by the compiler; they are derived from the GTY((…))-decorated variables processed by the GC tools.
The GC tool groups the GTY((…))-decorated variables by their attributes. Variables used as free buffers are collected in gt_ggc_deletable_rtab; at line 543 below, these free buffers are cleared.
/* gt_pch_restore: restore GC-managed compiler state from the open PCH
   stream F.  In order: zero the deletable roots, read the scalar roots,
   read the pointer slots listed in gt_ggc_rtab and gt_pch_cache_rtab,
   read the mmap_info record, place the saved memory image at
   mmi.preferred_base, then call ggc_pch_read and
   gt_pch_restore_stringpool.  Every failed read or seek is reported
   through fatal_error.  */
531 void
532 gt_pch_restore (FILE *f) in ggc-common.c
533 {
534 const struct ggc_root_tab *const *rt;
535 const struct ggc_root_tab *rti;
536 size_t i;
537 struct mmap_info mmi;
538 int result;
539
540 /* Delete any deletable objects. This makes ggc_pch_read much
541 faster, as it can be sure that no GCable objects remain other
542 than the ones just read in. */
543 for (rt = gt_ggc_deletable_rtab; *rt; rt++)
544 for (rti = *rt; rti->base != NULL; rti++)
545 memset (rti->base, 0, rti->stride);
546
547 /* Read in all the scalar variables. */
548 for (rt = gt_pch_scalar_rtab; *rt; rt++)
549 for (rti = *rt; rti->base != NULL; rti++)
550 if (fread (rti->base, rti->stride, 1, f) != 1)
551 fatal_error ("can't read PCH file: %m");
552
553 /* Read in all the global pointers, in 6 easy loops. */
/* Only sizeof (void *) bytes are read per element; stride is used
   solely to step from one element to the next.  */
554 for (rt = gt_ggc_rtab; *rt; rt++)
555 for (rti = *rt; rti->base != NULL; rti++)
556 for (i = 0; i < rti->nelt; i++)
557 if (fread ((char *)rti->base + rti->stride * i,
558 sizeof (void *), 1, f) != 1)
559 fatal_error ("can't read PCH file: %m");
560
561 for (rt = gt_pch_cache_rtab; *rt; rt++)
562 for (rti = *rt; rti->base != NULL; rti++)
563 for (i = 0; i < rti->nelt; i++)
564 if (fread ((char *)rti->base + rti->stride * i,
565 sizeof (void *), 1, f) != 1)
566 fatal_error ("can't read PCH file: %m");
567
/* The mmap_info record gives the file offset, size and target address
   of the saved memory image.  */
568 if (fread (&mmi, sizeof (mmi), 1, f) != 1)
569 fatal_error ("can't read PCH file: %m");
570
/* A negative hook result is fatal.  On 0 the image is read from the
   file into mmi.preferred_base right here; on a positive result the
   stream is only positioned just past the image.  */
571 result = host_hooks.gt_pch_use_address (mmi.preferred_base, mmi.size,
572 fileno (f), mmi.offset);
573 if (result < 0)
574 fatal_error ("had to relocate PCH");
575 if (result == 0)
576 {
577 if (fseek (f, mmi.offset, SEEK_SET) != 0
578 || fread (mmi.preferred_base, mmi.size, 1, f) != 1)
579 fatal_error ("can't read PCH file: %m");
580 }
581 else if (fseek (f, mmi.offset + mmi.size, SEEK_SET) != 0)
582 fatal_error ("can't read PCH file: %m");
583
584 ggc_pch_read (f, mmi.preferred_base);
585
586 gt_pch_restore_stringpool ();
587 }
The code above reads in the content of the PCH file in intermediate form step by step; the steps involved are described below.
4.1.3.1.2.1.2.1. Content of scalar variables
The GC tool gathers the scalar variables decorated with “GTY((…))” from the specified files into the array gt_pch_scalar_rtab; at line 548 above, these variables are read in first.
/* NULL-terminated list of per-file scalar root tables.  gt_pch_restore
   walks this list and, for every entry of every table, reads stride
   bytes from the PCH stream into the entry's base address.  */
137 const struct ggc_root_tab * const gt_pch_scalar_rtab[] = { in gtype-c.h
138 gt_pch_rs_gtype_desc_c,
139 gt_pch_rs_gt_alias_h,
140 gt_pch_rs_gt_dbxout_h,
141 gt_pch_rs_gt_dwarf2out_h,
142 gt_pch_rs_gt_dwarf2asm_h,
143 gt_pch_rs_gt_emit_rtl_h,
144 gt_pch_rs_gt_except_h,
145 gt_pch_rs_gt_function_h,
146 gt_pch_rs_gt_langhooks_h,
147 gt_pch_rs_gt_sdbout_h,
148 gt_pch_rs_gt_tree_h,
149 gt_pch_rs_gt_varasm_h,
150 gt_pch_rs_gt_c_decl_h,
151 NULL
152 };
Each element of the array points to an array of ggc_root_tab entries, whose type is shown below; gt_pointer_walker is a function pointer type.
/* One entry of a root table.  A table is an array of these entries;
   gt_pch_restore stops scanning a table at the first entry whose base
   is NULL.  */
67 struct ggc_root_tab { in gtype-c.h
68 void *base; /* Address of the first element described by this entry.  */
69 size_t nelt; /* Number of elements starting at base.  */
70 size_t stride; /* Distance in bytes from one element to the next.  */
71 gt_pointer_walker cb; /* Walker callback; not called by gt_pch_restore.  */
72 gt_pointer_walker pchw; /* Second walker callback, presumably the PCH one (name-based; confirm).  */
73 };
Each element, itself an array as we see above, is extracted from a related source file. Its name tells which file it was generated from; for example, gt_pch_rs_gt_alias_h is generated from alias.c.
The content of the scalar variables includes the sizes of the global arrays managed by GC. For example, gt_pch_rs_gtype_desc_c has the following definition:
/* Scalar root table with three entries.  Each entry describes one
   variable (nelt is 1, stride is the variable's own size) and has no
   walker callbacks; LAST_GGC_ROOT_TAB terminates the table.  */
4532 const struct ggc_root_tab gt_pch_rs_gtype_desc_c[] = { in gtype-desc.c
4533 { &cgraph_varpool_n_nodes, 1, sizeof (cgraph_varpool_n_nodes), NULL, NULL },
4534 { &cgraph_max_uid, 1, sizeof (cgraph_max_uid), NULL, NULL },
4535 { &cgraph_n_nodes, 1, sizeof (cgraph_n_nodes), NULL, NULL },
4536 LAST_GGC_ROOT_TAB
4537 };
Here, cgraph_varpool_n_nodes holds the length of cgraph_varpool_nodes, and cgraph_n_nodes holds the length of cgraph_nodes. These two arrays are referenced by gt_ggc_rtab below.
4.1.3.1.2.1.2.2. Content of global arrays
Global arrays managed by GC are referenced by gt_ggc_rtab. These references are also organized by file and generated by the GC tool.
/* NULL-terminated list of per-file root tables for global pointers.
   gt_pch_restore walks this list and reads one pointer-sized value from
   the PCH stream into each of the nelt elements of every entry.  */
55 const struct ggc_root_tab * const gt_ggc_rtab[] = { in gtype-c.h
56 gt_ggc_r_gt_coverage_h,
57 gt_ggc_r_gtype_desc_c,
58 gt_ggc_r_gt_alias_h,
59 gt_ggc_r_gt_cselib_h,
60 gt_ggc_r_gt_cgraph_h,
61 gt_ggc_r_gt_dbxout_h,
62 gt_ggc_r_gt_dwarf2out_h,
63 gt_ggc_r_gt_dwarf2asm_h,
64 gt_ggc_r_gt_dojump_h,
65 gt_ggc_r_gt_emit_rtl_h,
66 gt_ggc_r_gt_except_h,
67 gt_ggc_r_gt_explow_h,
68 gt_ggc_r_gt_expr_h,
69 gt_ggc_r_gt_fold_const_h,
70 gt_ggc_r_gt_function_h,
71 gt_ggc_r_gt_gcse_h,
72 gt_ggc_r_gt_integrate_h,
73 gt_ggc_r_gt_optabs_h,
74 gt_ggc_r_gt_ra_build_h,
75 gt_ggc_r_gt_regclass_h,
76 gt_ggc_r_gt_reg_stack_h,
77 gt_ggc_r_gt_cfglayout_h,
78 gt_ggc_r_gt_langhooks_h,
79 gt_ggc_r_gt_sdbout_h,
80 gt_ggc_r_gt_stor_layout_h,
81 gt_ggc_r_gt_stringpool_h,
82 gt_ggc_r_gt_tree_h,
83 gt_ggc_r_gt_varasm_h,
84 gt_ggc_r_gt_i386_h,
85 gt_ggc_r_gt_c_parse_h,
86 gt_ggc_r_gt_c_decl_h,
87 gt_ggc_r_gt_c_common_h,
88 gt_ggc_r_gt_c_pragma_h,
89 NULL
90 };
An example of an element of the array is given below. In the struct below, gt_ggc_mx_rtx_def points to the function that accesses the content of an array element managed by the GC, and gt_pch_nx_rtx_def refers to the function that saves the whole structure (including all of its contents) into the GC's saving_htab, which is then written into the PCH file.
/* Root table from gt-coverage.h: a single entry covering the
   GCOV_COUNTERS elements of ctr_labels, followed by the
   LAST_GGC_ROOT_TAB terminator.  The last two fields are the addresses
   of the walker functions gt_ggc_mx_rtx_def and gt_pch_nx_rtx_def.  */
25 const struct ggc_root_tab gt_ggc_r_gt_coverage_h[] = { in gt-coverage.h
26 {
27 &ctr_labels[0],
28 1 * (GCOV_COUNTERS),
29 sizeof (ctr_labels[0]),
30 &gt_ggc_mx_rtx_def,
31 &gt_pch_nx_rtx_def
32 },
33 LAST_GGC_ROOT_TAB
34 };
4.1.3.1.2.1.2.3. Hashtables
Next, certain hash tables in GCC are also managed by GC. The hash tables referenced by gt_pch_cache_rtab below include: const_double_htab, reg_attrs_htab, mem_attrs_htab, const_int_htab (in gt_pch_rc_gt_emit_rtl_h, holding the constant objects generated in RTL emission so far); size_htab (in gt_pch_rc_gt_fold_const_h, holding the INTEGER_CST nodes created by the compiler so far); and type_hash_table (in gt_pch_rc_gt_tree_h, the hash table of the types declared).
/* NULL-terminated list of per-file root tables for the cached hash
   tables.  gt_pch_restore reads one pointer-sized value from the PCH
   stream into each element of every entry, right after gt_ggc_rtab.  */
118 const struct ggc_root_tab * const gt_pch_cache_rtab[] = { in gtype-c.h
119 gt_pch_rc_gt_emit_rtl_h,
120 gt_pch_rc_gt_fold_const_h,
121 gt_pch_rc_gt_tree_h,
122 NULL
123 };
4.1.3.1.2.1.2.4. Map intermediate form content in VM
Besides the internal variables used during compilation, the definitions stored in the PCH file have still not been read in. When the PCH file was generated, this content was an intermediate tree, and handling the pointers within that tree is a major problem. In the current GCC, when the PCH file is written, the tree is mapped into the file and the mapping information is recorded in the mmap_info structure below (for file mapping, refer to the relevant Linux documentation; we do not go into it here).
/* Record read from the PCH stream by gt_pch_restore; it describes where
   the saved memory image lives in the file and where it is to be placed
   in memory.  */
414 struct mmap_info in ggc-common.c
415 {
416 size_t offset; /* File offset at which the image starts.  */
417 size_t size; /* Size of the image in bytes.  */
418 void *preferred_base; /* Address at which the image is to be placed.  */
419 };
Accordingly, when this part of the PCH file is read, it should be mapped at the same address. The reason is obvious: the PCH file also stores identifiers, and for the restored tree to reach them the addresses on both sides must match exactly. At line 571 in gt_pch_restore, gt_pch_use_address performs this mapping; on the Linux platform, this hook refers to the function below.
/* linux_gt_pch_use_address: try to make SIZE bytes of file FD, starting
   at OFFSET, available at address BASE.
   Returns -1 when SIZE is 0, when no mapping could be obtained exactly
   at BASE, or when lseek/read fails.  Returns 1 once the data is at
   BASE, either through a file-backed MAP_PRIVATE mapping or through an
   anonymous mapping filled with read ().  */
170 static int
171 linux_gt_pch_use_address (void *base, size_t size, int fd, size_t offset) in host-linux.c
172 {
173 void *addr;
174
175 /* We're called with size == 0 if we're not planning to load a PCH
176 file at all. This allows the hook to free any static space that
177 we might have allocated at link time. */
178 if (size == 0)
179 return -1;
180
181 /* Try to map the file with MAP_PRIVATE. */
182 addr = mmap (base, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, offset);
183
184 if (addr == base)
185 return 1;
186
/* The mapping succeeded but landed somewhere other than BASE, so it is
   of no use: release it before trying the fallback.  */
187 if (addr != (void *) MAP_FAILED)
188 munmap (addr, size);
189
190 /* Try to make an anonymous private mmap at the desired location. */
191 addr = mmap (base, size, PROT_READ | PROT_WRITE,
192 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
193
194 if (addr != base)
195 {
196 if (addr != (void *) MAP_FAILED)
197 munmap (addr, size);
198 return -1;
199 }
200
201 if (lseek (fd, offset, SEEK_SET) == (off_t)-1)
202 return -1;
203
/* Fill the anonymous mapping from the file.  A single read () may
   deliver fewer bytes than requested, hence the loop; a return of 0 or
   a negative value is treated as failure.  */
204 while (size)
205 {
206 ssize_t nbytes;
207
208 nbytes = read (fd, base, MIN (size, SSIZE_MAX));
209 if (nbytes <= 0)
210 return -1;
211 base = (char *) base + nbytes;
212 size -= nbytes;
213 }
214
215 return 1;
216 }
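The mmap used above is a Linux system call; for details, refer to the Linux manual. Notice that the function first tries to map the file itself at base and, failing that, an anonymous region at base which it then fills by reading the file; if neither mapping lands exactly at address base, the operation fails.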
Back in gt_pch_restore, invoking linux_gt_pch_use_address has reserved virtual memory at the specified position and of the specified length, but physical memory has not been allocated yet; now, at line 584, ggc_pch_read claims the physical memory and reads in the file. We will see how GC manages memory later.
4.1.3.1.2.1.2.4.1. Restore Identifiers
Although the content above has been restored into tree form, many entities within it are pointers. The identifiers they refer to have been read in but still cannot be looked up; they must be present in ident_hash before the compiler can recognize them.
/* gt_pch_restore_stringpool: rebuild ident_hash from spd.  Copies
   nslots and nelements, resizes ident_hash->entries to spd->nslots
   hashnode slots, converts every non-NULL spd entry with
   GCC_IDENT_TO_HT_IDENT (NULL entries stay NULL), and finally sets spd
   to NULL.  */
260 void
261 gt_pch_restore_stringpool (void) in stringpool.c
262 {
263 unsigned int i;
264
265 ident_hash->nslots = spd->nslots;
266 ident_hash->nelements = spd->nelements;
267 ident_hash->entries = xrealloc (ident_hash->entries,
268 sizeof (hashnode) * spd->nslots);
/* Every slot is written, so nothing from the old table survives.  */
269 for (i = 0; i < spd->nslots; i++)
270 if (spd->entries[i] != NULL)
271 ident_hash->entries[i] = GCC_IDENT_TO_HT_IDENT (spd->entries[i]);
272 else
273 ident_hash->entries[i] = NULL;
274
275 spd = NULL;
276 }
Note that spd is listed in gt_ggc_r_gt_stringpool_h, which is in turn contained in gt_ggc_rtab.
4.1.3.1.2.1.3. Restore cached macros and #pragma
A PCH file imposes the following limitation on the use of macros: any macros defined before the precompiled header is included must either be defined in the same way as when the precompiled header was generated, or must not affect the precompiled header, which usually means that they don't appear in the precompiled header at all.
Here the cached macros defined before this PCH file are restored first; the macros defined in the PCH file itself are read in in the section below, so as to obey the order of appearance.
605 int
606 cpp_read_state (cpp_reader *r, const char *name, FILE *f, in cpppch.c
607 struct save_macro_data *data)
608 {
609 struct macrodef_struct m;
610 size_t defnlen = 256;
611 unsigned char *defn = xmalloc (defnlen);
612 struct lexer_state old_state;
613 struct save_macro_item *d;
614 size_t i, mac_count;
615 int saved_line = r->line;
616
617 /* Restore spec_nodes, which will be full of references to the old
618 hashtable entries and so will now be invalid. */
619 {
620 struct spec_nodes *s = &r->spec_nodes;
621 s->n_defined = cpp_lookup (r, DSC("defined"));
622 s->n_true = cpp_lookup (r, DSC("true"));
623 s->n_false = cpp_lookup (r, DSC("false"));
624 s->n__VA_ARGS__ = cpp_lookup (r, DSC("__VA_ARGS__"));
625 }
626
627 /* Run through the carefully-saved macros, insert them. */
628 d = data->macros;
629 mac_count = data->count;
630 while (d)
631 {
632 struct save_macro_item *nextd;
633 for (i = 0; i < mac_count; i++)
634 {
635 cpp_hashnode *h;
636
637 h = cpp_lookup (r, HT_STR (HT_NODE (&d->macs[i])),
638 HT_LEN (HT_NODE (&d->macs[i])));
639 h->type = d->macs[i].type;
640 h->flags = d->macs[i].flags;
641 h->value = d->macs[i].value;
642 free ((void *)HT_STR (HT_NODE (&d->macs[i])));
643 }
644 nextd = d->next;
645 free (d);
646 d = nextd;
647 mac_count = ARRAY_SIZE (d->macs);
648 }
649
650 _cpp_restore_pragma_names (r, data->saved_pragmas);
651
652 free (data);
The #pragma directives are restored in a similar way.
/* _cpp_restore_pragma_names: restore the pragma names of PFILE from the
   string array SAVED by walking pfile->pragmas, then free the array
   itself.  The individual strings are freed by
   restore_registered_pragmas as it consumes them.  */
1124 void
1125 _cpp_restore_pragma_names (cpp_reader *pfile, char **saved) in cpplib.c
1126 {
1127 (void) restore_registered_pragmas (pfile, pfile->pragmas, saved);
1128 free (saved);
1129 }
/* restore_registered_pragmas: walk the pragma_entry list PE.  For each
   entry, first recurse into its sub-list when is_nspace is set, then set
   pe->pragma to the cpp_lookup result for the next string in SD, free
   that string and advance SD.
   Returns SD advanced past every string consumed, so that the caller
   (including the recursive call) continues from the right position.  */
1107 static char **
1108 restore_registered_pragmas (cpp_reader *pfile, struct pragma_entry *pe,
1109 char **sd)
1110 {
1111 for (; pe != NULL; pe = pe->next)
1112 {
1113 if (pe->is_nspace)
1114 sd = restore_registered_pragmas (pfile, pe->u.space, sd);
1115 pe->pragma = cpp_lookup (pfile, U *sd, strlen (*sd));
1116 free (*sd);
1117 sd++;
1118 }
1119 return sd;
1120 }