符号表通常借助哈希函数(哈希表)来实现,两者关系密切。
哈希函数可以实现:输入一个字符串,然后通过哈希函数将它转换为一个数字输出。以这个数字作为数组下标来存取,查找字符串时的平均时间复杂度为O(1)(发生冲突时需要多探测几次)。
我们实验的parser.c文件中就有相关的实现:get_symbol函数的作用是在符号表中查找词法分析得到的token,并返回它对应的符号表序号。哈希函数的作用就是将输入的字符串p经过移位、累加和取模计算,得到cell,作为hashvec[]的索引。需要注意,hashvec[]是用-1来初始化的,所以当token还没有被加入符号表时,get_symbol会探测到一个空位置并返回负数;如果token已经存在,则返回它的序号(非负数)。
/*
 * get_symbol -- look a token up in the symbol hash table.
 *
 * Hashes `token`, then probes hashvec[] until it reaches either a slot whose
 * symbol has the same length and the same bytes as `token`, or a slot that
 * holds a negative value (treated as "empty").
 *
 * Side effects on file-level variables:
 *   length - set to strlen(token).
 *   cell   - left at the last slot probed: the matching slot on a hit, the
 *            empty slot on a miss (add_symbol() then stores into that slot).
 *
 * Returns the symbol index (>= 0) when the token is already in the table,
 * otherwise the negative value read from the empty slot.
 */
int get_symbol (char* token)
{
const char* p;
unsigned long hash;
int shift = 0; // Cycles through 0, 4, 8, ..., 28.
int i;

length = strlen(token);
hash = length; // Seed the hash with the token length.

// Mix in every character. A pre-tested loop is used so that an empty token
// never reads past its terminator. The character is widened to an unsigned
// type before shifting: shifting the promoted int by up to 28 bits would
// overflow (undefined behaviour) for ordinary characters.
for (p = token; *p != '\0'; p++)
{
hash += (unsigned long)(unsigned char)*p << shift;
shift = (shift + 4) % 32;
}

cell = hash % max_cells; // First slot to probe.
i = hashvec [cell];
while (i >= 0) // Slot occupied: compare, else probe on.
{
if (symbol[i].length == length
&& memcmp (token, symbol[i].name, (size_t)length) == 0)
{
return (i); // Same name: reuse the existing entry.
}
// Collision: derive the next slot from a re-multiplied hash.
// NOTE(review): hashdiv is defined outside this block and the quotient is
// used as an index unchecked -- confirm it keeps the result inside
// hashvec[], in particular where unsigned long is wider than 32 bits.
cell = (hash *= 65549)/hashdiv;
i = hashvec [cell];
}
return (i); // Not found: negative; `cell` is the free slot.
}
当get_symbol返回负数(即该token还没有登记到符号表中)时,add_symbol()函数会进入if (sti < 0)语句块,把它作为新符号加入符号表;如果返回的是非负数,说明符号已存在,直接返回它的序号。
/*
 * add_symbol -- return the symbol-table index for `token`, inserting the
 * token first when the hash table does not contain it yet (so duplicates
 * are never stored twice).
 *
 *   t      terminal number recorded for a newly created symbol
 *          (<identifier>, <string>, <number>, ...).
 *   token  NUL-terminated name. A private heap copy is stored in the table,
 *          so the caller keeps ownership of its own buffer.
 *
 * Relies on get_symbol() leaving the file-level `cell` at the empty hash slot
 * when the token is not found. Updates the file-level `sti`, `length`,
 * `n_symbols`, `hashvec[]` and `symbol[]`.
 *
 * Returns the index of the existing or newly created symbol.
 */
int add_symbol (short t, char* token)
{
char *p;

sti = get_symbol (token);
length = strlen(token);
if (sti >= 0)
{
return (sti); // Already present: nothing to allocate or store.
}

sti = n_symbols; // A new symbol gets the next free index.
if (n_symbols >= max_symbols) // Reached maximum number?
{
printf ( "Number of symbols exceeds %d.\n", max_symbols);
quit();
}

// Copy the name only once we know it will be stored; allocating before the
// lookup leaked one buffer for every repeated token. The extra byte holds
// the terminating NUL and is required.
p = malloc ((size_t)length + 1);
if (p == NULL)
{
printf("out of memory for add_symbol\n\n");
quit();
// NOTE(review): quit() is presumably a no-return exit (the overflow path
// above already relies on that). This return only keeps us from copying
// through a NULL pointer should it ever come back.
return (sti);
}
strcpy(p, token);

hashvec[cell] = n_symbols; // Hash slot -> symbol index.
symbol[n_symbols].name = p; // Table owns this copy of the name.
symbol[n_symbols].length = length; // Name length, used by get_symbol().
symbol[n_symbols].term = t; // Terminal number.
symbol[n_symbols].type = 0; // Type starts as undefined (zero).
symbol[n_symbols].cell = cell; // Back-reference to the hash slot.
n_symbols++; // One more symbol in the table.
return (sti); // Return symbol-table index.
}
总结如下图所示:
思考来源和研究材料:
《系统软件开发实践》的《Bison实验2》.