本章介绍表的结构、操作及应用。文章给出的例子用到了前面章节的atom/except/mem,分别对应“原子”、“异常和断言”、“内存管理”章节。其中except在前面的章节中已经详细给出,并验证无问题,这里补充一下atom和mem的代码。
atom.h
#ifndef ATOM_INCLUDED
#define ATOM_INCLUDED
/* Declared as part of the atom interface; note that no definition of
 * Atom_length appears in the atom.c listing that follows. */
extern int Atom_length(const char *str);
/* Returns the atom for the len bytes starting at str, creating and storing a
 * NUL-terminated copy if an identical byte sequence is not already stored. */
extern const char *Atom_new(const char *str, int len);
/* Returns the atom for the NUL-terminated string str (length via strlen). */
extern const char *Atom_string(const char *str);
/* Returns the atom for the decimal text representation of n. */
extern const char *Atom_int(long n);
#endif
atom.c
#include <string.h>
#include <limits.h>
#include "mem.h"
#include "assert.h"
#include "atom.h"
/* Number of elements in the array x; x must be a true array, not a pointer. */
#define NELEMS(x) ((sizeof(x))/(sizeof((x)[0])))
/* Per-byte mixing values for the atom hash: Atom_new indexes this table with
 * each input byte converted to unsigned char, so it needs one entry for every
 * possible byte value. */
static unsigned long scatter[]=
{
2078917053, 143302914, 1027100827, 1953210302, 755253631, 2002600785,1405390230, 45248011,
1099951567, 433832350, 2018585307, 438263339,813528929, 1703199216, 618906479, 573714703,
766270699, 275680090,1510320440, 1583583926, 1723401032, 1965443329, 1098183682, 1636505764,
980071615, 1011597961, 643279273, 1315461275, 157584038, 1069844923,471560540, 89017443,
1213147837, 1498661368, 2042227746, 1968401469,1353778505, 1300134328, 2013649480, 306246424,
1733966678, 1884751139,744509763, 400011959, 1440466707, 1363416242, 973726663, 59253759,1639096332, 336563455, 1642837685, 1215013716, 154523136, 593537720,704035832, 1134594751,
1605135681, 1347315106, 302572379, 1762719719,269676381, 774132919, 1851737163, 1482824219,
125310639, 1746481261,1303742040, 1479089144, 899131941, 1169907872, 1785335569, 485614972,
907175364, 382361684, 885626931, 200158423, 1745777927, 1859353594,259412182, 1237390611,
48433401, 1902249868, 304920680, 202956538,348303940, 1008956512, 1337551289, 1953439621,
208787970, 1640123668,1568675693, 478464352, 266772940, 1272929208, 1961288571, 392083579,
871926821, 1117546963, 1871172724, 1771058762, 139971187, 1509024645,109190086, 1047146551,
1891386329, 994817018, 1247304975, 1489680608,706686964, 1506717157, 579587572, 755120366,
1261483377, 884508252,958076904, 1609787317, 1893464764, 148144545, 1415743291, 2102252735,1788268214, 836935336, 433233439, 2055041154, 2109864544, 247038362,299641085, 834307717,
1364585325, 23330161, 457882831, 1504556512,1532354806, 567072918, 404219416, 1276257488,
1561889936, 1651524391,618454448, 121093252, 1010757900, 1198042020, 876213618, 124757630,
2082550272, 1834290522, 1734544947, 1828531389, 1982435068, 1002804590,1783300476, 1623219634,
1839739926, 69050267, 1530777140, 1802120822,316088629, 1830418225, 488944891, 1680673954,
1853748387, 946827723,1037746818, 1238619545, 1513900641, 1441966234, 367393385, 928306929,
946006977, 985847834, 1049400181, 1956764878, 36406206, 1925613800,2081522508, 2118956479, 1612420674, 1668583807, 1800004220, 1447372094,523904750, 1435821048, 923108080, 216161028,
1504871315, 306401572,2018281851, 1820959944, 2136819798, 359743094, 1354150250, 1843084537,
1306570817, 244413420, 934220434, 672987810, 1686379655, 1301613820,1601294739, 484902984,
139978006, 503211273, 294184214, 176384212,281341425, 228223074, 147857043, 1893762099,
1896806882, 1947861263,1193650546, 273227984, 1236198663, 2116758626, 489389012, 593586330,
275676551, 360187215, 267062626, 265012701, 719930310, 1621212876,2108097238, 2026501127,
1865626297, 894834024, 552005290, 1404522304,48964196, 5816381, 1889425288, 188942202,
509027654, 36125855,365326415, 790369079, 264348929, 513183458, 536647531, 13672163,
313561074, 1730298077, 286900147, 1549759737, 1699573055, 776289160,2143346068, 1975249606,
1136476375, 262925046, 92778659, 1856406685,1884137923, 53392249, 1735424165, 1602280572
};
/* One stored atom. Atom_new allocates the node and its character data in a
 * single block, with str pointing just past the node itself. */
static struct atom
{
/* next atom in the same hash chain */
struct atom *link;
/* number of bytes in str, not counting the terminating NUL */
int len;
/* the atom's NUL-terminated bytes */
char *str;
/* hash table of atom chains, indexed by the hash value modulo its size */
}* buckets[2048];
/*
 * Returns the atom for the NUL-terminated string str.
 * str must not be NULL (checked with assert). The length is taken with
 * strlen and the work is delegated to Atom_new.
 */
const char *Atom_string(const char *str)
{
    int length;

    assert(str);
    length = strlen(str);
    return Atom_new(str, length);
}
/*
 * Returns the atom for the decimal representation of n (with a leading '-'
 * for negative values).
 *
 * The digits are generated right-to-left into a local buffer, then the
 * resulting character range is handed to Atom_new.
 */
const char *Atom_int(long n)
{
    char str[43];
    char *s = str + sizeof(str);
    unsigned long m;

    /*
     * Compute |n| entirely in unsigned arithmetic. Converting a negative long
     * to unsigned long is well defined (modular), and 0UL - that value yields
     * the magnitude, including for LONG_MIN. The previous `LONG_MAX+1U` was
     * evaluated in (signed) long on platforms where long is wider than
     * unsigned int, which is signed overflow and therefore undefined.
     */
    if (n < 0)
    {
        m = 0UL - (unsigned long)n;
    }
    else
    {
        m = (unsigned long)n;
    }

    /* Emit at least one digit, least significant first, filling backwards. */
    do
    {
        *--s = (char)(m % 10 + '0');
    } while ((m /= 10) > 0);

    if (n < 0)
    {
        *--s = '-';
    }
    return Atom_new(s, (int)((str + sizeof(str)) - s));
}
/*
 * Returns the atom for the len bytes starting at str.
 *
 * str must not be NULL and len must be >= 0 (both checked with assert).
 * The bytes are hashed with the scatter table to select a chain in buckets;
 * if an atom with the same length and bytes is already on that chain, its
 * stored string is returned. Otherwise a new node is allocated (node and
 * character data in one block), filled with a NUL-terminated copy, pushed at
 * the head of the chain, and its string is returned.
 */
const char * Atom_new(const char *str, int len)
{
    unsigned long hash = 0;
    struct atom *node;
    int k;

    assert(str);
    assert(len>=0);

    /* Hash every byte; the cast keeps the table index non-negative. */
    for (k = 0; k < len; k++)
    {
        hash = (hash << 1) + scatter[(unsigned char)str[k]];
    }
    hash %= NELEMS(buckets);

    /* Look for an existing atom with identical length and contents. */
    for (node = buckets[hash]; node != NULL; node = node->link)
    {
        if (node->len != len)
        {
            continue;
        }
        if (memcmp(node->str, str, len) == 0)
        {
            return node->str;
        }
    }

    /* Not found: the character data lives directly after the node. */
    node = ALLOC(sizeof(*node) + len + 1);
    node->len = len;
    node->str = (char *)(node + 1);
    if (len > 0)
    {
        memcpy(node->str, str, len);
    }
    node->str[len] = '\0';

    node->link = buckets[hash];
    buckets[hash] = node;
    return node->str;
}
mem.h
#ifndef MEM_INCLUDED
#define MEM_INCLUDED
#include "../../include/except.h"
/* Exception raised by the allocation functions when the underlying
 * allocator returns NULL. */
extern const Except_T Mem_Failed;
/* Allocates nbytes bytes (nbytes must be > 0); file/line identify the caller
 * and are reported if Mem_Failed is raised. */
extern void *Mem_alloc(long nbytes, const char *file, \
int line);
/* Allocates a zero-filled array of count elements of nbytes bytes each
 * (both must be > 0). */
extern void *Mem_calloc(long count, long nbytes, \
const char * file, int line);
/* Releases ptr; a NULL ptr is accepted and ignored. */
extern void Mem_free(void *ptr, const char *file, int line);
/* Changes the size of the block at ptr to nbytes (ptr non-NULL, nbytes > 0)
 * and returns the possibly moved block. */
extern void *Mem_resize(void *ptr, long nbytes, const char *file, int line);
/* Convenience wrappers that pass the caller's __FILE__ and __LINE__. */
#define ALLOC(nbytes) \
Mem_alloc((nbytes), __FILE__, __LINE__)
#define CALLOC(count, nbytes) \
Mem_calloc((count), (nbytes), __FILE__, __LINE__)
/* NEW/NEW0 allocate an object of the type p points to and assign it to p;
 * NEW0 uses the zero-filling CALLOC. */
#define NEW(p) ((p) = ALLOC((long)sizeof*(p)))
#define NEW0(p) ((p) = CALLOC(1, (long)sizeof*(p)))
/* Frees ptr and then sets it to 0. ptr is evaluated twice, so it must be an
 * lvalue without side effects. */
#define FREE(ptr) ((void) (Mem_free((ptr), \
__FILE__, __LINE__), (ptr) = 0))
/* Resizes the block and stores the result back into ptr. */
#define RESIZE(ptr, nbytes) ((ptr) = Mem_resize((ptr), \
(nbytes), __FILE__, __LINE__))
#endif
mem.c
#include <stdlib.h>
#include <stddef.h>
#include "assert.h"
#include "../../include/except.h"
#include "mem.h"
/* Exception object raised by Mem_alloc, Mem_calloc and Mem_resize when the
 * C library allocator returns NULL. */
const struct Except_T Mem_Failed = {"Allocation Failed"};
/*
 * Allocates nbytes bytes with malloc and returns the block.
 *
 * nbytes must be > 0 (checked with assert). If malloc fails, Mem_Failed is
 * raised: at the caller-supplied file/line when file is non-NULL, otherwise
 * via RAISE from this function.
 */
void *Mem_alloc(long nbytes, const char *file, int line)
{
    void *block;

    assert(nbytes > 0);

    block = malloc(nbytes);
    if (block != NULL)
    {
        return block;
    }

    /* Allocation failed: report it at the most specific location known. */
    if (file != NULL)
    {
        Except_raise(&Mem_Failed, file, line);
    }
    else
    {
        RAISE(Mem_Failed);
    }
    return block;
}
/*
 * Allocates a zero-filled array of count elements of nbytes bytes each with
 * calloc and returns it.
 *
 * count and nbytes must both be > 0 (checked with assert). If calloc fails,
 * Mem_Failed is raised: at the caller-supplied file/line when file is
 * non-NULL, otherwise via RAISE from this function.
 */
void *Mem_calloc(long count, long nbytes, const char *file, int line)
{
    void *block;

    assert(count > 0);
    assert(nbytes > 0);

    block = calloc(count, nbytes);
    if (block != NULL)
    {
        return block;
    }

    if (file != NULL)
    {
        Except_raise(&Mem_Failed, file, line);
    }
    else
    {
        RAISE(Mem_Failed);
    }
    return block;
}
/*
 * Releases the block at ptr. A NULL ptr is accepted and nothing happens.
 * file and line are part of the interface but are not used here.
 */
void Mem_free(void *ptr, const char *file, int line)
{
    (void)file;
    (void)line;
    /* free(NULL) is defined to do nothing, so no explicit guard is needed. */
    free(ptr);
}
/*
 * Resizes the block at ptr to nbytes bytes with realloc and returns the
 * (possibly moved) block.
 *
 * ptr must be non-NULL and nbytes must be > 0 (checked with assert). If
 * realloc fails, Mem_Failed is raised: at the caller-supplied file/line when
 * file is non-NULL, otherwise via RAISE from this function.
 */
void *Mem_resize(void *ptr, long nbytes, const char *file, int line)
{
    assert(ptr);
    assert(nbytes > 0);

    ptr = realloc(ptr, nbytes);
    if (ptr != NULL)
    {
        return ptr;
    }

    if (file != NULL)
    {
        Except_raise(&Mem_Failed, file, line);
    }
    else
    {
        RAISE(Mem_Failed);
    }
    return ptr;
}
getword.h
#ifndef GETWORD_INCLUDED
#define GETWORD_INCLUDED
#include <stdio.h>
/* Reads the next word from fp into buf as a NUL-terminated string. A word
 * starts at the first character accepted by first() and continues while
 * rest() accepts; at most size-1 characters are stored. Returns 1 if a word
 * was read and 0 otherwise. */
extern int getword(FILE *fp, char *buf, int size, int first(int c), int rest(int c));
#endif
getword.c
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include "assert.h"
#include "getword.h"
int getword(FILE *fp, char *buf, int size, int first(int c), int rest(int c))
{
int i = 0, c;
assert(fp && buf && size > 1 && first && rest);
c = getc(fp);
for (; c != EOF; c = getc(fp))
{
if(first(c))
{
if(i < size-1 )
{
buf[i++] = c;
}
c = getc(fp);
break;
}
}
for (; c != EOF && rest(c); c = getc(fp))
{
if(i < size -1)
{
buf[i++] = c;
}
}
if (i < size)
{
buf[i] = '\0';
}else
{
buf[size-1] = '\0';
}
if (c != EOF )
{
ungetc(c, fp);
}
return i>0;
}
table.h
#ifndef TABLE_INCLUDED
#define TABLE_INCLUDED
#define T Table_T
/* Opaque handle to a table that maps keys to values. */
typedef struct T * T;
/* Creates an empty table. hint (>= 0) estimates the number of entries and
 * selects the bucket count. cmp returns 0 for equal keys and hash returns a
 * key's hash value; passing NULL for either selects the built-in functions
 * that compare and hash the key pointers themselves. */
extern T Table_new(int hint, int cmp(const void *x, const void *y), unsigned hash(const void *key));
/* Frees all bindings and the table itself, then sets *table to NULL.
 * Keys and values are not freed. */
extern void Table_free(T *table);
/* Returns the number of key/value pairs in the table. */
extern int Table_length(T table);
/* Associates value with key. Returns the previous value when key was already
 * present, otherwise NULL. */
extern void *Table_put(T table, const void *key, void *value);
/* Returns the value associated with key, or NULL when key is not present. */
extern void *Table_get(T table, const void *key);
/* Removes key's binding and returns its value, or NULL when key is not
 * present. */
extern void *Table_remove(T table, const void *key);
/* Calls apply(key, &value, cl) for every binding. The table must not be
 * changed with Table_put or Table_remove while the traversal runs. */
extern void Table_map(T table, void apply(const void *key, void **value, void *cl), void *cl);
/* Returns a newly allocated array holding key, value, key, value, ...
 * followed by end; the caller frees the array. */
extern void **Table_toArray(T table, void *end);
#undef T
#endif
table.c
#include <limits.h>
#include <stddef.h>
#include "mem.h"
#include "assert.h"
#include "table.h"
/* Within this file, T abbreviates the public type name Table_T. */
#define T Table_T
/* Table representation. Table_new allocates the header and its bucket array
 * in one block. */
struct T
{
/* number of buckets */
int size;
/* key comparison: returns 0 when the two keys are equal */
int (*cmp) (const void *x, const void *y);
/* key hash function */
unsigned (*hash) (const void *key);
/* number of bindings currently stored */
int length;
/* incremented by Table_put and Table_remove; Table_map asserts that it does
 * not change during a traversal */
unsigned timestamp;
/* one key/value pair, chained with the others in the same bucket */
struct binding {
struct binding *link;
const void *key;
void *value;
} ** buckets;
};
/*
 * Default key comparison: keys are compared by address.
 * Returns 0 when x and y are the same pointer, 1 otherwise.
 */
static int cmpatom(const void *x, const void *y)
{
    if (x == y)
    {
        return 0;
    }
    return 1;
}
/*
 * Default key hash: the key's address, converted to unsigned long and
 * shifted right by two bits, then narrowed to unsigned.
 */
static unsigned hashatom(const void *key)
{
    unsigned long address = (unsigned long)key;

    return (unsigned)(address >> 2);
}
/*
 * Creates and returns a new, empty table.
 *
 * hint must be >= 0 (asserted); it selects the bucket count from a fixed
 * list of primes: the scan stops at the first entry (starting from index 1)
 * that is not smaller than hint, and the entry before it is used.
 * cmp and hash may be NULL, in which case cmpatom and hashatom are used.
 * The table header and its bucket array are allocated as a single block.
 */
T Table_new(int hint,
int cmp(const void *x, const void *y), unsigned hash(const void *key))
{
    static int primes[] = {509, 509, 1021, 2053, 4093, 8191, 16381, 32771, 65521, INT_MAX};
    T table;
    int pick = 1;
    int slot;

    assert(hint >= 0);

    /* The INT_MAX sentinel guarantees this scan terminates. */
    while (primes[pick] < hint)
    {
        pick++;
    }

    table = ALLOC(sizeof(*table) +
        primes[pick-1]*sizeof(table->buckets[0]));
    table->size = primes[pick-1];
    table->cmp = cmp ? cmp : cmpatom;
    table->hash = hash ? hash : hashatom;

    /* The bucket array sits immediately after the header. */
    table->buckets = (struct binding **) (table + 1);
    for (slot = 0; slot < table->size; slot++)
    {
        table->buckets[slot] = NULL;
    }

    table->length = 0;
    table->timestamp = 0;
    return table;
}
/*
 * Returns the value bound to key, or NULL when the table has no binding
 * for key. table and key must be non-NULL (asserted).
 */
void *Table_get(T table, const void *key)
{
    struct binding *node;
    int slot;

    assert(table);
    assert(key);

    slot = (*table->hash)(key)%table->size;

    /* Walk the chain until a matching key or the end of the chain. */
    node = table->buckets[slot];
    while (node != NULL && (*table->cmp)(key, node->key) != 0)
    {
        node = node->link;
    }

    if (node == NULL)
    {
        return NULL;
    }
    return node->value;
}
/*
 * Binds value to key.
 *
 * If key is already present its value is replaced and the previous value is
 * returned; otherwise a new binding is added at the head of the bucket
 * chain and NULL is returned. The table's timestamp is incremented in both
 * cases. table and key must be non-NULL (asserted).
 */
void *Table_put(T table, const void *key, void *value)
{
    struct binding *entry;
    void *previous = NULL;
    int slot;

    assert(table);
    assert(key);

    slot = (*table->hash) (key) % table->size;

    entry = table->buckets[slot];
    while (entry != NULL && (*table->cmp)(key, entry->key) != 0)
    {
        entry = entry->link;
    }

    if (entry != NULL)
    {
        previous = entry->value;
    }
    else
    {
        NEW(entry);
        entry->key = key;
        entry->link = table->buckets[slot];
        table->buckets[slot] = entry;
        table->length++;
    }

    entry->value = value;
    table->timestamp++;
    return previous;
}
/*
 * Returns the number of bindings stored in table.
 * table must be non-NULL (asserted).
 */
int Table_length(T table)
{
    int count;

    assert(table);
    count = table->length;
    return count;
}
/*
 * Calls apply(key, &value, cl) once for every binding in table.
 *
 * apply receives a pointer to the stored value, so it may replace the value
 * in place. After each call the table's timestamp is asserted to be
 * unchanged, i.e. apply must not call Table_put or Table_remove on this
 * table. table and apply must be non-NULL (asserted).
 */
void Table_map(T table,
void apply(const void *key, void **value, void *cl), void *cl)
{
    struct binding *node;
    unsigned snapshot;
    int slot;

    assert(table);
    assert(apply);

    snapshot = table->timestamp;
    for (slot = 0; slot < table->size; slot++)
    {
        node = table->buckets[slot];
        while (node != NULL)
        {
            apply(node->key, &node->value, cl);
            assert(table->timestamp == snapshot);
            node = node->link;
        }
    }
}
/*
 * Removes the binding for key and returns its value, or returns NULL when
 * key is not present.
 *
 * The timestamp is incremented whether or not a binding is found.
 * table and key must be non-NULL (asserted).
 */
void *Table_remove(T table, const void *key)
{
    struct binding **cursor;
    int slot;

    assert(table);
    assert(key);

    table->timestamp++;
    slot = (*table->hash) (key) % table->size;

    /* cursor always addresses the pointer that leads to the current node,
     * so unlinking needs no special case for the chain head. */
    cursor = &table->buckets[slot];
    while (*cursor != NULL)
    {
        struct binding *victim = *cursor;

        if ((*table->cmp)(key, victim->key) == 0)
        {
            void *value = victim->value;

            *cursor = victim->link;
            FREE(victim);
            table->length--;
            return value;
        }
        cursor = &victim->link;
    }
    return NULL;
}
/*
 * Returns a newly allocated array of 2*length+1 pointers containing every
 * binding as a key followed by its value, terminated by end.
 *
 * Bindings appear in bucket order. The caller owns the returned array.
 * table must be non-NULL (asserted).
 */
void **Table_toArray(T table, void *end)
{
    struct binding *node;
    void **result;
    void **out;
    int slot;

    assert(table);

    result = ALLOC((2*table->length+1)*sizeof(*result));
    out = result;
    for (slot = 0; slot < table->size; slot++)
    {
        for (node = table->buckets[slot]; node != NULL; node = node->link)
        {
            /* keys are stored const; the array exposes them as void *. */
            *out++ = (void *)node->key;
            *out++ = node->value;
        }
    }
    *out = end;
    return result;
}
/*
 * Frees every binding in *table, then the table itself; FREE also sets
 * *table to NULL. Keys and values are not freed.
 * table and *table must be non-NULL (asserted).
 */
void Table_free(T *table)
{
    assert(table && *table);

    /* An empty table has no bindings to release. */
    if ((*table)->length > 0)
    {
        int slot;

        for (slot = 0; slot < (*table)->size; slot++)
        {
            struct binding *node = (*table)->buckets[slot];

            while (node != NULL)
            {
                /* Save the successor before the node is released. */
                struct binding *next = node->link;

                FREE(node);
                node = next;
            }
        }
    }
    FREE(*table);
}
wf.c
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "atom.h"
#include "table.h"
#include "mem.h"
#include "getword.h"
/*
 * Word-start predicate for getword: returns isalpha(c), i.e. nonzero when c
 * is an alphabetic character.
 */
int first(int c)
{
    int is_letter = isalpha(c);

    return is_letter;
}
/*
 * Word-continuation predicate for getword: returns 1 when c is an
 * alphabetic character or an underscore, 0 otherwise.
 */
int rest(int c)
{
    return c == '_' || isalpha(c);
}
/*
 * Table_map callback that frees the value stored in a binding; FREE also
 * sets the stored pointer to NULL. key and l are not used.
 */
void vfree(const void *key, void **count, void *l)
{
    (void)key;
    (void)l;
    FREE(*count);
}
/*
 * qsort comparison function: x and y each point to a pointer to a
 * NUL-terminated string; the two strings are ordered with strcmp.
 */
int compare(const void *x, const void *y)
{
    const char *const *left = x;
    const char *const *right = y;

    return strcmp(*left, *right);
}
/*
 * Counts how often each word occurs in fp and prints the counts.
 *
 * Words are read with getword (first/rest decide what a word is), folded to
 * lower case, turned into atoms and used as table keys; each value is a
 * heap-allocated int counter. When name is non-NULL it is printed as a
 * heading. The results are printed sorted by word as "count<TAB>word", and
 * all counters and the table are released before returning.
 */
void wf(char *name, FILE *fp)
{
    Table_T table = Table_new(0, NULL, NULL);
    char buf[128];
    void **array;
    int i;

    while (getword(fp, buf, sizeof(buf), first, rest))
    {
        const char *word;
        int *count;

        /* <ctype.h> functions require a value representable as unsigned
         * char (or EOF); plain char may be negative, so convert first. */
        for (i = 0; buf[i] != '\0'; i++)
        {
            buf[i] = (char)tolower((unsigned char)buf[i]);
        }

        word = Atom_string(buf);
        count = Table_get(table, word);
        if (count)
        {
            (*count)++;
        }
        else
        {
            NEW(count);
            *count = 1;
            Table_put(table, word, count);
        }
    }

    if (name)
    {
        printf("%s:\n", name);
    }

    /* The array holds key/value pairs, so each qsort element is two
     * pointers wide and compare only looks at the leading key. */
    array = Table_toArray(table, NULL);
    qsort(array, Table_length(table), 2*sizeof(*array), compare);
    for (i = 0; array[i]; i += 2)
    {
        printf("%d\t%s\n", *(int *)array[i+1], (char *) array[i]);
    }
    FREE(array);

    /* Release the counters first; Table_free does not free values. */
    Table_map(table, vfree, NULL);
    Table_free(&table);
}
/*
 * Entry point: runs wf on every file named on the command line, or on
 * standard input when no file arguments are given.
 *
 * Returns EXIT_FAILURE as soon as a file cannot be opened (after reporting
 * the error on stderr), otherwise EXIT_SUCCESS.
 */
int main(int argc, char *argv[])
{
    int arg;

    for (arg = 1; arg < argc; arg++)
    {
        FILE *input = fopen(argv[arg], "r");

        if (input == NULL)
        {
            fprintf(stderr, "%s: can't open '%s' (%s)\n",argv[0], argv[arg], strerror(errno));
            return EXIT_FAILURE;
        }
        wf(argv[arg], input);
        fclose(input);
    }

    /* No file arguments: read from standard input without a heading. */
    if (argc == 1)
    {
        wf(NULL, stdin);
    }
    return EXIT_SUCCESS;
}
实际上,表的结构在原子和内存管理里面都用到了,我们可以对比这三幅图:
上图为创建的原子存储结构,原子通过hash数组存储。
上图为内存管理的存储结构,内存通过htab数组来存储。
上图为表结构,表中的元素为链接,通过buckets数组存储,而数组又是表结构的元素。
前两幅图和表结构图的区别在于,前两幅图的数组没有关联到表中,实际用法基本一致:都是通过hash值来查找在数组中的位置,数组中的元素又是以链表形式存在的。
本章的实例,抛开细节,整体流程理解起来还是相对容易的;比较麻烦的是细节部分用到了二级指针,以及把函数指针作为参数传入函数。