统计输入中所有单词的出现次数,使用二叉树结构
#define BUFSIZE 100
char buf[BUFSIZE];
int bufp=0;
int getch(void) //从缓冲区读取字符
{
return (bufp>0) ? buf[--bufp]:getchar();
}
//把字符压回共享缓存区
void ungetch(int c)
{
if(bufp>=BUFSIZE)
printf("ungetch:too many characters\n");
else
buf[bufp++] =c;
}
统计输入中所有单词的出现次数,使用二叉树结构
#include <stdio.h>
#include <ctype.h>
#include <string.h>
typedef struct tnode{
char *word;
int count;
struct tnode *left;
struct tnode *right;
}tnode;
#define MAXWORD 100
tnode* addtree(struct tnode*,char*);
void treeprint(tnode*);
int getword(char*,int);
int main()
{
tnode *root;
char word[MAXWORD];
root=NULL;
while(getword(word,MAXWORD) !=EOF)
if(isalpha(word[0]))
root =addtree(root,word);
treeprint(root);
return 0;
}
tnode *talloc(void);
char *strdup(char*);
tnode *addtree(tnode *p,char *w)
{
int cond;
if(p ==NULL)
{
p=talloc();
p->word=strdup(w);
p->count=1;
p->left=p->right=NULL;
}
else if((cond=strcmp(w,p->word)) ==0)
p->count++;
else if(cond<0)
p->left=addtree(p->left,w);
else
p->right=addtree(p->right,w);
return p;
}
void treeprint(tnode*p)
{
if(p !=NULL)
{
treeprint(p->left);
printf("%4d %s\n",p->count,p->word);
treeprint(p->right);
}
}
#include <stdlib.h>
tnode*talloc(void)
{
return(tnode*)malloc(sizeof(tnode));
}
char *strdup(char *s)
{
char *p;
p=(char*)malloc(strlen(s)+1);
if(p!=NULL)
strcpy(p,s);
return p;
}
#include <stdio.h>
#include <ctype.h>
int comment(void)
{
int c;
while((c=getch())!=EOF)
if(c=='*')
if((c=getch()) =='/')
break;
else
ungetch(c);
return c;
}
//6-1 处理下划线、字符串常数、注释及预编译器控制指令
int getword(char*word,int lim)
{
int c,d;
char *w=word;
while(isspace(c=getch()))
;
if(c!=EOF)
*w++ =c;
if(isalpha(c) ||c=='_'||c=='#')
{
for(;--lim>0;w++)
if(!isalnum(*w=getch()) &&*w!='#')
{
ungetch(*w);
break;
}
}
else if(c=='\'' ||c=='"')
{
for(;--lim>0;w++)
if((*w=getch()) =='\\')
*++w=getch();
else if(*w==c)
{
w++;
break;
}
else if(*w==EOF)
break;
}
else if(c=='/')
if((d=getch())=='*')
c=comment();
else
ungetch(d);
*w='\0';
return c;
}
6-2 编写一个程序,用以读入一个C语言程序,并按照字符表顺序分组打印变量名,要求每一组各变量名的前6个字符相同,其余字符不同。字符串和注释中的单词不予考虑。请将6作为一个可在命令行中设定的参数
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
typedef struct tnode{
char *word;
int match;
struct tnode *left;
struct tnode *right;
}tnode;
#define MAXWORD 100
#define YES 1
#define NO 0
int compare(char*s,tnode*p,int num,int*found)
{
int i;
char*t=p->word;
for(i=0;*s==*t;i++,s++,t++)
if(*s=='\0')
return 0;
if(i>=num)
{
*found=YES;
p->match=YES;
}
return *s-*t;
}
tnode*addtreex(tnode *p,char*w,int num,int *found)
{
int cond;
if(p ==NULL){
p=talloc();
p->word=strdup(w);
p->match=*found;
p->left=p->right=NULL;
}
else if((cond=compare(w,p,num,found))<0)
p->left= addtreex(p->left,w,num,found);
else if(cond>0)
p->right=addtreex(p->right,w,num,found);
return p;
}
void treexprint(tnode *p)
{
if(p !=NULL) //中序遍历打印,完成分组打印效果
{
treexprint(p->left);
if(p->match)
printf("%s\n",p->word);
treexprint(p->right);
}
}
int main(int argc,char* argv[])
{
tnode *root;
char word[MAXWORD];
int found=NO; //YES if match was found
int num;
num=(--argc&&(*++argv)[0]== '-')?atoi(argv[0]+1):6;
root=NULL;
while(getword(word,MAXWORD) !=EOF){
if(isalpha(word[0])&&strlen(word)>=num) //单词第一个字符是字母且长度大于等于num
root=addtreex(root,word,num,&found);
found=NO;
}
treexprint(root);
return 0;
}
6-3 编写一个交叉引用程序,打印文档中所有单词的列表,并且每个单词还有一个列表,记录出现过该单词的行号.对the、and等非实义单词不予考虑。
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#define MAXWORD 100
struct linklist{ //linked list of line numbers
int lnum;
struct linklist *ptr;
};
struct tnode{
char *word;
struct linklist*lines; //line numbers
struct tnode *left;
struct tnode *right;
};
//lalloc:make a linklist node
struct linklist *lalloc(void)
{
return (struct linklist*)malloc(sizeof(struct linklist));
}
void addin(struct tnode*p,int linenum)
{
struct linklist*temp;
temp=p->lines;
while(temp->ptr !=NULL &&temp->lnum!=linenum) //检查链表中是否存在相同的行号
temp=temp->ptr;
if(temp->lnum !=linenum)
{
temp->ptr=lalloc();
temp->ptr->lnum=linenum;
temp->ptr->ptr=NULL;
}
}
struct tnode *addtreex(struct tnode*p,char*w,int linenum)
{
int cond;
if(p ==NULL)
{
p=talloc();
p->word=strdup(w);
p->lines=lalloc();
p->lines->lnum=linenum;
p->lines->ptr=NULL;
p->left=p->right=NULL;
}else if((cond=strcmp(w,p->word))==0)
addin(p,linenum);
else if(cond<0)
p->left=addtreex(p->left,w,linenum);
else
p->right=addtreex(p->right,w,linenum);
return p;
}
void treexprint(struct tnode *p)
{
struct linklist *temp;
if(p!=NULL)
{
treexprint(p->left);
printf("%10s:",p->word);
for(temp=p->lines;temp!=NULL;temp=temp->ptr)
printf("%4d",temp->lnum);
printf("\n");
treexprint(p->right);
}
}
int noiseword(char *w)
{
static char *nw[]={
"a",
"an",
"are",
"in",
"is",
"of",
"or",
"that",
"the",
"this",
"to"
};
int cond,mid;
int low=0;
int high=sizeof(nw)/sizeof(char*)-1;
while(low<=high)
{
mid=(low+high)/2;
if((cond=strcmp(w,nw[mid]))<0)
high=mid-1;
else if(cond>0)
low=mid+1;
else
return mid;
}
return -1;
}
int getword(char*word,int lim)
{
int c,d;
char *w=word;
while(isspace(c=getch())&&c!='\n' ) //返回换行符
;
if(c!=EOF)
*w++ =c;
if(isalpha(c) ||c=='_'||c=='#')
{
for(;--lim>0;w++)
if(!isalnum(*w=getch()) &&*w!='#')
{
ungetch(*w);
break;
}
}
else if(c=='\'' ||c=='"')
{
for(;--lim>0;w++)
if((*w=getch()) =='\\')
*++w=getch();
else if(*w==c)
{
w++;
break;
}
else if(*w==EOF)
break;
}
else if(c=='/')
if((d=getch())=='*')
c=comment();
else
ungetch(d);
*w='\0';
return c;
}
int main()
{
struct tnode *root;
char word[MAXWORD];
int linenum=1;
root=NULL;
while(getword(word,MAXWORD)!=EOF)
{
if(isalpha(word[0]) &&noiseword(word)==-1)
root=addtreex(root,word,linenum);
else if(word[0]=='\n')
linenum++;
}
treexprint(root);
return 0;
}
6-4 编写一个程序,根据单词的出现频率按降序打印输入的各个不同单词,并在每个单词的前面标上它的出现次数。
#include <stdio.h>
#include <ctype.h>
#define MAXWORD 100
#define NDISTINCT 1000
typedef struct tnode{
char *word;
int count;
struct tnode *left;
struct tnode *right;
}tnode;
tnode *list[NDISTINCT];//pointers to tree nodes
int ntn=0; //number of tree nodes
//存储树节点到list数组中
void treestore(tnode*p)
{
if(p!=NULL)
{
treestore(p->left);
if(ntn<NDISTINCT)
list[ntn++]=p;
treestore(p->right);
}
}
//对list数组进行排序
void sortlist()
{
int gap,i,j;
tnode*temp;
for(gap=ntn/2;gap>0;gap /=2)
for(i=gap;i<ntn;i++)
for(j=i-gap;j>=0;j -=gap)
{
if( (list[j]->count) >=(list[j+gap]->count) )
break;
temp=list[j];
list[j]=list[j+gap];
list[j+gap]=temp;
}
}
tnode *addtree(tnode *p,char *w)
{
int cond;
if(p ==NULL)
{
p=talloc();
p->word=strdup(w);
p->count=1;
p->left=p->right=NULL;
}
else if((cond=strcmp(w,p->word)) ==0)
p->count++;
else if(cond<0)
p->left=addtree(p->left,w);
else
p->right=addtree(p->right,w);
return p;
}
int main()
{
tnode*root;
char word[MAXWORD];
int i;
root=NULL;
while(getword(word,MAXWORD)!=EOF)
if(isalpha(word[0]))
root=addtree(root,word);
treestore(root);
sortlist();
for(i=0;i<ntn;i++)
printf("%2d:%20s\n",list[i]->count,list[i]->word);
return 0;
}
表查找:采用散列查找方法
//表查找 采用散列查找方法
struct nlist{
struct nlist *next; //next entry in chain
char*name; //defined name
char*defn; //replacement text
};
#define HASHSIZE 101
static struct nlist *hashtab[HASHSIZE]; //pointer table
//hash:from hash value for string s
unsigned hash(char*s)
{
unsigned hashval;
for(hashval=0;*s!='\0';s++)
hashval=*s+31*hashval;
return hashval%HASHSIZE;
}
//在表中查找
struct nlist* lookup(char*s)
{
struct nlist*np;
//循环链表查找
for(np=hashtab[hash(s)];np!=NULL;np=np->next)
if(strcmp(s,np->name)==0)
return np;
return NULL; //not found
}
//将名字s和替换文本t记录到某个表中
struct nlist* install(char*name,char*defn)
{
struct nlist*np;
unsigned hashval;
if((np=lookup(name))==NULL)//not found
{
np=(struct nlist*)malloc(sizeof(*np));
if(np==NULL||(np->name=strdup(name))==NULL)
return NULL;
hashval=hash(name);
np->next=hashtab[hashval];
hashtab[hashval]=np;
}
else //already there
{
free((void*)np->defn);
}
//新的定义代替
if((np->defn=strdup(defn))==NULL)
return NULL;
return np;
}
6-5 编写一个函数,它将从由lookup和install维护的表中删除一个变量名及其定义
void undef(char *s)
{
int h;
struct nlist *prev,*np;
prev=NULL;
h=hash(s);
for(np=hashtab[h];np!=NULL;np=np->next)
{
if(strcmp(s,np->name)==0)
break;
prev=np; //记录前节点
}
if(np!=NULL)
{
if(prev==NULL)
hashtab[h]=np->next;
else
prev->next=np->next;
free((void*)np->name);
free((void*)np->defn);
free((void*)np);
}
}
6-6 编写一个适合C语言程序使用的#define处理器的简单版本(即无参数的情况)。你会发现getch和ungetch函数非常有用
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define MAXWORD 100
//print error message and skip the rest of the line
void error(int c,char *s)
{
printf("error:%s\n",s);
while(c !=EOF&&c!='\n')
c=getch();
}
void skipblanks(void)
{
int c;
while((c=getch())==' '||c=='\t')
;
ungetch(c);
}
void getdef(void)
{
int c,i;
char def[MAXWORD],dir[MAXWORD],name[MAXWORD];
skipblanks();
if(!isalpha(getword(dir,MAXWORD)))
error(dir[0],
"getdef:expecting a directive after #");
else if(strcmp(dir,"define")==0)
{
skipblanks();
if(!isalpha(getword(name,MAXWORD)))
error(name[0],
"getdef:non-alpha-name expected");
else
{
skipblanks();
for(i=0;i<MAXWORD-1;i++)
if((def[i]=getch())==EOF ||def[i]=='\n')
break; //end of definition
def[i]='\0';
if(i<=0) //no definition
error('\n',"getdef:incomplete define");
else
install(name,def);
}
}
else if(strcmp(dir,"undef")==0)
{
skipblanks();
if(!isalpha(getword(name,MAXWORD)))
error(name[0],"getdef:non-alpha in undef");
else
undef(name);
}
else
error(dir[0],
"getdef:expecting a directive after #");
}
void ungets(char s[])
{
int len=strlen(s);
while(len>0)
ungetch(s[--len]);
}
int main()
{
char w[MAXWORD];
struct nlist *p;
while(getword(w,MAXWORD) !=EOF)
if(strcmp(w,"#")==0) //beginning of direct
getdef();
else if(!isalpha(w[0])) //cannot be defined
printf("%s",w);
else if((p=lookup(w))==NULL) //not defined
printf("%s",w);
else
ungets(p->defn); //push definition
return 0;
}