问题描述
【问题描述】
“交叉引用生成器”是指具有如下功能的程序:
对输入的一篇文档,统计出现的所有单词及其所在的行号。将其结果以规定的格式输出。
【输入形式】
程序从crossin.txt读入一篇文档。该文档由若干行组成,每行中包含一系列的单词。行号由1开始计数。该文档的单词量不超过200,每个单词最大长度不超过20个字符。
【输出形式】
将输入文档中出现的单词及其所在行号输出到文件crossout.txt中。
输出有若干行,每一行都是文档中出现的一个单词。按如下规格输出(无空格):
word:line1,line2,…lineN
输出时遵循以下规定:
1.只输出所有由英文字母(包括连字符)构成的单词,数字或包含其他特殊字符的单词不用输出。先输出大写A–Z开头的单词,再输出小写a–z开头的单词
2.各单词后面的行号从小到大排列。行号不重复打印,即一个单词如果在一行中出现多次,只打印一次该行号
3.统计的单词不包括如下:a, an, the, and
【样例输入】
Alcatel provides end-to-end solutions.
It enables enterprises to deliver content to any type of user.
lcatel operates in 130 countries.
Alcatel focus on optimizing their service offerings and revenue streams.
【样例输出】
Alcatel:1,4
It:2
any:2
content:2
countries:3
deliver:2
enables:2
end:1
enterprises:2
focus:4
in:3
lcatel:3
of:2
offerings:4
on:4
operates:3
optimizing:4
provides:1
revenue:4
service:4
solutions:1
streams:4
their:4
to:1,2
type:2
user:2
和之前的内容相比,题目有一定的变化,要求更多了(之前只是将单词分割出来,并建立一个二叉树统计)参见二叉树统计单词
自做代码:C语言编写
//第一次解决
/*
2020/4/6
*/
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<ctype.h>
#define MAXWORD 100   /* size of a word buffer, terminating '\0' included */
#define MAXLINES 200  /* line numbers kept per word; the task allows at most 200 words */

/* Forward typedef so that plain `Node` names the type in C as well as in C++. */
typedef struct Node Node;

/*
 * One node of the binary search tree that maps a word to the line numbers
 * on which it was seen.
 *
 *   word    - the word itself, NUL-terminated
 *   nlines  - number of used entries in line[]
 *   line    - line numbers in the order they were recorded
 *   lchild  - subtree of words that compare lower  (strcmp order)
 *   rchild  - subtree of words that compare higher (strcmp order)
 */
struct Node
{
    char word[MAXWORD];
    int nlines;
    int line[MAXLINES];
    Node* lchild;
    Node* rchild;
};
/*
 * Read the next token from the stream `in` into `word`.
 *
 * Leading white space is skipped, except that a newline is reported to the
 * caller on its own so that lines can be counted.
 *
 *   word - destination buffer, always left NUL-terminated
 *   lim  - size of `word` in bytes; must be at least 2
 *   in   - stream to read from
 *
 * Returns
 *   '\n'     when a newline was reached (word is the empty string),
 *   EOF      at end of input (word is the empty string),
 *   the character itself for a single non-letter character (word holds it),
 *   word[0]  for a word: a letter followed by letters/digits.
 *
 * A word longer than lim-1 characters is cut at lim-1; the remaining
 * characters are delivered by the following calls.
 */
int getword(char *word,int lim,FILE* in)
{
    int c;              /* int, not char, so that EOF stays distinguishable */
    char *w = word;

    while (isspace(c = fgetc(in)))
    {
        if (c == '\n')
        {
            *w = '\0';  /* nothing is stored for a bare newline */
            return c;
        }
    }

    if (c != EOF)
        *w++ = (char)c;
    if (!isalpha(c))
    {
        *w = '\0';
        return c;
    }

    /* word[0] is already filled; keep one byte free for the terminator. */
    for (int used = 1; used < lim - 1; used++)
    {
        c = fgetc(in);
        if (!isalnum(c))
        {
            /* Give the delimiter back so the next call sees it. */
            if (c != EOF)
                ungetc(c, in);
            break;
        }
        *w++ = (char)c;
    }
    *w = '\0';
    return word[0];
}
/*
 * Record that `word` occurs on line `curline` in the search tree `root`.
 *
 *   root    - tree to update, or NULL for an empty tree
 *   word    - NUL-terminated word to record
 *   curline - line number of this occurrence
 *
 * Returns the (possibly newly allocated) root of the updated tree; callers
 * must store the result back, as main does with `root = insert(...)`.
 *
 * Only the most recently stored line number is compared against `curline`,
 * so repeated occurrences on the same line are stored once provided the
 * caller feeds line numbers in non-decreasing order.
 * The process is terminated if a node cannot be allocated.
 */
Node* insert(Node* root,char *word,int curline)
{
    if (root == NULL)
    {
        root = (Node*)malloc(sizeof *root);
        if (root == NULL)
        {
            fprintf(stderr, "insert: out of memory\n");
            exit(EXIT_FAILURE);
        }
        /* Bounded copy: never writes past root->word. */
        snprintf(root->word, sizeof root->word, "%s", word);
        root->nlines = 1;
        root->line[0] = curline;
        root->lchild = root->rchild = NULL;
        return root;
    }

    int condition = strcmp(root->word, word);
    if (condition > 0)
    {
        root->lchild = insert(root->lchild, word, curline);
    }
    else if (condition < 0)
    {
        root->rchild = insert(root->rchild, word, curline);
    }
    else if (root->line[root->nlines - 1] != curline)
    {
        int capacity = (int)(sizeof root->line / sizeof root->line[0]);
        if (root->nlines < capacity)
        {
            root->line[root->nlines] = curline;
            root->nlines++;
        }
        else
        {
            /* Dropping the entry is better than writing past the array. */
            fprintf(stderr, "insert: too many lines for \"%s\", line %d ignored\n",
                    root->word, curline);
        }
    }
    return root;
}
/*
 * Write the tree to `out` by in-order traversal, one "word:l1,l2,...,lN"
 * line per node, so words appear in ascending strcmp order.
 * Nodes whose word is exactly "a", "an", "the" or "and" are not printed;
 * their subtrees are still visited.
 */
void printToFile(Node* root,FILE *out)
{
    static const char *const skipped[] = { "a", "an", "the", "and" };
    int excluded = 0;
    int k;
    int idx;

    if (root == NULL)
        return;

    printToFile(root->lchild, out);

    for (k = 0; k < (int)(sizeof skipped / sizeof skipped[0]); k++)
    {
        if (strcmp(root->word, skipped[k]) == 0)
            excluded = 1;
    }

    if (!excluded)
    {
        fprintf(out, "%s:", root->word);
        for (idx = 0; idx < root->nlines; idx++)
        {
            /* Every number but the last is followed by a comma. */
            if (idx != root->nlines - 1)
                fprintf(out, "%d,", root->line[idx]);
            else
                fprintf(out, "%d\n", root->line[idx]);
        }
    }

    printToFile(root->rchild, out);
}
/*
 * Build the cross-reference of crossin.txt and write it to crossout.txt.
 * Returns 0 on success and EXIT_FAILURE when a file cannot be opened or
 * the output cannot be completely written.
 */
int main()
{
    int c;              /* int so that the comparison with EOF is reliable */
    int curline = 1;
    char word[MAXWORD];
    Node* root = NULL;
    FILE *in;
    FILE *out;

    in = fopen("crossin.txt","r");
    if (in == NULL)
    {
        perror("crossin.txt");
        return EXIT_FAILURE;
    }
    out = fopen("crossout.txt","w");
    if (out == NULL)
    {
        perror("crossout.txt");
        fclose(in);
        return EXIT_FAILURE;
    }

    while ((c = getword(word,MAXWORD,in)) != EOF)
    {
        if (c == '\n')
        {
            curline++;
        }
        /* Only tokens that start with a letter are words. */
        if (isalpha((unsigned char)word[0]))
        {
            root = insert(root,word,curline);
        }
    }

    printToFile(root,out);
    fclose(in);
    if (fclose(out) != 0)
    {
        perror("crossout.txt");
        return EXIT_FAILURE;
    }
    return 0;
}
自做代码:C++编写
//第二次解决
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<ctype.h>
// Size of a word buffer, terminating '\0' included.
const int MAXWORD = 100;

// Binary-search-tree node that maps one word to the lines it was seen on.
struct Node
{
    char  word[MAXWORD];   // the word, NUL-terminated
    int   nline;           // number of used entries in lines[]
    int   lines[200];      // recorded line numbers, in insertion order
    Node *lchild;          // words that compare lower  (strcmp order)
    Node *rchild;          // words that compare higher (strcmp order)
};
/*
 * Read the next token from the stream `in` into `word`.
 *
 * Leading white space is skipped, except that a newline is reported to the
 * caller on its own so that lines can be counted.
 *
 *   word - destination buffer, always left NUL-terminated
 *   lim  - size of `word` in bytes; must be at least 2
 *   in   - stream to read from
 *
 * Returns
 *   '\n'     when a newline was reached (word is the empty string),
 *   EOF      at end of input (word is the empty string),
 *   the character itself for a single non-letter character (word holds it),
 *   word[0]  for a word: a letter followed by letters and/or hyphens.
 *
 * A word longer than lim-1 characters is cut at lim-1; the remaining
 * characters are delivered by the following calls.
 */
int getword(char word[],int lim,FILE* in)
{
    int ch;             // int, not char: fgetc() may return EOF
    char *w = word;

    while (isspace(ch = fgetc(in)))
    {
        if (ch == '\n')
        {
            *w = '\0';
            return ch;
        }
    }

    if (ch != EOF)
    {
        *w++ = (char)ch;
    }
    if (!isalpha(ch))
    {
        *w = '\0';
        return ch;
    }

    // word[0] is already filled; keep one byte free for the terminator.
    for (int used = 1; used < lim - 1; used++)
    {
        ch = fgetc(in);
        if (!isalpha(ch) && ch != '-')
        {
            // Give the delimiter back so the next call sees it.
            if (ch != EOF)
                ungetc(ch, in);
            break;
        }
        *w++ = (char)ch;
    }
    *w = '\0';
    return word[0];
}
// Record that `word` occurs on line `curLine` in the search tree `root`.
//
//   root    - reference to the tree (or subtree) pointer; set to a newly
//             allocated node when it is NULL
//   word    - NUL-terminated word to record
//   curLine - line number of this occurrence
//
// Only the most recently stored line number is compared against `curLine`,
// so repeated occurrences on the same line are stored once provided the
// caller feeds line numbers in non-decreasing order.
// The process is terminated if a node cannot be allocated.
void insert(Node* &root,char word[],int curLine)
{
    if (root == NULL)
    {
        root = (Node*)malloc(sizeof *root);
        if (root == NULL)
        {
            fprintf(stderr, "insert: out of memory\n");
            exit(EXIT_FAILURE);
        }
        // Bounded copy: never writes past root->word.
        snprintf(root->word, sizeof root->word, "%s", word);
        root->nline = 1;
        root->lines[0] = curLine;
        root->lchild = root->rchild = NULL;
        return ;
    }

    int cmp = strcmp(root->word,word);
    if (cmp > 0)
    {
        insert(root->lchild,word,curLine);
    }
    else if (cmp < 0)
    {
        insert(root->rchild,word,curLine);
    }
    else if (root->lines[root->nline-1] != curLine)
    {
        const int capacity = (int)(sizeof root->lines / sizeof root->lines[0]);
        if (root->nline < capacity)
        {
            root->lines[root->nline] = curLine;
            root->nline++;
        }
        else
        {
            // Dropping the entry is better than writing past the array.
            fprintf(stderr, "insert: too many lines for \"%s\", line %d ignored\n",
                    root->word, curLine);
        }
    }
}
// Write the tree to `out` by in-order traversal, one "word:l1,l2,...,lN"
// line per node, so words appear in ascending strcmp order.
// Nodes whose word is exactly "a", "an", "the" or "and" are not printed.
//
// The right subtree is visited for every node, including the skipped ones;
// otherwise every word stored to the right of a skipped word would be lost.
void midOrder(Node* root,FILE* out)
{
    if (root == NULL)
    {
        return;
    }

    midOrder(root->lchild,out);

    int skip = strcmp(root->word,"a") == 0
            || strcmp(root->word,"an") == 0
            || strcmp(root->word,"the") == 0
            || strcmp(root->word,"and") == 0;
    if (!skip)
    {
        fprintf(out,"%s:",root->word);
        for (int i = 0; i < root->nline; i++)
        {
            // Every number but the last is followed by a comma.
            if (i == root->nline - 1)
                fprintf(out,"%d\n",root->lines[i]);
            else
                fprintf(out,"%d,",root->lines[i]);
        }
    }

    midOrder(root->rchild,out);
}
// Build the cross-reference of crossin.txt and write it to crossout.txt.
// Returns 0 on success and EXIT_FAILURE when a file cannot be opened or
// the output cannot be completely written.
int main()
{
    char word[MAXWORD];
    int curLine = 1;
    int c;              // int so that the comparison with EOF is reliable
    Node* root = NULL;

    FILE *in = fopen("crossin.txt","r");
    if (in == NULL)
    {
        perror("crossin.txt");
        return EXIT_FAILURE;
    }
    FILE *out = fopen("crossout.txt","w");
    if (out == NULL)
    {
        perror("crossout.txt");
        fclose(in);
        return EXIT_FAILURE;
    }

    while ((c = getword(word,MAXWORD,in)) != EOF)
    {
        if (c == '\n')
            curLine++;
        else if (isalpha((unsigned char)word[0]))
        {
            // Only tokens that start with a letter are words.
            insert(root,word,curLine);
        }
    }

    midOrder(root,out);
    fclose(in);
    if (fclose(out) != 0)
    {
        perror("crossout.txt");
        return EXIT_FAILURE;
    }
    return 0;
}