BUAA 数据结构总结——大作业(文本摘要生成)
无注释单函数平推地狱版
本程序使用了哈希(HASH)查找;“亮点”在于几乎没有自定义函数,代码质量非常差。
逻辑比较混乱,还使用了一个 goto 语句,几乎无法调试(debug),但是通过了评测。
注释:使用了 BKDRHash——名字听起来很高端,其实做法很简单:逐个字符地把当前哈希值乘以一个固定的乘数 MULT,再加上该字符的编码。
MULT 通常取 31、131、1313、13131、131313……(本程序取 31)。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Multiplier of the polynomial (BKDR-style) word hash. */
#define MULT 31
/* Modulus applied to a word hash to pick a bucket of the stop-word table. */
#define HASH1 100000
/* Modulus applied to a word hash to pick a bucket of the word-count table. */
#define HASH2 1000000
/* Number of entries in the sentence table. */
#define MAX 500000
/* One stop word, stored as a node of a singly linked hash-bucket chain. */
struct A
{
/* First character of the word inside the loaded stop-word text. The word is
   not NUL-terminated on its own: it ends at the first character that is not
   a lowercase letter. */
char *w;
/* Next node in the same bucket, or NULL at the end of the chain. */
struct A *p;
};
/* Stop-word hash table: bucket heads, indexed by (word hash % HASH1). */
struct A *SW[HASH1 + 5];
/* One distinct non-stop word of the article together with its frequency,
   stored as a node of a singly linked hash-bucket chain. */
struct B
{
/* First occurrence of the word inside the lowercased article buffer. The
   word ends at the first character that is not a lowercase letter. */
char *w;
/* Number of times the word occurs in the article. */
unsigned int n;
/* Next node in the same bucket, or NULL at the end of the chain. */
struct B *p;
};
/* Word-frequency hash table: bucket heads, indexed by (word hash % HASH2). */
struct B *nsword[HASH2 + 5];
/* One sentence of the article: its score and where it starts. */
struct SS
{
/* Score: sum of the article-wide frequencies of the non-stop words that
   appear in the sentence. */
unsigned int S;
/* Offset in the article buffer of the first character after the previous
   sentence terminator (0 for the first sentence). */
unsigned int F;
} SS[MAX]; /* sentence table, filled in article order and then sorted by score */
/*
 * Compare the lowercase-letter word that starts at `a` with the one that
 * starts at `b`.  A word is the longest run of characters in 'a'..'z'; it
 * is terminated by any other character, not necessarily by a NUL.
 *
 * Returns 1 when both words have the same letters and the same length,
 * 0 otherwise.
 */
int judge(char *a, char *b)
{
    /* Advance through both words in lockstep for as long as `a` has letters. */
    while (*a >= 'a' && *a <= 'z')
    {
        if (*a != *b)
            return 0;
        a++;
        b++;
    }
    /* `a` has ended; the words match only if `b` ends at the same place. */
    return !(*b >= 'a' && *b <= 'z');
}
int main()
{
FILE *f1, *f2, *FOUT;
f1 = fopen("stopwords.txt", "r");
fseek(f1, 0, SEEK_END);
unsigned int sSize = ftell(f1);
rewind(f1);
char *text = (char *)malloc(sSize + 1);
fread(text, 1, sSize, f1);
*(text + sSize) = 0;
fclose(f1);
unsigned int h = 0;
unsigned int i, j;
struct A *p1;
j = 0;
for (i = 0; i < sSize; i++)
{
if (*(text + i) >= 'a' && *(text + i) <= 'z')
{
h = MULT * h + *(text + i) - 'a' + 1;
}
else
{
if ((i - j) && (h != 0))
{
h = h % HASH1;
p1 = (struct A *)malloc(sizeof(struct A));
p1->w = text + j;
p1->p = SW[h];
SW[h] = p1;
}
j = i + 1;
h = 0;
}
}
f2 = fopen("article.txt", "r");
struct B *p2;
i = 0;
fseek(f2, 0, SEEK_END);
unsigned int Size2 = ftell(f2);
rewind(f2);
char *text2;
text2 = (char *)malloc(Size2 + 1);
fread(text2, 1, Size2, f2);
fclose(f2);
*(text2 + Size2) = '.';
char *text3 = (char *)malloc(Size2 + 1);
for (i = 0; i <= Size2; i++)
*(text3 + i) = *(text2 + i);
j = 0;
h = 0;
for (i = 0; i <= Size2; i++)
{
if (*(text2 + i) >= 'A' && *(text2 + i) <= 'Z')
*(text2 + i) = *(text2 + i) + 32;
if (*(text2 + i) >= 'a' && *(text2 + i) <= 'z')
{
h = MULT * h + *(text2 + i) - 'a' + 1;
}
else
{
if ((i - j) && (h != 0))
{
p1 = SW[h % HASH1];
while (p1 != NULL)
{
if (judge(p1->w, (text2 + j)))
{
for (; j < i; j++)
*(text2 + j) = ' ';
goto next1;
}
p1 = p1->p;
}
h = h % HASH2;
p2 = nsword[h];
{
while (p2 != NULL)
{
if (judge(p2->w, (text2 + j)))
{
p2->n++;
goto next1;
}
p2 = p2->p;
}
p2 = (struct B *)malloc(sizeof(struct B));
p2->w = (text2 + j);
p2->n = 1;
p2->p = nsword[h];
nsword[h] = p2;
}
}
next1:
h = 0;
j = i + 1;
}
}
unsigned int is = 0;
j = 0;
int js = 0;
h = 0;
unsigned int sum = 0;
for (i = 0; i <= Size2; i++)
{
if (*(text2 + i) == '.' || *(text2 + i) == '!' || *(text2 + i) == '?')
{
if ((i - j) && (h != 0))
{
p2 = nsword[h % HASH2];
while (p2 != NULL)
{
if (judge(p2->w, (text2 + j)))
{
SS[is++].S = sum + p2->n;
break;
}
p2 = p2->p;
}
}
else
{
SS[is++].S = sum;
}
sum = 0;
h = 0;
j = i + 1;
js = i + 1;
SS[is].F = js;
}
else if (*(text2 + i) >= 'a' && *(text2 + i) <= 'z')
{
h = MULT * h + *(text2 + i) - 'a' + 1;
}
else
{
if ((i - j) && (h != 0))
{
p2 = nsword[h % HASH2];
while (p2 != NULL)
{
if (judge(p2->w, (text2 + j)))
{
sum = sum + p2->n;
break;
}
p2 = p2->p;
}
}
next2:
h = 0;
j = i + 1;
}
}
struct SS tmp;
unsigned int n = is;
for (i = 0; i < n - 1; i++)
{
for (j = 0; j < n - i - 1; j++)
{
if (SS[j].S < SS[j + 1].S)
{
tmp = SS[j + 1];
SS[j + 1] = SS[j];
SS[j] = tmp;
}
}
}
FOUT = fopen("results.txt", "w+");
int N;
scanf("%d", &N);
for (i = 0; i < 5; i++)
{
printf("%d ", SS[i].S);
j = SS[i].F;
while (*(text3 + j) == ' ')
{
j++;
}
while (*(text3 + j) != '.' && *(text3 + j) != '!' && *(text3 + j) != '?')
{
putc(*(text3 + j), stdout);
j++;
}
putc(*(text3 + j), stdout);
putc('\n', stdout);
}
for (i = 0; i < N; i++)
{
fprintf(FOUT, "%d ", SS[i].S);
j = SS[i].F;
while (*(text3 + j) == ' ')
{
j++;
}
while (*(text3 + j) != '.' && *(text3 + j) != '!' && *(text3 + j) != '?')
{
putc(*(text3 + j), FOUT);
j++;
}
putc(*(text3 + j), FOUT);
putc('\n', FOUT);
}
fclose(FOUT);
return 0;
}