UVa 698 - Index

最新推荐文章于 2018-09-18 00:56:37 发布

小白菜又菜

最新推荐文章于 2018-09-18 00:56:37 发布

阅读量1.2k

点赞数 1

分类专栏：解题报告字符串 APOC-UVa

本文链接：https://blog.csdn.net/mobius_strip/article/details/27410215

版权

解题报告同时被 3 个专栏收录

1364 篇文章 4 订阅

订阅专栏

字符串

121 篇文章 0 订阅

订阅专栏

APOC-UVa

22 篇文章 1 订阅

订阅专栏

题目：给你一些单词（数字和字母构成），再给你几行文章，要求建立单词所在行数的索引。

分析：字符串、字典树。这题好恶心，数据范围完全没有╮(╯▽╰)╭，57次提交才AC。

数据分为单词表和文章两部分，每部分由一个空行做结束标志。

首先，将单词中的小写字母全部转化成大写字母，然后存入字典树。

然后，查找时将文章中所有的小写字母转化成大写字母，

除了字母和数字之外的字符全都非法都转化成空字符；

将处理后的文章中连续的字符作为单词，进行查询（记录<单词，行号>元组）

最后，输出加上个格式处理，连续的压缩，需要注意以下几点：

1.没有出现的单词不输出；

2.单词表中重复的单词，每个都要输出；

3.输出顺序为：（0-9，A-Z）。

注意：输入输出测试时结尾的空格，如果你对输出进行处理则无压力。

单词少于100个，文章少于100行。

#include <iostream>
#include <cstdlib>
#include <cstring>
#include <cstdio>

using namespace std;

char  words[21];
char  texts[1001];
char  wdbuf[21];

/* Trie define */  
#define nodesize 1001       //节点个数   
#define dictsize 36         //字符集大小   

typedef struct node0
{
	int    line;
	node0* next;
}Index;
Index Node[1001];

typedef struct node1  
{  
    int    flag;            //值域   
    node1* next[dictsize];  
    node0* head;
    node0* move;
}tnode;  
tnode  dict[nodesize];  
int    ID[256];  

class Trie  
{  
    private:  
		char   cput[21];
        int    size;  
        int    count;
        tnode* root;  
    public:  
        Trie() {makeID(); initial();}  
        void makeID() {  
			for ( int i = 0 ; i < 10 ; ++ i )
				ID['0'+i] = i;
            for ( int i = 0 ; i < 26 ; ++ i )  
                ID['A'+i] = i+10;
        }  
        void initial() {  
            memset( dict, 0, sizeof( dict ) );  
            memset( Node, 0, sizeof( Node ) );
            count = 0; size=0; root=newnode();  
        }  
        tnode* newnode() {return &dict[size ++];}  
        void insert( char* word ) {  
            tnode* now = root;  
            for ( int i = 0 ; word[i] ; ++ i ) {
                if ( !now->next[ID[word[i]]] )  
                    now->next[ID[word[i]]] = newnode();  
                now = now->next[ID[word[i]]];  
            }now->flag ++;
        }  
        void query( char* word, int id ) {
            tnode* now = root;
            for ( int i = 0 ; word[i] ; ++ i ) {
                if ( !now->next[ID[word[i]]] ) return;  
                now = now->next[ID[word[i]]];  
            }
            /* 利用链表存储<单词，行数>元组 */
			if ( now->flag ) {
				if ( !now->head ) {
					now->head = &Node[count ++];
					now->move = now->head;
					now->move->line = id;
					return;
				}
				if ( now->move->line != id ) {
					now->move->next = &Node[count ++];
					now->move = now->move->next;
					now->move->line = id;
				}
			}
        }
        void output( int line ){output( root, 0, line );}  
        void output( tnode* now, int d, int line ) {  
            if ( now->flag && now->head ) 
            //重复的单词都要输出 
			for ( int k = 0 ; k < now->flag ; ++ k ) {
				//输出单词 
                for ( int i = 0 ; i < d ; ++ i )  
                    printf("%c",cput[i]); 
                //控制格式 
				int first = 1;
				for ( Index *q,*p = now->head ; p ; q = p,p = p->next ) {
					//输出连续出现的起始位置 
					if ( p == now->head || q->line+1 < p->line ) {
						if ( !first ) printf(",");
						else first = 0;
						printf(" ");
						if ( p->next && p->next->line == p->line+1 ) 
							printf("%d-",p->line);
					}
					//输出结束位置（单个字符只有结束位置） 
					if ( !p->next || p->next->line > p->line+1 )
						printf("%d",p->line);
				}
				printf("\n");  
            }  
            for ( int i = 0 ; i < 36 ; ++ i )  
                if ( now->next[i] ) {  
					if ( i < 10 ) cput[d] = i+'0';
					else cput[d] = i-10+'A';
                    output( now->next[i], d+1, line );  
                }  
        }
}trie;  
/* Trie  end */  

int main()
{
	int Case = 1;
	while ( gets(words) ) {
		
		trie.initial();
		while ( strlen(words) ) {
			for ( int i = 0 ; words[i] ; ++ i )
				if ( words[i] >= 'a' && words[i] <= 'z' )
					words[i] = words[i]-'a'+'A';
			trie.insert( words );
			gets(words);
		}
		
		int lines = 1;
		while ( gets(texts) ) {
			int L = strlen(texts);
			if ( !L ) break;
			/* 将字母变成大写，其他字符变成'' */
			for ( int i = 0 ; i < L ; ++ i ) {
				if ( texts[i] >= 'a' && texts[i] <= 'z' )
					texts[i] = texts[i]-'a'+'A';
				if ( texts[i] >= 'A' && texts[i] <= 'Z' )
					continue;
				if ( texts[i] >= '0' && texts[i] <= '9' )
					continue;
				texts[i] = 0;
			}
			
			int start = 0;
			while ( start < L ) {
				/* 计算单词起始位置 */
				while ( start < L && !texts[start] ) start ++;
				/* 将单词进行查询 */
				trie.query( texts+start, lines );
				while ( texts[start] ) start ++;
			}
			lines ++;
		}
		
		printf("Case %d\n",Case ++);
		trie.output( lines );
		printf("\n");
	}
	return 0;
}

小白菜又菜

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
UVa 698 - Index

题目：给你一些单词（数字和字母构成），再给你几行文章，要求建立单词所在行数的索引。分析：字符串、字典树。这题好恶心，数据范围完全没有╮(╯▽╰)╭，57次提交才AC。数据分为单词表和文章两部分，每部分由一个空行做结束标志。首先，将单词中的小写字母全部转化成大写字母，然后存入字典树。然后，查找时将文章中所有的小写
复制链接

扫一扫

专栏目录