变位词问题

最新推荐文章于 2018-12-11 15:42:02 发布

tkp2014

最新推荐文章于 2018-12-11 15:42:02 发布

阅读量512

点赞数

分类专栏：数据结构与算法

数据结构与算法专栏收录该内容

73 篇文章 2 订阅

订阅专栏

问题C
给定一个英语词典，找出其中的所有变位词的集合。例如，“pots”、“stop”和“tops”互为变位词，因为每一个单词都可以通过改变其他单词中的字母的顺序来得到。
解答：
最容易想到的解决方法是：对于每一个单词，找出它的所有排列，然后对词典进行遍历；如果某个排列出现在词典中，那么该排列就是此单词在词典中的一个变位词。
这种方法在单词不太长、且词典不太大的时候效率尚可；但是当单词变得很长时，求其各个排列的时间以及在词典中进行检索的时间都会很长，这种情况下该方法不可取。

那么，有没有一种效率比较高的方法呢？
《编程珠玑》中给出一种效率比较高的方法，具体思路如下所示。
（1）为每个单词生成一个标签，即对每个单词按字母序进行排序，这样使所有的变位词会有相同的标签；
（2）根据标签对单词进行汇合，每个标签对应一个集合，这个集合包括其所有的变位词；
每个单词的标签的生成：如上面所叙述的那样，把这个单词所包含的字母，按照字母序进行排序。这就可以使所有的变位词的标签就全相等了。

下面是我实现这个程序的详细过程，记录下来，以进行交流和学习。
  （1）词典的获取
为了获取词典，从网上随意的下载了一篇英语文章，然后进行相应的处理，从而获取了词典。
在对文章进行处理的过程中，这里去除了标点符号等简单字符，同时也进行了去除重复。
从网上获取的文章如下所示。


  （2）获取“标签单词”
   通过对获取的文章进行处理，去除标点符号等字符并去除重复单词之后，再对每个单词的字母进行排序，得到该单词的“标签”；然后把标签和单词以“标签 单词”的格式保存到文件中。
保存内容如下所示。


  （3）获取变位词集合
    获取变位词集合的方法同《编程珠玑》上讲述的类似，获取的内容如下所示。


  完成的程序代码如下所示。

 
  /*
 
  *        Author：梦醒潇湘
 
  *         Date ：2013/6/9/19:55
 
  *        Place ：Hit
 
  *        Email ：9974771**@qq.com
 
  */
 
 #include <stdio.h>
 #include <sys/stat.h>

 #include <algorithm>
 #include <cctype>
 #include <cstdlib>
 #include <cstring>
 #include <fstream>
 #include <iostream>
 #include <map>
 #include <set>
 #include <string>
 
 using namespace std;
 
 #define SOURCEFILE "big.txt" // Input text (part of a novel) from which the words are extracted
 
 #define DESTFILE "dict.txt"     // Generated dictionary file, one "signature word" pair per line
 
 #define RESULTFILE "result.txt" // Output file that receives the final result
 
 #define MAXNUM 100             // Maximum length of a word
 
 // When defined, SaveToFile() also echoes every "signature word" pair to stdout.
 #define DEBUG
 
 // Dictionary of the distinct, lower-cased words extracted from the source text.
 std::set<std::string> dict;

 // Characters treated as word separators by strtok(): punctuation, digits and
 // whitespace. The escapes matter: without the backslashes the '"' would end
 // the literal early and the letters 't' and 'n' would become separators
 // instead of tab and newline. '\r' is included so that text with CRLF line
 // endings does not leave a carriage return glued to the last word of a line.
 const char delim[] = ".,:;`'/\"+-_(){}[]<>*&^%$#@!?~/|=1234567890 \t\r\n";
 
 /*
  * Converts every character of a NUL-terminated string to lower case, in place.
  *
  * @param word  string to convert; may be NULL
  * @return      word itself (now lower-cased), or NULL when word is NULL
  */
 static char *strtolower(char *word)
 {
     if (word == NULL)
     {
         return NULL;
     }

     for (char *s = word; *s != '\0'; ++s)
     {
         // tolower() is only defined for values representable as unsigned char
         // (or EOF); a negative plain char must be converted first.
         *s = static_cast<char>(std::tolower(static_cast<unsigned char>(*s)));
     }

     return word;
 }
 
 /*
 
  * 函数功能：读取文件中的关键字
 
  * 返回值 ：转化成功返回1；反之，返回0
 
  * 参数 ：
 
  * @prama 无
 
  */
 
 static int read_file()
 
 {
 
     char *file, *word, *w;
 
     FILE *fp = fopen(SOURCEFILE, "r");
 
     struct stat sb;
 
     if(!fp)
 
     {
 
         return 0;
 
     }
 
     if(stat(SOURCEFILE, &sb))
 
     {
 
         return 0;
 
     }
 
     file = (char *)malloc(sb.st_size);
 
     if(!file)
 
     {
 
         fclose(fp);
 
         return 0;
 
     }
 
     fread(file, sizeof(char), sb.st_size, fp);
 
     word = strtok(file, delim);
 
     while(word != NULL)
 
     {
 
         w = strtolower(strdup(word));
 
         string str = w;
 
         dict.insert(str);
 
         word = strtok(NULL, delim);
 
     }
 
     free(file);
 
     fclose(fp);
 
     return 1;
 
 }
 
 /*
  * Comparison callback for qsort() over an array of char.
  * Returns the difference of the two character codes: negative when *a sorts
  * before *b, zero when they are equal, positive otherwise.
  */
 int comp(const void *a, const void *b)
 {
     const char lhs = *static_cast<const char *>(a);
     const char rhs = *static_cast<const char *>(b);
     return lhs - rhs;
 }
 
 /*
 
  * 函数功能：将字典中的单词保存到文件中, 并且把标签给求解出来
 
  * 返回值 ：保存成功，返1；反之，则返回0
 
  * 参数 ：
 
  *     @prama 无
 
  *        
 
  *        保存格式：标签 单词
 
  *                 标签 单词
 
  *                 .... ....
 
  */
 
 int SaveToFile()
 
 {
 
     FILE *out;
 
     out = fopen(DESTFILE, "w");
 
     if(out == NULL)
 
     {
 
         cout << "fopen() error in SaveToFile()." << endl;
 
         return 0;
 
     }
 
     set::iterator iter = dict.begin();
 
     for( ; iter != dict.end(); iter++)
 
     {
 
         char tmpone[MAXNUM] = {''};
 
         char tmptwo[MAXNUM] = {''};
 
         strncpy(tmpone, (*iter).c_str(), MAXNUM);
 
         strncpy(tmptwo, (*iter).c_str(), MAXNUM);
 
         qsort(tmpone, strlen(tmpone), sizeof(char), comp);
 
         #ifdef DEBUG        
 
             cout << tmpone << " "<< *iter << endl;
 
         #endif
 
         fprintf(out, "%s %sn", tmpone, tmptwo);
 
     }
 
     fclose(out);
 
     cout << "字典保存到文件成功." << endl;
 
     return 1;
 
 }
 
 /*
 
  * 函数功能：读取保存好的字典文件，进行压缩，获取最终的结果
 
  * 返回值 ：读取成功，返回1;反之，返回0
 
  * 参数 ：
 
  *     @prama 无
 
  */
 
 int squasd()
 
 {
 
     /*
 
     ifstream infile(DESTFILE, ios::in);
 
     ofstream outfile("Result.txt",ios::out);
 
     //assert(infile);
 
     //assert(outfile);
 
     if(infile == NULL || outfile == NULL)
 
     {
 
         return 0;
 
     }
 
     string sig, word;
 
     string sigone, wordone;
 
     infile >> sig >> word;
 
     outfile << sig << " " << word;
 
     while(infile >> sigone >> wordone)
 
     {
 
         if(strcmp(sigone.c_str(), sig.c_str()) == 0)
 
         {
 
             if(strcmp(wordone.c_str(), word.c_str()) == 0)
 
             {
 
                 //the same word
 
                 continue;
 
             }
 
             else
 
             {
 
                 outfile << " " << wordone;
 
             }    
 
         }
 
         else
 
         {
 
             outfile << endl;
 
             outfile << sigone << " " << wordone;
 
             sig = sigone;
 
             word = wordone;
 
         }
 
     }
 
     infile.close();
 
     outfile.close();
 
     cout << "处理完毕." << endl;
 
     return 1;
 
     */
 
     //打开dict.txt文件
 
     ifstream infile(DESTFILE, ios::in);
 
     if(infile == NULL)
 
     {
 
         return 0;
 
     }
 
     //读取文件内容，保存到multimap中
 
     multimap res;
 
     string sig, word;
 
     while(infile >> sig >> word)
 
     {
 
         res.insert(make_pair(sig, word));
 
     }
 
     //打开保存最终结果的文件
 
     ofstream outfile(RESULTFILE, ios::out);
 
     if(outfile == NULL)
 
     {
 
         return 0;
 
     }
 
     //进行处理
 
     multimap::iterator iter = res.begin();
 
     if(iter == res.end())
 
     {        
 
         return 0;
 
     }
 
     outfile << iter->first << " " << iter->second;
 
     iter++;
 
     sig = iter->first;
 
     word = iter->second;
 
     for( ; iter != res.end(); iter++)
 
     {
 
         char tmpone[MAXNUM] = {''};
 
         char tmptwo[MAXNUM] = {''};
 
         strncpy(tmpone, (iter->first).c_str(), MAXNUM);
 
         strncpy(tmptwo, (iter->second).c_str(), MAXNUM);
 
         if(strcmp(sig.c_str(), tmpone) == 0)
 
         {
 
             if(strcmp(word.c_str(), tmptwo) == 0)
 
             {
 
                 //same word here
 
                 continue;
 
             }
 
             else
 
             {
 
                 outfile << " " << tmptwo;
 
             }
 
         }
 
         else
 
         {
 
             outfile << endl;
 
             outfile << tmpone << " " << tmptwo;
             sig = tmpone;
 
             word = tmptwo;
 
         }
 
     }
 
     infile.close();
 
     outfile.close();
 
 }
 int main()
 {
     //读取文件，创建词典
 
     if(!read_file())
 
     {
 
         return EXIT_FAILURE;
 
     }
 
     //保存字典到文件中
 
     if(!SaveToFile())
 
     {
 
         return EXIT_FAILURE;
 
     }
 
     //进行最后结果的出来
 
     if(!squasd())
 
     {
 
         return EXIT_FAILURE:
 
     }
 
     return EXIT_SUCCESS;
 
 }
 //上面的代码在粘贴的时候可能会有错误，附件为文件。