编程珠玑--变位词问题

yudan_jiangnan

于 2016-05-26 19:09:38 发布

阅读量530

点赞数

分类专栏：算法

本文链接：https://blog.csdn.net/yudan_jiangnan/article/details/51509779

版权

算法专栏收录该内容

9 篇文章 0 订阅

订阅专栏

编程珠玑第二章第三个问题：

给定一个英语字典，找出其中的所有变位词集合。例如，“pots”、“stop”、“tops”互为变位词，因为每个单词都可以通过改变其他单词中的字母顺序来得到。按照书上的解法，先将每个单词中的字母按照字母表顺序排列，得到该单词的标识；互为变位词的单词，它们的标识是相同的。然后将所有单词按照标识排序，最后把标识相同的单词归并到同一行。

/* anagrams.cpp : Defines the entry point for the console application.
编程珠玑 第二章 变位词程序实现
分成三部分：
(1)sign程序为每个单词生成标识（把单词的字母按字母表顺序排序）
pans  --->  anps pans
pots  --->  opst pots     
opt   --->  opt opt
snap  --->  anps snap
stop  --->  opst stop
tops  --->  opst tops
(2)sort程序将所有具有相同标识的单词归拢到一起
anps pans       anps pans
opst pots       anps snap
opt opt   --->  opst pots
anps snap       opst stop
opst stop       opst tops
opst tops       opt opt
(3)squash将这些单词压缩成每个变位词一行的形式
anps pans         
anps snap        pans snap
opst pots  --->  pots stop tops 
opst stop        opt 
opst tops
opt opt
*/

#include "stdafx.h"
#include<stdio.h>
#include<stdlib.h>
#include<iostream>
#include<fstream>
#include<algorithm>
#include<vector>
#include<utility>
#include<string>
#include<map>


#define WORDMAX 100  // Assumes no word is longer than 100 letters.

using namespace std;
// qsort-compatible comparator for single characters.
// x and y each point to one char. The result is negative, zero or positive
// when *x is less than, equal to, or greater than *y respectively.
int charcomp(const void *x, const void *y)
{
	const char lhs = *static_cast<const char *>(x);
	const char rhs = *static_cast<const char *>(y);
	return lhs - rhs;
}


// Step 1 of the anagram pipeline: compute a signature for every word.
//
// Reads whitespace-separated words from InputFile and writes one line per
// word to "outputSign.txt" in the form "<signature> <word>", where the
// signature is the word's characters sorted in ascending order. Words that
// are anagrams of each other therefore get the same signature.
//
// If InputFile cannot be opened, "outputSign.txt" is still created but left
// empty.
//
// Words are held in std::string rather than fixed-size char arrays, so a
// word of any length is handled without overrunning a buffer.
void sign(std::string InputFile)
{
	std::ifstream input(InputFile);
	std::ofstream output("outputSign.txt");
	if (!input)
	{
		return;
	}

	std::string word;
	while (input >> word)
	{
		std::string sig = word;
		std::sort(sig.begin(), sig.end());
		// '\n' instead of endl: the stream is flushed once when it is closed.
		output << sig << ' ' << word << '\n';
	}
}

// Step 2 of the anagram pipeline: bring equal signatures together.
//
// Reads "<signature> <word>" pairs from InputFile, sorts them by signature
// (ties broken by word, the default ordering of std::pair), and writes the
// sorted pairs to "outputSort.txt", one "<signature> <word>" per line.
//
// If InputFile cannot be opened, "outputSort.txt" is still created but left
// empty.
//
// Tokens are read into std::string rather than fixed-size char arrays, so
// arbitrarily long signatures and words cannot overrun a buffer.
void sortWord(std::string InputFile)
{
	std::ifstream input(InputFile);
	std::ofstream output("outputSort.txt");
	if (!input)
	{
		return;
	}

	std::vector<std::pair<std::string, std::string>> sortedWord;
	std::string sig;
	std::string word;
	while (input >> sig >> word)
	{
		sortedWord.emplace_back(sig, word);
	}

	std::sort(sortedWord.begin(), sortedWord.end());
	for (const auto &entry : sortedWord)
	{
		output << entry.first << ' ' << entry.second << '\n';
	}
}

// Step 3 of the anagram pipeline: one output line per anagram class.
//
// Reads "<signature> <word>" pairs from InputFile, groups the words by
// signature, and writes each group to "outputSquash.txt" as a single line.
// Every word is followed by one space, and groups appear in ascending
// signature order. If InputFile cannot be opened, the output file is still
// created but left empty.
void squash(std::string InputFile)
{
	std::ifstream source(InputFile);
	std::ofstream sink("outputSquash.txt");
	if (!source)
	{
		return;
	}

	// Signature -> words carrying that signature, in the order they were read.
	std::map<std::string, std::vector<std::string>> groups;
	for (std::string key, member; source >> key >> member; )
	{
		groups[key].push_back(member);
	}

	for (const auto &group : groups)
	{
		for (const std::string &member : group.second)
		{
			sink << member << " ";
		}
		sink << std::endl;
	}
}

// Runs the three pipeline stages in order: sign -> sortWord -> squash.
// Each stage reads the file produced by the previous one. The names passed
// here match the names the stages write ("outputSign.txt", "outputSort.txt")
// exactly, including letter case, so the chain also works on case-sensitive
// filesystems.
int main()
{
	const std::string inputfile = "input.txt";
	sign(inputfile);
	sortWord("outputSign.txt");
	squash("outputSort.txt");
	return 0;
}

输入文件：

输出文件：

另一种写法：

// Groups the words of dict into anagram classes and prints them.
//
// dict: the words to classify (not modified).
// rec:  receives the grouping. Each word is appended to rec[label], where
//       label is the word's characters sorted in ascending order. Entries
//       already present in rec are kept and extended.
//
// After grouping, every entry of rec is printed to std::cout as one line,
// each word followed by a single space, in ascending label order.
void gen_label(std::vector<std::string> &dict, std::map<std::string, std::vector<std::string> > &rec)
{
	// Range-for avoids comparing a signed index against the unsigned size().
	for (const std::string &word : dict)
	{
		std::string label = word;
		std::sort(label.begin(), label.end());
		rec[label].push_back(word);
	}

	for (const auto &group : rec)
	{
		for (const std::string &word : group.second)
		{
			std::cout << word << ' ';
		}
		std::cout << '\n';
	}
}

// Entry point of the single-pass variant: loads every whitespace-separated
// word from "input.txt" and hands the list to gen_label, which groups and
// prints the anagram classes. If the file cannot be opened, gen_label is
// called with an empty word list.
int main()
{
	std::vector<std::string> words;
	std::map<std::string, std::vector<std::string> > rec;

	std::string inputfile = "input.txt";
	std::ifstream input(inputfile);
	if (input)
	{
		for (std::string word; input >> word; )
		{
			words.push_back(word);
		}
	}

	gen_label(words, rec);
	return 0;
}

2.6 习题第一题

如果不允许进行预处理，那么我们只能顺序读取整个文件，计算每个单词的标识并比较两个标识。

如果额外的空间足够多，我们可以构建一个哈希表，其中键为标识，值为标识等于该键的所有单词。

如果额外的空间有限，可以预先计算好标识并排好序（标识，单词），然后进行二分搜索。