// DicMerge.cpp
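// Purpose: merge all dictionary (*.dic) files collected in the current
// directory into one result file, removing duplicate entries along the way.
// Note: duplicates are removed only within each individual dictionary file.
// An entry that appears in several different files is kept once per file in
// the merged output. To eliminate those as well, run the tool a second time
// on the result file alone.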
#include <dirent.h>
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <set>
#include <string>
#include <vector>
using namespace std;
// Reads the text file szFileName line by line and appends each line to v.
//
// A single trailing '\r' is stripped from every line so that files with
// CRLF line endings yield the same entries as files with LF endings.
//
// Parameters:
//   szFileName - path of the file to read; a null pointer is treated as a
//                file that cannot be opened.
//   v          - receives the lines, appended after any existing elements.
//
// Returns true when the file was opened and read through to end-of-file,
// false when it could not be opened or reading stopped early.
static bool FileGetLine(const char* szFileName, std::vector<std::string>& v)
{
    if ( !szFileName )
    {
        return false;
    }

    std::ifstream infile(szFileName);
    if ( infile.fail() )
    {
        return false;
    }

    // std::getline on a std::string has no fixed line-length limit, so an
    // overlong line cannot leave the stream stuck in a failed state, and a
    // final line without a terminating newline is still delivered.
    std::string line;
    while ( std::getline(infile, line) )
    {
        // Guard against empty lines before looking at the last character.
        if ( !line.empty() && '\r' == line[line.size() - 1] )
        {
            line.erase(line.size() - 1);
        }
        v.push_back(line);
    }

    // The loop ends either at end-of-file (success) or on a read error.
    return infile.eof();
}
// Loads the lines of szFileName, removes duplicates within that file, and
// appends the unique lines to vDic in ascending std::string order.
//
// The result of FileGetLine is not checked: whatever lines were read are
// used. Always returns 0.
static int DicFromFile(const char* szFileName, std::vector<std::string>& vDic)
{
    std::vector<std::string> fileLines;
    FileGetLine(szFileName, fileLines);

    // A std::set keeps one copy of each line and iterates in sorted order.
    const std::set<std::string> uniqueLines(fileLines.begin(), fileLines.end());
    vDic.insert(vDic.end(), uniqueLines.begin(), uniqueLines.end());

    return 0;
}
// Scans the directory szDir (non-recursively) for regular files whose name
// ends in ".dic", prints each matching file name as "[name]", and appends
// the de-duplicated content of each file to vDic via DicFromFile.
//
// Parameters:
//   szDir - directory to scan; nothing happens if it is null or cannot be
//           opened.
//   vDic  - receives the entries of every dictionary file found.
//
// NOTE(review): only entries reported as DT_REG are considered. Entries for
// which readdir reports another d_type (including DT_UNKNOWN) are skipped.
void SearchAndMergeDicFile(const char* szDir, std::vector<std::string>& vDic)
{
    if ( szDir == NULL )
    {
        return;
    }

    DIR* d = opendir(szDir);
    if ( d == NULL )
    {
        return;
    }

    // d_name is relative to szDir, not to the working directory, so each
    // file must be opened through "<szDir>/<d_name>".
    std::string dirPrefix(szDir);
    if ( !dirPrefix.empty() && dirPrefix[dirPrefix.size() - 1] != '/' )
    {
        dirPrefix += '/';
    }

    struct dirent* de = NULL;
    while ( (de = readdir(d)) != NULL )
    {
        if ( DT_REG != de->d_type )
        {
            continue;
        }

        // The extension is whatever follows the last '.' in the name.
        const char* ext = strrchr(de->d_name, '.');
        if ( ext == NULL || 0 != strcmp(ext, ".dic") )
        {
            continue;
        }

        printf("[%s]\n", de->d_name);
        const std::string fullPath = dirPrefix + de->d_name;
        DicFromFile(fullPath.c_str(), vDic);
    }
    closedir(d);
}
// Writes every entry of vDic to the file szFileName, one entry per line,
// each terminated by "\r\n". The file is opened in binary write mode, so an
// existing file is overwritten. If the file cannot be opened, nothing is
// written and the function returns silently.
void SaveDicToFile(std::vector<std::string>& vDic, const char* szFileName)
{
    FILE* out = fopen(szFileName, "wb");
    if ( out == NULL )
    {
        return;
    }

    static const char kLineEnd[] = "\r\n";
    for ( std::vector<std::string>::size_type i = 0; i < vDic.size(); ++i )
    {
        const std::string& entry = vDic[i];
        fwrite(entry.data(), 1, entry.size(), out);
        // sizeof includes the terminating NUL, which must not be written.
        fwrite(kLineEnd, 1, sizeof(kLineEnd) - 1, out);
    }

    fclose(out);
}
// Entry point: collects the entries of every .dic file in the current
// directory, sorts the combined list, and writes it to ./results.dic.
// Progress banners are printed to standard output. Always returns 0.
int main(void)
{
    const char* const kInputDir = "./";
    const char* const kOutputFile = "./results.dic";

    std::cout << "--- DicMerge on Mac v1.0 (20150928) ---" << std::endl;

    std::vector<std::string> mergedEntries;
    SearchAndMergeDicFile(kInputDir, mergedEntries);

    // Entries arrive grouped per source file; sort the combined list.
    std::sort(mergedEntries.begin(), mergedEntries.end());
    SaveDicToFile(mergedEntries, kOutputFile);

    std::cout << "Done[save to results.dic]" << std::endl;
    return 0;
}
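// 字典合并小工具 @ OS X 10.10 Yosemite  (Dictionary merge utility @ OS X 10.10 Yosemite)
// 最新推荐文章于 2020-12-23 22:19:53 发布  (web-page residue: "latest recommended article published 2020-12-23 22:19:53")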