根据世界地名词典对 OpenStreetMap 进行汉化时,使用了下面的代码。代码还不太完善,这里仅贴出来供参考。
地名词典可在我的资源“世界地名大词典”中下载。
#include <QCoreApplication>
#include <QDebug>
#include <QFile>
#include <QHash>
#include <QMap>
#include <QRegExp>
#include <QString>
#include <QSqlDatabase>
#include <QSqlError>
#include <QSqlQuery>
#include <QTextStream>
#include <QVector>
// Builds the lookup dictionary from the national_place_names table of db.
// Outer key: upper-cased, '_'-joined prefix of the foreign place name.
// Inner key: an integer score (suffixes stripped minus prefix word index).
// Value: the candidate Chinese stems recorded for that key and score.
// Throws a QString with the SQL error text when the query fails.
QHash <QString, QMap<int,QVector<QString> > > make_dictionary(QSqlDatabase db);
// Dumps dict, sorted by key, to dict.txt in the application directory.
void outputDictionary(QHash <QString, QMap<int,QVector<QString> > > dict);
// Scans tableName for rows whose name has a translation in dict and appends
// one entry per translatable row to the three output vectors (same index in
// each vector refers to the same row).
// Throws a QString with the SQL error text when the query fails.
void prepareToTranslate(const QHash <QString, QMap<int,QVector<QString> > > dict,
QSqlDatabase db,
const QString & tableName,
QVector<qint64> & vec_osmid,
QVector<QString> & vec_rawName,
QVector<QString> & vec_TransName
);
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
QTextStream Stdout(stdout,QIODevice::WriteOnly);
QSqlDatabase db = QSqlDatabase::addDatabase("QPSQL");
if (db.isValid()==false)
return 0;
db.setHostName("127.0.0.1");
db.setDatabaseName("gis");
db.setUserName("archosm");
db.setPassword("archosm");
if (db.open()==false)
{
Stdout << db.lastError().text()<<"\n";
qDebug() << db.lastError().text();
return 0;
}
try
{
QHash <QString, QMap<int,QVector<QString> > > dict = make_dictionary(db);
outputDictionary(dict);
//! start to translate
QSqlQuery queryWordsToTrans(db);
const QString tableNames[4] = {
QString("planet_osm_line"),QString("planet_osm_point"),QString("planet_osm_polygon"),QString("planet_osm_roads")
};
//输出
QFile fpDict(QCoreApplication::applicationDirPath()+"/trans.txt");
if (fpDict.open(QIODevice::WriteOnly)==false)
return 0;
QTextStream stout(&fpDict);
QSqlQuery queryUpdate(db);
queryUpdate.setForwardOnly(true);
db.transaction();
for (int i=0;i<4;++i)
{
QVector<qint64> vec_osmid;
QVector<QString> vec_rawName;
QVector<QString> vec_TransName;
prepareToTranslate(dict,db,tableNames[i],vec_osmid,vec_rawName,vec_TransName);
QMap<QString, QString> map_trans;
int nTransed = vec_osmid.size();
for (int j=0;j<nTransed;++j)
map_trans[vec_rawName[j]] = vec_TransName[j];
QList<QString> key_raws = map_trans.keys();
foreach (QString str_rawName, key_raws)
{
QString strTransName = map_trans[str_rawName];
stout<<tableNames[i]<<","<<str_rawName<<","<<strTransName<<"\n";
queryUpdate.prepare(QString("update %1 set name = ? , trans_name_chs = ? where name = ? and trans_name_chs is null;").arg(tableNames[i]));
queryUpdate.addBindValue(str_rawName + ","+strTransName);
queryUpdate.addBindValue(strTransName);
queryUpdate.addBindValue(str_rawName);
if (queryUpdate.exec()==false)
throw queryUpdate.lastError().text();
stout.flush();
fpDict.flush();
}
}
db.commit();
fpDict.close();
}
catch (QString errMessage)
{
db.rollback();
Stdout<<"Error!"<<errMessage<<"\n";
qDebug()<<"Error!"<<errMessage;
}
db.close();
Stdout<<"Finished!\n";
qDebug()<<"Finished!";
exit(0);
return a.exec();
}
//预处理原始数据,生成词典
QHash <QString, QMap<int,QVector<QString> > > make_dictionary(QSqlDatabase db)
{
QVector<QString> lst_tails;
//这些后缀去掉后,会得到更多的有效词根。
lst_tails.push_back(QString::fromUtf8("国家野生动物保护区"));
lst_tails.push_back(QString::fromUtf8("国家森林公园"));
lst_tails.push_back(QString::fromUtf8("野生动物保护区"));
lst_tails.push_back(QString::fromUtf8("森林公园"));
lst_tails.push_back(QString::fromUtf8("国家公园"));
lst_tails.push_back(QString::fromUtf8("深海平原"));
lst_tails.push_back(QString::fromUtf8("海底峡谷"));
lst_tails.push_back(QString::fromUtf8("断裂带"));
lst_tails.push_back(QString::fromUtf8("自治区"));
lst_tails.push_back(QString::fromUtf8("裂口"));
lst_tails.push_back(QString::fromUtf8("盐湖"));
lst_tails.push_back(QString::fromUtf8("内湖"));
lst_tails.push_back(QString::fromUtf8("海岭"));
lst_tails.push_back(QString::fromUtf8("环礁"));
lst_tails.push_back(QString::fromUtf8("大区"));
lst_tails.push_back(QString::fromUtf8("机场"));
lst_tails.push_back(QString::fromUtf8("山口"));
lst_tails.push_back(QString::fromUtf8("公园"));
lst_tails.push_back(QString::fromUtf8("半岛"));
lst_tails.push_back(QString::fromUtf8("冰川"));
lst_tails.push_back(QString::fromUtf8("沙漠"));
lst_tails.push_back(QString::fromUtf8("峡谷"));
lst_tails.push_back(QString::fromUtf8("山谷"));
lst_tails.push_back(QString::fromUtf8("海沟"));
lst_tails.push_back(QString::fromUtf8("水道"));
lst_tails.push_back(QString::fromUtf8("水库"));
lst_tails.push_back(QString::fromUtf8("大坝"));
lst_tails.push_back(QString::fromUtf8("神庙"));
lst_tails.push_back(QString::fromUtf8("干河"));
lst_tails.push_back(QString::fromUtf8("平原"));
lst_tails.push_back(QString::fromUtf8("海岸"));
lst_tails.push_back(QString::fromUtf8("群岛"));
lst_tails.push_back(QString::fromUtf8("火山"));
lst_tails.push_back(QString::fromUtf8("浅滩"));
lst_tails.push_back(QString::fromUtf8("大桥"));
lst_tails.push_back(QString::fromUtf8("洼地"));
lst_tails.push_back(QString::fromUtf8("瀑布"));
lst_tails.push_back(QString::fromUtf8("海峡"));
lst_tails.push_back(QString::fromUtf8("熔岩"));
lst_tails.push_back(QString::fromUtf8("岛"));
lst_tails.push_back(QString::fromUtf8("湖"));
lst_tails.push_back(QString::fromUtf8("湾"));
lst_tails.push_back(QString::fromUtf8("山"));
lst_tails.push_back(QString::fromUtf8("河"));
lst_tails.push_back(QString::fromUtf8("滩"));
lst_tails.push_back(QString::fromUtf8("村"));
lst_tails.push_back(QString::fromUtf8("市"));
lst_tails.push_back(QString::fromUtf8("坝"));
lst_tails.push_back(QString::fromUtf8("港"));
lst_tails.push_back(QString::fromUtf8("区"));
lst_tails.push_back(QString::fromUtf8("县"));
lst_tails.push_back(QString::fromUtf8("省"));
lst_tails.push_back(QString::fromUtf8("礁"));
lst_tails.push_back(QString::fromUtf8("角"));
lst_tails.push_back(QString::fromUtf8("峰"));
lst_tails.push_back(QString::fromUtf8("站"));
lst_tails.push_back(QString::fromUtf8("岭"));
const int remvSz = lst_tails.size();
QSqlQuery query(db);
query.setForwardOnly(true);
if (false == query.exec("select * from national_place_names"))
throw query.lastError().text();
QHash <QString, QMap<int,QVector<QString> > > hash_dict;
//Make dictionary
while (query.next())
{
const QString raw_name = query.value("place_name").toString()
.replace("<u>","")
.replace("</u>","")
.replace("<rt>","")
.replace("</rt>","")
.replace("<ruby>","")
.replace("</ruby>","");
const QString raw_trans = query.value("trans_name").toString();
///Replace some split comma.
//! Replace "见"
QStringList lst_raw_name = raw_name.split(QRegExp(QString::fromUtf8("[〈〉见,()]")),QString::SkipEmptyParts);
if (lst_raw_name.size())
{
QString word = lst_raw_name.first();
QString upperKey = word.toUpper().trimmed();
upperKey.replace(QRegExp(QString::fromUtf8("[ ,, ]")),"_");
upperKey.replace("-","_");
upperKey.replace(".","_");
QStringList listWordsKey = upperKey.split("_");
int n = listWordsKey.size();
for (int i = 0 ;i < n; ++i)
{
QString finalKey;
for (int j = 0;j<=i;++j)
{
if (j)
finalKey += "_";
finalKey += listWordsKey.at(j);
}
//CHS
QStringList chslists = raw_trans.split(QRegExp(QString::fromUtf8("[()(),;。]")),QString::SkipEmptyParts);
if (chslists.size())
{
bool bfound = false;
int deleted = 0;
QString chs_value = chslists.first();
do
{
bfound = false;
for (int k = 0; k< remvSz ;++k)
{
if (chs_value.endsWith(lst_tails[k]))
{
QString newv = chs_value.left(chs_value.length()-lst_tails[k].length());
if (newv.size())
{
bfound = true;
chs_value = newv;
++deleted;
break;
}
}
}//end for (int k = 0; k< remvSz && bfound==true;++k)
}while (bfound); //end do remove laterFix
hash_dict[finalKey][deleted-i].push_back(chs_value);
}//end if (chslists.size())
}//end for i = 1 ~ n n = listWordsKey.size();
}//end if (lst_raw_name.size())
}
return hash_dict;
}
// Dumps the dictionary to dict.txt in the application directory.
//
// One line per key, keys sorted ascending:
//   KEY:score={t1,t2,...,score}; score={...}; ...
// Scores are listed in ascending order, and the score is repeated before the
// closing brace. If the file cannot be opened for writing the function
// returns without doing anything.
void outputDictionary(QHash <QString, QMap<int,QVector<QString> > > dict)
{
    QFile fpDict(QCoreApplication::applicationDirPath()+"/dict.txt");
    if (!fpDict.open(QIODevice::WriteOnly))
        return;
    QTextStream stout(&fpDict);

    // Read through a const view: the non-const operator[] would detach the
    // implicitly shared hash and deep-copy the whole dictionary.
    const QHash <QString, QMap<int,QVector<QString> > > & entries = dict;

    QList<QString> words = entries.keys();
    std::sort(words.begin(), words.end());

    for (int w = 0; w < words.size(); ++w)
    {
        const QString & word = words.at(w);
        stout << word << ":";

        const QMap<int,QVector<QString> > vals = entries.value(word);
        for (QMap<int,QVector<QString> >::const_iterator it = vals.constBegin();
             it != vals.constEnd(); ++it)
        {
            const int simrt = it.key();
            const QVector<QString> & transs = it.value();
            stout << simrt << "={";
            for (int i = 0; i < transs.size(); ++i)
                stout << transs[i] << ",";
            stout << simrt << "}; ";
        }
        stout << "\n";
    }

    stout.flush();
    fpDict.close();
}
void prepareToTranslate(QHash <QString, QMap<int,QVector<QString> > > dict,
QSqlDatabase db,
const QString & tableName,
QVector<qint64> & vec_osmid,
QVector<QString> & vec_rawName,
QVector<QString> & vec_TransName
)
{
QSqlQuery query(db);
query.setForwardOnly(true);
if (false == query.exec(QString("select osm_id,name from %1 where name > ' ';").arg(tableName)))
throw query.lastError().text();
while (query.next())
{
qint64 osmid = query.value(0).toLongLong();
const QString strRawName = query.value(1).toString();
QString transName;
if (strRawName.size()>1)
{
QString upperKey = strRawName.toUpper().trimmed();
upperKey.replace(QRegExp(QString::fromUtf8("[ ,, ]")),"_");
upperKey.replace("-","_");
upperKey.replace(".","_");
QStringList listWordsKey = upperKey.split("_");
int n = listWordsKey.size();
if (n )
{
for (int i = n-1 ;i >=0; --i)
{
QString finalKey;
for (int j = 0;j<=i;++j)
{
if (j)
finalKey += "_";
finalKey += listWordsKey.at(j);
}
if (dict.contains(finalKey))
{
if (finalKey.size()>3)
{
if (transName.size())
transName +="_";
transName += dict[finalKey].first().first();
for (int j = 0; j<=i;++j)
listWordsKey.pop_front();
}
break;
}
if (i<2)
break;
}
}
}
if (transName.size())
{
vec_osmid.push_back(osmid);
vec_rawName.push_back(strRawName);
vec_TransName.push_back(transName);
}
}
}