(-:<转载时请注明本文由goldenhawking在 CSDN blog 撰写 http://write.blog.csdn.net/postedit/7957192
经过 前三篇的调试,已经有了一个完整的Map可以浏览,我们痛苦的世界范围数据下载、导入过程也结束了。要提醒一下的是,鉴于网速,不要下载 planetosm.lastest 文件,因为这个文件每周更新,万一一周下不完,就over了。
当然了,导入后,别忘了
sudo touch /var/lib/mod_tile/planet-import-complete
sudo chown www-data /var/lib/mod_tile/planet-import-complete
设置时间戳哦!
导入后,只有中国、日本有些中文字符,其他国家都是鸟语,必须进行汉化。用PostgreSQL count 一下,name 字段不为null 的条目太多了,利用在线的翻译API似乎不现实。我们通过下载字典来进行本地自动匹配与翻译。字典在http://download.csdn.net/detail/goldenhawking/4556453, 导入后,含有17万个地名翻译的表如下面所示
由于place_name 里的地名有不规范的表示,比如括号中的曾用名、用逗号分隔的等效名等情况,不能直接把地名表与planet_osm_roads 、planet_osm_polygon 、planet_osm_line、planet_osm_point 四张表的name字段做 like 或者 = 的换算。同样,即使是做正则式的匹配,也要考虑到比如 XXXX 与 XX'XX (YYYY) 的情况,即原本地名已经包含阿拉伯语与英语两种语言的情况。
为此,写一个程序,进行匹配,提前把地名进行标准化。其算法过程是:
读取planet_osm_roads 、planet_osm_polygon 、planet_osm_line、planet_osm_point四张表里 name is not null 并多于1个字符的地名,进行简化,清除括号、非拉丁、斯拉夫字符,而后与经过规范化的 place_name 进行匹配。为了存储独立的中文字段,在四张表尾部追加了一个trans_name_chs的 text 字段,以便存储纯粹的中文地名,供搜索用。
ALTER TABLE planet_osm_point ADD COLUMN trans_name_chs text;
ALTER TABLE planet_osm_line ADD COLUMN trans_name_chs text;
ALTER TABLE planet_osm_polygon ADD COLUMN trans_name_chs text;
ALTER TABLE planet_osm_roads ADD COLUMN trans_name_chs text;
算法伪代码表示:
void Match(unicode TableName)
{
for_each (record in TableName where 长度>3)
{
unicode 地名 = record->name;
//清除首尾空格
TrimSpaces(地名);
//只保留两类字符,根据字符的unicode取值范围筛选
unicode 词干 = FilterChar (地名, new LanguageFilter({拉丁,斯拉夫}));
//在翻译表中查找可能的翻译
unicodeList 可能结果集 = DatabaseSearch("规范化词干表","like %s%",词干);
//对所有含有词干的可能结果,进行相似度排序,这里的策略是看看长度比例因子,
//比如 Shanghai 与 Shanghai City 为 8:13, 与Shanghai 为 1:1 ,因此取 Shanghai
unicode 最佳解=null;
double 最佳因子=0;
for_each (unicode 可能解 in 可能结果集 where length(词干)/length(可能解)>0.6)
{
double 当前因子 = length(词干)/length(可能解);
if (当前因子>最佳因子)
{
最佳解 = 可能解;
最佳因子 = 当前因子;
if (最佳因子 == 1)
break;
}
}
//刷新数据库
if (最佳因子 >0)
{
unicode 翻译结果 = 最佳解 + "(" + 地名 + ")";
UpdateTable(TableName, record->id, 翻译结果);
}
}
}
匹配过程大概需要1-2天,匹配完成后,翻译好的地名便存入了name字段中。渲染瓦片,看一看,主要的地名都OK啦
德国的
西班牙的
非洲的
还有老美的
最后,为这些字段建立索引
CREATE INDEX idx_planet_osm_roads_name ON planet_osm_roads USING btree ("name") where name is not null;
CREATE INDEX idx_planet_osm_roads_trans_name_chs ON planet_osm_roads USING btree ("trans_name_chs") where trans_name_chs is not null;
CREATE INDEX idx_planet_osm_polygon_name ON planet_osm_polygon USING btree ("name") where name is not null;
CREATE INDEX idx_planet_osm_polygon_trans_name_chs ON planet_osm_polygon USING btree ("trans_name_chs") where trans_name_chs is not null;
CREATE INDEX idx_planet_osm_line_name ON planet_osm_line USING btree ("name") where name is not null;
CREATE INDEX idx_planet_osm_line_trans_name_chs ON planet_osm_line USING btree ("trans_name_chs") where trans_name_chs is not null;
CREATE INDEX idx_planet_osm_point_name ON planet_osm_point USING btree ("name") where name is not null;
CREATE INDEX idx_planet_osm_point_trans_name_chs ON planet_osm_point USING btree ("trans_name_chs") where trans_name_chs is not null;
全部搞定后,vacuum 一下,索引就可以立刻参与查询了,而且使用 FCGI 实现地名的检索就变得简单了。这里为了测试,直接用C写CGI程序。
程序实现两个功能,一个是根据地名检索旁边的GIS对象,另一个是根据坐标检索最近的地名。这里用到 PostGIS的 CoverBy 系列函数. CGI代码:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <postgresql/libpq-fe.h>
#include <sys/stat.h>
const char * pConnDBStr = "host='localhost' dbname='osmgis' user='用户名' password='密码'";
#define FCGI_PRG 1
#if FCGI_PRG != 0
#include <fcgi_stdio.h>
#endif
const char * sql_mask = "\
select * from(\
select name , trans_name_chs ,ST_Distance(way,ST_Transform(ST_SetSRID(ST_Point(%lf,%lf),4326),900913)) as mindis,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id\n\
from planet_osm_polygon\n\
where\n\
name is not null\n\
and\n\
way && ST_Transform(ST_SetSRID('BOX3D(%lf %lf,%lf %lf)'::box3d,4326),900913)\n\
union\n\
select name , trans_name_chs,ST_Distance(way,ST_Transform(ST_SetSRID(ST_Point(%lf,%lf),4326),900913)) as mindis,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id\n\
from planet_osm_roads\n\
where\n\
name is not null\n\
and\n\
way && ST_Transform(ST_SetSRID('BOX3D(%lf %lf,%lf %lf)'::box3d,4326),900913)\n\
union\n\
select name, trans_name_chs,ST_Distance(way,ST_Transform(ST_SetSRID(ST_Point(%lf,%lf),4326),900913)) as mindis,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id\n\
from planet_osm_point\n\
where\n\
name is not null\n\
and\n\
way && ST_Transform(ST_SetSRID('BOX3D(%lf %lf,%lf %lf)'::box3d,4326),900913)\n\
union\n\
select name , trans_name_chs,ST_Distance(way,ST_Transform(ST_SetSRID(ST_Point(%lf,%lf),4326),900913)) as mindis,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id\n\
from planet_osm_line\
where\
name is not null\
and\
way && ST_Transform(ST_SetSRID('BOX3D(%lf %lf,%lf %lf)'::box3d,4326),900913)\
) as full_objs\
where mindis<200\
order by mindis\
limit 256\
";
const char * psql_findbyname =
"select distinct * from(\
select name,trans_name_chs,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id from planet_osm_point where name like '%s%%' or trans_name_chs like '%s%%'\
union\
select name,trans_name_chs,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id from planet_osm_roads where name like '%s%%' or trans_name_chs like '%s%%'\
union\
select name,trans_name_chs,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id from planet_osm_line where name like '%s%%' or trans_name_chs like '%s%%'\
union\
select name,trans_name_chs,ST_AsText(ST_Transform(way,4326)) as geobj,osm_id from planet_osm_polygon where name like '%s%%' or trans_name_chs like '%s%%'\
) as geobjs limit 256;";
void initialize(void)
{
}
//很据坐标检索地名
bool findobjs(double dCenterX,double dCenterY)
{
bool succeeded = false;
double dLeft, dRight, dTop, dBottom;
dLeft = dCenterX - 0.1;
dRight = dCenterX + 0.1;
dTop = dCenterY+0.1;
dBottom = dCenterY-0.1;
if (dLeft>=179 || dLeft <=-179 || dRight>=179 || dRight <=-179 ||
fabs(dRight-dLeft)>0.5)
return false;
if (dTop>=85 || dTop <=-85 || dBottom>=85 || dBottom <=-85 ||
fabs(dTop-dBottom)>0.5)
return false;
PGconn * pConn = PQconnectdb(pConnDBStr);
if (!pConn)
printf("<p>Error ! Could not open PostgreSQL Connections.</p>\r\n");
else if (PQstatus(pConn)==CONNECTION_OK)
{
char bufsql[4096];
sprintf(bufsql,sql_mask,
dCenterX,dCenterY,dLeft,dBottom,dRight,dTop,
dCenterX,dCenterY,dLeft,dBottom,dRight,dTop,
dCenterX,dCenterY,dLeft,dBottom,dRight,dTop,
dCenterX,dCenterY,dLeft,dBottom,dRight,dTop);
//printf("<p>Executing %s</p>\r\n",bufsql);
PGresult * pRes = PQexec(pConn,bufsql);
if (!pRes)
printf("<p>Error!Cold not init selection recordset.%s</p>\r\n",PQerrorMessage(pConn));
else
{
if (PQresultStatus(pRes)==PGRES_TUPLES_OK)
{
int n=PQntuples(pRes);
if (n==0)
{
printf("<p>No symbols found.</p>\r\n");
succeeded = true;
}
else
{
//fetching results.
printf("<p>nItems = %d</p>\r\n<table border=1>",n);
printf("<tr><td>No.</td><td>name</td><td>trans_name</td><td>distance</td><td>geobj</td><td>geo_id</td></tr>");
for (int i=0;i<n;i++)
{
printf(" <tr><td>%d</td>\r\n",i);
for (int j=0;j<5;j++)
{
char * pstrVal = PQgetvalue(pRes,i,j);
char buf_text[2048];
if (pstrVal)
{
if (strlen(pstrVal)>1024)
{
strncpy(buf_text,pstrVal,1024);
strcat(buf_text,"...");
}
else
strcpy(buf_text,pstrVal);
printf(" <td>%s</td>\r\n",buf_text);
}
}
printf("</tr>\r\n");
}
printf("</table>\r\n");
succeeded = true;
}
}
else
printf("<p>Error!Requesting Failed.%s</p>\r\n",PQresultErrorMessage(pRes));
PQclear (pRes);
}
}
else
printf("<p>Error!Connection Failed.%s</p>\r\n",PQerrorMessage(pConn));
if (pConn)
PQfinish (pConn);
return succeeded;
}
//根据地名检索坐标
bool findobjs(const char * name)
{
bool succeeded = false;
if (!name)
return succeeded;
int nLen_name = strlen(name);
bool failed = false;
for (int i=0;i<nLen_name && failed==false;i++)
{
const char * pMask = "~!@#$%^&*()_+=-`{}[]:;'\"|\\<>/?";
const char * pfinded = strchr(pMask,name[i]);
if (pfinded>=pMask && pfinded<=pMask+strlen(pMask)-1)
failed = true;
}
printf("<p>Find Placename: %s</p>\r\n",name);
if (failed==true)
{
printf("<p>Name \"%s\" has invalid charactors. Request rejected.</p>\r\n",name);
return false;
}
PGconn * pConn = PQconnectdb(pConnDBStr);
if (!pConn)
printf("<p>Error ! Could not open PostgreSQL Connections.</p>\r\n");
else if (PQstatus(pConn)==CONNECTION_OK)
{
char bufsql[4096];
sprintf(bufsql,psql_findbyname,
name,name,name,name,name,name,name,name);
//printf("<p>Executing %s</p>\r\n",bufsql);
PGresult * pRes = PQexec(pConn,bufsql);
if (!pRes)
printf("<p>Error!Cold not init selection recordset.%s</p>\r\n",PQerrorMessage(pConn));
else
{
if (PQresultStatus(pRes)==PGRES_TUPLES_OK)
{
int n=PQntuples(pRes);
if (n==0)
{
printf("<p>No symbols found.</p>\r\n");
succeeded = true;
}
else
{
//fetching results.
printf("<p>nItems = %d</p>\r\n<table border=1>",n);
printf("<tr><td>num</td><td>name</td><td>trans_name</td><td>geobj</td><td>geo_id</td></tr>");
for (int i=0;i<n;i++)
{
printf("<tr><td>%d</td>",i);
{
char * pstrVal = PQgetvalue(pRes,i,0);
if (pstrVal)
printf("<td>%s</td>",pstrVal);
}
{
char * pstrVal = PQgetvalue(pRes,i,1);
if (pstrVal)
printf("<td>%s</td>",pstrVal);
}
{
char * pstrVal = PQgetvalue(pRes,i,2);
char buf_text[2048];
if (pstrVal)
{
if (strlen(pstrVal)>1024)
{
strncpy(buf_text,pstrVal,1024);
strcat(buf_text,"...");
}
else
strcpy(buf_text,pstrVal);
printf(" <td>%s</td>\r\n",buf_text);
}
}
{
char * pstrVal = PQgetvalue(pRes,i,3);
if (pstrVal)
printf("<td>%s</td>",pstrVal);
}
printf("</tr>\r\n");
}
succeeded = true;
printf("</table>\r\n");
}
}
else
printf("<p>Error!Requesting Failed.%s</p>\r\n",PQresultErrorMessage(pRes));
PQclear (pRes);
}
}
else
printf("<p>Error!Connection Failed.%s</p>\r\n",PQerrorMessage(pConn));
if (pConn)
PQfinish (pConn);
return succeeded;
}
int main(void)
{
initialize();
#if FCGI_PRG != 0
while (FCGI_Accept() >= 0)
{
char * rawdata = getenv("QUERY_STRING");
//char * server_name = getenv("SERVER_NAME");
double cenx=0,ceny = 0;
printf("Content-type: text/html; charset=UTF-8\r\n"
"\r\n"
"<title>GEO Results</title>\r\n");
if (rawdata)
{
int nLen = strlen(rawdata);
char * data = new char [nLen+1];
strcpy (data,rawdata);
char * pcenx = 0,* pceny = 0, * pname=0;
char * tmp = 0;
tmp = strstr(data,"cenx=");
if (tmp)
pcenx = tmp;
tmp = strstr(data,"ceny=");
if (tmp)
pceny = tmp;
tmp = strstr(data,"name=");
if (tmp)
pname = tmp;
if ( pcenx && pceny)
{
for (int i=0;i<nLen;i++)
if (data[i]=='?'||data[i]=='&')
data[i] = 0;
cenx = atof(pcenx+5);
ceny = atof(pceny+5);
delete [] data;
if (false==findobjs(cenx,ceny))
{
printf("<p>Error!DBoper Failed!</p><p>\r\n");
continue;
}
}
else if (pname)
{
unsigned char bufCodes[1024];
unsigned char * pCode = (unsigned char *)pname+5;
int nCodes = 0;
if (*pCode=='%')
{
while (*pCode=='%'&&nCodes<1024-1)
{
if (pCode[1]==0)
break;
if (pCode[2]==0)
break;
if (pCode[1]>='0' && pCode[1]<='9')
bufCodes[nCodes] = pCode[1]-'0';
else if (pCode[1]>='A' && pCode[1]<='F')
bufCodes[nCodes] = pCode[1]-'A'+10;
else
break;
bufCodes[nCodes] <<=4;
if (pCode[2]>='0' && pCode[2]<='9')
bufCodes[nCodes] += pCode[2]-'0';
else if (pCode[2]>='A' && pCode[2]<='F')
bufCodes[nCodes] += pCode[2]-'A'+10;
else
break;
nCodes++;
pCode+=3;
}
bufCodes[nCodes]=0;
}
else
strcpy((char *)bufCodes,(char *)pCode);
if (bufCodes[0])
{
if (false==findobjs((char *)bufCodes))
{
printf("<p>Error!DBoper Failed!</p><p>\r\n");
continue;
}
}
else
printf("<p>Error!Wrong query place names!</p><p>\r\n");
}
else
{
printf("<p>Error! At least Missing para cenx,ceny or name.</p>\r\n");
delete [] data;
continue;
}
}
}
#else
//测试
findobjs (121.538,31.221);
#endif
return 0;
}
这里的功能及其有限,也没有考虑到安全性。仅仅实现功能而已。
下一篇,我们来看看使用 GeoServer 显示叠加的矢量图层数据。