源码位置:
redis/src/geo.c
redis/src/geohash_helper.c
redis/src/geohash.c
上文花了大量篇幅讲解geohash的实现,看到这里,你基本上已经理解了redis中geohash的实现。本质上,redis中的geo就是对geohash的封装,具体的geohash相关代码这里就不一一列出了(可自行查阅上面列出的源码文件),下面只介绍redis geo的大体流程。
首先,可能大家最好奇的是geohash在redis中是怎么存储的,从geoadd命令的实现可以一窥端倪。
/* GEOADD key [CH] [NX|XX] long lat name [long2 lat2 name2 ... longN latN nameN]
 *
 * Translates the client's GEOADD into a ZADD argument vector and then runs
 * zaddCommand() on it. Every (long, lat) pair is encoded into a 52-bit
 * geohash integer which becomes the score; 'name' becomes the member. */
void geoaddCommand(client *c) {
    int nx = 0, xx = 0;
    int longidx = 2;

    /* Walk over the leading option tokens. When the loop stops, 'longidx'
     * is the index of the first argument that is not NX / XX / CH. */
    for (; longidx < c->argc; longidx++) {
        char *opt = c->argv[longidx]->ptr;
        if (strcasecmp(opt,"nx") == 0) {
            nx = 1;
        } else if (strcasecmp(opt,"xx") == 0) {
            xx = 1;
        } else if (strcasecmp(opt,"ch") == 0) {
            /* No flag is recorded here: the token is simply copied into the
             * ZADD vector below together with the other leading arguments. */
        } else {
            break;
        }
    }

    /* The remaining arguments must form complete (long, lat, name) triples,
     * and NX / XX must not be given together. */
    int remaining = c->argc - longidx;
    if (remaining % 3 != 0 || (nx && xx)) {
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }

    /* Build the ZADD vector: every triple shrinks to a (score, member) pair. */
    int elements = remaining / 3;
    int argc = longidx+elements*2; /* ZADD key [CH] [NX|XX] score ele ... */
    robj **argv = zcalloc(argc*sizeof(robj*));
    argv[0] = createRawStringObject("zadd",4);

    /* The key and the option tokens are shared with the original vector,
     * so each one gets an extra reference. */
    for (int j = 1; j < longidx; j++) {
        argv[j] = c->argv[j];
        incrRefCount(argv[j]);
    }

    for (int e = 0; e < elements; e++) {
        robj **triple = c->argv + longidx + e*3; /* long, lat, name */
        double xy[2];

        if (extractLongLatOrReply(c, triple, xy) == C_ERR) {
            /* Release whatever has been placed in the new vector so far;
             * slots that were never filled are skipped by the NULL check. */
            for (int k = 0; k < argc; k++) {
                if (argv[k]) decrRefCount(argv[k]);
            }
            zfree(argv);
            return;
        }

        /* Turn the coordinates into the score of the element. */
        GeoHashBits hash;
        geohashEncodeWGS84(xy[0], xy[1], GEO_STEP_MAX, &hash);
        GeoHashFix52Bits bits = geohashAlign52Bits(hash);

        robj **pair = argv + longidx + e*2; /* score, member */
        pair[0] = createObject(OBJ_STRING, sdsfromlonglong(bits));
        pair[1] = triple[2];
        incrRefCount(triple[2]);
    }

    /* Swap in the rewritten vector and execute it as a ZADD. */
    replaceClientCommandVector(c,argc,argv);
    zaddCommand(c);
}
原来geo的存储只是在zset外面包了一层壳(是不是有点小失望):经纬度被编码成52位的geohash整数作为score,member作为元素存入zset。关于zset的具体实现,可以参考文章《redis中skiplist的实现》。
我们再来详细看下georadius的大体执行流程(代码偏长,故删除大量细节代码)。
/* Abridged outline of the generic radius / box search (argument parsing,
 * sorting, replying and storing are elided and marked as such below).
 *
 * c           - the client issuing the command.
 * srcKeyIndex - index in c->argv of the source key name.
 * flags       - search-type flags; only RADIUS_COORDS is tested in the
 *               visible part. */
void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
    robj *storekey = NULL;
    int storedist = 0; /* 0 for STORE, 1 for STOREDIST. */

    /* Look up the source object by key and make sure it is a sorted set.
     * Nothing more is done here when the lookup yields NULL or the type
     * check fails. */
    robj *zobj = NULL;
    if ((zobj = lookupKeyReadOrReply(c, c->argv[srcKeyIndex], shared.emptyarray)) == NULL ||
        checkType(c, zobj, OBJ_ZSET)) {
        return;
    }

    /* Parse the coordinates of the request and, depending on the query
     * type, the radius or the box width/height.
     * NOTE(review): base_args is not assigned anywhere in this excerpt;
     * presumably the elided parsing below sets it - confirm against the
     * full source. */
    int base_args;
    GeoShape shape = {0}; /* Describes the search area. */
    if (flags & RADIUS_COORDS) {
        /*
         * Parsing of the mandatory arguments (centre point, radius) elided.
         */
    }

    /* Parse all the optional arguments. */
    int withdist = 0, withhash = 0, withcoords = 0;
    int frommember = 0, fromloc = 0, byradius = 0, bybox = 0;
    int sort = SORT_NONE;
    int any = 0; /* any=1 means a limited search, stop as soon as enough results were found. */
    long long count = 0; /* Max number of results to return. 0 means unlimited. */
    if (c->argc > base_args) {
        /*
         * Parsing of the optional arguments elided.
         */
    }

    /* Get all the neighbor geohash boxes for our search: the centre box
     * plus its 8 neighbors (the computation is shown in
     * geohashCalculateAreasByShapeWGS84). */
    GeoHashRadius georadius = geohashCalculateAreasByShapeWGS84(&shape);

    /* Create the geoArray that collects the matching points. */
    geoArray *ga = geoArrayCreate();

    /* Scan the 9 areas and append every point that satisfies the search to
     * the geoArray. A limit is only passed down when ANY was requested.
     *
     * How the matching works, as described for scoresOfGeoHashBox: each box
     * is identified by a geohash prefix of 2*step bits, e.g. 101010 for
     * step 3. Scores are stored as 52-bit integers, so every element of the
     * box has a score of the form 101010?????..., where ? is 0 or 1. The
     * minimum score is the prefix left-aligned to 52 bits; the maximum
     * (excluded) is the prefix + 1 (101011) aligned the same way:
     *
     * 1010100000000000000000000000000000000000000000000000 (included)
     * and
     * 1010110000000000000000000000000000000000000000000000 (excluded).
     *
     * The sorted set is then queried for all the elements between min and
     * max; the lookup goes through the zset skiplist, so it is comparable
     * in cost to a binary search. */
    membersOfAllNeighbors(zobj, georadius, &shape, ga, any ? count : 0);

    /* If no matching results, the user gets an empty reply. */
    if (ga->used == 0 && storekey == NULL) {
        addReply(c,shared.emptyarray);
        geoArrayFree(ga);
        return;
    }

    long result_length = ga->used;
    long returned_items = (count == 0 || result_length < count) ?
                          result_length : count;
    long option_length = 0;

    /*
     * Remaining logic (sorting, replying, storing ...) elided.
     */

    /* Release the space used by the geoArray. */
    geoArrayFree(ga);
}
/* Part of the matching process: compute the sorted-set score range that
 * contains every element inside the geohash box 'hash', whose prefix is
 * hash.step * 2 bits long.
 *
 * Example: with step 3 and the binary prefix 101010, scores are 52 bits
 * wide, so the wanted elements are exactly those of the form
 * 101010?????????????????????????????????????????????? (? is 0 or 1).
 *
 * '*min' receives the prefix aligned to 52 bits (included in the range).
 * '*max' receives the next prefix, 101011 in the example, aligned the same
 * way (excluded from the range):
 *
 * 1010100000000000000000000000000000000000000000000000 (included)
 * and
 * 1010110000000000000000000000000000000000000000000000 (excluded).
 */
void scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bits *max) {
    /* Same box, prefix advanced by one: this is the exclusive upper end. */
    GeoHashBits next_prefix = hash;
    next_prefix.bits++;

    *min = geohashAlign52Bits(hash);
    *max = geohashAlign52Bits(next_prefix);
}
/* Helpers used to compute the 9 geohash areas covering a search region are
 * listed separately from here on.
 *
 * This function is used in order to estimate the step (bits precision)
 * of the 9 search area boxes during radius queries, i.e. it derives the
 * geohash precision from the size of the search area.
 *
 * range_meters - search range in meters; only its magnitude is used.
 * lat          - latitude of the search centre, in degrees.
 *
 * Returns a step in the range [1, 26]. */
uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
    if (range_meters == 0) return 26;

    /* A negative range can never grow past MERCATOR_MAX by doubling, so the
     * loop below would spin forever and overflow 'step'. The sign carries
     * no meaning for a distance: use the magnitude. */
    if (range_meters < 0) range_meters = -range_meters;

    int step = 1;
    while (range_meters < MERCATOR_MAX) {
        range_meters *= 2;
        step++;
    }
    step -= 2; /* Make sure range is included in most of the base cases. */

    /* Wider range towards the poles... Note: it is possible to do better
     * than this approximation by computing the distance between meridians
     * at this latitude, but this does the trick for now. */
    if (lat > 66 || lat < -66) {
        step--;
        if (lat > 80 || lat < -80) step--;
    }

    /* Frame to valid range. */
    if (step < 1) step = 1;
    if (step > 26) step = 26;
    return step;
}
/* Return the bounding box of the search area described by 'shape'
 * (see geohash.h GeoShape).
 *
 * bounds[0] - bounds[2] is the minimum and maximum longitude
 * while bounds[1] - bounds[3] is the minimum and maximum latitude.
 *
 * Since the higher the latitude, the shorter the arc length, the box shape
 * is as follows (left and right edges are actually bent):
 *
 *    \-----------------/          --------               \-----------------/
 *     \               /         /          \              \               /
 *      \  (long,lat) /         / (long,lat) \              \  (long,lat) /
 *       \           /         /              \             /             \
 *         ---------          /----------------\           /---------------\
 *  Northern Hemisphere       Southern Hemisphere         Around the equator
 *
 * Returns 0 without touching anything when 'bounds' is NULL, 1 otherwise. */
int geohashBoundingBox(GeoShape *shape, double *bounds) {
    if (!bounds) return 0;

    const double center_lon = shape->xy[0];
    const double center_lat = shape->xy[1];

    /* Half extents of the area: the radius for a circle, half of each side
     * for a rectangle, both scaled by shape->conversion. */
    double half_height, half_width;
    if (shape->type == CIRCULAR_TYPE) {
        half_height = shape->conversion * shape->t.radius;
        half_width = shape->conversion * shape->t.radius;
    } else {
        half_height = shape->conversion * (shape->t.r.height/2);
        half_width = shape->conversion * (shape->t.r.width/2);
    }

    const double lat_delta = rad_deg(half_height/EARTH_RADIUS_IN_METERS);

    /* Longitude spread evaluated at the upper and at the lower latitude
     * edge of the box. */
    const double lon_delta_upper =
        rad_deg(half_width/EARTH_RADIUS_IN_METERS/cos(deg_rad(center_lat+lat_delta)));
    const double lon_delta_lower =
        rad_deg(half_width/EARTH_RADIUS_IN_METERS/cos(deg_rad(center_lat-lat_delta)));

    /* The directions of the northern and southern hemispheres are opposite:
     * a centre with negative latitude takes the spread of the lower edge,
     * any other centre the spread of the upper edge. */
    double lon_delta;
    if (center_lat < 0) {
        lon_delta = lon_delta_lower;
    } else {
        lon_delta = lon_delta_upper;
    }

    bounds[0] = center_lon - lon_delta;
    bounds[1] = center_lat - lat_delta;
    bounds[2] = center_lon + lon_delta;
    bounds[3] = center_lat + lat_delta;
    return 1;
}
/* Calculate a set of areas (center + 8) that are able to cover a range query
 * for the specified position and shape (see geohash.h GeoShape).
 * The bounding box of the search area is saved in shape->bounds.
 *
 * Returns a GeoHashRadius holding the centre hash, its decoded area and the
 * neighbor hashes; neighbors that cannot intersect the search area are
 * zeroed with GZERO. */
GeoHashRadius geohashCalculateAreasByShapeWGS84(GeoShape *shape) {
    /* Bounding box of the search area. */
    geohashBoundingBox(shape, shape->bounds);
    const double min_lon = shape->bounds[0];
    const double min_lat = shape->bounds[1];
    const double max_lon = shape->bounds[2];
    const double max_lat = shape->bounds[3];

    const double center_lon = shape->xy[0];
    const double center_lat = shape->xy[1];

    /* radius_meters is calculated differently in different search types:
     * 1) CIRCULAR_TYPE, just use radius.
     * 2) RECTANGLE_TYPE, we use sqrt((width/2)^2 + (height/2)^2) to
     *    calculate the distance from the center point to the corner */
    double radius_meters;
    if (shape->type == CIRCULAR_TYPE) {
        radius_meters = shape->t.radius;
    } else {
        radius_meters = sqrt((shape->t.r.width/2)*(shape->t.r.width/2) +
                             (shape->t.r.height/2)*(shape->t.r.height/2));
    }
    radius_meters *= shape->conversion;

    /* Estimate the precision from the size of the area, then encode the
     * centre at that precision, derive its 8 neighbors and decode the
     * centre box back into coordinates. */
    int steps = geohashEstimateStepsByRadius(radius_meters,center_lat);

    GeoHashRange long_range, lat_range;
    GeoHashBits hash;
    GeoHashNeighbors neighbors;
    GeoHashArea area;

    geohashGetCoordRange(&long_range,&lat_range);
    geohashEncode(&long_range,&lat_range,center_lon,center_lat,steps,&hash);
    geohashNeighbors(&hash,&neighbors);
    geohashDecode(long_range,lat_range,hash,&area);

    /* Check if the step is enough at the limits of the covered area.
     * Sometimes when the search area is near an edge of the
     * area, the estimated step is not small enough, since one of the
     * north / south / west / east square is too near to the search area
     * to cover everything. */
    GeoHashArea north, south, east, west;
    geohashDecode(long_range, lat_range, neighbors.north, &north);
    geohashDecode(long_range, lat_range, neighbors.south, &south);
    geohashDecode(long_range, lat_range, neighbors.east, &east);
    geohashDecode(long_range, lat_range, neighbors.west, &west);

    /* Distance from the centre to the outer edge of each of the four
     * direct neighbors. */
    const double reach_north =
        geohashGetDistance(center_lon,center_lat,center_lon,north.latitude.max);
    const double reach_south =
        geohashGetDistance(center_lon,center_lat,center_lon,south.latitude.min);
    const double reach_east =
        geohashGetDistance(center_lon,center_lat,east.longitude.max,center_lat);
    const double reach_west =
        geohashGetDistance(center_lon,center_lat,west.longitude.min,center_lat);

    int decrease_step = 0;
    if (reach_north < radius_meters) decrease_step = 1;
    if (reach_south < radius_meters) decrease_step = 1;
    if (reach_east < radius_meters) decrease_step = 1;
    if (reach_west < radius_meters) decrease_step = 1;

    /* One of the edges is closer than the radius: go one step coarser and
     * recompute the centre box and its neighbors. */
    if (decrease_step && steps > 1) {
        steps--;
        geohashEncode(&long_range,&lat_range,center_lon,center_lat,steps,&hash);
        geohashNeighbors(&hash,&neighbors);
        geohashDecode(long_range,lat_range,hash,&area);
    }

    /* Exclude the search areas that are useless: when the centre box already
     * extends past the bounding box on one side, the three neighbors on
     * that side are dropped. */
    if (steps >= 2) {
        const int drop_south = area.latitude.min < min_lat;
        const int drop_north = area.latitude.max > max_lat;
        const int drop_west = area.longitude.min < min_lon;
        const int drop_east = area.longitude.max > max_lon;

        if (drop_south) {
            GZERO(neighbors.south);
            GZERO(neighbors.south_west);
            GZERO(neighbors.south_east);
        }
        if (drop_north) {
            GZERO(neighbors.north);
            GZERO(neighbors.north_east);
            GZERO(neighbors.north_west);
        }
        if (drop_west) {
            GZERO(neighbors.west);
            GZERO(neighbors.south_west);
            GZERO(neighbors.north_west);
        }
        if (drop_east) {
            GZERO(neighbors.east);
            GZERO(neighbors.south_east);
            GZERO(neighbors.north_east);
        }
    }

    GeoHashRadius result;
    result.hash = hash;
    result.neighbors = neighbors;
    result.area = area;
    return result;
}
上述代码删减了大量细节,有兴趣的同学可以自行查阅。不过可以看出georadius的整体流程非常清晰。
- 解析请求参数。
- 计算目标坐标所在的geohash和8个邻居。
- 在zset中查找这9个区域中满足距离限制的所有点集。
- 处理排序等后续逻辑。
- 清理临时存储空间。