Is there any way we can query and get location data using mongodb geospatial query that matches the following criteria?
Getting all locations that are part of intersection between two boxes or in general two polygons.
For example below can we get in query output only those locations that are within the yellow area which actually is the common area for the purple and red geometric objects [ polygons ] ?
My study of mongodb document so far
http://docs.mongodb.org/manual/reference/operator/query/geoWithin/
This provides results that are within one or more polygons [ I am looking for the intersection of these individual polygon results as output ]
Use case
db.places.find( {
loc: { $geoWithin: { $box: [ [ 0, 0 ], [ 100, 100 ] ] } }
} )
Above query provides results within a rectangle geometric area [ I am looking for locations that are common to two such individual queries ]
db.places.find( {
loc: { $geoWithin: { $box: [ [ 0, 0 ], [ 100, 100 ] ] } }
} )
db.places.find( {
loc: { $geoWithin: { $box: [ [ 50, 50 ], [ 90, 120 ] ] } }
} )
解决方案
So looking at this with a fresh mind the answer is staring me in the face. The key thing that you have already stated is that you want to find the "intersection" of two queries in a single response.
Another way to look at this is you want all of the points bound by the first query to then be "input" for the second query, and so on as required. That is essentially what an intersection does, but the logic is actually literal.
So just use the aggregation framework to chain the matching queries. For a simple example, consider the following documents:
{ "loc" : { "type" : "Point", "coordinates" : [ 4, 4 ] } }
{ "loc" : { "type" : "Point", "coordinates" : [ 8, 8 ] } }
{ "loc" : { "type" : "Point", "coordinates" : [ 12, 12 ] } }
And the chained aggregation pipeline, just two queries:
db.geotest.aggregate([
{ "$match": {
"loc": {
"$geoWithin": {
"$box": [ [0,0], [10,10] ]
}
}
}},
{ "$match": {
"loc": {
"$geoWithin": {
"$box": [ [5,5], [20,20] ]
}
}
}}
])
So if you consider that logically, the first result will find the points that fall within the bounds of the initial box or the first two items. Those results are then acted on by the second query, and since the new box bounds start at [5,5] that excludes the first point. The third point was already excluded, but if the box restrictions were reversed then the result would be the same middle document only.
How this works in quite unique to the $geoWithin query operator as compared to various other geo functions:
$geoWithin does not require a geospatial index. However, a geospatial index will improve query performance. Both 2dsphere and 2d geospatial indexes support $geoWithin.
So the results are both good and bad. Good in that you can do this type of operation without an index in place, but bad because once the aggregation pipeline has altered the collection results after the first query operation the no further index can be used. So any performance benefit of an index is lost on merging the "set" results from anything after the initial Polygon/MultiPolygon as supported.
For this reason I would still recommend that you calculate the intersection bounds "outside" of the query issued to MongoDB. Even though the aggregation framework can do this due to the "chained" nature of the pipeline, and even though resulting intersections will get smaller and smaller, your best performance is a single query with the correct bounds that can use all of the index benefits.
There are various methods for doing that, but for reference here is an implementation using the JSTS library, which is a JavaScript port of the popular JTS library for Java. There may be others or other language ports, but this has simple GeoJSON parsing and built in methods for such things as getting the intersection bounds:
var async = require('async');
util = require('util'),
jsts = require('jsts'),
mongo = require('mongodb'),
MongoClient = mongo.MongoClient;
var parser = new jsts.io.GeoJSONParser();
var polys= [
{
type: 'Polygon',
coordinates: [[
[ 0, 0 ], [ 0, 10 ], [ 10, 10 ], [ 10, 0 ], [ 0, 0 ]
]]
},
{
type: 'Polygon',
coordinates: [[
[ 5, 5 ], [ 5, 20 ], [ 20, 20 ], [ 20, 5 ], [ 5, 5 ]
]]
}
];
var points = [
{ type: 'Point', coordinates: [ 4, 4 ] },
{ type: 'Point', coordinates: [ 8, 8 ] },
{ type: 'Point', coordinates: [ 12, 12 ] }
];
MongoClient.connect('mongodb://localhost/test',function(err,db) {
db.collection('geotest',function(err,geo) {
if (err) throw err;
async.series(
[
// Insert some data
function(callback) {
var bulk = geo.initializeOrderedBulkOp();
bulk.find({}).remove();
async.each(points,function(point,callback) {
bulk.insert({ "loc": point });
callback();
},function(err) {
bulk.execute(callback);
});
},
// Run each version of the query
function(callback) {
async.parallel(
[
// Aggregation
function(callback) {
var pipeline = [];
polys.forEach(function(poly) {
pipeline.push({
"$match": {
"loc": {
"$geoWithin": {
"$geometry": poly
}
}
}
});
});
geo.aggregate(pipeline,callback);
},
// Using external set resolution
function(callback) {
var geos = polys.map(function(poly) {
return parser.read( poly );
});
var bounds = geos[0];
for ( var x=1; x
bounds = bounds.intersection( geos[x] );
}
var coords = parser.write( bounds );
geo.find({
"loc": {
"$geoWithin": {
"$geometry": coords
}
}
}).toArray(callback);
}
],
callback
);
}
],
function(err,results) {
if (err) throw err;
console.log(
util.inspect( results.slice(-1), false, 12, true ) );
db.close();
}
);
});
});
Using the full GeoJSON "Polygon" representations there as this translates to what JTS can understand and work with. Chances are any input you might receive for a real application would be in this format as well rather than applying conveniences such as $box.
So it can be done with the aggregation framework, or even parallel queries merging the "set" of results. But while the aggregation framework may do it better than merging sets of results externally, the best results will always come from computing the bounds first.