def get_closest_station(latitude, longitude, minumum_recent_data=20140000,
                        match_max=100):
    '''Query function to find the nearest weather station to a particular
    set of coordinates. Optionally requires the station to have data more
    recent than a specified date, so that inactive stations are skipped.

    Parameters
    ----------
    latitude : float
        Latitude to search for nearby weather stations at, [degrees]
    longitude : float
        Longitude to search for nearby weather stations at, [degrees]
    minumum_recent_data : int, optional
        Date that the weather station is required to have more recent
        weather data than; format YYYYMMDD; set this to 0 to not restrict data
        by date.
    match_max : int, optional
        The maximum number of results in the KDTree to search for before
        applying the filtering criteria; an internal parameter which is
        increased automatically (tenfold per attempt, capped at the number
        of known stations) if the default value is insufficient; values
        below 1 are treated as 1 [-]

    Returns
    -------
    station : IntegratedSurfaceDatabaseStation
        Instance of IntegratedSurfaceDatabaseStation which was nearest
        to the requested coordinates and with sufficiently recent data
        available [-]

    Raises
    ------
    Exception
        If no known station has an end date later than
        `minumum_recent_data`.

    Notes
    -----
    Searching for 100 stations is a reasonable choice as it takes ~70
    microseconds vs 50 microseconds to find only 1 station. The search does
    get slower as more points are requested. Bad data is returned from a
    KDTree search if more points are requested than are available, so the
    number of requested neighbours is never allowed to exceed the number of
    stations.

    Examples
    --------
    >>> get_closest_station(51.02532675, -114.049868485806, 20150000)
    '''
    not_found_message = ('Could not find a station with more recent data than '
                         'specified near the specified coordinates.')
    if station_count < 1:
        # Nothing to search; querying an empty tree only yields filler data.
        raise Exception(not_found_message)

    # Never request more neighbours than exist (bad data is returned if
    # k > station_count), and never fewer than one, which would otherwise
    # make the widening step below unable to make progress.
    k = max(1, min(match_max, station_count))
    while True:
        _, indexes = kd_tree.query([latitude, longitude], k=k)
        if k == 1:
            # The KD-tree squeezes its result to a scalar for a single
            # neighbour; wrap it so the loop below works uniformly.
            indexes = [indexes]

        # Results are ordered nearest-first, so the first station with
        # sufficiently recent data is the answer.
        for i in indexes:
            station = stations[i]
            if station.END > minumum_recent_data:
                return station

        if k >= station_count:
            # Every station has been examined; none is recent enough.
            break
        # Widen the search tenfold and try again.
        k = min(k*10, station_count)

    raise Exception(not_found_message)
# This should be aggressively cached