add support for ERDDAP Tabledap provider (#1214)

This commit is contained in:
Tom Kralidis
2023-04-25 11:39:55 -04:00
committed by GitHub
parent 6c71f90645
commit b7ee1203cd
5 changed files with 238 additions and 10 deletions
+1 -1
View File
@@ -116,7 +116,7 @@ release = version
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
language = 'en'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
+4 -1
View File
@@ -32,7 +32,8 @@ Reference
^^^^^^^^^^
The ``server`` section provides directives on binding and high level tuning.
Please find more information related to API design rules (the property at the bottom of the example below) :ref:`further down<API Design Rules>`.
For more information related to API design rules (the ``api_rules`` property in the example below) see :ref:`API Design Rules`.
.. code-block:: yaml
@@ -312,6 +313,8 @@ Examples:
curl https://example.org/collections/foo # user can access resource normally
.. _API Design Rules:
API Design Rules
----------------
@@ -21,6 +21,7 @@ parameters.
`CSV`_,✅/✅,results/hits,❌,❌,❌,✅,❌,❌,✅
`Elasticsearch`_,✅/✅,results/hits,✅,✅,✅,✅,✅,✅,✅
`ERDDAP Tabledap Service`_,❌/❌,results/hits,✅,✅,❌,❌,❌,❌,❌
`ESRI Feature Service`_,✅/✅,results/hits,✅,✅,✅,✅,❌,❌,✅
`GeoJSON`_,✅/✅,results/hits,❌,❌,❌,✅,❌,❌,✅
`MongoDB`_,✅/❌,results,✅,✅,✅,✅,❌,❌,✅
@@ -397,14 +398,41 @@ relies on `sodapy <https://github.com/xmunoz/sodapy>`.
.. code-block:: yaml
providers:
- type: feature
name: Socrata
data: https://soda.demo.socrata.com/
resource_id: emdb-u46w
id_field: earthquake_id
geom_field: location
time_field: datetime # Optional time_field for datetime queries
token: my_token # Optional app token
- type: feature
name: Socrata
data: https://soda.demo.socrata.com/
resource_id: emdb-u46w
id_field: earthquake_id
geom_field: location
time_field: datetime # Optional time_field for datetime queries
token: my_token # Optional app token
.. _ERDDAP Tabledap Service:
ERDDAP Tabledap Service
^^^^^^^^^^^^^^^^^^^^^^^
.. note::
Requires Python package `requests`_
To publish from an ERDDAP `Tabledap`_ service, the following are required in your provider configuration:
.. code-block:: yaml
providers:
- type: feature
name: ERDDAPTabledap
data: http://osmc.noaa.gov/erddap/tabledap/OSMC_Points
id_field: PLATFORM_CODE
time_field: time
options:
filters: "&parameter=\"SLP\"&platform!=\"C-MAN%20WEATHER%20STATIONS\"&platform!=\"TIDE GAUGE STATIONS (GENERIC)\""
max_age_hours: 12
.. note::
If the ``datetime`` parameter is passed by the client, this overrides the ``options.max_age_hours`` setting.
Controlling the order of properties
-----------------------------------
@@ -471,3 +499,5 @@ Data access examples
.. _`Google Cloud SQL`: https://cloud.google.com/sql
.. _`OGC API - Features`: https://www.ogc.org/standards/ogcapi-features
.. _`Tabledap`: https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html
.. _`requests`: https://requests.readthedocs.io
+1
View File
@@ -42,6 +42,7 @@ PLUGINS = {
'CSV': 'pygeoapi.provider.csv_.CSVProvider',
'Elasticsearch': 'pygeoapi.provider.elasticsearch_.ElasticsearchProvider', # noqa
'ElasticsearchCatalogue': 'pygeoapi.provider.elasticsearch_.ElasticsearchCatalogueProvider', # noqa
'ERDDAPTabledap': 'pygeoapi.provider.erddap.TabledapProvider',
'ESRI': 'pygeoapi.provider.esri.ESRIServiceProvider',
'FileSystem': 'pygeoapi.provider.filesystem.FileSystemProvider',
'GeoJSON': 'pygeoapi.provider.geojson.GeoJSONProvider',
+194
View File
@@ -0,0 +1,194 @@
# =================================================================
#
# Authors: David Berry <david.i.berry@wmo.int>
# Tom Kralidis <tomkralidis@gmail.com>
#
# Copyright (c) 2023 David Inglis Berry
# Copyright (c) 2023 Tom Kralidis
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# =================================================================
# feature provider for ERDDAP integrations
#
# Tabledap sample configuration
# -----------------------------
#
# providers:
# - type: feature
# name: pygeoapi.provider.erddap.TabledapProvider
# data: http://osmc.noaa.gov/erddap/tabledap/OSMC_Points
# id_field: id
# options:
# filters: "&parameter=\"SLP\"&platform!=\"C-MAN%20WEATHER%20STATIONS\"&platform!=\"TIDE GAUGE STATIONS (GENERIC)\"" # noqa
# max_age_hours: 12
from datetime import datetime, timedelta, timezone
import logging
import requests
from pygeoapi.provider.base import (
BaseProvider, ProviderNotFoundError, ProviderQueryError)
# Module-level logger, named after this module via ``__name__``.
LOGGER = logging.getLogger(__name__)
class TabledapProvider(BaseProvider):
    """Feature provider that reads GeoJSON from an ERDDAP Tabledap endpoint.

    Each request is sent to ``<data>.geoJson`` with a query string built
    from the configured ``options.filters``, an optional time constraint
    and an optional bounding box.  Features are given an ``id`` of the form
    ``<id_field value>.<time>`` so that :meth:`get` can look them up again.
    """

    def __init__(self, provider_def):
        """
        Initialize the provider and discover its fields.

        Note that field discovery issues a live query against the service.

        :param provider_def: provider definition passed to ``BaseProvider``
        """

        super().__init__(provider_def)

        LOGGER.debug('Setting provider query filters')
        self.filters = self._get_option('filters')
        self.fields = self.get_fields()

    def _get_option(self, name):
        """
        Read a single value from the provider ``options`` mapping.

        :param name: option name

        :returns: the option value, or ``None`` when the option (or the
                  whole ``options`` mapping) is not configured
        """

        # NOTE(review): self.options is set outside this class (presumably
        # by BaseProvider); treat a missing/None mapping as "no options".
        return (self.options or {}).get(name)

    def _fetch_geojson(self, url):
        """
        Fetch a URL and decode the response body as JSON.

        :param url: fully built request URL

        :returns: decoded JSON document
        """

        LOGGER.debug(f'Fetching data from {url}')
        response = requests.get(url)
        LOGGER.debug(f'Response: {response}')
        data = response.json()
        LOGGER.debug(f'Data: {data}')

        return data

    def _get_time_constraints(self, datetime_):
        """
        Build the ``time`` constraints of a query.

        A client supplied ``datetime_`` always wins; the configured
        ``max_age_hours`` window is only applied as a default when no
        ``datetime_`` was given.

        :param datetime_: single instant, ``begin/end`` interval (either
                          side may be ``..`` or empty for open-ended), or
                          ``None``

        :returns: `list` of constraint strings (possibly empty)
        """

        if datetime_ is not None:
            LOGGER.debug('Setting datetime filters')
            if '/' not in datetime_:
                return [f'time={datetime_}']

            LOGGER.debug('detected time range')
            constraints = []
            time_begin, time_end = datetime_.split('/')
            if time_begin not in ('', '..'):
                LOGGER.debug('Setting time_begin')
                constraints.append(f'time>={time_begin}')
            if time_end not in ('', '..'):
                LOGGER.debug('Setting time_end')
                constraints.append(f'time<={time_end}')
            return constraints

        max_age_hours = self._get_option('max_age_hours')
        if max_age_hours is not None:
            LOGGER.debug(f'Setting default time filter {max_age_hours} hours')
            mintime = datetime.now(timezone.utc) - timedelta(
                hours=max_age_hours)
            mintime = mintime.strftime('%Y-%m-%dT%H:%M:%SZ')
            return [f'time>={mintime}']

        return []

    def _get_id_value(self, properties):
        """
        Extract the configured identifier value from feature properties.

        The exact ``id_field`` name is tried first, then a case-insensitive
        match, since the returned property names may differ in case from
        the configured name.

        :param properties: `dict` of feature properties

        :returns: identifier value

        :raises ProviderQueryError: if no matching property exists
        """

        if self.id_field in properties:
            return properties[self.id_field]

        wanted = str(self.id_field).lower()
        for key, value in properties.items():
            if str(key).lower() == wanted:
                return value

        msg = 'Cannot determine station identifier'
        LOGGER.error(f'{msg}: {self.id_field} not in feature properties')
        raise ProviderQueryError(msg)

    def get_fields(self):
        """
        Derive field definitions from a single sample feature.

        :returns: `dict` mapping each property name to ``{'type': <name>}``
                  where ``<name>`` is the Python type name of the sample
                  value; empty `dict` if the service returned no features
        """

        LOGGER.debug('Fetching one feature for field definitions')
        features = self.query(limit=1)['features']
        if not features:
            LOGGER.warning('No features returned; no fields detected')
            return {}

        fields = {}
        for key, value in features[0]['properties'].items():
            LOGGER.debug(f'Field: {key}={value}')
            fields[key] = {'type': type(value).__name__}

        return fields

    # The list defaults are kept to leave the signature unchanged; they are
    # never mutated in this method.
    def query(self, startindex=0, limit=10, resulttype='results',
              bbox=[], datetime_=None, properties=[], sortby=[],
              select_properties=[], skip_geometry=False, q=None,
              filterq=None, **kwargs):
        """
        Query the Tabledap service.

        Only ``startindex``, ``limit``, ``bbox`` and ``datetime_`` are
        used; the remaining parameters are accepted but ignored.

        :param startindex: index of the first feature to return
        :param limit: maximum number of features to return
        :param bbox: `list` of minx, miny, maxx, maxy
        :param datetime_: instant or interval; overrides the configured
                          ``max_age_hours`` default

        :returns: `dict` FeatureCollection with ``numberMatched`` and
                  ``numberReturned``

        :raises ProviderQueryError: if a feature lacks the identifier field
        """

        query_params = []

        if self.filters is not None:
            LOGGER.debug(f'Setting filters ({self.filters})')
            query_params.append(self.filters)

        query_params.extend(self._get_time_constraints(datetime_))

        if bbox:
            LOGGER.debug('Setting bbox')
            query_params.extend([
                f'latitude>={bbox[1]}',
                f'latitude<={bbox[3]}',
                f'longitude>={bbox[0]}',
                f'longitude<={bbox[2]}'
            ])

        url = f'{self.data}.geoJson?{"&".join(query_params)}'
        all_features = self._fetch_geojson(url)['features']

        # Paging is applied locally on the full result: the window is
        # [startindex, startindex + limit), not [startindex, limit).
        page = all_features[startindex:startindex + limit]

        # add id to each feature as this is required by pygeoapi
        for feature in page:
            feature_properties = feature['properties']
            id_ = self._get_id_value(feature_properties)
            # ID used to extract individual features (see get())
            feature['id'] = f"{id_}.{feature_properties['time']}"

        return {
            'type': 'FeatureCollection',
            'features': page,
            'numberMatched': len(all_features),
            'numberReturned': len(page)
        }

    def get(self, identifier, **kwargs):
        """
        Fetch a single feature by identifier.

        :param identifier: feature id in the ``<id_field value>.<time>``
                           form produced by :meth:`query`

        :returns: `dict` of the first matching GeoJSON feature

        :raises ProviderNotFoundError: if the identifier is malformed or no
                                       feature matches
        """

        # Split on the last dot so that identifier values containing dots
        # still resolve.
        # NOTE(review): assumes the time part itself contains no dot
        # (e.g. no fractional seconds) - confirm against the service.
        id_, separator, obs_time = identifier.rpartition('.')
        if not separator or not id_ or not obs_time:
            msg = 'No features found'
            LOGGER.error(f'{msg}: invalid identifier {identifier}')
            raise ProviderNotFoundError(msg)

        query_params = []

        if self.filters is not None:
            LOGGER.debug(f'Setting filters ({self.filters})')
            query_params.append(self.filters)

        query_params.extend([
            f'time={obs_time}',
            f'{self.id_field}=%22{id_}%22'
        ])

        url = f'{self.data}.geoJson?{"&".join(query_params)}'
        features = self._fetch_geojson(url)['features']

        if not features:
            msg = 'No features found'
            LOGGER.error(msg)
            raise ProviderNotFoundError(msg)

        LOGGER.debug('Truncating to first feature')
        feature = features[0]
        feature['id'] = identifier

        return feature