implement CSW facade (#1386)
This commit is contained in:
@@ -20,6 +20,7 @@ parameters.
|
||||
|
||||
`ElasticsearchCatalogue`_,✅,results/hits,✅,✅,✅,✅,❌
|
||||
`TinyDBCatalogue`_,✅,results/hits,✅,✅,✅,✅,✅
|
||||
`CSWFacade`_,✅,results/hits,✅,✅,✅,❌,❌
|
||||
|
||||
|
||||
Below are specific connection examples based on supported providers.
|
||||
@@ -70,6 +71,25 @@ To publish a TinyDB index, the following are required in your index:
|
||||
id_field: identifier
|
||||
time_field: datetimefield
|
||||
|
||||
CSWFacade
|
||||
^^^^^^^^^
|
||||
|
||||
.. note::
|
||||
Requires Python package `OWSLib`_
|
||||
|
||||
To publish a CSW using pygeoapi, the CSW base URL (`data`) is required. Note that the
|
||||
CSW Record core model is supported as a baseline.
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
providers:
|
||||
- type: record
|
||||
name: CSWFacade
|
||||
data: https://demo.pycsw.org/cite/csw
|
||||
id_field: identifier
|
||||
time_field: date
|
||||
title_field: title
|
||||
|
||||
|
||||
Metadata search examples
|
||||
------------------------
|
||||
@@ -103,3 +123,4 @@ Metadata search examples
|
||||
|
||||
.. _`OGC API - Records`: https://ogcapi.ogc.org/records
|
||||
.. _`OGC API - Records GeoJSON Features`: https://raw.githubusercontent.com/opengeospatial/ogcapi-records/master/core/openapi/schemas/recordGeoJSON.yaml
|
||||
.. _`OWSLib`: https://geopython.github.io/OWSLib
|
||||
|
||||
@@ -40,6 +40,7 @@ PLUGINS = {
|
||||
'provider': {
|
||||
'AzureBlobStorage': 'pygeoapi.provider.azure_.AzureBlobStorageProvider', # noqa
|
||||
'CSV': 'pygeoapi.provider.csv_.CSVProvider',
|
||||
'CSWFacade': 'pygeoapi.provider.csw_facade.CSWFacadeProvider',
|
||||
'Elasticsearch': 'pygeoapi.provider.elasticsearch_.ElasticsearchProvider', # noqa
|
||||
'ElasticsearchCatalogue': 'pygeoapi.provider.elasticsearch_.ElasticsearchCatalogueProvider', # noqa
|
||||
'ERDDAPTabledap': 'pygeoapi.provider.erddap.TabledapProvider',
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
# =================================================================
|
||||
#
|
||||
# Authors: Tom Kralidis <tomkralidis@gmail.com>
|
||||
#
|
||||
# Copyright (c) 2023 Tom Kralidis
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person
|
||||
# obtaining a copy of this software and associated documentation
|
||||
# files (the "Software"), to deal in the Software without
|
||||
# restriction, including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following
|
||||
# conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# =================================================================
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from owslib import fes
|
||||
from owslib.csw import CatalogueServiceWeb
|
||||
from owslib.ows import ExceptionReport
|
||||
|
||||
from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError,
|
||||
ProviderInvalidQueryError,
|
||||
ProviderItemNotFoundError,
|
||||
ProviderQueryError)
|
||||
from pygeoapi.util import bbox2geojsongeometry, crs_transform, get_typed_value
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CSWFacadeProvider(BaseProvider):
    """
    CSW Facade provider

    Translates pygeoapi record queries into CSW GetRecords / GetRecordById
    requests (issued through OWSLib against the URL in ``self.data``) and
    converts the returned records into feature dictionaries.
    """

    def __init__(self, provider_def):
        """
        Initialize object

        :param provider_def: provider definition

        :returns: pygeoapi.provider.csw_facade.CSWFacadeProvider
        """

        super().__init__(provider_def)

        # pygeoapi property name -> (CSW queryable used in filters/sorting,
        # attribute name read from the OWSLib record object)
        self.record_mappings = {
            'type': ('dc:type', 'type'),
            'title': ('dc:title', 'title'),
            'description': ('dct:abstract', 'abstract'),
            'keywords': ('dc:subject', 'subjects'),
            'date': ('dc:date', 'date'),
            'created': ('dct:created', 'created'),
            'updated': ('dct:modified', 'modified'),
            'rights': ('dc:rights', 'rights'),
            'language': ('dc:language', 'language')
        }

        self.fields = self.get_fields()

    def get_fields(self):
        """
        Get provider field information (names, types)

        Every mapped property is reported as a string; the date-like
        properties additionally carry the `date-time` format.

        :returns: dict of fields
        """

        fields = {}
        date_fields = ['date', 'created', 'updated']

        for key in self.record_mappings:
            LOGGER.debug(f'key: {key}')
            fields[key] = {'type': 'string'}

            if key in date_fields:
                fields[key]['format'] = 'date-time'

        return fields

    @crs_transform
    def query(self, offset=0, limit=10, resulttype='results',
              bbox=[], datetime_=None, properties=[], sortby=[],
              select_properties=[], skip_geometry=False, q=None, **kwargs):
        """
        CSW GetRecords query

        :param offset: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent); an extent bound
                          given as `..` or left empty is treated as open
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: `dict` of GeoJSON FeatureCollection

        :raises ProviderInvalidQueryError: if a filter or sort property is
                                           not a mapped property
        :raises ProviderQueryError: if `time_field` is not a mapped property
                                    or the CSW reports an exception
        :raises ProviderConnectionError: if the CSW cannot be reached
        """

        # NOTE: the list defaults in the signature are only read, never
        # mutated, so sharing them across calls is harmless

        constraints = []

        response = {
            'type': 'FeatureCollection',
            'features': []
        }

        LOGGER.debug('Processing query parameters')

        if bbox:
            LOGGER.debug('Processing bbox parameter')
            LOGGER.debug('Swapping coordinate axis order from xy to yx')
            bbox2 = [bbox[1], bbox[0], bbox[3], bbox[2]]
            constraints.append(fes.BBox(bbox2))

        if datetime_:
            LOGGER.debug('Processing datetime parameter')
            try:
                date_property = self.record_mappings[self.time_field][0]
            except (KeyError, TypeError):
                # time_field comes from the provider definition, so this is
                # a configuration problem rather than a bad client request
                msg = f'Invalid time_field: {self.time_field}'
                LOGGER.error(msg)
                raise ProviderQueryError(msg)

            if '/' in datetime_:
                begin, end = datetime_.split('/')
                LOGGER.debug('Processing time extent')
                # '..' or an empty value marks an open-ended bound; sending
                # it to the CSW as a literal date would be meaningless
                open_bounds = ('..', '')
                if begin not in open_bounds:
                    constraints.append(
                        fes.PropertyIsGreaterThan(date_property, begin))
                if end not in open_bounds:
                    constraints.append(
                        fes.PropertyIsLessThan(date_property, end))
            else:
                LOGGER.debug('Processing time instant')
                constraints.append(fes.PropertyIsEqualTo(date_property,
                                                         datetime_))

        for p in properties:
            LOGGER.debug(f'Processing property {p} parameter')
            prop = self._get_queryable(p[0])
            constraints.append(fes.PropertyIsEqualTo(prop, p[1]))

        if q is not None:
            LOGGER.debug('Processing q parameter')
            anytext = fes.PropertyIsLike(propertyname='csw:AnyText', literal=q,
                                         escapeChar='\\', singleChar='?',
                                         wildCard='*')
            constraints.append(anytext)

        if sortby:
            LOGGER.debug('Processing sortby parameter')
            sort_orders = {
                '+': 'ASC',
                '-': 'DESC'
            }
            sorts = [
                fes.SortProperty(self._get_queryable(s['property']),
                                 sort_orders[s['order']])
                for s in sortby
            ]
            sortby2 = fes.SortBy(sorts)
        else:
            sortby2 = None

        # several constraints are combined into one logical AND
        if len(constraints) > 1:
            constraints = [fes.And(constraints)]

        LOGGER.debug(f'Querying CSW: {self.data}')
        csw = self._get_csw()
        try:
            csw.getrecords2(esn='full', maxrecords=limit, startposition=offset,
                            constraints=constraints, sortby=sortby2,
                            resulttype=resulttype)
        except ExceptionReport as err:
            msg = f'CSW error {err}'
            LOGGER.error(msg)
            raise ProviderQueryError(msg) from err

        response['numberMatched'] = csw.results['matches']
        response['numberReturned'] = csw.results['returned']
        LOGGER.debug(f"Found {response['numberMatched']} records")
        LOGGER.debug(f"Returned {response['numberReturned']} records")

        LOGGER.debug('Building result set')
        for record in csw.records.values():
            response['features'].append(self._owslibrecord2record(record))

        return response

    @crs_transform
    def get(self, identifier, **kwargs):
        """
        CSW GetRecordById query

        :param identifier: feature id

        :returns: dict of single GeoJSON feature

        :raises ProviderItemNotFoundError: if the CSW returns no record
        :raises ProviderQueryError: if the CSW reports an exception
        :raises ProviderConnectionError: if the CSW cannot be reached
        """

        csw = self._get_csw()
        try:
            csw.getrecordbyid([identifier], esn='full')
        except ExceptionReport as err:
            # report CSW exceptions the same way query() does
            msg = f'CSW error {err}'
            LOGGER.error(msg)
            raise ProviderQueryError(msg) from err

        if not csw.records:
            err = f'item {identifier} not found'
            LOGGER.error(err)
            raise ProviderItemNotFoundError(err)

        # only the first returned record is used
        record = next(iter(csw.records.values()))

        return self._owslibrecord2record(record)

    def _get_csw(self) -> CatalogueServiceWeb:
        """
        Helper function to connect to the CSW

        A new `CatalogueServiceWeb` is constructed on every call; nothing
        is cached on the provider.

        :returns: `owslib.csw.CatalogueServiceWeb`

        :raises ProviderConnectionError: if the connection cannot be set up
        """

        try:
            return CatalogueServiceWeb(self.data)
        except Exception as err:
            msg = f'CSW connection error: {err}'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg) from err

    def _get_queryable(self, name):
        """
        Helper function to resolve a property name to its CSW queryable

        :param name: `str` of property name (key of `self.record_mappings`)

        :returns: `str` of CSW queryable

        :raises ProviderInvalidQueryError: if the property is not mapped
        """

        try:
            return self.record_mappings[name][0]
        except (KeyError, TypeError):
            msg = f'Invalid property: {name}'
            LOGGER.error(msg)
            raise ProviderInvalidQueryError(msg)

    def _gen_getrecordbyid_link(self, identifier: str,
                                csw_version: str = '2.0.2') -> dict:
        """
        Helper function to generate a CSW GetRecordById URL

        :param identifier: `str` of record identifier
        :param csw_version: `str` of CSW version (default is `2.0.2`)

        :returns: `dict` of link object of GetRecordById URL
        """

        params = {
            'service': 'CSW',
            'version': csw_version,
            'request': 'GetRecordById',
            'id': identifier
        }

        # pick the separator so that a base URL which already carries a
        # query string still yields a well-formed URL
        if '?' not in self.data:
            separator = '?'
        elif self.data.endswith(('?', '&')):
            separator = ''
        else:
            separator = '&'

        return {
            'rel': 'alternate',
            'type': 'application/xml',
            'title': 'This document as XML',
            'href': f'{self.data}{separator}{urlencode(params)}',
        }

    def _owslibrecord2record(self, record):
        """
        Helper function to transform an OWSLib CSW record into a feature

        :param record: record object taken from the OWSLib CSW `records`

        :returns: `dict` of GeoJSON feature
        """

        LOGGER.debug(f'Transforming {record.identifier}')
        feature = {
            'id': record.identifier,
            # the `type` member is mandatory on a GeoJSON Feature object
            'type': 'Feature',
            'geometry': None,
            'time': record.date or None,
            'properties': {},
            'links': [
                self._gen_getrecordbyid_link(record.identifier)
            ]
        }

        LOGGER.debug('Processing record mappings to properties')
        for key, value in self.record_mappings.items():
            prop_value = getattr(record, value[1])
            # empty values are left out of the properties altogether
            if prop_value not in [None, [], '']:
                feature['properties'][key] = prop_value

        if record.bbox is not None:
            LOGGER.debug('Adding bbox')
            bbox = [
                get_typed_value(record.bbox.minx),
                get_typed_value(record.bbox.miny),
                get_typed_value(record.bbox.maxx),
                get_typed_value(record.bbox.maxy)
            ]
            feature['geometry'] = bbox2geojsongeometry(bbox)

        if record.references:
            LOGGER.debug('Adding references as links')
            for link in record.references:
                feature['links'].append({
                    'title': link['scheme'],
                    'href': link['url']
                })
        if record.uris:
            LOGGER.debug('Adding URIs as links')
            for link in record.uris:
                feature['links'].append({
                    'title': link['name'],
                    'href': link['url']
                })

        return feature

    def __repr__(self):
        return f'<CSWFacadeProvider> {self.data}'
|
||||
+16
-2
@@ -47,8 +47,9 @@ from urllib.parse import urlparse
|
||||
from urllib.request import urlopen
|
||||
|
||||
import dateutil.parser
|
||||
import shapely.ops
|
||||
from shapely import ops
|
||||
from shapely.geometry import (
|
||||
box,
|
||||
GeometryCollection,
|
||||
LinearRing,
|
||||
LineString,
|
||||
@@ -710,7 +711,7 @@ def get_transform_from_crs(
|
||||
crs_transform = pyproj.Transformer.from_crs(
|
||||
crs_in, crs_out, always_xy=always_xy,
|
||||
).transform
|
||||
return partial(shapely.ops.transform, crs_transform)
|
||||
return partial(ops.transform, crs_transform)
|
||||
|
||||
|
||||
def crs_transform(func):
|
||||
@@ -840,3 +841,16 @@ class UrlPrefetcher:
|
||||
except Exception: # noqa
|
||||
return CaseInsensitiveDict()
|
||||
return response.headers
|
||||
|
||||
|
||||
def bbox2geojsongeometry(bbox: list) -> dict:
    """
    Build a GeoJSON geometry from bounding box values

    The values are unpacked into `box` and the resulting geometry is
    passed to `geom_to_geojson`.

    :param bbox: `list` of minx, miny, maxx, maxy

    :returns: `dict` of GeoJSON geometry
    """

    return geom_to_geojson(box(*bbox))
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
# =================================================================
|
||||
#
|
||||
# Authors: Tom Kralidis <tomkralidis@gmail.com>
|
||||
#
|
||||
# Copyright (c) 2023 Tom Kralidis
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person
|
||||
# obtaining a copy of this software and associated documentation
|
||||
# files (the "Software"), to deal in the Software without
|
||||
# restriction, including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following
|
||||
# conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# =================================================================
|
||||
|
||||
import pytest
|
||||
|
||||
from pygeoapi.provider.base import ProviderItemNotFoundError
|
||||
from pygeoapi.provider.csw_facade import CSWFacadeProvider
|
||||
|
||||
|
||||
@pytest.fixture()
def config():
    """Provider definition shared by the CSWFacade provider tests"""

    # NOTE: the tests target a CSW served at http://localhost:8000;
    # https://demo.pycsw.org/cite/csw was kept as a commented-out
    # alternative endpoint
    provider_def = {
        'name': 'CSWFacade',
        'type': 'record',
        'data': 'http://localhost:8000',
        'id_field': 'identifier',
        'time_field': 'date'
    }

    return provider_def
|
||||
|
||||
|
||||
def test_query(config):
    """Check field discovery and each supported query parameter"""

    first_id = 'urn:uuid:19887a8a-f6b0-4a63-ae56-7fba0e17801f'
    third_id = 'urn:uuid:1ef30a8b-876d-4828-9246-c37ab4510bbd'

    provider = CSWFacadeProvider(config)

    # field information
    fields = provider.get_fields()
    assert len(fields) == 9
    assert all(field['type'] == 'string' for field in fields.values())

    # default query
    results = provider.query()
    features = results['features']
    assert len(features) == 10
    assert results['numberMatched'] == 12
    assert results['numberReturned'] == 10

    first = features[0]
    assert first['id'] == first_id
    assert first['geometry'] is None
    assert first['properties']['title'] == 'Lorem ipsum'
    assert first['properties']['keywords'][0] == 'Tourism--Greece'

    second_geometry = features[1]['geometry']
    assert second_geometry['type'] == 'Polygon'
    assert second_geometry['coordinates'][0][0][0] == 17.92
    assert second_geometry['coordinates'][0][0][1] == 60.042

    # paging
    results = provider.query(limit=1)
    assert len(results['features']) == 1
    assert results['features'][0]['id'] == first_id

    results = provider.query(offset=2, limit=1)
    assert len(results['features']) == 1
    assert results['features'][0]['id'] == third_id

    assert len(results['features'][0]['properties']) == 2

    # full-text search, with and without sorting
    results = provider.query(q='lorem')
    assert results['numberMatched'] == 5

    descending_title = [{'property': 'title', 'order': '-'}]
    results = provider.query(q='lorem', sortby=descending_title)
    assert results['numberMatched'] == 5

    # hits only
    results = provider.query(resulttype='hits')
    assert len(results['features']) == 0
    assert results['numberMatched'] == 12

    # spatial filter
    results = provider.query(bbox=[-10, 40, 0, 60])
    assert len(results['features']) == 2

    # property filters
    results = provider.query(properties=[('title', 'Maecenas enim')])
    assert len(results['features']) == 1

    title_and_type = [
        ('title', 'Maecenas enim'),
        ('type', 'http://purl.org/dc/dcmitype/Text')
    ]
    results = provider.query(properties=title_and_type)
    assert len(results['features']) == 1

    # temporal filters: instant, then extent
    results = provider.query(datetime_='2006-05-12')
    assert len(results['features']) == 1

    results = provider.query(datetime_='2004/2007')
    assert len(results['features']) == 3
|
||||
|
||||
|
||||
def test_get(config):
    """Fetch a single record by identifier and check its content"""

    identifier = 'urn:uuid:a06af396-3105-442d-8b40-22b57a90d2f2'

    provider = CSWFacadeProvider(config)
    result = provider.get(identifier)

    assert result['id'] == identifier
    assert result['geometry'] is None

    props = result['properties']
    assert props['title'] == 'Lorem ipsum dolor sit amet'
    assert props['type'] == 'http://purl.org/dc/dcmitype/Image'

    # the first link is the GetRecordById XML representation
    xml_link = result['links'][0]
    assert xml_link['rel'] == 'alternate'
    assert xml_link['type'] == 'application/xml'
    assert 'service=CSW' in xml_link['href']
|
||||
|
||||
|
||||
def test_get_not_existing_item_raise_exception(config):
    """An unknown identifier must raise ProviderItemNotFoundError"""

    provider = CSWFacadeProvider(config)
    missing_id = '404'

    with pytest.raises(ProviderItemNotFoundError):
        provider.get(missing_id)
|
||||
Reference in New Issue
Block a user