From a4bf35d310f66aa03889e086de4a0d3d4090f394 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 27 Apr 2021 19:52:28 -0400 Subject: [PATCH] implement STAC 1.0.0-rc.2 improvements (#679) --- pygeoapi/api.py | 19 ++-- pygeoapi/provider/filesystem.py | 138 ++++++++++++++++----------- pygeoapi/templates/stac/catalog.html | 17 +++- pygeoapi/templates/stac/item.html | 4 + pygeoapi/util.py | 44 +++++++++ tests/test_filesystem_provider.py | 2 +- 6 files changed, 155 insertions(+), 69 deletions(-) diff --git a/pygeoapi/api.py b/pygeoapi/api.py index 361cf9e..282a33c 100644 --- a/pygeoapi/api.py +++ b/pygeoapi/api.py @@ -2308,20 +2308,17 @@ tiles/{{{}}}/{{{}}}/{{{}}}/{{{}}}?f=mvt' 400, headers_, format_, 'InvalidParameterValue', msg) id_ = 'pygeoapi-stac' - stac_version = '0.6.2' + stac_version = '1.0.0-rc.2' stac_url = os.path.join(self.config['server']['url'], 'stac') content = { 'id': id_, + 'type': 'Catalog', 'stac_version': stac_version, 'title': self.config['metadata']['identification']['title'], 'description': self.config['metadata']['identification']['description'], # noqa - 'license': self.config['metadata']['license']['name'], - 'providers': [{ - 'name': self.config['metadata']['provider']['name'], - 'url': self.config['metadata']['provider']['url'], - }], - 'links': [] + 'links': [], + } stac_collections = filter_dict_by_key_value(self.config['resources'], @@ -2329,12 +2326,12 @@ tiles/{{{}}}/{{{}}}/{{{}}}/{{{}}}?f=mvt' for key, value in stac_collections.items(): content['links'].append({ - 'rel': 'collection', + 'rel': 'child', 'href': '{}/{}?f=json'.format(stac_url, key), 'type': 'application/json' }) content['links'].append({ - 'rel': 'collection', + 'rel': 'child', 'href': '{}/{}'.format(stac_url, key), 'type': 'text/html' }) @@ -2379,14 +2376,14 @@ tiles/{{{}}}/{{{}}}/{{{}}}/{{{}}}?f=mvt' 500, headers_, format_, 'NoApplicableCode', msg) id_ = '{}-stac'.format(dataset) - stac_version = '0.6.2' + stac_version = '1.0.0-rc.2' description = stac_collections[dataset]['description'] content = { 'id': id_, + 'type': 'Catalog', 'stac_version': stac_version, 'description': description, - 'extent': stac_collections[dataset]['extents'], 'links': [] } try: diff --git a/pygeoapi/provider/filesystem.py b/pygeoapi/provider/filesystem.py index c35e467..2db56e9 100644 --- a/pygeoapi/provider/filesystem.py +++ b/pygeoapi/provider/filesystem.py @@ -27,13 +27,16 @@ # # ================================================================= +from datetime import datetime import io +from json import loads import logging import os -from json import loads +from urllib.parse import urljoin + from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError, ProviderNotFoundError) -from urllib.parse import urljoin +from pygeoapi.util import file_modified_iso8601, get_path_basename LOGGER = logging.getLogger(__name__) @@ -141,39 +144,52 @@ class FileSystemProvider(BaseProvider): return fh.read() elif resource_type == 'directory': + content['type'] = 'Catalog' dirpath2 = os.listdir(data_path) dirpath2.sort() for dc in dirpath2: - # @TODO: handle a generic directory for tiles + # TODO: handle a generic directory for tiles if dc == "tiles": continue + fullpath = os.path.join(data_path, dc) + filectime = file_modified_iso8601(fullpath) + filesize = os.path.getsize(fullpath) + if os.path.isdir(fullpath): newpath = os.path.join(baseurl, urlpath, dc) - child_links.append({ - 'rel': 'child', - 'href': '{}?f=json'.format(newpath), - 'type': 'application/json' - }) +# child_links.append({ +# 'rel': 'child', +# 'href': '{}?f=json'.format(newpath), +# 'type': 'application/json' +# }) child_links.append({ 'rel': 'child', 'href': newpath, - 'type': 'text/html' + 'type': 'text/html', + 'created': filectime, }) elif os.path.isfile(fullpath): basename, extension = os.path.splitext(dc) newpath = os.path.join(baseurl, urlpath, basename) + newpath2 = '{}{}'.format(newpath, extension) if extension in self.file_types: - child_links.append({ - 'rel': 'item', - 'href': '{}?f=json'.format(newpath), - 'type': 'application/json' - }) + fullpath = os.path.join(data_path, dc) child_links.append({ 'rel': 'item', 'href': newpath, - 'type': 'text/html' + 'title': get_path_basename(newpath2), + 'created': filectime, + 'file:size': filesize }) +# child_links.append({ +# 'rel': 'item', +# 'title': get_path_basename(newpath2), +# 'href': newpath, +# 'type': 'text/html', +# 'created': filectime, +# 'file:size': filesize +# }) elif resource_type == 'file': filename = os.path.basename(data_path) @@ -183,6 +199,9 @@ class FileSystemProvider(BaseProvider): filename = filename.replace(id_, '') url = '{}/{}{}'.format(baseurl, urlpath, filename) + filectime = file_modified_iso8601(data_path) + filesize = os.path.getsize(data_path) + content = { 'id': id_, 'type': 'Feature', @@ -194,7 +213,9 @@ class FileSystemProvider(BaseProvider): content.update(_describe_file(data_path)) content['assets']['default'] = { - 'href': url + 'href': url, + 'created': filectime, + 'file:size': filesize } content['links'].extend(child_links) @@ -219,7 +240,7 @@ def _describe_file(filepath): content = { 'bbox': None, 'geometry': None, - 'properties': {} + 'properties': {'datetime': None} } mcf_file = '{}.yml'.format(os.path.splitext(filepath)[0]) @@ -279,45 +300,54 @@ def _describe_file(filepath): } for k, v in d.tags(1).items(): content['properties'][k] = v + if k in ['GRIB_REF_TIME']: + value = int(v.split()[0]) + datetime_ = datetime.fromtimestamp(value) + content['properties']['datetime'] = datetime_.isoformat() + 'Z' # noqa except rasterio.errors.RasterioIOError: - LOGGER.debug('Testing vector data detection') - d = fiona.open(filepath) - scrs = CRS(d.crs) - if scrs.to_epsg() is not None and scrs.to_epsg() != 4326: - tcrs = CRS.from_epsg(4326) - bnds = transform_bounds(scrs, tcrs, - d.bounds[0], d.bounds[1], - d.bounds[2], d.bounds[3]) - content['properties']['projection'] = scrs.to_epsg() - else: - bnds = d.bounds + try: + LOGGER.debug('Testing vector data detection') + d = fiona.open(filepath) + scrs = CRS(d.crs) + if scrs.to_epsg() is not None and scrs.to_epsg() != 4326: + tcrs = CRS.from_epsg(4326) + bnds = transform_bounds(scrs, tcrs, + d.bounds[0], d.bounds[1], + d.bounds[2], d.bounds[3]) + content['properties']['projection'] = scrs.to_epsg() + else: + bnds = d.bounds - if d.schema['geometry'] not in [None, 'None']: - content['bbox'] = [ - bnds[0], - bnds[1], - bnds[2], - bnds[3] - ] - content['geometry'] = { - 'type': 'Polygon', - 'coordinates': [[ - [bnds[0], bnds[1]], - [bnds[0], bnds[3]], - [bnds[2], bnds[3]], - [bnds[2], bnds[1]], - [bnds[0], bnds[1]] - ]] - } + if d.schema['geometry'] not in [None, 'None']: + content['bbox'] = [ + bnds[0], + bnds[1], + bnds[2], + bnds[3] + ] + content['geometry'] = { + 'type': 'Polygon', + 'coordinates': [[ + [bnds[0], bnds[1]], + [bnds[0], bnds[3]], + [bnds[2], bnds[3]], + [bnds[2], bnds[1]], + [bnds[0], bnds[1]] + ]] + } - for k, v in d.schema['properties'].items(): - content['properties'][k] = v + for k, v in d.schema['properties'].items(): + content['properties'][k] = v + + if d.driver == 'ESRI Shapefile': + id_ = os.path.splitext(os.path.basename(filepath))[0] + content['assets'] = {} + for suffix in ['shx', 'dbf', 'prj', 'shp.xml']: + content['assets'][suffix] = { + 'href': './{}.{}'.format(id_, suffix) + } + + except fiona.errors.DriverError: + LOGGER.debug('Could not detect raster or vector data') - if d.driver == 'ESRI Shapefile': - id_ = os.path.splitext(os.path.basename(filepath))[0] - content['assets'] = {} - for suffix in ['shx', 'dbf', 'prj', 'shp.xml']: - content['assets'][suffix] = { - 'href': './{}.{}'.format(id_, suffix) - } return content diff --git a/pygeoapi/templates/stac/catalog.html b/pygeoapi/templates/stac/catalog.html index 007ce11..cdfec84 100644 --- a/pygeoapi/templates/stac/catalog.html +++ b/pygeoapi/templates/stac/catalog.html @@ -15,16 +15,27 @@ Name + Last modified + Size {% for link in data['links'] %} - {% if link['type'] == 'text/html' and link['rel'] in ['child', 'item'] %} + {% if link['rel'] in ['child', 'item'] %} - - {{ link['href'] | get_path_basename }} + {% if link['title'] %} + {{ link['title'] | get_path_basename }} + {% else %} + {{ link['href'] | get_path_basename }} + {% endif %} + {{ link['created'] }} + {% if link['file:size'] %} + {{ link['file:size'] | human_size }} + {% else %} + - + {% endif %} {% endif %} {% endfor %} diff --git a/pygeoapi/templates/stac/item.html b/pygeoapi/templates/stac/item.html index f0ba1b6..1ea8e34 100644 --- a/pygeoapi/templates/stac/item.html +++ b/pygeoapi/templates/stac/item.html @@ -30,6 +30,8 @@ URL + Last Modified + Size @@ -39,6 +41,8 @@ {{ link['href'] | get_path_basename }} + {{ link['created'] }} + {{ link['file:size'] | human_size }} {% endfor %} diff --git a/pygeoapi/util.py b/pygeoapi/util.py index ac9f863..96b43ab 100644 --- a/pygeoapi/util.py +++ b/pygeoapi/util.py @@ -186,6 +186,49 @@ def format_datetime(value, format_=DATETIME_FORMAT): return dateutil.parser.isoparse(value).strftime(format_) +def file_modified_iso8601(filepath): + """ + Provide a file's ctime in ISO8601 + + :param filepath: path to file + + :returns: string of ISO8601 + """ + + return datetime.fromtimestamp( + os.path.getctime(filepath)).strftime('%Y-%m-%dT%H:%M:%SZ') + + +def human_size(nbytes): + """ + Provides human readable file size + + source: https://stackoverflow.com/a/14996816 + + :param nbytes: int of file size (bytes) + :param units: list of unit abbreviations + + :returns: string of human readable filesize + """ + + suffixes = ['B', 'K', 'M', 'G', 'T', 'P'] + + i = 0 + + while nbytes >= 1024 and i < len(suffixes)-1: + nbytes /= 1024. + i += 1 + + if suffixes[i] == 'K': + f = str(int(nbytes)).rstrip('0').rstrip('.') + elif suffixes[i] == 'B': + return nbytes + else: + f = '{:.1f}'.format(nbytes).rstrip('0').rstrip('.') + + return '{}{}'.format(f, suffixes[i]) + + def format_duration(start, end=None): """ Parse a start and (optional) end datetime as ISO 8601 strings, calculate @@ -279,6 +322,7 @@ def render_j2_template(config, template, data): env.filters['to_json'] = to_json env.filters['format_datetime'] = format_datetime env.filters['format_duration'] = format_duration + env.filters['human_size'] = human_size env.globals.update(to_json=to_json) env.filters['get_path_basename'] = get_path_basename diff --git a/tests/test_filesystem_provider.py b/tests/test_filesystem_provider.py index 7546927..9195e40 100644 --- a/tests/test_filesystem_provider.py +++ b/tests/test_filesystem_provider.py @@ -54,7 +54,7 @@ def test_query(config): r = p.get_data_path(baseurl, urlpath, dirpath) - assert len(r['links']) == 16 + assert len(r['links']) == 10 r = p.get_data_path(baseurl, urlpath, '/poi_portugal')