Pyarrow parquet provider (#1722)
* Pyarrow parquet provider * Defer crs management to pygeoapi * Add parquet provider docs * Fix flake8 errors * Remove extra .parquet * Address reviews
This commit is contained in:
@@ -27,6 +27,7 @@ parameters.
|
||||
`MongoDB`_,✅/❌,results,✅,✅,✅,✅,❌,❌,✅
|
||||
`OGR`_,✅/❌,results/hits,✅,❌,❌,✅,❌,❌,✅
|
||||
`Oracle`_,✅/✅,results/hits,✅,❌,✅,✅,❌,❌,✅
|
||||
`Parquet`_,✅/✅,results/hits,✅,✅,❌,✅,❌,❌,✅
|
||||
`PostgreSQL`_,✅/✅,results/hits,✅,✅,✅,✅,✅,❌,✅
|
||||
`SQLiteGPKG`_,✅/❌,results/hits,✅,❌,❌,✅,❌,❌,✅
|
||||
`SensorThings API`_,✅/✅,results/hits,✅,✅,✅,✅,❌,❌,✅
|
||||
@@ -432,6 +433,40 @@ useful e.g. for authorization at row level or manipulation of the explain plan w
|
||||
|
||||
An example an more information about that feature you can find in the test class in tests/test_oracle_provider.py.
|
||||
|
||||
.. _Parquet:
|
||||
|
||||
Parquet
|
||||
^^^^^^^
|
||||
|
||||
.. note::
|
||||
Requires Python package pyarrow
|
||||
|
||||
To publish a GeoParquet file (with a geometry column) the geopandas package is also required.
|
||||
|
||||
.. note::
|
||||
Reading data directly from a public s3 bucket is also supported.
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
providers:
|
||||
- type: feature
|
||||
name: Parquet
|
||||
data:
|
||||
source: ./tests/data/parquet/random.parquet
|
||||
id_field: id
|
||||
time_field: time
|
||||
x_field:
|
||||
- minlon
|
||||
- maxlon
|
||||
y_field:
|
||||
- minlat
|
||||
- maxlat
|
||||
|
||||
For GeoParquet data, the `x_field` and `y_field` must be specified in the provider definition,
|
||||
and they must be arrays of two column names that contain the x and y coordinates of the
|
||||
bounding box of each geometry. If the geometries in the data are all points, the `x_field` and `y_field`
|
||||
can be strings instead of arrays and refer to a single column each.
|
||||
|
||||
.. _PostgreSQL:
|
||||
|
||||
PostgreSQL
|
||||
|
||||
@@ -55,6 +55,7 @@ PLUGINS = {
|
||||
'MVT-proxy': 'pygeoapi.provider.mvt_proxy.MVTProxyProvider', # noqa: E501
|
||||
'OracleDB': 'pygeoapi.provider.oracle.OracleProvider',
|
||||
'OGR': 'pygeoapi.provider.ogr.OGRProvider',
|
||||
'Parquet': 'pygeoapi.provider.parquet.ParquetProvider',
|
||||
'PostgreSQL': 'pygeoapi.provider.postgresql.PostgreSQLProvider',
|
||||
'rasterio': 'pygeoapi.provider.rasterio_.RasterioProvider',
|
||||
'SensorThings': 'pygeoapi.provider.sensorthings.SensorThingsProvider',
|
||||
|
||||
@@ -0,0 +1,458 @@
|
||||
# =================================================================
|
||||
#
|
||||
# Authors: Leo Ghignone <leo.ghignone@gmail.com>
|
||||
#
|
||||
# Copyright (c) 2024 Leo Ghignone
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person
|
||||
# obtaining a copy of this software and associated documentation
|
||||
# files (the "Software"), to deal in the Software without
|
||||
# restriction, including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following
|
||||
# conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# =================================================================
|
||||
|
||||
from itertools import chain
|
||||
import json
|
||||
import logging
|
||||
|
||||
from dateutil.parser import isoparse
|
||||
import geopandas as gpd
|
||||
import pyarrow
|
||||
import pyarrow.compute as pc
|
||||
import pyarrow.dataset
|
||||
import s3fs
|
||||
|
||||
from pygeoapi.provider.base import (
|
||||
BaseProvider,
|
||||
ProviderConnectionError,
|
||||
ProviderGenericError,
|
||||
ProviderItemNotFoundError,
|
||||
ProviderQueryError,
|
||||
)
|
||||
from pygeoapi.util import crs_transform
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def arrow_to_pandas_type(arrow_type):
|
||||
pd_type = arrow_type.to_pandas_dtype()
|
||||
try:
|
||||
# Needed for specific types such as dtype('<M8[ns]')
|
||||
pd_type = pd_type.type
|
||||
except AttributeError:
|
||||
pd_type = pd_type
|
||||
return pd_type
|
||||
|
||||
|
||||
class ParquetProvider(BaseProvider):
|
||||
def __init__(self, provider_def):
|
||||
"""
|
||||
Initialize object
|
||||
|
||||
# Typical ParquetProvider YAML config:
|
||||
|
||||
provider:
|
||||
name: Parquet
|
||||
data:
|
||||
source: s3://example.com/parquet_directory/
|
||||
|
||||
id_field: gml_id
|
||||
|
||||
|
||||
:param provider_def: provider definition
|
||||
|
||||
:returns: pygeoapi.provider.parquet.ParquetProvider
|
||||
"""
|
||||
|
||||
super().__init__(provider_def)
|
||||
|
||||
# Source url is required
|
||||
self.source = self.data.get('source')
|
||||
if not self.source:
|
||||
msg = "Need explicit 'source' attr " \
|
||||
"in data field of provider config"
|
||||
LOGGER.error(msg)
|
||||
raise Exception(msg)
|
||||
|
||||
# Manage AWS S3 sources
|
||||
if self.source.startswith('s3'):
|
||||
self.source = self.source.split('://', 1)[1]
|
||||
self.fs = s3fs.S3FileSystem(default_cache_type='none')
|
||||
else:
|
||||
self.fs = None
|
||||
|
||||
# Build pyarrow dataset pointing to the data
|
||||
self.ds = pyarrow.dataset.dataset(self.source, filesystem=self.fs)
|
||||
|
||||
LOGGER.debug('Grabbing field information')
|
||||
self.fields = self.get_fields() # Must be set to visualise queryables
|
||||
|
||||
# Column names for bounding box data.
|
||||
if None in [self.x_field, self.y_field]:
|
||||
self.has_geometry = False
|
||||
else:
|
||||
self.has_geometry = True
|
||||
if isinstance(self.x_field, str):
|
||||
self.minx = self.x_field
|
||||
self.maxx = self.x_field
|
||||
else:
|
||||
self.minx, self.maxx = self.x_field
|
||||
|
||||
if isinstance(self.y_field, str):
|
||||
self.miny = self.y_field
|
||||
self.maxy = self.y_field
|
||||
else:
|
||||
self.miny, self.maxy = self.y_field
|
||||
self.bb = [self.minx, self.miny, self.maxx, self.maxy]
|
||||
|
||||
# Get the CRS of the data
|
||||
geo_metadata = json.loads(self.ds.schema.metadata[b'geo'])
|
||||
geom_column = geo_metadata['primary_column']
|
||||
# if the CRS is not set default to EPSG:4326, per geoparquet spec
|
||||
self.crs = (geo_metadata['columns'][geom_column]['crs']
|
||||
or 'EPSG:4326')
|
||||
|
||||
def _read_parquet(self, return_scanner=False, **kwargs):
|
||||
"""
|
||||
Scan a Parquet dataset with the given arguments
|
||||
|
||||
:returns: generator of RecordBatch with the queried values
|
||||
"""
|
||||
scanner = pyarrow.dataset.Scanner.from_dataset(self.ds, **kwargs)
|
||||
batches = scanner.to_batches()
|
||||
if return_scanner:
|
||||
return batches, scanner
|
||||
else:
|
||||
return batches
|
||||
|
||||
def get_fields(self):
|
||||
"""
|
||||
Get provider field information (names, types)
|
||||
|
||||
:returns: dict of fields
|
||||
"""
|
||||
|
||||
fields = dict()
|
||||
|
||||
for field_name, field_type in zip(self.ds.schema.names,
|
||||
self.ds.schema.types):
|
||||
# Geometry is managed as a special case by pygeoapi
|
||||
if field_name == 'geometry':
|
||||
continue
|
||||
|
||||
field_type = str(field_type)
|
||||
converted_type = None
|
||||
converted_format = None
|
||||
if field_type.startswith(('int', 'uint')):
|
||||
converted_type = 'integer'
|
||||
converted_format = field_type
|
||||
elif field_type == 'double' or field_type.startswith('float'):
|
||||
converted_type = 'number'
|
||||
converted_format = field_type
|
||||
elif field_type == 'string':
|
||||
converted_type = 'string'
|
||||
elif field_type == 'bool':
|
||||
converted_type = 'boolean'
|
||||
elif field_type.startswith('timestamp'):
|
||||
converted_type = 'string'
|
||||
converted_format = 'date-time'
|
||||
else:
|
||||
LOGGER.error(f'Unsupported field type {field_type}')
|
||||
|
||||
if converted_format is None:
|
||||
fields[field_name] = {'type': converted_type}
|
||||
else:
|
||||
fields[field_name] = {
|
||||
'type': converted_type,
|
||||
'format': converted_format,
|
||||
}
|
||||
|
||||
return fields
|
||||
|
||||
@crs_transform
|
||||
def query(
|
||||
self,
|
||||
offset=0,
|
||||
limit=10,
|
||||
resulttype='results',
|
||||
bbox=[],
|
||||
datetime_=None,
|
||||
properties=[],
|
||||
select_properties=[],
|
||||
skip_geometry=False,
|
||||
q=None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Query Parquet source
|
||||
|
||||
:param offset: starting record to return (default 0)
|
||||
:param limit: number of records to return (default 10)
|
||||
:param resulttype: return results or hit limit (default results)
|
||||
:param bbox: bounding box [minx,miny,maxx,maxy]
|
||||
:param datetime_: temporal (datestamp or extent) following ISO-8601
|
||||
:param properties: list of tuples (field, comparison, value)
|
||||
:param select_properties: list of property names
|
||||
:param skip_geometry: bool of whether to skip geometry (default False)
|
||||
|
||||
:returns: dict of 0..n GeoJSON features
|
||||
"""
|
||||
result = None
|
||||
try:
|
||||
filter = pc.scalar(True)
|
||||
if bbox:
|
||||
if self.has_geometry is False:
|
||||
msg = (
|
||||
'Dataset does not have a geometry field, '
|
||||
'querying by bbox is not supported.'
|
||||
)
|
||||
raise ProviderQueryError(msg)
|
||||
LOGGER.debug('processing bbox parameter')
|
||||
if any(b is None for b in bbox):
|
||||
msg = 'Dataset does not support bbox filtering'
|
||||
raise ProviderQueryError(msg)
|
||||
|
||||
minx, miny, maxx, maxy = [float(b) for b in bbox]
|
||||
filter = (
|
||||
(pc.field(self.minx) > pc.scalar(minx))
|
||||
& (pc.field(self.miny) > pc.scalar(miny))
|
||||
& (pc.field(self.maxx) < pc.scalar(maxx))
|
||||
& (pc.field(self.maxy) < pc.scalar(maxy))
|
||||
)
|
||||
|
||||
if datetime_ is not None:
|
||||
if self.time_field is None:
|
||||
msg = (
|
||||
'Dataset does not have a time field, '
|
||||
'querying by datetime is not supported.'
|
||||
)
|
||||
raise ProviderQueryError(msg)
|
||||
timefield = pc.field(self.time_field)
|
||||
if '/' in datetime_:
|
||||
begin, end = datetime_.split('/')
|
||||
if begin != '..':
|
||||
begin = isoparse(begin)
|
||||
filter = filter & (timefield >= begin)
|
||||
if end != '..':
|
||||
end = isoparse(end)
|
||||
filter = filter & (timefield <= end)
|
||||
else:
|
||||
target_time = isoparse(datetime_)
|
||||
filter = filter & (timefield == target_time)
|
||||
|
||||
if properties:
|
||||
LOGGER.debug('processing properties')
|
||||
for name, value in properties:
|
||||
field = self.ds.schema.field(name)
|
||||
pd_type = arrow_to_pandas_type(field.type)
|
||||
expr = pc.field(name) == pc.scalar(pd_type(value))
|
||||
|
||||
filter = filter & expr
|
||||
|
||||
if len(select_properties) == 0:
|
||||
select_properties = self.ds.schema.names
|
||||
else: # Load id and geometry together with any specified columns
|
||||
if self.has_geometry and 'geometry' not in select_properties:
|
||||
select_properties.append('geometry')
|
||||
if self.id_field not in select_properties:
|
||||
select_properties.insert(0, self.id_field)
|
||||
|
||||
if skip_geometry:
|
||||
select_properties.remove('geometry')
|
||||
|
||||
# Make response based on resulttype specified
|
||||
if resulttype == 'hits':
|
||||
LOGGER.debug('hits only specified')
|
||||
result = self._response_feature_hits(filter)
|
||||
elif resulttype == 'results':
|
||||
LOGGER.debug('results specified')
|
||||
result = self._response_feature_collection(
|
||||
filter, offset, limit, columns=select_properties
|
||||
)
|
||||
else:
|
||||
LOGGER.error(f'Invalid resulttype: {resulttype}')
|
||||
|
||||
except RuntimeError as err:
|
||||
LOGGER.error(err)
|
||||
raise ProviderQueryError(err)
|
||||
except ProviderConnectionError as err:
|
||||
LOGGER.error(err)
|
||||
raise ProviderConnectionError(err)
|
||||
except Exception as err:
|
||||
LOGGER.error(err)
|
||||
raise ProviderGenericError(err)
|
||||
|
||||
return result
|
||||
|
||||
@crs_transform
|
||||
def get(self, identifier, **kwargs):
|
||||
"""
|
||||
Get Feature by id
|
||||
|
||||
:param identifier: feature id
|
||||
|
||||
:returns: a single feature
|
||||
"""
|
||||
result = None
|
||||
try:
|
||||
LOGGER.debug(f'Fetching identifier {identifier}')
|
||||
id_type = arrow_to_pandas_type(
|
||||
self.ds.schema.field(self.id_field).type)
|
||||
batches = self._read_parquet(
|
||||
filter=(
|
||||
pc.field(self.id_field) == pc.scalar(id_type(identifier))
|
||||
)
|
||||
)
|
||||
|
||||
for batch in batches:
|
||||
if batch.num_rows > 0:
|
||||
assert (
|
||||
batch.num_rows == 1
|
||||
), f'Multiple items found with ID {identifier}'
|
||||
row = batch.to_pandas()
|
||||
break
|
||||
else:
|
||||
raise ProviderItemNotFoundError(f'ID {identifier} not found')
|
||||
|
||||
if self.has_geometry:
|
||||
geom = gpd.GeoSeries.from_wkb(row['geometry'], crs=self.crs)
|
||||
else:
|
||||
geom = [None]
|
||||
gdf = gpd.GeoDataFrame(row, geometry=geom)
|
||||
LOGGER.debug('results computed')
|
||||
|
||||
# Grab the collection from geopandas geo_interface
|
||||
result = gdf.__geo_interface__['features'][0]
|
||||
|
||||
except RuntimeError as err:
|
||||
LOGGER.error(err)
|
||||
raise ProviderQueryError(err)
|
||||
except ProviderConnectionError as err:
|
||||
LOGGER.error(err)
|
||||
raise ProviderConnectionError(err)
|
||||
except ProviderItemNotFoundError as err:
|
||||
LOGGER.error(err)
|
||||
raise ProviderItemNotFoundError(err)
|
||||
except Exception as err:
|
||||
LOGGER.error(err)
|
||||
raise ProviderGenericError(err)
|
||||
|
||||
return result
|
||||
|
||||
def __repr__(self):
|
||||
return f'<ParquetProvider> {self.data}'
|
||||
|
||||
def _response_feature_collection(self, filter, offset, limit,
|
||||
columns=None):
|
||||
"""
|
||||
Assembles output from query as
|
||||
GeoJSON FeatureCollection structure.
|
||||
|
||||
:returns: GeoJSON FeatureCollection
|
||||
"""
|
||||
|
||||
LOGGER.debug(f'offset:{offset}, limit:{limit}')
|
||||
|
||||
try:
|
||||
batches, scanner = self._read_parquet(
|
||||
filter=filter, columns=columns, return_scanner=True
|
||||
)
|
||||
|
||||
# Discard batches until offset is reached
|
||||
counted = 0
|
||||
for batch in batches:
|
||||
if counted + batch.num_rows > offset:
|
||||
# Slice current batch to start from the requested row
|
||||
batch = batch.slice(offset=offset - counted)
|
||||
# Build a new generator yielding the current batch
|
||||
# and all following ones
|
||||
|
||||
batches = chain([batch], batches)
|
||||
break
|
||||
else:
|
||||
counted += batch.num_rows
|
||||
|
||||
# batches is a generator, it will now be either fully spent
|
||||
# or set to the new generator starting from offset
|
||||
|
||||
# Get the next `limit+1` rows
|
||||
# The extra row is used to check if a "next" link is needed
|
||||
# (when numberMatched > offset + limit)
|
||||
batches_list = []
|
||||
read = 0
|
||||
|
||||
for batch in batches:
|
||||
read += batch.num_rows
|
||||
if read > limit:
|
||||
batches_list.append(batch.slice(0, limit + 1))
|
||||
break
|
||||
else:
|
||||
batches_list.append(batch)
|
||||
|
||||
# Passing schema from scanner in case no rows are returned
|
||||
table = pyarrow.Table.from_batches(
|
||||
batches_list, schema=scanner.projected_schema
|
||||
)
|
||||
|
||||
rp = table.to_pandas()
|
||||
|
||||
number_matched = offset + len(rp)
|
||||
|
||||
# Remove the extra row
|
||||
if len(rp) > limit:
|
||||
rp = rp.iloc[:-1]
|
||||
|
||||
if 'geometry' not in rp.columns:
|
||||
# We need a null geometry column to create a GeoDataFrame
|
||||
rp['geometry'] = None
|
||||
geom = gpd.GeoSeries.from_wkb(rp['geometry'])
|
||||
else:
|
||||
geom = gpd.GeoSeries.from_wkb(rp['geometry'], crs=self.crs)
|
||||
|
||||
gdf = gpd.GeoDataFrame(rp, geometry=geom)
|
||||
LOGGER.debug('results computed')
|
||||
result = gdf.__geo_interface__
|
||||
|
||||
# Add numberMatched to generate "next" link
|
||||
result['numberMatched'] = number_matched
|
||||
|
||||
return result
|
||||
|
||||
except RuntimeError as error:
|
||||
LOGGER.error(error)
|
||||
raise error
|
||||
|
||||
def _response_feature_hits(self, filter):
|
||||
"""
|
||||
Assembles GeoJSON hits from row count
|
||||
|
||||
:returns: GeoJSON FeaturesCollection
|
||||
"""
|
||||
|
||||
try:
|
||||
scanner = pyarrow.dataset.Scanner.from_dataset(self.ds,
|
||||
filter=filter)
|
||||
return {
|
||||
'type': 'FeatureCollection',
|
||||
'numberMatched': scanner.count_rows(),
|
||||
'features': [],
|
||||
}
|
||||
except Exception as error:
|
||||
LOGGER.error(error)
|
||||
raise error
|
||||
@@ -6,11 +6,13 @@ elasticsearch-dsl
|
||||
fiona
|
||||
GDAL<=3.8.4
|
||||
geoalchemy2
|
||||
geopandas
|
||||
netCDF4
|
||||
numpy
|
||||
oracledb
|
||||
pandas
|
||||
psycopg2
|
||||
pyarrow
|
||||
pygeofilter[backend-sqlalchemy]
|
||||
pygeoif
|
||||
pygeometa
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,211 @@
|
||||
# =================================================================
|
||||
#
|
||||
# Authors: Leo Ghignone <leo.ghignone@gmail.com>
|
||||
#
|
||||
# Copyright (c) 2024 Leo Ghignone
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person
|
||||
# obtaining a copy of this software and associated documentation
|
||||
# files (the "Software"), to deal in the Software without
|
||||
# restriction, including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following
|
||||
# conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# =================================================================
|
||||
|
||||
import pytest
|
||||
|
||||
from pygeoapi.provider.base import ProviderItemNotFoundError
|
||||
from pygeoapi.provider.parquet import ParquetProvider
|
||||
|
||||
from .util import get_test_file_path
|
||||
|
||||
path = get_test_file_path(
|
||||
'data/random.parquet')
|
||||
|
||||
path_nogeom = get_test_file_path(
|
||||
'data/random_nogeom.parquet')
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def config_parquet():
|
||||
return {
|
||||
'name': 'Parquet',
|
||||
'type': 'feature',
|
||||
'data': {
|
||||
'source_type': 'Parquet',
|
||||
'source': path,
|
||||
},
|
||||
'id_field': 'id',
|
||||
'time_field': 'time',
|
||||
'x_field': 'lon',
|
||||
'y_field': 'lat',
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def config_parquet_nogeom_notime():
|
||||
return {
|
||||
'name': 'ParquetNoGeomNoTime',
|
||||
'type': 'feature',
|
||||
'data': {
|
||||
'source_type': 'Parquet',
|
||||
'source': path_nogeom,
|
||||
},
|
||||
'id_field': 'id'
|
||||
}
|
||||
|
||||
|
||||
def test_get_fields(config_parquet):
|
||||
"""Testing field types"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
results = p.get_fields()
|
||||
assert results['lat']['type'] == 'number'
|
||||
assert results['lon']['format'] == 'double'
|
||||
assert results['time']['format'] == 'date-time'
|
||||
|
||||
|
||||
def test_get(config_parquet):
|
||||
"""Testing query for a specific object"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
result = p.get('42')
|
||||
assert result['id'] == '42'
|
||||
assert result['properties']['lon'] == 4.947447
|
||||
|
||||
|
||||
def test_get_not_existing_feature_raise_exception(
|
||||
config_parquet
|
||||
):
|
||||
"""Testing query for a not existing object"""
|
||||
p = ParquetProvider(config_parquet)
|
||||
with pytest.raises(ProviderItemNotFoundError):
|
||||
p.get(-1)
|
||||
|
||||
|
||||
def test_query_hits(config_parquet):
|
||||
"""Testing query on entire collection for hits"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
feature_collection = p.query(resulttype='hits')
|
||||
assert feature_collection.get('type') == 'FeatureCollection'
|
||||
features = feature_collection.get('features')
|
||||
assert len(features) == 0
|
||||
hits = feature_collection.get('numberMatched')
|
||||
assert hits is not None
|
||||
assert hits == 100
|
||||
|
||||
|
||||
def test_query_bbox_hits(config_parquet):
|
||||
"""Testing query for a valid JSON object with geometry"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
feature_collection = p.query(
|
||||
bbox=[100, -50, 150, 0],
|
||||
resulttype='hits')
|
||||
assert feature_collection.get('type') == 'FeatureCollection'
|
||||
features = feature_collection.get('features')
|
||||
assert len(features) == 0
|
||||
hits = feature_collection.get('numberMatched')
|
||||
assert hits is not None
|
||||
assert hits == 6
|
||||
|
||||
|
||||
def test_query_with_limit(config_parquet):
|
||||
"""Testing query for a valid JSON object with geometry"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
feature_collection = p.query(limit=2, resulttype='results')
|
||||
assert feature_collection.get('type') == 'FeatureCollection'
|
||||
features = feature_collection.get('features')
|
||||
assert len(features) == 2
|
||||
hits = feature_collection.get('numberMatched')
|
||||
assert hits > 2
|
||||
feature = features[0]
|
||||
properties = feature.get('properties')
|
||||
assert properties is not None
|
||||
geometry = feature.get('geometry')
|
||||
assert geometry is not None
|
||||
|
||||
|
||||
def test_query_with_offset(config_parquet):
|
||||
"""Testing query for a valid JSON object with geometry"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
feature_collection = p.query(offset=20, limit=10, resulttype='results')
|
||||
assert feature_collection.get('type') == 'FeatureCollection'
|
||||
features = feature_collection.get('features')
|
||||
assert len(features) == 10
|
||||
hits = feature_collection.get('numberMatched')
|
||||
assert hits > 30
|
||||
feature = features[0]
|
||||
properties = feature.get('properties')
|
||||
assert properties is not None
|
||||
assert feature['id'] == '21'
|
||||
assert properties['lat'] == 66.264988
|
||||
geometry = feature.get('geometry')
|
||||
assert geometry is not None
|
||||
|
||||
|
||||
def test_query_with_property(config_parquet):
|
||||
"""Testing query for a valid JSON object with property filter"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
feature_collection = p.query(
|
||||
resulttype='results',
|
||||
properties=[('lon', -12.855022)])
|
||||
assert feature_collection.get('type') == 'FeatureCollection'
|
||||
features = feature_collection.get('features')
|
||||
assert len(features) == 1
|
||||
for feature in features:
|
||||
assert feature['properties']['lon'] == -12.855022
|
||||
|
||||
|
||||
def test_query_with_skip_geometry(config_parquet):
|
||||
"""Testing query for a valid JSON object with property filter"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
feature_collection = p.query(skip_geometry=True)
|
||||
for feature in feature_collection['features']:
|
||||
assert feature.get('geometry') is None
|
||||
|
||||
|
||||
def test_query_with_datetime(config_parquet):
|
||||
"""Testing query for a valid JSON object with time"""
|
||||
|
||||
p = ParquetProvider(config_parquet)
|
||||
feature_collection = p.query(
|
||||
datetime_='2022-05-01T00:00:00Z/2022-05-31T23:59:59Z')
|
||||
assert feature_collection.get('type') == 'FeatureCollection'
|
||||
features = feature_collection.get('features')
|
||||
assert len(features) == 7
|
||||
for feature in feature_collection['features']:
|
||||
time = feature['properties'][config_parquet['time_field']]
|
||||
assert time.year == 2022
|
||||
assert time.month == 5
|
||||
|
||||
|
||||
def test_query_nogeom(config_parquet_nogeom_notime):
|
||||
"""Testing query for a valid JSON object without geometry"""
|
||||
|
||||
p = ParquetProvider(config_parquet_nogeom_notime)
|
||||
feature_collection = p.query(resulttype='results')
|
||||
assert feature_collection.get('type') == 'FeatureCollection'
|
||||
assert len(feature_collection.get('features')) > 0
|
||||
for feature in feature_collection['features']:
|
||||
assert feature.get('geometry') is None
|
||||
Reference in New Issue
Block a user