From d6ad4f8724311690c1599fd1cbbe97a599aef7fe Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Fri, 3 Feb 2023 08:45:03 -0500 Subject: [PATCH] upgrade Elasticsearch support to 8 (#1017) * upgrade Elasticsearch support to version 8 * Update ogcapi-features.rst --- .github/workflows/main.yml | 5 +- debian/control | 2 +- docker/examples/README.md | 2 +- docker/examples/elastic/README.md | 6 +- .../elastic/pygeoapi/es-entrypoint.sh | 4 +- .../data-publishing/ogcapi-features.rst | 3 +- .../source/data-publishing/ogcapi-records.rst | 3 +- pygeoapi/provider/elasticsearch_.py | 89 +++++-------------- pygeoapi/provider/postgresql.py | 15 ++-- requirements-provider.txt | 4 +- tests/load_es_data.py | 40 ++++----- 11 files changed, 67 insertions(+), 106 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4f80aca..6e765fb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -63,7 +63,7 @@ jobs: - name: Install and run Elasticsearch 📦 uses: getong/elasticsearch-action@v1.2 with: - elasticsearch version: '7.6.1' + elasticsearch version: '8.3.1' host port: 9200 container port: 9200 host node port: 9300 @@ -87,7 +87,8 @@ jobs: pip3 install -r requirements-dev.txt pip3 install -r requirements-provider.txt python3 setup.py install - pip3 install --upgrade numpy + pip3 install --upgrade numpy elasticsearch + pip3 install --upgrade numpy "sqlalchemy<2" #pip3 install --upgrade rasterio==1.1.8 - name: setup test data ⚙️ run: | diff --git a/debian/control b/debian/control index ede05f0..1a003b4 100644 --- a/debian/control +++ b/debian/control @@ -24,7 +24,7 @@ Depends: ${python3:Depends}, python3-yaml, python3-pygeofilter, ${misc:Depends} -Suggests: python3-babel, python3-elasticsearch (<8), python3-fiona, python3-mapscript, python3-pygeoif, python3-geojson, python3-pydantic, python3-pygeometa, python3-pyproj, python3-rasterio, python3-requests +Suggests: python3-babel, python3-elasticsearch, python3-fiona, python3-mapscript, python3-pygeoif, python3-geojson, python3-pydantic, python3-pygeometa, python3-pyproj, python3-rasterio, python3-requests Description: pygeoapi provides an API to geospatial data. . This package the pygeoapi module for Python 3. diff --git a/docker/examples/README.md b/docker/examples/README.md index 823b2e3..d928371 100644 --- a/docker/examples/README.md +++ b/docker/examples/README.md @@ -8,6 +8,6 @@ This folder contains the sub-folders: - mongo The [simple](simple) example will run pygeoapi with Docker with your local config. -The [elastic](elastic) example demonstrates a docker compose configuration to run pygeoapi with local ElasticSearch backend. +The [elastic](elastic) example demonstrates a docker compose configuration to run pygeoapi with local Elasticsearch backend. The [mongo](mongo) example demonstrates a docker compose configuration to run pygeoapi with local MongoDB backend. The [sensorthings](sensorthings) example demonstrates various pygeoapi implementations of SensorThings API endpoints. diff --git a/docker/examples/elastic/README.md b/docker/examples/elastic/README.md index 380c20d..d3e0bbd 100644 --- a/docker/examples/elastic/README.md +++ b/docker/examples/elastic/README.md @@ -1,13 +1,13 @@ -# pygeoapi with ElasticSearch (ES) +# pygeoapi with Elasticsearch (ES) These folders contain a Docker Compose configuration necessary to setup a minimal `pygeoapi` server that uses a local ES backend service. This config is only for local development and testing. -## ElasticSearch +## Elasticsearch -- official ElasticSearch: **5.6.8** on **CentosOS 7** +- official Elasticsearch: **5.6.8** on **CentosOS 7** - ports **9300** and **9200** ES requires the host system to have its virtual memory diff --git a/docker/examples/elastic/pygeoapi/es-entrypoint.sh b/docker/examples/elastic/pygeoapi/es-entrypoint.sh index c113964..7e8bea1 100755 --- a/docker/examples/elastic/pygeoapi/es-entrypoint.sh +++ b/docker/examples/elastic/pygeoapi/es-entrypoint.sh @@ -3,9 +3,11 @@ # # Authors: Just van den Broecke > # Jorge Samuel Mendes de Jesus +# Tom Kralidis # # Copyright (c) 2019 Just van den Broecke # Copyright (c) 2019 Jorge Samuel Mendes de Jesus +# Copyright (c) 2023 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -37,7 +39,7 @@ echo "Install Curl" apt-get update -y && apt-get install curl -y && -echo "Waiting for ElasticSearch container..." +echo "Waiting for Elasticsearch container..." # First wait for ES to be up and then execute the original pygeoapi entrypoint. /wait-for-elasticsearch.sh http://elastic_search:9200 /entrypoint.sh || echo "ES failed: $?, exit" && exit 1 diff --git a/docs/source/data-publishing/ogcapi-features.rst b/docs/source/data-publishing/ogcapi-features.rst index 4dba61a..fd5dc70 100644 --- a/docs/source/data-publishing/ogcapi-features.rst +++ b/docs/source/data-publishing/ogcapi-features.rst @@ -76,8 +76,7 @@ Elasticsearch Requires Python packages elasticsearch and elasticsearch-dsl .. note:: - Elasticsearch 7 or greater is supported. - + Elasticsearch 8 or greater is supported. To publish an Elasticsearch index, the following are required in your index: diff --git a/docs/source/data-publishing/ogcapi-records.rst b/docs/source/data-publishing/ogcapi-records.rst index caf134c..3d7c4ef 100644 --- a/docs/source/data-publishing/ogcapi-records.rst +++ b/docs/source/data-publishing/ogcapi-records.rst @@ -33,7 +33,7 @@ ElasticsearchCatalogue Requires Python packages elasticsearch and elasticsearch-dsl .. note:: - Elasticsearch 7 or greater is supported. + Elasticsearch 8 or greater is supported. To publish an Elasticsearch index, the following are required in your index: @@ -52,6 +52,7 @@ To publish an Elasticsearch index, the following are required in your index: TinyDBCatalogue ^^^^^^^^^^^^^^^ + .. note:: Requires Python package tinydb diff --git a/pygeoapi/provider/elasticsearch_.py b/pygeoapi/provider/elasticsearch_.py index e10c027..d677d76 100644 --- a/pygeoapi/provider/elasticsearch_.py +++ b/pygeoapi/provider/elasticsearch_.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2022 Tom Kralidis +# Copyright (c) 2023 Tom Kralidis # Copyright (c) 2021 Francesco Bartoli # # Permission is hereby granted, free of charge, to any person @@ -32,10 +32,8 @@ from typing import Dict from collections import OrderedDict import json import logging -from urllib.parse import urlparse from elasticsearch import Elasticsearch, exceptions, helpers -from elasticsearch.client.indices import IndicesClient from elasticsearch_dsl import Search, Q from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError, @@ -62,41 +60,19 @@ class ElasticsearchProvider(BaseProvider): super().__init__(provider_def) + self.select_properties = [] + self.es_host, self.index_name = self.data.rsplit('/', 1) LOGGER.debug('Setting Elasticsearch properties') - self.is_gdal = False LOGGER.debug(f'host: {self.es_host}') LOGGER.debug(f'index: {self.index_name}') - self.type_name = 'FeatureCollection' - self.url_parsed = urlparse(self.es_host) - - self.select_properties = [] - LOGGER.debug('Connecting to Elasticsearch') - if self.url_parsed.port is None: # proxy to default HTTP(S) port - if self.url_parsed.scheme == 'https': - port = 443 - else: - port = 80 - else: # was set explictly - port = self.url_parsed.port - - url_settings = { - 'scheme': self.url_parsed.scheme, - 'host': self.url_parsed.hostname, - 'port': port - } - - if self.url_parsed.path: - url_settings['url_prefix'] = self.url_parsed.path - - LOGGER.debug(f'URL settings: {url_settings}') LOGGER.debug('Connecting to Elasticsearch') - self.es = Elasticsearch([url_settings]) + self.es = Elasticsearch(self.es_host) if not self.es.ping(): msg = 'Cannot connect to Elasticsearch' LOGGER.error(msg) @@ -124,8 +100,7 @@ class ElasticsearchProvider(BaseProvider): """ fields_ = {} - ic = IndicesClient(self.es) - ii = ic.get(index=self.index_name) + ii = self.es.indices.get(index=self.index_name) try: if '*' not in self.index_name: @@ -134,10 +109,6 @@ class ElasticsearchProvider(BaseProvider): LOGGER.debug('Wildcard index; setting from first match') index_name_ = list(ii.keys())[0] p = ii[index_name_]['mappings']['properties']['properties'] - except KeyError: - LOGGER.debug('ES index looks generated by GDAL') - self.is_gdal = True - p = ii[self.index_name]['mappings'] except IndexError: LOGGER.warning('could not get fields; returning empty set') return {} @@ -336,13 +307,15 @@ class ElasticsearchProvider(BaseProvider): next(gen) except StopIteration: break - results['hits']['total'] = \ - len(results['hits']['hits']) + offset + + matched = len(results['hits']['hits']) + offset + returned = len(results['hits']['hits']) else: - results = self.es.search(index=self.index_name, - from_=offset, size=limit, - body=query) - results['hits']['total'] = results['hits']['total']['value'] + es_results = self.es.search(index=self.index_name, + from_=offset, size=limit, **query) + results = es_results + matched = es_results['hits']['total']['value'] + returned = len(es_results['hits']['hits']) except exceptions.ConnectionError as err: LOGGER.error(err) @@ -354,12 +327,12 @@ class ElasticsearchProvider(BaseProvider): LOGGER.error(err) raise ProviderQueryError() - feature_collection['numberMatched'] = results['hits']['total'] + feature_collection['numberMatched'] = matched if resulttype == 'hits': return feature_collection - feature_collection['numberReturned'] = len(results['hits']['hits']) + feature_collection['numberReturned'] = returned LOGGER.debug('serializing features') for feature in results['hits']['hits']: @@ -399,7 +372,7 @@ class ElasticsearchProvider(BaseProvider): } try: - result = self.es.search(index=self.index_name, body=query) + result = self.es.search(index=self.index_name, **query) if len(result['hits']['hits']) == 0: LOGGER.error(err) raise ProviderItemNotFoundError(err) @@ -445,7 +418,7 @@ class ElasticsearchProvider(BaseProvider): identifier, json_data = self._load_and_prepare_item( item, identifier, raise_if_exists=False) - _ = self.es.index(index=self.index_name, id=identifier, body=json_data) + _ = self.es.index(index=self.index_name, id=identifier, **json_data) return True @@ -475,24 +448,11 @@ class ElasticsearchProvider(BaseProvider): feature_ = {} feature_thinned = {} - if 'properties' not in doc['_source']: - LOGGER.debug('Looks like a GDAL ES 7 document') - id_ = doc['_source'][self.id_field] - if 'type' not in doc['_source']: - feature_['id'] = id_ - feature_['type'] = 'Feature' - feature_['geometry'] = doc['_source'].get('geometry') - feature_['properties'] = {} - for key, value in doc['_source'].items(): - if key == 'geometry': - continue - feature_['properties'][key] = value - else: - LOGGER.debug('Looks like true GeoJSON document') - feature_ = doc['_source'] - id_ = doc['_source']['properties'][self.id_field] - feature_['id'] = id_ - feature_['geometry'] = doc['_source'].get('geometry') + LOGGER.debug('Fetching id and geometry from GeoJSON document') + feature_ = doc['_source'] + id_ = doc['_source']['properties'][self.id_field] + feature_['id'] = id_ + feature_['geometry'] = doc['_source'].get('geometry') if self.properties or self.select_properties: LOGGER.debug('Filtering properties') @@ -525,10 +485,7 @@ class ElasticsearchProvider(BaseProvider): :returns: masked property name """ - if self.is_gdal: - return property_name - else: - return f'properties.{property_name}' + return 'properties.{}'.format(property_name) def get_properties(self): all_properties = [] diff --git a/pygeoapi/provider/postgresql.py b/pygeoapi/provider/postgresql.py index de3d05e..f677b6e 100644 --- a/pygeoapi/provider/postgresql.py +++ b/pygeoapi/provider/postgresql.py @@ -47,20 +47,21 @@ # psql -U postgres -h 127.0.0.1 -p 5432 test import logging -from pygeoapi.provider.base import BaseProvider, \ - ProviderConnectionError, ProviderQueryError, ProviderItemNotFoundError +from geoalchemy2 import Geometry # noqa - this isn't used explicitly but is needed to process Geometry columns +from geoalchemy2.functions import ST_MakeEnvelope +from geoalchemy2.shape import to_shape +from pygeofilter.backends.sqlalchemy.evaluate import to_filter +import shapely from sqlalchemy import create_engine, MetaData, PrimaryKeyConstraint, asc, desc from sqlalchemy.exc import InvalidRequestError, OperationalError from sqlalchemy.ext.automap import automap_base from sqlalchemy.orm import Session, load_only from sqlalchemy.sql.expression import and_ -from geoalchemy2 import Geometry # noqa - this isn't used explicitly but is needed to process Geometry columns -from geoalchemy2.functions import ST_MakeEnvelope -from pygeofilter.backends.sqlalchemy.evaluate import to_filter -import shapely -from geoalchemy2.shape import to_shape +from pygeoapi.provider.base import BaseProvider, \ + ProviderConnectionError, ProviderQueryError, ProviderItemNotFoundError + _ENGINE_STORE = {} _TABLE_MODEL_STORE = {} diff --git a/requirements-provider.txt b/requirements-provider.txt index 757c3b8..0d6fe3b 100644 --- a/requirements-provider.txt +++ b/requirements-provider.txt @@ -1,5 +1,5 @@ -elasticsearch<8 -elasticsearch-dsl<8 +elasticsearch +elasticsearch-dsl fiona #GDAL>=3.0.0 geoalchemy diff --git a/tests/load_es_data.py b/tests/load_es_data.py index d31efba..90db8cb 100644 --- a/tests/load_es_data.py +++ b/tests/load_es_data.py @@ -2,7 +2,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2022 Tom Kralidis +# Copyright (c) 2023 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -32,7 +32,7 @@ from pathlib import Path import sys from elasticsearch import Elasticsearch, helpers -es = Elasticsearch() +es = Elasticsearch('http://localhost:9200') if len(sys.argv) < 3: print(f'Usage: {sys.argv[0]} ') @@ -41,28 +41,27 @@ if len(sys.argv) < 3: index_name = Path(sys.argv[1]).stem.lower() id_field = sys.argv[2] -if es.indices.exists(index_name): - es.indices.delete(index_name) +if es.indices.exists(index=index_name): + es.indices.delete(index=index_name) # index settings settings = { - 'settings': { - 'number_of_shards': 1, - 'number_of_replicas': 0 - }, - 'mappings': { + 'number_of_shards': 1, + 'number_of_replicas': 0 +} + +mappings = { + 'properties': { + 'geometry': { + 'type': 'geo_shape' + }, 'properties': { - 'geometry': { - 'type': 'geo_shape' - }, 'properties': { - 'properties': { - 'nameascii': { - 'type': 'text', - 'fields': { - 'raw': { - 'type': 'keyword' - } + 'nameascii': { + 'type': 'text', + 'fields': { + 'raw': { + 'type': 'keyword' } } } @@ -90,7 +89,8 @@ def gendata(data): # create index -es.indices.create(index=index_name, body=settings, request_timeout=90) +es.options(request_timeout=90).indices.create( + index=index_name, settings=settings, mappings=mappings) with open(sys.argv[1], encoding='utf-8') as fh: d = json.load(fh)