upgrade Elasticsearch support to 8 (#1017)

* upgrade Elasticsearch support to version 8

* Update ogcapi-features.rst
This commit is contained in:
Tom Kralidis
2023-02-03 08:45:03 -05:00
committed by GitHub
parent 86b786d723
commit d6ad4f8724
11 changed files with 67 additions and 106 deletions
+3 -2
View File
@@ -63,7 +63,7 @@ jobs:
- name: Install and run Elasticsearch 📦
uses: getong/elasticsearch-action@v1.2
with:
elasticsearch version: '7.6.1'
elasticsearch version: '8.3.1'
host port: 9200
container port: 9200
host node port: 9300
@@ -87,7 +87,8 @@ jobs:
pip3 install -r requirements-dev.txt
pip3 install -r requirements-provider.txt
python3 setup.py install
pip3 install --upgrade numpy
pip3 install --upgrade numpy elasticsearch
pip3 install --upgrade numpy "sqlalchemy<2"
#pip3 install --upgrade rasterio==1.1.8
- name: setup test data ⚙️
run: |
+1 -1
View File
@@ -24,7 +24,7 @@ Depends: ${python3:Depends},
python3-yaml,
python3-pygeofilter,
${misc:Depends}
Suggests: python3-babel, python3-elasticsearch (<8), python3-fiona, python3-mapscript, python3-pygeoif, python3-geojson, python3-pydantic, python3-pygeometa, python3-pyproj, python3-rasterio, python3-requests
Suggests: python3-babel, python3-elasticsearch, python3-fiona, python3-mapscript, python3-pygeoif, python3-geojson, python3-pydantic, python3-pygeometa, python3-pyproj, python3-rasterio, python3-requests
Description: pygeoapi provides an API to geospatial data.
.
This package the pygeoapi module for Python 3.
+1 -1
View File
@@ -8,6 +8,6 @@ This folder contains the sub-folders:
- mongo
The [simple](simple) example will run pygeoapi with Docker with your local config.
The [elastic](elastic) example demonstrates a docker compose configuration to run pygeoapi with local ElasticSearch backend.
The [elastic](elastic) example demonstrates a docker compose configuration to run pygeoapi with local Elasticsearch backend.
The [mongo](mongo) example demonstrates a docker compose configuration to run pygeoapi with local MongoDB backend.
The [sensorthings](sensorthings) example demonstrates various pygeoapi implementations of SensorThings API endpoints.
+3 -3
View File
@@ -1,13 +1,13 @@
# pygeoapi with ElasticSearch (ES)
# pygeoapi with Elasticsearch (ES)
These folders contain a Docker Compose configuration necessary to setup a minimal
`pygeoapi` server that uses a local ES backend service.
This config is only for local development and testing.
## ElasticSearch
## Elasticsearch
- official ElasticSearch: **5.6.8** on **CentosOS 7**
- official Elasticsearch: **5.6.8** on **CentosOS 7**
- ports **9300** and **9200**
ES requires the host system to have its virtual memory
@@ -3,9 +3,11 @@
#
# Authors: Just van den Broecke <justb4@gmail.com>>
# Jorge Samuel Mendes de Jesus <jorge.dejesus@geocat.net>
# Tom Kralidis <tomkralidis@gmail.com>
#
# Copyright (c) 2019 Just van den Broecke
# Copyright (c) 2019 Jorge Samuel Mendes de Jesus
# Copyright (c) 2023 Tom Kralidis
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
@@ -37,7 +39,7 @@ echo "Install Curl"
apt-get update -y &&
apt-get install curl -y &&
echo "Waiting for ElasticSearch container..."
echo "Waiting for Elasticsearch container..."
# First wait for ES to be up and then execute the original pygeoapi entrypoint.
/wait-for-elasticsearch.sh http://elastic_search:9200 /entrypoint.sh || echo "ES failed: $?, exit" && exit 1
@@ -76,8 +76,7 @@ Elasticsearch
Requires Python packages elasticsearch and elasticsearch-dsl
.. note::
Elasticsearch 7 or greater is supported.
Elasticsearch 8 or greater is supported.
To publish an Elasticsearch index, the following are required in your index:
@@ -33,7 +33,7 @@ ElasticsearchCatalogue
Requires Python packages elasticsearch and elasticsearch-dsl
.. note::
Elasticsearch 7 or greater is supported.
Elasticsearch 8 or greater is supported.
To publish an Elasticsearch index, the following are required in your index:
@@ -52,6 +52,7 @@ To publish an Elasticsearch index, the following are required in your index:
TinyDBCatalogue
^^^^^^^^^^^^^^^
.. note::
Requires Python package tinydb
+23 -66
View File
@@ -2,7 +2,7 @@
#
# Authors: Tom Kralidis <tomkralidis@gmail.com>
#
# Copyright (c) 2022 Tom Kralidis
# Copyright (c) 2023 Tom Kralidis
# Copyright (c) 2021 Francesco Bartoli
#
# Permission is hereby granted, free of charge, to any person
@@ -32,10 +32,8 @@ from typing import Dict
from collections import OrderedDict
import json
import logging
from urllib.parse import urlparse
from elasticsearch import Elasticsearch, exceptions, helpers
from elasticsearch.client.indices import IndicesClient
from elasticsearch_dsl import Search, Q
from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError,
@@ -62,41 +60,19 @@ class ElasticsearchProvider(BaseProvider):
super().__init__(provider_def)
self.select_properties = []
self.es_host, self.index_name = self.data.rsplit('/', 1)
LOGGER.debug('Setting Elasticsearch properties')
self.is_gdal = False
LOGGER.debug(f'host: {self.es_host}')
LOGGER.debug(f'index: {self.index_name}')
self.type_name = 'FeatureCollection'
self.url_parsed = urlparse(self.es_host)
self.select_properties = []
LOGGER.debug('Connecting to Elasticsearch')
if self.url_parsed.port is None: # proxy to default HTTP(S) port
if self.url_parsed.scheme == 'https':
port = 443
else:
port = 80
else: # was set explictly
port = self.url_parsed.port
url_settings = {
'scheme': self.url_parsed.scheme,
'host': self.url_parsed.hostname,
'port': port
}
if self.url_parsed.path:
url_settings['url_prefix'] = self.url_parsed.path
LOGGER.debug(f'URL settings: {url_settings}')
LOGGER.debug('Connecting to Elasticsearch')
self.es = Elasticsearch([url_settings])
self.es = Elasticsearch(self.es_host)
if not self.es.ping():
msg = 'Cannot connect to Elasticsearch'
LOGGER.error(msg)
@@ -124,8 +100,7 @@ class ElasticsearchProvider(BaseProvider):
"""
fields_ = {}
ic = IndicesClient(self.es)
ii = ic.get(index=self.index_name)
ii = self.es.indices.get(index=self.index_name)
try:
if '*' not in self.index_name:
@@ -134,10 +109,6 @@ class ElasticsearchProvider(BaseProvider):
LOGGER.debug('Wildcard index; setting from first match')
index_name_ = list(ii.keys())[0]
p = ii[index_name_]['mappings']['properties']['properties']
except KeyError:
LOGGER.debug('ES index looks generated by GDAL')
self.is_gdal = True
p = ii[self.index_name]['mappings']
except IndexError:
LOGGER.warning('could not get fields; returning empty set')
return {}
@@ -336,13 +307,15 @@ class ElasticsearchProvider(BaseProvider):
next(gen)
except StopIteration:
break
results['hits']['total'] = \
len(results['hits']['hits']) + offset
matched = len(results['hits']['hits']) + offset
returned = len(results['hits']['hits'])
else:
results = self.es.search(index=self.index_name,
from_=offset, size=limit,
body=query)
results['hits']['total'] = results['hits']['total']['value']
es_results = self.es.search(index=self.index_name,
from_=offset, size=limit, **query)
results = es_results
matched = es_results['hits']['total']['value']
returned = len(es_results['hits']['hits'])
except exceptions.ConnectionError as err:
LOGGER.error(err)
@@ -354,12 +327,12 @@ class ElasticsearchProvider(BaseProvider):
LOGGER.error(err)
raise ProviderQueryError()
feature_collection['numberMatched'] = results['hits']['total']
feature_collection['numberMatched'] = matched
if resulttype == 'hits':
return feature_collection
feature_collection['numberReturned'] = len(results['hits']['hits'])
feature_collection['numberReturned'] = returned
LOGGER.debug('serializing features')
for feature in results['hits']['hits']:
@@ -399,7 +372,7 @@ class ElasticsearchProvider(BaseProvider):
}
try:
result = self.es.search(index=self.index_name, body=query)
result = self.es.search(index=self.index_name, **query)
if len(result['hits']['hits']) == 0:
LOGGER.error(err)
raise ProviderItemNotFoundError(err)
@@ -445,7 +418,7 @@ class ElasticsearchProvider(BaseProvider):
identifier, json_data = self._load_and_prepare_item(
item, identifier, raise_if_exists=False)
_ = self.es.index(index=self.index_name, id=identifier, body=json_data)
_ = self.es.index(index=self.index_name, id=identifier, **json_data)
return True
@@ -475,24 +448,11 @@ class ElasticsearchProvider(BaseProvider):
feature_ = {}
feature_thinned = {}
if 'properties' not in doc['_source']:
LOGGER.debug('Looks like a GDAL ES 7 document')
id_ = doc['_source'][self.id_field]
if 'type' not in doc['_source']:
feature_['id'] = id_
feature_['type'] = 'Feature'
feature_['geometry'] = doc['_source'].get('geometry')
feature_['properties'] = {}
for key, value in doc['_source'].items():
if key == 'geometry':
continue
feature_['properties'][key] = value
else:
LOGGER.debug('Looks like true GeoJSON document')
feature_ = doc['_source']
id_ = doc['_source']['properties'][self.id_field]
feature_['id'] = id_
feature_['geometry'] = doc['_source'].get('geometry')
LOGGER.debug('Fetching id and geometry from GeoJSON document')
feature_ = doc['_source']
id_ = doc['_source']['properties'][self.id_field]
feature_['id'] = id_
feature_['geometry'] = doc['_source'].get('geometry')
if self.properties or self.select_properties:
LOGGER.debug('Filtering properties')
@@ -525,10 +485,7 @@ class ElasticsearchProvider(BaseProvider):
:returns: masked property name
"""
if self.is_gdal:
return property_name
else:
return f'properties.{property_name}'
return 'properties.{}'.format(property_name)
def get_properties(self):
all_properties = []
+8 -7
View File
@@ -47,20 +47,21 @@
# psql -U postgres -h 127.0.0.1 -p 5432 test
import logging
from pygeoapi.provider.base import BaseProvider, \
ProviderConnectionError, ProviderQueryError, ProviderItemNotFoundError
from geoalchemy2 import Geometry # noqa - this isn't used explicitly but is needed to process Geometry columns
from geoalchemy2.functions import ST_MakeEnvelope
from geoalchemy2.shape import to_shape
from pygeofilter.backends.sqlalchemy.evaluate import to_filter
import shapely
from sqlalchemy import create_engine, MetaData, PrimaryKeyConstraint, asc, desc
from sqlalchemy.exc import InvalidRequestError, OperationalError
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session, load_only
from sqlalchemy.sql.expression import and_
from geoalchemy2 import Geometry # noqa - this isn't used explicitly but is needed to process Geometry columns
from geoalchemy2.functions import ST_MakeEnvelope
from pygeofilter.backends.sqlalchemy.evaluate import to_filter
import shapely
from geoalchemy2.shape import to_shape
from pygeoapi.provider.base import BaseProvider, \
ProviderConnectionError, ProviderQueryError, ProviderItemNotFoundError
_ENGINE_STORE = {}
_TABLE_MODEL_STORE = {}
+2 -2
View File
@@ -1,5 +1,5 @@
elasticsearch<8
elasticsearch-dsl<8
elasticsearch
elasticsearch-dsl
fiona
#GDAL>=3.0.0
geoalchemy
+20 -20
View File
@@ -2,7 +2,7 @@
#
# Authors: Tom Kralidis <tomkralidis@gmail.com>
#
# Copyright (c) 2022 Tom Kralidis
# Copyright (c) 2023 Tom Kralidis
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
@@ -32,7 +32,7 @@ from pathlib import Path
import sys
from elasticsearch import Elasticsearch, helpers
es = Elasticsearch()
es = Elasticsearch('http://localhost:9200')
if len(sys.argv) < 3:
print(f'Usage: {sys.argv[0]} <path/to/data.geojson> <id-field>')
@@ -41,28 +41,27 @@ if len(sys.argv) < 3:
index_name = Path(sys.argv[1]).stem.lower()
id_field = sys.argv[2]
if es.indices.exists(index_name):
es.indices.delete(index_name)
if es.indices.exists(index=index_name):
es.indices.delete(index=index_name)
# index settings
settings = {
'settings': {
'number_of_shards': 1,
'number_of_replicas': 0
},
'mappings': {
'number_of_shards': 1,
'number_of_replicas': 0
}
mappings = {
'properties': {
'geometry': {
'type': 'geo_shape'
},
'properties': {
'geometry': {
'type': 'geo_shape'
},
'properties': {
'properties': {
'nameascii': {
'type': 'text',
'fields': {
'raw': {
'type': 'keyword'
}
'nameascii': {
'type': 'text',
'fields': {
'raw': {
'type': 'keyword'
}
}
}
@@ -90,7 +89,8 @@ def gendata(data):
# create index
es.indices.create(index=index_name, body=settings, request_timeout=90)
es.options(request_timeout=90).indices.create(
index=index_name, settings=settings, mappings=mappings)
with open(sys.argv[1], encoding='utf-8') as fh:
d = json.load(fh)