Release Process Manager backed by Postgresql (#1730)

* Release Process Manager backed by Postgresql

* Update postgresdb_.py

* Changes as per Tom's comment on 16.7

* Update postgresql.py

* Included further Tom suggestions

* Renamed Manager requirements file

* renamed table job_info -> jobs

* Update postgres_manager_full_structure.backup.sql

* Update postgresql.py

* Update requirements-manager.txt

Accepted @Tom requirement.

I found the issue about compiling psycopg2:
it was a bug in my configuration.

* Modified to adhere to CI

* Update postgresql.py

---------

Co-authored-by: FrancescoIngv <FrancescoIngv@users.noreply.github.com>
This commit is contained in:
francescoingv
2024-07-19 15:10:21 +02:00
committed by GitHub
parent e91f051ea0
commit c322d3fb40
7 changed files with 635 additions and 1 deletions
+3
View File
@@ -91,6 +91,7 @@ jobs:
pip3 install -r requirements-starlette.txt
pip3 install -r requirements-dev.txt
pip3 install -r requirements-provider.txt
pip3 install -r requirements-manager.txt
pip3 install -r requirements-django.txt
python3 setup.py install
pip3 install --upgrade numpy elasticsearch
@@ -104,6 +105,7 @@ jobs:
gunzip < tests/data/hotosm_bdi_waterways.sql.gz | psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test
psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/dummy_data.sql
psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/dummy_types_data.sql
psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/postgres_manager_full_structure.backup.sql
docker ps
python3 tests/load_oracle_data.py
- name: run unit tests ⚙️
@@ -127,6 +129,7 @@ jobs:
pytest tests/test_ogr_shapefile_provider.py
pytest tests/test_ogr_sqlite_provider.py
pytest tests/test_ogr_wfs_provider.py
pytest tests/test_postgresql_manager.py
# pytest tests/test_ogr_wfs_provider_live.py # NOTE: these are skipped in the file but listed here for completeness
pytest tests/test_openapi.py
pytest tests/test_oracle_provider.py
+2 -1
View File
@@ -78,7 +78,8 @@ PLUGINS = {
'process_manager': {
'Dummy': 'pygeoapi.process.manager.dummy.DummyManager',
'MongoDB': 'pygeoapi.process.manager.mongodb_.MongoDBManager',
'TinyDB': 'pygeoapi.process.manager.tinydb_.TinyDBManager'
'TinyDB': 'pygeoapi.process.manager.tinydb_.TinyDBManager',
'PostgreSQL': 'pygeoapi.process.manager.postgresql.PostgreSQLManager'
}
}
+270
View File
@@ -0,0 +1,270 @@
# =================================================================
#
# Authors: Francesco Martinelli <francesco.martinelli@ingv.it>
#
# Copyright (c) 2024 Francesco Martinelli
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# =================================================================
# Requires a PostgreSQL database that contains the `jobs` table.
#
# Create the database, e.g.:
# CREATE DATABASE test
# WITH TEMPLATE = template0
# ENCODING = 'UTF8'
# LOCALE = 'en_US.UTF-8';
# ALTER DATABASE test OWNER TO postgres;
#
# Then import the table structure from the dump:
# psql -U postgres -h 127.0.0.1 -p 5432 test <
# tests/data/postgres_manager_full_structure.backup.sql
import json
import logging
from pathlib import Path
from typing import Any, Tuple

import psycopg2
import psycopg2.extras
from psycopg2 import sql

from pygeoapi.process.manager.base import BaseManager
from pygeoapi.process.base import (
    JobNotFoundError,
    JobResultNotFoundError,
    ProcessorGenericError,
)
from pygeoapi.util import JobStatus
LOGGER = logging.getLogger(__name__)
class PostgreSQLManager(BaseManager):
    """Process manager that stores job metadata in a PostgreSQL table"""

    def __init__(self, manager_def: dict):
        """
        Initialize object

        :param manager_def: manager definition; its 'connection' entry is
                            either a connection string or a `dict` of
                            keyword arguments for `psycopg2.connect`

        :raises ProcessorGenericError: if the test connection to the
                                       database fails

        :returns: `pygeoapi.process.manager.postgresql.PostgreSQLManager`
        """

        super().__init__(manager_def)
        self.is_async = True
        self.supports_subscribing = True
        self.__database_connection_parameters = manager_def['connection']

        try:
            # Fail fast if the connection parameters are unusable
            self._execute('SELECT version()', fetch='one')
        except Exception as err:
            LOGGER.error(f'Test connecting to DB failed: {err}')
            raise ProcessorGenericError(
                'Test connecting to DB failed.') from err

    def get_db_connection(self):
        """
        Get and return a new connection to the DB.

        The caller owns the returned connection and must close it:
        using a psycopg2 connection in a `with` statement only ends the
        transaction, it does not close the connection.

        :returns: new `psycopg2` connection
        """

        parameters = self.__database_connection_parameters
        if isinstance(parameters, str):
            return psycopg2.connect(parameters)
        return psycopg2.connect(**parameters)

    def _execute(self, query, params=None, *, dict_rows: bool = False,
                 fetch: str = None) -> tuple:
        """
        Run one statement on a fresh connection, then close the connection

        :param query: SQL string or `psycopg2.sql.Composable`
        :param params: sequence or mapping of query parameters
        :param dict_rows: if `True`, rows are `psycopg2.extras.DictRow`
        :param fetch: 'one', 'all', or `None` to fetch nothing

        :returns: `tuple` of (rowcount, fetched row(s) or `None`)
        """

        cursor_factory = psycopg2.extras.DictCursor if dict_rows else None
        conn = self.get_db_connection()
        try:
            # `with conn` commits on success and rolls back on error
            with conn:
                with conn.cursor(cursor_factory=cursor_factory) as cur:
                    cur.execute(query, params)
                    if fetch == 'one':
                        rows = cur.fetchone()
                    elif fetch == 'all':
                        rows = cur.fetchall()
                    else:
                        rows = None
                    return cur.rowcount, rows
        finally:
            # Without this every call would leave a connection open
            conn.close()

    def get_jobs(self, status: JobStatus = None) -> list:
        """
        Get jobs

        :param status: job status (accepted, running, successful,
                       failed, results) (default is all)

        :returns: `list` of jobs (type (default='process'), identifier,
                  status, process_id, job_start_datetime, job_end_datetime,
                  location, mimetype, message, progress)
        """

        query = 'SELECT * FROM jobs'
        params = []
        if status is not None:
            query += ' WHERE status = %s'
            params.append(status.value)

        _, rows = self._execute(query, params, dict_rows=True, fetch='all')
        return rows

    def add_job(self, job_metadata: dict) -> str:
        """
        Add a job

        :param job_metadata: `dict` of job metadata; must provide every
                             key named in the INSERT statement below

        :returns: identifier of added job
        """

        query_insert = """INSERT INTO jobs(
            type, process_id, identifier, status, message,
            progress, job_start_datetime, job_end_datetime
        ) VALUES(%(type)s, %(process_id)s, %(identifier)s, %(status)s,
            %(message)s, %(progress)s, %(job_start_datetime)s,
            %(job_end_datetime)s);"""

        self._execute(query_insert, job_metadata)
        return job_metadata['identifier']

    def update_job(self, job_id: str, update_dict: dict) -> bool:
        """
        Updates a job

        :param job_id: job identifier
        :param update_dict: `dict` of property updates (column -> value);
                            it is not modified

        :returns: `bool` of status result (`True` if exactly one row was
                  updated, `False` otherwise or if there is nothing to
                  update)
        """

        if not update_dict:
            # An UPDATE without any assignment is not valid SQL
            return False

        # Column names are quoted as identifiers and values are bound as
        # parameters; the `col = value` form is valid for one or many
        # columns.
        assignments = sql.SQL(', ').join(
            sql.SQL('{} = %s').format(sql.Identifier(column))
            for column in update_dict
        )
        query_update = sql.SQL(
            'UPDATE jobs SET {} WHERE identifier = %s'
        ).format(assignments)
        params = [*update_dict.values(), job_id]

        rowcount, _ = self._execute(query_update, params)
        return rowcount == 1

    def get_job(self, job_id: str) -> dict:
        """
        Get a single job

        :param job_id: job identifier

        :raises JobNotFoundError: if the job_id does not correspond to a
                                  known job

        :returns: `dict`  # `pygeoapi.process.manager.Job`
        """

        query_select = 'SELECT * FROM jobs WHERE identifier = %s'
        _, found = self._execute(
            query_select, [job_id], dict_rows=True, fetch='one')

        if found is None:
            raise JobNotFoundError()
        return found

    def delete_job(self, job_id: str) -> bool:
        """
        Deletes a job

        :param job_id: job identifier

        :raises JobNotFoundError: if the job_id does not correspond to a
                                  known job

        :returns: `bool` of status result
        """

        # Raises JobNotFoundError for an unknown job before anything is
        # removed
        job_result = self.get_job(job_id)

        # delete result file if present
        location = job_result.get('location')
        if location and self.output_dir is not None:
            try:
                Path(location).unlink()
            except FileNotFoundError:
                # The result file is already gone; nothing to clean up
                pass

        query_delete = 'DELETE FROM jobs WHERE identifier = %s'
        rowcount, _ = self._execute(query_delete, [job_id])
        return rowcount == 1

    def get_job_result(self, job_id: str) -> Tuple[str, Any]:
        """
        Get a job's status, and actual output of executing the process

        :param job_id: job identifier

        :raises JobNotFoundError: if the job_id does not correspond to a
                                  known job
        :raises JobResultNotFoundError: if the job-related result cannot
                                        be returned

        :returns: `tuple` of mimetype and raw output; for a job that is
                  not successful the one-element tuple `(None,)` is
                  returned instead, so callers that unpack two values
                  must check the job status first
        """

        job_result = self.get_job(job_id)
        location = job_result.get('location')
        mimetype = job_result.get('mimetype')
        job_status = JobStatus[job_result['status']]

        if job_status != JobStatus.successful:
            # Job is incomplete
            return (None,)

        if not location:
            LOGGER.warning(f'job {job_id!r} - unknown result location')
            raise JobResultNotFoundError()

        try:
            with Path(location).open(encoding='utf-8') as fh:
                result = json.load(fh)
        except (TypeError, FileNotFoundError, json.JSONDecodeError):
            raise JobResultNotFoundError()

        return mimetype, result

    def __repr__(self):
        return f'<PostgreSQLManager> {self.name}'
+2
View File
@@ -0,0 +1,2 @@
psycopg2
@@ -0,0 +1,68 @@
--
-- PostgreSQL database dump
--
-- Dumped from database version 14.12 (Ubuntu 14.12-1.pgdg20.04+1)
-- Dumped by pg_dump version 16.3 (Ubuntu 16.3-1.pgdg20.04+1)
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;
--
-- Name: public; Type: SCHEMA; Schema: -; Owner: postgres
--
ALTER SCHEMA public OWNER TO postgres;
SET default_tablespace = '';
SET default_table_access_method = heap;
--
-- Name: jobs; Type: TABLE; Schema: public; Owner: postgres
--
-- Job table used by the PostgreSQL process manager: one row per job.
-- All columns are untyped-length strings except the two timestamps and
-- the integer progress value.
CREATE TABLE public.jobs (
-- Kind of entry; defaults to 'process' when not supplied.
type character varying DEFAULT 'process'::character varying NOT NULL,
-- Job identifier; declared as the primary key further below in this dump.
identifier character varying NOT NULL,
-- Identifier of the process the job belongs to.
process_id character varying NOT NULL,
-- Start/end timestamps are nullable and stored without time zone.
job_start_datetime timestamp without time zone,
job_end_datetime timestamp without time zone,
-- Job status stored as text.
status character varying NOT NULL,
-- Optional result information: where the output is stored and its
-- media type.
location character varying,
mimetype character varying,
-- Optional free-text message about the job.
message character varying,
-- Progress value; the valid range is not constrained here
-- (presumably a percentage -- TODO confirm).
progress integer NOT NULL
);
ALTER TABLE public.jobs OWNER TO postgres;
--
-- Name: jobs jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres
--
ALTER TABLE ONLY public.jobs
ADD CONSTRAINT jobs_pkey PRIMARY KEY (identifier);
--
-- Name: SCHEMA public; Type: ACL; Schema: -; Owner: postgres
--
REVOKE USAGE ON SCHEMA public FROM PUBLIC;
GRANT ALL ON SCHEMA public TO PUBLIC;
--
-- PostgreSQL database dump complete
--
@@ -0,0 +1,113 @@
# =================================================================
#
# Authors: Francesco Martinelli <francesco.martinelli@ingv.it>
#
# Copyright (c) 2024 Francesco Martinelli
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# =================================================================
server:
bind:
host: 0.0.0.0
port: 5000
url: http://localhost:5000/
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
languages:
# First language is the default language
- en-US
- fr-CA
cors: true
pretty_print: true
limit: 10
# templates: /path/to/templates
map:
url: https://maps.wikimedia.org/osm-intl/{z}/{x}/{y}.png
attribution: '<a href="https://wikimediafoundation.org/wiki/Maps_Terms_of_Use">Wikimedia maps</a> | Map data &copy; <a href="https://openstreetmap.org/copyright">OpenStreetMap contributors</a>'
manager:
name: PostgreSQL
connection:
host: localhost
port: 5432
database: test
user: postgres
password: ${POSTGRESQL_PASSWORD:-postgres}
# Alternative accepted connection definition:
# connection: postgresql://postgres:postgres@localhost:5432/test
output_dir: /tmp
logging:
level: DEBUG
#logfile: /tmp/pygeoapi.log
metadata:
identification:
title:
en: pygeoapi instance to test Process Manager backed by PostgreSql
fr: test instance de pygeoapi
description:
en: pygeoapi provides an API to geospatial data
fr: pygeoapi fournit une API aux données géospatiales
keywords:
en:
- geospatial
- data
- api
fr:
- géospatiale
- données
- api
keywords_type: theme
terms_of_service: https://creativecommons.org/licenses/by/4.0/
url: http://example.org
license:
name: CC-BY 4.0 license
url: https://creativecommons.org/licenses/by/4.0/
provider:
name: Organization Name
url: https://pygeoapi.io
contact:
name: Lastname, Firstname
position: Position Title
address: Mailing Address
city: City
stateorprovince: Administrative Area
postalcode: Zip or Postal Code
country: Country
phone: +xx-xxx-xxx-xxxx
fax: +xx-xxx-xxx-xxxx
email: you@example.org
url: Contact URL
hours: Hours of Service
instructions: During hours of service. Off on weekends.
role: pointOfContact
resources:
hello-world:
type: process
processor:
name: HelloWorld
+177
View File
@@ -0,0 +1,177 @@
# =================================================================
#
# Authors: Francesco Martinelli <francesco.martinelli@ingv.it>
#
# Copyright (c) 2024 Francesco Martinelli
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# =================================================================
# See pygeoapi/process/manager/postgresql.py
# for instructions on setting up database structure.
import json
import pytest
from werkzeug.wrappers import Request
from werkzeug.test import create_environ
from .util import get_test_file_path
from pygeoapi.api import API, APIRequest
import pygeoapi.api.processes as processes_api
from pygeoapi.util import yaml_load
@pytest.fixture()
def config():
    """Load the test configuration that uses the PostgreSQL manager."""
    config_path = get_test_file_path(
        'pygeoapi-test-config-postgres-manager.yml')
    with open(config_path) as fh:
        return yaml_load(fh)
@pytest.fixture()
def openapi():
    """Load the OpenAPI document used by the tests."""
    openapi_path = get_test_file_path('pygeoapi-test-openapi.yml')
    with open(openapi_path) as fh:
        return yaml_load(fh)
@pytest.fixture()
def api_(config, openapi):
    """Build the API under test from the `config` and `openapi` fixtures."""
    api_instance = API(config, openapi)
    return api_instance
def _create_execute_request(name, message, locales):
    """
    Build a POST request that executes hello-world with a raw response.

    :param name: value for the process input 'name'
    :param message: value for the process input 'message'
    :param locales: locales passed on to `APIRequest.with_data`

    :returns: `APIRequest` wrapping the execution request
    """
    payload = {
        "response": "raw",
        "inputs": {"name": name, "message": message},
    }
    wsgi_environ = create_environ(
        base_url='http://localhost:5000/processes/hello-world/execution',
        method="POST",
        json=payload,
    )
    return APIRequest.with_data(Request(wsgi_environ), locales)
def _create_job_request(job_id, locales):
    """
    Build a GET request (f=json) for a single job or for the job list.

    :param job_id: job identifier, or `None` to address the job list
    :param locales: locales passed on to `APIRequest.with_data`

    :returns: `APIRequest` wrapping the request
    """
    # Without this check a missing job_id would be formatted into the
    # URL as the literal path segment "None".
    if job_id is None:
        base_url = 'http://localhost:5000/jobs'
    else:
        base_url = f'http://localhost:5000/jobs/{job_id}'

    environ = create_environ(
        base_url=base_url,
        query_string="f=json",
        method="GET")
    req = Request(environ)
    return APIRequest.with_data(req, locales)
def _create_results_request(job_id, locales):
    """
    Build a GET request (f=json) for the results of a job.

    :param job_id: job identifier
    :param locales: locales passed on to `APIRequest.with_data`

    :returns: `APIRequest` wrapping the request
    """
    results_url = f'http://localhost:5000/jobs/{job_id}/results'
    wsgi_environ = create_environ(
        base_url=results_url, query_string="f=json", method="GET")
    return APIRequest.with_data(Request(wsgi_environ), locales)
def _create_delete_request(job_id, locales):
    """
    Build a DELETE request (f=json) for a job.

    :param job_id: job identifier
    :param locales: locales passed on to `APIRequest.with_data`

    :returns: `APIRequest` wrapping the request
    """
    job_url = f'http://localhost:5000/jobs/{job_id}'
    wsgi_environ = create_environ(
        base_url=job_url, query_string="f=json", method="DELETE")
    return APIRequest.with_data(Request(wsgi_environ), locales)
def test_job_sync_hello_world(api_, config):
    """
    Create a new job for hello-world,
    which implicitly tests add_job() and update_job();
    then:
    -) get the job info, which tests get_job(),
    -) get the job results, which tests get_job_result(),
    -) get all present jobs, which tests get_jobs(),
    -) delete the newly inserted job, which tests delete_job(),
    -) check that the deleted job can no longer be retrieved.
    """
    process_id = "hello-world"

    # Create new job
    req = _create_execute_request("World", "Hello", api_.locales)
    headers, http_status, response = processes_api.execute_process(
        api_, req, process_id)
    assert http_status == 200
    out_json = json.loads(response)
    assert out_json["id"] == "echo"
    assert out_json["value"] == "Hello World! Hello"

    # Save job_id for later use
    job_id = headers['Location'].split('/')[-1]
    mimetype = headers['Content-Type']

    # Get job info
    req = _create_job_request(job_id, api_.locales)
    headers, http_status, response = processes_api.get_jobs(
        api_, req, job_id)
    assert http_status == 200
    out_json = json.loads(response)
    assert out_json["type"] == "process"
    assert out_json["processID"] == process_id
    assert out_json["jobID"] == job_id

    # Get job results
    req = _create_results_request(job_id, api_.locales)
    headers, http_status, response = processes_api.get_job_result(
        api_, req, job_id)
    assert http_status == 200
    assert mimetype == headers['Content-Type']
    out_json = json.loads(response)
    assert out_json["id"] == "echo"
    assert out_json["value"] == "Hello World! Hello"

    # Get all present jobs
    req = _create_job_request(None, api_.locales)
    headers, http_status, response = processes_api.get_jobs(
        api_, req, None)
    assert http_status == 200
    # check the inserted job is in the list
    out_json = json.loads(response)
    jobs = out_json["jobs"]
    assert any(job["jobID"] == job_id for job in jobs)

    # Delete the inserted job
    req = _create_delete_request(job_id, api_.locales)
    headers, http_status, response = processes_api.delete_job(
        api_, req, job_id)
    assert http_status == 200
    out_json = json.loads(response)
    assert out_json["jobID"] == job_id
    assert out_json["status"] == "dismissed"

    # Check that the deleted job can no longer be retrieved; this is a
    # lookup through get_jobs(), so a GET request is built for it
    req = _create_job_request(job_id, api_.locales)
    headers, http_status, response = processes_api.get_jobs(
        api_, req, job_id)
    assert http_status == 404
    out_json = json.loads(response)
    assert out_json["code"] == "InvalidParameterValue"
    assert out_json["description"] == job_id