diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 224c072..fa9836c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -91,6 +91,7 @@ jobs: pip3 install -r requirements-starlette.txt pip3 install -r requirements-dev.txt pip3 install -r requirements-provider.txt + pip3 install -r requirements-manager.txt pip3 install -r requirements-django.txt python3 setup.py install pip3 install --upgrade numpy elasticsearch @@ -104,6 +105,7 @@ jobs: gunzip < tests/data/hotosm_bdi_waterways.sql.gz | psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/dummy_data.sql psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/dummy_types_data.sql + psql postgresql://postgres:${{ secrets.DatabasePassword || 'postgres' }}@localhost:5432/test -f tests/data/postgres_manager_full_structure.backup.sql docker ps python3 tests/load_oracle_data.py - name: run unit tests ⚙️ @@ -127,6 +129,7 @@ jobs: pytest tests/test_ogr_shapefile_provider.py pytest tests/test_ogr_sqlite_provider.py pytest tests/test_ogr_wfs_provider.py + pytest tests/test_postgresql_manager.py # pytest tests/test_ogr_wfs_provider_live.py # NOTE: these are skipped in the file but listed here for completeness pytest tests/test_openapi.py pytest tests/test_oracle_provider.py diff --git a/pygeoapi/plugin.py b/pygeoapi/plugin.py index 7e42255..74ef732 100644 --- a/pygeoapi/plugin.py +++ b/pygeoapi/plugin.py @@ -78,7 +78,8 @@ PLUGINS = { 'process_manager': { 'Dummy': 'pygeoapi.process.manager.dummy.DummyManager', 'MongoDB': 'pygeoapi.process.manager.mongodb_.MongoDBManager', - 'TinyDB': 'pygeoapi.process.manager.tinydb_.TinyDBManager' + 'TinyDB': 'pygeoapi.process.manager.tinydb_.TinyDBManager', + 'PostgreSQL': 'pygeoapi.process.manager.postgresql.PostgreSQLManager' } } diff --git 
a/pygeoapi/process/manager/postgresql.py b/pygeoapi/process/manager/postgresql.py new file mode 100644 index 0000000..017de7f --- /dev/null +++ b/pygeoapi/process/manager/postgresql.py @@ -0,0 +1,270 @@ +# ================================================================= +# +# Authors: Francesco Martinelli +# +# Copyright (c) 2024 Francesco Martinelli +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +# Requires postgresql database structure. +# Create the database: +# e.g. 
class PostgreSQLManager(BaseManager):
    """
    Process manager that stores job metadata in a PostgreSQL `jobs` table.

    Every operation opens its own short-lived connection through
    `get_db_connection()` and closes it once the statement has been
    committed (or rolled back).
    """

    def __init__(self, manager_def: dict):
        """
        Initialize object

        :param manager_def: manager definition; its `connection` entry is
                            either a connection string or a `dict` of
                            keyword arguments for `psycopg2.connect`

        :raises ProcessorGenericError: if a test query against the
                                       database fails
        :returns: `pygeoapi.process.manager.postgresql.PostgreSQLManager`
        """

        super().__init__(manager_def)
        self.is_async = True
        self.supports_subscribing = True

        self.__database_connection_parameters = manager_def['connection']
        try:
            # Fail fast on wrong connection parameters
            self._run_query('SELECT version()', fetch='one')
        except Exception as err:
            LOGGER.error('Test connecting to DB failed: %s', err)
            raise ProcessorGenericError(
                'Test connecting to DB failed.') from err

    def get_db_connection(self):
        """
        Get and return a new connection to the DB.

        The caller owns the returned connection and must close it.

        :returns: a new `psycopg2` connection
        """

        parameters = self.__database_connection_parameters
        if isinstance(parameters, str):
            return psycopg2.connect(parameters)
        return psycopg2.connect(**parameters)

    def _run_query(self, query, params=None, fetch: str = None):
        """
        Run one statement on a fresh connection and always close it.

        :param query: SQL text or `psycopg2.sql` composable
        :param params: sequence or `dict` of query parameters
        :param fetch: `'one'` to return a single row (or `None`),
                      `'all'` to return every row, `None` to return the
                      number of affected rows

        :returns: fetched row(s), or the cursor's `rowcount`
        """

        factory = psycopg2.extras.DictCursor
        conn = self.get_db_connection()
        try:
            # `with conn` commits on success and rolls back on error, but
            # it does NOT close the connection; that is why the explicit
            # close() in `finally` is needed to avoid leaking connections.
            with conn:
                with conn.cursor(cursor_factory=factory) as cur:
                    cur.execute(query, params)
                    if fetch == 'one':
                        return cur.fetchone()
                    if fetch == 'all':
                        return cur.fetchall()
                    return cur.rowcount
        finally:
            conn.close()

    def get_jobs(self, status: JobStatus = None) -> list:
        """
        Get jobs

        :param status: job status (accepted, running, successful,
                       failed, results) (default is all)

        :returns: `list` of jobs (type (default='process'), identifier,
                  status, process_id, job_start_datetime, job_end_datetime,
                  location, mimetype, message, progress)
        """

        query = 'SELECT * FROM jobs'
        params = []
        if status is not None:
            query += ' WHERE status = %s'
            params.append(status.value)

        rows = self._run_query(query, params, fetch='all')
        # plain dicts, so callers do not depend on psycopg2 row classes
        return [dict(row) for row in rows]

    def add_job(self, job_metadata: dict) -> str:
        """
        Add a job

        :param job_metadata: `dict` of job metadata; must provide the keys
                             type, process_id, identifier, status, message,
                             progress, job_start_datetime and
                             job_end_datetime

        :returns: identifier of added job
        """

        query_insert = """INSERT INTO jobs(
            type, process_id, identifier, status, message,
            progress, job_start_datetime, job_end_datetime
        ) VALUES(%(type)s, %(process_id)s, %(identifier)s, %(status)s,
            %(message)s, %(progress)s, %(job_start_datetime)s,
            %(job_end_datetime)s);"""
        self._run_query(query_insert, job_metadata)
        return job_metadata['identifier']

    def update_job(self, job_id: str, update_dict: dict) -> bool:
        """
        Updates a job

        :param job_id: job identifier
        :param update_dict: `dict` of property updates (column name ->
                            new value); it is not modified

        :returns: `bool` of status result (`True` if exactly one row was
                  updated; `False` also when `update_dict` is empty)
        """

        # Local import: psycopg2 is already a dependency of this module,
        # only the `sql` composition helpers are new here.
        from psycopg2 import sql

        if not update_dict:
            # an empty SET clause would be invalid SQL
            return False

        # Column names are quoted as identifiers and values are passed as
        # parameters, so neither can alter the statement. The plain
        # `col = value` form is used because the row form
        # `(col) = (value)` is not accepted by recent PostgreSQL versions
        # when a single column is updated.
        assignments = sql.SQL(', ').join(
            sql.SQL('{} = {}').format(
                sql.Identifier(column), sql.Placeholder(column))
            for column in update_dict
        )
        statement = sql.SQL(
            'UPDATE jobs SET {} WHERE identifier = {}'
        ).format(assignments, sql.Placeholder('identifier'))

        # work on a copy: the caller's dict must stay untouched
        params = {**update_dict, 'identifier': job_id}

        return self._run_query(statement, params) == 1

    def get_job(self, job_id: str) -> dict:
        """
        Get a single job

        :param job_id: job identifier

        :raises JobNotFoundError: if the job_id does not correspond to a
                                  known job
        :returns: `dict`  # `pygeoapi.process.manager.Job`
        """

        row = self._run_query(
            'SELECT * FROM jobs WHERE identifier = %s', [job_id],
            fetch='one')
        if row is None:
            raise JobNotFoundError()
        return dict(row)

    def delete_job(self, job_id: str) -> bool:
        """
        Deletes a job

        :param job_id: job identifier

        :raises JobNotFoundError: if the job_id does not correspond to a
                                  known job
        :returns: `bool` of status result
        """

        # delete result file if present
        job_result = self.get_job(job_id)
        location = job_result.get('location')
        if location and self.output_dir is not None:
            try:
                Path(location).unlink()
            except FileNotFoundError:
                # result file already gone: nothing to clean up
                pass

        rowcount = self._run_query(
            'DELETE FROM jobs WHERE identifier = %s', [job_id])
        return rowcount == 1

    def get_job_result(self, job_id: str) -> Tuple[str, Any]:
        """
        Get a job's status, and actual output of executing the process

        :param job_id: job identifier

        :raises JobNotFoundError: if the job_id does not correspond to a
                                  known job
        :raises JobResultNotFoundError: if the job-related result cannot
                                        be returned
        :returns: `tuple` of mimetype and raw output
        """

        job_result = self.get_job(job_id)
        location = job_result.get('location')
        mimetype = job_result.get('mimetype')
        job_status = JobStatus[job_result['status']]

        if job_status != JobStatus.successful:
            # Job is incomplete.
            # NOTE: this is a 1-tuple, not (mimetype, output); it is kept
            # unchanged because callers may rely on its shape.
            return (None,)

        if not location:
            LOGGER.warning(f'job {job_id!r} - unknown result location')
            raise JobResultNotFoundError()

        try:
            with Path(location).open(encoding='utf-8') as fh:
                result = json.load(fh)
        except (TypeError, FileNotFoundError, json.JSONDecodeError) as err:
            raise JobResultNotFoundError() from err

        return mimetype, result

    def __repr__(self):
        return f'<PostgreSQLManager> {self.name}'
Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.jobs ( + type character varying DEFAULT 'process'::character varying NOT NULL, + identifier character varying NOT NULL, + process_id character varying NOT NULL, + job_start_datetime timestamp without time zone, + job_end_datetime timestamp without time zone, + status character varying NOT NULL, + location character varying, + mimetype character varying, + message character varying, + progress integer NOT NULL +); + + +ALTER TABLE public.jobs OWNER TO postgres; + +-- +-- Name: jobs jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.jobs + ADD CONSTRAINT jobs_pkey PRIMARY KEY (identifier); + + +-- +-- Name: SCHEMA public; Type: ACL; Schema: -; Owner: postgres +-- + +REVOKE USAGE ON SCHEMA public FROM PUBLIC; +GRANT ALL ON SCHEMA public TO PUBLIC; + + +-- +-- PostgreSQL database dump complete +-- + diff --git a/tests/pygeoapi-test-config-postgres-manager.yml b/tests/pygeoapi-test-config-postgres-manager.yml new file mode 100644 index 0000000..496f558 --- /dev/null +++ b/tests/pygeoapi-test-config-postgres-manager.yml @@ -0,0 +1,113 @@ +# ================================================================= +# +# Authors: Francesco Martinelli +# +# Copyright (c) 2024 Francesco Martinelli +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +server: + bind: + host: 0.0.0.0 + port: 5000 + url: http://localhost:5000/ + mimetype: application/json; charset=UTF-8 + encoding: utf-8 + gzip: false + languages: + # First language is the default language + - en-US + - fr-CA + cors: true + pretty_print: true + limit: 10 + # templates: /path/to/templates + map: + url: https://maps.wikimedia.org/osm-intl/{z}/{x}/{y}.png + attribution: 'Wikimedia maps | Map data © OpenStreetMap contributors' + manager: + name: PostgreSQL + connection: + host: localhost + port: 5432 + database: test + user: postgres + password: ${POSTGRESQL_PASSWORD:-postgres} + # Alternative accepted connection definition: + # connection: postgresql://postgres:postgres@localhost:5432/test + output_dir: /tmp + +logging: + level: DEBUG + #logfile: /tmp/pygeoapi.log + +metadata: + identification: + title: + en: pygeoapi instance to test Process Manager backed by PostgreSql + fr: test instance de pygeoapi + description: + en: pygeoapi provides an API to geospatial data + fr: pygeoapi fournit une API aux données géospatiales + keywords: + en: + - geospatial + - data + - api + fr: + - géospatiale + - données + - api + keywords_type: theme + terms_of_service: https://creativecommons.org/licenses/by/4.0/ + url: http://example.org + license: + name: CC-BY 4.0 license + url: https://creativecommons.org/licenses/by/4.0/ + provider: + name: Organization Name + url: https://pygeoapi.io + contact: + name: 
Lastname, Firstname + position: Position Title + address: Mailing Address + city: City + stateorprovince: Administrative Area + postalcode: Zip or Postal Code + country: Country + phone: +xx-xxx-xxx-xxxx + fax: +xx-xxx-xxx-xxxx + email: you@example.org + url: Contact URL + hours: Hours of Service + instructions: During hours of service. Off on weekends. + role: pointOfContact + +resources: + + hello-world: + type: process + processor: + name: HelloWorld + diff --git a/tests/test_postgresql_manager.py b/tests/test_postgresql_manager.py new file mode 100644 index 0000000..cd3c86f --- /dev/null +++ b/tests/test_postgresql_manager.py @@ -0,0 +1,177 @@ +# ================================================================= +# +# Authors: Francesco Martinelli +# +# Copyright (c) 2024 Francesco Martinelli +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
@pytest.fixture()
def config():
    """Load the test configuration that selects the PostgreSQL manager."""
    with open(get_test_file_path(
            'pygeoapi-test-config-postgres-manager.yml')) as fh:
        return yaml_load(fh)


@pytest.fixture()
def openapi():
    """Load the OpenAPI document used by the test API instance."""
    with open(get_test_file_path('pygeoapi-test-openapi.yml')) as fh:
        return yaml_load(fh)


@pytest.fixture()
def api_(config, openapi):
    """Build the API under test from the two fixtures above."""
    return API(config, openapi)


def _create_request(path, method, locales, **environ_kwargs):
    """
    Build an `APIRequest` for `path` on the test server.

    :param path: URL path below http://localhost:5000/
    :param method: HTTP method
    :param locales: locales supported by the API
    :param environ_kwargs: extra arguments for werkzeug `create_environ`
                           (e.g. `json`, `query_string`)

    :returns: `APIRequest`
    """
    environ = create_environ(
        base_url=f'http://localhost:5000/{path}',
        method=method, **environ_kwargs)
    return APIRequest.with_data(Request(environ), locales)


def _create_execute_request(name, message, locales):
    """Build the POST request executing hello-world with raw response."""
    data = {
        "response": "raw",
        "inputs": {
            "name": name,
            "message": message
        }
    }
    return _create_request(
        'processes/hello-world/execution', 'POST', locales, json=data)


def _create_job_request(job_id, locales):
    """
    Build the GET request for one job, or for the job list.

    :param job_id: job identifier, or `None` to address `/jobs`
    """
    # without this guard the list request would target `/jobs/None`
    path = 'jobs' if job_id is None else f'jobs/{job_id}'
    return _create_request(path, 'GET', locales, query_string='f=json')


def _create_results_request(job_id, locales):
    """Build the GET request for the results of job `job_id`."""
    return _create_request(
        f'jobs/{job_id}/results', 'GET', locales, query_string='f=json')


def _create_delete_request(job_id, locales):
    """Build the DELETE request dismissing job `job_id`."""
    return _create_request(
        f'jobs/{job_id}', 'DELETE', locales, query_string='f=json')


def test_job_sync_hello_world(api_, config):
    """
    Create a new job for hello-world,
    which implicitly tests add_job() and update_job();
    then:
    -) get the job info, which tests get_job(),
    -) get the job results, which tests get_job_result(),
    -) get all present jobs, which tests get_jobs(),
    -) delete the newly inserted job, which tests delete_job(),
    -) get the deleted job again, which must not be found anymore.
    """
    process_id = "hello-world"

    # Create new job
    req = _create_execute_request("World", "Hello", api_.locales)
    headers, http_status, response = processes_api.execute_process(
        api_, req, process_id)
    assert http_status == 200
    out_json = json.loads(response)
    assert out_json["id"] == "echo"
    assert out_json["value"] == "Hello World! Hello"

    # Save job_id for later use
    job_id = headers['Location'].split('/')[-1]
    mimetype = headers['Content-Type']

    # Get job info
    req = _create_job_request(job_id, api_.locales)
    headers, http_status, response = processes_api.get_jobs(
        api_, req, job_id)
    assert http_status == 200
    out_json = json.loads(response)
    assert out_json["type"] == "process"
    assert out_json["processID"] == process_id
    assert out_json["jobID"] == job_id

    # Get job results
    req = _create_results_request(job_id, api_.locales)
    headers, http_status, response = processes_api.get_job_result(
        api_, req, job_id)
    assert http_status == 200
    assert mimetype == headers['Content-Type']
    out_json = json.loads(response)
    assert out_json["id"] == "echo"
    assert out_json["value"] == "Hello World! Hello"

    # Get all present jobs
    req = _create_job_request(None, api_.locales)
    headers, http_status, response = processes_api.get_jobs(
        api_, req, None)
    assert http_status == 200
    # check the inserted job is in the list
    out_json = json.loads(response)
    jobs = out_json["jobs"]
    assert any(job["jobID"] == job_id for job in jobs)

    # Delete the inserted job
    req = _create_delete_request(job_id, api_.locales)
    headers, http_status, response = processes_api.delete_job(
        api_, req, job_id)
    assert http_status == 200
    out_json = json.loads(response)
    assert out_json["jobID"] == job_id
    assert out_json["status"] == "dismissed"

    # The deleted job must not be retrievable anymore
    req = _create_job_request(job_id, api_.locales)
    headers, http_status, response = processes_api.get_jobs(
        api_, req, job_id)
    assert http_status == 404
    out_json = json.loads(response)
    assert out_json["code"] == "InvalidParameterValue"
    assert out_json["description"] == job_id