diff --git a/docker/default.config.yml b/docker/default.config.yml index 4c8d2cb..b8da4b5 100644 --- a/docker/default.config.yml +++ b/docker/default.config.yml @@ -41,6 +41,7 @@ server: url: http://localhost:5000 mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false language: en-US cors: true pretty_print: true diff --git a/docker/examples/elastic/pygeoapi/docker.config.yml b/docker/examples/elastic/pygeoapi/docker.config.yml index d55623e..428c77e 100644 --- a/docker/examples/elastic/pygeoapi/docker.config.yml +++ b/docker/examples/elastic/pygeoapi/docker.config.yml @@ -38,6 +38,7 @@ server: url: http://localhost:5000/ mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false language: en-US cors: true pretty_print: true diff --git a/docker/examples/geosparql/test.pygeoapi.config.yml b/docker/examples/geosparql/test.pygeoapi.config.yml index 4acc166..ae807cd 100644 --- a/docker/examples/geosparql/test.pygeoapi.config.yml +++ b/docker/examples/geosparql/test.pygeoapi.config.yml @@ -34,6 +34,7 @@ server: url: http://localhost:5000 #change to host URL if running your own instance mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false language: en-US cors: true pretty_print: true diff --git a/docker/examples/sensorthings/brgm.sta.pygeoapi.config.yml b/docker/examples/sensorthings/brgm.sta.pygeoapi.config.yml index 1a25e51..06b5111 100644 --- a/docker/examples/sensorthings/brgm.sta.pygeoapi.config.yml +++ b/docker/examples/sensorthings/brgm.sta.pygeoapi.config.yml @@ -34,6 +34,7 @@ server: url: http://localhost:5000 #change to host URL if running your own instance mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false languages: # First language is the default language - en-US diff --git a/docker/examples/sensorthings/iow.sta.pygeoapi.config.yml b/docker/examples/sensorthings/iow.sta.pygeoapi.config.yml index 3ebbc66..ab1cdf4 100644 --- a/docker/examples/sensorthings/iow.sta.pygeoapi.config.yml +++ 
b/docker/examples/sensorthings/iow.sta.pygeoapi.config.yml @@ -34,6 +34,7 @@ server: url: http://localhost:5000 #change to host URL if running your own instance mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false languages: # First language is the default language - en-US diff --git a/docker/examples/sensorthings/sta.pygeoapi.config.yml b/docker/examples/sensorthings/sta.pygeoapi.config.yml index 0f60476..5f035ff 100644 --- a/docker/examples/sensorthings/sta.pygeoapi.config.yml +++ b/docker/examples/sensorthings/sta.pygeoapi.config.yml @@ -34,6 +34,7 @@ server: url: http://localhost:5000 #change to host URL if running your own instance mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false languages: # First language is the default language - en-US diff --git a/docker/examples/simple/my.config.yml b/docker/examples/simple/my.config.yml index 0379c14..348bb9f 100644 --- a/docker/examples/simple/my.config.yml +++ b/docker/examples/simple/my.config.yml @@ -36,6 +36,7 @@ server: url: http://localhost:5000 mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false language: en-US cors: true pretty_print: true diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index dccf505..a1ac41a 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -41,6 +41,7 @@ The ``server`` section provides directives on binding and high level tuning. 
def gzip(func):
    """
    Decorator that compresses the content of an outgoing API result
    instance if the Content-Encoding response header was set to gzip.

    Text content is encoded with the configured server charset, which is
    then appended to the Content-Type header, before being compressed.
    Binary content (bytes) is compressed as-is and its Content-Type is
    left untouched. If the content cannot be compressed, the
    Content-Encoding header is removed and the response is returned
    exactly as the decorated function produced it.

    :param func: decorated function

    :returns: `func`
    """

    # Imported locally so the decorator does not depend on the
    # module-level import block
    from functools import wraps

    @wraps(func)
    def inner(*args, **kwargs):
        headers, status, content = func(*args, **kwargs)
        if F_GZIP not in headers.get('Content-Encoding', []):
            return headers, status, content

        try:
            if isinstance(content, (bytes, bytearray)):
                # Binary payloads have no charset to advertise
                compressed = compress(content)
                content_type = None
            else:
                charset = CHARSET[0]
                compressed = compress(content.encode(charset))
                content_type = \
                    f"{headers['Content-Type']}; charset={charset}"
        except (AttributeError, LookupError, TypeError, UnicodeError) as err:
            # Best effort: serve the response uncompressed. Headers are
            # only touched after compression succeeded, so Content-Type
            # still matches the untouched content.
            headers.pop('Content-Encoding', None)
            LOGGER.error('Error in compression: {}'.format(err))
            return headers, status, content

        if content_type is not None:
            headers['Content-Type'] = content_type

        return headers, status, compressed

    return inner
:returns: A header dict """ @@ -503,6 +536,13 @@ class APIRequest: elif self.is_valid() and self._format: # Set MIME type for valid formats headers['Content-Type'] = FORMAT_TYPES[self._format] + + if F_GZIP in FORMAT_TYPES: + if force_encoding: + headers['Content-Encoding'] = force_encoding + elif F_GZIP in self._headers.get('Accept-Encoding', ''): + headers['Content-Encoding'] = F_GZIP + return headers def get_request_headers(self, headers) -> dict: @@ -534,6 +574,11 @@ class API: self.config = config self.config['server']['url'] = self.config['server']['url'].rstrip('/') + CHARSET[0] = config['server'].get('encoding', 'utf-8') + if config['server'].get('gzip') is True: + FORMAT_TYPES[F_GZIP] = 'application/gzip' + FORMAT_TYPES.move_to_end(F_JSON) + # Process language settings (first locale is default!) self.locales = l10n.get_locales(config) self.default_locale = self.locales[0] @@ -563,6 +608,7 @@ class API: self.manager = load_plugin('process_manager', manager_def) LOGGER.info('Process manager plugin loaded') + @gzip @pre_process @jsonldify def landing_page(self, @@ -654,6 +700,7 @@ class API: return headers, 200, to_json(fcm, self.pretty_print) + @gzip @pre_process def openapi(self, request: Union[APIRequest, Any], openapi) -> Tuple[dict, int, str]: @@ -692,6 +739,7 @@ class API: else: return headers, 200, openapi + @gzip @pre_process def conformance(self, request: Union[APIRequest, Any]) -> Tuple[dict, int, str]: @@ -718,6 +766,7 @@ class API: return headers, 200, to_json(conformance, self.pretty_print) + @gzip @pre_process @jsonldify def describe_collections(self, request: Union[APIRequest, Any], @@ -1063,6 +1112,7 @@ class API: return headers, 200, to_json(fcm, self.pretty_print) + @gzip @pre_process @jsonldify def get_collection_queryables(self, request: Union[APIRequest, Any], @@ -1147,6 +1197,7 @@ class API: return headers, 200, to_json(queryables, self.pretty_print) + @gzip @pre_process def get_collection_items( self, request: Union[APIRequest, Any], @@ 
-1493,6 +1544,7 @@ class API: return headers, 200, to_json(content, self.pretty_print) + @gzip @pre_process def post_collection_items( self, request: Union[APIRequest, Any], @@ -1732,6 +1784,7 @@ class API: return headers, 200, to_json(content, self.pretty_print) + @gzip @pre_process def get_collection_item(self, request: Union[APIRequest, Any], dataset, identifier) -> Tuple[dict, int, str]: @@ -2020,6 +2073,7 @@ class API: else: return self.get_format_exception(request) + @gzip @pre_process @jsonldify def get_collection_coverage_domainset( @@ -2073,6 +2127,7 @@ class API: else: return self.get_format_exception(request) + @gzip @pre_process @jsonldify def get_collection_coverage_rangetype( @@ -2125,6 +2180,7 @@ class API: else: return self.get_format_exception(request) + @gzip @pre_process @jsonldify def get_collection_tiles(self, request: Union[APIRequest, Any], @@ -2229,6 +2285,7 @@ class API: return headers, 200, to_json(tiles, self.pretty_print) + @gzip @pre_process @jsonldify def get_collection_tiles_data( @@ -2313,6 +2370,7 @@ class API: return self.get_exception( 500, headers, format_, 'NoApplicableCode', msg) + @gzip @pre_process @jsonldify def get_collection_tiles_metadata( @@ -2395,6 +2453,7 @@ class API: return headers, 200, to_json(tiles_metadata, self.pretty_print) + @gzip @pre_process @jsonldify def describe_processes(self, request: Union[APIRequest, Any], @@ -2494,6 +2553,7 @@ class API: return headers, 200, to_json(response, self.pretty_print) + @gzip @pre_process def get_process_jobs(self, request: Union[APIRequest, Any], process_id, job_id=None) -> Tuple[dict, int, str]: @@ -2598,6 +2658,7 @@ class API: return headers, 200, to_json(serialized_jobs, self.pretty_print) + @gzip @pre_process def execute_process(self, request: Union[APIRequest, Any], process_id) -> Tuple[dict, int, str]: @@ -2701,6 +2762,7 @@ class API: return headers, http_status, to_json(response, self.pretty_print) + @gzip @pre_process def get_process_job_result(self, request: 
Union[APIRequest, Any], process_id, job_id) -> Tuple[dict, int, str]: @@ -2825,6 +2887,7 @@ class API: # TODO: this response does not have any headers return {}, http_status, response + @gzip @pre_process def get_collection_edr_query( self, request: Union[APIRequest, Any], @@ -2950,6 +3013,7 @@ class API: return headers, 200, content + @gzip @pre_process @jsonldify def get_stac_root( @@ -3005,6 +3069,7 @@ class API: return headers, 200, to_json(content, self.pretty_print) + @gzip @pre_process @jsonldify def get_stac_path(self, request: Union[APIRequest, Any], diff --git a/tests/pygeoapi-test-config-envvars.yml b/tests/pygeoapi-test-config-envvars.yml index 86b309c..c291334 100644 --- a/tests/pygeoapi-test-config-envvars.yml +++ b/tests/pygeoapi-test-config-envvars.yml @@ -35,6 +35,7 @@ server: mimetype: application/json; charset=UTF-8 encoding: utf-8 language: en-US + gzip: false cors: true pretty_print: true limit: 10 diff --git a/tests/pygeoapi-test-config.yml b/tests/pygeoapi-test-config.yml index 753a759..f24a1ca 100644 --- a/tests/pygeoapi-test-config.yml +++ b/tests/pygeoapi-test-config.yml @@ -34,6 +34,7 @@ server: url: http://localhost:5000/ mimetype: application/json; charset=UTF-8 encoding: utf-8 + gzip: false languages: # First language is the default language - en-US diff --git a/tests/pygeoapi-test-ogr-config.yml b/tests/pygeoapi-test-ogr-config.yml index 30f5c61..8977cad 100644 --- a/tests/pygeoapi-test-ogr-config.yml +++ b/tests/pygeoapi-test-ogr-config.yml @@ -36,6 +36,7 @@ server: encoding: utf-8 language: en-US cors: true + gzip: false pretty_print: true limit: 10 # templates: /path/to/templates diff --git a/tests/test_api.py b/tests/test_api.py index 3d52a77..61e2798 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -30,12 +30,13 @@ import json import logging import time +import gzip from pyld import jsonld import pytest from pygeoapi.api import ( API, APIRequest, FORMAT_TYPES, validate_bbox, validate_datetime, - F_HTML, F_JSON, F_JSONLD 
def test_gzip(config, api_):
    """
    Exercise gzip content negotiation on the landing page.

    With ``server.gzip`` disabled, responses must keep their plain
    Content-Type. With it enabled (and a ``utf-16`` server encoding),
    responses to requests carrying ``Accept-Encoding: gzip`` must be
    compressed, advertise the charset in Content-Type and decompress to
    the same payload as their uncompressed counterparts.

    Creating an API with ``gzip: true`` changes the module-level
    FORMAT_TYPES mapping and charset, so that state (and the ``config``
    fixture) is restored at the end to keep other tests isolated.
    """

    # Requests for each response type and gzip encoding
    req_gzip_json = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSON],
                                 HTTP_ACCEPT_ENCODING=F_GZIP)
    req_gzip_jsonld = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSONLD],
                                   HTTP_ACCEPT_ENCODING=F_GZIP)
    req_gzip_html = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_HTML],
                                 HTTP_ACCEPT_ENCODING=F_GZIP)
    req_gzip_gzip = mock_request(HTTP_ACCEPT='application/gzip',
                                 HTTP_ACCEPT_ENCODING=F_GZIP)

    # Responses from server config without gzip compression
    rsp_headers, _, rsp_json = api_.landing_page(req_gzip_json)
    assert rsp_headers['Content-Type'] == FORMAT_TYPES[F_JSON]
    rsp_headers, _, rsp_jsonld = api_.landing_page(req_gzip_jsonld)
    assert rsp_headers['Content-Type'] == FORMAT_TYPES[F_JSONLD]
    rsp_headers, _, rsp_html = api_.landing_page(req_gzip_html)
    assert rsp_headers['Content-Type'] == FORMAT_TYPES[F_HTML]
    rsp_headers, _, _ = api_.landing_page(req_gzip_gzip)
    assert rsp_headers['Content-Type'] == FORMAT_TYPES[F_JSON]

    # Snapshot the state that the gzip-enabled API is about to change
    formats_before = list(FORMAT_TYPES.items())
    server_before = dict(config['server'])

    try:
        # Add gzip to server and use utf-16 encoding
        config['server']['gzip'] = True
        enc_16 = 'utf-16'
        config['server']['encoding'] = enc_16
        api_ = API(config)

        # Responses from server with gzip compression
        rsp_json_headers, _, rsp_gzip_json = \
            api_.landing_page(req_gzip_json)
        rsp_jsonld_headers, _, rsp_gzip_jsonld = \
            api_.landing_page(req_gzip_jsonld)
        rsp_html_headers, _, rsp_gzip_html = \
            api_.landing_page(req_gzip_html)
        rsp_gzip_headers, _, rsp_gzip_gzip = \
            api_.landing_page(req_gzip_gzip)

        # Validate compressed json response
        assert rsp_json_headers['Content-Type'] == \
            f'{FORMAT_TYPES[F_JSON]}; charset={enc_16}'
        assert rsp_json_headers['Content-Encoding'] == F_GZIP

        parsed_gzip_json = gzip.decompress(rsp_gzip_json).decode(enc_16)
        assert isinstance(parsed_gzip_json, str)
        parsed_gzip_json = json.loads(parsed_gzip_json)
        assert isinstance(parsed_gzip_json, dict)
        assert parsed_gzip_json == json.loads(rsp_json)

        # Validate compressed jsonld response
        assert rsp_jsonld_headers['Content-Type'] == \
            f'{FORMAT_TYPES[F_JSONLD]}; charset={enc_16}'
        assert rsp_jsonld_headers['Content-Encoding'] == F_GZIP

        parsed_gzip_jsonld = \
            gzip.decompress(rsp_gzip_jsonld).decode(enc_16)
        assert isinstance(parsed_gzip_jsonld, str)
        parsed_gzip_jsonld = json.loads(parsed_gzip_jsonld)
        assert isinstance(parsed_gzip_jsonld, dict)
        assert parsed_gzip_jsonld == json.loads(rsp_jsonld)

        # Validate compressed html response
        assert rsp_html_headers['Content-Type'] == \
            f'{FORMAT_TYPES[F_HTML]}; charset={enc_16}'
        assert rsp_html_headers['Content-Encoding'] == F_GZIP

        parsed_gzip_html = gzip.decompress(rsp_gzip_html).decode(enc_16)
        assert isinstance(parsed_gzip_html, str)
        assert parsed_gzip_html == rsp_html

        # Validate compressed gzip response
        assert rsp_gzip_headers['Content-Type'] == \
            f'{FORMAT_TYPES[F_GZIP]}; charset={enc_16}'
        assert rsp_gzip_headers['Content-Encoding'] == F_GZIP

        parsed_gzip_gzip = gzip.decompress(rsp_gzip_gzip).decode(enc_16)
        assert isinstance(parsed_gzip_gzip, str)
        parsed_gzip_gzip = json.loads(parsed_gzip_gzip)
        assert isinstance(parsed_gzip_gzip, dict)

        # Requests without content encoding header
        req_json = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSON])
        req_jsonld = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSONLD])
        req_html = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_HTML])

        # Responses without content encoding
        _, _, rsp_json_ = api_.landing_page(req_json)
        _, _, rsp_jsonld_ = api_.landing_page(req_jsonld)
        _, _, rsp_html_ = api_.landing_page(req_html)

        # Confirm each request is the same when decompressed
        assert rsp_json_ == rsp_json == \
            gzip.decompress(rsp_gzip_json).decode(enc_16)

        assert rsp_jsonld_ == rsp_jsonld == \
            gzip.decompress(rsp_gzip_jsonld).decode(enc_16)

        assert rsp_html_ == rsp_html == \
            gzip.decompress(rsp_gzip_html).decode(enc_16)
    finally:
        # Undo the fixture changes first; constructing an API from the
        # restored config resets the module-level charset, and the last
        # step drops the gzip entry (and ordering change) again.
        config['server'].clear()
        config['server'].update(server_before)
        API(config)
        FORMAT_TYPES.clear()
        FORMAT_TYPES.update(formats_before)