Add Gzip to response types (#795)

* Add gzip to API.py

* pytest for gzip

* fix pytest

* Update test_api.py

* Add gzip to server block of configuration files

* Update api.py to include gzip opts from config

Update the logic behind gzip compression so that compression only happens when gzip is in the Accept-Encoding request header and config.server.gzip is True. Use the server charset for encoding/decoding and include the charset in Content-Type when compressed.

* Update pytest to use gzip config options

Update pytest to test with (gzip in Accept-Encoding & server.gzip = False), (gzip in Accept-Encoding & server.gzip = True), (gzip not in Accept-Encoding & server.gzip = True)

* Better Content Negotiation (#7)

* Change content negotiation in _get_format()

* Add to pytest and force h to always be string

* Force F_JSON to default response

* Update test_api.py for utf-16 encoding

* better content negotiation pytest.

* Add comments to pytest

* Fix flake8

* Improve docstring for gzip decorator function
This commit is contained in:
Benjamin Webb
2021-10-27 20:17:02 -04:00
committed by GitHub
parent 04250002d3
commit 205ff3002c
14 changed files with 193 additions and 9 deletions
+1
View File
@@ -41,6 +41,7 @@ server:
url: http://localhost:5000
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
language: en-US
cors: true
pretty_print: true
@@ -38,6 +38,7 @@ server:
url: http://localhost:5000/
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
language: en-US
cors: true
pretty_print: true
@@ -34,6 +34,7 @@ server:
url: http://localhost:5000 #change to host URL if running your own instance
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
language: en-US
cors: true
pretty_print: true
@@ -34,6 +34,7 @@ server:
url: http://localhost:5000 #change to host URL if running your own instance
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
languages:
# First language is the default language
- en-US
@@ -34,6 +34,7 @@ server:
url: http://localhost:5000 #change to host URL if running your own instance
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
languages:
# First language is the default language
- en-US
@@ -34,6 +34,7 @@ server:
url: http://localhost:5000 #change to host URL if running your own instance
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
languages:
# First language is the default language
- en-US
+1
View File
@@ -36,6 +36,7 @@ server:
url: http://localhost:5000
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
language: en-US
cors: true
pretty_print: true
+1
View File
@@ -41,6 +41,7 @@ The ``server`` section provides directives on binding and high level tuning.
mimetype: application/json; charset=UTF-8 # default MIME type
encoding: utf-8 # default server encoding
language: en-US # default server language
gzip: false # whether the server gzip-compresses responses to requests that include gzip in the Accept-Encoding header
cors: true # boolean on whether server should support CORS
pretty_print: true # whether JSON responses should be pretty-printed
limit: 10 # server limit on number of items to return
+1
View File
@@ -34,6 +34,7 @@ server:
url: http://localhost:5000
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
languages:
# First language is the default language
- en-US
+73 -8
View File
@@ -38,6 +38,7 @@ from collections import OrderedDict
from copy import deepcopy
from datetime import datetime, timezone
from functools import partial
from gzip import compress
import json
import logging
import os
@@ -81,9 +82,11 @@ HEADERS = {
'X-Powered-By': 'pygeoapi {}'.format(__version__)
}
CHARSET = ['utf-8']
F_JSON = 'json'
F_HTML = 'html'
F_JSONLD = 'jsonld'
F_GZIP = 'gzip'
#: Formats allowed for ?f= requests (order matters for complex MIME types)
FORMAT_TYPES = OrderedDict((
@@ -144,6 +147,33 @@ def pre_process(func):
return inner
def gzip(func):
    """
    Decorator that compresses the content of an outgoing API result
    instance if the Content-Encoding response header was set to gzip.

    Text content is encoded with the server charset (``CHARSET[0]``)
    before compression, and that charset is appended to the Content-Type
    header. Binary content (``bytes``/``bytearray``) is compressed as-is
    and its Content-Type is left untouched. If the content cannot be
    encoded or compressed, the error is logged, the Content-Encoding
    header is removed and the original content is returned unchanged.

    :param func: decorated function returning `(headers, status, content)`

    :returns: wrapper around `func` returning `(headers, status, content)`
    """

    def inner(*args, **kwargs):
        headers, status, content = func(*args, **kwargs)

        # Nothing to do unless gzip was negotiated for this response
        if F_GZIP not in headers.get('Content-Encoding', []):
            return headers, status, content

        charset = CHARSET[0]
        is_text = not isinstance(content, (bytes, bytearray))

        try:
            payload = content.encode(charset) if is_text else content
            compressed = compress(payload)
        except (AttributeError, LookupError, TypeError, UnicodeError) as err:
            # Fall back to the uncompressed body; headers must not
            # advertise an encoding (or charset) that was never applied
            headers.pop('Content-Encoding', None)
            LOGGER.error('Error in compression: {}'.format(err))
            return headers, status, content

        # Only touch Content-Type once compression has succeeded, and
        # only for text bodies that were encoded with the server charset
        if is_text and 'Content-Type' in headers:
            headers['Content-Type'] = \
                f"{headers['Content-Type']}; charset={charset}"

        return headers, status, compressed

    return inner
class APIRequest:
"""
Transforms an incoming server-specific Request into an object
@@ -345,13 +375,14 @@ class APIRequest:
# Format not specified: get from Accept headers (MIME types)
# e.g. format_ = 'text/html'
for h in (v.strip() for k, v in headers.items() if k.lower() == 'accept'): # noqa
for fmt, mime in FORMAT_TYPES.items():
# basic support for complex types (i.e. with "q=0.x")
types_ = (t.split(';')[0].strip() for t in h.split(',') if t)
if mime.strip() in types_:
format_ = fmt
break
h = headers.get('accept', headers.get('Accept', '')).strip() # noqa
(fmts, mimes) = zip(*FORMAT_TYPES.items())
# basic support for complex types (i.e. with "q=0.x")
for type_ in (t.split(';')[0].strip() for t in h.split(',') if t):
if type_ in mimes:
idx_ = mimes.index(type_)
format_ = fmts[idx_]
break
return format_ or None
@@ -469,7 +500,8 @@ class APIRequest:
return False
def get_response_headers(self, force_lang: l10n.Locale = None,
force_type: str = None) -> dict:
force_type: str = None,
force_encoding: str = None) -> dict:
"""
Prepares and returns a dictionary with Response object headers.
@@ -492,6 +524,7 @@ class APIRequest:
:param force_lang: An optional Content-Language header override.
:param force_type: An optional Content-Type header override.
:param force_encoding: An optional Content-Encoding header override.
:returns: A header dict
"""
@@ -503,6 +536,13 @@ class APIRequest:
elif self.is_valid() and self._format:
# Set MIME type for valid formats
headers['Content-Type'] = FORMAT_TYPES[self._format]
if F_GZIP in FORMAT_TYPES:
if force_encoding:
headers['Content-Encoding'] = force_encoding
elif F_GZIP in self._headers.get('Accept-Encoding', ''):
headers['Content-Encoding'] = F_GZIP
return headers
def get_request_headers(self, headers) -> dict:
@@ -534,6 +574,11 @@ class API:
self.config = config
self.config['server']['url'] = self.config['server']['url'].rstrip('/')
CHARSET[0] = config['server'].get('encoding', 'utf-8')
if config['server'].get('gzip') is True:
FORMAT_TYPES[F_GZIP] = 'application/gzip'
FORMAT_TYPES.move_to_end(F_JSON)
# Process language settings (first locale is default!)
self.locales = l10n.get_locales(config)
self.default_locale = self.locales[0]
@@ -563,6 +608,7 @@ class API:
self.manager = load_plugin('process_manager', manager_def)
LOGGER.info('Process manager plugin loaded')
@gzip
@pre_process
@jsonldify
def landing_page(self,
@@ -654,6 +700,7 @@ class API:
return headers, 200, to_json(fcm, self.pretty_print)
@gzip
@pre_process
def openapi(self, request: Union[APIRequest, Any],
openapi) -> Tuple[dict, int, str]:
@@ -692,6 +739,7 @@ class API:
else:
return headers, 200, openapi
@gzip
@pre_process
def conformance(self,
request: Union[APIRequest, Any]) -> Tuple[dict, int, str]:
@@ -718,6 +766,7 @@ class API:
return headers, 200, to_json(conformance, self.pretty_print)
@gzip
@pre_process
@jsonldify
def describe_collections(self, request: Union[APIRequest, Any],
@@ -1063,6 +1112,7 @@ class API:
return headers, 200, to_json(fcm, self.pretty_print)
@gzip
@pre_process
@jsonldify
def get_collection_queryables(self, request: Union[APIRequest, Any],
@@ -1147,6 +1197,7 @@ class API:
return headers, 200, to_json(queryables, self.pretty_print)
@gzip
@pre_process
def get_collection_items(
self, request: Union[APIRequest, Any],
@@ -1493,6 +1544,7 @@ class API:
return headers, 200, to_json(content, self.pretty_print)
@gzip
@pre_process
def post_collection_items(
self, request: Union[APIRequest, Any],
@@ -1732,6 +1784,7 @@ class API:
return headers, 200, to_json(content, self.pretty_print)
@gzip
@pre_process
def get_collection_item(self, request: Union[APIRequest, Any],
dataset, identifier) -> Tuple[dict, int, str]:
@@ -2020,6 +2073,7 @@ class API:
else:
return self.get_format_exception(request)
@gzip
@pre_process
@jsonldify
def get_collection_coverage_domainset(
@@ -2073,6 +2127,7 @@ class API:
else:
return self.get_format_exception(request)
@gzip
@pre_process
@jsonldify
def get_collection_coverage_rangetype(
@@ -2125,6 +2180,7 @@ class API:
else:
return self.get_format_exception(request)
@gzip
@pre_process
@jsonldify
def get_collection_tiles(self, request: Union[APIRequest, Any],
@@ -2229,6 +2285,7 @@ class API:
return headers, 200, to_json(tiles, self.pretty_print)
@gzip
@pre_process
@jsonldify
def get_collection_tiles_data(
@@ -2313,6 +2370,7 @@ class API:
return self.get_exception(
500, headers, format_, 'NoApplicableCode', msg)
@gzip
@pre_process
@jsonldify
def get_collection_tiles_metadata(
@@ -2395,6 +2453,7 @@ class API:
return headers, 200, to_json(tiles_metadata, self.pretty_print)
@gzip
@pre_process
@jsonldify
def describe_processes(self, request: Union[APIRequest, Any],
@@ -2494,6 +2553,7 @@ class API:
return headers, 200, to_json(response, self.pretty_print)
@gzip
@pre_process
def get_process_jobs(self, request: Union[APIRequest, Any],
process_id, job_id=None) -> Tuple[dict, int, str]:
@@ -2598,6 +2658,7 @@ class API:
return headers, 200, to_json(serialized_jobs, self.pretty_print)
@gzip
@pre_process
def execute_process(self, request: Union[APIRequest, Any],
process_id) -> Tuple[dict, int, str]:
@@ -2701,6 +2762,7 @@ class API:
return headers, http_status, to_json(response, self.pretty_print)
@gzip
@pre_process
def get_process_job_result(self, request: Union[APIRequest, Any],
process_id, job_id) -> Tuple[dict, int, str]:
@@ -2825,6 +2887,7 @@ class API:
# TODO: this response does not have any headers
return {}, http_status, response
@gzip
@pre_process
def get_collection_edr_query(
self, request: Union[APIRequest, Any],
@@ -2950,6 +3013,7 @@ class API:
return headers, 200, content
@gzip
@pre_process
@jsonldify
def get_stac_root(
@@ -3005,6 +3069,7 @@ class API:
return headers, 200, to_json(content, self.pretty_print)
@gzip
@pre_process
@jsonldify
def get_stac_path(self, request: Union[APIRequest, Any],
+1
View File
@@ -35,6 +35,7 @@ server:
mimetype: application/json; charset=UTF-8
encoding: utf-8
language: en-US
gzip: false
cors: true
pretty_print: true
limit: 10
+1
View File
@@ -34,6 +34,7 @@ server:
url: http://localhost:5000/
mimetype: application/json; charset=UTF-8
encoding: utf-8
gzip: false
languages:
# First language is the default language
- en-US
+1
View File
@@ -36,6 +36,7 @@ server:
encoding: utf-8
language: en-US
cors: true
gzip: false
pretty_print: true
limit: 10
# templates: /path/to/templates
+108 -1
View File
@@ -30,12 +30,13 @@
import json
import logging
import time
import gzip
from pyld import jsonld
import pytest
from pygeoapi.api import (
API, APIRequest, FORMAT_TYPES, validate_bbox, validate_datetime,
F_HTML, F_JSON, F_JSONLD
F_HTML, F_JSON, F_JSONLD, F_GZIP
)
from pygeoapi.util import yaml_load
@@ -115,6 +116,17 @@ def test_apirequest(api_):
assert apireq.get_linkrel(F_HTML) == 'self'
assert apireq.get_linkrel(F_JSON) == 'alternate'
# Test accept header with multiple valid formats
hh = 'plain/text,application/ld+json,application/json;q=0.9,'
req = mock_request(HTTP_ACCEPT=hh)
apireq = APIRequest(req, api_.locales)
assert apireq.is_valid()
assert apireq.format == F_JSONLD
assert apireq.get_response_headers()['Content-Type'] == \
FORMAT_TYPES[F_JSONLD]
assert apireq.get_linkrel(F_JSONLD) == 'self'
assert apireq.get_linkrel(F_HTML) == 'alternate'
# Overrule HTTP content negotiation
req = mock_request({'f': 'html'}, HTTP_ACCEPT='application/json') # noqa
apireq = APIRequest(req, api_.locales)
@@ -237,6 +249,101 @@ def test_api_exception(config, api_):
assert code == 400
def test_gzip(config, api_):
    """
    Exercise gzip handling on the landing page in three phases: gzip
    requested while the server option is off, gzip requested while the
    server option is on (with utf-16 as server encoding), and requests
    sent without an Accept-Encoding header.
    """
    # One request per response type, each advertising gzip support
    gz_json_req = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSON],
                               HTTP_ACCEPT_ENCODING=F_GZIP)
    gz_jsonld_req = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSONLD],
                                 HTTP_ACCEPT_ENCODING=F_GZIP)
    gz_html_req = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_HTML],
                               HTTP_ACCEPT_ENCODING=F_GZIP)
    gz_gzip_req = mock_request(HTTP_ACCEPT='application/gzip',
                               HTTP_ACCEPT_ENCODING=F_GZIP)

    # Phase 1: server configured without gzip compression
    plain_headers, _, plain_json = api_.landing_page(gz_json_req)
    assert plain_headers['Content-Type'] == FORMAT_TYPES[F_JSON]
    plain_headers, _, plain_jsonld = api_.landing_page(gz_jsonld_req)
    assert plain_headers['Content-Type'] == FORMAT_TYPES[F_JSONLD]
    plain_headers, _, plain_html = api_.landing_page(gz_html_req)
    assert plain_headers['Content-Type'] == FORMAT_TYPES[F_HTML]
    plain_headers, _, _ = api_.landing_page(gz_gzip_req)
    assert plain_headers['Content-Type'] == FORMAT_TYPES[F_JSON]

    # Phase 2: enable gzip on the server and switch to utf-16 encoding
    enc_16 = 'utf-16'
    config['server']['gzip'] = True
    config['server']['encoding'] = enc_16
    api_ = API(config)

    def inflate(blob):
        # Undo the gzip compression and decode with the server encoding
        return gzip.decompress(blob).decode(enc_16)

    json_headers, _, gz_json_body = api_.landing_page(gz_json_req)
    jsonld_headers, _, gz_jsonld_body = api_.landing_page(gz_jsonld_req)
    html_headers, _, gz_html_body = api_.landing_page(gz_html_req)
    gzip_headers, _, gz_gzip_body = api_.landing_page(gz_gzip_req)

    # Compressed JSON response
    assert json_headers['Content-Type'] == \
        '{}; charset={}'.format(FORMAT_TYPES[F_JSON], enc_16)
    assert json_headers['Content-Encoding'] == F_GZIP
    json_text = inflate(gz_json_body)
    assert isinstance(json_text, str)
    json_doc = json.loads(json_text)
    assert isinstance(json_doc, dict)
    assert json_doc == json.loads(plain_json)

    # Compressed JSON-LD response
    assert jsonld_headers['Content-Type'] == \
        '{}; charset={}'.format(FORMAT_TYPES[F_JSONLD], enc_16)
    assert jsonld_headers['Content-Encoding'] == F_GZIP
    jsonld_text = inflate(gz_jsonld_body)
    assert isinstance(jsonld_text, str)
    jsonld_doc = json.loads(jsonld_text)
    assert isinstance(jsonld_doc, dict)
    assert jsonld_doc == json.loads(plain_jsonld)

    # Compressed HTML response
    assert html_headers['Content-Type'] == \
        '{}; charset={}'.format(FORMAT_TYPES[F_HTML], enc_16)
    assert html_headers['Content-Encoding'] == F_GZIP
    html_text = inflate(gz_html_body)
    assert isinstance(html_text, str)
    assert html_text == plain_html

    # Response to a request that accepts application/gzip
    assert gzip_headers['Content-Type'] == \
        '{}; charset={}'.format(FORMAT_TYPES[F_GZIP], enc_16)
    assert gzip_headers['Content-Encoding'] == F_GZIP
    gzip_text = inflate(gz_gzip_body)
    assert isinstance(gzip_text, str)
    gzip_doc = json.loads(gzip_text)
    assert isinstance(gzip_doc, dict)

    # Phase 3: requests sent without an Accept-Encoding header
    bare_json_req = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSON])
    bare_jsonld_req = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_JSONLD])
    bare_html_req = mock_request(HTTP_ACCEPT=FORMAT_TYPES[F_HTML])

    _, _, bare_json = api_.landing_page(bare_json_req)
    _, _, bare_jsonld = api_.landing_page(bare_jsonld_req)
    _, _, bare_html = api_.landing_page(bare_html_req)

    # Every variant of a response must match once decompressed
    assert bare_json == plain_json == inflate(gz_json_body)
    assert bare_jsonld == plain_jsonld == inflate(gz_jsonld_body)
    assert bare_html == plain_html == inflate(gz_html_body)
def test_root(config, api_):
req = mock_request()
rsp_headers, code, response = api_.landing_page(req)