Files
specklepy/speckle/serialization/base_object_serializer.py
T
2021-01-26 10:02:55 +00:00

330 lines
12 KiB
Python

import json
import hashlib
import re
from speckle import objects
from uuid import uuid4
from typing import Any, Dict, List, Tuple
from speckle.objects.base import Base, DataChunk
from speckle.logging.exceptions import SerializationException, SpeckleException
from speckle.transports.abstract_transport import AbstractTransport
PRIMITIVES = (int, float, str, bool)
def hash_obj(obj: Any) -> str:
return hashlib.sha256(json.dumps(obj).encode()).hexdigest()[:32]
class BaseObjectSerializer:
read_transport: AbstractTransport
write_transports: List[AbstractTransport]
detach_lineage: List[bool] = [] # tracks depth and whether or not to detach
lineage: List[str] = [] # keeps track of hash chain through the object tree
family_tree: Dict[str, Dict[str, int]] = {}
closure_table: Dict[str, Dict[str, int]] = {}
def __init__(
self, write_transports: List[AbstractTransport] = [], read_transport=None
) -> None:
self.write_transports = write_transports
self.read_transport = read_transport
def write_json(self, base: Base):
self.__reset_writer()
self.detach_lineage = [True]
hash, obj = self.traverse_base(base)
return hash, json.dumps(obj)
def traverse_base(self, base: Base) -> Tuple[str, Dict]:
"""Decomposes the given base object and builds a serializable dictionary
Arguments:
base {Base} -- the base object to be decomposed and serialized
Returns:
(str, dict) -- a tuple containing the hash (id) of the base object and the constructed serializable dictionary
"""
if not self.detach_lineage:
self.detach_lineage = [True]
self.lineage.append(uuid4().hex)
object_builder = {"id": ""}
obj, props = base, base.get_member_names()
while props:
prop = props.pop(0)
value = obj[prop]
# skip nulls or props marked to be ignored with "__" or "_"
if not value or prop.startswith(("__", "_")):
continue
# don't prepopulate id as this will mess up hashing
if prop == "id":
continue
dynamic_chunk_match = re.match(r"^@\((\d*)\)", prop)
if dynamic_chunk_match:
chunk_size = dynamic_chunk_match.groups()[0]
base._chunkable[prop] = (
int(chunk_size) if chunk_size else base._chunk_size_default
)
chunkable = True if prop in base._chunkable else False
detach = (
True
if prop.startswith("@") or prop in base._detachable or chunkable
else False
)
# 1. handle primitives (ints, floats, strings, and bools)
if isinstance(value, PRIMITIVES):
object_builder[prop] = value
continue
# 2. handle Base objects
elif isinstance(value, Base):
child_obj = self.traverse_value(value, detach=detach)
if detach and self.write_transports:
ref_hash = child_obj["id"]
object_builder[prop] = self.detach_helper(ref_hash=ref_hash)
else:
object_builder[prop] = child_obj
# 3. handle chunkable props
elif chunkable and self.write_transports:
chunks = []
max_size = base._chunkable[prop]
chunk = DataChunk()
for count, item in enumerate(value):
if count and count % max_size == 0:
chunks.append(chunk)
chunk = DataChunk()
chunk.data.append(item)
chunks.append(chunk)
chunk_refs = []
for c in chunks:
self.detach_lineage.append(detach)
ref_hash, _ = self.traverse_base(c)
ref_obj = self.detach_helper(ref_hash=ref_hash)
chunk_refs.append(ref_obj)
object_builder[prop] = chunk_refs
# 4. handle all other cases
else:
child_obj = self.traverse_value(value)
object_builder[prop] = child_obj
hash = hash_obj(object_builder)
object_builder["id"] = hash
detached = self.detach_lineage.pop()
# add closures to the object
if self.lineage[-1] in self.family_tree:
object_builder["__closure"] = self.closure_table[hash] = {
ref: depth - len(self.detach_lineage)
for ref, depth in self.family_tree[self.lineage[-1]].items()
}
# write detached or root objects to transports
if detached and self.write_transports:
for t in self.write_transports:
t.save_object(id=hash, serialized_object=json.dumps(object_builder))
del self.lineage[-1]
return hash, object_builder
def traverse_value(self, obj: Any, detach: bool = False) -> Any:
"""Decomposes a given object and constructs a serializable object or dictionary
Arguments:
obj {Any} -- the value to decompose
Returns:
Any -- a serializable version of the given object
"""
if isinstance(obj, PRIMITIVES):
return obj
elif isinstance(obj, (list, tuple, set)):
return [self.traverse_value(o) for o in obj]
elif isinstance(obj, dict):
for k, v in obj.items():
if isinstance(v, PRIMITIVES):
continue
else:
obj[k] = self.traverse_value(v)
return obj
elif isinstance(obj, Base):
self.detach_lineage.append(detach)
_, base_obj = self.traverse_base(obj)
return base_obj
else:
try:
return obj.dict()
except:
SerializationException(
message=f"Failed to handle {type(obj)} in `BaseObjectSerializer.traverse_value`",
object=obj,
)
return str(obj)
def detach_helper(self, ref_hash: str) -> Dict[str, str]:
"""Helper to keep track of detached objects and their depth in the family tree and create reference objects to place in the parent object
Arguments:
ref_hash {str} -- the hash of the fully traversed object
Returns:
dict -- a reference object to be inserted into the given object's parent
"""
for parent in self.lineage:
if parent not in self.family_tree:
self.family_tree[parent] = {}
if ref_hash not in self.family_tree[parent] or self.family_tree[parent][
ref_hash
] > len(self.detach_lineage):
self.family_tree[parent][ref_hash] = len(self.detach_lineage)
return {
"referencedId": ref_hash,
"speckle_type": "reference",
}
def __reset_writer(self) -> None:
"""Reinitializes the lineage, and other variables that get used during the json writing process"""
self.detach_lineage = []
self.lineage = []
self.family_tree = {}
self.closure_table = {}
def read_json(self, obj_string: str, obj_dict: dict = None) -> Base:
"""Recomposes a Base object from the string representation of the object
Arguments:
obj_string {str} -- the string representation of the object
Returns:
Base -- the base object with all it's children attached
"""
if not obj_string and not obj_dict:
return None
obj = obj_dict or json.loads(obj_string)
base = self.recompose_base(obj=obj)
return base
def recompose_base(self, obj: dict) -> Base:
"""Steps through a base object dictionary and recomposes the base object
Arguments:
obj {dict} -- the dictionary representation of the object
Returns:
Base -- the base object with all its children attached
"""
# make sure an obj was passed and create dict if string was somehow passed
if not obj:
return
if isinstance(obj, str):
obj = json.loads(obj)
# TODO: remove check for `speckleType` when server bug is fixed
if "speckle_type" in obj and obj["speckle_type"] == "reference":
obj = self.get_child(obj=obj)
if "speckleType" in obj and obj["speckleType"] == "reference":
obj = self.get_child(obj=obj)
# initialise the base object using `speckle_type`
base = getattr(
objects,
obj["speckle_type"] if "speckle_type" in obj else obj["speckleType"],
Base,
)()
# get total children count
if "__closure" in obj:
if not self.read_transport:
raise SpeckleException(
message="Cannot resolve reference - no read transport is defined"
)
closure = obj.pop("__closure")
base.totalChildrenCount = len(closure)
for prop, value in obj.items():
# 1. handle primitives (ints, floats, strings, and bools)
if isinstance(value, PRIMITIVES):
base.__setattr__(prop, value)
continue
# 2. handle referenced child objects
elif "referencedId" in value:
ref_hash = value["referencedId"]
ref_obj_str = self.read_transport.get_object(id=ref_hash)
if not ref_obj_str:
raise SpeckleException(
f"Could not find the referenced child object of id `{ref_hash}` in the given read transport: {self.read_transport.name}"
)
ref_obj = json.loads(ref_obj_str)
base.__setattr__(prop, self.recompose_base(obj=ref_obj))
# 3. handle all other cases (base objects, lists, and dicts)
else:
base.__setattr__(prop, self.handle_value(value))
return base
def handle_value(self, obj: Any):
"""Helper for recomposing a base object by handling the dictionary representation's values
Arguments:
obj {Any} -- a value from the base object dictionary
Returns:
Any -- the handled value (primitive, list, dictionary, or Base)
"""
if isinstance(obj, PRIMITIVES):
return obj
# lists (regular and chunked)
if isinstance(obj, list):
obj_list = [self.handle_value(o) for o in obj]
# handle chunked lists
if isinstance(obj_list[0], DataChunk):
data = []
for o in obj_list:
data.extend(o["data"])
return data
else:
return obj_list
# bases
if isinstance(obj, dict) and "speckle_type" in obj:
return self.recompose_base(obj=obj)
# dictionaries
if isinstance(obj, dict):
for k, v in obj.items():
if isinstance(v, PRIMITIVES):
continue
else:
obj[k] = self.handle_value(v)
return obj
def get_child(self, obj: Dict):
ref_hash = obj["referencedId"]
ref_obj_str = self.read_transport.get_object(id=ref_hash)
if not ref_obj_str:
raise SpeckleException(
f"Could not find the referenced child object of id `{ref_hash}` in the given read transport: {self.read_transport.name}"
)
return json.loads(ref_obj_str)