From 666e69c428b81ea5159c96d148036f6bd0bec7a6 Mon Sep 17 00:00:00 2001 From: Dimitrie Stefanescu Date: Sun, 3 May 2020 17:49:54 +0100 Subject: [PATCH] feat(trees): migrated reference tracking to closure table for faster scans on getChildren change reduces query time from 4s to a predictable 500ms for queries with totalCount; without totalCount we get a predictable <100ms. Benchmarking done on 300k object "commit" --- modules/core/objects/services.js | 45 +++++++++------ modules/core/tests/objects.spec.js | 77 +++++++++++--------------- modules/core/tests/sampleObjectData.js | 10 ++-- 3 files changed, 65 insertions(+), 67 deletions(-) diff --git a/modules/core/objects/services.js b/modules/core/objects/services.js index 03bd3f698..f3e33f28a 100644 --- a/modules/core/objects/services.js +++ b/modules/core/objects/services.js @@ -12,6 +12,7 @@ const knex = require( `${root}/db/knex` ) const Streams = ( ) => knex( 'streams' ) const Objects = ( ) => knex( 'objects' ) const Refs = ( ) => knex( 'object_tree_refs' ) +const Closures = ( ) => knex( 'object_children_closure' ) const StreamCommits = ( ) => knex( 'stream_commits' ) module.exports = { @@ -41,18 +42,24 @@ module.exports = { Objects Proper */ async createObject( object ) { - // Prep tree refs - let objTreeRefs = object.__tree !== null && object.__tree ? object.__tree.map( entry => { - return { parent: entry.split( '.' )[ 0 ], path: entry } - } ) : [ ] let insertionObject = prepInsertionObject( object ) + let closures = [ ] + if ( object.__closure !== null ) { + for ( const prop in object.__closure ) { + closures.push( { parent: insertionObject.id, child: prop, minDepth: object.__closure[ prop ] } ) + } + } + + delete insertionObject.__tree + delete insertionObject.__closure + let q1 = Objects( ).insert( insertionObject ).toString( ) + ' on conflict do nothing' await knex.raw( q1 ) - if ( objTreeRefs.length > 0 ) { - let q2 = Refs( ).insert( objTreeRefs ).toString( ) + ' on conflict do nothing' + if ( closures.length > 0 ) { + let q2 = `${ Closures().insert( closures ).toString() } on conflict do nothing` await knex.raw( q2 ) } @@ -73,20 +80,23 @@ module.exports = { let ids = [ ] let promises = batches.map( async ( batch, index ) => new Promise( async ( resolve, reject ) => { - let objTreeRefs = [ ] + let closures = [ ] let objsToInsert = [ ] let t0 = performance.now( ) batch.forEach( obj => { - if ( obj.__tree !== null && obj.__tree ) { - objTreeRefs = [ ...objTreeRefs, ...obj.__tree.map( entry => { - return { parent: entry.split( '.' )[ 0 ], path: entry } - } ) ] + let insertionObject = prepInsertionObject( obj ) + + if ( obj.__closure !== null ) { + for ( const prop in obj.__closure ) { + closures.push( { parent: insertionObject.id, child: prop, minDepth: obj.__closure[ prop ] } ) + } } - let insertionObject = prepInsertionObject( obj ) + delete obj.__tree + delete obj.__closure objsToInsert.push( insertionObject ) ids.push( insertionObject.id ) @@ -95,14 +105,14 @@ module.exports = { let queryObjs = Objects( ).insert( objsToInsert ).toString( ) + ' on conflict do nothing' await knex.raw( queryObjs ) - if ( objTreeRefs.length > 0 ) { - let queryRefs = Refs( ).insert( objTreeRefs ).toString( ) + ' on conflict do nothing' - await knex.raw( queryRefs ) + if ( closures.length > 0 ) { + let q2 = `${ Closures().insert( closures ).toString() } on conflict do nothing` + await knex.raw( q2 ) } let t1 = performance.now( ) - debug( `Batch ${index + 1}/${batches.length}: Stored ${objTreeRefs.length + objsToInsert.length} objects in ${t1-t0}ms.` ) - // console.log( `Batch ${index + 1}/${batches.length}: Stored ${objTreeRefs.length + objsToInsert.length} objects in ${t1-t0}ms.` ) + debug( `Batch ${index + 1}/${batches.length}: Stored ${closures.length + objsToInsert.length} objects in ${t1-t0}ms.` ) + console.log( `Batch ${index + 1}/${batches.length}: Stored ${closures.length + objsToInsert.length} objects in ${t1-t0}ms.` ) resolve( ) } ) ) @@ -212,7 +222,6 @@ module.exports = { // we cannot provide a full response back including all object hashes. function prepInsertionObject( obj ) { obj.id = obj.id || crypto.createHash( 'md5' ).update( JSON.stringify( obj ) ).digest( 'hex' ) // generate a hash if none is present - delete obj.__tree let stringifiedObj = JSON.stringify( obj ) return { data: stringifiedObj, // stored in jsonb column diff --git a/modules/core/tests/objects.spec.js b/modules/core/tests/objects.spec.js index 57a968076..8e0abb62c 100644 --- a/modules/core/tests/objects.spec.js +++ b/modules/core/tests/objects.spec.js @@ -15,7 +15,6 @@ const { createStream, getStream, updateStream, deleteStream, getStreamsUser, gra const { createCommit, createObject, createObjects, getObject, getObjects, getObjectChildren } = require( '../objects/services' ) const sampleObjects = require( './sampleObjectData' ) -// console.log( sampleObjects ) let sampleCommit = JSON.parse( `{ "Objects": [ @@ -156,50 +155,11 @@ describe( 'Objects', ( ) => { } ) it( 'Should get object children', async ( ) => { - let objectCount = 10000 - let objs = [ ] - for ( let i = 0; i < objectCount; i++ ) { - objs.push( { - id: `${i}_hash`, - text: `This is object ${i}`, - arr: [ 12, 21.0003, i * 100 ], - nest: { - flag: true, - what: 'butt ' + i, - orderMe: Math.random( ) * i, - nextNest: { - really: 'cool' - } - }, - __tree: [ ] - } ) + let nestedBoys = createAShitTonOfFuckingObjects( 300000 ) - // if ( i % 2 === 0 ) - // delete objs[ i ].nest - - if ( i === 0 ) { - let __tree = [ ] - - for ( let j = 1; j < objectCount - 2; j++ ) { - __tree.push( `0_hash.${j}_hash` ) - __tree.push( `0_hash.${j}_hash.${j+1}_hash` ) - __tree.push( `0_hash.${j}_hash.${j+1}_hash.${j+2}_hash` ) - // if ( j < objectCount - 2 ) - // __tree.push( `0_hash.${j}_hash.${j+1}_hash.${j+2}_hash` ) - } - - objs[ i ].__tree = __tree - } else if ( i < objectCount - 2 ) { - objs[ i ].__tree.push( `${i}_hash.${i+1}_hash` ) - } - } - let print = objs.slice( 1, 10 ) - let ttree1 = objs[ 0 ].__tree.slice( 0, 30 ) - console.log( ttree1 ) - console.log( print.map( o => ( { id: o.id, tree: o.__tree } ) ) ) - let ids = await createObjects( objs ) - // console.log( ids ) + let ids = await createObjects( nestedBoys ) + console.log( `base id is: ${ids[0]} ` ) let res = await getObjectChildren( '0_hash' ) // console.log( res ) @@ -286,5 +246,34 @@ describe( 'Objects', ( ) => { } ) } ) +} ) -} ) \ No newline at end of file +const crypto = require( 'crypto' ) + +function createAShitTonOfFuckingObjects( shitTon, noise ) { + shitTon = shitTon || 10000 + noise = noise || Math.random() * 100 + + let objs = [ ] + + let base = { name: 'base bastard 2', noise: noise, __closure: {} } + objs.push( base ) + + for ( let i = 0; i < shitTon; i++ ) { + let baby = { name: `mr. ${i}`, noise: noise, sortValueA: i, sortValueB: i * 0.42 * i } + getAFuckingId( baby ) + base.__closure[ baby.id ] = 1 + + if( i > 1000 ) + base.__closure[ baby.id ] = i / 1000 + + objs.push( baby ) + } + + getAFuckingId( base ) + return objs +} + +function getAFuckingId( obj ) { + obj.id = obj.id || crypto.createHash( 'md5' ).update( JSON.stringify( obj ) ).digest( 'hex' ) +} \ No newline at end of file diff --git a/modules/core/tests/sampleObjectData.js b/modules/core/tests/sampleObjectData.js index 7c74271e9..f02869345 100644 --- a/modules/core/tests/sampleObjectData.js +++ b/modules/core/tests/sampleObjectData.js @@ -1,7 +1,7 @@ module.exports = JSON.parse( `[ { "name": "depth five", - "hash": "99b77f596443d2a2cf59124dbc6a4a8f", + "id": "99b77f596443d2a2cf59124dbc6a4a8f", "speckle_type": "" }, { "name": "depth four", @@ -9,7 +9,7 @@ module.exports = JSON.parse( `[ "speckle_type": "reference", "referencedId": "99b77f596443d2a2cf59124dbc6a4a8f" }, - "hash": "c2cdd8d01c219703926f7282db700e14", + "id": "c2cdd8d01c219703926f7282db700e14", "speckle_type": "", "__tree": [ "c2cdd8d01c219703926f7282db700e14.99b77f596443d2a2cf59124dbc6a4a8f" @@ -23,7 +23,7 @@ module.exports = JSON.parse( `[ "speckle_type": "reference", "referencedId": "c2cdd8d01c219703926f7282db700e14" }, - "hash": "78af2314eed937c7338fccc4224393c0", + "id": "78af2314eed937c7338fccc4224393c0", "speckle_type": "", "__tree": [ "78af2314eed937c7338fccc4224393c0.c2cdd8d01c219703926f7282db700e14", @@ -45,7 +45,7 @@ module.exports = JSON.parse( `[ "referencedId": "99b77f596443d2a2cf59124dbc6a4a8f" } ], - "hash": "254cb2d7094eca3c809cdac2ffc4010b", + "id": "254cb2d7094eca3c809cdac2ffc4010b", "speckle_type": "", "__tree": [ "254cb2d7094eca3c809cdac2ffc4010b.78af2314eed937c7338fccc4224393c0", @@ -68,7 +68,7 @@ module.exports = JSON.parse( `[ "speckle_type": "reference", "referencedId": "99b77f596443d2a2cf59124dbc6a4a8f" }, - "hash": "d2b9e647cb345673ff2b35ccab45ffc2", + "id": "d2b9e647cb345673ff2b35ccab45ffc2", "speckle_type": "", "__tree": [ "d2b9e647cb345673ff2b35ccab45ffc2.254cb2d7094eca3c809cdac2ffc4010b",