feat(trees): migrated reference tracking to closure table for faster scans on getChildren

This change reduces query time from 4s to a predictable 500ms for queries with totalCount; without
totalCount we get a predictable <100ms. Benchmarking was done on a 300k-object "commit".
This commit is contained in:
Dimitrie Stefanescu
2020-05-03 17:49:54 +01:00
parent 9999bd6eae
commit 666e69c428
3 changed files with 65 additions and 67 deletions
+27 -18
View File
@@ -12,6 +12,7 @@ const knex = require( `${root}/db/knex` )
const Streams = ( ) => knex( 'streams' )
const Objects = ( ) => knex( 'objects' )
const Refs = ( ) => knex( 'object_tree_refs' )
const Closures = ( ) => knex( 'object_children_closure' )
const StreamCommits = ( ) => knex( 'stream_commits' )
module.exports = {
@@ -41,18 +42,24 @@ module.exports = {
Objects Proper
*/
async createObject( object ) {
// Prep tree refs
let objTreeRefs = object.__tree !== null && object.__tree ? object.__tree.map( entry => {
return { parent: entry.split( '.' )[ 0 ], path: entry }
} ) : [ ]
let insertionObject = prepInsertionObject( object )
let closures = [ ]
if ( object.__closure !== null ) {
for ( const prop in object.__closure ) {
closures.push( { parent: insertionObject.id, child: prop, minDepth: object.__closure[ prop ] } )
}
}
delete insertionObject.__tree
delete insertionObject.__closure
let q1 = Objects( ).insert( insertionObject ).toString( ) + ' on conflict do nothing'
await knex.raw( q1 )
if ( objTreeRefs.length > 0 ) {
let q2 = Refs( ).insert( objTreeRefs ).toString( ) + ' on conflict do nothing'
if ( closures.length > 0 ) {
let q2 = `${ Closures().insert( closures ).toString() } on conflict do nothing`
await knex.raw( q2 )
}
@@ -73,20 +80,23 @@ module.exports = {
let ids = [ ]
let promises = batches.map( async ( batch, index ) => new Promise( async ( resolve, reject ) => {
let objTreeRefs = [ ]
let closures = [ ]
let objsToInsert = [ ]
let t0 = performance.now( )
batch.forEach( obj => {
if ( obj.__tree !== null && obj.__tree ) {
objTreeRefs = [ ...objTreeRefs, ...obj.__tree.map( entry => {
return { parent: entry.split( '.' )[ 0 ], path: entry }
} ) ]
let insertionObject = prepInsertionObject( obj )
if ( obj.__closure !== null ) {
for ( const prop in obj.__closure ) {
closures.push( { parent: insertionObject.id, child: prop, minDepth: obj.__closure[ prop ] } )
}
}
let insertionObject = prepInsertionObject( obj )
delete obj.__tree
delete obj.__closure
objsToInsert.push( insertionObject )
ids.push( insertionObject.id )
@@ -95,14 +105,14 @@ module.exports = {
let queryObjs = Objects( ).insert( objsToInsert ).toString( ) + ' on conflict do nothing'
await knex.raw( queryObjs )
if ( objTreeRefs.length > 0 ) {
let queryRefs = Refs( ).insert( objTreeRefs ).toString( ) + ' on conflict do nothing'
await knex.raw( queryRefs )
if ( closures.length > 0 ) {
let q2 = `${ Closures().insert( closures ).toString() } on conflict do nothing`
await knex.raw( q2 )
}
let t1 = performance.now( )
debug( `Batch ${index + 1}/${batches.length}: Stored ${objTreeRefs.length + objsToInsert.length} objects in ${t1-t0}ms.` )
// console.log( `Batch ${index + 1}/${batches.length}: Stored ${objTreeRefs.length + objsToInsert.length} objects in ${t1-t0}ms.` )
debug( `Batch ${index + 1}/${batches.length}: Stored ${closures.length + objsToInsert.length} objects in ${t1-t0}ms.` )
console.log( `Batch ${index + 1}/${batches.length}: Stored ${closures.length + objsToInsert.length} objects in ${t1-t0}ms.` )
resolve( )
} ) )
@@ -212,7 +222,6 @@ module.exports = {
// we cannot provide a full response back including all object hashes.
function prepInsertionObject( obj ) {
obj.id = obj.id || crypto.createHash( 'md5' ).update( JSON.stringify( obj ) ).digest( 'hex' ) // generate a hash if none is present
delete obj.__tree
let stringifiedObj = JSON.stringify( obj )
return {
data: stringifiedObj, // stored in jsonb column
+33 -44
View File
@@ -15,7 +15,6 @@ const { createStream, getStream, updateStream, deleteStream, getStreamsUser, gra
const { createCommit, createObject, createObjects, getObject, getObjects, getObjectChildren } = require( '../objects/services' )
const sampleObjects = require( './sampleObjectData' )
// console.log( sampleObjects )
let sampleCommit = JSON.parse( `{
"Objects": [
@@ -156,50 +155,11 @@ describe( 'Objects', ( ) => {
} )
it( 'Should get object children', async ( ) => {
let objectCount = 10000
let objs = [ ]
for ( let i = 0; i < objectCount; i++ ) {
objs.push( {
id: `${i}_hash`,
text: `This is object ${i}`,
arr: [ 12, 21.0003, i * 100 ],
nest: {
flag: true,
what: 'butt ' + i,
orderMe: Math.random( ) * i,
nextNest: {
really: 'cool'
}
},
__tree: [ ]
} )
let nestedBoys = createAShitTonOfFuckingObjects( 300000 )
// if ( i % 2 === 0 )
// delete objs[ i ].nest
if ( i === 0 ) {
let __tree = [ ]
for ( let j = 1; j < objectCount - 2; j++ ) {
__tree.push( `0_hash.${j}_hash` )
__tree.push( `0_hash.${j}_hash.${j+1}_hash` )
__tree.push( `0_hash.${j}_hash.${j+1}_hash.${j+2}_hash` )
// if ( j < objectCount - 2 )
// __tree.push( `0_hash.${j}_hash.${j+1}_hash.${j+2}_hash` )
}
objs[ i ].__tree = __tree
} else if ( i < objectCount - 2 ) {
objs[ i ].__tree.push( `${i}_hash.${i+1}_hash` )
}
}
let print = objs.slice( 1, 10 )
let ttree1 = objs[ 0 ].__tree.slice( 0, 30 )
console.log( ttree1 )
console.log( print.map( o => ( { id: o.id, tree: o.__tree } ) ) )
let ids = await createObjects( objs )
// console.log( ids )
let ids = await createObjects( nestedBoys )
console.log( `base id is: ${ids[0]} ` )
let res = await getObjectChildren( '0_hash' )
// console.log( res )
@@ -286,5 +246,34 @@ describe( 'Objects', ( ) => {
} )
} )
} )
} )
const crypto = require( 'crypto' )
/**
 * Builds a flat batch of test objects: one "base" object followed by `shitTon`
 * children, with every child registered in the base object's `__closure` map.
 *
 * Each child receives an id through getAFuckingId before it is registered. The
 * closure value is 1 while i <= 1000 and i / 1000 afterwards, so later entries
 * are fractional. The base object's id is assigned last, after its `__closure`
 * map has been fully populated.
 *
 * @param {number} [shitTon=10000] - Number of child objects to generate. An
 *   explicit 0 yields only the base object.
 * @param {number} [noise] - Value stamped on every generated object; defaults
 *   to Math.random() * 100 when omitted. An explicit 0 is kept as-is.
 * @returns {Object[]} The base object at index 0, followed by the children in
 *   creation order.
 */
function createAShitTonOfFuckingObjects( shitTon, noise ) {
  // Fall back to the defaults only for null/undefined: with `||` an explicit 0
  // was treated as "missing" (0 children became 10000, noise 0 became random).
  const count = shitTon == null ? 10000 : shitTon
  const noiseValue = noise == null ? Math.random() * 100 : noise

  const base = { name: 'base bastard 2', noise: noiseValue, __closure: {} }
  const objs = [ base ]

  for ( let i = 0; i < count; i++ ) {
    const baby = { name: `mr. ${i}`, noise: noiseValue, sortValueA: i, sortValueB: i * 0.42 * i }
    getAFuckingId( baby )
    // Closure value keyed by child id: 1 for the first children, i / 1000 past index 1000.
    base.__closure[ baby.id ] = i > 1000 ? i / 1000 : 1
    objs.push( baby )
  }

  // Id the base only now, once every child has been added to its closure map.
  getAFuckingId( base )
  return objs
}
/**
 * Ensures `obj` carries an `id`, mutating it in place.
 *
 * A truthy existing id is kept. Otherwise the id becomes the hex md5 digest of
 * the object's JSON serialization as it stands at call time.
 *
 * @param {Object} obj - Object to stamp with an id.
 * @returns {undefined} Nothing; the result is written to `obj.id`.
 */
function getAFuckingId( obj ) {
  let identifier = obj.id
  if ( !identifier ) {
    const serialized = JSON.stringify( obj )
    const hasher = crypto.createHash( 'md5' )
    hasher.update( serialized )
    identifier = hasher.digest( 'hex' )
  }
  obj.id = identifier
}
+5 -5
View File
@@ -1,7 +1,7 @@
module.exports = JSON.parse( `[
{
"name": "depth five",
"hash": "99b77f596443d2a2cf59124dbc6a4a8f",
"id": "99b77f596443d2a2cf59124dbc6a4a8f",
"speckle_type": ""
}, {
"name": "depth four",
@@ -9,7 +9,7 @@ module.exports = JSON.parse( `[
"speckle_type": "reference",
"referencedId": "99b77f596443d2a2cf59124dbc6a4a8f"
},
"hash": "c2cdd8d01c219703926f7282db700e14",
"id": "c2cdd8d01c219703926f7282db700e14",
"speckle_type": "",
"__tree": [
"c2cdd8d01c219703926f7282db700e14.99b77f596443d2a2cf59124dbc6a4a8f"
@@ -23,7 +23,7 @@ module.exports = JSON.parse( `[
"speckle_type": "reference",
"referencedId": "c2cdd8d01c219703926f7282db700e14"
},
"hash": "78af2314eed937c7338fccc4224393c0",
"id": "78af2314eed937c7338fccc4224393c0",
"speckle_type": "",
"__tree": [
"78af2314eed937c7338fccc4224393c0.c2cdd8d01c219703926f7282db700e14",
@@ -45,7 +45,7 @@ module.exports = JSON.parse( `[
"referencedId": "99b77f596443d2a2cf59124dbc6a4a8f"
}
],
"hash": "254cb2d7094eca3c809cdac2ffc4010b",
"id": "254cb2d7094eca3c809cdac2ffc4010b",
"speckle_type": "",
"__tree": [
"254cb2d7094eca3c809cdac2ffc4010b.78af2314eed937c7338fccc4224393c0",
@@ -68,7 +68,7 @@ module.exports = JSON.parse( `[
"speckle_type": "reference",
"referencedId": "99b77f596443d2a2cf59124dbc6a4a8f"
},
"hash": "d2b9e647cb345673ff2b35ccab45ffc2",
"id": "d2b9e647cb345673ff2b35ccab45ffc2",
"speckle_type": "",
"__tree": [
"d2b9e647cb345673ff2b35ccab45ffc2.254cb2d7094eca3c809cdac2ffc4010b",