feat(trees): migrated reference tracking to closure table for faster scans on getChildren
This change reduces query time from 4s to a predictable 500ms for queries with totalCount; without totalCount, queries take a predictable <100ms. Benchmarking was done on a 300k-object "commit".
This commit is contained in:
@@ -12,6 +12,7 @@ const knex = require( `${root}/db/knex` )
|
||||
const Streams = ( ) => knex( 'streams' )
|
||||
const Objects = ( ) => knex( 'objects' )
|
||||
const Refs = ( ) => knex( 'object_tree_refs' )
|
||||
const Closures = ( ) => knex( 'object_children_closure' )
|
||||
const StreamCommits = ( ) => knex( 'stream_commits' )
|
||||
|
||||
module.exports = {
|
||||
@@ -41,18 +42,24 @@ module.exports = {
|
||||
Objects Proper
|
||||
*/
|
||||
async createObject( object ) {
|
||||
// Prep tree refs
|
||||
let objTreeRefs = object.__tree !== null && object.__tree ? object.__tree.map( entry => {
|
||||
return { parent: entry.split( '.' )[ 0 ], path: entry }
|
||||
} ) : [ ]
|
||||
|
||||
let insertionObject = prepInsertionObject( object )
|
||||
|
||||
let closures = [ ]
|
||||
if ( object.__closure !== null ) {
|
||||
for ( const prop in object.__closure ) {
|
||||
closures.push( { parent: insertionObject.id, child: prop, minDepth: object.__closure[ prop ] } )
|
||||
}
|
||||
}
|
||||
|
||||
delete insertionObject.__tree
|
||||
delete insertionObject.__closure
|
||||
|
||||
let q1 = Objects( ).insert( insertionObject ).toString( ) + ' on conflict do nothing'
|
||||
await knex.raw( q1 )
|
||||
|
||||
if ( objTreeRefs.length > 0 ) {
|
||||
let q2 = Refs( ).insert( objTreeRefs ).toString( ) + ' on conflict do nothing'
|
||||
if ( closures.length > 0 ) {
|
||||
let q2 = `${ Closures().insert( closures ).toString() } on conflict do nothing`
|
||||
await knex.raw( q2 )
|
||||
}
|
||||
|
||||
@@ -73,20 +80,23 @@ module.exports = {
|
||||
let ids = [ ]
|
||||
|
||||
let promises = batches.map( async ( batch, index ) => new Promise( async ( resolve, reject ) => {
|
||||
let objTreeRefs = [ ]
|
||||
let closures = [ ]
|
||||
let objsToInsert = [ ]
|
||||
|
||||
let t0 = performance.now( )
|
||||
|
||||
batch.forEach( obj => {
|
||||
|
||||
if ( obj.__tree !== null && obj.__tree ) {
|
||||
objTreeRefs = [ ...objTreeRefs, ...obj.__tree.map( entry => {
|
||||
return { parent: entry.split( '.' )[ 0 ], path: entry }
|
||||
} ) ]
|
||||
let insertionObject = prepInsertionObject( obj )
|
||||
|
||||
if ( obj.__closure !== null ) {
|
||||
for ( const prop in obj.__closure ) {
|
||||
closures.push( { parent: insertionObject.id, child: prop, minDepth: obj.__closure[ prop ] } )
|
||||
}
|
||||
}
|
||||
|
||||
let insertionObject = prepInsertionObject( obj )
|
||||
delete obj.__tree
|
||||
delete obj.__closure
|
||||
|
||||
objsToInsert.push( insertionObject )
|
||||
ids.push( insertionObject.id )
|
||||
@@ -95,14 +105,14 @@ module.exports = {
|
||||
let queryObjs = Objects( ).insert( objsToInsert ).toString( ) + ' on conflict do nothing'
|
||||
await knex.raw( queryObjs )
|
||||
|
||||
if ( objTreeRefs.length > 0 ) {
|
||||
let queryRefs = Refs( ).insert( objTreeRefs ).toString( ) + ' on conflict do nothing'
|
||||
await knex.raw( queryRefs )
|
||||
if ( closures.length > 0 ) {
|
||||
let q2 = `${ Closures().insert( closures ).toString() } on conflict do nothing`
|
||||
await knex.raw( q2 )
|
||||
}
|
||||
|
||||
let t1 = performance.now( )
|
||||
debug( `Batch ${index + 1}/${batches.length}: Stored ${objTreeRefs.length + objsToInsert.length} objects in ${t1-t0}ms.` )
|
||||
// console.log( `Batch ${index + 1}/${batches.length}: Stored ${objTreeRefs.length + objsToInsert.length} objects in ${t1-t0}ms.` )
|
||||
debug( `Batch ${index + 1}/${batches.length}: Stored ${closures.length + objsToInsert.length} objects in ${t1-t0}ms.` )
|
||||
console.log( `Batch ${index + 1}/${batches.length}: Stored ${closures.length + objsToInsert.length} objects in ${t1-t0}ms.` )
|
||||
resolve( )
|
||||
} ) )
|
||||
|
||||
@@ -212,7 +222,6 @@ module.exports = {
|
||||
// we cannot provide a full response back including all object hashes.
|
||||
function prepInsertionObject( obj ) {
|
||||
obj.id = obj.id || crypto.createHash( 'md5' ).update( JSON.stringify( obj ) ).digest( 'hex' ) // generate a hash if none is present
|
||||
delete obj.__tree
|
||||
let stringifiedObj = JSON.stringify( obj )
|
||||
return {
|
||||
data: stringifiedObj, // stored in jsonb column
|
||||
|
||||
@@ -15,7 +15,6 @@ const { createStream, getStream, updateStream, deleteStream, getStreamsUser, gra
|
||||
const { createCommit, createObject, createObjects, getObject, getObjects, getObjectChildren } = require( '../objects/services' )
|
||||
|
||||
const sampleObjects = require( './sampleObjectData' )
|
||||
// console.log( sampleObjects )
|
||||
|
||||
let sampleCommit = JSON.parse( `{
|
||||
"Objects": [
|
||||
@@ -156,50 +155,11 @@ describe( 'Objects', ( ) => {
|
||||
} )
|
||||
|
||||
it( 'Should get object children', async ( ) => {
|
||||
let objectCount = 10000
|
||||
let objs = [ ]
|
||||
|
||||
for ( let i = 0; i < objectCount; i++ ) {
|
||||
objs.push( {
|
||||
id: `${i}_hash`,
|
||||
text: `This is object ${i}`,
|
||||
arr: [ 12, 21.0003, i * 100 ],
|
||||
nest: {
|
||||
flag: true,
|
||||
what: 'butt ' + i,
|
||||
orderMe: Math.random( ) * i,
|
||||
nextNest: {
|
||||
really: 'cool'
|
||||
}
|
||||
},
|
||||
__tree: [ ]
|
||||
} )
|
||||
let nestedBoys = createAShitTonOfFuckingObjects( 300000 )
|
||||
|
||||
// if ( i % 2 === 0 )
|
||||
// delete objs[ i ].nest
|
||||
|
||||
if ( i === 0 ) {
|
||||
let __tree = [ ]
|
||||
|
||||
for ( let j = 1; j < objectCount - 2; j++ ) {
|
||||
__tree.push( `0_hash.${j}_hash` )
|
||||
__tree.push( `0_hash.${j}_hash.${j+1}_hash` )
|
||||
__tree.push( `0_hash.${j}_hash.${j+1}_hash.${j+2}_hash` )
|
||||
// if ( j < objectCount - 2 )
|
||||
// __tree.push( `0_hash.${j}_hash.${j+1}_hash.${j+2}_hash` )
|
||||
}
|
||||
|
||||
objs[ i ].__tree = __tree
|
||||
} else if ( i < objectCount - 2 ) {
|
||||
objs[ i ].__tree.push( `${i}_hash.${i+1}_hash` )
|
||||
}
|
||||
}
|
||||
let print = objs.slice( 1, 10 )
|
||||
let ttree1 = objs[ 0 ].__tree.slice( 0, 30 )
|
||||
console.log( ttree1 )
|
||||
console.log( print.map( o => ( { id: o.id, tree: o.__tree } ) ) )
|
||||
let ids = await createObjects( objs )
|
||||
// console.log( ids )
|
||||
let ids = await createObjects( nestedBoys )
|
||||
console.log( `base id is: ${ids[0]} ` )
|
||||
|
||||
let res = await getObjectChildren( '0_hash' )
|
||||
// console.log( res )
|
||||
@@ -286,5 +246,34 @@ describe( 'Objects', ( ) => {
|
||||
} )
|
||||
|
||||
} )
|
||||
} )
|
||||
|
||||
} )
|
||||
const crypto = require( 'crypto' )
|
||||
|
||||
/**
 * Generates synthetic test objects: one base object followed by `shitTon`
 * generated children, all sharing the same `noise` value.
 *
 * The base object carries a `__closure` map keyed by child id. The stored
 * value is 1 for children with index <= 1000 and `index / 1000` for the rest.
 * Ids are assigned through `getAFuckingId`; the base gets its id last, after
 * its `__closure` map has been fully populated.
 *
 * @param {number} [shitTon] - number of children to generate; any falsy value falls back to 10000.
 * @param {number} [noise] - value copied onto every object; any falsy value falls back to a random number in [0, 100).
 * @returns {Object[]} the base object at index 0, followed by the children in creation order.
 */
function createAShitTonOfFuckingObjects( shitTon, noise ) {
  const childCount = shitTon || 10000
  const noiseValue = noise || Math.random() * 100

  const root = { name: 'base bastard 2', noise: noiseValue, __closure: {} }
  const generated = [ root ]

  let i = 0
  while ( i < childCount ) {
    const child = { name: `mr. ${i}`, noise: noiseValue, sortValueA: i, sortValueB: i * 0.42 * i }
    getAFuckingId( child )

    // Children past index 1000 get a closure value that grows with the index.
    root.__closure[ child.id ] = i > 1000 ? i / 1000 : 1

    generated.push( child )
    i += 1
  }

  // Id the base only now, once every child has been registered in __closure.
  getAFuckingId( root )
  return generated
}
|
||||
|
||||
/**
 * Ensures `obj` has an `id`, mutating it in place.
 *
 * An existing truthy `id` is kept as is. Otherwise the id becomes the hex MD5
 * digest of the object's JSON serialisation at the time of the call.
 *
 * @param {Object} obj - object to tag; modified in place.
 * @returns {undefined}
 */
function getAFuckingId( obj ) {
  if ( obj.id ) return

  const serialized = JSON.stringify( obj )
  obj.id = crypto.createHash( 'md5' ).update( serialized ).digest( 'hex' )
}
|
||||
@@ -1,7 +1,7 @@
|
||||
module.exports = JSON.parse( `[
|
||||
{
|
||||
"name": "depth five",
|
||||
"hash": "99b77f596443d2a2cf59124dbc6a4a8f",
|
||||
"id": "99b77f596443d2a2cf59124dbc6a4a8f",
|
||||
"speckle_type": ""
|
||||
}, {
|
||||
"name": "depth four",
|
||||
@@ -9,7 +9,7 @@ module.exports = JSON.parse( `[
|
||||
"speckle_type": "reference",
|
||||
"referencedId": "99b77f596443d2a2cf59124dbc6a4a8f"
|
||||
},
|
||||
"hash": "c2cdd8d01c219703926f7282db700e14",
|
||||
"id": "c2cdd8d01c219703926f7282db700e14",
|
||||
"speckle_type": "",
|
||||
"__tree": [
|
||||
"c2cdd8d01c219703926f7282db700e14.99b77f596443d2a2cf59124dbc6a4a8f"
|
||||
@@ -23,7 +23,7 @@ module.exports = JSON.parse( `[
|
||||
"speckle_type": "reference",
|
||||
"referencedId": "c2cdd8d01c219703926f7282db700e14"
|
||||
},
|
||||
"hash": "78af2314eed937c7338fccc4224393c0",
|
||||
"id": "78af2314eed937c7338fccc4224393c0",
|
||||
"speckle_type": "",
|
||||
"__tree": [
|
||||
"78af2314eed937c7338fccc4224393c0.c2cdd8d01c219703926f7282db700e14",
|
||||
@@ -45,7 +45,7 @@ module.exports = JSON.parse( `[
|
||||
"referencedId": "99b77f596443d2a2cf59124dbc6a4a8f"
|
||||
}
|
||||
],
|
||||
"hash": "254cb2d7094eca3c809cdac2ffc4010b",
|
||||
"id": "254cb2d7094eca3c809cdac2ffc4010b",
|
||||
"speckle_type": "",
|
||||
"__tree": [
|
||||
"254cb2d7094eca3c809cdac2ffc4010b.78af2314eed937c7338fccc4224393c0",
|
||||
@@ -68,7 +68,7 @@ module.exports = JSON.parse( `[
|
||||
"speckle_type": "reference",
|
||||
"referencedId": "99b77f596443d2a2cf59124dbc6a4a8f"
|
||||
},
|
||||
"hash": "d2b9e647cb345673ff2b35ccab45ffc2",
|
||||
"id": "d2b9e647cb345673ff2b35ccab45ffc2",
|
||||
"speckle_type": "",
|
||||
"__tree": [
|
||||
"d2b9e647cb345673ff2b35ccab45ffc2.254cb2d7094eca3c809cdac2ffc4010b",
|
||||
|
||||
Reference in New Issue
Block a user