feat(queries): figuring out the perfect query for getting objects
Quite a few things are being considered performance-wise: pagination, ordering, querying, depth.
This commit is contained in:
@@ -66,6 +66,8 @@ exports.up = async knex => {
|
|||||||
table.string( 'id' ).primary( )
|
table.string( 'id' ).primary( )
|
||||||
table.string( 'speckle_type' ).defaultTo( 'Base' ).notNullable( )
|
table.string( 'speckle_type' ).defaultTo( 'Base' ).notNullable( )
|
||||||
table.string( 'applicationId' )
|
table.string( 'applicationId' )
|
||||||
|
table.integer( 'totalChildrenCount' )
|
||||||
|
table.jsonb( 'totalChildrenCountByDepth' )
|
||||||
table.jsonb( 'data' )
|
table.jsonb( 'data' )
|
||||||
table.string( 'author', 10 ).references( 'id' ).inTable( 'users' )
|
table.string( 'author', 10 ).references( 'id' ).inTable( 'users' )
|
||||||
table.string( 'description' )
|
table.string( 'description' )
|
||||||
@@ -74,6 +76,7 @@ exports.up = async knex => {
|
|||||||
} )
|
} )
|
||||||
|
|
||||||
await knex.raw( 'ALTER TABLE "objects" add column "serial_id" bigserial' )
|
await knex.raw( 'ALTER TABLE "objects" add column "serial_id" bigserial' )
|
||||||
|
await knex.raw( 'CREATE INDEX serial_idx ON objects(serial_id) ' )
|
||||||
|
|
||||||
// Tree inheritance tracker
|
// Tree inheritance tracker
|
||||||
await knex.schema.createTable( 'object_tree_refs', table => {
|
await knex.schema.createTable( 'object_tree_refs', table => {
|
||||||
@@ -81,15 +84,16 @@ exports.up = async knex => {
|
|||||||
table.string( 'parent' ).index( null, 'HASH' )
|
table.string( 'parent' ).index( null, 'HASH' )
|
||||||
table.specificType( 'path', 'ltree' )
|
table.specificType( 'path', 'ltree' )
|
||||||
} )
|
} )
|
||||||
|
await knex.raw( `CREATE INDEX tree_path_idx ON object_tree_refs USING gist(path)` )
|
||||||
|
|
||||||
await knex.schema.createTable( 'object_children_closure', table => {
|
await knex.schema.createTable( 'object_children_closure', table => {
|
||||||
table.string( 'parent' ).notNullable( )
|
table.string( 'parent' ).notNullable( ).index()
|
||||||
table.string( 'child' ).notNullable( )
|
table.string( 'child' ).notNullable( ).index()
|
||||||
table.integer( 'minDepth' ).defaultTo( 1 ).notNullable()
|
table.integer( 'minDepth' ).defaultTo( 1 ).notNullable().index()
|
||||||
// table.index( [ 'parent', 'child' ], 'pc_index' )
|
table.index( [ 'parent', 'child' ], 'parent_child_index' )
|
||||||
|
table.index( [ 'parent', 'minDepth' ], 'full_pcd_index' )
|
||||||
} )
|
} )
|
||||||
|
|
||||||
await knex.raw( `CREATE INDEX tree_path_idx ON object_tree_refs USING gist(path)` )
|
|
||||||
|
|
||||||
// creates an enum type for db reference types (branch, tag).
|
// creates an enum type for db reference types (branch, tag).
|
||||||
await knex.raw( `
|
await knex.raw( `
|
||||||
|
|||||||
@@ -88,15 +88,26 @@ module.exports = {
|
|||||||
batch.forEach( obj => {
|
batch.forEach( obj => {
|
||||||
|
|
||||||
let insertionObject = prepInsertionObject( obj )
|
let insertionObject = prepInsertionObject( obj )
|
||||||
|
let totalChildrenCountByDepth = {}
|
||||||
|
let totalChildrenCountGlobal = 0
|
||||||
if ( obj.__closure !== null ) {
|
if ( obj.__closure !== null ) {
|
||||||
for ( const prop in obj.__closure ) {
|
for ( const prop in obj.__closure ) {
|
||||||
closures.push( { parent: insertionObject.id, child: prop, minDepth: obj.__closure[ prop ] } )
|
closures.push( { parent: insertionObject.id, child: prop, minDepth: obj.__closure[ prop ] } )
|
||||||
|
|
||||||
|
totalChildrenCountGlobal++
|
||||||
|
|
||||||
|
if( totalChildrenCountByDepth[ obj.__closure[prop].toString() ] )
|
||||||
|
totalChildrenCountByDepth[ obj.__closure[ prop ].toString() ]++
|
||||||
|
else
|
||||||
|
totalChildrenCountByDepth[ obj.__closure[ prop ].toString() ] = 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
delete obj.__tree
|
insertionObject.totalChildrenCount = totalChildrenCountGlobal
|
||||||
delete obj.__closure
|
insertionObject.totalChildrenCountByDepth = JSON.stringify( totalChildrenCountByDepth )
|
||||||
|
|
||||||
|
delete insertionObject.__tree
|
||||||
|
delete insertionObject.__closure
|
||||||
|
|
||||||
objsToInsert.push( insertionObject )
|
objsToInsert.push( insertionObject )
|
||||||
ids.push( insertionObject.id )
|
ids.push( insertionObject.id )
|
||||||
|
|||||||
@@ -156,13 +156,19 @@ describe( 'Objects', ( ) => {
|
|||||||
|
|
||||||
it( 'Should get object children', async ( ) => {
|
it( 'Should get object children', async ( ) => {
|
||||||
|
|
||||||
let nestedBoys = createAShitTonOfFuckingObjects( 300000 )
|
let objs_1 = createAShitTonOfFuckingObjects( 10000, 'noise__' )
|
||||||
|
let ids = await createObjects( objs_1 )
|
||||||
|
|
||||||
let ids = await createObjects( nestedBoys )
|
// let objs_2 = createAShitTonOfFuckingObjects( 20000, 'noise_2' )
|
||||||
|
// let ids2 = await createObjects( objs_2 )
|
||||||
|
|
||||||
|
// let objs_3 = createAShitTonOfFuckingObjects( 50000, 'noise_3' )
|
||||||
|
// let ids3 = await createObjects( objs_3 )
|
||||||
|
|
||||||
console.log( `base id is: ${ids[0]} ` )
|
console.log( `base id is: ${ids[0]} ` )
|
||||||
|
console.log( `base id is: ${ids2[0]} ` )
|
||||||
|
console.log( `base id is: ${ids3[0]} ` )
|
||||||
|
|
||||||
let res = await getObjectChildren( '0_hash' )
|
|
||||||
// console.log( res )
|
|
||||||
} ).timeout( 30000 )
|
} ).timeout( 30000 )
|
||||||
|
|
||||||
} )
|
} )
|
||||||
|
|||||||
@@ -0,0 +1,18 @@
|
|||||||
|
with objs as (
|
||||||
|
SELECT
|
||||||
|
-- child as id,
|
||||||
|
id,
|
||||||
|
serial_id, -- just for reference
|
||||||
|
"data"
|
||||||
|
FROM object_children_closure
|
||||||
|
JOIN objects ON objects.id = child
|
||||||
|
WHERE parent = '7919a52c017be262ee0daf1844c376d7'
|
||||||
|
AND "minDepth" < 1000
|
||||||
|
-- AND (objects."data" -> 'sortValueA')::numeric <= 700
|
||||||
|
-- AND (objects."data" -> 'sortValueA')::numeric > 100
|
||||||
|
ORDER BY id
|
||||||
|
)
|
||||||
|
SELECT * FROM objs
|
||||||
|
RIGHT JOIN (SELECT count(*) FROM objs ) c(total_count) ON TRUE
|
||||||
|
OFFSET 100
|
||||||
|
LIMIT 200
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
-- Fast, because?
|
||||||
|
SELECT child, "data" FROM object_children_closure
|
||||||
|
RIGHT JOIN objects ON objects.id = child
|
||||||
|
WHERE parent = '7919a52c017be262ee0daf1844c376d7'
|
||||||
|
ORDER BY id
|
||||||
|
OFFSET 0
|
||||||
|
LIMIT 20
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
-- SLOW, because?
|
||||||
|
-- because we were ordering it by the serial_id!
|
||||||
|
-- sorting by the id (on which we actually do the join) is 10x faster.
|
||||||
|
-- nice, to say the least.
|
||||||
|
SELECT child as id, "data" FROM object_children_closure
|
||||||
|
RIGHT JOIN objects ON objects.id = child
|
||||||
|
WHERE parent = '509cb0c19594b731214d3ffed2c011df'
|
||||||
|
-- minDepth is a way to limit asking for objects up to a specific nested depth.
|
||||||
|
-- this is useful, for example, when we want to get a stream's top level objects only.
|
||||||
|
-- AND "minDepth" < 1000
|
||||||
|
-- better pagination routine:
|
||||||
|
-- instead of using offset, we use the last item we "saw" as where clause (last seen id)
|
||||||
|
-- this assumes that we are ordering results by their id.
|
||||||
|
-- if we were ordering them by something else, this clause would need to change.
|
||||||
|
-- AND id > '5a29a1e000d94d8b9f4c6dd767235903'
|
||||||
|
-- AND (objects."data" -> 'sortValueA')::numeric <= 700
|
||||||
|
ORDER BY id
|
||||||
|
-- ORDER BY serial_id
|
||||||
|
OFFSET 0
|
||||||
|
LIMIT 200
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
WITH ids AS (
|
||||||
|
SELECT child FROM object_children_closure
|
||||||
|
WHERE parent = '94a0a141c211f60c5e3f859baae125e9'
|
||||||
|
AND "minDepth" < 100
|
||||||
|
),
|
||||||
|
objs AS (
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
speckle_type,
|
||||||
|
"data"
|
||||||
|
FROM ids
|
||||||
|
JOIN objects ON ids.child = objects.id
|
||||||
|
WHERE
|
||||||
|
(objects."data" -> 'sortValueA')::numeric >= 100
|
||||||
|
)
|
||||||
|
SELECT * FROM objs
|
||||||
|
RIGHT JOIN (SELECT count(*) FROM objs ) c(totalCount) ON TRUE
|
||||||
|
OFFSET 120
|
||||||
|
LIMIT 1000
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
WITH ids AS (
|
||||||
|
SELECT DISTINCT unnest( string_to_array( ltree2text( subltree("path", 1, 2) ), '.') ) as obj_id
|
||||||
|
FROM object_tree_refs
|
||||||
|
WHERE parent = '0_hash'
|
||||||
|
),
|
||||||
|
objs AS (
|
||||||
|
SELECT obj_id as id, speckle_type, "data"
|
||||||
|
FROM ids
|
||||||
|
JOIN objects ON ids.obj_id = objects.id
|
||||||
|
-- WHERE objects."data" @> '{"text": "This is object 1"}'
|
||||||
|
ORDER BY jsonb_path_query(data, '$.nest.orderMe' ) DESC
|
||||||
|
),
|
||||||
|
childrenCount AS (SELECT count(*) FROM ids),
|
||||||
|
resultCount AS (SELECT count(*) FROM objs)
|
||||||
|
SELECT * from objs
|
||||||
|
RIGHT JOIN (SELECT count(*) FROM objs) d(totalCount) ON TRUE
|
||||||
|
OFFSET 100
|
||||||
|
LIMIT 200
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
WITH ids as (
|
||||||
|
SELECT (subltree("path", 2, 3))::text as obj_id
|
||||||
|
FROM object_tree_refs
|
||||||
|
WHERE path ~ '0_hash.*{2}'
|
||||||
|
ORDER BY id
|
||||||
|
)
|
||||||
|
SELECT id, speckle_type, "data" -> 'nest' -> 'orderMe'
|
||||||
|
FROM ids
|
||||||
|
JOIN objects ON obj_id = objects.id WITH ORDINALITY
|
||||||
|
OFFSET 2
|
||||||
|
LIMIT 50
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
WITH ids AS(
|
||||||
|
SELECT unnest( string_to_array( ltree2text( subltree("path", 1, 3) ), '.') ) as obj_id
|
||||||
|
FROM object_tree_refs
|
||||||
|
WHERE parent = '0_hash'
|
||||||
|
)
|
||||||
|
SELECT obj_id, speckle_type, "data"
|
||||||
|
FROM ids
|
||||||
|
JOIN objects ON ids.obj_id = objects.id
|
||||||
|
OFFSET 0
|
||||||
|
LIMIT 100
|
||||||
Reference in New Issue
Block a user