feat(queries): figuring out the ideal query for getting objects
Quite a few things are being considered performance-wise: pagination, ordering, querying, and depth.
This commit is contained in:
@@ -66,6 +66,8 @@ exports.up = async knex => {
|
||||
table.string( 'id' ).primary( )
|
||||
table.string( 'speckle_type' ).defaultTo( 'Base' ).notNullable( )
|
||||
table.string( 'applicationId' )
|
||||
table.integer( 'totalChildrenCount' )
|
||||
table.jsonb( 'totalChildrenCountByDepth' )
|
||||
table.jsonb( 'data' )
|
||||
table.string( 'author', 10 ).references( 'id' ).inTable( 'users' )
|
||||
table.string( 'description' )
|
||||
@@ -74,6 +76,7 @@ exports.up = async knex => {
|
||||
} )
|
||||
|
||||
await knex.raw( 'ALTER TABLE "objects" add column "serial_id" bigserial' )
|
||||
await knex.raw( 'CREATE INDEX serial_idx ON objects(serial_id) ' )
|
||||
|
||||
// Tree inheritance tracker
|
||||
await knex.schema.createTable( 'object_tree_refs', table => {
|
||||
@@ -81,15 +84,16 @@ exports.up = async knex => {
|
||||
table.string( 'parent' ).index( null, 'HASH' )
|
||||
table.specificType( 'path', 'ltree' )
|
||||
} )
|
||||
await knex.raw( `CREATE INDEX tree_path_idx ON object_tree_refs USING gist(path)` )
|
||||
|
||||
await knex.schema.createTable( 'object_children_closure', table => {
|
||||
table.string( 'parent' ).notNullable( )
|
||||
table.string( 'child' ).notNullable( )
|
||||
table.integer( 'minDepth' ).defaultTo( 1 ).notNullable()
|
||||
// table.index( [ 'parent', 'child' ], 'pc_index' )
|
||||
table.string( 'parent' ).notNullable( ).index()
|
||||
table.string( 'child' ).notNullable( ).index()
|
||||
table.integer( 'minDepth' ).defaultTo( 1 ).notNullable().index()
|
||||
table.index( [ 'parent', 'child' ], 'parent_child_index' )
|
||||
table.index( [ 'parent', 'minDepth' ], 'full_pcd_index' )
|
||||
} )
|
||||
|
||||
await knex.raw( `CREATE INDEX tree_path_idx ON object_tree_refs USING gist(path)` )
|
||||
|
||||
// creates an enum type for db reference types (branch, tag).
|
||||
await knex.raw( `
|
||||
|
||||
@@ -88,15 +88,26 @@ module.exports = {
|
||||
batch.forEach( obj => {
|
||||
|
||||
let insertionObject = prepInsertionObject( obj )
|
||||
|
||||
let totalChildrenCountByDepth = {}
|
||||
let totalChildrenCountGlobal = 0
|
||||
if ( obj.__closure !== null ) {
|
||||
for ( const prop in obj.__closure ) {
|
||||
closures.push( { parent: insertionObject.id, child: prop, minDepth: obj.__closure[ prop ] } )
|
||||
|
||||
totalChildrenCountGlobal++
|
||||
|
||||
if( totalChildrenCountByDepth[ obj.__closure[prop].toString() ] )
|
||||
totalChildrenCountByDepth[ obj.__closure[ prop ].toString() ]++
|
||||
else
|
||||
totalChildrenCountByDepth[ obj.__closure[ prop ].toString() ] = 1
|
||||
}
|
||||
}
|
||||
|
||||
delete obj.__tree
|
||||
delete obj.__closure
|
||||
insertionObject.totalChildrenCount = totalChildrenCountGlobal
|
||||
insertionObject.totalChildrenCountByDepth = JSON.stringify( totalChildrenCountByDepth )
|
||||
|
||||
delete insertionObject.__tree
|
||||
delete insertionObject.__closure
|
||||
|
||||
objsToInsert.push( insertionObject )
|
||||
ids.push( insertionObject.id )
|
||||
|
||||
@@ -156,13 +156,19 @@ describe( 'Objects', ( ) => {
|
||||
|
||||
it( 'Should get object children', async ( ) => {
|
||||
|
||||
let nestedBoys = createAShitTonOfFuckingObjects( 300000 )
|
||||
let objs_1 = createAShitTonOfFuckingObjects( 10000, 'noise__' )
|
||||
let ids = await createObjects( objs_1 )
|
||||
|
||||
let ids = await createObjects( nestedBoys )
|
||||
// let objs_2 = createAShitTonOfFuckingObjects( 20000, 'noise_2' )
|
||||
// let ids2 = await createObjects( objs_2 )
|
||||
|
||||
// let objs_3 = createAShitTonOfFuckingObjects( 50000, 'noise_3' )
|
||||
// let ids3 = await createObjects( objs_3 )
|
||||
|
||||
console.log( `base id is: ${ids[0]} ` )
|
||||
console.log( `base id is: ${ids2[0]} ` )
|
||||
console.log( `base id is: ${ids3[0]} ` )
|
||||
|
||||
let res = await getObjectChildren( '0_hash' )
|
||||
// console.log( res )
|
||||
} ).timeout( 30000 )
|
||||
|
||||
} )
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
with objs as (
|
||||
SELECT
|
||||
-- child as id,
|
||||
id,
|
||||
serial_id, -- just for reference
|
||||
"data"
|
||||
FROM object_children_closure
|
||||
JOIN objects ON objects.id = child
|
||||
WHERE parent = '7919a52c017be262ee0daf1844c376d7'
|
||||
AND "minDepth" < 1000
|
||||
-- AND (objects."data" -> 'sortValueA')::numeric <= 700
|
||||
-- AND (objects."data" -> 'sortValueA')::numeric > 100
|
||||
ORDER BY id
|
||||
)
|
||||
SELECT * FROM objs
|
||||
RIGHT JOIN (SELECT count(*) FROM objs ) c(total_count) ON TRUE
|
||||
OFFSET 100
|
||||
LIMIT 200
|
||||
@@ -0,0 +1,7 @@
|
||||
-- Fast, because?
|
||||
SELECT child, "data" FROM object_children_closure
|
||||
RIGHT JOIN objects ON objects.id = child
|
||||
WHERE parent = '7919a52c017be262ee0daf1844c376d7'
|
||||
ORDER BY id
|
||||
OFFSET 0
|
||||
LIMIT 20
|
||||
@@ -0,0 +1,20 @@
|
||||
-- SLOW, because?
|
||||
-- because we were ordering it by the serial_id!
|
||||
-- sorting by the id (on which we actually do the join) is 10x faster.
|
||||
-- nice, to say the least.
|
||||
SELECT child as id, "data" FROM object_children_closure
|
||||
RIGHT JOIN objects ON objects.id = child
|
||||
WHERE parent = '509cb0c19594b731214d3ffed2c011df'
|
||||
-- minDepth is a way to limit asking for objects up to a specific nested depth.
|
||||
-- this is useful, for example, when we want to get a stream's top level objects only.
|
||||
-- AND "minDepth" < 1000
|
||||
-- better pagination routine:
|
||||
-- instead of using OFFSET, we use the last item we "saw" (the last seen id) in the WHERE clause
|
||||
-- this assumes that we are ordering results by their id.
|
||||
-- if we were ordering them by something else, this clause would need to change.
|
||||
-- AND id > '5a29a1e000d94d8b9f4c6dd767235903'
|
||||
-- AND (objects."data" -> 'sortValueA')::numeric <= 700
|
||||
ORDER BY id
|
||||
-- ORDER BY serial_id
|
||||
OFFSET 0
|
||||
LIMIT 200
|
||||
@@ -0,0 +1,19 @@
|
||||
WITH ids AS (
|
||||
SELECT child FROM object_children_closure
|
||||
WHERE parent = '94a0a141c211f60c5e3f859baae125e9'
|
||||
AND "minDepth" < 100
|
||||
),
|
||||
objs AS (
|
||||
SELECT
|
||||
id,
|
||||
speckle_type,
|
||||
"data"
|
||||
FROM ids
|
||||
JOIN objects ON ids.child = objects.id
|
||||
WHERE
|
||||
(objects."data" -> 'sortValueA')::numeric >= 100
|
||||
)
|
||||
SELECT * FROM objs
|
||||
RIGHT JOIN (SELECT count(*) FROM objs ) c(totalCount) ON TRUE
|
||||
OFFSET 120
|
||||
LIMIT 1000
|
||||
@@ -0,0 +1,18 @@
|
||||
WITH ids AS (
|
||||
SELECT DISTINCT unnest( string_to_array( ltree2text( subltree("path", 1, 2) ), '.') ) as obj_id
|
||||
FROM object_tree_refs
|
||||
WHERE parent = '0_hash'
|
||||
),
|
||||
objs AS (
|
||||
SELECT obj_id as id, speckle_type, "data"
|
||||
FROM ids
|
||||
JOIN objects ON ids.obj_id = objects.id
|
||||
-- WHERE objects."data" @> '{"text": "This is object 1"}'
|
||||
ORDER BY jsonb_path_query(data, '$.nest.orderMe' ) DESC
|
||||
),
|
||||
childrenCount AS (SELECT count(*) FROM ids),
|
||||
resultCount AS (SELECT count(*) FROM objs)
|
||||
SELECT * from objs
|
||||
RIGHT JOIN (SELECT count(*) FROM objs) d(totalCount) ON TRUE
|
||||
OFFSET 100
|
||||
LIMIT 200
|
||||
@@ -0,0 +1,11 @@
|
||||
WITH ids as (
|
||||
SELECT (subltree("path", 2, 3))::text as obj_id
|
||||
FROM object_tree_refs
|
||||
WHERE path ~ '0_hash.*{2}'
|
||||
ORDER BY id
|
||||
)
|
||||
SELECT id, speckle_type, "data" -> 'nest' -> 'orderMe'
|
||||
FROM ids
|
||||
JOIN objects ON obj_id = objects.id WITH ORDINALITY
|
||||
OFFSET 2
|
||||
LIMIT 50
|
||||
@@ -0,0 +1,10 @@
|
||||
WITH ids AS(
|
||||
SELECT unnest( string_to_array( ltree2text( subltree("path", 1, 3) ), '.') ) as obj_id
|
||||
FROM object_tree_refs
|
||||
WHERE parent = '0_hash'
|
||||
)
|
||||
SELECT obj_id, speckle_type, "data"
|
||||
FROM ids
|
||||
JOIN objects ON ids.obj_id = objects.id
|
||||
OFFSET 0
|
||||
LIMIT 100
|
||||
Reference in New Issue
Block a user