feat(queries): figuring out the perfect query for getting objects

Quite a few things are being considered, performance-wise: pagination, ordering, querying, depth.
This commit is contained in:
Dimitrie Stefanescu
2020-05-04 16:25:37 +01:00
parent 666e69c428
commit 85cc525663
10 changed files with 136 additions and 12 deletions
+9 -5
View File
@@ -66,6 +66,8 @@ exports.up = async knex => {
table.string( 'id' ).primary( )
table.string( 'speckle_type' ).defaultTo( 'Base' ).notNullable( )
table.string( 'applicationId' )
table.integer( 'totalChildrenCount' )
table.jsonb( 'totalChildrenCountByDepth' )
table.jsonb( 'data' )
table.string( 'author', 10 ).references( 'id' ).inTable( 'users' )
table.string( 'description' )
@@ -74,6 +76,7 @@ exports.up = async knex => {
} )
await knex.raw( 'ALTER TABLE "objects" add column "serial_id" bigserial' )
await knex.raw( 'CREATE INDEX serial_idx ON objects(serial_id) ' )
// Tree inheritance tracker
await knex.schema.createTable( 'object_tree_refs', table => {
@@ -81,15 +84,16 @@ exports.up = async knex => {
table.string( 'parent' ).index( null, 'HASH' )
table.specificType( 'path', 'ltree' )
} )
await knex.raw( `CREATE INDEX tree_path_idx ON object_tree_refs USING gist(path)` )
await knex.schema.createTable( 'object_children_closure', table => {
table.string( 'parent' ).notNullable( )
table.string( 'child' ).notNullable( )
table.integer( 'minDepth' ).defaultTo( 1 ).notNullable()
// table.index( [ 'parent', 'child' ], 'pc_index' )
table.string( 'parent' ).notNullable( ).index()
table.string( 'child' ).notNullable( ).index()
table.integer( 'minDepth' ).defaultTo( 1 ).notNullable().index()
table.index( [ 'parent', 'child' ], 'parent_child_index' )
table.index( [ 'parent', 'minDepth' ], 'full_pcd_index' )
} )
await knex.raw( `CREATE INDEX tree_path_idx ON object_tree_refs USING gist(path)` )
// creates an enum type for db reference types (branch, tag).
await knex.raw( `
+14 -3
View File
@@ -88,15 +88,26 @@ module.exports = {
batch.forEach( obj => {
let insertionObject = prepInsertionObject( obj )
let totalChildrenCountByDepth = {}
let totalChildrenCountGlobal = 0
if ( obj.__closure !== null ) {
for ( const prop in obj.__closure ) {
closures.push( { parent: insertionObject.id, child: prop, minDepth: obj.__closure[ prop ] } )
totalChildrenCountGlobal++
if( totalChildrenCountByDepth[ obj.__closure[prop].toString() ] )
totalChildrenCountByDepth[ obj.__closure[ prop ].toString() ]++
else
totalChildrenCountByDepth[ obj.__closure[ prop ].toString() ] = 1
}
}
delete obj.__tree
delete obj.__closure
insertionObject.totalChildrenCount = totalChildrenCountGlobal
insertionObject.totalChildrenCountByDepth = JSON.stringify( totalChildrenCountByDepth )
delete insertionObject.__tree
delete insertionObject.__closure
objsToInsert.push( insertionObject )
ids.push( insertionObject.id )
+10 -4
View File
@@ -156,13 +156,19 @@ describe( 'Objects', ( ) => {
it( 'Should get object children', async ( ) => {
let nestedBoys = createAShitTonOfFuckingObjects( 300000 )
let objs_1 = createAShitTonOfFuckingObjects( 10000, 'noise__' )
let ids = await createObjects( objs_1 )
let ids = await createObjects( nestedBoys )
// let objs_2 = createAShitTonOfFuckingObjects( 20000, 'noise_2' )
// let ids2 = await createObjects( objs_2 )
// let objs_3 = createAShitTonOfFuckingObjects( 50000, 'noise_3' )
// let ids3 = await createObjects( objs_3 )
console.log( `base id is: ${ids[0]} ` )
console.log( `base id is: ${ids2[0]} ` )
console.log( `base id is: ${ids3[0]} ` )
let res = await getObjectChildren( '0_hash' )
// console.log( res )
} ).timeout( 30000 )
} )
+18
View File
@@ -0,0 +1,18 @@
-- Paginated fetch of one parent's children (closure rows joined to their
-- object rows), with the total number of matches repeated on every row.
with objs as (
SELECT
-- child as id,
id,
serial_id, -- just for reference
"data"
FROM object_children_closure
JOIN objects ON objects.id = child
WHERE parent = '7919a52c017be262ee0daf1844c376d7'
-- depth filter: keep only closure rows whose "minDepth" is below the threshold
AND "minDepth" < 1000
-- optional range filter on a numeric value stored inside the jsonb "data" column
-- AND (objects."data" -> 'sortValueA')::numeric <= 700
-- AND (objects."data" -> 'sortValueA')::numeric > 100
ORDER BY id
)
-- The count subquery yields exactly one row; RIGHT JOIN ... ON TRUE attaches it
-- to every objs row (and still yields one NULL-padded row when objs is empty).
SELECT * FROM objs
RIGHT JOIN (SELECT count(*) FROM objs ) c(total_count) ON TRUE
-- NOTE(review): the outer SELECT has no ORDER BY of its own; the only ordering
-- is inside the CTE. Confirm that pages are stable across calls.
OFFSET 100
LIMIT 200
+7
View File
@@ -0,0 +1,7 @@
-- Fast, because?
-- Returns the first 20 children of one parent together with their "data",
-- ordered by id.
-- NOTE(review): the WHERE on "parent" discards the NULL-extended rows that the
-- RIGHT JOIN adds, so this presumably behaves like an inner join — confirm.
SELECT child, "data" FROM object_children_closure
RIGHT JOIN objects ON objects.id = child
WHERE parent = '7919a52c017be262ee0daf1844c376d7'
ORDER BY id
OFFSET 0
LIMIT 20
+20
View File
@@ -0,0 +1,20 @@
-- SLOW, because?
-- because we were ordering it by the serial_id!
-- sorting by the id (on which we actually do the join) is 10x faster.
-- nice, to say the least.
--
-- Returns the first 200 children of one parent (child exposed as "id") together
-- with their "data", ordered by id.
SELECT child as id, "data" FROM object_children_closure
RIGHT JOIN objects ON objects.id = child
WHERE parent = '509cb0c19594b731214d3ffed2c011df'
-- minDepth is a way to limit asking for objects up to a specific nested depth.
-- this is useful, for example, when we want to get a stream's top level objects only.
-- AND "minDepth" < 1000
-- better pagination routine:
-- instead of using offset, we use the last item we "saw" as where clause (last seen id)
-- this assumes that we are ordering results by their id.
-- if we would be ordering them by something else, this clause would need to change.
-- AND id > '5a29a1e000d94d8b9f4c6dd767235903'
-- optional filter on a numeric value stored inside the jsonb "data" column
-- AND (objects."data" -> 'sortValueA')::numeric <= 700
ORDER BY id
-- the slow variant referred to in the header comment:
-- ORDER BY serial_id
OFFSET 0
LIMIT 200
+19
View File
@@ -0,0 +1,19 @@
-- Two-step variant: first collect the child ids of one parent from the closure
-- table, then join them to objects and filter on a value inside "data".
-- The total number of filtered rows is repeated on every returned row.
WITH ids AS (
SELECT child FROM object_children_closure
WHERE parent = '94a0a141c211f60c5e3f859baae125e9'
-- depth filter: keep only closure rows whose "minDepth" is below the threshold
AND "minDepth" < 100
),
objs AS (
SELECT
id,
speckle_type,
"data"
FROM ids
JOIN objects ON ids.child = objects.id
WHERE
-- "data" -> 'sortValueA' is cast to numeric for the comparison
(objects."data" -> 'sortValueA')::numeric >= 100
)
-- The count subquery yields exactly one row; RIGHT JOIN ... ON TRUE attaches it
-- to every objs row.
-- NOTE(review): the alias totalCount is unquoted, so PostgreSQL presumably folds
-- it to "totalcount" in the result — confirm what the consumer expects.
SELECT * FROM objs
RIGHT JOIN (SELECT count(*) FROM objs ) c(totalCount) ON TRUE
-- NOTE(review): there is no ORDER BY anywhere in this statement, so which rows
-- land in this page is not pinned down — confirm.
OFFSET 120
LIMIT 1000
+18
View File
@@ -0,0 +1,18 @@
-- ltree-based variant: derive child ids from the "path" column of
-- object_tree_refs instead of the closure table, join them to objects, and
-- return a page of results with the total row count on every row.
WITH ids AS (
-- subltree("path", 1, 2) is converted to text, split on '.', and unnested into
-- one row per label; DISTINCT removes repeated ids.
-- NOTE(review): presumably positions are 0-based with an exclusive end, so this
-- picks the single label right after the root — confirm against the ltree docs.
SELECT DISTINCT unnest( string_to_array( ltree2text( subltree("path", 1, 2) ), '.') ) as obj_id
FROM object_tree_refs
WHERE parent = '0_hash'
),
objs AS (
SELECT obj_id as id, speckle_type, "data"
FROM ids
JOIN objects ON ids.obj_id = objects.id
-- WHERE objects."data" @> '{"text": "This is object 1"}'
-- order by a nested value extracted from the jsonb "data" column
ORDER BY jsonb_path_query(data, '$.nest.orderMe' ) DESC
),
-- NOTE(review): childrenCount and resultCount are defined but never referenced
-- by the final SELECT below.
childrenCount AS (SELECT count(*) FROM ids),
resultCount AS (SELECT count(*) FROM objs)
-- The count subquery yields exactly one row; RIGHT JOIN ... ON TRUE attaches it
-- to every objs row.
SELECT * from objs
RIGHT JOIN (SELECT count(*) FROM objs) d(totalCount) ON TRUE
-- NOTE(review): the outer SELECT has no ORDER BY of its own; the only ordering
-- is inside the objs CTE. Confirm that pages are stable across calls.
OFFSET 100
LIMIT 200
@@ -0,0 +1,11 @@
-- ltree/lquery variant: select tree-ref rows by matching "path" against an
-- lquery pattern, extract one label from each path as the object id, and join
-- to objects to read a nested value out of "data".
WITH ids as (
-- subltree("path", 2, 3) cast to text becomes obj_id.
SELECT (subltree("path", 2, 3))::text as obj_id
FROM object_tree_refs
-- NOTE(review): presumably matches paths that start with 0_hash followed by
-- exactly two more labels — confirm against the lquery syntax.
WHERE path ~ '0_hash.*{2}'
-- NOTE(review): assumes object_tree_refs has an "id" column — confirm.
ORDER BY id
)
SELECT id, speckle_type, "data" -> 'nest' -> 'orderMe'
FROM ids
-- NOTE(review): WITH ORDINALITY follows the join condition here; it is normally
-- attached to a function call in FROM, so this statement may not parse — confirm.
JOIN objects ON obj_id = objects.id WITH ORDINALITY
OFFSET 2
LIMIT 50
+10
View File
@@ -0,0 +1,10 @@
-- ltree-based variant without ordering or counting: derive ids from a slice of
-- each "path" under one parent and return the first 100 joined object rows.
WITH ids AS(
-- subltree("path", 1, 3) is converted to text, split on '.', and unnested into
-- one row per label. There is no DISTINCT here, so an id can appear repeatedly.
SELECT unnest( string_to_array( ltree2text( subltree("path", 1, 3) ), '.') ) as obj_id
FROM object_tree_refs
WHERE parent = '0_hash'
)
SELECT obj_id, speckle_type, "data"
FROM ids
JOIN objects ON ids.obj_id = objects.id
-- NOTE(review): there is no ORDER BY in this statement, so which rows land in
-- this page is not pinned down — confirm.
OFFSET 0
LIMIT 100