From dee1479a7c9579c97ef2a6f7d40a9589570fed10 Mon Sep 17 00:00:00 2001 From: Kristaps Fabians Geikins Date: Mon, 21 Oct 2024 17:21:49 +0300 Subject: [PATCH 1/2] chore(server): core IoC #95 - getObjectChildrenQueryFactory --- .../modules/core/domain/objects/operations.ts | 15 ++ .../modules/core/graph/resolvers/objects.ts | 30 ++- .../modules/core/repositories/objects.ts | 254 +++++++++++++++++- .../server/modules/core/services/objects.js | 254 ------------------ .../server/modules/core/tests/objects.spec.js | 6 +- 5 files changed, 294 insertions(+), 265 deletions(-) diff --git a/packages/server/modules/core/domain/objects/operations.ts b/packages/server/modules/core/domain/objects/operations.ts index 50543219c..1723c2a90 100644 --- a/packages/server/modules/core/domain/objects/operations.ts +++ b/packages/server/modules/core/domain/objects/operations.ts @@ -66,6 +66,21 @@ export type GetObjectChildren = (params: { cursor: string | null }> +export type GetObjectChildrenQuery = (params: { + streamId: string + objectId: string + limit?: MaybeNullOrUndefined + depth?: MaybeNullOrUndefined + select?: MaybeNullOrUndefined + cursor?: MaybeNullOrUndefined + query?: Array<{ field: string; verb?: string; value: unknown; operator: string }> + orderBy?: { field: keyof SpeckleObject; direction: 'asc' | 'desc' } +}) => Promise<{ + objects: Omit[] + cursor: string | null + totalCount: number +}> + export type CreateObject = (params: { streamId: string object: RawSpeckleObject diff --git a/packages/server/modules/core/graph/resolvers/objects.ts b/packages/server/modules/core/graph/resolvers/objects.ts index 9809a73a7..ee0b80599 100644 --- a/packages/server/modules/core/graph/resolvers/objects.ts +++ b/packages/server/modules/core/graph/resolvers/objects.ts @@ -1,11 +1,9 @@ import { authorizeResolver } from '@/modules/shared' - -import { getObjectChildrenQuery } from '@/modules/core/services/objects' - import { isNonNullable, Roles } from '@speckle/shared' import { Resolvers } from '@/modules/core/graph/generated/graphql' import { getObjectChildrenFactory, + getObjectChildrenQueryFactory, getObjectFactory, storeClosuresIfNotFoundFactory, storeObjectsIfNotFoundFactory @@ -19,6 +17,8 @@ const createObjects = createObjectsFactory({ storeClosuresIfNotFound: storeClosuresIfNotFoundFactory({ db }) }) const getObjectChildren = getObjectChildrenFactory({ db }) +const getObjectChildrenQuery = getObjectChildrenQueryFactory({ db }) +type GetObjectChildrenQueryParams = Parameters[0] const getStreamObject: NonNullable['object'] = async function object(parent, args) { @@ -66,13 +66,27 @@ export = { objectId: parent.id, limit: args.limit, depth: args.depth, - select: args.select, - query: args.query, - orderBy: args.orderBy, + select: args.select?.filter(isNonNullable), + // TODO: Theoretically users can feed in invalid structures here + query: args.query?.filter( + isNonNullable + ) as GetObjectChildrenQueryParams['query'], + orderBy: (args.orderBy || undefined) as GetObjectChildrenQueryParams['orderBy'], cursor: args.cursor }) - result.objects.forEach((x) => (x.streamId = parent.streamId)) - return result + + // Hacky typing here, but I want to avoid filling up memory with a new array of new objects w/ .map() + const objects = result.objects as Array< + (typeof result)['objects'][number] & { + streamId: string + } + > + objects.forEach((x) => (x.streamId = parent.streamId)) + + return { + ...result, + objects + } } }, Mutation: { diff --git a/packages/server/modules/core/repositories/objects.ts b/packages/server/modules/core/repositories/objects.ts index cd2077875..2df80ef82 100644 --- a/packages/server/modules/core/repositories/objects.ts +++ b/packages/server/modules/core/repositories/objects.ts @@ -11,6 +11,7 @@ import { GetFormattedObject, GetObject, GetObjectChildren, + GetObjectChildrenQuery, GetObjectChildrenStream, GetStreamObjects, StoreClosuresIfNotFound, @@ -20,7 +21,7 @@ import { } from '@/modules/core/domain/objects/operations' import { SpeckleObject } from '@/modules/core/domain/objects/types' import { SetOptional } from 'type-fest' -import { set, toNumber } from 'lodash' +import { get, set, toNumber } from 'lodash' const ObjectChildrenClosure = buildTableHelper('object_children_closure', [ 'parent', @@ -249,3 +250,254 @@ export const getObjectChildrenFactory = const lastId = rows[rows.length - 1].id return { objects: rows, cursor: lastId } } + +/** + * This query is inefficient on larger sets (n * 10k objects) as we need to return the total count on an arbitrarily (user) defined selection of objects. + * A possible future optimisation route would be to cache the total count of a query (as objects are immutable, it will not change) on a first run, and, if found on a subsequent round, do a simpler query and merge the total count result. + */ +export const getObjectChildrenQueryFactory = + (deps: { db: Knex }): GetObjectChildrenQuery => + async (params) => { + const { streamId, objectId, select, query } = params + let { limit, depth, orderBy } = params + + limit = toNumber(limit || 0) || 50 + depth = toNumber(depth || 0) || 1000 + orderBy = orderBy || { field: 'id', direction: 'asc' } + + // Cursors received by this service should be base64 encoded. They are generated on first entry query by this service; They should never be client-side generated. + const cursor: Optional<{ + value: unknown + operator: string + field: string + lastSeenId?: string + }> = params.cursor + ? JSON.parse(Buffer.from(params.cursor, 'base64').toString('binary')) + : undefined + + // Flag that keeps track of whether we select the whole "data" part of an object or not + let fullObjectSelect = false + if (Array.isArray(select)) { + // if we order by a field that we do not select, select it! + if (orderBy && select.indexOf(orderBy.field) === -1) { + select.push(orderBy.field) + } + // // always add the id! + // if ( select.indexOf( 'id' ) === -1 ) select.unshift( 'id' ) + } else { + fullObjectSelect = true + } + + const additionalIdOrderBy = orderBy.field !== 'id' + + const operatorsWhitelist = ['=', '>', '>=', '<', '<=', '!='] + + const mainQuery = deps.db + .with( + 'object_children_closure', + knex.raw( + `SELECT objects.id as parent, d.key as child, d.value as mindepth, ? as "streamId" + FROM objects + JOIN jsonb_each_text(objects.data->'__closure') d ON true + where objects.id = ?`, + [streamId, objectId] + ) + ) + .with('objs', (cteInnerQuery) => { + // always select the id + cteInnerQuery.select('id').from('object_children_closure') + cteInnerQuery.select('createdAt') + cteInnerQuery.select('speckleType') + cteInnerQuery.select('totalChildrenCount') + + // if there are any select fields, add them + if (Array.isArray(select)) { + select.forEach((field, index) => { + cteInnerQuery.select( + knex.raw('jsonb_path_query(data, :path) as :name:', { + path: '$.' + field, + name: '' + index + }) + ) + }) + // otherwise, get the whole object, as stored in the jsonb column + } else { + cteInnerQuery.select('data') + } + + // join on objects table + cteInnerQuery + .join('objects', function () { + this.on('objects.streamId', '=', 'object_children_closure.streamId').andOn( + 'objects.id', + '=', + 'object_children_closure.child' + ) + }) + .where('object_children_closure.streamId', streamId) + .andWhere('parent', objectId) + .andWhere('mindepth', '<', depth) + + // Add user provided filters/queries. + if (Array.isArray(query) && query.length > 0) { + cteInnerQuery.andWhere((nestedWhereQuery) => { + query.forEach((statement, index) => { + let castType = 'text' + if (typeof statement.value === 'string') castType = 'text' + if (typeof statement.value === 'boolean') castType = 'boolean' + if (typeof statement.value === 'number') castType = 'numeric' + + if (operatorsWhitelist.indexOf(statement.operator) === -1) + throw new Error('Invalid operator for query') + + // Determine the correct where clause (where, and where, or where) + let whereClause: keyof typeof nestedWhereQuery + if (index === 0) whereClause = 'where' + else if (statement.verb && statement.verb.toLowerCase() === 'or') + whereClause = 'orWhere' + else whereClause = 'andWhere' + + // Note: castType is generated from the statement's value and operators are matched against a whitelist. + // If comparing with strings, the jsonb_path_query(_first) func returns json encoded strings (ie, `bar` is actually `"bar"`), hence we need to add the quotes manually to the raw provided comparison value. + nestedWhereQuery[whereClause]( + knex.raw( + `jsonb_path_query_first( data, ? )::${castType} ${statement.operator} ? `, + [ + '$.' + statement.field, + castType === 'text' ? `"${statement.value}"` : statement.value + ] + ) + ) + }) + }) + } + + // Order by clause; validate direction! + const direction = + orderBy.direction && orderBy.direction.toLowerCase() === 'desc' + ? 'desc' + : 'asc' + if (orderBy.field === 'id') { + cteInnerQuery.orderBy('id', direction) + } else { + cteInnerQuery.orderByRaw( + knex.raw(`jsonb_path_query_first( data, ? ) ${direction}, id asc`, [ + '$.' + orderBy.field + ]) + ) + } + }) + .select('*') + .from('objs') + .joinRaw('RIGHT JOIN ( SELECT count(*) FROM "objs" ) c(total_count) ON TRUE') + + // Set cursor clause, if present. If it's not present, it's an entry query; this method will return a cursor based on its given query. + // We have implemented keyset pagination for more efficient searches on larger sets. This approach depends on an order by value provided by the user and a (hidden) primary key. + // logger.debug( cursor ) + if (cursor) { + let castType = 'text' + if (typeof cursor.value === 'string') castType = 'text' + if (typeof cursor.value === 'boolean') castType = 'boolean' + if (typeof cursor.value === 'number') castType = 'numeric' + + // When strings are used inside an order clause, as mentioned above, we need to add quotes around the comparison value, as the jsonb_path_query funcs return json encoded strings (`{"test":"foo"}` => test is returned as `"foo"`) + if (castType === 'text') cursor.value = `"${cursor.value}"` + + if (operatorsWhitelist.indexOf(cursor.operator) === -1) + throw new Error('Invalid operator for cursor') + + // Unwrapping the tuple comparison of ( userOrderByField, id ) > ( lastValueOfUserOrderBy, lastSeenId ) + if (fullObjectSelect) { + if (cursor.field === 'id') { + mainQuery.where(knex.raw(`id ${cursor.operator} ? `, [cursor.value])) + } else { + mainQuery.where( + knex.raw( + `jsonb_path_query_first( data, ? )::${castType} ${cursor.operator}= ? `, + ['$.' + cursor.field, cursor.value] + ) + ) + } + } else { + mainQuery.where( + knex.raw(`??::${castType} ${cursor.operator}= ? `, [ + (select || []).indexOf(cursor.field).toString(), + cursor.value + ]) + ) + } + + if (cursor.lastSeenId) { + mainQuery.andWhere((qb) => { + // Dunno what the TS issue here is, the JS code itself seemed to work fine as JS + // eslint-disable-next-line @typescript-eslint/no-explicit-any + qb.where('id' as any, '>', cursor!.lastSeenId as any) + if (fullObjectSelect) + qb.orWhere( + knex.raw( + `jsonb_path_query_first( data, ? )::${castType} ${cursor!.operator} ? `, + ['$.' + cursor!.field, cursor!.value] + ) + ) + else + qb.orWhere( + knex.raw(`??::${castType} ${cursor!.operator} ? `, [ + (select || []).indexOf(cursor!.field).toString(), + cursor!.value + ]) + ) + }) + } + } + + mainQuery.limit(limit) + // logger.debug( mainQuery.toString() ) + // Finally, execute the query + const rows = await mainQuery + const totalCount = rows && rows.length > 0 ? parseInt(rows[0].total_count) : 0 + + // Return early + if (totalCount === 0) return { totalCount, objects: [], cursor: null } + + // Reconstruct the object based on the provided select paths. + if (!fullObjectSelect) { + rows.forEach((o, i, arr) => { + const no = { + id: o.id, + createdAt: o.createdAt, + speckleType: o.speckleType, + totalChildrenCount: o.totalChildrenCount, + data: {} + } + let k = 0 + for (const field of select || []) { + set(no.data, field, o[k++]) + } + arr[i] = no + }) + } + + // Assemble the cursor for an eventual next call + const cursorObj: typeof cursor = { + field: cursor?.field || orderBy.field, + operator: + cursor?.operator || + (orderBy.direction && orderBy.direction.toLowerCase() === 'desc' ? '<' : '>'), + value: get(rows[rows.length - 1], `data.${orderBy.field}`) + } + + // If we're not ordering by id (default case, where no order by argument is provided), we need to add the last seen id of this query in order to enable keyset pagination. + if (additionalIdOrderBy) { + cursorObj.lastSeenId = rows[rows.length - 1].id + } + + // Cursor objects should be client-side opaque, hence we encode them to base64. + const cursorEncoded = Buffer.from(JSON.stringify(cursorObj), 'binary').toString( + 'base64' + ) + return { + totalCount, + objects: rows, + cursor: rows.length === limit ? cursorEncoded : null + } + } diff --git a/packages/server/modules/core/services/objects.js b/packages/server/modules/core/services/objects.js index b4d068f44..08e32e290 100644 --- a/packages/server/modules/core/services/objects.js +++ b/packages/server/modules/core/services/objects.js @@ -1,262 +1,8 @@ -const { set, get } = require('lodash') - const knex = require(`@/db/knex`) const Objects = () => knex('objects') module.exports = { - /** - * - * This query is inefficient on larger sets (n * 10k objects) as we need to return the total count on an arbitrarily (user) defined selection of objects. - * A possible future optimisation route would be to cache the total count of a query (as objects are immutable, it will not change) on a first run, and, if found on a subsequent round, do a simpler query and merge the total count result. - * @param {*} param0 - * @returns {Promise<{objects: import('@/modules/core/helpers/types').ObjectRecord[], cursor: string | null, totalCount: number}>} - */ - async getObjectChildrenQuery({ - streamId, - objectId, - limit, - depth, - select, - cursor, - query, - orderBy - }) { - limit = parseInt(limit) || 50 - depth = parseInt(depth) || 1000 - orderBy = orderBy || { field: 'id', direction: 'asc' } - - // Cursors received by this service should be base64 encoded. They are generated on first entry query by this service; They should never be client-side generated. - if (cursor) { - cursor = JSON.parse(Buffer.from(cursor, 'base64').toString('binary')) - } - - // Flag that keeps track of whether we select the whole "data" part of an object or not - let fullObjectSelect = false - if (Array.isArray(select)) { - // if we order by a field that we do not select, select it! - if (orderBy && select.indexOf(orderBy.field) === -1) { - select.push(orderBy.field) - } - // // always add the id! - // if ( select.indexOf( 'id' ) === -1 ) select.unshift( 'id' ) - } else { - fullObjectSelect = true - } - - const additionalIdOrderBy = orderBy.field !== 'id' - - const operatorsWhitelist = ['=', '>', '>=', '<', '<=', '!='] - - const mainQuery = knex - .with( - 'object_children_closure', - knex.raw( - `SELECT objects.id as parent, d.key as child, d.value as mindepth, ? as "streamId" - FROM objects - JOIN jsonb_each_text(objects.data->'__closure') d ON true - where objects.id = ?`, - [streamId, objectId] - ) - ) - .with('objs', (cteInnerQuery) => { - // always select the id - cteInnerQuery.select('id').from('object_children_closure') - cteInnerQuery.select('createdAt') - cteInnerQuery.select('speckleType') - cteInnerQuery.select('totalChildrenCount') - - // if there are any select fields, add them - if (Array.isArray(select)) { - select.forEach((field, index) => { - cteInnerQuery.select( - knex.raw('jsonb_path_query(data, :path) as :name:', { - path: '$.' + field, - name: '' + index - }) - ) - }) - // otherwise, get the whole object, as stored in the jsonb column - } else { - cteInnerQuery.select('data') - } - - // join on objects table - cteInnerQuery - .join('objects', function () { - this.on('objects.streamId', '=', 'object_children_closure.streamId').andOn( - 'objects.id', - '=', - 'object_children_closure.child' - ) - }) - .where('object_children_closure.streamId', streamId) - .andWhere('parent', objectId) - .andWhere('mindepth', '<', depth) - - // Add user provided filters/queries. - if (Array.isArray(query) && query.length > 0) { - cteInnerQuery.andWhere((nestedWhereQuery) => { - query.forEach((statement, index) => { - let castType = 'text' - if (typeof statement.value === 'string') castType = 'text' - if (typeof statement.value === 'boolean') castType = 'boolean' - if (typeof statement.value === 'number') castType = 'numeric' - - if (operatorsWhitelist.indexOf(statement.operator) === -1) - throw new Error('Invalid operator for query') - - // Determine the correct where clause (where, and where, or where) - let whereClause - if (index === 0) whereClause = 'where' - else if (statement.verb && statement.verb.toLowerCase() === 'or') - whereClause = 'orWhere' - else whereClause = 'andWhere' - - // Note: castType is generated from the statement's value and operators are matched against a whitelist. - // If comparing with strings, the jsonb_path_query(_first) func returns json encoded strings (ie, `bar` is actually `"bar"`), hence we need to add the quotes manually to the raw provided comparison value. - nestedWhereQuery[whereClause]( - knex.raw( - `jsonb_path_query_first( data, ? )::${castType} ${statement.operator} ? `, - [ - '$.' + statement.field, - castType === 'text' ? `"${statement.value}"` : statement.value - ] - ) - ) - }) - }) - } - - // Order by clause; validate direction! - const direction = - orderBy.direction && orderBy.direction.toLowerCase() === 'desc' - ? 'desc' - : 'asc' - if (orderBy.field === 'id') { - cteInnerQuery.orderBy('id', direction) - } else { - cteInnerQuery.orderByRaw( - knex.raw(`jsonb_path_query_first( data, ? ) ${direction}, id asc`, [ - '$.' + orderBy.field - ]) - ) - } - }) - .select('*') - .from('objs') - .joinRaw('RIGHT JOIN ( SELECT count(*) FROM "objs" ) c(total_count) ON TRUE') - - // Set cursor clause, if present. If it's not present, it's an entry query; this method will return a cursor based on its given query. - // We have implemented keyset pagination for more efficient searches on larger sets. This approach depends on an order by value provided by the user and a (hidden) primary key. - // logger.debug( cursor ) - if (cursor) { - let castType = 'text' - if (typeof cursor.value === 'string') castType = 'text' - if (typeof cursor.value === 'boolean') castType = 'boolean' - if (typeof cursor.value === 'number') castType = 'numeric' - - // When strings are used inside an order clause, as mentioned above, we need to add quotes around the comparison value, as the jsonb_path_query funcs return json encoded strings (`{"test":"foo"}` => test is returned as `"foo"`) - if (castType === 'text') cursor.value = `"${cursor.value}"` - - if (operatorsWhitelist.indexOf(cursor.operator) === -1) - throw new Error('Invalid operator for cursor') - - // Unwrapping the tuple comparison of ( userOrderByField, id ) > ( lastValueOfUserOrderBy, lastSeenId ) - if (fullObjectSelect) { - if (cursor.field === 'id') { - mainQuery.where(knex.raw(`id ${cursor.operator} ? `, [cursor.value])) - } else { - mainQuery.where( - knex.raw( - `jsonb_path_query_first( data, ? )::${castType} ${cursor.operator}= ? `, - ['$.' + cursor.field, cursor.value] - ) - ) - } - } else { - mainQuery.where( - knex.raw(`??::${castType} ${cursor.operator}= ? `, [ - select.indexOf(cursor.field).toString(), - cursor.value - ]) - ) - } - - if (cursor.lastSeenId) { - mainQuery.andWhere((qb) => { - qb.where('id', '>', cursor.lastSeenId) - if (fullObjectSelect) - qb.orWhere( - knex.raw( - `jsonb_path_query_first( data, ? )::${castType} ${cursor.operator} ? `, - ['$.' + cursor.field, cursor.value] - ) - ) - else - qb.orWhere( - knex.raw(`??::${castType} ${cursor.operator} ? `, [ - select.indexOf(cursor.field).toString(), - cursor.value - ]) - ) - }) - } - } - - mainQuery.limit(limit) - // logger.debug( mainQuery.toString() ) - // Finally, execute the query - const rows = await mainQuery - const totalCount = rows && rows.length > 0 ? parseInt(rows[0].total_count) : 0 - - // Return early - if (totalCount === 0) return { totalCount, objects: [], cursor: null } - - // Reconstruct the object based on the provided select paths. - if (!fullObjectSelect) { - rows.forEach((o, i, arr) => { - const no = { - id: o.id, - createdAt: o.createdAt, - speckleType: o.speckleType, - totalChildrenCount: o.totalChildrenCount, - data: {} - } - let k = 0 - for (const field of select) { - set(no.data, field, o[k++]) - } - arr[i] = no - }) - } - - // Assemble the cursor for an eventual next call - cursor = cursor || {} - const cursorObj = { - field: cursor.field || orderBy.field, - operator: - cursor.operator || - (orderBy.direction && orderBy.direction.toLowerCase() === 'desc' ? '<' : '>'), - value: get(rows[rows.length - 1], `data.${orderBy.field}`) - } - - // If we're not ordering by id (default case, where no order by argument is provided), we need to add the last seen id of this query in order to enable keyset pagination. - if (additionalIdOrderBy) { - cursorObj.lastSeenId = rows[rows.length - 1].id - } - - // Cursor objects should be client-side opaque, hence we encode them to base64. - const cursorEncoded = Buffer.from(JSON.stringify(cursorObj), 'binary').toString( - 'base64' - ) - return { - totalCount, - objects: rows, - cursor: rows.length === limit ? cursorEncoded : null - } - }, - async getObjects(streamId, objectIds) { const res = await Objects() .whereIn('id', objectIds) diff --git a/packages/server/modules/core/tests/objects.spec.js b/packages/server/modules/core/tests/objects.spec.js index f61ca3ac4..c80bbdf06 100644 --- a/packages/server/modules/core/tests/objects.spec.js +++ b/packages/server/modules/core/tests/objects.spec.js @@ -7,7 +7,7 @@ const { cloneDeep, times, random, padStart } = require('lodash') const { beforeEachContext } = require('@/test/hooks') const { getAnIdForThisOnePlease } = require('@/test/helpers') -const { getObjects, getObjectChildrenQuery } = require('../services/objects') +const { getObjects } = require('../services/objects') const { getStreamFactory, createStreamFactory @@ -83,7 +83,8 @@ const { storeObjectsIfNotFoundFactory, getFormattedObjectFactory, getObjectChildrenStreamFactory, - getObjectChildrenFactory + getObjectChildrenFactory, + getObjectChildrenQueryFactory } = require('@/modules/core/repositories/objects') const sampleCommit = JSON.parse(`{ @@ -188,6 +189,7 @@ const createObjects = createObjectsFactory({ const getObject = getFormattedObjectFactory({ db }) const getObjectChildrenStream = getObjectChildrenStreamFactory({ db }) const getObjectChildren = getObjectChildrenFactory({ db }) +const getObjectChildrenQuery = getObjectChildrenQueryFactory({ db }) describe('Objects @core-objects', () => { const userOne = { From 086e67b871e37650ff66fc52bb467a3a94fc521c Mon Sep 17 00:00:00 2001 From: Kristaps Fabians Geikins Date: Mon, 21 Oct 2024 17:48:24 +0300 Subject: [PATCH 2/2] lintfix? --- packages/server/modules/core/repositories/objects.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/server/modules/core/repositories/objects.ts b/packages/server/modules/core/repositories/objects.ts index 2df80ef82..5d08bd5e0 100644 --- a/packages/server/modules/core/repositories/objects.ts +++ b/packages/server/modules/core/repositories/objects.ts @@ -259,11 +259,10 @@ export const getObjectChildrenQueryFactory = (deps: { db: Knex }): GetObjectChildrenQuery => async (params) => { const { streamId, objectId, select, query } = params - let { limit, depth, orderBy } = params - limit = toNumber(limit || 0) || 50 - depth = toNumber(depth || 0) || 1000 - orderBy = orderBy || { field: 'id', direction: 'asc' } + const limit = toNumber(params.limit || 0) || 50 + const depth = toNumber(params.depth || 0) || 1000 + const orderBy = params.orderBy || { field: 'id', direction: 'asc' } // Cursors received by this service should be base64 encoded. They are generated on first entry query by this service; They should never be client-side generated. const cursor: Optional<{