Files
speckle-server/packages/server/modules/core/utils/chunking.ts
T
2023-08-01 15:36:36 +02:00

68 lines
2.2 KiB
TypeScript

import { BaseError } from '@/modules/shared/errors'
import { Options } from 'verror'
// Minimal shape of an object that can be chunked by this module.
// `data` is the string payload; its length drives the chunk size estimate
// in chunkInsertionObjectArray.
type InsertionObject = {
data: string
}
/**
 * Error raised when a function is called with an argument value that is
 * outside of its accepted range.
 *
 * NOTE(review): `defaultMessage` is presumably picked up by `BaseError` when no
 * explicit message is passed - confirm in '@/modules/shared/errors'.
 */
export class ArgumentError extends BaseError {
  static defaultMessage = 'Invalid argument value provided'

  /**
   * @param message - optional human readable description of the problem
   * @param options - optional verror options, or the error that caused this one
   */
  constructor(message?: string, options?: Options | Error) {
    super(message, options)
  }
}
/**
 * Cheap estimate of how many bytes a string takes up.
 *
 * The result is simply `str.length`, i.e. the number of UTF-16 code units.
 * For ASCII-only text this equals the UTF-8 encoded byte length; any non-ASCII
 * character needs more bytes than that when encoded as UTF-8, so the value is
 * an underestimate for such input. It is also not the in-memory size, which
 * would be roughly `length * 2` for UTF-16.
 *
 * The estimate is only used to compute an artificial limit and the data is
 * expected to be mostly ASCII, so a fast but slightly imprecise number is
 * preferred over an exact one.
 *
 * @param str - string to measure
 * @returns estimated size in bytes
 */
export const estimateStringByteSize = (str: string): number => {
  return str.length
}
/**
 * Estimates the size of a string in decimal megabytes (1 MB = 1,000,000 bytes).
 *
 * The byte count comes from `estimateStringByteSize`, so the same imprecision
 * applies here.
 *
 * @param str - string to measure
 * @returns estimated size in megabytes, possibly fractional
 */
export const estimateStringMegabyteSize = (str: string): number => {
  const estimatedBytes = estimateStringByteSize(str)
  return estimatedBytes / 1_000_000
}
/**
 * Splits `objects` into consecutive chunks, keeping the original order, so that
 * each chunk can be handled as a separate batch.
 *
 * A new chunk is started once the current one already holds `chunkLengthLimit`
 * objects, or once its estimated size (sum of `estimateStringMegabyteSize` over
 * each object's `data`) has reached `chunkSizeLimitMb`. Both limits are checked
 * before an object is added, which makes the size limit a soft one: a chunk can
 * overshoot it by the size of its last object, and a single object that is
 * larger than the limit still ends up in a chunk.
 *
 * @param params.objects - objects to split up
 * @param params.chunkSizeLimitMb - soft limit for the estimated chunk size in MB, must be > 0
 * @param params.chunkLengthLimit - maximum number of objects in a chunk, must be >= 1
 * @returns the chunks in input order; an empty array if `objects` is empty
 * @throws ArgumentError if one of the limits is out of range
 */
export const chunkInsertionObjectArray = ({
  objects,
  chunkSizeLimitMb,
  chunkLengthLimit
}: {
  chunkSizeLimitMb: number
  chunkLengthLimit: number
  objects: InsertionObject[]
}): InsertionObject[][] => {
  // a limit of exactly 1 is valid (one object per chunk), the message has to say so
  if (chunkLengthLimit < 1)
    throw new ArgumentError('Chunks must have a length limit >= 1')
  if (chunkSizeLimitMb <= 0)
    throw new ArgumentError('Chunks must have a size in MB limit > 0')

  const chunkedObjects: InsertionObject[][] = []
  let currentBatch: InsertionObject[] = []
  let currentChunkSize = 0

  for (const obj of objects) {
    // if a limit has been reached by the objects collected so far,
    // close the current batch and start a new one
    if (
      currentChunkSize >= chunkSizeLimitMb ||
      currentBatch.length >= chunkLengthLimit
    ) {
      chunkedObjects.push(currentBatch)
      currentBatch = []
      currentChunkSize = 0
    }

    currentBatch.push(obj)
    currentChunkSize += estimateStringMegabyteSize(obj.data)
  }

  // do not forget the final batch, but never emit an empty chunk
  // (the batch is only empty here when there were no objects at all)
  if (currentBatch.length > 0) chunkedObjects.push(currentBatch)

  return chunkedObjects
}