68 lines
2.2 KiB
TypeScript
68 lines
2.2 KiB
TypeScript
import { BaseError } from '@/modules/shared/errors'
|
|
import { Options } from 'verror'
|
|
|
|
/**
 * Minimal shape of an object accepted by `chunkInsertionObjectArray`.
 * Only `data` is read there, to estimate how much each object contributes
 * to the size of a chunk.
 */
type InsertionObject = {
  /** String payload whose length drives the size estimate. */
  data: string
}
|
|
|
|
/**
 * Error thrown when a function is called with an invalid argument value.
 *
 * NOTE(review): `defaultMessage` is presumably used by `BaseError` when no
 * message is passed to the constructor - confirm against the base class.
 */
export class ArgumentError extends BaseError {
  static defaultMessage = 'Invalid argument value provided'

  /**
   * @param message Optional error message, forwarded unchanged to `BaseError`.
   * @param options Optional `verror` options object or an `Error` instance,
   *   forwarded unchanged to `BaseError`.
   */
  constructor(message?: string, options?: Options | Error) {
    super(message, options)
  }
}
|
|
|
|
/**
 * Fast, deliberately imprecise estimate of a string's size in bytes.
 *
 * It is mostly used for artificial limit calculations, so speed matters more
 * than precision. The value returned is `str.length`, i.e. the number of
 * UTF-16 code units:
 * - it is NOT the in-memory size (JS strings are UTF-16, so that would be
 *   roughly `length * 2`);
 * - it is NOT the exact UTF-8 encoded size either.
 *
 * Since the data is mostly ASCII, `str.length` is close to the UTF-8 byte
 * count and only slightly underestimates it when non-ASCII characters occur.
 *
 * @param str String to measure.
 * @returns Estimated size in bytes (`str.length`).
 */
export const estimateStringByteSize = (str: string): number => str.length
|
|
/**
 * Estimated size of a string in megabytes (decimal: 1 MB = 1,000,000 bytes).
 *
 * Built on `estimateStringByteSize`, so it carries the same imprecision.
 *
 * @param str String to measure.
 * @returns Estimated size in MB; `0` for an empty string.
 */
export const estimateStringMegabyteSize = (str: string): number => {
  const bytesPerMegabyte = 1_000_000
  return estimateStringByteSize(str) / bytesPerMegabyte
}
|
|
|
|
/**
 * Splits `objects` into consecutive chunks, preserving the input order.
 *
 * A chunk is closed as soon as it holds `chunkLengthLimit` objects or its
 * estimated size (sum of `estimateStringMegabyteSize(obj.data)`) has reached
 * `chunkSizeLimitMb`. The limits are checked before the next object is added,
 * so the size limit is soft: a chunk can overshoot it by the object that
 * crossed the threshold.
 *
 * @param params.objects Objects to split into chunks.
 * @param params.chunkSizeLimitMb Soft size limit per chunk in MB; must be > 0.
 * @param params.chunkLengthLimit Max number of objects per chunk; must be >= 1.
 * @returns The chunks in order. Every returned chunk is non-empty, so an
 *   empty `objects` array yields `[]`.
 * @throws ArgumentError if either limit is out of range or NaN.
 */
export const chunkInsertionObjectArray = ({
  objects,
  chunkSizeLimitMb,
  chunkLengthLimit
}: {
  chunkSizeLimitMb: number
  chunkLengthLimit: number
  objects: InsertionObject[]
}): InsertionObject[][] => {
  // Negated comparisons on purpose: they also reject NaN, which would
  // otherwise pass validation and disable the corresponding limit entirely.
  if (!(chunkLengthLimit >= 1))
    throw new ArgumentError('Chunks must have a length limit >= 1')
  if (!(chunkSizeLimitMb > 0))
    throw new ArgumentError('Chunks must have a size in MB limit > 0')

  const chunkedObjects: InsertionObject[][] = []
  let currentBatch: InsertionObject[] = []
  let currentChunkSize = 0

  for (const obj of objects) {
    // If the current batch has hit a limit, close it and start a new one.
    // The batch is never empty here: both limits are positive, so a fresh
    // batch (size 0, length 0) cannot satisfy either condition.
    const batchIsFull =
      currentChunkSize >= chunkSizeLimitMb || currentBatch.length >= chunkLengthLimit
    if (batchIsFull) {
      chunkedObjects.push(currentBatch)
      currentBatch = []
      currentChunkSize = 0
    }

    currentBatch.push(obj)
    currentChunkSize += estimateStringMegabyteSize(obj.data)
  }

  // Flush the trailing batch, but never emit an empty chunk (empty input).
  if (currentBatch.length > 0) chunkedObjects.push(currentBatch)

  return chunkedObjects
}
|