From e795381bd2840cab70466df0867437d1e5518723 Mon Sep 17 00:00:00 2001 From: Iain Sproat <68657+iainsproat@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:03:50 +0000 Subject: [PATCH] fix(server): prometheus summary should be over rolling window - currently it calculates based on all values from start, and should instead expire old buckets https://github.com/siimon/prom-client?tab=readme-ov-file#configuration-2 --- packages/server/modules/previews/observability/metrics.ts | 7 +++++-- .../server/observability/components/knex/knexMonitoring.ts | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/packages/server/modules/previews/observability/metrics.ts b/packages/server/modules/previews/observability/metrics.ts index a982839ed..a25d076bd 100644 --- a/packages/server/modules/previews/observability/metrics.ts +++ b/packages/server/modules/previews/observability/metrics.ts @@ -1,3 +1,4 @@ +import { TIME } from '@speckle/shared' import Bull from 'bull' import { type Registry, Counter, Summary, Gauge } from 'prom-client' @@ -107,8 +108,10 @@ export const initializeMetrics = (params: { ) const previewJobsProcessedSummary = new Summary<'status'>({ name: 'speckle_server_preview_jobs_processed_duration_seconds', - help: 'Duration of preview job processing, in seconds', - labelNames: ['status'] + help: 'Duration of preview job processing, in seconds, as sampled over a period of 1 minute.', + labelNames: ['status'], + maxAgeSeconds: 1 * TIME.minute, + ageBuckets: 5 }) return { previewJobsProcessedSummary } diff --git a/packages/server/observability/components/knex/knexMonitoring.ts b/packages/server/observability/components/knex/knexMonitoring.ts index 8e4981d21..81327e61d 100644 --- a/packages/server/observability/components/knex/knexMonitoring.ts +++ b/packages/server/observability/components/knex/knexMonitoring.ts @@ -5,7 +5,7 @@ import { Logger } from 'pino' import { toNDecimalPlaces } from '@/modules/core/utils/formatting' import { omit } from 'lodash' import { getRequestContext } from '@/observability/components/express/requestContext' -import { collectLongTrace } from '@speckle/shared' +import { collectLongTrace, TIME } from '@speckle/shared' let metricQueryDuration: Summary let metricQueryErrors: Counter @@ -134,7 +134,9 @@ export const initKnexPrometheusMetrics = async (params: { registers, labelNames: ['sqlMethod', 'sqlNumberBindings', 'region'], name: 'speckle_server_knex_query_duration', - help: 'Summary of the DB query durations in seconds' + help: 'Summary of the DB query durations in seconds, as computed over the last 1 minute.', + maxAgeSeconds: 1 * TIME.minute, + ageBuckets: 5 }) registers.forEach((r) => r.removeSingleMetric('speckle_server_knex_query_errors'))