feat(server): improving knex query stack traces (#3855)
* feat(server): improving knex query stack traces * minor cleanup * more improvements
This commit is contained in:
committed by
GitHub
parent
92d5fe4066
commit
347fa4b462
Vendored
+7
-4
@@ -30,10 +30,6 @@ if (isApolloMonitoringEnabled() && !getApolloServerVersion()) {
|
||||
process.env.APOLLO_SERVER_USER_VERSION = getServerVersion()
|
||||
}
|
||||
|
||||
//knex is a singleton controlled by module so can't wait til app init
|
||||
const { initOpenTelemetry } = require('./otel')
|
||||
initOpenTelemetry()
|
||||
|
||||
// If running in test env, load .env.test first
|
||||
// (appRoot necessary, cause env files aren't loaded through require() calls)
|
||||
if (isTestEnv()) {
|
||||
@@ -49,6 +45,13 @@ if (isTestEnv()) {
|
||||
|
||||
dotenv.config({ path: `${packageRoot}/.env` })
|
||||
|
||||
// knex is a singleton controlled by module so can't wait til app init
|
||||
const { initOpenTelemetry } = require('./otel')
|
||||
initOpenTelemetry()
|
||||
|
||||
const { patchKnex } = require('./modules/core/patches/knex')
|
||||
patchKnex()
|
||||
|
||||
module.exports = {
|
||||
appRoot,
|
||||
packageRoot
|
||||
|
||||
@@ -5,6 +5,7 @@ import { Logger } from 'pino'
|
||||
import { toNDecimalPlaces } from '@/modules/core/utils/formatting'
|
||||
import { omit } from 'lodash'
|
||||
import { getRequestContext } from '@/logging/requestContext'
|
||||
import { collectLongTrace } from '@speckle/shared'
|
||||
|
||||
let metricQueryDuration: prometheusClient.Summary<string>
|
||||
let metricQueryErrors: prometheusClient.Counter<string>
|
||||
@@ -183,6 +184,7 @@ interface QueryEvent extends Knex.Sql {
|
||||
__knexUid: string
|
||||
__knexTxId: string
|
||||
__knexQueryUid: string
|
||||
__stackTrace: string
|
||||
}
|
||||
|
||||
const initKnexPrometheusMetricsForRegionEvents = async (params: {
|
||||
@@ -191,20 +193,25 @@ const initKnexPrometheusMetricsForRegionEvents = async (params: {
|
||||
logger: Logger
|
||||
}) => {
|
||||
const { region, db } = params
|
||||
const queryStartTime: Record<string, number> = {}
|
||||
const queryMetadata: Record<string, { startTime: number; stackTrace: string }> = {}
|
||||
const connectionAcquisitionStartTime: Record<string, number> = {}
|
||||
const connectionInUseStartTime: Record<string, number> = {}
|
||||
|
||||
db.on('query', (data: QueryEvent) => {
|
||||
const queryId = data.__knexQueryUid + ''
|
||||
queryStartTime[queryId] = performance.now()
|
||||
queryMetadata[queryId] = {
|
||||
startTime: performance.now(),
|
||||
stackTrace: data.__stackTrace
|
||||
}
|
||||
})
|
||||
|
||||
db.on('query-response', (_response: unknown, data: QueryEvent) => {
|
||||
const queryId = data.__knexQueryUid + ''
|
||||
const durationMs = performance.now() - queryStartTime[queryId]
|
||||
const { startTime = NaN, stackTrace = undefined } = queryMetadata[queryId] || {}
|
||||
|
||||
const durationMs = performance.now() - startTime
|
||||
const durationSec = toNDecimalPlaces(durationMs / 1000, 2)
|
||||
delete queryStartTime[queryId]
|
||||
delete queryMetadata[queryId]
|
||||
if (!isNaN(durationSec))
|
||||
metricQueryDuration
|
||||
.labels({
|
||||
@@ -214,7 +221,6 @@ const initKnexPrometheusMetricsForRegionEvents = async (params: {
|
||||
})
|
||||
.observe(durationSec)
|
||||
|
||||
const trace = (new Error().stack || '').split('\n').slice(1).join('\n').trim()
|
||||
const reqCtx = getRequestContext()
|
||||
|
||||
// Update reqCtx with DB query metrics
|
||||
@@ -223,6 +229,7 @@ const initKnexPrometheusMetricsForRegionEvents = async (params: {
|
||||
reqCtx.dbMetrics.totalDuration += durationMs || 0
|
||||
}
|
||||
|
||||
const trace = stackTrace || collectLongTrace()
|
||||
params.logger.info(
|
||||
{
|
||||
region,
|
||||
@@ -240,9 +247,11 @@ const initKnexPrometheusMetricsForRegionEvents = async (params: {
|
||||
|
||||
db.on('query-error', (err: unknown, data: QueryEvent) => {
|
||||
const queryId = data.__knexQueryUid + ''
|
||||
const durationMs = performance.now() - queryStartTime[queryId]
|
||||
const { startTime = NaN, stackTrace = undefined } = queryMetadata[queryId] || {}
|
||||
|
||||
const durationMs = performance.now() - startTime
|
||||
const durationSec = toNDecimalPlaces(durationMs / 1000, 2)
|
||||
delete queryStartTime[queryId]
|
||||
delete queryMetadata[queryId]
|
||||
|
||||
if (!isNaN(durationSec))
|
||||
metricQueryDuration
|
||||
@@ -254,7 +263,6 @@ const initKnexPrometheusMetricsForRegionEvents = async (params: {
|
||||
.observe(durationSec)
|
||||
metricQueryErrors.inc()
|
||||
|
||||
const trace = (new Error().stack || '').split('\n').slice(1).join('\n').trim()
|
||||
const reqCtx = getRequestContext()
|
||||
|
||||
// Update reqCtx with DB query metrics
|
||||
@@ -263,6 +271,7 @@ const initKnexPrometheusMetricsForRegionEvents = async (params: {
|
||||
reqCtx.dbMetrics.totalDuration += durationMs || 0
|
||||
}
|
||||
|
||||
const trace = stackTrace || collectLongTrace()
|
||||
params.logger.warn(
|
||||
{
|
||||
err: typeof err === 'object' ? omit(err, 'detail') : err,
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { enableImprovedKnexTelemetryStackTraces } from '@/modules/shared/helpers/envHelper'
|
||||
import { collectLongTrace } from '@speckle/shared'
|
||||
import knexQueryCompiler from 'knex/lib/query/querycompiler'
|
||||
|
||||
/**
|
||||
* ⚠️ HERE BE DARKNESS! ⚠️
|
||||
* We're monkey patching knex which is pretty cursed, but i'm not aware of any other way
|
||||
* to get a proper stack trace in query event handlers
|
||||
*/
|
||||
|
||||
export const patchKnex = () => {
|
||||
if (enableImprovedKnexTelemetryStackTraces()) {
|
||||
// Preserve stack trace on query compilation
|
||||
const originalToSQL = knexQueryCompiler.prototype.toSQL
|
||||
const newToSQL: typeof originalToSQL = function (
|
||||
this: typeof knexQueryCompiler.prototype,
|
||||
...args: any
|
||||
) {
|
||||
const ret = originalToSQL.apply(this, args)
|
||||
ret.__stackTrace = collectLongTrace()
|
||||
return ret
|
||||
}
|
||||
|
||||
knexQueryCompiler.prototype.toSQL = newToSQL
|
||||
}
|
||||
}
|
||||
@@ -443,3 +443,7 @@ export const knexAsyncStackTracesEnabled = () => {
|
||||
export const asyncRequestContextEnabled = () => {
|
||||
return getBooleanFromEnv('ASYNC_REQUEST_CONTEXT_ENABLED')
|
||||
}
|
||||
|
||||
export function enableImprovedKnexTelemetryStackTraces() {
|
||||
return getBooleanFromEnv('KNEX_IMPROVED_TELEMETRY_STACK_TRACES')
|
||||
}
|
||||
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
// used in patches/knex.ts
|
||||
declare module 'knex/lib/query/querycompiler' {
|
||||
declare class QueryCompiler {
|
||||
toSQL(
|
||||
method: string,
|
||||
tz: string
|
||||
): {
|
||||
method: string
|
||||
options: Record<string, unknown>
|
||||
timeout: number | false
|
||||
cancelOnTimeout: boolean
|
||||
bindings: unknown[]
|
||||
__knexQueryUid: string
|
||||
/**
|
||||
* This one's patched in by us for telemetry purposes
|
||||
*/
|
||||
__stackTrace: string
|
||||
}
|
||||
}
|
||||
|
||||
export default QueryCompiler
|
||||
}
|
||||
@@ -20,3 +20,14 @@ export function throwUncoveredError(e: never): never {
|
||||
export function createUncoveredError(e: never) {
|
||||
return new Error(`Uncovered error case ${e}.`)
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: Only V8 and Node.js support controlling the stack trace limit
|
||||
*/
|
||||
export const collectLongTrace = (limit?: number) => {
|
||||
const originalLimit = Error.stackTraceLimit
|
||||
Error.stackTraceLimit = limit || 30
|
||||
const trace = (new Error().stack || '').split('\n').slice(1).join('\n').trim()
|
||||
Error.stackTraceLimit = originalLimit
|
||||
return trace
|
||||
}
|
||||
|
||||
@@ -782,6 +782,11 @@ Generate the environment variables for Speckle server and Speckle objects deploy
|
||||
value: {{ .Values.db.knexAsyncStackTracesEnabled | quote }}
|
||||
{{- end}}
|
||||
|
||||
{{- if .Values.db.knexImprovedTelemetryStackTraces }}
|
||||
- name: KNEX_IMPROVED_TELEMETRY_STACK_TRACES
|
||||
value: {{ .Values.db.knexImprovedTelemetryStackTraces | quote }}
|
||||
{{- end}}
|
||||
|
||||
- name: PGSSLMODE
|
||||
value: "{{ .Values.db.PGSSLMODE }}"
|
||||
|
||||
|
||||
@@ -246,6 +246,11 @@
|
||||
"description": "If enabled, will provide better stack traces for errors arising out of knex operations",
|
||||
"default": false
|
||||
},
|
||||
"knexImprovedTelemetryStackTraces": {
|
||||
"type": "boolean",
|
||||
"description": "If enabled, will provide better stack traces for knex telemetry",
|
||||
"default": false
|
||||
},
|
||||
"databaseName": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The name of the Postgres database to which Speckle will connect. Only required for the Database Monitoring utility when the connection string is to a database connection pool and multi-region is disabled, otherwise this value is ignored.",
|
||||
|
||||
@@ -197,6 +197,9 @@ db:
|
||||
## @param db.knexAsyncStackTracesEnabled If enabled, will provide better stack traces for errors arising out of knex operations
|
||||
##
|
||||
knexAsyncStackTracesEnabled: false
|
||||
## @param db.knexImprovedTelemetryStackTraces If enabled, will provide better stack traces for knex telemetry
|
||||
##
|
||||
knexImprovedTelemetryStackTraces: false
|
||||
|
||||
## @param db.databaseName (Optional) The name of the Postgres database to which Speckle will connect. Only required for the Database Monitoring utility when the connection string is to a database connection pool and multi-region is disabled, otherwise this value is ignored.
|
||||
databaseName: ''
|
||||
|
||||
Reference in New Issue
Block a user