Skip to content

Commit 404787c

Browse files
nsavoiretlhunter
authored andcommitted
[profiling] Associate tracing span IDs with samples in the wall profiler (#3371)
* Initial code for code hotspots and endpoint aggregation in wall profiler Hide functionality behind DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED and DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED flags. Profiler tracks changes of active span by subscribing to async hooks before channel and to AsyncResourceStorage enter channel that notifies when current store is changed with enterWith/run. Profiler remembers previous span and previous started spans, upon change of active span, it checks if a sample has been taken by native profiler addon, then if that's the case, it updates the context of the sample with span id and root span id computed from previous span / started spans. It also add the tags from the last web span to the context to allow computation of endpoint later on. Minor changes to other parts of the code to make it all work: === * Report error if start fails * Avoid clearing span context tags upon export Profiler may need the tags to determine span resource name for endpoint aggregation: profiler keeps a reference on web span that was active when each profiling sample is taken. Then during profile export, span tags are used to determine endpoint. Endpoint cannot be determined right away because sometimes tags necessary to determine endpoint are only set just before span is finished. * Add request tags just after span creation For web spans, add request tags just after span creation instead of when span finishes. This is needed for profiler code hotspots implementation to determine which span are web spans and should be used to find current endpoint.
1 parent 4b02e9e commit 404787c

File tree

9 files changed

+184
-27
lines changed

9 files changed

+184
-27
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
"@datadog/native-iast-rewriter": "2.0.1",
7171
"@datadog/native-iast-taint-tracking": "^1.5.0",
7272
"@datadog/native-metrics": "^1.6.0",
73-
"@datadog/pprof": "3.0.0",
73+
"@datadog/pprof": "3.1.0",
7474
"@datadog/sketches-js": "^2.1.0",
7575
"@types/node": "<18.13",
7676
"@opentelemetry/api": "^1.0.0",

packages/datadog-core/src/storage/async_resource.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const { channel } = require('../../../diagnostics_channel')
55

66
const beforeCh = channel('dd-trace:storage:before')
77
const afterCh = channel('dd-trace:storage:after')
8+
const enterCh = channel('dd-trace:storage:enter')
89

910
let PrivateSymbol = Symbol
1011
function makePrivateSymbol () {
@@ -52,6 +53,7 @@ class AsyncResourceStorage {
5253
const resource = this._executionAsyncResource()
5354

5455
resource[this._ddResourceStore] = store
56+
enterCh.publish()
5557
}
5658

5759
run (store, callback, ...args) {
@@ -61,11 +63,13 @@ class AsyncResourceStorage {
6163
const oldStore = resource[this._ddResourceStore]
6264

6365
resource[this._ddResourceStore] = store
66+
enterCh.publish()
6467

6568
try {
6669
return callback(...args)
6770
} finally {
6871
resource[this._ddResourceStore] = oldStore
72+
enterCh.publish()
6973
}
7074
}
7175

packages/dd-trace/src/plugins/util/web.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ const web = {
103103
context.res = res
104104

105105
this.setConfig(req, config)
106+
addRequestTags(context)
106107

107108
return span
108109
},

packages/dd-trace/src/profiling/config.js

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ class Config {
1818
const {
1919
DD_PROFILING_ENABLED,
2020
DD_PROFILING_PROFILERS,
21-
DD_PROFILING_ENDPOINT_COLLECTION_ENABLED,
2221
DD_ENV,
2322
DD_TAGS,
2423
DD_SERVICE,
@@ -36,7 +35,9 @@ class Config {
3635
DD_PROFILING_EXPERIMENTAL_OOM_MONITORING_ENABLED,
3736
DD_PROFILING_EXPERIMENTAL_OOM_HEAP_LIMIT_EXTENSION_SIZE,
3837
DD_PROFILING_EXPERIMENTAL_OOM_MAX_HEAP_EXTENSION_COUNT,
39-
DD_PROFILING_EXPERIMENTAL_OOM_EXPORT_STRATEGIES
38+
DD_PROFILING_EXPERIMENTAL_OOM_EXPORT_STRATEGIES,
39+
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED,
40+
DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED
4041
} = process.env
4142

4243
const enabled = isTrue(coalesce(options.enabled, DD_PROFILING_ENABLED, true))
@@ -51,8 +52,8 @@ class Config {
5152
Number(DD_PROFILING_UPLOAD_TIMEOUT), 60 * 1000)
5253
const sourceMap = coalesce(options.sourceMap,
5354
DD_PROFILING_SOURCE_MAP, true)
54-
const endpointCollection = coalesce(options.endpointCollection,
55-
DD_PROFILING_ENDPOINT_COLLECTION_ENABLED, false)
55+
const endpointCollectionEnabled = coalesce(options.endpointCollection,
56+
DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED, false)
5657
const pprofPrefix = coalesce(options.pprofPrefix,
5758
DD_PROFILING_PPROF_PREFIX, '')
5859

@@ -73,7 +74,7 @@ class Config {
7374
this.uploadTimeout = uploadTimeout
7475
this.sourceMap = sourceMap
7576
this.debugSourceMaps = isTrue(coalesce(options.debugSourceMaps, DD_PROFILING_DEBUG_SOURCE_MAPS, false))
76-
this.endpointCollection = endpointCollection
77+
this.endpointCollectionEnabled = endpointCollectionEnabled
7778
this.pprofPrefix = pprofPrefix
7879

7980
const hostname = coalesce(options.hostname, DD_AGENT_HOST) || 'localhost'
@@ -110,6 +111,8 @@ class Config {
110111
const profilers = options.profilers
111112
? options.profilers
112113
: getProfilers({ DD_PROFILING_HEAP_ENABLED, DD_PROFILING_WALLTIME_ENABLED, DD_PROFILING_PROFILERS })
114+
this.codeHotspotsEnabled = isTrue(coalesce(options.codeHotspotsEnabled,
115+
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED, false))
113116

114117
this.profilers = ensureProfilers(profilers, this)
115118
}

packages/dd-trace/src/profiling/profiler.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class Profiler extends EventEmitter {
2323
}
2424

2525
start (options) {
26-
this._start(options).catch(() => {})
26+
this._start(options).catch((err) => { if (options.logger) options.logger.error(err) })
2727
return this
2828
}
2929

packages/dd-trace/src/profiling/profilers/wall.js

Lines changed: 145 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,111 @@
11
'use strict'
22

3+
const { storage } = require('../../../../datadog-core')
4+
5+
const dc = require('../../../../diagnostics_channel')
6+
7+
const beforeCh = dc.channel('dd-trace:storage:before')
8+
const enterCh = dc.channel('dd-trace:storage:enter')
9+
10+
let kSampleCount
11+
12+
function getActiveSpan () {
13+
const store = storage.getStore()
14+
return store && store.span
15+
}
16+
17+
function getStartedSpans (context) {
18+
return context._trace.started
19+
}
20+
21+
function generateLabels ({ spanId, rootSpanId, webTags, endpoint }) {
22+
const labels = {}
23+
if (spanId) {
24+
labels['span id'] = spanId
25+
}
26+
if (rootSpanId) {
27+
labels['local root span id'] = rootSpanId
28+
}
29+
if (webTags && Object.keys(webTags).length !== 0) {
30+
labels['trace endpoint'] = endpointNameFromTags(webTags)
31+
} else if (endpoint) {
32+
// fallback to endpoint computed when sample was taken
33+
labels['trace endpoint'] = endpoint
34+
}
35+
36+
return labels
37+
}
38+
39+
function getSpanContextTags (span) {
40+
return span.context()._tags
41+
}
42+
43+
function isWebServerSpan (tags) {
44+
return tags['span.type'] === 'web'
45+
}
46+
47+
function endpointNameFromTags (tags) {
48+
return tags['resource.name'] || [
49+
tags['http.method'],
50+
tags['http.route']
51+
].filter(v => v).join(' ')
52+
}
53+
54+
function updateContext (context, span, startedSpans, endpointCollectionEnabled) {
55+
context.spanId = span.context().toSpanId()
56+
const rootSpan = startedSpans[0]
57+
if (rootSpan) {
58+
context.rootSpanId = rootSpan.context().toSpanId()
59+
if (endpointCollectionEnabled) {
60+
// Find the first webspan starting from the end:
61+
// There might be several webspans, for example with next.js, http plugin creates a first span
62+
// and then next.js plugin creates a child span, and this child span haves the correct endpoint information.
63+
for (let i = startedSpans.length - 1; i >= 0; i--) {
64+
const tags = getSpanContextTags(startedSpans[i])
65+
if (isWebServerSpan(tags)) {
66+
context.webTags = tags
67+
// endpoint may not be determined yet, but keep it as fallback
68+
// if tags are not available anymore during serialization
69+
context.endpoint = endpointNameFromTags(tags)
70+
break
71+
}
72+
}
73+
}
74+
}
75+
}
76+
377
class NativeWallProfiler {
478
constructor (options = {}) {
579
this.type = 'wall'
680
this._samplingIntervalMicros = options.samplingInterval || 1e6 / 99 // 99hz
781
this._flushIntervalMillis = options.flushInterval || 60 * 1e3 // 60 seconds
882
this._codeHotspotsEnabled = !!options.codeHotspotsEnabled
83+
this._endpointCollectionEnabled = !!options.endpointCollectionEnabled
984
this._mapper = undefined
1085
this._pprof = undefined
1186

87+
// Bind to this so the same value can be used to unsubscribe later
88+
this._enter = this._enter.bind(this)
1289
this._logger = options.logger
1390
this._started = false
1491
}
1592

93+
codeHotspotsEnabled () {
94+
return this._codeHotspotsEnabled
95+
}
96+
1697
start ({ mapper } = {}) {
1798
if (this._started) return
1899

100+
if (this._codeHotspotsEnabled && !this._emittedFFMessage && this._logger) {
101+
this._logger.debug(
102+
`Wall profiler: Enable config_trace_show_breakdown_profiling_for_node feature flag to see code hotspots.`)
103+
this._emittedFFMessage = true
104+
}
105+
19106
this._mapper = mapper
20107
this._pprof = require('@datadog/pprof')
108+
kSampleCount = this._pprof.time.constants.kSampleCount
21109

22110
// pprof otherwise crashes in worker threads
23111
if (!process._startProfilerIdleNotifier) {
@@ -31,16 +119,63 @@ class NativeWallProfiler {
31119
intervalMicros: this._samplingIntervalMicros,
32120
durationMillis: this._flushIntervalMillis,
33121
sourceMapper: this._mapper,
34-
customLabels: this._codeHotspotsEnabled,
122+
withContexts: this._codeHotspotsEnabled,
35123
lineNumbers: false
36124
})
37125

126+
if (this._codeHotspotsEnabled) {
127+
this._profilerState = this._pprof.time.getState()
128+
this._currentContext = {}
129+
this._pprof.time.setContext(this._currentContext)
130+
this._profilerState = undefined
131+
this._lastSpan = undefined
132+
this._lastStartedSpans = undefined
133+
this._lastSampleCount = 0
134+
135+
beforeCh.subscribe(this._enter)
136+
enterCh.subscribe(this._enter)
137+
}
138+
38139
this._started = true
39140
}
40141

41-
profile () {
142+
_enter () {
42143
if (!this._started) return
43-
return this._pprof.time.stop(true)
144+
145+
const sampleCount = this._profilerState[kSampleCount]
146+
if (sampleCount !== this._lastSampleCount) {
147+
this._lastSampleCount = sampleCount
148+
const context = this._currentContext
149+
this._currentContext = {}
150+
this._pprof.time.setContext(this._currentContext)
151+
152+
if (this._lastSpan) {
153+
updateContext(context, this._lastSpan, this._lastStartedSpans, this._endpointCollectionEnabled)
154+
}
155+
}
156+
157+
const span = getActiveSpan()
158+
if (span) {
159+
this._lastSpan = span
160+
this._lastStartedSpans = getStartedSpans(span.context())
161+
} else {
162+
this._lastStartedSpans = undefined
163+
this._lastSpan = undefined
164+
}
165+
}
166+
167+
_stop (restart) {
168+
if (!this._started) return
169+
if (this._codeHotspotsEnabled) {
170+
// update last sample context if needed
171+
this._enter()
172+
this._lastSampleCount = 0
173+
}
174+
return this._pprof.time.stop(restart, this._codeHotspotsEnabled ? generateLabels : undefined)
175+
}
176+
177+
profile () {
178+
return this._stop(true)
44179
}
45180

46181
encode (profile) {
@@ -50,7 +185,13 @@ class NativeWallProfiler {
50185
stop () {
51186
if (!this._started) return
52187

53-
const profile = this._pprof.time.stop()
188+
const profile = this._stop(false)
189+
if (this._codeHotspotsEnabled) {
190+
beforeCh.unsubscribe(this._enter)
191+
enterCh.subscribe(this._enter)
192+
this._profilerState = undefined
193+
}
194+
54195
this._started = false
55196
return profile
56197
}

packages/dd-trace/src/span_processor.js

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,6 @@ class SpanProcessor {
138138
}
139139
}
140140

141-
for (const span of trace.finished) {
142-
span.context()._tags = {}
143-
}
144-
145141
trace.started = active
146142
trace.finished = []
147143
}

packages/dd-trace/test/profiling/config.spec.js

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ describe('config', () => {
4242
expect(config.logger).to.be.an.instanceof(ConsoleLogger)
4343
expect(config.exporters[0]).to.be.an.instanceof(AgentExporter)
4444
expect(config.profilers[0]).to.be.an.instanceof(WallProfiler)
45+
expect(config.profilers[0].codeHotspotsEnabled()).false
4546
expect(config.profilers[1]).to.be.an.instanceof(SpaceProfiler)
4647
})
4748

@@ -57,8 +58,9 @@ describe('config', () => {
5758
error () { }
5859
},
5960
exporters: 'agent,file',
60-
profilers: 'wall',
61-
url: 'http://localhost:1234/'
61+
profilers: 'space,wall',
62+
url: 'http://localhost:1234/',
63+
codeHotspotsEnabled: true
6264
}
6365

6466
const config = new Config(options)
@@ -78,8 +80,10 @@ describe('config', () => {
7880
expect(config.exporters[0]._url.toString()).to.equal(options.url)
7981
expect(config.exporters[1]).to.be.an.instanceof(FileExporter)
8082
expect(config.profilers).to.be.an('array')
81-
expect(config.profilers.length).to.equal(1)
82-
expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler)
83+
expect(config.profilers.length).to.equal(2)
84+
expect(config.profilers[0]).to.be.an.instanceOf(SpaceProfiler)
85+
expect(config.profilers[1]).to.be.an.instanceOf(WallProfiler)
86+
expect(config.profilers[1].codeHotspotsEnabled()).true
8387
})
8488

8589
it('should filter out invalid profilers', () => {
@@ -127,7 +131,8 @@ describe('config', () => {
127131

128132
it('should support profiler config with DD_PROFILING_PROFILERS', () => {
129133
process.env = {
130-
DD_PROFILING_PROFILERS: 'wall'
134+
DD_PROFILING_PROFILERS: 'wall',
135+
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED: '1'
131136
}
132137
const options = {
133138
logger: {
@@ -143,6 +148,7 @@ describe('config', () => {
143148
expect(config.profilers).to.be.an('array')
144149
expect(config.profilers.length).to.equal(1)
145150
expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler)
151+
expect(config.profilers[0].codeHotspotsEnabled()).true
146152
})
147153

148154
it('should support profiler config with DD_PROFILING_XXX_ENABLED', () => {
@@ -190,7 +196,8 @@ describe('config', () => {
190196

191197
it('should prioritize options over env variables', () => {
192198
process.env = {
193-
DD_PROFILING_PROFILERS: 'wall'
199+
DD_PROFILING_PROFILERS: 'space',
200+
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED: '1'
194201
}
195202
const options = {
196203
logger: {
@@ -199,14 +206,16 @@ describe('config', () => {
199206
warn () {},
200207
error () {}
201208
},
202-
profilers: ['space']
209+
profilers: ['wall'],
210+
codeHotspotsEnabled: false
203211
}
204212

205213
const config = new Config(options)
206214

207215
expect(config.profilers).to.be.an('array')
208216
expect(config.profilers.length).to.equal(1)
209-
expect(config.profilers[0]).to.be.an.instanceOf(SpaceProfiler)
217+
expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler)
218+
expect(config.profilers[0].codeHotspotsEnabled()).false
210219
})
211220

212221
it('should support tags', () => {

0 commit comments

Comments
 (0)