Skip to content

Commit e8deee3

Browse files
authored
Add timeouts and retries to LiveDebuggerTests when debugger is disabled (#7010)
## Summary of changes Adds a timeout to the `DataTarget.CreateSnapshotAndAttach()` call, and marks the `LiveDebuggerTests` as flaky. ## Reason for change The `LiveDebuggerTests` currently hang frequently; we need to understand why, and mitigate the issue. ## Implementation details - Add overall timeout to the `GetLiveObjectsByTypes` method - Add some logs to try to understand what's happening - Mark the test as flaky ## Test coverage N/A Example failures: [Example1](https://dev.azure.com/datadoghq/dd-trace-dotnet/_build/results?buildId=178295&view=logs&j=5bbbfeff-80c6-5e3a-ed15-df19bc2221a9&t=f97d6e34-8c18-5125-1e10-f8ff1691d2d4), [Example2](https://dev.azure.com/datadoghq/dd-trace-dotnet/_build/results?buildId=178288&view=logs&j=5a1533ef-d5b3-5244-8177-90f592ad9a02&t=889462ef-7425-5ee6-c218-8225a4de53e7), [Example3](https://dev.azure.com/datadoghq/dd-trace-dotnet/_build/results?buildId=178279&view=logs&j=5bbbfeff-80c6-5e3a-ed15-df19bc2221a9&t=f97d6e34-8c18-5125-1e10-f8ff1691d2d4), [Example4](https://dev.azure.com/datadoghq/dd-trace-dotnet/_build/results?buildId=178274&view=logs&j=dfbb55d7-038c-5432-f154-c7f295998250&t=cde1f8e2-299e-52ab-e384-edcffbdbb58d). ## Other details I suspect there's a race condition - maybe the app exits before the `DataTarget` can attach or something? _May_ want to rewrite to use the existing memory dump capabilities we have built in? If this continues to fail, we will likely need to skip it completely.
1 parent 4f0f743 commit e8deee3

File tree

3 files changed

+43
-4
lines changed

3 files changed

+43
-4
lines changed

tracer/test/Datadog.Trace.Debugger.IntegrationTests/Assertions/DumpHeapLive.cs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,13 @@
33
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc.
44
// </copyright>
55

6+
using System;
67
using System.Collections.Generic;
78
using System.Diagnostics;
9+
using System.Threading;
10+
using System.Threading.Tasks;
811
using Microsoft.Diagnostics.Runtime;
12+
using Xunit.Abstractions;
913

1014
namespace Datadog.Trace.Debugger.IntegrationTests.Assertions;
1115

@@ -16,26 +20,55 @@ namespace Datadog.Trace.Debugger.IntegrationTests.Assertions;
1620
/// </summary>
1721
internal static class DumpHeapLive
1822
{
19-
public static Dictionary<string, (int Live, int Disposed)> GetLiveObjectsByTypes(Process process)
23+
public static async Task<Dictionary<string, (int Live, int Disposed)>> GetLiveObjectsByTypes(Process process, ITestOutputHelper output, TimeSpan timeout)
24+
{
25+
using var cts = new CancellationTokenSource(timeout);
26+
27+
// Run the task in a background thread, and wait for it to complete or timeout.
28+
return await Task.Run(() => GetLiveObjectsByTypes(process, output, cts.Token), cts.Token);
29+
}
30+
31+
private static Dictionary<string, (int Live, int Disposed)> GetLiveObjectsByTypes(Process process, ITestOutputHelper output, CancellationToken ct)
2032
{
2133
var objCountByType = new Dictionary<string, (int Live, int Disposed)>();
2234
if (process.HasExited)
2335
{
2436
throw new MemoryAssertionException("Process has exited", string.Empty);
2537
}
2638

39+
output?.WriteLine($"Creating snapshot of process ID {process.Id}");
2740
using var target = DataTarget.CreateSnapshotAndAttach(process.Id);
41+
if (ct.IsCancellationRequested)
42+
{
43+
throw new OperationCanceledException();
44+
}
45+
46+
output?.WriteLine("Snapshot complete, analyzing heap...");
2847
var heap = target.ClrVersions[0].CreateRuntime().Heap;
2948
var considered = new ObjectSet(heap);
3049
var eval = new Stack<ulong>();
3150

51+
var rootCount = 0;
3252
foreach (var root in heap.EnumerateRoots())
3353
{
54+
rootCount++;
3455
eval.Push(root.Object);
3556
}
3657

58+
output?.WriteLine($"Found {rootCount} roots in the heap");
59+
var evalsComplete = 0;
3760
while (eval.Count > 0)
3861
{
62+
if (ct.IsCancellationRequested)
63+
{
64+
throw new OperationCanceledException();
65+
}
66+
67+
if (evalsComplete % 100 == 0)
68+
{
69+
output?.WriteLine($"Evaluated {evalsComplete} objects so far, {eval.Count} remaining.");
70+
}
71+
3972
var obj = eval.Pop();
4073
if (considered.Contains(obj))
4174
{
@@ -71,6 +104,7 @@ internal static class DumpHeapLive
71104
}
72105
}
73106

107+
output?.WriteLine($"Analysis complete, found {objCountByType.Count} object types in the heap");
74108
return objCountByType;
75109
}
76110

tracer/test/Datadog.Trace.Debugger.IntegrationTests/Assertions/MemoryAssertions.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
using System.Diagnostics;
99
using System.Reflection;
1010
using System.Runtime.InteropServices;
11+
using System.Threading.Tasks;
12+
using Xunit.Abstractions;
1113

1214
namespace Datadog.Trace.Debugger.IntegrationTests.Assertions;
1315

@@ -32,15 +34,16 @@ private MemoryAssertions(Dictionary<string, (int Live, int Disposed)> liveObject
3234
/// which can perform assertions on said snapshot.
3335
/// </summary>
3436
/// <param name="process">Process to capture snapshot of</param>
37+
/// <param name="output">The test output helper</param>
3538
/// <returns>MemoryAssertions</returns>
36-
public static MemoryAssertions CaptureSnapshotToAssertOn(Process process)
39+
public static async Task<MemoryAssertions> CaptureSnapshotToAssertOn(Process process, ITestOutputHelper output)
3740
{
3841
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
3942
{
4043
throw new NotSupportedException("Arm64 is not supported for memory assertions");
4144
}
4245

43-
var liveObjectsByTypes = DumpHeapLive.GetLiveObjectsByTypes(process);
46+
var liveObjectsByTypes = await DumpHeapLive.GetLiveObjectsByTypes(process, output, TimeSpan.FromSeconds(30));
4447
return new MemoryAssertions(liveObjectsByTypes);
4548
}
4649

tracer/test/Datadog.Trace.Debugger.IntegrationTests/LiveDebuggerTests.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public LiveDebuggerTests(ITestOutputHelper output)
3939
[Trait("Category", "ArmUnsupported")]
4040
[Trait("RunOnWindows", "True")]
4141
[Trait("Category", "LinuxUnsupported")]
42+
[Flaky("The explicitly disabled tests often hang on x86 .NET and .NET 8, when the debugger is disabled. Needs investigation.")]
4243
public async Task LiveDebuggerDisabled_DebuggerDisabledByDefault_NoDebuggerTypesCreated()
4344
{
4445
await RunTest();
@@ -49,6 +50,7 @@ public async Task LiveDebuggerDisabled_DebuggerDisabledByDefault_NoDebuggerTypes
4950
[Trait("Category", "ArmUnsupported")]
5051
[Trait("RunOnWindows", "True")]
5152
[Trait("Category", "LinuxUnsupported")]
53+
[Flaky("The explicitly disabled tests often hang on x86 .NET and .NET 8, when the debugger is disabled. Needs investigation.")]
5254
public async Task LiveDebuggerDisabled_DebuggerExplicitlyDisabled_NoDebuggerTypesCreated()
5355
{
5456
SetEnvironmentVariable(ConfigurationKeys.Debugger.Enabled, "0");
@@ -68,7 +70,7 @@ private async Task RunTest()
6870

6971
try
7072
{
71-
var memoryAssertions = MemoryAssertions.CaptureSnapshotToAssertOn(sample);
73+
var memoryAssertions = await MemoryAssertions.CaptureSnapshotToAssertOn(sample, Output);
7274

7375
memoryAssertions.NoObjectsExist<SnapshotSink>();
7476
memoryAssertions.NoObjectsExist<LineProbeResolver>();

0 commit comments

Comments
 (0)