Skip to content

Commit 891b99c

Browse files
tmdsKirillOsenkov
andauthored
When sharing the terminal with child nodes, wait for the children to terminate before exiting ourselves. (#6053)
This is an attempt to fix microsoft/vstest#2282. When sharing the terminal with child nodes, wait for the children to terminate before exiting ourselves. This avoids issues where the child changes terminal configuration after our own exit. Co-authored-by: Kirill Osenkov <[email protected]>
1 parent 3a87883 commit 891b99c

26 files changed

+264
-73
lines changed

scripts/Deploy-MSBuild.ps1

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,11 @@ $filesToCopyToBin = @(
7474
FileToCopy "$bootstrapBinDirectory\Microsoft.Managed.targets"
7575
FileToCopy "$bootstrapBinDirectory\Microsoft.Managed.Before.targets"
7676
FileToCopy "$bootstrapBinDirectory\Microsoft.Managed.After.targets"
77-
FileToCopy "$bootstrapBinDirectory\Microsoft.Net.props"
78-
FileToCopy "$bootstrapBinDirectory\Microsoft.NetFramework.CurrentVersion.props"
79-
FileToCopy "$bootstrapBinDirectory\Microsoft.NetFramework.CurrentVersion.targets"
80-
FileToCopy "$bootstrapBinDirectory\Microsoft.NetFramework.props"
81-
FileToCopy "$bootstrapBinDirectory\Microsoft.NetFramework.targets"
77+
FileToCopy "$bootstrapBinDirectory\Microsoft.NET.props"
78+
FileToCopy "$bootstrapBinDirectory\Microsoft.NETFramework.CurrentVersion.props"
79+
FileToCopy "$bootstrapBinDirectory\Microsoft.NETFramework.CurrentVersion.targets"
80+
FileToCopy "$bootstrapBinDirectory\Microsoft.NETFramework.props"
81+
FileToCopy "$bootstrapBinDirectory\Microsoft.NETFramework.targets"
8282
FileToCopy "$bootstrapBinDirectory\Microsoft.VisualBasic.CrossTargeting.targets"
8383
FileToCopy "$bootstrapBinDirectory\Microsoft.VisualBasic.CurrentVersion.targets"
8484
FileToCopy "$bootstrapBinDirectory\Microsoft.VisualBasic.targets"

src/Build/BackEnd/BuildManager/BuildManager.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ void Callback(object state)
629629
}
630630
}
631631

632-
ShutdownConnectedNodesAsync(true /* abort */);
632+
ShutdownConnectedNodes(true /* abort */);
633633
CheckForActiveNodesAndCleanUpSubmissions();
634634
}
635635
}
@@ -774,7 +774,7 @@ public void EndBuild()
774774
try
775775
{
776776
_noActiveSubmissionsEvent.WaitOne();
777-
ShutdownConnectedNodesAsync(false /* normal termination */);
777+
ShutdownConnectedNodes(false /* normal termination */);
778778
_noNodesActiveEvent.WaitOne();
779779

780780
// Wait for all of the actions in the work queue to drain. Wait() could throw here if there was an unhandled exception
@@ -1955,7 +1955,7 @@ public void Dispose()
19551955
/// Asks the nodeManager to tell the currently connected nodes to shut down and sets a flag preventing all non-shutdown-related packets from
19561956
/// being processed.
19571957
/// </summary>
1958-
private void ShutdownConnectedNodesAsync(bool abort)
1958+
private void ShutdownConnectedNodes(bool abort)
19591959
{
19601960
_shuttingDown = true;
19611961

src/Build/BackEnd/Components/Communications/NodeProviderOutOfProcBase.cs

Lines changed: 120 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
using Microsoft.Build.Utilities;
2626

2727
using BackendNativeMethods = Microsoft.Build.BackEnd.NativeMethods;
28+
using Task = System.Threading.Tasks.Task;
29+
using DotNetFrameworkArchitecture = Microsoft.Build.Shared.DotNetFrameworkArchitecture;
30+
using Microsoft.Build.Framework;
31+
using Microsoft.Build.BackEnd.Logging;
2832

2933
namespace Microsoft.Build.BackEnd
3034
{
@@ -49,6 +53,11 @@ internal abstract class NodeProviderOutOfProcBase
4953
/// </summary>
5054
private const int TimeoutForNewNodeCreation = 30000;
5155

56+
/// <summary>
57+
/// The amount of time to wait for an out-of-proc node to exit.
58+
/// </summary>
59+
private const int TimeoutForWaitForExit = 30000;
60+
5261
/// <summary>
5362
/// The build component host.
5463
/// </summary>
@@ -95,9 +104,30 @@ protected void ShutdownConnectedNodes(List<NodeContext> contextsToShutDown, bool
95104
// Send the build completion message to the nodes, causing them to shutdown or reset.
96105
_processesToIgnore.Clear();
97106

107+
// We wait for child nodes to exit to avoid them changing the terminal
108+
// after this process terminates.
109+
bool waitForExit = !enableReuse &&
110+
!Console.IsInputRedirected &&
111+
Traits.Instance.EscapeHatches.EnsureStdOutForChildNodesIsPrimaryStdout;
112+
113+
Task[] waitForExitTasks = waitForExit && contextsToShutDown.Count > 0 ? new Task[contextsToShutDown.Count] : null;
114+
int i = 0;
115+
var loggingService = _componentHost.LoggingService;
98116
foreach (NodeContext nodeContext in contextsToShutDown)
99117
{
100-
nodeContext?.SendData(new NodeBuildComplete(enableReuse));
118+
if (nodeContext is null)
119+
{
120+
continue;
121+
}
122+
nodeContext.SendData(new NodeBuildComplete(enableReuse));
123+
if (waitForExit)
124+
{
125+
waitForExitTasks[i++] = nodeContext.WaitForExitAsync(loggingService);
126+
}
127+
}
128+
if (waitForExitTasks != null)
129+
{
130+
Task.WaitAll(waitForExitTasks);
101131
}
102132
}
103133

@@ -138,7 +168,7 @@ protected void ShutdownAllNodes(bool nodeReuse, NodeContextTerminateDelegate ter
138168
{
139169
// If we're able to connect to such a process, send a packet requesting its termination
140170
CommunicationsUtilities.Trace("Shutting down node with pid = {0}", nodeProcess.Id);
141-
NodeContext nodeContext = new NodeContext(0, nodeProcess.Id, nodeStream, factory, terminateNode);
171+
NodeContext nodeContext = new NodeContext(0, nodeProcess, nodeStream, factory, terminateNode);
142172
nodeContext.SendData(new NodeBuildComplete(false /* no node reuse */));
143173
nodeStream.Dispose();
144174
}
@@ -204,7 +234,7 @@ protected NodeContext GetNode(string msbuildLocation, string commandLineArgs, in
204234
{
205235
// Connection successful, use this node.
206236
CommunicationsUtilities.Trace("Successfully connected to existed node {0} which is PID {1}", nodeId, nodeProcess.Id);
207-
return new NodeContext(nodeId, nodeProcess.Id, nodeStream, factory, terminateNode);
237+
return new NodeContext(nodeId, nodeProcess, nodeStream, factory, terminateNode);
208238
}
209239
}
210240
}
@@ -242,20 +272,20 @@ protected NodeContext GetNode(string msbuildLocation, string commandLineArgs, in
242272
#endif
243273

244274
// Create the node process
245-
int msbuildProcessId = LaunchNode(msbuildLocation, commandLineArgs);
246-
_processesToIgnore.Add(GetProcessesToIgnoreKey(hostHandshake, msbuildProcessId));
275+
Process msbuildProcess = LaunchNode(msbuildLocation, commandLineArgs);
276+
_processesToIgnore.Add(GetProcessesToIgnoreKey(hostHandshake, msbuildProcess.Id));
247277

248278
// Note, when running under IMAGEFILEEXECUTIONOPTIONS registry key to debug, the process ID
249279
// gotten back from CreateProcess is that of the debugger, which causes this to try to connect
250280
// to the debugger process. Instead, use MSBUILDDEBUGONSTART=1
251281

252282
// Now try to connect to it.
253-
Stream nodeStream = TryConnectToProcess(msbuildProcessId, TimeoutForNewNodeCreation, hostHandshake);
283+
Stream nodeStream = TryConnectToProcess(msbuildProcess.Id, TimeoutForNewNodeCreation, hostHandshake);
254284
if (nodeStream != null)
255285
{
256286
// Connection successful, use this node.
257-
CommunicationsUtilities.Trace("Successfully connected to created node {0} which is PID {1}", nodeId, msbuildProcessId);
258-
return new NodeContext(nodeId, msbuildProcessId, nodeStream, factory, terminateNode);
287+
CommunicationsUtilities.Trace("Successfully connected to created node {0} which is PID {1}", nodeId, msbuildProcess.Id);
288+
return new NodeContext(nodeId, msbuildProcess, nodeStream, factory, terminateNode);
259289
}
260290
}
261291

@@ -391,7 +421,7 @@ private Stream TryConnectToProcess(int nodeProcessId, int timeout, Handshake han
391421
/// <summary>
392422
/// Creates a new MSBuild process
393423
/// </summary>
394-
private int LaunchNode(string msbuildLocation, string commandLineArgs)
424+
private Process LaunchNode(string msbuildLocation, string commandLineArgs)
395425
{
396426
// Should always have been set already.
397427
ErrorUtilities.VerifyThrowInternalLength(msbuildLocation, nameof(msbuildLocation));
@@ -490,7 +520,7 @@ private int LaunchNode(string msbuildLocation, string commandLineArgs)
490520
}
491521

492522
CommunicationsUtilities.Trace("Successfully launched {1} node with PID {0}", process.Id, exeName);
493-
return process.Id;
523+
return process;
494524
}
495525
else
496526
{
@@ -548,7 +578,7 @@ out processInfo
548578
}
549579

550580
CommunicationsUtilities.Trace("Successfully launched {1} node with PID {0}", childProcessId, exeName);
551-
return childProcessId;
581+
return Process.GetProcessById(childProcessId);
552582
}
553583
}
554584

@@ -582,6 +612,13 @@ private static string GetCurrentHost()
582612
/// </summary>
583613
internal class NodeContext
584614
{
615+
enum ExitPacketState
616+
{
617+
None,
618+
ExitPacketQueued,
619+
ExitPacketSent
620+
}
621+
585622
// The pipe(s) used to communicate with the node.
586623
private Stream _clientToServerStream;
587624
private Stream _serverToClientStream;
@@ -597,9 +634,9 @@ internal class NodeContext
597634
private int _nodeId;
598635

599636
/// <summary>
600-
/// The process id
637+
/// The node process.
601638
/// </summary>
602-
private int _processId;
639+
private readonly Process _process;
603640

604641
/// <summary>
605642
/// An array used to store the header byte for each packet when read.
@@ -631,14 +668,14 @@ internal class NodeContext
631668
private Task _packetWriteDrainTask = Task.CompletedTask;
632669

633670
/// <summary>
634-
/// Event indicating the node has terminated.
671+
/// Delegate called when the context terminates.
635672
/// </summary>
636-
private ManualResetEvent _nodeTerminated;
673+
private NodeContextTerminateDelegate _terminateDelegate;
637674

638675
/// <summary>
639-
/// Delegate called when the context terminates.
676+
/// Tracks the state of the packet sent to terminate the node.
640677
/// </summary>
641-
private NodeContextTerminateDelegate _terminateDelegate;
678+
private ExitPacketState _exitPacketState;
642679

643680
/// <summary>
644681
/// Per node read buffers
@@ -648,20 +685,18 @@ internal class NodeContext
648685
/// <summary>
649686
/// Constructor.
650687
/// </summary>
651-
public NodeContext(int nodeId, int processId,
688+
public NodeContext(int nodeId, Process process,
652689
Stream nodePipe,
653690
INodePacketFactory factory, NodeContextTerminateDelegate terminateDelegate)
654691
{
655692
_nodeId = nodeId;
656-
_processId = processId;
693+
_process = process;
657694
_clientToServerStream = nodePipe;
658695
_serverToClientStream = nodePipe;
659696
_packetFactory = factory;
660697
_headerByte = new byte[5]; // 1 for the packet type, 4 for the body length
661-
662698
_readBufferMemoryStream = new MemoryStream();
663699
_writeBufferMemoryStream = new MemoryStream();
664-
_nodeTerminated = new ManualResetEvent(false);
665700
_terminateDelegate = terminateDelegate;
666701
_sharedReadBuffer = InterningBinaryReader.CreateSharedBuffer();
667702
}
@@ -749,6 +784,10 @@ public async Task RunPacketReadLoopAsync()
749784
/// <param name="packet">The packet to send.</param>
750785
public void SendData(INodePacket packet)
751786
{
787+
if (IsExitPacket(packet))
788+
{
789+
_exitPacketState = ExitPacketState.ExitPacketQueued;
790+
}
752791
_packetWriteQueue.Add(packet);
753792
DrainPacketQueue();
754793
}
@@ -816,6 +855,10 @@ private void SendDataCore(INodePacket packet)
816855
int lengthToWrite = Math.Min(writeStreamLength - i, MaxPacketWriteSize);
817856
_serverToClientStream.Write(writeStreamBuffer, i, lengthToWrite);
818857
}
858+
if (IsExitPacket(packet))
859+
{
860+
_exitPacketState = ExitPacketState.ExitPacketSent;
861+
}
819862
}
820863
catch (IOException e)
821864
{
@@ -828,6 +871,11 @@ private void SendDataCore(INodePacket packet)
828871
}
829872
}
830873

874+
private static bool IsExitPacket(INodePacket packet)
875+
{
876+
return packet is NodeBuildComplete buildCompletePacket && !buildCompletePacket.PrepareForReuse;
877+
}
878+
831879
/// <summary>
832880
/// Avoid having a BinaryWriter just to write a 4-byte int
833881
/// </summary>
@@ -842,7 +890,7 @@ private void WriteInt32(MemoryStream stream, int value)
842890
/// <summary>
843891
/// Closes the node's context, disconnecting it from the node.
844892
/// </summary>
845-
public void Close()
893+
private void Close()
846894
{
847895
_clientToServerStream.Dispose();
848896
if (!object.ReferenceEquals(_clientToServerStream, _serverToClientStream))
@@ -852,6 +900,52 @@ public void Close()
852900
_terminateDelegate(_nodeId);
853901
}
854902

903+
/// <summary>
904+
/// Waits for the child node process to exit.
905+
/// </summary>
906+
public async Task WaitForExitAsync(ILoggingService loggingService)
907+
{
908+
if (_exitPacketState == ExitPacketState.ExitPacketQueued)
909+
{
910+
// Wait up to 100ms until all remaining packets are sent.
911+
// We don't need to wait long, just long enough for the Task to start running on the ThreadPool.
912+
await Task.WhenAny(_packetWriteDrainTask, Task.Delay(100));
913+
}
914+
if (_exitPacketState == ExitPacketState.ExitPacketSent)
915+
{
916+
CommunicationsUtilities.Trace("Waiting for node with pid = {0} to exit", _process.Id);
917+
918+
// .NET 5 introduces a real WaitForExitAsyc.
919+
// This is a poor man's implementation that uses polling.
920+
int timeout = TimeoutForWaitForExit;
921+
int delay = 5;
922+
while (timeout > 0)
923+
{
924+
bool exited = _process.WaitForExit(milliseconds: 0);
925+
if (exited)
926+
{
927+
return;
928+
}
929+
timeout -= delay;
930+
await Task.Delay(delay).ConfigureAwait(false);
931+
932+
// Double delay up to 500ms.
933+
delay = Math.Min(delay * 2, 500);
934+
}
935+
}
936+
937+
// Kill the child and do a blocking wait.
938+
loggingService?.LogWarning(
939+
BuildEventContext.Invalid,
940+
null,
941+
BuildEventFileInfo.Empty,
942+
"KillingProcessWithPid",
943+
_process.Id);
944+
CommunicationsUtilities.Trace("Killing node with pid = {0}", _process.Id);
945+
946+
_process.KillTree(timeout: 5000);
947+
}
948+
855949
#if FEATURE_APM
856950
/// <summary>
857951
/// Completes the asynchronous packet write to the node.
@@ -873,17 +967,16 @@ private bool ProcessHeaderBytesRead(int bytesRead)
873967
{
874968
if (bytesRead != _headerByte.Length)
875969
{
876-
CommunicationsUtilities.Trace(_nodeId, "COMMUNICATIONS ERROR (HRC) Node: {0} Process: {1} Bytes Read: {2} Expected: {3}", _nodeId, _processId, bytesRead, _headerByte.Length);
970+
CommunicationsUtilities.Trace(_nodeId, "COMMUNICATIONS ERROR (HRC) Node: {0} Process: {1} Bytes Read: {2} Expected: {3}", _nodeId, _process.Id, bytesRead, _headerByte.Length);
877971
try
878972
{
879-
Process childProcess = Process.GetProcessById(_processId);
880-
if (childProcess?.HasExited != false)
973+
if (_process.HasExited)
881974
{
882-
CommunicationsUtilities.Trace(_nodeId, " Child Process {0} has exited.", _processId);
975+
CommunicationsUtilities.Trace(_nodeId, " Child Process {0} has exited.", _process.Id);
883976
}
884977
else
885978
{
886-
CommunicationsUtilities.Trace(_nodeId, " Child Process {0} is still running.", _processId);
979+
CommunicationsUtilities.Trace(_nodeId, " Child Process {0} is still running.", _process.Id);
887980
}
888981
}
889982
catch (Exception e) when (!ExceptionHandling.IsCriticalException(e))

src/Build/BackEnd/TaskExecutionHost/TaskExecutionHost.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
using Microsoft.Build.Utilities;
2525

2626
using TaskItem = Microsoft.Build.Execution.ProjectItemInstance.TaskItem;
27+
using Task = System.Threading.Tasks.Task;
2728

2829
namespace Microsoft.Build.BackEnd
2930
{

src/Build/Microsoft.Build.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
<Compile Include="..\Shared\NodeBuildComplete.cs" />
128128
<Compile Include="..\Shared\NodeShutdown.cs" />
129129
<Compile Include="..\Shared\NamedPipeUtil.cs" />
130+
<Compile Include="..\Shared\ProcessExtensions.cs" />
130131
<Compile Include="..\Shared\TaskLoggingHelper.cs">
131132
<Link>BackEnd\Components\RequestBuilder\IntrinsicTasks\TaskLoggingHelper.cs</Link>
132133
<ExcludeFromStyleCop>True</ExcludeFromStyleCop>

src/Build/Resources/Strings.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,4 +1876,7 @@ Utilization: {0} Average Utilization: {1:###.0}</value>
18761876
<data name="NoProjectCachePluginFoundInAssembly" xml:space="preserve">
18771877
<value>MSB4270: No project cache plugins found in assembly "{0}". Expected one.</value>
18781878
</data>
1879+
<data name="KillingProcessWithPid" xml:space="preserve">
1880+
<value>Killing process with pid = {0}.</value>
1881+
</data>
18791882
</root>

src/Build/Resources/xlf/Strings.cs.xlf

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)