Skip to content

Commit 105783d

Browse files
authored
Write script pod events to task log (#992)
* Write script pod events to task log * Fix only returning events once * Write since times to disk in case of tentacle restart * Handle file not found * Fix path issue * Change to use Series values * Handle possible null series * Change back to helper class * Write warning events with warning messages * Change how we do wrapped messages * Don't return WrappedProcessOutput * Change back to Count * Add env var to disable pod events * Log pulling & pulled as wait style * Constrain the pulled message to the same field path as the pulling * Added test and removed Task.WhenAll * Removed test as it was too flaky * Add pod event check in simple test
1 parent d0eb611 commit 105783d

File tree

9 files changed

+221
-22
lines changed

9 files changed

+221
-22
lines changed

source/.run/Build and Push Kubernetes Tentacle docker image (powershell, amd64).run.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<option name="SCRIPT_TEXT" value=".\build." />
44
<option name="INDEPENDENT_SCRIPT_PATH" value="false" />
55
<option name="SCRIPT_PATH" value="$PROJECT_DIR$/../build.ps1" />
6-
<option name="SCRIPT_OPTIONS" value="-Target BuildAndLoadLocallyKubernetesTentacleContainerImage -DockerPlatform &quot;linux/amd64&quot;" />
6+
<option name="SCRIPT_OPTIONS" value="-Target BuildAndLoadLocallyKubernetesTentacleContainerImage -DockerPlatform &quot;linux/amd64&quot; -Runtime &quot;linux-x64&quot;" />
77
<option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
88
<option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$/../" />
99
<option name="INDEPENDENT_INTERPRETER_PATH" value="true" />

source/Octopus.Tentacle.Contracts/ScriptStatusResponse.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Diagnostics;
34
using Newtonsoft.Json;
45

56
namespace Octopus.Tentacle.Contracts
@@ -18,6 +19,7 @@ public enum ProcessOutputSource
1819
Debug
1920
}
2021

22+
[DebuggerDisplay("{Occurred} | {Source} | {Text}")]
2123
public class ProcessOutput
2224
{
2325
public ProcessOutput(ProcessOutputSource source, string text) : this(source, text, DateTimeOffset.UtcNow)

source/Octopus.Tentacle.Kubernetes.Tests.Integration/KubernetesScriptServiceV1IntegrationTest.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using FluentAssertions;
22
using FluentAssertions.Execution;
3+
using Octopus.Client.Model.Endpoints;
34
using Octopus.Tentacle.Client.Scripts.Models;
45
using Octopus.Tentacle.Client.Scripts.Models.Builders;
56
using Octopus.Tentacle.CommonTestUtils;
@@ -53,6 +54,7 @@ public async Task RunSimpleScript(ScriptType scriptType)
5354
var result = await TentacleClient.ExecuteScript(command, StatusReceived, ScriptCompleted, new InMemoryLog(), CancellationToken);
5455

5556
//Assert
57+
logs.Should().Contain(po => po.Text.StartsWith("[POD EVENT]")); // Verify that we are receiving some pod events
5658
logs.Should().Contain(po => po.Source == ProcessOutputSource.StdOut && po.Text == "Hello World");
5759
scriptCompleted.Should().BeTrue();
5860
result.ExitCode.Should().Be(0);

source/Octopus.Tentacle/Kubernetes/IKubernetesPodLogService.cs

Lines changed: 120 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Threading;
77
using System.Threading.Tasks;
88
using k8s.Autorest;
9+
using k8s.Models;
910
using Octopus.Diagnostics;
1011
using Octopus.Tentacle.Contracts;
1112

@@ -21,39 +22,65 @@ class KubernetesPodLogService : KubernetesService, IKubernetesPodLogService
2122
readonly IKubernetesPodMonitor podMonitor;
2223
readonly ITentacleScriptLogProvider scriptLogProvider;
2324
readonly IScriptPodSinceTimeStore scriptPodSinceTimeStore;
25+
readonly IKubernetesEventService eventService;
2426

25-
public KubernetesPodLogService(IKubernetesClientConfigProvider configProvider, IKubernetesPodMonitor podMonitor, ITentacleScriptLogProvider scriptLogProvider, IScriptPodSinceTimeStore scriptPodSinceTimeStore, ISystemLog log)
27+
public KubernetesPodLogService(
28+
IKubernetesClientConfigProvider configProvider,
29+
IKubernetesPodMonitor podMonitor,
30+
ITentacleScriptLogProvider scriptLogProvider,
31+
IScriptPodSinceTimeStore scriptPodSinceTimeStore,
32+
IKubernetesEventService eventService,
33+
ISystemLog log)
2634
: base(configProvider, log)
2735
{
2836
this.podMonitor = podMonitor;
2937
this.scriptLogProvider = scriptLogProvider;
3038
this.scriptPodSinceTimeStore = scriptPodSinceTimeStore;
39+
this.eventService = eventService;
3140
}
3241

3342
public async Task<(IReadOnlyCollection<ProcessOutput> Outputs, long NextSequenceNumber)> GetLogs(ScriptTicket scriptTicket, long lastLogSequence, CancellationToken cancellationToken)
3443
{
3544
var tentacleScriptLog = scriptLogProvider.GetOrCreate(scriptTicket);
3645
var podName = scriptTicket.ToKubernetesScriptPodName();
3746

38-
var podLogs = await GetPodLogs();
47+
//we start both tasks now so we can overlap the API calls
48+
var podLogsTask = GetPodLogs();
49+
var podEventsTask = GetPodEvents(scriptTicket, podName, cancellationToken);
50+
51+
var podLogs = await podLogsTask;
3952
if (podLogs.Outputs.Any())
4053
{
4154
var nextSinceTime = podLogs.Outputs.Max(o => o.Occurred);
42-
scriptPodSinceTimeStore.UpdateSinceTime(scriptTicket, nextSinceTime);
43-
55+
scriptPodSinceTimeStore.UpdatePodLogsSinceTime(scriptTicket, nextSinceTime);
56+
4457
//We can use our EOS marker to detect completion quicker than the Pod status
4558
if (podLogs.ExitCode != null)
4659
podMonitor.MarkAsCompleted(scriptTicket, podLogs.ExitCode.Value);
4760
}
4861

4962
//We are making the assumption that the clock on the Tentacle Pod is in sync with API Server
5063
var tentacleLogs = tentacleScriptLog.PopLogs();
51-
var combinedLogs = podLogs.Outputs.Concat(tentacleLogs).OrderBy(o => o.Occurred).ToList();
64+
var podEventLogs = await podEventsTask;
65+
66+
var combinedLogs = podLogs
67+
.Outputs
68+
.Concat(tentacleLogs)
69+
.Concat(podEventLogs)
70+
.OrderBy(o => o.Occurred)
71+
.SelectMany(o => o switch
72+
{
73+
//if this is a wrapped output, expand it in place
74+
WrappedProcessOutput wrappedOutput => wrappedOutput.Expand(),
75+
_ => new[] { o }
76+
})
77+
.ToList();
78+
5279
return (combinedLogs, podLogs.NextSequenceNumber);
5380

5481
async Task<(IReadOnlyCollection<ProcessOutput> Outputs, long NextSequenceNumber, int? ExitCode)> GetPodLogs()
5582
{
56-
var sinceTime = scriptPodSinceTimeStore.GetSinceTime(scriptTicket);
83+
var sinceTime = scriptPodSinceTimeStore.GetPodLogsSinceTime(scriptTicket);
5784
try
5885
{
5986
return await GetPodLogsWithSinceTime(sinceTime);
@@ -63,7 +90,7 @@ public KubernetesPodLogService(IKubernetesClientConfigProvider configProvider, I
6390
var message = $"Unexpected Pod log line numbers found with sinceTime='{sinceTime}', loading all logs";
6491
tentacleScriptLog.Verbose(message);
6592
Log.Warn(ex, message);
66-
93+
6794
//If we somehow come across weird/missing line numbers, try load the whole Pod logs to see if that helps
6895
return await GetPodLogsWithSinceTime(null);
6996
}
@@ -77,13 +104,64 @@ public KubernetesPodLogService(IKubernetesClientConfigProvider configProvider, I
77104

78105
async Task<(IReadOnlyCollection<ProcessOutput> Outputs, long NextSequenceNumber, int? ExitCode)> ReadPodLogsFromStream(Stream stream)
79106
{
80-
using (var reader = new StreamReader(stream))
81-
{
82-
return await PodLogReader.ReadPodLogs(lastLogSequence, reader);
83-
}
107+
using var reader = new StreamReader(stream);
108+
return await PodLogReader.ReadPodLogs(lastLogSequence, reader);
84109
}
85110
}
86111

112+
async Task<IEnumerable<ProcessOutput>> GetPodEvents(ScriptTicket scriptTicket, string podName, CancellationToken cancellationToken)
113+
{
114+
//if we don't want to write pod events to the task log, don't do anything
115+
if (KubernetesConfig.DisablePodEventsInTaskLog)
116+
{
117+
return Array.Empty<ProcessOutput>();
118+
}
119+
120+
121+
var sinceTime = scriptPodSinceTimeStore.GetPodEventsSinceTime(scriptTicket);
122+
123+
var allEvents = await eventService.FetchAllEventsAsync(KubernetesConfig.Namespace, podName, cancellationToken);
124+
if (allEvents is null)
125+
{
126+
return Array.Empty<ProcessOutput>();
127+
}
128+
129+
var relevantEvents = allEvents.Items
130+
.Select(e => (Event: e, Occurred: EventHelpers.GetLatestTimestampInEvent(e)))
131+
.Where(x => x.Occurred.HasValue)
132+
.Select(x => (x.Event, Occurred: new DateTimeOffset(x.Occurred!.Value, TimeSpan.Zero)))
133+
.OrderBy(x => x.Occurred)
134+
.SkipWhile(e => e.Occurred <= sinceTime);
135+
136+
var events = relevantEvents.Select((x) =>
137+
{
138+
var (ev, occurred) = x;
139+
140+
var formattedMessage = $"[POD EVENT] {ev.Reason} | {ev.Message} (Count: {ev.Count ?? 1})";
141+
142+
if (ev.IsWarning())
143+
return new WrappedProcessOutput(ProcessOutputSource.StdOut, formattedMessage, occurred, "warning");
144+
145+
//if we are pulling a container, show it as a "wait"
146+
if (ev.IsPullingReason())
147+
return new WrappedProcessOutput(ProcessOutputSource.StdOut, formattedMessage, occurred, "wait");
148+
149+
//if this is a Pulled event, and we had a Pulling event with the same path (i.e. the same container), then show this as "wait"
150+
if (ev.IsPulledReason() && allEvents.Items.Any(e => e.IsPullingReason() && ev.InvolvedObject.FieldPath == e.InvolvedObject.FieldPath))
151+
return new WrappedProcessOutput(ProcessOutputSource.StdOut, formattedMessage, occurred, "wait");
152+
153+
return new ProcessOutput(ProcessOutputSource.Debug, formattedMessage, occurred);
154+
})
155+
.ToArray();
156+
157+
if (events.Any())
158+
{
159+
//update the events since time, so we don't get duplicate events
160+
scriptPodSinceTimeStore.UpdatePodEventsSinceTime(scriptTicket, events.Max(o => o.Occurred));
161+
}
162+
163+
return events;
164+
}
87165

88166
async Task<Stream?> GetLogStream(string podName, DateTimeOffset? sinceTime, CancellationToken cancellationToken)
89167
{
@@ -107,5 +185,36 @@ public KubernetesPodLogService(IKubernetesClientConfigProvider configProvider, I
107185
}
108186
}
109187
}
188+
189+
class WrappedProcessOutput : ProcessOutput
190+
{
191+
static readonly TimeSpan OneTick = TimeSpan.FromTicks(1);
192+
readonly string wrapper;
193+
194+
public WrappedProcessOutput(ProcessOutputSource source, string text, DateTimeOffset occurred, string wrapper)
195+
: base(source, text, occurred)
196+
{
197+
this.wrapper = wrapper;
198+
}
199+
200+
public IEnumerable<ProcessOutput> Expand()
201+
{
202+
return new[]
203+
{
204+
//we add the service messages one tick before and after so they are correctly ordered
205+
new ProcessOutput(ProcessOutputSource.StdOut, $"##octopus[stdout-{wrapper}]", Occurred.Subtract(OneTick)),
206+
new ProcessOutput(Source, Text, Occurred),
207+
new ProcessOutput(ProcessOutputSource.StdOut, "##octopus[stdout-default]", Occurred.Add(OneTick)),
208+
};
209+
}
210+
}
211+
}
212+
213+
public static class EventExtensions
214+
{
215+
public static bool IsPullingReason(this Corev1Event @event) => @event.Reason.Equals("Pulling", StringComparison.OrdinalIgnoreCase);
216+
public static bool IsPulledReason(this Corev1Event @event) => @event.Reason.Equals("Pulled", StringComparison.OrdinalIgnoreCase);
217+
public static bool IsWarning(this Corev1Event @event) => @event.Type.Equals("Warning", StringComparison.OrdinalIgnoreCase);
218+
110219
}
111220
}

source/Octopus.Tentacle/Kubernetes/KubernetesConfig.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ public static class KubernetesConfig
2121

2222
public static TimeSpan PodsConsideredOrphanedAfterTimeSpan => TimeSpan.FromMinutes(int.TryParse(Environment.GetEnvironmentVariable($"{EnvVarPrefix}__PODSCONSIDEREDORPHANEDAFTERMINUTES"), out var podsConsideredOrphanedAfterTimeSpan) ? podsConsideredOrphanedAfterTimeSpan : 10);
2323
public static bool DisableAutomaticPodCleanup => bool.TryParse(Environment.GetEnvironmentVariable($"{EnvVarPrefix}__DISABLEAUTOPODCLEANUP"), out var disableAutoCleanup) && disableAutoCleanup;
24+
public static bool DisablePodEventsInTaskLog => bool.TryParse(Environment.GetEnvironmentVariable($"{EnvVarPrefix}__DISABLEPODEVENTSINTASKLOG"), out var disable) && disable;
2425

2526
public static string HelmReleaseNameVariableName => $"{EnvVarPrefix}__HELMRELEASENAME";
2627
public static string HelmReleaseName => GetRequiredEnvVar(HelmReleaseNameVariableName, "Unable to determine Helm release name.");

source/Octopus.Tentacle/Kubernetes/KubernetesEventService.cs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66
using k8s.Autorest;
77
using k8s.Models;
88
using Octopus.Diagnostics;
9+
using Octopus.Tentacle.Contracts;
910

1011
namespace Octopus.Tentacle.Kubernetes
1112
{
1213
public interface IKubernetesEventService
1314
{
1415
Task<Corev1EventList?> FetchAllEventsAsync(string kubernetesNamespace, CancellationToken cancellationToken);
16+
Task<Corev1EventList?> FetchAllEventsAsync(string kubernetesNamespace, string podName, CancellationToken cancellationToken);
1517
}
1618

1719
public class KubernetesEventService : KubernetesService, IKubernetesEventService
@@ -35,5 +37,25 @@ public KubernetesEventService(IKubernetesClientConfigProvider configProvider, IS
3537
}
3638
});
3739
}
40+
41+
public async Task<Corev1EventList?> FetchAllEventsAsync(string kubernetesNamespace, string podName, CancellationToken cancellationToken)
42+
{
43+
return await RetryPolicy.ExecuteAsync(async () =>
44+
{
45+
try
46+
{
47+
//get all the events for a specific script pod
48+
return await Client.CoreV1.ListNamespacedEventAsync(
49+
kubernetesNamespace,
50+
fieldSelector: $"involvedObject.name={podName}",
51+
cancellationToken: cancellationToken);
52+
}
53+
catch (HttpOperationException opException)
54+
when (opException.Response.StatusCode == HttpStatusCode.NotFound)
55+
{
56+
return null;
57+
}
58+
});
59+
}
3860
}
3961
}
Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,87 @@
11
using System;
22
using System.Collections.Concurrent;
33
using Octopus.Tentacle.Contracts;
4+
using Octopus.Tentacle.Scripts;
45

56
namespace Octopus.Tentacle.Kubernetes
67
{
78
public interface IScriptPodSinceTimeStore
89
{
9-
DateTimeOffset? GetSinceTime(ScriptTicket scriptTicket);
10-
void UpdateSinceTime(ScriptTicket scriptTicket, DateTimeOffset nextSinceTime);
10+
DateTimeOffset? GetPodLogsSinceTime(ScriptTicket scriptTicket);
11+
void UpdatePodLogsSinceTime(ScriptTicket scriptTicket, DateTimeOffset nextSinceTime);
12+
DateTimeOffset? GetPodEventsSinceTime(ScriptTicket scriptTicket);
13+
void UpdatePodEventsSinceTime(ScriptTicket scriptTicket, DateTimeOffset nextSinceTime);
1114
void Delete(ScriptTicket scriptTicket);
1215
}
1316

1417
public class ScriptPodSinceTimeStore : IScriptPodSinceTimeStore
1518
{
16-
readonly ConcurrentDictionary<ScriptTicket, DateTimeOffset?> sinceTimes = new();
17-
18-
public DateTimeOffset? GetSinceTime(ScriptTicket scriptTicket)
19+
const string PodLogsSinceTimeFilename = "last-pod-log-timestamp.txt";
20+
const string PodEventsSinceTimeFilename = "last-pod-event-timestamp.txt";
21+
22+
readonly IScriptWorkspaceFactory workspaceFactory;
23+
24+
readonly ConcurrentDictionary<ScriptTicket, Lazy<IScriptWorkspace>> workspaces = new();
25+
26+
readonly ConcurrentDictionary<ScriptTicket, DateTimeOffset> logsSinceTimes = new();
27+
readonly ConcurrentDictionary<ScriptTicket, DateTimeOffset> eventsSinceTimes = new();
28+
29+
public ScriptPodSinceTimeStore(IScriptWorkspaceFactory workspaceFactory)
1930
{
20-
return sinceTimes.GetOrAdd(scriptTicket, _ => null);
31+
this.workspaceFactory = workspaceFactory;
2132
}
2233

23-
public void UpdateSinceTime(ScriptTicket scriptTicket, DateTimeOffset nextSinceTime)
34+
public DateTimeOffset? GetPodLogsSinceTime(ScriptTicket scriptTicket) => GetTimestampFromMemoryOrDisk(scriptTicket, logsSinceTimes, PodLogsSinceTimeFilename);
35+
36+
public void UpdatePodLogsSinceTime(ScriptTicket scriptTicket, DateTimeOffset nextSinceTime) => SaveTimestampInMemoryAndDisk(scriptTicket, nextSinceTime, logsSinceTimes, PodLogsSinceTimeFilename);
37+
38+
public DateTimeOffset? GetPodEventsSinceTime(ScriptTicket scriptTicket) => GetTimestampFromMemoryOrDisk(scriptTicket, eventsSinceTimes, PodEventsSinceTimeFilename);
39+
40+
public void UpdatePodEventsSinceTime(ScriptTicket scriptTicket, DateTimeOffset nextSinceTime) => SaveTimestampInMemoryAndDisk(scriptTicket, nextSinceTime, eventsSinceTimes, PodEventsSinceTimeFilename);
41+
42+
IScriptWorkspace GetWorkspace(ScriptTicket scriptTicket) => workspaces.GetOrAdd(scriptTicket, new Lazy<IScriptWorkspace>(() => workspaceFactory.GetWorkspace(scriptTicket))).Value;
43+
44+
DateTimeOffset? GetTimestampFromMemoryOrDisk(ScriptTicket scriptTicket, ConcurrentDictionary<ScriptTicket, DateTimeOffset> memoryCache, string filename)
45+
{
46+
//if we have it in memory, all good
47+
if (memoryCache.TryGetValue(scriptTicket, out var log))
48+
{
49+
return log;
50+
}
51+
52+
var workspace = GetWorkspace(scriptTicket);
53+
lock (workspace)
54+
{
55+
//otherwise try and load it from disk
56+
var sinceTimeStr = workspace.TryReadFile(filename);
57+
var sinceTime = sinceTimeStr is not null && DateTimeOffset.TryParse(sinceTimeStr, out var dto) ? dto : (DateTimeOffset?)null;
58+
59+
//if we have a value on disk, save it in memory
60+
if (sinceTime.HasValue)
61+
{
62+
memoryCache[scriptTicket] = sinceTime.Value;
63+
}
64+
65+
return sinceTime;
66+
}
67+
}
68+
69+
void SaveTimestampInMemoryAndDisk(ScriptTicket scriptTicket, DateTimeOffset nextSinceTime, ConcurrentDictionary<ScriptTicket, DateTimeOffset> memoryCache, string podLogsSinceTimeFilename)
2470
{
25-
sinceTimes[scriptTicket] = nextSinceTime;
71+
memoryCache[scriptTicket] = nextSinceTime;
72+
73+
var workspace = GetWorkspace(scriptTicket);
74+
lock (workspace)
75+
{
76+
workspace.WriteFile(podLogsSinceTimeFilename, nextSinceTime.ToString("O"));
77+
}
2678
}
2779

2880
public void Delete(ScriptTicket scriptTicket)
2981
{
30-
sinceTimes.TryRemove(scriptTicket, out _);
82+
workspaces.TryRemove(scriptTicket, out _);
83+
logsSinceTimes.TryRemove(scriptTicket, out _);
84+
eventsSinceTimes.TryRemove(scriptTicket, out _);
3185
}
3286
}
3387
}

source/Octopus.Tentacle/Scripts/IScriptWorkspace.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ public interface IScriptWorkspace
2222
string LogFilePath { get; }
2323
void WriteFile(string filename, string contents);
2424
void CopyFile(string sourceFilePath, string destFileName, bool overwrite);
25+
string ReadFile(string filename);
2526
void CheckReadiness();
27+
string? TryReadFile(string filename);
2628
}
2729
}

0 commit comments

Comments
 (0)