Skip to content

Commit 361d6ae

Browse files
authored
Merge pull request #410 from x13n/timestamp
Prevent invalid timestamps in cpu usage metrics
2 parents 610e442 + 316d5ec commit 361d6ae

File tree

3 files changed

+16
-8
lines changed

3 files changed

+16
-8
lines changed

kubelet-to-gcm/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
OUT_DIR = build
1616
PACKAGE = github.com/GoogleCloudPlatform/k8s-stackdriver/kubelet-to-gcm
1717
PREFIX = staging-k8s.gcr.io
18-
TAG = 1.3.5
18+
TAG = 1.3.6
1919

2020
# Rules for building the real image for deployment to gcr.io
2121

kubelet-to-gcm/monitor/kubelet/translate.go

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -435,6 +435,10 @@ func translateCPU(cpu *stats.CPUStats, tsFactory *timeSeriesFactory, startTime t
435435
if cpu.UsageCoreNanoSeconds == nil {
436436
return nil, fmt.Errorf("UsageCoreNanoSeconds missing from CPUStats %v", cpu)
437437
}
438+
// Only send cpu usage metric if start time is before current time. Right after container is started, kubelet can return start time == end time.
439+
if !cpu.Time.Time.After(startTime) {
440+
return nil, nil
441+
}
438442

439443
// Total CPU utilization for all time. Convert from nanosec to sec.
440444
cpuTotalPoint := tsFactory.newPoint(&v3.TypedValue{
@@ -519,7 +523,7 @@ func translateMemory(memory *stats.MemoryStats, tsFactory *timeSeriesFactory, st
519523
return nil, fmt.Errorf("Memory information missing.")
520524
}
521525

522-
// Only send page fault metric if start time is before current time. Right after container is started, kubelet can return start time == end time. This doesn't seem to happen with other metrics.
526+
// Only send page fault metric if start time is before current time. Right after container is started, kubelet can return start time == end time.
523527
if pageFaultsMD != nil && memory.Time.Time.After(startTime) {
524528
if memory.MajorPageFaults == nil {
525529
return nil, fmt.Errorf("MajorPageFaults missing in MemoryStats %v", memory)

kubelet-to-gcm/monitor/kubelet/translate_test.go

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -221,8 +221,9 @@ func TestTranslateContainers(t *testing.T) {
221221
noLogStatsContainer.Logs = nil
222222
noRootfsStatsContainer := *getContainerStats(false)
223223
noRootfsStatsContainer.Rootfs = nil
224-
badTimestampOnPageFaultContrainer := *getContainerStats(false)
225-
badTimestampOnPageFaultContrainer.Memory.Time = badTimestampOnPageFaultContrainer.StartTime
224+
badTimestampOnCumulativeMetricsContrainer := *getContainerStats(false)
225+
badTimestampOnCumulativeMetricsContrainer.Memory.Time = badTimestampOnCumulativeMetricsContrainer.StartTime
226+
badTimestampOnCumulativeMetricsContrainer.CPU.Time = badTimestampOnCumulativeMetricsContrainer.StartTime
226227
legacyTsPerContainer := 11
227228
tsPerContainer := 8
228229
testCases := []struct {
@@ -310,12 +311,12 @@ func TestTranslateContainers(t *testing.T) {
310311
},
311312
},
312313
{
313-
name: "bad timestamp for page_fault_count",
314-
ExpectedLegacyTSCount: legacyTsPerContainer - 2,
315-
ExpectedTSCount: tsPerContainer - 2,
314+
name: "bad timestamp for cumulative metrics",
315+
ExpectedLegacyTSCount: legacyTsPerContainer - 3,
316+
ExpectedTSCount: tsPerContainer - 3,
316317
pods: []stats.PodStats{
317318
getPodStats(
318-
badTimestampOnPageFaultContrainer,
319+
badTimestampOnCumulativeMetricsContrainer,
319320
),
320321
},
321322
},
@@ -364,6 +365,9 @@ func getContainerStats(skipUsageNanoCores bool) *stats.ContainerStats {
364365
if v.Memory.Time.Time.Before(v.StartTime.Time) {
365366
v.Memory.Time, v.StartTime = v.StartTime, v.Memory.Time
366367
}
368+
if v.CPU.Time.Time.Before(v.StartTime.Time) {
369+
v.CPU.Time, v.StartTime = v.StartTime, v.CPU.Time
370+
}
367371
return v
368372
}
369373

0 commit comments

Comments
 (0)