From ccfcb82a88b25a0acdefed70826de63951e96aad Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Thu, 17 Jul 2025 22:36:28 +1000 Subject: [PATCH 1/3] Feat: Generate a manifest file for ihealth ingestion --- cmd/nginx-supportpkg.go | 56 +++++++++++++-- pkg/data_collector/data_collector.go | 101 +++++++++++++++++++++++++++ pkg/jobs/job.go | 14 ++-- pkg/jobs/nim_job_list.go | 9 +-- 4 files changed, 160 insertions(+), 20 deletions(-) diff --git a/cmd/nginx-supportpkg.go b/cmd/nginx-supportpkg.go index fb03994..70241d9 100644 --- a/cmd/nginx-supportpkg.go +++ b/cmd/nginx-supportpkg.go @@ -21,7 +21,10 @@ package cmd import ( "fmt" "os" + "path/filepath" "slices" + "strings" + "time" "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" "github.com/nginxinc/nginx-k8s-supportpkg/pkg/jobs" @@ -40,7 +43,7 @@ func Execute() { Short: "nginx-supportpkg - a tool to create Ingress Controller diagnostics package", Long: `nginx-supportpkg - a tool to create Ingress Controller diagnostics package`, Run: func(cmd *cobra.Command, args []string) { - + startTime := time.Now() err := data_collector.NewDataCollector(&collector) if err != nil { fmt.Println(fmt.Errorf("unable to start data collector: %s", err)) @@ -66,17 +69,60 @@ func Execute() { if collector.AllNamespacesExist() { failedJobs := 0 + totalJobs := len(jobList) + var jobTimings []data_collector.JobTiming + for _, job := range jobList { fmt.Printf("Running job %s...", job.Name) - err, Skipped := job.Collect(&collector) - if Skipped { + + // Record job start time + jobStartTime := time.Now() + jobResult := job.Collect(&collector) + + // Record job end time and calculate duration + jobEndTime := time.Now() + duration := jobEndTime.Sub(jobStartTime) + + // Create job timing record + files := make([]string, 0, len(jobResult.Files)) + for filename := range jobResult.Files { + if len(filename) > 0 { + packagePath := strings.TrimPrefix(filename, collector.BaseDir) + files = append(files, packagePath) + } + } + + jobTiming := data_collector.JobTiming{ + Name: job.Name, + StartTime: jobStartTime.UTC().Format(time.RFC3339), + EndTime: jobEndTime.UTC().Format(time.RFC3339), + Duration: duration.String(), + Files: files, + } + + if jobResult.Skipped { fmt.Print(" SKIPPED\n") - } else if err != nil { - fmt.Printf(" FAILED: %s\n", err) + } else if jobResult.Error != nil { + fmt.Printf(" FAILED: %s\n", jobResult.Error) failedJobs++ } else { fmt.Print(" COMPLETED\n") } + + jobTimings = append(jobTimings, jobTiming) + } + + // Generate manifest with job timings - UPDATE THIS LINE + manifestData, err := collector.GenerateManifest(product, startTime, totalJobs, failedJobs, jobTimings) + if err != nil { + fmt.Printf("Warning: Failed to generate manifest: %v\n", err) + } else { + // Save manifest to base directory + manifestPath := filepath.Join(collector.BaseDir, "manifest.json") + err = os.WriteFile(manifestPath, manifestData, 0644) + if err != nil { + fmt.Printf("Warning: Failed to write manifest: %v\n", err) + } } tarFile, err := collector.WrapUp(product) diff --git a/pkg/data_collector/data_collector.go b/pkg/data_collector/data_collector.go index 0df57fa..5f61507 100644 --- a/pkg/data_collector/data_collector.go +++ b/pkg/data_collector/data_collector.go @@ -23,6 +23,7 @@ import ( "bytes" "compress/gzip" "context" + "encoding/json" "fmt" "io" "log" @@ -33,6 +34,7 @@ import ( helmClient "github.com/mittwald/go-helm-client" "github.com/nginxinc/nginx-k8s-supportpkg/pkg/crds" + "github.com/nginxinc/nginx-k8s-supportpkg/pkg/version" corev1 "k8s.io/api/core/v1" crdClient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -60,6 +62,64 @@ type DataCollector struct { ExcludeTimeSeriesData bool } +type Manifest struct { + Version string `json:"version"` + Timestamp TimestampInfo `json:"ts"` + PackageType string `json:"package_type"` + RootDir string `json:"root_dir,omitempty"` + Commands []Command `json:"commands,omitempty"` + ProductInfo ProductInfo `json:"product_info"` + PlatformInfo PlatformInfo `json:"platform_info"` + Packages []SubPackage `json:"packages,omitempty"` +} + +type TimestampInfo struct { + Start string `json:"start"` + Stop string `json:"stop"` +} + +type Command struct { + Name string `json:"name"` + Cwd string `json:"cwd"` + Ts CommandTiming `json:"ts"` + Output string `json:"output"` + RetCode int `json:"retcode,omitempty"` +} + +type CommandTiming struct { + Start string `json:"start"` + End string `json:"end"` +} + +type ProductInfo struct { + Product string `json:"product"` + Version string `json:"version"` +} + +type PlatformInfo struct { + // Add platform-specific fields as needed + K8sVersion string `json:"k8s_version,omitempty"` + Namespaces []string `json:"namespaces,omitempty"` +} + +type SubPackage struct { + Path string `json:"path"` + Ts TimestampInfo `json:"ts"` + SubPackageType string `json:"sub_package_type"` + Name string `json:"name,omitempty"` + ID string `json:"id,omitempty"` +} + +type JobTiming struct { + Name string `json:"name"` + StartTime string `json:"start_time"` + EndTime string `json:"end_time"` + Duration string `json:"duration"` + Status string `json:"status"` // "completed", "failed", "skipped" + Error string `json:"error,omitempty"` + Files []string `json:"files,omitempty"` // List of files generated by the job +} + func NewDataCollector(collector *DataCollector) error { tmpDir, err := os.MkdirTemp("", "-pkg-diag") @@ -266,3 +326,44 @@ func (c *DataCollector) AllNamespacesExist() bool { return allExist } + +func (c *DataCollector) GenerateManifest(product string, startTime time.Time, jobsRun, jobsFailed int, jobTimings []JobTiming) ([]byte, error) { + manifest := Manifest{ + Version: "1.2", // Match the schema version + Timestamp: TimestampInfo{ + Start: startTime.UTC().Format(time.RFC3339), + Stop: time.Now().UTC().Format(time.RFC3339), + }, + PackageType: "root", // As defined in schema enum + RootDir: ".", + ProductInfo: ProductInfo{ + Product: product, + Version: version.Version, + }, + PlatformInfo: PlatformInfo{ + Namespaces: c.Namespaces, + }, + Commands: []Command{}, + } + + // Convert job timings to commands format + for _, job := range jobTimings { + for _, filename := range job.Files { + command := Command{ + Name: job.Name, + Cwd: ".", + Ts: CommandTiming{ + Start: job.StartTime, + End: job.EndTime, + }, + Output: filename, + } + if job.Status == "failed" { + command.RetCode = 1 + } + manifest.Commands = append(manifest.Commands, command) + } + } + + return json.MarshalIndent(manifest, "", " ") +} diff --git a/pkg/jobs/job.go b/pkg/jobs/job.go index 3a0fe25..ca17e0e 100644 --- a/pkg/jobs/job.go +++ b/pkg/jobs/job.go @@ -40,7 +40,7 @@ type JobResult struct { Skipped bool } -func (j Job) Collect(dc *data_collector.DataCollector) (error, bool) { +func (j Job) Collect(dc *data_collector.DataCollector) JobResult { ch := make(chan JobResult, 1) ctx, cancel := context.WithTimeout(context.Background(), j.Timeout) @@ -52,32 +52,32 @@ func (j Job) Collect(dc *data_collector.DataCollector) (error, bool) { select { case <-ctx.Done(): dc.Logger.Printf("\tJob %s has timed out: %s\n---\n", j.Name, ctx.Err()) - return fmt.Errorf("Context cancelled: %v", ctx.Err()), false + return JobResult{Error: fmt.Errorf("Context cancelled: %v", ctx.Err()), Skipped: false} case jobResults := <-ch: if jobResults.Skipped { dc.Logger.Printf("\tJob %s has been skipped\n---\n", j.Name) - return nil, true + return JobResult{Error: nil, Skipped: true} } if jobResults.Error != nil { dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error) - return jobResults.Error, false + return JobResult{Error: jobResults.Error, Skipped: false} } for fileName, fileValue := range jobResults.Files { err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm) if err != nil { - return fmt.Errorf("MkdirAll failed: %v", err), jobResults.Skipped + return JobResult{Error: fmt.Errorf("MkdirAll failed: %v", err), Skipped: jobResults.Skipped} } file, _ := os.Create(fileName) _, err = file.Write(fileValue) if err != nil { - return fmt.Errorf("Write failed: %v", err), jobResults.Skipped + return JobResult{Error: fmt.Errorf("Write failed: %v", err), Skipped: jobResults.Skipped} } _ = file.Close() dc.Logger.Printf("\tJob %s wrote %d bytes to %s\n", j.Name, len(fileValue), fileName) } dc.Logger.Printf("\tJob %s completed successfully\n---\n", j.Name) - return nil, jobResults.Skipped + return JobResult{Files: jobResults.Files, Error: nil, Skipped: false} } } diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go index 5230750..7c30582 100644 --- a/pkg/jobs/nim_job_list.go +++ b/pkg/jobs/nim_job_list.go @@ -20,7 +20,6 @@ package jobs import ( "context" - "os" "path/filepath" "strings" "time" @@ -246,13 +245,7 @@ func NIMJobList() []Job { jobResult.Error = err dc.Logger.Printf("\tFailed to copy dumped file %s from pod %s in namespace %s to %s: %v\n", config.outputFile, pod.Name, namespace, destPathFilename, err) } else { - err = os.WriteFile(destPathFilename, fileContent, 0644) - if err != nil { - jobResult.Error = err - dc.Logger.Printf("\tFailed to write file to %s: %v\n", destPathFilename, err) - } else { - dc.Logger.Printf("\tSuccessfully copied dumped file %s from pod %s in namespace %s to %s\n", config.outputFile, pod.Name, namespace, destPathFilename) - } + jobResult.Files[destPathFilename] = fileContent } // Remove/delete the dumped file from the pod From 4c912020f6185df36dd05f4fcf878621b06fa773 Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Fri, 18 Jul 2025 10:48:16 +1000 Subject: [PATCH 2/3] Feat: Generate a manifest file for ihealth ingestion * Job.Collect now returns filenames in addition to error and skipped status * Some files from the nim-dqlite-job were written directly in the job instead of being returned as part of the JobResult, this was corrected in a previous commit * Added GenerateManifest and associated structures to generate a manifest.json file for iHealth ingestion --- cmd/nginx-supportpkg.go | 35 ++++++++++------------------ pkg/data_collector/data_collector.go | 10 ++++---- pkg/jobs/job.go | 27 +++++++++++++++------ 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/cmd/nginx-supportpkg.go b/cmd/nginx-supportpkg.go index 70241d9..ed007fb 100644 --- a/cmd/nginx-supportpkg.go +++ b/cmd/nginx-supportpkg.go @@ -23,7 +23,6 @@ import ( "os" "path/filepath" "slices" - "strings" "time" "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" @@ -70,49 +69,39 @@ func Execute() { if collector.AllNamespacesExist() { failedJobs := 0 totalJobs := len(jobList) - var jobTimings []data_collector.JobTiming + var jobTimings []data_collector.JobInfo for _, job := range jobList { fmt.Printf("Running job %s...", job.Name) - // Record job start time + // Record job start and end time to calculate duration jobStartTime := time.Now() - jobResult := job.Collect(&collector) - - // Record job end time and calculate duration + err, skipped, files := job.Collect(&collector) jobEndTime := time.Now() duration := jobEndTime.Sub(jobStartTime) - // Create job timing record - files := make([]string, 0, len(jobResult.Files)) - for filename := range jobResult.Files { - if len(filename) > 0 { - packagePath := strings.TrimPrefix(filename, collector.BaseDir) - files = append(files, packagePath) - } - } - - jobTiming := data_collector.JobTiming{ + // Create job info record + jobInfo := data_collector.JobInfo{ Name: job.Name, - StartTime: jobStartTime.UTC().Format(time.RFC3339), - EndTime: jobEndTime.UTC().Format(time.RFC3339), + StartTime: jobStartTime.UTC().Format(time.RFC3339Nano), + EndTime: jobEndTime.UTC().Format(time.RFC3339Nano), Duration: duration.String(), Files: files, } - if jobResult.Skipped { + if skipped { fmt.Print(" SKIPPED\n") - } else if jobResult.Error != nil { - fmt.Printf(" FAILED: %s\n", jobResult.Error) + } else if err != nil { + fmt.Printf(" FAILED: %s\n", err) failedJobs++ } else { fmt.Print(" COMPLETED\n") } - jobTimings = append(jobTimings, jobTiming) + jobTimings = append(jobTimings, jobInfo) } - // Generate manifest with job timings - UPDATE THIS LINE + // Generate manifest with job timings manifestData, err := collector.GenerateManifest(product, startTime, totalJobs, failedJobs, jobTimings) if err != nil { fmt.Printf("Warning: Failed to generate manifest: %v\n", err) diff --git a/pkg/data_collector/data_collector.go b/pkg/data_collector/data_collector.go index 5f61507..340caba 100644 --- a/pkg/data_collector/data_collector.go +++ b/pkg/data_collector/data_collector.go @@ -110,7 +110,7 @@ type SubPackage struct { ID string `json:"id,omitempty"` } -type JobTiming struct { +type JobInfo struct { Name string `json:"name"` StartTime string `json:"start_time"` EndTime string `json:"end_time"` @@ -168,7 +168,7 @@ func (c *DataCollector) WrapUp(product string) (string, error) { unixTime := time.Now().Unix() unixTimeString := strconv.FormatInt(unixTime, 10) tarballName := fmt.Sprintf("%s-supportpkg-%s.tar.gz", product, unixTimeString) - tarballRootDirName := fmt.Sprintf("%s-supportpkg-%s", product, unixTimeString) + tarballRootDirName := "." err := c.LogFile.Close() if err != nil { @@ -327,12 +327,12 @@ func (c *DataCollector) AllNamespacesExist() bool { return allExist } -func (c *DataCollector) GenerateManifest(product string, startTime time.Time, jobsRun, jobsFailed int, jobTimings []JobTiming) ([]byte, error) { +func (c *DataCollector) GenerateManifest(product string, startTime time.Time, jobsRun, jobsFailed int, jobTimings []JobInfo) ([]byte, error) { manifest := Manifest{ Version: "1.2", // Match the schema version Timestamp: TimestampInfo{ - Start: startTime.UTC().Format(time.RFC3339), - Stop: time.Now().UTC().Format(time.RFC3339), + Start: startTime.UTC().Format(time.RFC3339Nano), + Stop: time.Now().UTC().Format(time.RFC3339Nano), }, PackageType: "root", // As defined in schema enum RootDir: ".", diff --git a/pkg/jobs/job.go b/pkg/jobs/job.go index ca17e0e..d16d976 100644 --- a/pkg/jobs/job.go +++ b/pkg/jobs/job.go @@ -23,6 +23,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "time" "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" @@ -40,7 +41,7 @@ type JobResult struct { Skipped bool } -func (j Job) Collect(dc *data_collector.DataCollector) JobResult { +func (j Job) Collect(dc *data_collector.DataCollector) (error, bool, []string) { ch := make(chan JobResult, 1) ctx, cancel := context.WithTimeout(context.Background(), j.Timeout) @@ -52,32 +53,44 @@ func (j Job) Collect(dc *data_collector.DataCollector) JobResult { select { case <-ctx.Done(): dc.Logger.Printf("\tJob %s has timed out: %s\n---\n", j.Name, ctx.Err()) - return JobResult{Error: fmt.Errorf("Context cancelled: %v", ctx.Err()), Skipped: false} + return fmt.Errorf("Context cancelled: %v", ctx.Err()), false, nil case jobResults := <-ch: + files := j.GetFilesFromJobResult(dc, jobResults) if jobResults.Skipped { dc.Logger.Printf("\tJob %s has been skipped\n---\n", j.Name) - return JobResult{Error: nil, Skipped: true} + return nil, true, files } if jobResults.Error != nil { dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error) - return JobResult{Error: jobResults.Error, Skipped: false} + return jobResults.Error, false, files } for fileName, fileValue := range jobResults.Files { err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm) if err != nil { - return JobResult{Error: fmt.Errorf("MkdirAll failed: %v", err), Skipped: jobResults.Skipped} + return fmt.Errorf("MkdirAll failed: %v", err), jobResults.Skipped, files } file, _ := os.Create(fileName) _, err = file.Write(fileValue) if err != nil { - return JobResult{Error: fmt.Errorf("Write failed: %v", err), Skipped: jobResults.Skipped} + return fmt.Errorf("Write failed: %v", err), jobResults.Skipped, files } _ = file.Close() dc.Logger.Printf("\tJob %s wrote %d bytes to %s\n", j.Name, len(fileValue), fileName) } dc.Logger.Printf("\tJob %s completed successfully\n---\n", j.Name) - return JobResult{Files: jobResults.Files, Error: nil, Skipped: false} + return nil, false, files } } + +func (j Job) GetFilesFromJobResult(dc *data_collector.DataCollector, jobResult JobResult) []string { + files := make([]string, 0, len(jobResult.Files)) + for filename := range jobResult.Files { + if len(filename) > 0 { + packagePath := strings.TrimPrefix(filename, dc.BaseDir) + files = append(files, packagePath) + } + } + return files +} From 0da9305a8dba5173b63c321803f39a5ddc6faaee Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Fri, 18 Jul 2025 16:17:20 +1000 Subject: [PATCH 3/3] Fix: Issue 164: Capture output of exec to files before overall job failure check * Otherwise we fail to capture output from running pods when there is at least one other pod containing the same pod string such as nginx-ingress --- pkg/jobs/job.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pkg/jobs/job.go b/pkg/jobs/job.go index d16d976..b2ee0a8 100644 --- a/pkg/jobs/job.go +++ b/pkg/jobs/job.go @@ -61,10 +61,6 @@ func (j Job) Collect(dc *data_collector.DataCollector) (error, bool, []string) { dc.Logger.Printf("\tJob %s has been skipped\n---\n", j.Name) return nil, true, files } - if jobResults.Error != nil { - dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error) - return jobResults.Error, false, files - } for fileName, fileValue := range jobResults.Files { err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm) @@ -79,6 +75,13 @@ func (j Job) Collect(dc *data_collector.DataCollector) (error, bool, []string) { _ = file.Close() dc.Logger.Printf("\tJob %s wrote %d bytes to %s\n", j.Name, len(fileValue), fileName) } + + if jobResults.Error != nil { + dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error) + fmt.Printf("Files collected so far: %v\n", files) + return jobResults.Error, false, files + } + dc.Logger.Printf("\tJob %s completed successfully\n---\n", j.Name) return nil, false, files }