From 01d7d8b6229b72008ac8475c55502524401650e0 Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Fri, 30 May 2025 15:01:26 +1200 Subject: [PATCH 1/8] Introduce support the NIM product --- cmd/nginx-supportpkg.go | 8 ++++-- pkg/jobs/nim_job_list.go | 62 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 pkg/jobs/nim_job_list.go diff --git a/cmd/nginx-supportpkg.go b/cmd/nginx-supportpkg.go index 7427573..cca5cfa 100644 --- a/cmd/nginx-supportpkg.go +++ b/cmd/nginx-supportpkg.go @@ -57,8 +57,10 @@ func Execute() { jobList = slices.Concat(jobs.CommonJobList(), jobs.NGFJobList()) case "ngx": jobList = slices.Concat(jobs.CommonJobList(), jobs.NGXJobList()) + case "nim": + jobList = slices.Concat(jobs.CommonJobList(), jobs.NIMJobList()) default: - fmt.Printf("Error: product must be in the following list: [nic, ngf, ngx]\n") + fmt.Printf("Error: product must be in the following list: [nic, ngf, ngx, nim]\n") os.Exit(1) } @@ -115,8 +117,8 @@ func Execute() { "Usage:" + "\n nginx-supportpkg -h|--help" + "\n nginx-supportpkg -v|--version" + - "\n nginx-supportpkg [-n|--namespace] ns1 [-n|--namespace] ns2 [-p|--product] [nic,ngf,ngx]" + - "\n nginx-supportpkg [-n|--namespace] ns1,ns2 [-p|--product] [nic,ngf,ngx] \n") + "\n nginx-supportpkg [-n|--namespace] ns1 [-n|--namespace] ns2 [-p|--product] [nic,ngf,ngx,nim]" + + "\n nginx-supportpkg [-n|--namespace] ns1,ns2 [-p|--product] [nic,ngf,ngx,nim] \n") if err := rootCmd.Execute(); err != nil { fmt.Println(err) diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go new file mode 100644 index 0000000..64f7450 --- /dev/null +++ b/pkg/jobs/nim_job_list.go @@ -0,0 +1,62 @@ +/** + +Copyright 2024 F5, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +**/ + +package jobs + +import ( + "context" + "path/filepath" + "strings" + "time" + + "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func NIMJobList() []Job { + jobList := []Job{ + { + Name: "exec-nginx-t", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + command := []string{"/usr/sbin/nginx", "-T"} + for _, namespace := range dc.Namespaces { + pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err) + } else { + for _, pod := range pods.Items { + if strings.Contains(pod.Name, "nginx") { + res, err := dc.PodExecutor(namespace, pod.Name, "nginx", command, ctx) + if err != nil { + jobResult.Error = err + dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) + } else { + jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__nginx-t.txt")] = res + } + } + } + } + } + ch <- jobResult + }, + }, + } + return jobList +} From 401507dfc523dc2acc185f3dbf1e53d74a6fce5c Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Mon, 9 Jun 2025 10:24:55 +1200 Subject: [PATCH 2/8] Feat: Add pv, pvc, storageclass, api-resources, apiversions to the common jobs list --- pkg/jobs/common_job_list.go | 92 +++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/pkg/jobs/common_job_list.go b/pkg/jobs/common_job_list.go index 61d9701..f2e0aa5 100644 --- a/pkg/jobs/common_job_list.go +++ b/pkg/jobs/common_job_list.go @@ -23,12 +23,13 @@ import ( "context" "encoding/json" "fmt" - "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" "io" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "path/filepath" "time" + + "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func CommonJobList() []Job { @@ -88,6 +89,91 @@ func CommonJobList() []Job { ch <- jobResult }, }, + { + Name: "pv-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve persistent volumes list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "persistentvolumes.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "pvc-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.CoreV1().PersistentVolumeClaims(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve persistent volume claims list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "persistentvolumeclaims.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "sc-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.StorageV1().StorageClasses().List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve storage classes list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "storageclasses.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "apiresources-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.DiscoveryClient.ServerPreferredResources() + if err != nil { + dc.Logger.Printf("\tCould not retrieve API resources list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "apiresources.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "apiversions-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.DiscoveryClient.ServerGroups() + if err != nil { + dc.Logger.Printf("\tCould not retrieve API versions list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "apiversions.json")] = jsonResult + } + } + ch <- jobResult + }, + }, { Name: "events-list", Timeout: time.Second * 10, From 1e10c8bba3712828fc63f3f835e2f67f8684522a Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Mon, 30 Jun 2025 22:27:40 +1000 Subject: [PATCH 3/8] Feat: Added clickhouse command to capture from a NIM deployment --- pkg/jobs/nim_job_list.go | 98 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go index 64f7450..c1711e9 100644 --- a/pkg/jobs/nim_job_list.go +++ b/pkg/jobs/nim_job_list.go @@ -28,6 +28,38 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +var clickhouseCommands = map[string]string{ + "events.csv": "SELECT * FROM nms.events WHERE creation_time > subtractHours(now(),${default_hrs}) ORDER BY creation_time DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", + "metrics.csv": "SELECT * FROM nms.metrics WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", + "metrics_1day.csv": "SELECT * FROM nms.metrics_1day WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", + "metrics_1hour.csv": "SELECT * FROM nms.metrics_1hour WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", + "metrics_5min.csv": "SELECT * FROM nms.metrics_5min WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", + "metrics-row-counts.csv": "SELECT count(*), name FROM nms.metrics GROUP BY name FORMAT CSVWithNames", + "events-row-counts.csv": "SELECT count(*), category FROM nms.events GROUP BY category FORMAT CSVWithNames", + "events.sql": "SHOW CREATE TABLE nms.events", + "metrics.sql": "SHOW CREATE TABLE nms.metrics", + "metrics_1day.sql": "SHOW CREATE TABLE nms.metrics_1day", + "metrics_1hour.sql": "SHOW CREATE TABLE nms.metrics_1hour", + "metrics_5min.sql": "SHOW CREATE TABLE nms.metrics_5min", + "table-sizes.stat": "SELECT table, formatReadableSize(sum(bytes)) as size, min(min_date) as min_date, max(max_date) as max_date FROM system.parts WHERE active GROUP BY table ORDER BY table FORMAT CSVWithNames", + "system-asynchronous-metrics.stat": "SELECT * FROM system.asynchronous_metrics FORMAT CSVWithNames", + "system-tables.stat": "SELECT * FROM system.tables FORMAT CSVWithNames", + "system-parts.stat": "SELECT * FROM system.parts ORDER BY table ASC, name DESC, modification_time DESC FORMAT CSVWithNames", + "system-metrics.stat": "SELECT * FROM system.metrics FORMAT CSVWithNames", + "system-settings.stat": "SELECT * FROM system.settings FORMAT CSVWithNames", + "system-query-log.csv": "SELECT * FROM system.query_log WHERE event_time > subtractHours(now(),${default_hrs}) AND event_date > toDate(subtractDays(now(),${max_num_days})) ORDER BY event_time DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", + "system-events.stat": "SELECT * FROM system.events ORDER BY value DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", + "metrics-profile-15-min.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 15 minute) tmstp, count(*) cnt FROM metrics where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics-profile-1-hour.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 hour) tmstp, count(*) cnt FROM metrics where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics_5min-profile-1-hour.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 hour) tmstp, count(*) cnt FROM metrics_5min where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics_5min-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics_5min where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics_1hour-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics_1hour where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics_1hour-profile-1-month.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 month) tmstp, count(*) cnt FROM metrics_1hour where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics_1day-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics_1day where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", + "metrics_1day-profile-1-month.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 month) tmstp, count(*) cnt FROM metrics_1day where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;", +} + func NIMJobList() []Job { jobList := []Job{ { @@ -57,6 +89,72 @@ func NIMJobList() []Job { ch <- jobResult }, }, + { + Name: "exec-clickhouse-version", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + command := []string{"clickhouse-server", "--version"} + for _, namespace := range dc.Namespaces { + pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err) + } else { + for _, pod := range pods.Items { + if strings.Contains(pod.Name, "clickhouse") { + res, err := dc.PodExecutor(namespace, pod.Name, "clickhouse-server", command, ctx) + if err != nil { + jobResult.Error = err + dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) + } else { + jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__clickhouse-server-version.txt")] = res + } + } + } + } + } + ch <- jobResult + }, + }, + { + Name: "exec-clickhouse-data", + Timeout: time.Second * 100, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + // command := []string{"clickhouse-client", "--database", "nms", "-q", "SHOW CREATE TABLE nms.events"} + for _, namespace := range dc.Namespaces { + pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err) + } else { + for _, pod := range pods.Items { + if strings.Contains(pod.Name, "clickhouse") { + for fileName, query := range clickhouseCommands { + // Replace placeholders in the query + query = strings.ReplaceAll(query, "${default_hrs}", "24") + query = strings.ReplaceAll(query, "${max_num_days}", "30") + query = strings.ReplaceAll(query, "${max_log_limit}", "1000") + command := []string{"clickhouse-client", "--database", "nms", "-q", query} + if fileName == "events.csv" || fileName == "metrics.csv" { + command = append(command, "--format_csv_delimiter=,") + } + + // Execute the command + res, err := dc.PodExecutor(namespace, pod.Name, "clickhouse-server", command, ctx) + if err != nil { + jobResult.Error = err + dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) + } else { + jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__"+fileName)] = res + } + } + } + } + } + } + ch <- jobResult + }, + }, } return jobList } From f89f81fa8a755591a1811384ddd8248f68230188 Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Fri, 4 Jul 2025 10:48:42 +1000 Subject: [PATCH 4/8] Feat: dqlite dump for core DB --- pkg/data_collector/data_collector.go | 165 ++++++++++++++++++++++++++- pkg/jobs/nim_job_list.go | 78 ++++++++++++- 2 files changed, 234 insertions(+), 9 deletions(-) diff --git a/pkg/data_collector/data_collector.go b/pkg/data_collector/data_collector.go index cedbda0..6c4814a 100644 --- a/pkg/data_collector/data_collector.go +++ b/pkg/data_collector/data_collector.go @@ -24,9 +24,15 @@ import ( "compress/gzip" "context" "fmt" + "io" + "log" + "os" + "path/filepath" + "strconv" + "time" + helmClient "github.com/mittwald/go-helm-client" "github.com/nginxinc/nginx-k8s-supportpkg/pkg/crds" - "io" corev1 "k8s.io/api/core/v1" crdClient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -38,11 +44,6 @@ import ( "k8s.io/client-go/tools/remotecommand" "k8s.io/client-go/util/homedir" metricsClient "k8s.io/metrics/pkg/client/clientset/versioned" - "log" - "os" - "path/filepath" - "strconv" - "time" ) type DataCollector struct { @@ -266,3 +267,155 @@ func (c *DataCollector) AllNamespacesExist() bool { return allExist } + +// // CopyFileFromPod copies a file from a pod's container to the local filesystem. +// func (c *DataCollector) CopyFileFromPod(namespace, pod, container, srcPath, destPath string, ctx context.Context) error { +// cmd := []string{"tar", "cf", "-", "-C", filepath.Dir(srcPath), filepath.Base(srcPath)} +// req := c.K8sCoreClientSet.CoreV1().RESTClient().Post(). +// Namespace(namespace). +// Resource("pods"). +// Name(pod). +// SubResource("exec"). +// VersionedParams(&corev1.PodExecOptions{ +// Container: container, +// Command: cmd, +// Stdin: false, +// Stdout: true, +// Stderr: true, +// TTY: false, +// }, scheme.ParameterCodec) + +// exec, err := remotecommand.NewSPDYExecutor(c.K8sRestConfig, "POST", req.URL()) +// if err != nil { +// return err +// } +// fmt.Printf("Started remote command\n") +// reader, writer := io.Pipe() +// // var wg sync.WaitGroup +// var streamErr error +// // wg.Add(1) +// go func() { +// defer writer.Close() +// // defer wg.Done() +// streamErr = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ +// Stdout: writer, +// Stderr: os.Stderr, +// }) +// }() + +// tr := tar.NewReader(reader) +// fmt.Printf("New Reader started\n") +// var copyErr error +// for { +// header, err := tr.Next() +// if err == io.EOF { +// fmt.Printf("Reached end of tar stream\n") +// break +// } +// if err != nil { +// copyErr = err +// break +// } +// if header.Typeflag == tar.TypeReg { +// fmt.Printf("Copying file %s to destPath %s\n", header.Name, destPath) +// outFile, err := os.Create(destPath) +// if err != nil { +// copyErr = err +// break +// } +// fmt.Printf("Copying file %s to outFile %s\n", header.Name, outFile.Name()) +// defer outFile.Close() +// _, err = io.Copy(outFile, tr) + +// if err != nil { +// copyErr = err +// break +// } +// } +// } +// // Wait for the goroutine to finish +// fmt.Printf("Waiting for stream to finish\n") +// // wg.Wait() +// fmt.Printf("Stream finished\n") +// if copyErr != nil { +// fmt.Printf("Error copying file: %v\n", copyErr) +// return copyErr +// } +// if streamErr != nil { +// fmt.Printf("Error executing command in pod: %v\n", streamErr) +// return streamErr +// } +// return nil +// } + +// CopyFileFromPod copies a file from a pod's container to the local filesystem. +func (c *DataCollector) CopyFileFromPod(namespace, pod, container, srcPath, destPath string, ctx context.Context) error { + cmd := []string{"tar", "cf", "-", "-C", filepath.Dir(srcPath), filepath.Base(srcPath)} + req := c.K8sCoreClientSet.CoreV1().RESTClient().Post(). + Namespace(namespace). + Resource("pods"). + Name(pod). + SubResource("exec"). + VersionedParams(&corev1.PodExecOptions{ + Container: container, + Command: cmd, + Stdin: false, + Stdout: true, + Stderr: true, + TTY: false, + }, scheme.ParameterCodec) + + exec, err := remotecommand.NewSPDYExecutor(c.K8sRestConfig, "POST", req.URL()) + if err != nil { + return err + } + + // Stream the data from the Pod + var stdout, stderr bytes.Buffer + err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdout: &stdout, + Stderr: &stderr, + }) + if err != nil { + // return fmt.Errorf("error in streaming: %w. Stderr: %s", err, stderr.String()) + return err + } + + // Create a local file to save the output + localFile, err := os.Create(destPath) + if err != nil { + // return fmt.Errorf("failed to create local file: %w", err) + return err + } + defer localFile.Close() + + // Untar the stream and write the content to the local file + tarReader := tar.NewReader(&stdout) + for { + // fmt.Printf("Reading tar stream\n") + // fmt.Println("Tar output length:", stdout.Len()) + header, err := tarReader.Next() + + if err == io.EOF { + // fmt.Printf("Reached end of tar stream\n") + break // End of tar archive + } + if err != nil { + // return fmt.Errorf("error reading tar stream: %w", err) + return err + } + + // Ensure the tar file matches the expected file path + // fmt.Printf("Header Name: %s\n", header.Name) + if header.Name == filepath.Base(srcPath) { + // fmt.Printf("Copying file %s to destPath %s\n", header.Name, destPath) + _, err = io.Copy(localFile, tarReader) + if err != nil { + return fmt.Errorf("failed to write to local file: %w", err) + } + break + } + } + + return nil +} diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go index c1711e9..93fb8ad 100644 --- a/pkg/jobs/nim_job_list.go +++ b/pkg/jobs/nim_job_list.go @@ -28,6 +28,9 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +var dqliteBackupPath = "/etc/nms/scripts/dqlite-backup" +var nmsConfigPath = "/etc/nms/nms.conf" + var clickhouseCommands = map[string]string{ "events.csv": "SELECT * FROM nms.events WHERE creation_time > subtractHours(now(),${default_hrs}) ORDER BY creation_time DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", "metrics.csv": "SELECT * FROM nms.metrics WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames", @@ -63,7 +66,7 @@ var clickhouseCommands = map[string]string{ func NIMJobList() []Job { jobList := []Job{ { - Name: "exec-nginx-t", + Name: "exec-apigw-nginx-t", Timeout: time.Second * 10, Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { jobResult := JobResult{Files: make(map[string][]byte), Error: nil} @@ -74,8 +77,8 @@ func NIMJobList() []Job { dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err) } else { for _, pod := range pods.Items { - if strings.Contains(pod.Name, "nginx") { - res, err := dc.PodExecutor(namespace, pod.Name, "nginx", command, ctx) + if strings.Contains(pod.Name, "apigw") { + res, err := dc.PodExecutor(namespace, pod.Name, "apigw", command, ctx) if err != nil { jobResult.Error = err dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) @@ -89,6 +92,33 @@ func NIMJobList() []Job { ch <- jobResult }, }, + { + Name: "exec-apigw-nginx-version", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + command := []string{"/usr/sbin/nginx", "-v"} + for _, namespace := range dc.Namespaces { + pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err) + } else { + for _, pod := range pods.Items { + if strings.Contains(pod.Name, "apigw") { + res, err := dc.PodExecutor(namespace, pod.Name, "apigw", command, ctx) + if err != nil { + jobResult.Error = err + dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) + } else { + jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__nginx-version.txt")] = res + } + } + } + } + } + ch <- jobResult + }, + }, { Name: "exec-clickhouse-version", Timeout: time.Second * 10, @@ -155,6 +185,48 @@ func NIMJobList() []Job { ch <- jobResult }, }, + { + Name: "exec-dqlite-dump-core", + Timeout: time.Second * 30, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + containerName := "core" + dbName := "core" + outputFile := "/tmp/core.sql" + dbAddr := "0.0.0.0:7891" + + // /etc/nms/scripts/dqlite-backup -n core -c /etc/nms/nms.conf -a 0.0.0.0:7891 -o /tmp/core.sql -k + command := []string{dqliteBackupPath, "-n", dbName, "-c", nmsConfigPath, "-a", dbAddr, "-o", outputFile, "-k"} + for _, namespace := range dc.Namespaces { + pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err) + } else { + for _, pod := range pods.Items { + if strings.Contains(pod.Name, containerName) { + res, err := dc.PodExecutor(namespace, pod.Name, containerName, command, ctx) + if err != nil { + jobResult.Error = err + dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) + } else { + jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+containerName+".txt")] = res + + // Move the dumped file to the base directory + destPathFilename := filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+filepath.Base(outputFile)) + if err := dc.CopyFileFromPod(namespace, pod.Name, containerName, outputFile, destPathFilename, ctx); err != nil { + jobResult.Error = err + dc.Logger.Printf("\tFailed to copy dumped file for pod %s in namespace %s: %v\n", pod.Name, namespace, err) + } else { + dc.Logger.Printf("\tSuccessfully copied dumped file for pod %s in namespace %s\n", pod.Name, namespace) + } + } + } + } + } + } + ch <- jobResult + }, + }, } return jobList } From 879c4ea70dd1b2f061058a7de634d0d15eb074b4 Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Fri, 4 Jul 2025 14:29:58 +1000 Subject: [PATCH 5/8] Feat: dqlite dump for core, dpm, integrations and license databases --- pkg/data_collector/data_collector.go | 87 ---------------------------- pkg/jobs/nim_job_list.go | 61 ++++++++++++------- 2 files changed, 41 insertions(+), 107 deletions(-) diff --git a/pkg/data_collector/data_collector.go b/pkg/data_collector/data_collector.go index 6c4814a..f60969c 100644 --- a/pkg/data_collector/data_collector.go +++ b/pkg/data_collector/data_collector.go @@ -268,86 +268,6 @@ func (c *DataCollector) AllNamespacesExist() bool { return allExist } -// // CopyFileFromPod copies a file from a pod's container to the local filesystem. -// func (c *DataCollector) CopyFileFromPod(namespace, pod, container, srcPath, destPath string, ctx context.Context) error { -// cmd := []string{"tar", "cf", "-", "-C", filepath.Dir(srcPath), filepath.Base(srcPath)} -// req := c.K8sCoreClientSet.CoreV1().RESTClient().Post(). -// Namespace(namespace). -// Resource("pods"). -// Name(pod). -// SubResource("exec"). -// VersionedParams(&corev1.PodExecOptions{ -// Container: container, -// Command: cmd, -// Stdin: false, -// Stdout: true, -// Stderr: true, -// TTY: false, -// }, scheme.ParameterCodec) - -// exec, err := remotecommand.NewSPDYExecutor(c.K8sRestConfig, "POST", req.URL()) -// if err != nil { -// return err -// } -// fmt.Printf("Started remote command\n") -// reader, writer := io.Pipe() -// // var wg sync.WaitGroup -// var streamErr error -// // wg.Add(1) -// go func() { -// defer writer.Close() -// // defer wg.Done() -// streamErr = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ -// Stdout: writer, -// Stderr: os.Stderr, -// }) -// }() - -// tr := tar.NewReader(reader) -// fmt.Printf("New Reader started\n") -// var copyErr error -// for { -// header, err := tr.Next() -// if err == io.EOF { -// fmt.Printf("Reached end of tar stream\n") -// break -// } -// if err != nil { -// copyErr = err -// break -// } -// if header.Typeflag == tar.TypeReg { -// fmt.Printf("Copying file %s to destPath %s\n", header.Name, destPath) -// outFile, err := os.Create(destPath) -// if err != nil { -// copyErr = err -// break -// } -// fmt.Printf("Copying file %s to outFile %s\n", header.Name, outFile.Name()) -// defer outFile.Close() -// _, err = io.Copy(outFile, tr) - -// if err != nil { -// copyErr = err -// break -// } -// } -// } -// // Wait for the goroutine to finish -// fmt.Printf("Waiting for stream to finish\n") -// // wg.Wait() -// fmt.Printf("Stream finished\n") -// if copyErr != nil { -// fmt.Printf("Error copying file: %v\n", copyErr) -// return copyErr -// } -// if streamErr != nil { -// fmt.Printf("Error executing command in pod: %v\n", streamErr) -// return streamErr -// } -// return nil -// } - // CopyFileFromPod copies a file from a pod's container to the local filesystem. func (c *DataCollector) CopyFileFromPod(namespace, pod, container, srcPath, destPath string, ctx context.Context) error { cmd := []string{"tar", "cf", "-", "-C", filepath.Dir(srcPath), filepath.Base(srcPath)} @@ -377,7 +297,6 @@ func (c *DataCollector) CopyFileFromPod(namespace, pod, container, srcPath, dest Stderr: &stderr, }) if err != nil { - // return fmt.Errorf("error in streaming: %w. Stderr: %s", err, stderr.String()) return err } @@ -392,23 +311,17 @@ func (c *DataCollector) CopyFileFromPod(namespace, pod, container, srcPath, dest // Untar the stream and write the content to the local file tarReader := tar.NewReader(&stdout) for { - // fmt.Printf("Reading tar stream\n") - // fmt.Println("Tar output length:", stdout.Len()) header, err := tarReader.Next() if err == io.EOF { - // fmt.Printf("Reached end of tar stream\n") break // End of tar archive } if err != nil { - // return fmt.Errorf("error reading tar stream: %w", err) return err } // Ensure the tar file matches the expected file path - // fmt.Printf("Header Name: %s\n", header.Name) if header.Name == filepath.Base(srcPath) { - // fmt.Printf("Copying file %s to destPath %s\n", header.Name, destPath) _, err = io.Copy(localFile, tarReader) if err != nil { return fmt.Errorf("failed to write to local file: %w", err) diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go index 93fb8ad..5f4087f 100644 --- a/pkg/jobs/nim_job_list.go +++ b/pkg/jobs/nim_job_list.go @@ -186,38 +186,59 @@ func NIMJobList() []Job { }, }, { - Name: "exec-dqlite-dump-core", + Name: "exec-dqlite-dump", Timeout: time.Second * 30, Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { jobResult := JobResult{Files: make(map[string][]byte), Error: nil} - containerName := "core" - dbName := "core" - outputFile := "/tmp/core.sql" - dbAddr := "0.0.0.0:7891" + + dbConfigs := []struct { + dbName string + containerName string + outputFile string + dbAddr string + }{ + {"core", "core", "/tmp/core.sql", "0.0.0.0:7891"}, + {"dpm", "dpm", "/tmp/dpm.sql", "0.0.0.0:7890"}, + {"integrations", "integrations", "/tmp/integrations.sql", "0.0.0.0:7892"}, + {"license", "integrations", "/tmp/license.sql", "0.0.0.0:7893"}, + // Add more containers as needed + } // /etc/nms/scripts/dqlite-backup -n core -c /etc/nms/nms.conf -a 0.0.0.0:7891 -o /tmp/core.sql -k - command := []string{dqliteBackupPath, "-n", dbName, "-c", nmsConfigPath, "-a", dbAddr, "-o", outputFile, "-k"} + for _, namespace := range dc.Namespaces { pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) if err != nil { dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err) } else { - for _, pod := range pods.Items { - if strings.Contains(pod.Name, containerName) { - res, err := dc.PodExecutor(namespace, pod.Name, containerName, command, ctx) - if err != nil { - jobResult.Error = err - dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) - } else { - jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+containerName+".txt")] = res - - // Move the dumped file to the base directory - destPathFilename := filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+filepath.Base(outputFile)) - if err := dc.CopyFileFromPod(namespace, pod.Name, containerName, outputFile, destPathFilename, ctx); err != nil { + for _, config := range dbConfigs { + command := []string{dqliteBackupPath, "-n", config.dbName, "-c", nmsConfigPath, "-a", config.dbAddr, "-o", config.outputFile, "-k"} + for _, pod := range pods.Items { + if strings.Contains(pod.Name, config.containerName) { + res, err := dc.PodExecutor(namespace, pod.Name, config.containerName, command, ctx) + if err != nil { jobResult.Error = err - dc.Logger.Printf("\tFailed to copy dumped file for pod %s in namespace %s: %v\n", pod.Name, namespace, err) + dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err) } else { - dc.Logger.Printf("\tSuccessfully copied dumped file for pod %s in namespace %s\n", pod.Name, namespace) + jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+config.dbName+".txt")] = res + + // Copy the dumped file from the pod to the host + destPathFilename := filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+filepath.Base(config.outputFile)) + if err := dc.CopyFileFromPod(namespace, pod.Name, config.containerName, config.outputFile, destPathFilename, ctx); err != nil { + jobResult.Error = err + dc.Logger.Printf("\tFailed to copy dumped file %s from pod %s in namespace %s to %s: %v\n", config.outputFile, pod.Name, namespace, destPathFilename, err) + } else { + dc.Logger.Printf("\tSuccessfully copied dumped file %s from pod %s in namespace %s to %s\n", config.outputFile, pod.Name, namespace, destPathFilename) + } + + // Remove/delete the dumped file from the pod + _, err := dc.PodExecutor(namespace, pod.Name, config.containerName, []string{"rm", "-f", config.outputFile}, ctx) + if err != nil { + jobResult.Error = err + dc.Logger.Printf("\tFailed to remove dumped file %s from pod %s in namespace %s: %v\n", config.outputFile, pod.Name, namespace, err) + } else { + dc.Logger.Printf("\tSuccessfully removed dumped file %s from pod %s in namespace %s\n", config.outputFile, pod.Name, namespace) + } } } } From 4bcbf7145119c9a9fb8625c3ff95c4364422e0e6 Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Fri, 4 Jul 2025 21:03:36 +1000 Subject: [PATCH 6/8] chore: Set dqlite and clickhouse data collection timeout to 10 minutes --- pkg/jobs/nim_job_list.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go index 5f4087f..5812592 100644 --- a/pkg/jobs/nim_job_list.go +++ b/pkg/jobs/nim_job_list.go @@ -148,7 +148,7 @@ func NIMJobList() []Job { }, { Name: "exec-clickhouse-data", - Timeout: time.Second * 100, + Timeout: time.Second * 600, Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { jobResult := JobResult{Files: make(map[string][]byte), Error: nil} // command := []string{"clickhouse-client", "--database", "nms", "-q", "SHOW CREATE TABLE nms.events"} @@ -187,7 +187,7 @@ func NIMJobList() []Job { }, { Name: "exec-dqlite-dump", - Timeout: time.Second * 30, + Timeout: time.Second * 600, Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { jobResult := JobResult{Files: make(map[string][]byte), Error: nil} From b3cdd58f42ca7582694688fd47d694f531fabc85 Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Mon, 7 Jul 2025 13:31:11 +1000 Subject: [PATCH 7/8] Feat: Provide the ability to exclude dqlite and clickhouse (timeseries) data collection --- cmd/nginx-supportpkg.go | 21 ++++++++++++++++++--- pkg/data_collector/data_collector.go | 20 +++++++++++--------- pkg/jobs/job.go | 22 ++++++++++++---------- pkg/jobs/nim_job_list.go | 17 ++++++++++++++++- 4 files changed, 57 insertions(+), 23 deletions(-) diff --git a/cmd/nginx-supportpkg.go b/cmd/nginx-supportpkg.go index cca5cfa..9444b58 100644 --- a/cmd/nginx-supportpkg.go +++ b/cmd/nginx-supportpkg.go @@ -33,6 +33,8 @@ func Execute() { var namespaces []string var product string + var excludeDBData bool + var excludeTimeSeriesData bool var jobList []jobs.Job var rootCmd = &cobra.Command{ @@ -47,6 +49,13 @@ func Execute() { os.Exit(1) } + if excludeDBData { + collector.ExcludeDBData = true + } + if excludeTimeSeriesData { + collector.ExcludeTimeSeriesData = true + } + collector.Logger.Printf("Starting kubectl-nginx-supportpkg - version: %s - build: %s", version.Version, version.Build) collector.Logger.Printf("Input args are %v", os.Args) @@ -68,8 +77,10 @@ func Execute() { failedJobs := 0 for _, job := range jobList { fmt.Printf("Running job %s...", job.Name) - err = job.Collect(collector) - if err != nil { + err, Skipped := job.Collect(collector) + if Skipped { + fmt.Print(" SKIPPED\n") + } else if err != nil { fmt.Printf(" Error: %s\n", err) failedJobs++ } else { @@ -108,6 +119,9 @@ func Execute() { os.Exit(1) } + rootCmd.Flags().BoolVarP(&excludeDBData, "exclude-db-data", "d", false, "exclude DB data collection") + rootCmd.Flags().BoolVarP(&excludeTimeSeriesData, "exclude-time-series-data", "t", false, "exclude time series data collection") + versionStr := "nginx-supportpkg - version: " + version.Version + " - build: " + version.Build + "\n" rootCmd.SetVersionTemplate(versionStr) rootCmd.Version = versionStr @@ -118,7 +132,8 @@ func Execute() { "\n nginx-supportpkg -h|--help" + "\n nginx-supportpkg -v|--version" + "\n nginx-supportpkg [-n|--namespace] ns1 [-n|--namespace] ns2 [-p|--product] [nic,ngf,ngx,nim]" + - "\n nginx-supportpkg [-n|--namespace] ns1,ns2 [-p|--product] [nic,ngf,ngx,nim] \n") + "\n nginx-supportpkg [-n|--namespace] ns1,ns2 [-p|--product] [nic,ngf,ngx,nim]" + + "\n nginx-supportpkg [-n|--namespace] ns1 [-n|--namespace] ns2 [-p|--product] [nim] [-d|--exclude-db-data] [-t|--exclude-time-series-data] \n") if err := rootCmd.Execute(); err != nil { fmt.Println(err) diff --git a/pkg/data_collector/data_collector.go b/pkg/data_collector/data_collector.go index f60969c..760d5c4 100644 --- a/pkg/data_collector/data_collector.go +++ b/pkg/data_collector/data_collector.go @@ -47,15 +47,17 @@ import ( ) type DataCollector struct { - BaseDir string - Namespaces []string - Logger *log.Logger - LogFile *os.File - K8sRestConfig *rest.Config - K8sCoreClientSet *kubernetes.Clientset - K8sCrdClientSet *crdClient.Clientset - K8sMetricsClientSet *metricsClient.Clientset - K8sHelmClientSet map[string]helmClient.Client + BaseDir string + Namespaces []string + Logger *log.Logger + LogFile *os.File + K8sRestConfig *rest.Config + K8sCoreClientSet *kubernetes.Clientset + K8sCrdClientSet *crdClient.Clientset + K8sMetricsClientSet *metricsClient.Clientset + K8sHelmClientSet map[string]helmClient.Client + ExcludeDBData bool + ExcludeTimeSeriesData bool } func NewDataCollector(namespaces ...string) (*DataCollector, error) { diff --git a/pkg/jobs/job.go b/pkg/jobs/job.go index 634acd3..f0be51f 100644 --- a/pkg/jobs/job.go +++ b/pkg/jobs/job.go @@ -20,12 +20,12 @@ package jobs import ( "context" - "errors" "fmt" - "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" "os" "path/filepath" "time" + + "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" ) type Job struct { @@ -35,11 +35,12 @@ type Job struct { } type JobResult struct { - Files map[string][]byte - Error error + Files map[string][]byte + Error error + Skipped bool } -func (j Job) Collect(dc *data_collector.DataCollector) error { +func (j Job) Collect(dc *data_collector.DataCollector) (error, bool) { ch := make(chan JobResult, 1) ctx, cancel := context.WithTimeout(context.Background(), j.Timeout) @@ -51,28 +52,29 @@ func (j Job) Collect(dc *data_collector.DataCollector) error { select { case <-ctx.Done(): dc.Logger.Printf("\tJob %s has timed out: %s\n---\n", j.Name, ctx.Err()) - return errors.New(fmt.Sprintf("Context cancelled: %v", ctx.Err())) + err := fmt.Errorf("Context cancelled: %v", ctx.Err()) + return err, false case jobResults := <-ch: if jobResults.Error != nil { dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error) - return jobResults.Error + return jobResults.Error, jobResults.Skipped } for fileName, fileValue := range jobResults.Files { err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm) if err != nil { - return fmt.Errorf("MkdirAll failed: %v", err) + return fmt.Errorf("MkdirAll failed: %v", err), jobResults.Skipped } file, _ := os.Create(fileName) _, err = file.Write(fileValue) if err != nil { - return fmt.Errorf("Write failed: %v", err) + return fmt.Errorf("Write failed: %v", err), jobResults.Skipped } _ = file.Close() dc.Logger.Printf("\tJob %s wrote %d bytes to %s\n", j.Name, len(fileValue), fileName) } dc.Logger.Printf("\tJob %s completed successfully\n---\n", j.Name) - return nil + return nil, jobResults.Skipped } } diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go index 5812592..6642617 100644 --- a/pkg/jobs/nim_job_list.go +++ b/pkg/jobs/nim_job_list.go @@ -151,7 +151,14 @@ func NIMJobList() []Job { Timeout: time.Second * 600, Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { jobResult := JobResult{Files: make(map[string][]byte), Error: nil} - // command := []string{"clickhouse-client", "--database", "nms", "-q", "SHOW CREATE TABLE nms.events"} + + if dc.ExcludeTimeSeriesData { + dc.Logger.Printf("\tSkipping clickhouse data dump as ExcludeTimeSeriesData is set to true\n") + jobResult.Skipped = true + ch <- jobResult + return + } + for _, namespace := range dc.Namespaces { pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) if err != nil { @@ -191,6 +198,14 @@ func NIMJobList() []Job { Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + if dc.ExcludeDBData { + dc.Logger.Printf("\tSkipping dqlite dump as ExcludeDBData is set to true\n") + jobResult.Skipped = true + ch <- jobResult + return + } + // Check if ExcludeDBData is set to true, skip dump if so + // (already handled above with dc.ExcludeDBData) dbConfigs := []struct { dbName string containerName string From cdf2a8eee3921a43cbe8e729afd1b2160447cd9d Mon Sep 17 00:00:00 2001 From: Madhu RAJAGOPAL Date: Tue, 8 Jul 2025 13:20:04 +1000 Subject: [PATCH 8/8] Chore: Refactor * Pass DataCollector struct as a reference from main * Improve logic around job result/status --- cmd/nginx-supportpkg.go | 25 +++++++------------- pkg/data_collector/data_collector.go | 35 +++++++++++++--------------- pkg/jobs/job.go | 9 ++++--- 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/cmd/nginx-supportpkg.go b/cmd/nginx-supportpkg.go index 9444b58..fb03994 100644 --- a/cmd/nginx-supportpkg.go +++ b/cmd/nginx-supportpkg.go @@ -31,11 +31,9 @@ import ( func Execute() { - var namespaces []string var product string - var excludeDBData bool - var excludeTimeSeriesData bool var jobList []jobs.Job + collector := data_collector.DataCollector{} var rootCmd = &cobra.Command{ Use: "nginx-supportpkg", @@ -43,19 +41,12 @@ func Execute() { Long: `nginx-supportpkg - a tool to create Ingress Controller diagnostics package`, Run: func(cmd *cobra.Command, args []string) { - collector, err := data_collector.NewDataCollector(namespaces...) + err := data_collector.NewDataCollector(&collector) if err != nil { fmt.Println(fmt.Errorf("unable to start data collector: %s", err)) os.Exit(1) } - if excludeDBData { - collector.ExcludeDBData = true - } - if excludeTimeSeriesData { - collector.ExcludeTimeSeriesData = true - } - collector.Logger.Printf("Starting kubectl-nginx-supportpkg - version: %s - build: %s", version.Version, version.Build) collector.Logger.Printf("Input args are %v", os.Args) @@ -77,14 +68,14 @@ func Execute() { failedJobs := 0 for _, job := range jobList { fmt.Printf("Running job %s...", job.Name) - err, Skipped := job.Collect(collector) + err, Skipped := job.Collect(&collector) if Skipped { fmt.Print(" SKIPPED\n") } else if err != nil { - fmt.Printf(" Error: %s\n", err) + fmt.Printf(" FAILED: %s\n", err) failedJobs++ } else { - fmt.Print(" OK\n") + fmt.Print(" COMPLETED\n") } } @@ -107,7 +98,7 @@ func Execute() { }, } - rootCmd.Flags().StringSliceVarP(&namespaces, "namespace", "n", []string{}, "list of namespaces to collect information from") + rootCmd.Flags().StringSliceVarP(&collector.Namespaces, "namespace", "n", []string{}, "list of namespaces to collect information from") if err := rootCmd.MarkFlagRequired("namespace"); err != nil { fmt.Println(err) os.Exit(1) @@ -119,8 +110,8 @@ func Execute() { os.Exit(1) } - rootCmd.Flags().BoolVarP(&excludeDBData, "exclude-db-data", "d", false, "exclude DB data collection") - rootCmd.Flags().BoolVarP(&excludeTimeSeriesData, "exclude-time-series-data", "t", false, "exclude time series data collection") + rootCmd.Flags().BoolVarP(&collector.ExcludeDBData, "exclude-db-data", "d", false, "exclude DB data collection") + rootCmd.Flags().BoolVarP(&collector.ExcludeTimeSeriesData, "exclude-time-series-data", "t", false, "exclude time series data collection") versionStr := "nginx-supportpkg - version: " + version.Version + " - build: " + version.Build + "\n" rootCmd.SetVersionTemplate(versionStr) diff --git a/pkg/data_collector/data_collector.go b/pkg/data_collector/data_collector.go index 760d5c4..cc45880 100644 --- a/pkg/data_collector/data_collector.go +++ b/pkg/data_collector/data_collector.go @@ -60,16 +60,16 @@ type DataCollector struct { ExcludeTimeSeriesData bool } -func NewDataCollector(namespaces ...string) (*DataCollector, error) { +func NewDataCollector(collector *DataCollector) error { tmpDir, err := os.MkdirTemp("", "-pkg-diag") if err != nil { - return nil, fmt.Errorf("unable to create temp directory: %s", err) + return fmt.Errorf("unable to create temp directory: %s", err) } logFile, err := os.OpenFile(filepath.Join(tmpDir, "supportpkg.log"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - return nil, fmt.Errorf("unable to create log file: %s", err) + return fmt.Errorf("unable to create log file: %s", err) } // Find config @@ -80,30 +80,27 @@ func NewDataCollector(namespaces ...string) (*DataCollector, error) { config, err := clientcmd.BuildConfigFromFlags("", kubeConfig) if err != nil { - return nil, fmt.Errorf("unable to connect to k8s using file %s: %s", kubeConfig, err) - } - - dc := DataCollector{ - BaseDir: tmpDir, - Namespaces: namespaces, - LogFile: logFile, - Logger: log.New(logFile, "", log.LstdFlags|log.LUTC|log.Lmicroseconds|log.Lshortfile), - K8sHelmClientSet: make(map[string]helmClient.Client), + return fmt.Errorf("unable to connect to k8s using file %s: %s", kubeConfig, err) } + // Set up the DataCollector options + collector.BaseDir = tmpDir + collector.LogFile = logFile + collector.Logger = log.New(logFile, "", log.LstdFlags|log.LUTC|log.Lmicroseconds|log.Lshortfile) + collector.K8sHelmClientSet = make(map[string]helmClient.Client) //Initialize clients - dc.K8sRestConfig = config - dc.K8sCoreClientSet, _ = kubernetes.NewForConfig(config) - dc.K8sCrdClientSet, _ = crdClient.NewForConfig(config) - dc.K8sMetricsClientSet, _ = metricsClient.NewForConfig(config) - for _, namespace := range dc.Namespaces { - dc.K8sHelmClientSet[namespace], _ = helmClient.NewClientFromRestConf(&helmClient.RestConfClientOptions{ + collector.K8sRestConfig = config + collector.K8sCoreClientSet, _ = kubernetes.NewForConfig(config) + collector.K8sCrdClientSet, _ = crdClient.NewForConfig(config) + collector.K8sMetricsClientSet, _ = metricsClient.NewForConfig(config) + for _, namespace := range collector.Namespaces { + collector.K8sHelmClientSet[namespace], _ = helmClient.NewClientFromRestConf(&helmClient.RestConfClientOptions{ Options: &helmClient.Options{Namespace: namespace}, RestConfig: config, }) } - return &dc, nil + return nil } func (c *DataCollector) WrapUp(product string) (string, error) { diff --git a/pkg/jobs/job.go b/pkg/jobs/job.go index f0be51f..3a0fe25 100644 --- a/pkg/jobs/job.go +++ b/pkg/jobs/job.go @@ -52,13 +52,16 @@ func (j Job) Collect(dc *data_collector.DataCollector) (error, bool) { select { case <-ctx.Done(): dc.Logger.Printf("\tJob %s has timed out: %s\n---\n", j.Name, ctx.Err()) - err := fmt.Errorf("Context cancelled: %v", ctx.Err()) - return err, false + return fmt.Errorf("Context cancelled: %v", ctx.Err()), false case jobResults := <-ch: + if jobResults.Skipped { + dc.Logger.Printf("\tJob %s has been skipped\n---\n", j.Name) + return nil, true + } if jobResults.Error != nil { dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error) - return jobResults.Error, jobResults.Skipped + return jobResults.Error, false } for fileName, fileValue := range jobResults.Files {