Skip to content

Commit 26eb5c8

Browse files
authored
[tmpnet] Add check for collection of logs and metrics to custom github action (#3740)
1 parent bbc3874 commit 26eb5c8

File tree

7 files changed

+370
-18
lines changed

7 files changed

+370
-18
lines changed

.github/actions/run-monitored-tmpnet-cmd/action.yml

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,39 @@ runs:
8080
# easy way to compose custom actions for use by other repos
8181
# without running into versioning issues.
8282
- name: Upload tmpnet data
83-
uses: actions/upload-artifact@v4
8483
if: always()
84+
uses: actions/upload-artifact@v4
8585
with:
8686
name: ${{ inputs.artifact_prefix }}-tmpnet-data
8787
path: |
8888
~/.tmpnet/networks
8989
~/.tmpnet/prometheus/prometheus.log
9090
~/.tmpnet/promtail/promtail.log
9191
if-no-files-found: error
92-
# TODO(marun) Check that collection is working by querying prometheus and loki with the GH_* labels above
92+
# TODO(marun) Maybe optionally run these checks in an AfterSuite step?
93+
- name: Check that logs were collected
94+
if: always()
95+
shell: bash
96+
run: go run github.com/ava-labs/avalanchego/tests/fixture/tmpnet/cmd check-logs
97+
env:
98+
LOKI_USERNAME: ${{ inputs.loki_username }}
99+
LOKI_PASSWORD: ${{ inputs.loki_password }}
100+
GH_REPO: ${{ inputs.repository_owner }}/${{ inputs.repository_name }}
101+
GH_WORKFLOW: ${{ inputs.workflow }}
102+
GH_RUN_ID: ${{ inputs.run_id }}
103+
GH_RUN_NUMBER: ${{ inputs.run_number }}
104+
GH_RUN_ATTEMPT: ${{ inputs.run_attempt }}
105+
GH_JOB_ID: ${{ inputs.job }}
106+
- name: Check that metrics were collected
107+
if: always()
108+
shell: bash
109+
run: go run github.com/ava-labs/avalanchego/tests/fixture/tmpnet/cmd check-metrics
110+
env:
111+
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
112+
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
113+
GH_REPO: ${{ inputs.repository_owner }}/${{ inputs.repository_name }}
114+
GH_WORKFLOW: ${{ inputs.workflow }}
115+
GH_RUN_ID: ${{ inputs.run_id }}
116+
GH_RUN_NUMBER: ${{ inputs.run_number }}
117+
GH_RUN_ATTEMPT: ${{ inputs.run_attempt }}
118+
GH_JOB_ID: ${{ inputs.job }}

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,8 @@ github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M
388388
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
389389
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
390390
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
391+
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
392+
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
391393
github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ=
392394
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
393395
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
@@ -478,6 +480,8 @@ github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
478480
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
479481
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
480482
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
483+
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
484+
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
481485
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
482486
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
483487
github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg=

tests/fixture/tmpnet/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ the following non-test files:
2626

2727
| Filename | Types | Purpose |
2828
|:----------------------------|:------------|:------------------------------------------------------------|
29+
| check_monitoring.go | | Enables checking if logs and metrics were collected |
2930
| defaults.go | | Defines common default configuration |
3031
| detached_process_default.go | | Configures detached processes for darwin and linux |
3132
| detached_process_windows.go | | No-op detached process configuration for windows |
Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package tmpnet
5+
6+
import (
7+
"context"
8+
"encoding/base64"
9+
"encoding/json"
10+
"errors"
11+
"fmt"
12+
"io"
13+
"math"
14+
"net/http"
15+
"net/url"
16+
"strconv"
17+
"strings"
18+
"time"
19+
20+
"github.com/prometheus/client_golang/api"
21+
"github.com/prometheus/client_golang/api/prometheus/v1"
22+
"github.com/prometheus/common/model"
23+
"go.uber.org/zap"
24+
25+
"github.com/ava-labs/avalanchego/utils/logging"
26+
)
27+
28+
type getCountFunc func() (int, error)
29+
30+
// waitForCount waits until the provided function returns greater than zero.
31+
func waitForCount(ctx context.Context, log logging.Logger, name string, getCount getCountFunc) error {
32+
err := pollUntilContextCancel(
33+
ctx,
34+
func(_ context.Context) (bool, error) {
35+
count, err := getCount()
36+
if err != nil {
37+
log.Warn("failed to query for collected count",
38+
zap.String("type", name),
39+
zap.Error(err),
40+
)
41+
return false, nil
42+
}
43+
if count > 0 {
44+
log.Info("collected count is non-zero",
45+
zap.String("type", name),
46+
zap.Int("count", count),
47+
)
48+
}
49+
return count > 0, nil
50+
},
51+
)
52+
if err != nil {
53+
return fmt.Errorf("%s not found before timeout: %w", name, err)
54+
}
55+
return nil
56+
}
57+
58+
// CheckLogsExist checks if logs exist for the given network. Github labels are also
59+
// included if provided as env vars (GH_*).
60+
func CheckLogsExist(ctx context.Context, log logging.Logger, networkUUID string) error {
61+
username, password, err := getCollectorCredentials(promtailCmd)
62+
if err != nil {
63+
return fmt.Errorf("failed to get collector credentials: %w", err)
64+
}
65+
66+
url := getLokiURL()
67+
if !strings.HasPrefix(url, "https") {
68+
return fmt.Errorf("loki URL must be https for basic auth to be secure: %s", url)
69+
}
70+
71+
selectors, err := getSelectors(networkUUID)
72+
if err != nil {
73+
return err
74+
}
75+
query := fmt.Sprintf("sum(count_over_time({%s}[1h]))", selectors)
76+
77+
log.Info("checking if logs exist",
78+
zap.String("url", url),
79+
zap.String("query", query),
80+
)
81+
82+
return waitForCount(
83+
ctx,
84+
log,
85+
"logs",
86+
func() (int, error) {
87+
return queryLoki(ctx, url, username, password, query)
88+
},
89+
)
90+
}
91+
92+
// queryLoki executes an instant query against the Loki HTTP API at lokiURL
// using basic auth and returns the value of the first result rounded to the
// nearest integer.
//
// It returns (0, nil) when the query succeeds but yields no results. An error
// is returned if the request cannot be created or executed, the response
// status code is not 200, the body is not valid JSON, the response reports a
// non-success status, or the value is not a finite number encoded as a string.
func queryLoki(
	ctx context.Context,
	lokiURL string,
	username string,
	password string,
	query string,
) (int, error) {
	// Compose the URL. A trailing slash on the base URL is dropped so the
	// request path never starts with a double slash.
	params := url.Values{}
	params.Add("query", query)
	reqURL := fmt.Sprintf("%s/loki/api/v1/query?%s", strings.TrimRight(lokiURL, "/"), params.Encode())

	// Create request
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
	if err != nil {
		return 0, fmt.Errorf("failed to create request: %w", err)
	}

	auth := base64.StdEncoding.EncodeToString([]byte(username + ":" + password))
	req.Header.Set("Authorization", "Basic "+auth)

	// Execute request
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return 0, fmt.Errorf("failed to execute request: %w", err)
	}
	defer resp.Body.Close()

	// Read the whole body up front so it can be included in the error for
	// non-200 responses.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return 0, fmt.Errorf("failed to read response: %w", err)
	}

	if resp.StatusCode != http.StatusOK {
		return 0, fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(body))
	}

	// Parse JSON response. Each result carries a two-element value array
	// whose second element is the sample value encoded as a string.
	var result struct {
		Status string `json:"status"`
		Data   struct {
			Result []struct {
				Value []interface{} `json:"value"`
			} `json:"result"`
		} `json:"data"`
	}

	if err := json.Unmarshal(body, &result); err != nil {
		return 0, fmt.Errorf("failed to parse response: %w", err)
	}
	// A missing status is tolerated; an explicit non-success status is not.
	if result.Status != "" && result.Status != "success" {
		return 0, fmt.Errorf("unexpected response status %q", result.Status)
	}

	// Extract count value
	if len(result.Data.Result) == 0 {
		return 0, nil
	}
	value := result.Data.Result[0].Value
	if len(value) != 2 {
		return 0, errors.New("unexpected value format in response")
	}
	// Convert value to a string
	valueStr, ok := value[1].(string)
	if !ok {
		return 0, errors.New("value is not a string")
	}
	// Convert string to float64 first to handle scientific notation
	floatVal, err := strconv.ParseFloat(valueStr, 64)
	if err != nil {
		return 0, fmt.Errorf("parsing count value: %w", err)
	}
	// ParseFloat accepts "NaN" and "Inf"; converting those to int is
	// implementation-defined, so reject them explicitly.
	if math.IsNaN(floatVal) || math.IsInf(floatVal, 0) {
		return 0, fmt.Errorf("parsing count value: %q is not a finite number", valueStr)
	}
	// Round to nearest integer
	return int(math.Round(floatVal)), nil
}
164+
165+
// CheckMetricsExist checks if metrics exist for the given network. Github labels are also
166+
// included if provided as env vars (GH_*).
167+
func CheckMetricsExist(ctx context.Context, log logging.Logger, networkUUID string) error {
168+
username, password, err := getCollectorCredentials(prometheusCmd)
169+
if err != nil {
170+
return fmt.Errorf("failed to get collector credentials: %w", err)
171+
}
172+
173+
url := getPrometheusURL()
174+
if !strings.HasPrefix(url, "https") {
175+
return fmt.Errorf("prometheus URL must be https for basic auth to be secure: %s", url)
176+
}
177+
178+
selectors, err := getSelectors(networkUUID)
179+
if err != nil {
180+
return err
181+
}
182+
query := fmt.Sprintf("count({%s})", selectors)
183+
184+
log.Info("checking if metrics exist",
185+
zap.String("url", url),
186+
zap.String("query", query),
187+
)
188+
189+
return waitForCount(
190+
ctx,
191+
log,
192+
"metrics",
193+
func() (int, error) {
194+
return queryPrometheus(ctx, log, url, username, password, query)
195+
},
196+
)
197+
}
198+
199+
func queryPrometheus(
200+
ctx context.Context,
201+
log logging.Logger,
202+
url string,
203+
username string,
204+
password string,
205+
query string,
206+
) (int, error) {
207+
// Create client with basic auth
208+
client, err := api.NewClient(api.Config{
209+
Address: url,
210+
RoundTripper: &basicAuthRoundTripper{
211+
username: username,
212+
password: password,
213+
rt: api.DefaultRoundTripper,
214+
},
215+
})
216+
if err != nil {
217+
return 0, fmt.Errorf("failed to create client: %w", err)
218+
}
219+
220+
// Query Prometheus
221+
result, warnings, err := v1.NewAPI(client).QueryRange(ctx, query, v1.Range{
222+
Start: time.Now().Add(-time.Hour),
223+
End: time.Now(),
224+
Step: time.Minute,
225+
})
226+
if err != nil {
227+
return 0, fmt.Errorf("query failed: %w", err)
228+
}
229+
if len(warnings) > 0 {
230+
log.Warn("prometheus query warnings",
231+
zap.Strings("warnings", warnings),
232+
)
233+
}
234+
235+
if matrix, ok := result.(model.Matrix); !ok {
236+
return 0, fmt.Errorf("unexpected result type: %s", result.Type())
237+
} else if len(matrix) > 0 {
238+
return int(matrix[0].Values[len(matrix[0].Values)-1].Value), nil
239+
}
240+
241+
return 0, nil
242+
}
243+
244+
// basicAuthRoundTripper is an http.RoundTripper that adds HTTP basic auth
// credentials to every request before delegating to rt.
type basicAuthRoundTripper struct {
	username, password string
	rt                 http.RoundTripper
}

// RoundTrip sends a copy of req carrying the basic auth credentials through
// the wrapped round tripper. The http.RoundTripper contract forbids modifying
// the caller's request, so the Authorization header is set on a clone.
func (b *basicAuthRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	authed := req.Clone(req.Context())
	authed.SetBasicAuth(b.username, b.password)
	return b.rt.RoundTrip(authed)
}
253+
254+
// getSelectors returns the comma-separated list of selectors.
255+
func getSelectors(networkUUID string) (string, error) {
256+
selectors := []string{}
257+
if len(networkUUID) > 0 {
258+
selectors = append(selectors, fmt.Sprintf(`network_uuid="%s"`, networkUUID))
259+
}
260+
githubLabels := githubLabelsFromEnv()
261+
for label := range githubLabels {
262+
value, err := githubLabels.GetStringVal(label)
263+
if err != nil {
264+
return "", err
265+
}
266+
if len(value) == 0 {
267+
continue
268+
}
269+
selectors = append(selectors, fmt.Sprintf(`%s="%s"`, label, value))
270+
}
271+
return strings.Join(selectors, ","), nil
272+
}

0 commit comments

Comments
 (0)