Commit 57a622b

Merge branch 'master' into simplex-bls

2 parents aa8fb57 + 697b668

File tree: 11 files changed, +193 −22 lines

.envrc

Lines changed: 3 additions & 0 deletions

@@ -16,5 +16,8 @@ export AVALANCHEGO_PATH="${AVALANCHEGO_PATH:-$PWD/bin/avalanchego}"
 mkdir -p $PWD/build/plugins # avalanchego will FATAL if the directory does not exist
 export AVAGO_PLUGIN_DIR="${AVAGO_PLUGIN_DIR:-$PWD/build/plugins}" # Use an existing value if set
 
+# Default to tmpnetctl targeting the last deployed tmpnet network
+export TMPNET_NETWORK_DIR="${TMPNET_NETWORK_DIR:-${HOME}/.tmpnet/networks/latest}"
+
 # Allow individuals to add their own customisation
 source_env_if_exists .envrc.local
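
With this default exported by direnv, tmpnetctl invocations should target the most recently deployed network without an explicit network-dir argument; ~/.tmpnet/networks/latest is presumably a symlink that tmpnet maintains to the last network it started.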

Taskfile.yml

Lines changed: 11 additions & 0 deletions

@@ -232,6 +232,17 @@ tasks:
       - task: generate-load-contract-bindings
       - task: build
      - cmd: go run ./tests/load/c/main --avalanchego-path=./build/avalanchego {{.CLI_ARGS}}
+
+  test-load-kube:
+    desc: Runs load tests against a network deployed to kube
+    cmds:
+      - task: generate-load-contract-bindings
+      - cmd: bash -x ./scripts/tests.load.kube.sh {{.CLI_ARGS}}
+
+  test-load-exclusive:
+    desc: Runs load tests against kube with exclusive scheduling
+    cmds:
+      - cmd: go run ./tests/load/c/main --runtime=kube --kube-use-exclusive-scheduling {{.CLI_ARGS}}
 
   test-unit:
     desc: Runs unit tests
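
Both new tasks forward extra arguments via {{.CLI_ARGS}}, so an invocation like task test-load-kube -- --kube-namespace=tmpnet (the flag value here is illustrative) should pass flags straight through to the underlying script or Go binary.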

scripts/tests.load.kube.sh

Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Run load test against nodes deployed to a kind cluster
+
+if ! [[ "$0" =~ scripts/tests.load.kube.sh ]]; then
+  echo "must be run from repository root"
+  exit 255
+fi
+
+# This script will use kubeconfig arguments if supplied
+./scripts/start_kind_cluster.sh "$@"
+
+# Build AvalancheGo image
+AVALANCHEGO_IMAGE="localhost:5001/avalanchego"
+DOCKER_IMAGE="$AVALANCHEGO_IMAGE" FORCE_TAG_LATEST=1 ./scripts/build_image.sh
+
+go run ./tests/load/c/main --runtime=kube --kube-image="$AVALANCHEGO_IMAGE" "$@"
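
The localhost:5001 registry address follows the common local-registry convention for kind clusters; start_kind_cluster.sh is presumably responsible for provisioning that registry so the image pushed by build_image.sh is pullable from inside the cluster. Note that "$@" is forwarded to both the cluster script and the load test, so only flags both accept (e.g. kubeconfig arguments, per the comment) are safe to pass.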

tests/e2e/vms/xsvm.go

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ func XSVMSubnetsOrPanic(nodes ...*tmpnet.Node) []*tmpnet.Subnet {
     }
 }
 
-var _ = ginkgo.Describe("[XSVM]", func() {
+var _ = ginkgo.Describe("[XSVM]", ginkgo.Label("xsvm"), func() {
     tc := e2e.NewTestContext()
     require := require.New(tc)
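
With the new ginkgo.Label, the suite can be selected or excluded via Ginkgo v2 label filtering (e.g. ginkgo --label-filter=xsvm), in addition to matching on the [XSVM] description text.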

tests/fixture/e2e/helpers.go

Lines changed: 10 additions & 2 deletions

@@ -281,8 +281,16 @@ func StartNetwork(
 ) {
     require := require.New(tc)
 
-    err := tmpnet.BootstrapNewNetwork(
-        tc.DefaultContext(),
+    nodeCount := len(network.Nodes)
+    timeout, err := network.DefaultRuntimeConfig.GetNetworkStartTimeout(nodeCount)
+    require.NoError(err)
+    tc.Log().Info("waiting for network to start",
+        zap.Float64("timeoutSeconds", timeout.Seconds()),
+    )
+    ctx := tc.ContextWithTimeout(timeout)
+
+    err = tmpnet.BootstrapNewNetwork(
+        ctx,
         tc.Log(),
         network,
         rootNetworkDir,

tests/fixture/tmpnet/flags/kube_runtime.go

Lines changed: 43 additions & 15 deletions

@@ -20,29 +20,33 @@ const (
 )
 
 var (
-    errKubeNamespaceRequired     = errors.New("--kube-namespace is required")
-    errKubeImageRequired         = errors.New("--kube-image is required")
-    errKubeMinVolumeSizeRequired = fmt.Errorf("--kube-volume-size must be >= %d", tmpnet.MinimumVolumeSizeGB)
+    errKubeNamespaceRequired       = errors.New("--kube-namespace is required")
+    errKubeImageRequired           = errors.New("--kube-image is required")
+    errKubeMinVolumeSizeRequired   = fmt.Errorf("--kube-volume-size must be >= %d", tmpnet.MinimumVolumeSizeGB)
+    errKubeSchedulingLabelRequired = errors.New("--kube-scheduling-label-key and --kube-scheduling-label-value are required when --kube-use-exclusive-scheduling is enabled")
 )
 
 type kubeRuntimeVars struct {
-    namespace    string
-    image        string
-    volumeSizeGB uint
-    config       *KubeconfigVars
+    namespace              string
+    image                  string
+    volumeSizeGB           uint
+    useExclusiveScheduling bool
+    schedulingLabelKey     string
+    schedulingLabelValue   string
+    config                 *KubeconfigVars
 }
 
 func (v *kubeRuntimeVars) registerWithFlag() {
     v.config = newKubeconfigFlagVars(kubeDocPrefix)
-    v.register(flag.StringVar, flag.UintVar)
+    v.register(flag.StringVar, flag.UintVar, flag.BoolVar)
 }
 
 func (v *kubeRuntimeVars) registerWithFlagSet(flagSet *pflag.FlagSet) {
     v.config = newKubeconfigFlagSetVars(flagSet, kubeDocPrefix)
-    v.register(flagSet.StringVar, flagSet.UintVar)
+    v.register(flagSet.StringVar, flagSet.UintVar, flagSet.BoolVar)
 }
 
-func (v *kubeRuntimeVars) register(stringVar varFunc[string], uintVar varFunc[uint]) {
+func (v *kubeRuntimeVars) register(stringVar varFunc[string], uintVar varFunc[uint], boolVar varFunc[bool]) {
     stringVar(
         &v.namespace,
         "kube-namespace",
@@ -64,6 +68,24 @@ func (v *kubeRuntimeVars) register(stringVar varFunc[string], uintVar varFunc[ui
             tmpnet.MinimumVolumeSizeGB,
         ),
     )
+    boolVar(
+        &v.useExclusiveScheduling,
+        "kube-use-exclusive-scheduling",
+        false,
+        kubeDocPrefix+"Whether to schedule each AvalancheGo node to a dedicated Kubernetes node",
+    )
+    stringVar(
+        &v.schedulingLabelKey,
+        "kube-scheduling-label-key",
+        "purpose",
+        kubeDocPrefix+"The label key to use for exclusive scheduling for node selection and toleration",
+    )
+    stringVar(
+        &v.schedulingLabelValue,
+        "kube-scheduling-label-value",
+        "higher-spec",
+        kubeDocPrefix+"The label value to use for exclusive scheduling for node selection and toleration",
+    )
 }
 
 func (v *kubeRuntimeVars) getKubeRuntimeConfig() (*tmpnet.KubeRuntimeConfig, error) {
@@ -76,11 +98,17 @@ func (v *kubeRuntimeVars) getKubeRuntimeConfig() (*tmpnet.KubeRuntimeConfig, err
     if v.volumeSizeGB < tmpnet.MinimumVolumeSizeGB {
         return nil, errKubeMinVolumeSizeRequired
     }
+    if v.useExclusiveScheduling && (len(v.schedulingLabelKey) == 0 || len(v.schedulingLabelValue) == 0) {
+        return nil, errKubeSchedulingLabelRequired
+    }
     return &tmpnet.KubeRuntimeConfig{
-        ConfigPath:    v.config.Path,
-        ConfigContext: v.config.Context,
-        Namespace:     v.namespace,
-        Image:         v.image,
-        VolumeSizeGB:  v.volumeSizeGB,
+        ConfigPath:             v.config.Path,
+        ConfigContext:          v.config.Context,
+        Namespace:              v.namespace,
+        Image:                  v.image,
+        VolumeSizeGB:           v.volumeSizeGB,
+        UseExclusiveScheduling: v.useExclusiveScheduling,
+        SchedulingLabelKey:     v.schedulingLabelKey,
+        SchedulingLabelValue:   v.schedulingLabelValue,
    }, nil
 }
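
For callers that build the runtime config in code rather than from flags, the new fields map directly onto tmpnet.KubeRuntimeConfig. A minimal sketch, assuming the repository's standard module path; the namespace and image values are illustrative, while the label key/value mirror the flag defaults:

package main

import (
    "fmt"

    "github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
)

func main() {
    cfg := &tmpnet.KubeRuntimeConfig{
        Namespace:              "tmpnet",                     // illustrative; the flag has no default
        Image:                  "localhost:5001/avalanchego", // illustrative
        VolumeSizeGB:           tmpnet.MinimumVolumeSizeGB,
        UseExclusiveScheduling: true,
        SchedulingLabelKey:     "purpose",     // flag default
        SchedulingLabelValue:   "higher-spec", // flag default
    }
    fmt.Printf("%+v\n", cfg)
}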

tests/fixture/tmpnet/kube_runtime.go

Lines changed: 73 additions & 0 deletions

@@ -43,6 +43,12 @@
     // - EBS volume sizes are in GB
     // - The minimum number greater than 1GB is 2GB
     MinimumVolumeSizeGB = 2
+
+    // All statefulsets configured for exclusive scheduling will use
+    // anti-affinity with the following labeling to ensure their pods
+    // are never scheduled to the same nodes.
+    antiAffinityLabelKey   = "tmpnet-scheduling"
+    antiAffinityLabelValue = "exclusive"
 )
 
 type KubeRuntimeConfig struct {
@@ -58,6 +64,12 @@
     Image string `json:"image,omitempty"`
     // Size in gigabytes of the PersistentVolumeClaim to allocate for the node
     VolumeSizeGB uint `json:"volumeSizeGB,omitempty"`
+    // Whether to schedule each AvalancheGo node to a dedicated Kubernetes node
+    UseExclusiveScheduling bool `json:"useExclusiveScheduling,omitempty"`
+    // Label key to use for exclusive scheduling for node selection and toleration
+    SchedulingLabelKey string `json:"schedulingLabelKey,omitempty"`
+    // Label value to use for exclusive scheduling for node selection and toleration
+    SchedulingLabelValue string `json:"schedulingLabelValue,omitempty"`
 }
 
 type KubeRuntime struct {
@@ -265,6 +277,22 @@ func (p *KubeRuntime) Start(ctx context.Context) error {
         p.node.getMonitoringLabels(),
     )
 
+    if runtimeConfig.UseExclusiveScheduling {
+        labelKey := runtimeConfig.SchedulingLabelKey
+        labelValue := runtimeConfig.SchedulingLabelValue
+        log.Debug("configuring exclusive scheduling",
+            zap.String("nodeID", nodeID),
+            zap.String("namespace", namespace),
+            zap.String("statefulSet", statefulSetName),
+            zap.String("schedulingLabelKey", labelKey),
+            zap.String("schedulingLabelValue", labelValue),
+        )
+        if labelKey == "" || labelValue == "" {
+            return errors.New("scheduling label key and value must be non-empty when exclusive scheduling is enabled")
+        }
+        configureExclusiveScheduling(&statefulSet.Spec.Template, labelKey, labelValue)
+    }
+
     _, err = clientset.AppsV1().StatefulSets(runtimeConfig.Namespace).Create(
         ctx,
         statefulSet,
@@ -764,3 +792,48 @@
     flags[config.HTTPHostKey] = "0.0.0.0"
     return flags, nil
 }
+
+// configureExclusiveScheduling ensures that the provided template schedules only to nodes with the provided
+// labeling, tolerates a taint that matches the labeling, and uses anti-affinity to ensure only a single
+// avalanchego pod is scheduled to a given target node.
+func configureExclusiveScheduling(template *corev1.PodTemplateSpec, labelKey string, labelValue string) {
+    podSpec := &template.Spec
+
+    // Configure node selection
+    if podSpec.NodeSelector == nil {
+        podSpec.NodeSelector = make(map[string]string)
+    }
+    podSpec.NodeSelector[labelKey] = labelValue
+
+    // Configure toleration. Nodes are assumed to have a taint with the same
+    // key+value as the label used to select it.
+    podSpec.Tolerations = []corev1.Toleration{
+        {
+            Key:      labelKey,
+            Operator: corev1.TolerationOpEqual,
+            Value:    labelValue,
+            Effect:   corev1.TaintEffectNoExecute,
+        },
+    }
+
+    // Configure anti-affinity to ensure only one pod per node
+    templateMeta := &template.ObjectMeta
+    if templateMeta.Labels == nil {
+        templateMeta.Labels = make(map[string]string)
+    }
+    templateMeta.Labels[antiAffinityLabelKey] = antiAffinityLabelValue
+    podSpec.Affinity = &corev1.Affinity{
+        PodAntiAffinity: &corev1.PodAntiAffinity{
+            RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
+                {
+                    LabelSelector: &metav1.LabelSelector{
+                        MatchLabels: map[string]string{
+                            antiAffinityLabelKey: antiAffinityLabelValue,
+                        },
+                    },
+                    TopologyKey: "kubernetes.io/hostname",
+                },
+            },
+        },
+    }
+}
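
Two operational assumptions follow from this: the target node group must be pre-labeled and pre-tainted with the same key/value pair (purpose=higher-spec under the flag defaults), and because the taint effect is NoExecute, pods lacking the matching toleration are evicted from those nodes rather than merely kept off them. The required anti-affinity term with the kubernetes.io/hostname topology key then caps exclusively-scheduled avalanchego pods at one per Kubernetes node.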

tests/fixture/tmpnet/network.go

Lines changed: 1 addition & 1 deletion

@@ -787,7 +787,7 @@ func (n *Network) GetNodeURIs(ctx context.Context, deferCleanupFunc func(func())
 // GetAvailableNodeIDs returns the node IDs of nodes in the network that are running and not ephemeral.
 func (n *Network) GetAvailableNodeIDs() []string {
     availableNodes := FilterAvailableNodes(n.Nodes)
-    ids := make([]string, len(availableNodes))
+    ids := []string{}
     for _, node := range availableNodes {
         ids = append(ids, node.NodeID.String())
     }
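
This is a genuine bug fix rather than a style change: the removed line created the slice with a length, not just a capacity, so the appends landed after len(availableNodes) empty strings. A minimal standalone sketch of the failure mode:

package main

import "fmt"

func main() {
    nodes := []string{"NodeID-1", "NodeID-2"}

    // Buggy: make with a length, then append. The first len(nodes)
    // entries stay as empty strings and the result is twice as long.
    buggy := make([]string, len(nodes))
    for _, n := range nodes {
        buggy = append(buggy, n)
    }
    fmt.Println(buggy) // [  NodeID-1 NodeID-2]

    // Fixed, as in the diff: start empty and append.
    fixed := []string{}
    for _, n := range nodes {
        fixed = append(fixed, n)
    }
    fmt.Println(fixed) // [NodeID-1 NodeID-2]
}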

tests/fixture/tmpnet/node.go

Lines changed: 21 additions & 0 deletions

@@ -56,6 +56,27 @@
     Kube *KubeRuntimeConfig `json:"kube,omitempty"`
 }
 
+// GetNetworkStartTimeout returns the timeout to use when starting a network.
+func (c *NodeRuntimeConfig) GetNetworkStartTimeout(nodeCount int) (time.Duration, error) {
+    switch {
+    case c.Process != nil:
+        // Processes are expected to start quickly, nodeCount is ignored
+        return DefaultNetworkTimeout, nil
+    case c.Kube != nil:
+        // Ensure sufficient time for scheduling and image pull
+        timeout := time.Duration(nodeCount) * time.Minute
+
+        if c.Kube.UseExclusiveScheduling {
+            // Ensure sufficient time for the creation of autoscaled nodes
+            timeout *= 2
+        }
+
+        return timeout, nil
+    default:
+        return 0, errors.New("no runtime configuration set")
+    }
+}
+
 // Node supports configuring and running a node participating in a temporary network.
 type Node struct {
     // Set by EnsureNodeID which is also called when the node is read.
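
The kube timeout scales linearly with node count and doubles when exclusive scheduling may need to wait on cluster autoscaling. A standalone sketch of the arithmetic, mirroring the method rather than importing tmpnet:

package main

import (
    "fmt"
    "time"
)

// kubeStartTimeout mirrors the kube branch of GetNetworkStartTimeout.
func kubeStartTimeout(nodeCount int, exclusiveScheduling bool) time.Duration {
    // One minute per node for scheduling and image pull
    timeout := time.Duration(nodeCount) * time.Minute
    if exclusiveScheduling {
        // Double to allow for the creation of autoscaled nodes
        timeout *= 2
    }
    return timeout
}

func main() {
    fmt.Println(kubeStartTimeout(5, false)) // 5m0s
    fmt.Println(kubeStartTimeout(5, true))  // 10m0s
}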

tests/fixture/tmpnet/tmpnetctl/main.go

Lines changed: 9 additions & 1 deletion

@@ -80,7 +80,15 @@ func main() {
             DefaultRuntimeConfig: *nodeRuntimeConfig,
         }
 
-        ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
+        timeout, err := nodeRuntimeConfig.GetNetworkStartTimeout(nodeCount)
+        if err != nil {
+            return err
+        }
+        log.Info("waiting for network to start",
+            zap.Float64("timeoutSeconds", timeout.Seconds()),
+        )
+
+        ctx, cancel := context.WithTimeout(context.Background(), timeout)
         defer cancel()
         if err := tmpnet.BootstrapNewNetwork(
             ctx,
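
Here nodeCount is presumably the node count requested on the tmpnetctl command line, so the derived timeout grows with the size of the network being bootstrapped, matching the e2e helper change above.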

tests/fixture/tmpnet/utils.go

Lines changed: 2 additions & 2 deletions

@@ -59,7 +59,7 @@ type NodeURI struct {
 // nodes are running as local processes or in a kube cluster.
 func GetNodeURIs(ctx context.Context, nodes []*Node, deferCleanupFunc func(func())) ([]NodeURI, error) {
     availableNodes := FilterAvailableNodes(nodes)
-    uris := make([]NodeURI, 0, len(availableNodes))
+    uris := []NodeURI{}
     for _, node := range availableNodes {
         uri, cancel, err := node.GetLocalURI(ctx)
         if err != nil {
@@ -77,7 +77,7 @@ func GetNodeURIs(ctx context.Context, nodes []*Node, deferCleanupFunc func(func(
 
 // FilteredAvailableNodes filters the provided nodes by whether they are running and not ephemeral.
 func FilterAvailableNodes(nodes []*Node) []*Node {
-    filteredNodes := make([]*Node, 0, len(nodes))
+    filteredNodes := []*Node{}
     for _, node := range nodes {
         if node.IsEphemeral {
             // Avoid returning URIs for nodes whose lifespan is indeterminate
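
Unlike the network.go fix above, these two slices were already created with zero length (the make calls set only capacity), so this change is a consistency cleanup rather than a bug fix, at the cost of the pre-allocated capacity.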
