Skip to content

Commit 3b85b55

Browse files
liuxiaomeiGMingmei Liu
authored and committed
node: fix the spy not receiving VAAs from the guardian when the spy is stopped and then restarted immediately #4447
1 parent 4c46384 commit 3b85b55

File tree

5 files changed

+149
-10
lines changed

5 files changed

+149
-10
lines changed

node/cmd/guardiand/node.go

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@ import (
66
"net"
77
_ "net/http/pprof" // #nosec G108 we are using a custom router (`router := mux.NewRouter()`) and thus not automatically expose pprof.
88
"os"
9-
"os/signal"
109
"path"
1110
"runtime"
1211
"slices"
1312
"strings"
14-
"syscall"
1513
"time"
1614

1715
"github.com/certusone/wormhole/node/pkg/guardiansigner"
@@ -1107,14 +1105,7 @@ func runNode(cmd *cobra.Command, args []string) {
11071105
rpcMap["ibcLCD"] = *ibcLCD
11081106
rpcMap["ibcWS"] = *ibcWS
11091107

1110-
// Handle SIGTERM
1111-
sigterm := make(chan os.Signal, 1)
1112-
signal.Notify(sigterm, syscall.SIGTERM)
1113-
go func() {
1114-
<-sigterm
1115-
logger.Info("Received sigterm. exiting.")
1116-
rootCtxCancel()
1117-
}()
1108+
common.ListenSysExit(logger, rootCtxCancel)
11181109

11191110
// log golang version
11201111
logger.Info("golang version", zap.String("golang_version", runtime.Version()))

node/cmd/spy/spy.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,8 @@ func runSpy(cmd *cobra.Command, args []string) {
343343
rootCtx, rootCtxCancel = context.WithCancel(context.Background())
344344
defer rootCtxCancel()
345345

346+
common.ListenSysExit(logger, rootCtxCancel)
347+
346348
// Inbound signed VAAs
347349
signedInC := make(chan *gossipv1.SignedVAAWithQuorum, 1024)
348350

node/pkg/common/cmd_utils.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package common
2+
3+
import (
4+
"context"
5+
"os"
6+
"os/signal"
7+
"syscall"
8+
9+
"go.uber.org/zap"
10+
)
11+
12+
func ListenSysExit(logger *zap.Logger, ctxCancel context.CancelFunc) {
13+
// Handle SIGTERM, SIGINT
14+
sigterm := make(chan os.Signal, 1)
15+
signal.Notify(sigterm, syscall.SIGTERM, syscall.SIGINT)
16+
go func() {
17+
<-sigterm
18+
logger.Info("Received sigterm. exiting.")
19+
ctxCancel()
20+
}()
21+
}

node/pkg/common/cmd_utils_test.go

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
package common
2+
3+
import (
4+
"context"
5+
"syscall"
6+
"testing"
7+
"time"
8+
9+
"go.uber.org/zap"
10+
)
11+
12+
func TestListenSysExit(t *testing.T) {
13+
// Create a logger for testing
14+
logger := zap.NewNop()
15+
16+
// Test SIGTERM signal
17+
t.Run("SIGTERM signal", func(t *testing.T) {
18+
// Create a new context for this test
19+
testCtx, testCancel := context.WithCancel(context.Background())
20+
defer testCancel()
21+
22+
// Start ListenSysExit
23+
go ListenSysExit(logger, testCancel)
24+
25+
// Give some time for the signal handler to be set up
26+
time.Sleep(100 * time.Millisecond)
27+
28+
// Send SIGTERM signal to the current process
29+
err := syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
30+
if err != nil {
31+
t.Fatalf("Failed to send SIGTERM signal: %v", err)
32+
}
33+
34+
// Wait for the context to be cancelled
35+
select {
36+
case <-testCtx.Done():
37+
// Context was cancelled as expected
38+
t.Log("Context cancelled successfully after SIGTERM")
39+
case <-time.After(2 * time.Second):
40+
t.Fatal("Context was not cancelled within 2 seconds after SIGTERM")
41+
}
42+
})
43+
44+
// Test SIGINT signal
45+
t.Run("SIGINT signal", func(t *testing.T) {
46+
// Create a new context for this test
47+
testCtx, testCancel := context.WithCancel(context.Background())
48+
defer testCancel()
49+
50+
// Start ListenSysExit
51+
go ListenSysExit(logger, testCancel)
52+
53+
// Give some time for the signal handler to be set up
54+
time.Sleep(100 * time.Millisecond)
55+
56+
// Send SIGINT signal to the current process
57+
err := syscall.Kill(syscall.Getpid(), syscall.SIGINT)
58+
if err != nil {
59+
t.Fatalf("Failed to send SIGINT signal: %v", err)
60+
}
61+
62+
// Wait for the context to be cancelled
63+
select {
64+
case <-testCtx.Done():
65+
// Context was cancelled as expected
66+
t.Log("Context cancelled successfully after SIGINT")
67+
case <-time.After(2 * time.Second):
68+
t.Fatal("Context was not cancelled within 2 seconds after SIGINT")
69+
}
70+
})
71+
72+
// Test that the function doesn't exit immediately
73+
t.Run("no signal sent", func(t *testing.T) {
74+
// Create a new context for this test
75+
testCtx, testCancel := context.WithCancel(context.Background())
76+
defer testCancel()
77+
78+
// Start ListenSysExit
79+
go ListenSysExit(logger, testCancel)
80+
81+
// Wait a short time and verify context is not cancelled
82+
select {
83+
case <-testCtx.Done():
84+
t.Fatal("Context was unexpectedly cancelled without signal")
85+
case <-time.After(500 * time.Millisecond):
86+
// Context was not cancelled as expected
87+
t.Log("Context remained active as expected when no signal was sent")
88+
}
89+
})
90+
}
91+
92+
// TestListenSysExitConcurrent tests that multiple calls to ListenSysExit work correctly
93+
func TestListenSysExitConcurrent(t *testing.T) {
94+
logger := zap.NewNop()
95+
ctx, cancel := context.WithCancel(context.Background())
96+
defer cancel()
97+
98+
// Start multiple ListenSysExit goroutines
99+
for i := 0; i < 3; i++ {
100+
go ListenSysExit(logger, cancel)
101+
}
102+
103+
// Give some time for all signal handlers to be set up
104+
time.Sleep(100 * time.Millisecond)
105+
106+
// Send SIGTERM signal
107+
err := syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
108+
if err != nil {
109+
t.Fatalf("Failed to send SIGTERM signal: %v", err)
110+
}
111+
112+
// Wait for the context to be cancelled
113+
select {
114+
case <-ctx.Done():
115+
t.Log("Context cancelled successfully with multiple ListenSysExit goroutines")
116+
case <-time.After(2 * time.Second):
117+
t.Fatal("Context was not cancelled within 2 seconds")
118+
}
119+
}

node/pkg/p2p/p2p.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,12 @@ func Run(params *RunParams) func(ctx context.Context) error {
331331
}
332332

333333
defer func() {
334+
// close connect
335+
for _, conn := range h.Network().Conns() {
336+
logger.Info("Closing connection", zap.String("peer", conn.RemotePeer().String()))
337+
conn.Close()
338+
}
339+
334340
if err := h.Close(); err != nil {
335341
logger.Error("error closing the host", zap.Error(err))
336342
}

0 commit comments

Comments
 (0)