Skip to content

Commit c0fe497

Browse files
[FIXED] Stream recovery with corrupt msg block with sequence gaps. (#4344)
This is a fix for a bad msg blk detected in the field that had sequence holes. The stream had max msgs per subject of one and only one subject but had lots of messages. The stream did not recover correctly, and upon further inspection determined that a msg blk had holes, which should not be possible. We now detect the holes and deal with the situation appropriately. Heavily tested on the data dump from the field. Signed-off-by: Derek Collison <derek@nats.io>
2 parents 3a2835c + 9243051 commit c0fe497

File tree

1 file changed

+85
-28
lines changed

1 file changed

+85
-28
lines changed

server/filestore.go

Lines changed: 85 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ const (
275275
wiThresh = int64(30 * time.Second)
276276
// Time threshold to write index info for non FIFO cases
277277
winfThresh = int64(2 * time.Second)
278+
// Checksum size for hash for msg records.
279+
recordHashSize = 8
278280
)
279281

280282
func newFileStore(fcfg FileStoreConfig, cfg StreamConfig) (*fileStore, error) {
@@ -971,6 +973,10 @@ func (mb *msgBlock) rebuildState() (*LostStreamData, error) {
971973
func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, error) {
972974
startLastSeq := mb.last.seq
973975

976+
// Remove the .fss file and clear any cache we have set.
977+
mb.clearCacheAndOffset()
978+
mb.removePerSubjectInfoLocked()
979+
974980
buf, err := mb.loadBlock(nil)
975981
if err != nil || len(buf) == 0 {
976982
var ld *LostStreamData
@@ -996,9 +1002,6 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, error) {
9961002
mb.last.seq, mb.last.ts = 0, 0
9971003
firstNeedsSet := true
9981004

999-
// Remove the .fss file from disk.
1000-
mb.removePerSubjectInfoLocked()
1001-
10021005
// Check if we need to decrypt.
10031006
if mb.bek != nil && len(buf) > 0 {
10041007
// Recreate to reset counter.
@@ -1070,12 +1073,7 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, error) {
10701073
rl &^= hbit
10711074
dlen := int(rl) - msgHdrSize
10721075
// Do some quick sanity checks here.
1073-
if dlen < 0 || int(slen) > (dlen-8) || dlen > int(rl) || rl > rlBadThresh {
1074-
truncate(index)
1075-
return gatherLost(lbuf - index), errBadMsg
1076-
}
1077-
1078-
if index+rl > lbuf {
1076+
if dlen < 0 || int(slen) > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
10791077
truncate(index)
10801078
return gatherLost(lbuf - index), errBadMsg
10811079
}
@@ -1091,15 +1089,17 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, error) {
10911089
addToDmap(seq)
10921090
}
10931091
index += rl
1094-
mb.last.seq = seq
1095-
mb.last.ts = ts
1092+
if seq >= mb.first.seq {
1093+
mb.last.seq = seq
1094+
mb.last.ts = ts
1095+
}
10961096
continue
10971097
}
10981098

10991099
// This is for when we have index info that adjusts for deleted messages
11001100
// at the head. So the first.seq will be already set here. If this is larger
11011101
// replace what we have with this seq.
1102-
if firstNeedsSet && seq > mb.first.seq {
1102+
if firstNeedsSet && seq >= mb.first.seq {
11031103
firstNeedsSet, mb.first.seq, mb.first.ts = false, seq, ts
11041104
}
11051105

@@ -1119,12 +1119,12 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, error) {
11191119
hh.Write(hdr[4:20])
11201120
hh.Write(data[:slen])
11211121
if hasHeaders {
1122-
hh.Write(data[slen+4 : dlen-8])
1122+
hh.Write(data[slen+4 : dlen-recordHashSize])
11231123
} else {
1124-
hh.Write(data[slen : dlen-8])
1124+
hh.Write(data[slen : dlen-recordHashSize])
11251125
}
11261126
checksum := hh.Sum(nil)
1127-
if !bytes.Equal(checksum, data[len(data)-8:]) {
1127+
if !bytes.Equal(checksum, data[len(data)-recordHashSize:]) {
11281128
truncate(index)
11291129
return gatherLost(lbuf - index), errBadMsg
11301130
}
@@ -1165,6 +1165,11 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, error) {
11651165
mb.last.seq = mb.first.seq - 1
11661166
}
11671167

1168+
// Update our fss file if needed.
1169+
if len(mb.fss) > 0 {
1170+
mb.writePerSubjectInfo()
1171+
}
1172+
11681173
return nil, nil
11691174
}
11701175

@@ -2598,14 +2603,42 @@ func (fs *fileStore) enforceMsgPerSubjectLimit() {
25982603
fs.scb = nil
25992604
defer func() { fs.scb = cb }()
26002605

2606+
var numMsgs uint64
2607+
26012608
// collect all that are not correct.
26022609
needAttention := make(map[string]*psi)
26032610
for subj, psi := range fs.psim {
2611+
numMsgs += psi.total
26042612
if psi.total > maxMsgsPer {
26052613
needAttention[subj] = psi
26062614
}
26072615
}
26082616

2617+
// We had an issue with a use case where psim (and hence fss) were correct but idx was not and was not properly being caught.
2618+
// So do a quick sanity check here. If we detect a skew do a rebuild then re-check.
2619+
if numMsgs != fs.state.Msgs {
2620+
// Clear any global subject state.
2621+
fs.psim = make(map[string]*psi)
2622+
for _, mb := range fs.blks {
2623+
mb.removeIndexFile()
2624+
ld, err := mb.rebuildState()
2625+
mb.writeIndexInfo()
2626+
if err != nil && ld != nil {
2627+
fs.addLostData(ld)
2628+
}
2629+
fs.populateGlobalPerSubjectInfo(mb)
2630+
}
2631+
// Rebuild fs state too.
2632+
fs.rebuildStateLocked(nil)
2633+
// Need to redo blocks that need attention.
2634+
needAttention = make(map[string]*psi)
2635+
for subj, psi := range fs.psim {
2636+
if psi.total > maxMsgsPer {
2637+
needAttention[subj] = psi
2638+
}
2639+
}
2640+
}
2641+
26092642
// Collect all the msgBlks we alter.
26102643
blks := make(map[*msgBlock]struct{})
26112644

@@ -3050,8 +3083,7 @@ func (mb *msgBlock) compact() {
30503083
return
30513084
}
30523085

3053-
// Close cache and index file and wipe delete map, then rebuild.
3054-
mb.clearCacheAndOffset()
3086+
// Remove index file and wipe delete map, then rebuild.
30553087
mb.removeIndexFileLocked()
30563088
mb.deleteDmap()
30573089
mb.rebuildStateLocked()
@@ -3077,6 +3109,11 @@ func (mb *msgBlock) slotInfo(slot int) (uint32, uint32, bool, error) {
30773109
bi := mb.cache.idx[slot]
30783110
ri, hashChecked := (bi &^ hbit), (bi&hbit) != 0
30793111

3112+
// If this is a deleted slot return here.
3113+
if bi == dbit {
3114+
return 0, 0, false, errDeletedMsg
3115+
}
3116+
30803117
// Determine record length
30813118
var rl uint32
30823119
if len(mb.cache.idx) > slot+1 {
@@ -4022,7 +4059,7 @@ func (fs *fileStore) selectMsgBlockForStart(minTime time.Time) *msgBlock {
40224059
func (mb *msgBlock) indexCacheBuf(buf []byte) error {
40234060
var le = binary.LittleEndian
40244061

4025-
var fseq uint64
4062+
var fseq, pseq uint64
40264063
var idx []uint32
40274064
var index uint32
40284065

@@ -4055,23 +4092,39 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
40554092
dlen := int(rl) - msgHdrSize
40564093

40574094
// Do some quick sanity checks here.
4058-
if dlen < 0 || int(slen) > dlen || dlen > int(rl) || index+rl > lbuf || rl > 32*1024*1024 {
4095+
if dlen < 0 || int(slen) > (dlen-recordHashSize) || dlen > int(rl) || index+rl > lbuf || rl > rlBadThresh {
40594096
// This means something is off.
40604097
// TODO(dlc) - Add into bad list?
40614098
return errCorruptState
40624099
}
40634100

40644101
// Clear erase bit.
40654102
seq = seq &^ ebit
4066-
// Adjust if we guessed wrong.
4067-
if seq != 0 && seq < fseq {
4068-
fseq = seq
4069-
}
4103+
40704104
// We defer checksum checks to individual msg cache lookups to amortorize costs and
40714105
// not introduce latency for first message from a newly loaded block.
4072-
idx = append(idx, index)
4073-
mb.cache.lrl = uint32(rl)
4074-
index += mb.cache.lrl
4106+
if seq >= mb.first.seq {
4107+
// Track that we do not have holes.
4108+
// Not expected but did see it in the field.
4109+
if pseq > 0 && seq != pseq+1 {
4110+
if mb.dmap == nil {
4111+
mb.dmap = make(map[uint64]struct{})
4112+
}
4113+
for dseq := pseq + 1; dseq < seq; dseq++ {
4114+
idx = append(idx, dbit)
4115+
mb.dmap[dseq] = struct{}{}
4116+
}
4117+
}
4118+
pseq = seq
4119+
4120+
idx = append(idx, index)
4121+
mb.cache.lrl = uint32(rl)
4122+
// Adjust if we guessed wrong.
4123+
if seq != 0 && seq < fseq {
4124+
fseq = seq
4125+
}
4126+
}
4127+
index += rl
40754128
}
40764129
mb.cache.buf = buf
40774130
mb.cache.idx = idx
@@ -4407,6 +4460,9 @@ const hbit = 1 << 31
44074460
// Used for marking erased messages sequences.
44084461
const ebit = 1 << 63
44094462

4463+
// Used to mark a bad index as deleted.
4464+
const dbit = 1 << 30
4465+
44104466
// Will do a lookup from cache.
44114467
// Lock should be held.
44124468
func (mb *msgBlock) cacheLookup(seq uint64, sm *StoreMsg) (*StoreMsg, error) {
@@ -4417,6 +4473,7 @@ func (mb *msgBlock) cacheLookup(seq uint64, sm *StoreMsg) (*StoreMsg, error) {
44174473
// If we have a delete map check it.
44184474
if mb.dmap != nil {
44194475
if _, ok := mb.dmap[seq]; ok {
4476+
mb.llts = time.Now().UnixNano()
44204477
return nil, errDeletedMsg
44214478
}
44224479
}
@@ -4559,9 +4616,9 @@ func (mb *msgBlock) msgFromBuf(buf []byte, sm *StoreMsg, hh hash.Hash64) (*Store
45594616
hh.Write(hdr[4:20])
45604617
hh.Write(data[:slen])
45614618
if hasHeaders {
4562-
hh.Write(data[slen+4 : dlen-8])
4619+
hh.Write(data[slen+4 : dlen-recordHashSize])
45634620
} else {
4564-
hh.Write(data[slen : dlen-8])
4621+
hh.Write(data[slen : dlen-recordHashSize])
45654622
}
45664623
if !bytes.Equal(hh.Sum(nil), data[len(data)-8:]) {
45674624
return nil, errBadMsg

0 commit comments

Comments
 (0)