Skip to content

Commit dba529a

Browse files
torcolvin and gregns1 authored
[3.1.10 backport] CBG-4107 CBG-4173 : remove database even if removeCorruptConfigIfExists fails (#7060)
* CBG-4088: If we error in removeCorruptConfigIfExists we don't unload/remove database (#7014) * CBG-4087: remove configs from invalid tracking that are not found in config poll (#7020) * CBG-4087: remove configs from invalid tracking that are not found in config poll * update test --------- Co-authored-by: Gregory Newman-Smith <109068393+gregns1@users.noreply.github.com>
1 parent ae73178 commit dba529a

6 files changed

+188
-1
lines changed

rest/admin_api.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ func (h *handler) handleCreateDB() error {
135135
// if it used to be corrupt we need to remove it from the invalid database map on server context and remove the old corrupt config from the bucket
136136
err = h.removeCorruptConfigIfExists(contextNoCancel.Ctx, bucket, h.server.Config.Bootstrap.ConfigGroupID, dbName)
137137
if err != nil {
138+
// we cannot continue on with database creation with possibility of the corrupt database config in the bucket for this db
139+
// thus we need to unload the requested database config to prevent the cluster being in an inconsistent state
140+
h.server._removeDatabase(contextNoCancel.Ctx, dbName)
138141
return err
139142
}
140143
cas, err := h.server.BootstrapContext.InsertConfig(contextNoCancel.Ctx, bucket, h.server.Config.Bootstrap.ConfigGroupID, &persistedConfig)

rest/config.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,18 @@ func (d *invalidDatabaseConfigs) remove(dbname string) {
366366
delete(d.dbNames, dbname)
367367
}
368368

369+
// removeNonExistingConfigs will remove any configs from invalid config tracking map that aren't present in fetched configs
370+
func (d *invalidDatabaseConfigs) removeNonExistingConfigs(fetchedConfigs map[string]bool) {
371+
d.m.Lock()
372+
defer d.m.Unlock()
373+
for dbName := range d.dbNames {
374+
if ok := fetchedConfigs[dbName]; !ok {
375+
// this invalid db config was not found in config polling, so lets remove
376+
delete(d.dbNames, dbName)
377+
}
378+
}
379+
}
380+
369381
// inheritFromBootstrap sets any empty Couchbase Server values from the given bootstrap config.
370382
func (dbc *DbConfig) inheritFromBootstrap(b BootstrapConfig) {
371383
if dbc.Username == "" {
@@ -1762,6 +1774,7 @@ func (sc *ServerContext) FetchConfigs(ctx context.Context, isInitialStartup bool
17621774
return nil, err
17631775
}
17641776

1777+
allConfigsFound := make(map[string]bool)
17651778
fetchedConfigs := make(map[string]DatabaseConfig, len(buckets))
17661779
for _, bucket := range buckets {
17671780
ctx := base.BucketNameCtx(ctx, bucket)
@@ -1782,6 +1795,7 @@ func (sc *ServerContext) FetchConfigs(ctx context.Context, isInitialStartup bool
17821795
continue
17831796
}
17841797
for _, cnf := range configs {
1798+
allConfigsFound[cnf.Name] = true
17851799
// Handle invalid database registry entries. Either:
17861800
// - CBG-3292: Bucket in config doesn't match the actual bucket
17871801
// - CBG-3742: Registry entry marked invalid (due to rollback causing collection conflict)
@@ -1815,6 +1829,10 @@ func (sc *ServerContext) FetchConfigs(ctx context.Context, isInitialStartup bool
18151829
}
18161830
}
18171831

1832+
// remove any invalid databases from the tracking map if config poll above didn't
1833+
// pick up those configs from the bucket. This means the configs are no longer present in the bucket.
1834+
sc.invalidDatabaseConfigTracking.removeNonExistingConfigs(allConfigsFound)
1835+
18181836
return fetchedConfigs, nil
18191837
}
18201838

rest/config_test.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"runtime"
3131
"strings"
3232
"testing"
33+
"time"
3334

3435
"golang.org/x/crypto/bcrypt"
3536
"gopkg.in/square/go-jose.v2"
@@ -2925,3 +2926,146 @@ func makeScopesConfigWithDefault(scopeName string, collections []string) *Scopes
29252926
scopesConfig := makeScopesConfig(scopeName, collections)
29262927
return &scopesConfig
29272928
}
2929+
2930+
// TestInvalidDbConfigNoLongerPresentInBucket:
2931+
// - Create rest tester with large config poll interval
2932+
// - Create valid db
2933+
// - Alter config in bucket to make it invalid
2934+
// - Force config poll, assert it is picked up as invalid db config
2935+
// - Delete the invalid db config form the bucket
2936+
// - Force config poll reload and assert the invalid db is cleared
2937+
func TestInvalidDbConfigNoLongerPresentInBucket(t *testing.T) {
2938+
if base.UnitTestUrlIsWalrus() {
2939+
t.Skip("test only works with CBS, requires bootstrap connection")
2940+
}
2941+
rt := NewRestTester(t, &RestTesterConfig{
2942+
CustomTestBucket: base.GetTestBucket(t),
2943+
PersistentConfig: true,
2944+
MutateStartupConfig: func(config *StartupConfig) {
2945+
// configure the interval time to not run
2946+
config.Bootstrap.ConfigUpdateFrequency = base.NewConfigDuration(10 * time.Minute)
2947+
},
2948+
DatabaseConfig: nil,
2949+
})
2950+
defer rt.Close()
2951+
realBucketName := rt.CustomTestBucket.GetName()
2952+
ctx := base.TestCtx(t)
2953+
const dbName = "db1"
2954+
2955+
// create db with correct config
2956+
dbConfig := rt.NewDbConfig()
2957+
resp := rt.CreateDatabase(dbName, dbConfig)
2958+
RequireStatus(t, resp, http.StatusCreated)
2959+
2960+
// wait for db to come online
2961+
require.NoError(t, rt.WaitForDBOnline())
2962+
2963+
// grab the persisted db config from the bucket
2964+
databaseConfig := DatabaseConfig{}
2965+
_, err := rt.ServerContext().BootstrapContext.GetConfig(rt.Context(), realBucketName, rt.ServerContext().Config.Bootstrap.ConfigGroupID, "db1", &databaseConfig)
2966+
require.NoError(t, err)
2967+
2968+
// update the persisted config to a fake bucket name
2969+
newBucketName := "fakeBucket"
2970+
_, err = rt.UpdatePersistedBucketName(&databaseConfig, &newBucketName)
2971+
require.NoError(t, err)
2972+
2973+
// force reload of configs from bucket
2974+
rt.ServerContext().ForceDbConfigsReload(t, ctx)
2975+
2976+
// assert the config is picked as invalid db config
2977+
require.EventuallyWithT(t, func(c *assert.CollectT) {
2978+
invalidDatabases := rt.ServerContext().AllInvalidDatabaseNames(t)
2979+
assert.Equal(c, 1, len(invalidDatabases))
2980+
assert.Equal(c, 0, len(rt.ServerContext().dbConfigs))
2981+
}, time.Second*10, time.Millisecond*100)
2982+
2983+
// remove the invalid config from the bucket
2984+
rt.RemoveDbConfigFromBucket(dbName, realBucketName)
2985+
2986+
// force reload of configs from bucket
2987+
rt.ServerContext().ForceDbConfigsReload(t, ctx)
2988+
2989+
// assert the config is removed from tracking
2990+
require.EventuallyWithT(t, func(c *assert.CollectT) {
2991+
invalidDatabases := rt.ServerContext().AllInvalidDatabaseNames(t)
2992+
assert.Equal(c, 0, len(invalidDatabases))
2993+
assert.Equal(c, 0, len(rt.ServerContext().dbConfigs))
2994+
}, time.Second*10, time.Millisecond*100)
2995+
2996+
// create db again, should succeed
2997+
resp = rt.CreateDatabase(dbName, dbConfig)
2998+
RequireStatus(t, resp, http.StatusCreated)
2999+
}
3000+
3001+
// TestNotFoundOnInvalidDatabase:
3002+
// - Create rest tester with large config polling interval
3003+
// - Insert a bad dbConfig into the bucket
3004+
// - Manually fetch and load db from buckets
3005+
// - Assert that the bad config is tracked as invalid config
3006+
// - Delete the bad config manually and attempt to correct the db config through create db endpoint
3007+
// - Assert db is removed form invalid db's and is now a running database on server context
3008+
func TestNotFoundOnInvalidDatabase(t *testing.T) {
3009+
if base.UnitTestUrlIsWalrus() {
3010+
t.Skip("test only works with CBS, requires bootstrap connection")
3011+
}
3012+
rt := NewRestTester(t, &RestTesterConfig{
3013+
CustomTestBucket: base.GetTestBucket(t),
3014+
PersistentConfig: true,
3015+
MutateStartupConfig: func(config *StartupConfig) {
3016+
// configure the interval time to not run
3017+
config.Bootstrap.ConfigUpdateFrequency = base.NewConfigDuration(100 * time.Second)
3018+
},
3019+
DatabaseConfig: nil,
3020+
})
3021+
defer rt.Close()
3022+
realBucketName := rt.CustomTestBucket.GetName()
3023+
3024+
// create a new invalid db config and persist to bucket
3025+
badName := "badBucketName"
3026+
dbConfig := rt.NewDbConfig()
3027+
dbConfig.Name = "db1"
3028+
3029+
version, err := GenerateDatabaseConfigVersionID(rt.Context(), "", &dbConfig)
3030+
require.NoError(t, err)
3031+
metadataID, metadataIDError := rt.ServerContext().BootstrapContext.ComputeMetadataIDForDbConfig(base.TestCtx(t), &dbConfig)
3032+
require.NoError(t, metadataIDError)
3033+
3034+
// insert the db config with bad bucket name
3035+
dbConfig.Bucket = &badName
3036+
persistedConfig := DatabaseConfig{
3037+
Version: version,
3038+
MetadataID: metadataID,
3039+
DbConfig: dbConfig,
3040+
SGVersion: base.ProductVersion.String(),
3041+
}
3042+
rt.InsertDbConfigToBucket(&persistedConfig, rt.CustomTestBucket.GetName())
3043+
3044+
// manually fetch and load db configs from bucket
3045+
_, err = rt.ServerContext().fetchAndLoadConfigs(rt.Context(), false)
3046+
require.NoError(t, err)
3047+
3048+
// assert the config is picked as invalid db config
3049+
require.EventuallyWithT(t, func(c *assert.CollectT) {
3050+
invalidDatabases := rt.ServerContext().AllInvalidDatabaseNames(t)
3051+
assert.Equal(c, 1, len(invalidDatabases))
3052+
}, time.Second*10, time.Millisecond*100)
3053+
3054+
resp := rt.SendAdminRequest(http.MethodGet, "/db1/", "")
3055+
RequireStatus(t, resp, http.StatusNotFound)
3056+
assert.Contains(t, resp.Body.String(), "You must update database config immediately")
3057+
3058+
// delete the invalid db config to force the not found error
3059+
rt.RemoveDbConfigFromBucket(dbConfig.Name, realBucketName)
3060+
3061+
// fix the bucket name and try fix corrupt db through create db endpoint
3062+
dbConfig.Bucket = &realBucketName
3063+
RequireStatus(t, rt.CreateDatabase(dbConfig.Name, dbConfig), http.StatusCreated)
3064+
3065+
// assert the config is remove the invalid config and we have a running db
3066+
require.EventuallyWithT(t, func(c *assert.CollectT) {
3067+
invalidDatabases := rt.ServerContext().AllInvalidDatabaseNames(t)
3068+
assert.Equal(c, 0, len(invalidDatabases))
3069+
assert.Equal(c, 1, len(rt.ServerContext().dbConfigs))
3070+
}, time.Second*10, time.Millisecond*100)
3071+
}

rest/handler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ func (h *handler) removeCorruptConfigIfExists(ctx context.Context, bucket, confi
572572
}
573573
// remove the bad config from the bucket
574574
err := h.server.BootstrapContext.DeleteConfig(ctx, bucket, configGroupID, dbName)
575-
if err != nil {
575+
if err != nil && !base.IsDocNotFoundError(err) {
576576
return err
577577
}
578578
// delete the database name from the invalid database map on server context

rest/utilities_testing.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2629,6 +2629,23 @@ func (sc *ServerContext) RequireInvalidDatabaseConfigNames(t *testing.T, expecte
26292629
require.ElementsMatch(t, expectedDbNames, dbNames)
26302630
}
26312631

2632+
// ForceDbConfigsReload forces the reload db config from bucket process (like the ConfigUpdate background process)
2633+
func (sc *ServerContext) ForceDbConfigsReload(t *testing.T, ctx context.Context) {
2634+
_, err := sc.fetchAndLoadConfigs(ctx, false)
2635+
require.NoError(t, err)
2636+
}
2637+
2638+
// AllInvalidDatabaseNames returns the names of all the databases that have invalid configs. Testing only since this locks the database context.
2639+
func (sc *ServerContext) AllInvalidDatabaseNames(_ *testing.T) []string {
2640+
sc.invalidDatabaseConfigTracking.m.RLock()
2641+
defer sc.invalidDatabaseConfigTracking.m.RUnlock()
2642+
dbs := make([]string, 0, len(sc.invalidDatabaseConfigTracking.dbNames))
2643+
for db := range sc.invalidDatabaseConfigTracking.dbNames {
2644+
dbs = append(dbs, db)
2645+
}
2646+
return dbs
2647+
}
2648+
26322649
// Calls DropAllIndexes to remove all indexes, then restores the primary index for TestBucketPool readier requirements
26332650
func dropAllNonPrimaryIndexes(t *testing.T, dataStore base.DataStore) {
26342651

rest/utilities_testing_resttester.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,11 @@ func (rt *RestTester) InsertDbConfigToBucket(config *DatabaseConfig, bucketName
290290
require.NoError(rt.TB, insertErr)
291291
}
292292

293+
func (rt *RestTester) RemoveDbConfigFromBucket(dbName string, bucketName string) {
294+
deleteErr := rt.ServerContext().BootstrapContext.DeleteConfig(base.TestCtx(rt.TB), bucketName, rt.ServerContext().Config.Bootstrap.ConfigGroupID, dbName)
295+
require.NoError(rt.TB, deleteErr)
296+
}
297+
293298
func (rt *RestTester) PersistDbConfigToBucket(dbConfig DbConfig, bucketName string) {
294299
version, err := GenerateDatabaseConfigVersionID(rt.Context(), "", &dbConfig)
295300
require.NoError(rt.TB, err)

0 commit comments

Comments
 (0)