8
8
9
9
package org .opensearch .remotemigration ;
10
10
11
- import com .carrotsearch .randomizedtesting .generators .RandomNumbers ;
12
-
13
- import org .opensearch .action .DocWriteResponse ;
14
11
import org .opensearch .action .admin .cluster .health .ClusterHealthResponse ;
15
12
import org .opensearch .action .admin .cluster .settings .ClusterUpdateSettingsRequest ;
13
+ import org .opensearch .action .admin .indices .replication .SegmentReplicationStatsResponse ;
16
14
import org .opensearch .action .admin .indices .settings .put .UpdateSettingsRequest ;
17
- import org .opensearch .action .delete .DeleteResponse ;
18
- import org .opensearch .action .index .IndexResponse ;
19
15
import org .opensearch .cluster .metadata .IndexMetadata ;
20
16
import org .opensearch .cluster .routing .allocation .command .MoveAllocationCommand ;
21
17
import org .opensearch .common .Priority ;
22
18
import org .opensearch .common .settings .Settings ;
23
19
import org .opensearch .common .unit .TimeValue ;
20
+ import org .opensearch .index .SegmentReplicationPerGroupStats ;
24
21
import org .opensearch .index .query .QueryBuilders ;
25
22
import org .opensearch .test .OpenSearchIntegTestCase ;
26
23
import org .opensearch .test .hamcrest .OpenSearchAssertions ;
27
24
28
- import java .util .concurrent .atomic .AtomicBoolean ;
29
- import java .util .concurrent .atomic .AtomicInteger ;
25
+ import java .util .concurrent .TimeUnit ;
30
26
31
27
import static org .opensearch .node .remotestore .RemoteStoreNodeService .MIGRATION_DIRECTION_SETTING ;
32
28
import static org .opensearch .node .remotestore .RemoteStoreNodeService .REMOTE_STORE_COMPATIBILITY_MODE_SETTING ;
33
29
import static org .opensearch .test .hamcrest .OpenSearchAssertions .assertAcked ;
34
30
35
31
@ OpenSearchIntegTestCase .ClusterScope (scope = OpenSearchIntegTestCase .Scope .TEST , numDataNodes = 0 , autoManageMasterNodes = false )
36
-
37
32
public class RemoteReplicaRecoveryIT extends MigrationBaseTestCase {
38
33
39
34
protected int maximumNumberOfShards () {
@@ -52,6 +47,7 @@ protected int minimumNumberOfReplicas() {
52
47
Brings up new replica copies on remote and docrep nodes, when primary is on a remote node
53
48
Live indexing continues in the background while the new replica copies are brought up
54
49
*/
50
+ @ AwaitsFix (bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/13473" )
55
51
public void testReplicaRecovery () throws Exception {
56
52
internalCluster ().setBootstrapClusterManagerNodeIndex (0 );
57
53
String primaryNode = internalCluster ().startNode ();
@@ -63,10 +59,8 @@ public void testReplicaRecovery() throws Exception {
63
59
client ().admin ().indices ().prepareCreate ("test" ).setSettings (indexSettings ()).setMapping ("field" , "type=text" ).get ();
64
60
String replicaNode = internalCluster ().startNode ();
65
61
ensureGreen ("test" );
66
-
67
- AtomicInteger numAutoGenDocs = new AtomicInteger ();
68
- final AtomicBoolean finished = new AtomicBoolean (false );
69
- Thread indexingThread = getThread (finished , numAutoGenDocs );
62
+ AsyncIndexingService asyncIndexingService = new AsyncIndexingService ("test" );
63
+ asyncIndexingService .startIndexing ();
70
64
71
65
refresh ("test" );
72
66
@@ -78,12 +72,10 @@ public void testReplicaRecovery() throws Exception {
78
72
updateSettingsRequest .persistentSettings (Settings .builder ().put (MIGRATION_DIRECTION_SETTING .getKey (), "remote_store" ));
79
73
assertAcked (client ().admin ().cluster ().updateSettings (updateSettingsRequest ).actionGet ());
80
74
81
- String remoteNode2 = internalCluster ().startNode ();
75
+ internalCluster ().startNode ();
82
76
internalCluster ().validateClusterFormed ();
83
77
84
78
// identify the primary
85
-
86
- Thread .sleep (RandomNumbers .randomIntBetween (random (), 0 , 2000 ));
87
79
logger .info ("--> relocating primary from {} to {} " , primaryNode , remoteNode );
88
80
client ().admin ()
89
81
.cluster ()
@@ -102,7 +94,6 @@ public void testReplicaRecovery() throws Exception {
102
94
103
95
assertEquals (0 , clusterHealthResponse .getRelocatingShards ());
104
96
logger .info ("--> relocation of primary from docrep to remote complete" );
105
- Thread .sleep (RandomNumbers .randomIntBetween (random (), 0 , 2000 ));
106
97
107
98
logger .info ("--> getting up the new replicas now to doc rep node as well as remote node " );
108
99
// Increase replica count to 3
@@ -129,52 +120,33 @@ public void testReplicaRecovery() throws Exception {
129
120
logger .info ("--> replica is up now on another docrep now as well as remote node" );
130
121
131
122
assertEquals (0 , clusterHealthResponse .getRelocatingShards ());
123
+ asyncIndexingService .stopIndexing ();
124
+ refresh ("test" );
132
125
133
- Thread .sleep (RandomNumbers .randomIntBetween (random (), 0 , 2000 ));
126
+ // once indexing has stopped, segment replication lag on every replica should settle to zero
127
+ assertBusy (() -> {
128
+ SegmentReplicationStatsResponse segmentReplicationStatsResponse = dataNodeClient ().admin ()
129
+ .indices ()
130
+ .prepareSegmentReplicationStats ("test" )
131
+ .setDetailed (true )
132
+ .execute ()
133
+ .actionGet ();
134
+ SegmentReplicationPerGroupStats perGroupStats = segmentReplicationStatsResponse .getReplicationStats ().get ("test" ).get (0 );
135
+ assertEquals (segmentReplicationStatsResponse .getReplicationStats ().size (), 1 );
136
+ perGroupStats .getReplicaStats ().stream ().forEach (e -> assertEquals (e .getCurrentReplicationLagMillis (), 0 ));
137
+ }, 20 , TimeUnit .SECONDS );
134
138
135
- // Stop replicas on docrep now.
136
- // ToDo : Remove once we have dual replication enabled
137
- client ().admin ()
138
- .indices ()
139
- .updateSettings (
140
- new UpdateSettingsRequest ("test" ).settings (
141
- Settings .builder ()
142
- .put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 1 )
143
- .put ("index.routing.allocation.exclude._name" , primaryNode + "," + replicaNode )
144
- .build ()
145
- )
146
- )
147
- .get ();
148
-
149
- finished .set (true );
150
- indexingThread .join ();
151
- refresh ("test" );
152
- OpenSearchAssertions .assertHitCount (client ().prepareSearch ("test" ).setTrackTotalHits (true ).get (), numAutoGenDocs .get ());
139
+ OpenSearchAssertions .assertHitCount (
140
+ client ().prepareSearch ("test" ).setTrackTotalHits (true ).get (),
141
+ asyncIndexingService .getIndexedDocs ()
142
+ );
153
143
OpenSearchAssertions .assertHitCount (
154
144
client ().prepareSearch ("test" )
155
145
.setTrackTotalHits (true )// extra paranoia ;)
156
146
.setQuery (QueryBuilders .termQuery ("auto" , true ))
157
- // .setPreference("_prefer_nodes:" + (remoteNode+ "," + remoteNode2))
158
147
.get (),
159
- numAutoGenDocs . get ()
148
+ asyncIndexingService . getIndexedDocs ()
160
149
);
161
150
162
151
}
163
-
164
- private Thread getThread (AtomicBoolean finished , AtomicInteger numAutoGenDocs ) {
165
- Thread indexingThread = new Thread (() -> {
166
- while (finished .get () == false && numAutoGenDocs .get () < 100 ) {
167
- IndexResponse indexResponse = client ().prepareIndex ("test" ).setId ("id" ).setSource ("field" , "value" ).get ();
168
- assertEquals (DocWriteResponse .Result .CREATED , indexResponse .getResult ());
169
- DeleteResponse deleteResponse = client ().prepareDelete ("test" , "id" ).get ();
170
- assertEquals (DocWriteResponse .Result .DELETED , deleteResponse .getResult ());
171
- client ().prepareIndex ("test" ).setSource ("auto" , true ).get ();
172
- numAutoGenDocs .incrementAndGet ();
173
- logger .info ("Indexed {} docs here" , numAutoGenDocs .get ());
174
- }
175
- });
176
- indexingThread .start ();
177
- return indexingThread ;
178
- }
179
-
180
152
}
0 commit comments