[Test] Wait on master node for shard started (#131172)

The shard's STARTED state may not yet be visible on the master node if the test waits on a data node. In that case, the DiskThreshold monitor may use a stale cluster state when releasing read-only blocks. This PR fixes it by waiting on the master node, which was the behaviour before #129872. Resolves: #131146
2025-07-15 11:25:04 +10:00 · 2025-07-15 11:25:04 +10:00 · 072e6c722d
parent 102cef8302
commit 072e6c722d
2 changed files with 4 additions and 17 deletions
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -535,9 +535,6 @@ tests:
 - class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
  method: test {p0=field_caps/40_time_series/Get simple time series field caps}
  issue: https://github.com/elastic/elasticsearch/issues/131225
-- class: org.elasticsearch.cluster.routing.allocation.DiskThresholdMonitorIT
-  method: testFloodStageExceeded
-  issue: https://github.com/elastic/elasticsearch/issues/131146
 - class: org.elasticsearch.packaging.test.DockerTests
  method: test090SecurityCliPackaging
  issue: https://github.com/elastic/elasticsearch/issues/131107
--- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java
@@ -13,6 +13,7 @@ import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
 import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
 import org.elasticsearch.cluster.DiskUsageIntegTestCase;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.cluster.metadata.ProjectId;
 import org.elasticsearch.cluster.node.DiscoveryNodeRole;
 import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.ShardRoutingState;
@@ -86,20 +87,9 @@ public class DiskThresholdMonitorIT extends DiskUsageIntegTestCase {
        // Verify that we can still move shards around even while blocked
        final String newDataNodeName = internalCluster().startDataOnlyNode();
        final String newDataNodeId = clusterAdmin().prepareNodesInfo(newDataNodeName).get().getNodes().get(0).getNode().getId();
-        assertBusy(() -> {
-            final ShardRouting primaryShard = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
-                .clear()
-                .setRoutingTable(true)
-                .setNodes(true)
-                .setIndices(indexName)
-                .get()
-                .getState()
-                .routingTable()
-                .index(indexName)
-                .shard(0)
-                .primaryShard();
-            assertThat(primaryShard.state(), equalTo(ShardRoutingState.STARTED));
-            assertThat(primaryShard.currentNodeId(), equalTo(newDataNodeId));
+        awaitClusterState(state -> {
+            final ShardRouting primaryShard = state.routingTable(ProjectId.DEFAULT).index(indexName).shard(0).primaryShard();
+            return primaryShard.state() == ShardRoutingState.STARTED && newDataNodeId.equals(primaryShard.currentNodeId());
        });

        // Verify that the block is removed once the shard migration is complete