Improve lost-increment message in repo analysis (#131200)
Today repository analysis may fail with a message like the following: [test-repo] register [test-register-contended-F_NNXHrSSDGveoeyj1skwg] should have value [10] but instead had value [OptionalBytesReference[00 00 00 00 00 00 00 09]] This is confusing because one might interpret `should have value [10]` as an indication that Elasticsearch definitely wrote this value to the register, leaving you trying to work out how that particular write was lost. In fact it can be more subtle than that, we only believe the register blob should have this value because we know we completed 10 supposedly-atomic increment operations, and the failure could instead be that these operations are not as atomic as they need to be and that one or more of the increments was lost. This commit makes the message more verbose, clarifying that this failure could be an atomicity problem rather than a simple lost write: [test-repo] Successfully completed all [10] atomic increments of register [test-register-contended-F_NNXHrSSDGveoeyj1skwg] so its expected value is [OptionalBytesReference[00 00 00 00 00 00 00 0a]], but reading its value with [getRegister] unexpectedly yielded [OptionalBytesReference[00 00 00 00 00 00 00 09]]. This anomaly may indicate an atomicity failure amongst concurrent compare-and-exchange operations on registers in this repository.
This commit is contained in:
parent
70176c8356
commit
6f5579656c
|
@ -0,0 +1,5 @@
|
|||
pr: 131200
|
||||
summary: Improve lost-increment message in repo analysis
|
||||
area: Snapshot/Restore
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -76,6 +76,7 @@ import static org.hamcrest.Matchers.anyOf;
|
|||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.Matchers.matchesPattern;
|
||||
import static org.hamcrest.Matchers.nullValue;
|
||||
|
||||
public class RepositoryAnalysisFailureIT extends AbstractSnapshotIntegTestCase {
|
||||
|
@ -385,6 +386,55 @@ public class RepositoryAnalysisFailureIT extends AbstractSnapshotIntegTestCase {
|
|||
assertAnalysisFailureMessage(analyseRepositoryExpectFailure(request).getMessage());
|
||||
}
|
||||
|
||||
public void testFailsOnLostIncrement() {
|
||||
final RepositoryAnalyzeAction.Request request = new RepositoryAnalyzeAction.Request("test-repo");
|
||||
final AtomicBoolean registerWasCorrupted = new AtomicBoolean();
|
||||
|
||||
blobStore.setDisruption(new Disruption() {
|
||||
@Override
|
||||
public BytesReference onContendedCompareAndExchange(BytesRegister register, BytesReference expected, BytesReference updated) {
|
||||
if (expected.equals(updated) == false // not the initial read
|
||||
&& updated.length() == Long.BYTES // not the final write
|
||||
&& randomBoolean()
|
||||
&& register.get().equals(expected) // would have succeeded
|
||||
&& registerWasCorrupted.compareAndSet(false, true)) {
|
||||
|
||||
// indicate success without actually applying the update
|
||||
return expected;
|
||||
}
|
||||
|
||||
return register.compareAndExchange(expected, updated);
|
||||
}
|
||||
});
|
||||
|
||||
safeAwait((ActionListener<RepositoryAnalyzeAction.Response> l) -> analyseRepository(request, l.delegateResponse((ll, e) -> {
|
||||
if (ExceptionsHelper.unwrapCause(e) instanceof RepositoryVerificationException repositoryVerificationException) {
|
||||
assertAnalysisFailureMessage(repositoryVerificationException.getMessage());
|
||||
assertTrue(
|
||||
"did not lose increment, so why did the verification fail?",
|
||||
// clear flag for final assertion
|
||||
registerWasCorrupted.compareAndSet(true, false)
|
||||
);
|
||||
assertThat(
|
||||
asInstanceOf(
|
||||
RepositoryVerificationException.class,
|
||||
ExceptionsHelper.unwrapCause(repositoryVerificationException.getCause())
|
||||
).getMessage(),
|
||||
matchesPattern("""
|
||||
\\[test-repo] Successfully completed all \\[.*] atomic increments of register \\[test-register-contended-.*] \
|
||||
so its expected value is \\[OptionalBytesReference\\[.*]], but reading its value with \\[.*] unexpectedly \
|
||||
yielded \\[OptionalBytesReference\\[.*]]\\. This anomaly may indicate an atomicity failure amongst concurrent \
|
||||
compare-and-exchange operations on registers in this repository\\.""")
|
||||
);
|
||||
ll.onResponse(null);
|
||||
} else {
|
||||
ll.onFailure(e);
|
||||
}
|
||||
})));
|
||||
|
||||
assertFalse(registerWasCorrupted.get());
|
||||
}
|
||||
|
||||
public void testFailsIfRegisterHoldsSpuriousValue() {
|
||||
final RepositoryAnalyzeAction.Request request = new RepositoryAnalyzeAction.Request("test-repo");
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.elasticsearch.common.unit.ByteSizeValue;
|
|||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
||||
import org.elasticsearch.common.util.concurrent.EsExecutors;
|
||||
import org.elasticsearch.common.util.concurrent.ThrottledIterator;
|
||||
import org.elasticsearch.core.CheckedConsumer;
|
||||
import org.elasticsearch.core.Releasable;
|
||||
import org.elasticsearch.core.Releasables;
|
||||
import org.elasticsearch.core.TimeValue;
|
||||
|
@ -647,12 +648,73 @@ public class RepositoryAnalyzeAction extends HandledTransportAction<RepositoryAn
|
|||
}
|
||||
|
||||
private Runnable finalRegisterValueVerifier(String registerName, Random random, Releasable ref) {
|
||||
return () -> {
|
||||
return new Runnable() {
|
||||
|
||||
final CheckedConsumer<ActionListener<OptionalBytesReference>, Exception> finalValueReader = switch (random.nextInt(3)) {
|
||||
case 0 -> new CheckedConsumer<ActionListener<OptionalBytesReference>, Exception>() {
|
||||
@Override
|
||||
public void accept(ActionListener<OptionalBytesReference> listener) {
|
||||
getBlobContainer().getRegister(OperationPurpose.REPOSITORY_ANALYSIS, registerName, listener);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "getRegister";
|
||||
}
|
||||
};
|
||||
case 1 -> new CheckedConsumer<ActionListener<OptionalBytesReference>, Exception>() {
|
||||
@Override
|
||||
public void accept(ActionListener<OptionalBytesReference> listener) {
|
||||
getBlobContainer().compareAndExchangeRegister(
|
||||
OperationPurpose.REPOSITORY_ANALYSIS,
|
||||
registerName,
|
||||
bytesFromLong(expectedFinalRegisterValue),
|
||||
new BytesArray(new byte[] { (byte) 0xff }),
|
||||
listener
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "compareAndExchangeRegister";
|
||||
}
|
||||
};
|
||||
case 2 -> new CheckedConsumer<ActionListener<OptionalBytesReference>, Exception>() {
|
||||
@Override
|
||||
public void accept(ActionListener<OptionalBytesReference> listener) {
|
||||
getBlobContainer().compareAndSetRegister(
|
||||
OperationPurpose.REPOSITORY_ANALYSIS,
|
||||
registerName,
|
||||
bytesFromLong(expectedFinalRegisterValue),
|
||||
new BytesArray(new byte[] { (byte) 0xff }),
|
||||
listener.map(
|
||||
b -> b
|
||||
? OptionalBytesReference.of(bytesFromLong(expectedFinalRegisterValue))
|
||||
: OptionalBytesReference.MISSING
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "compareAndSetRegister";
|
||||
}
|
||||
};
|
||||
default -> {
|
||||
assert false;
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
};
|
||||
|
||||
long expectedFinalRegisterValue = Long.MIN_VALUE;
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
if (isRunning()) {
|
||||
final var expectedFinalRegisterValue = expectedRegisterValue.get();
|
||||
expectedFinalRegisterValue = expectedRegisterValue.get();
|
||||
transportService.getThreadPool()
|
||||
.executor(ThreadPool.Names.SNAPSHOT)
|
||||
.execute(ActionRunnable.wrap(ActionListener.releaseAfter(new ActionListener<OptionalBytesReference>() {
|
||||
.execute(ActionRunnable.wrap(ActionListener.releaseAfter(new ActionListener<>() {
|
||||
@Override
|
||||
public void onResponse(OptionalBytesReference actualFinalRegisterValue) {
|
||||
if (actualFinalRegisterValue.isPresent() == false
|
||||
|
@ -661,9 +723,15 @@ public class RepositoryAnalyzeAction extends HandledTransportAction<RepositoryAn
|
|||
new RepositoryVerificationException(
|
||||
request.getRepositoryName(),
|
||||
Strings.format(
|
||||
"register [%s] should have value [%d] but instead had value [%s]",
|
||||
registerName,
|
||||
"""
|
||||
Successfully completed all [%d] atomic increments of register [%s] so its expected \
|
||||
value is [%s], but reading its value with [%s] unexpectedly yielded [%s]. This \
|
||||
anomaly may indicate an atomicity failure amongst concurrent compare-and-exchange \
|
||||
operations on registers in this repository.""",
|
||||
expectedFinalRegisterValue,
|
||||
registerName,
|
||||
OptionalBytesReference.of(bytesFromLong(expectedFinalRegisterValue)),
|
||||
finalValueReader.toString(),
|
||||
actualFinalRegisterValue
|
||||
)
|
||||
)
|
||||
|
@ -678,36 +746,12 @@ public class RepositoryAnalyzeAction extends HandledTransportAction<RepositoryAn
|
|||
fail(exp);
|
||||
}
|
||||
}
|
||||
}, ref), listener -> {
|
||||
switch (random.nextInt(3)) {
|
||||
case 0 -> getBlobContainer().getRegister(OperationPurpose.REPOSITORY_ANALYSIS, registerName, listener);
|
||||
case 1 -> getBlobContainer().compareAndExchangeRegister(
|
||||
OperationPurpose.REPOSITORY_ANALYSIS,
|
||||
registerName,
|
||||
bytesFromLong(expectedFinalRegisterValue),
|
||||
new BytesArray(new byte[] { (byte) 0xff }),
|
||||
listener
|
||||
);
|
||||
case 2 -> getBlobContainer().compareAndSetRegister(
|
||||
OperationPurpose.REPOSITORY_ANALYSIS,
|
||||
registerName,
|
||||
bytesFromLong(expectedFinalRegisterValue),
|
||||
new BytesArray(new byte[] { (byte) 0xff }),
|
||||
listener.map(
|
||||
b -> b
|
||||
? OptionalBytesReference.of(bytesFromLong(expectedFinalRegisterValue))
|
||||
: OptionalBytesReference.MISSING
|
||||
)
|
||||
);
|
||||
default -> {
|
||||
assert false;
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
}
|
||||
}));
|
||||
}, ref), finalValueReader));
|
||||
} else {
|
||||
ref.close();
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue