-
Notifications
You must be signed in to change notification settings - Fork 326
Implement early exit for retry mechanisms #10604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
0043a80
26dafe3
7cec3c3
c4bbae6
8350acb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,29 +4,37 @@ | |
| import datadog.trace.api.civisibility.execution.TestStatus; | ||
| import datadog.trace.api.civisibility.telemetry.tag.RetryReason; | ||
| import datadog.trace.civisibility.config.ExecutionsByDuration; | ||
| import datadog.trace.civisibility.execution.exit.EarlyExitPolicy; | ||
| import java.util.List; | ||
|
|
||
| /** Runs a test case N times (N depends on test duration) regardless of success or failure. */ | ||
| /** | ||
| * Runs a test case N times (N depends on test duration) regardless of success or failure. The | ||
| * execution can also be terminated early if its ExitPolicy evaluates to {@code true}. | ||
| */ | ||
| public class RunNTimes implements TestExecutionPolicy { | ||
|
|
||
| private final boolean suppressFailures; | ||
| private final List<ExecutionsByDuration> executionsByDuration; | ||
| private int executions; | ||
| private int maxExecutions; | ||
| private int successfulExecutionsSeen; | ||
| private int failedExecutionsSeen; | ||
| private final RetryReason retryReason; | ||
| private TestStatus lastStatus; | ||
| private final EarlyExitPolicy exitPolicy; | ||
|
|
||
| public RunNTimes( | ||
| List<ExecutionsByDuration> executionsByDuration, | ||
| boolean suppressFailures, | ||
| RetryReason retryReason) { | ||
| RetryReason retryReason, | ||
| EarlyExitPolicy exitPolicy) { | ||
| this.suppressFailures = suppressFailures; | ||
| this.executionsByDuration = executionsByDuration; | ||
| this.executions = 0; | ||
| this.maxExecutions = getExecutions(0); | ||
| this.successfulExecutionsSeen = 0; | ||
| this.retryReason = retryReason; | ||
| this.exitPolicy = exitPolicy; | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -35,20 +43,21 @@ public ExecutionOutcome registerExecution(TestStatus status, long durationMillis | |
| ++executions; | ||
| if (status != TestStatus.fail) { | ||
| ++successfulExecutionsSeen; | ||
| } else { | ||
| ++failedExecutionsSeen; | ||
| } | ||
| int maxExecutionsForGivenDuration = getExecutions(durationMillis); | ||
| maxExecutions = Math.min(maxExecutions, maxExecutionsForGivenDuration); | ||
|
|
||
| boolean lastExecution = !retriesLeft(); | ||
| boolean retry = executions > 1; // first execution is not a retry | ||
| boolean failureSuppressed = status == TestStatus.fail && suppressFailures(); | ||
| boolean succeededAllRetries = lastExecution && successfulExecutionsSeen == executions; | ||
|
|
||
| boolean succeededAllExecutions = successfulExecutionsSeen == executions; | ||
| TestStatus finalStatus = null; | ||
| if (lastExecution) { | ||
| // final status will only be "pass" if all retries pass (or the failures were suppressed) | ||
| // also, the `suppressFailures()` call works because its value cannot change between retries | ||
| if (succeededAllRetries || suppressFailures()) { | ||
| if (succeededAllExecutions || suppressFailures()) { | ||
| finalStatus = TestStatus.pass; | ||
| } else { | ||
| finalStatus = TestStatus.fail; | ||
|
|
@@ -58,15 +67,17 @@ public ExecutionOutcome registerExecution(TestStatus status, long durationMillis | |
| return new ExecutionOutcomeImpl( | ||
| failureSuppressed, | ||
| lastExecution, | ||
| lastExecution && successfulExecutionsSeen == 0, | ||
| succeededAllRetries, | ||
| lastExecution && failedExecutionsSeen == executions, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like a skipped test will be considered as "failed all retries" with this update, is this intended?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If a test is skipped, |
||
| lastExecution && succeededAllExecutions, | ||
| retry ? retryReason : null, | ||
| finalStatus); | ||
| } | ||
|
|
||
| private boolean retriesLeft() { | ||
| // skipped tests (either by the framework or DD) should not be retried | ||
| return lastStatus != TestStatus.skip && executions < maxExecutions; | ||
| return lastStatus != TestStatus.skip | ||
| && executions < maxExecutions | ||
| && !exitPolicy.evaluate(failedExecutionsSeen != 0, successfulExecutionsSeen != 0); | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| package datadog.trace.civisibility.execution.exit; | ||
|
|
||
| public interface EarlyExitPolicy { | ||
|
|
||
| /** | ||
| * @return {@code true} if the policy indicates that the test should not be retried anymore. | ||
| */ | ||
| boolean evaluate(boolean hasFailedExecutions, boolean hasPassedExecutions); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| package datadog.trace.civisibility.execution.exit; | ||
|
|
||
| /** Policy to avoid retries once a test fails once. Used for Attempt to Fix. */ | ||
| public class ExitOnFailure implements EarlyExitPolicy { | ||
| public static final EarlyExitPolicy INSTANCE = new ExitOnFailure(); | ||
|
|
||
| private ExitOnFailure() {} | ||
|
|
||
| @Override | ||
| public boolean evaluate(boolean hasFailedExecutions, boolean hasPassedExecutions) { | ||
| return hasFailedExecutions; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| package datadog.trace.civisibility.execution.exit; | ||
|
|
||
| /** | ||
| * Policy to avoid retries once a test flakes (has at least one failed and passed execution). Used | ||
| * for Early Flake Detection. | ||
| */ | ||
| public class ExitOnFlake implements EarlyExitPolicy { | ||
| public static final EarlyExitPolicy INSTANCE = new ExitOnFlake(); | ||
|
|
||
| private ExitOnFlake() {} | ||
|
|
||
| @Override | ||
| public boolean evaluate(boolean hasFailedExecutions, boolean hasPassedExecutions) { | ||
| return hasFailedExecutions && hasPassedExecutions; | ||
| } | ||
| } |
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This class doesn't seem to be used anywhere, do we need it?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Created it for testing purposes to maintain the old test cases, but in hindsight I can totally remove it and align the test cases with the only two possible policies currently (ExitOnFlake and ExitOnFailure) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| package datadog.trace.civisibility.execution.exit; | ||
|
|
||
| /** Policy that ignores early exit on retries. Currently only used in testing. */ | ||
| public class NoExit implements EarlyExitPolicy { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could rename it to |
||
| public static final EarlyExitPolicy INSTANCE = new NoExit(); | ||
|
|
||
| private NoExit() {} | ||
|
|
||
| @Override | ||
| public boolean evaluate(boolean hasFailedExecutions, boolean hasPassedExecutions) { | ||
| return false; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Out of curiosity, what's the reason for this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want to avoid tagging tests with
@test.has_failed_all_retrieswhen they haven't actually been retried. This specific path could be triggered for an Attempt to Fix test that fails on the first run and exits early out of being retried, causing its only execution (a failure) to be the final oneThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at it now, it might be worth considering renaming the Outcome attributes to
succeededAllExecutionsandfailedAllExecutionsgiven that it is more closely aligned with what the variables actually hold (as for event tag, there's a similar case to be made 😢 )