-
Notifications
You must be signed in to change notification settings - Fork 2
kv(composed1): M3 — verifyComposed1 apply-time gate + retry sentinels #895
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -179,6 +179,21 @@ func (s RouteHistorySnapshot) OwnerOf(key []byte) (uint64, bool) { | |||||||||||||||||||||||||||
| return 0, false | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Current returns the route catalog snapshot at the engine's current | ||||||||||||||||||||||||||||
| // catalogVersion. Returns (zero, false) when the history ring has | ||||||||||||||||||||||||||||
| // not been initialised (bare-struct Engine). Used by the M3 | ||||||||||||||||||||||||||||
| // Composed-1 cross-version-read fence (design doc §4.4) — the gate | ||||||||||||||||||||||||||||
| // compares the txn's observed-version owner against the current | ||||||||||||||||||||||||||||
| // owner so a route shift between BeginTxn and Commit is caught | ||||||||||||||||||||||||||||
| // before it can produce a G1c anomaly across a cross-group | ||||||||||||||||||||||||||||
| // MoveRange / SplitRange. | ||||||||||||||||||||||||||||
| func (e *Engine) Current() (RouteHistorySnapshot, bool) { | ||||||||||||||||||||||||||||
| e.mu.RLock() | ||||||||||||||||||||||||||||
| defer e.mu.RUnlock() | ||||||||||||||||||||||||||||
| snap, ok := e.history[e.catalogVersion] | ||||||||||||||||||||||||||||
| return snap, ok | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // SnapshotAt returns the route catalog snapshot recorded at version v. | ||||||||||||||||||||||||||||
| // Returns (zero, false) when v is not in the ring — either because v | ||||||||||||||||||||||||||||
| // is in the future (> catalogVersion), or because the FIFO ring has | ||||||||||||||||||||||||||||
|
|
@@ -193,6 +208,26 @@ func (e *Engine) SnapshotAt(v uint64) (RouteHistorySnapshot, bool) { | |||||||||||||||||||||||||||
| return snap, ok | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // SetHistoryDepthForTest overrides the FIFO ring depth from outside | ||||||||||||||||||||||||||||
| // the package. Test-only — callers MUST set the depth before the | ||||||||||||||||||||||||||||
| // Engine is shared with any concurrent reader (no internal | ||||||||||||||||||||||||||||
| // synchronisation here for the same reason TestEngineSnapshotAt_FIFOEviction | ||||||||||||||||||||||||||||
| // does the direct field write in-package; this seam exposes the | ||||||||||||||||||||||||||||
| // equivalent capability to external test packages that need a | ||||||||||||||||||||||||||||
| // small depth to exercise eviction without overwhelming TLC-style | ||||||||||||||||||||||||||||
| // bounded scenarios — claude review on PR #894). | ||||||||||||||||||||||||||||
| // | ||||||||||||||||||||||||||||
| // Production code must use DefaultRouteHistoryDepth (32) or a | ||||||||||||||||||||||||||||
| // future operator-exposed config knob; this seam is build-time | ||||||||||||||||||||||||||||
| // equivalent to direct field access and exists ONLY so tests in | ||||||||||||||||||||||||||||
| // the kv package can drive eviction-trigger scenarios without | ||||||||||||||||||||||||||||
| // adding a constructor option just for tests. | ||||||||||||||||||||||||||||
| func (e *Engine) SetHistoryDepthForTest(depth int) { | ||||||||||||||||||||||||||||
| e.mu.Lock() | ||||||||||||||||||||||||||||
| defer e.mu.Unlock() | ||||||||||||||||||||||||||||
| e.historyDepth = depth | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
Comment on lines
+225
to
+229
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reject non-positive history depths here. Passing a depth ≤ 0 is not a valid ring size, so fail fast instead of storing it. Suggested fix:
func (e *Engine) SetHistoryDepthForTest(depth int) {
e.mu.Lock()
defer e.mu.Unlock()
+ if depth <= 0 {
+ panic("SetHistoryDepthForTest: depth must be > 0")
+ }
e.historyDepth = depth
}
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // HistoryDepth returns the configured ring depth for diagnostics. | ||||||||||||||||||||||||||||
| func (e *Engine) HistoryDepth() int { | ||||||||||||||||||||||||||||
| e.mu.RLock() | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -86,16 +86,24 @@ type kvFSM struct { | |
| } | ||
|
|
||
| // RouteHistory is the kv-side interface to the route catalog's | ||
| // versioned-snapshot ring. *distribution.Engine satisfies it. | ||
| // Defined in the kv package so kvFSM does not have to import a | ||
| // concrete type for the field; the M3 verifyComposed1 gate uses | ||
| // only SnapshotAt and the returned snapshot's OwnerOf, so the | ||
| // interface stays minimal. | ||
| // versioned-snapshot ring. *distribution.Engine satisfies it via | ||
| // WrapDistributionEngine. Defined in the kv package so kvFSM does | ||
| // not have to import a concrete type for the field; the M3 | ||
| // verifyComposed1 gate uses only SnapshotAt + Current + the returned | ||
| // snapshot's OwnerOf, so the interface stays minimal. | ||
| type RouteHistory interface { | ||
| // SnapshotAt returns the route catalog at the given catalog | ||
| // version. Returns (zero, false) when the version is outside | ||
| // the ring (either evicted by depth, or in the future). | ||
| // the ring (either evicted by depth, or in the future). The | ||
| // M3 gate maps the not-found case to ErrComposed1VersionGCd. | ||
| SnapshotAt(version uint64) (RouteSnapshot, bool) | ||
| // Current returns the route catalog snapshot at the engine's | ||
| // current catalog version. Returns (zero, false) when the | ||
| // engine has no history (bare-struct case used by some test | ||
| // seams). The M3 cross-version fence uses this to compare | ||
| // the txn's observed-version owner against the current | ||
| // owner — a mismatch is the §3 codex P1 trace. | ||
| Current() (RouteSnapshot, bool) | ||
| } | ||
|
|
||
| // RouteSnapshot is the historical view of the route catalog at a | ||
|
|
@@ -199,6 +207,38 @@ var _ raftengine.StateMachine = (*kvFSM)(nil) | |
|
|
||
| var ErrUnknownRequestType = errors.New("unknown request type") | ||
|
|
||
| // ErrComposed1Violation is returned by verifyComposed1 when the | ||
| // transaction's commit cannot proceed on this Raft group because the | ||
| // txn's read-set or write-set keys are not owned by this group at | ||
| // either the txn's observed catalog version (the spec-level §4.2(a) | ||
| // check) or the current catalog version observed by the FSM at apply | ||
| // time (the §4.4 cross-version-read fence). Surfaces to the | ||
| // coordinator as a retryable error: the M4 coordinator path re-reads | ||
| // the route cache, re-routes the txn, and re-issues it once on the | ||
| // new owning group. | ||
| // | ||
| // Wrapped with errors.Wrapf at the call site to carry the | ||
| // per-key diagnostic (which key, which observed-version owner, which | ||
| // current-version owner) — the caller's retry path uses | ||
| // errors.Is(err, ErrComposed1Violation) to match. | ||
| var ErrComposed1Violation = errors.New("composed-1: route ownership shifted; retry on new owning group") | ||
|
|
||
| // ErrComposed1VersionGCd is returned by verifyComposed1 when the | ||
| // txn's observed catalog version is no longer in the engine's | ||
| // retention ring — either because the FIFO ring evicted it (the | ||
| // txn lived longer than `routeHistoryDepth` versions worth of | ||
| // catalog churn) or because the version was never seen on this | ||
| // node. Surfaces to the coordinator as a retryable error: the | ||
| // caller's M4 retry path reads the current route cache and | ||
| // re-issues the txn with a fresh observedVer. | ||
| // | ||
| // The not-found ⇒ hard-error semantics (rather than soft-pass) | ||
| // matters because a soft-pass would let the gate be bypassed | ||
| // exactly in the long-running-txn / high-churn cases where the | ||
| // cross-version-read hazard is most likely (design doc §4.3 + | ||
| // gemini medium + codex P2 on PR #870). | ||
| var ErrComposed1VersionGCd = errors.New("composed-1: observed catalog version evicted from history ring; retry") | ||
|
|
||
| type fsmApplyResponse struct { | ||
| results []error | ||
| } | ||
|
|
@@ -493,6 +533,9 @@ func (f *kvFSM) RestoredCutover() uint64 { | |
| } | ||
|
|
||
| func (f *kvFSM) handleTxnRequest(ctx context.Context, r *pb.Request, commitTS uint64) error { | ||
| if err := f.verifyComposed1(r); err != nil { | ||
| return err | ||
| } | ||
| switch r.Phase { | ||
| case pb.Phase_PREPARE: | ||
| return f.handlePrepareRequest(ctx, r) | ||
|
|
@@ -507,6 +550,88 @@ func (f *kvFSM) handleTxnRequest(ctx context.Context, r *pb.Request, commitTS ui | |
| } | ||
| } | ||
|
|
||
| // verifyComposed1 is the M3 apply-time Composed-1 gate per | ||
| // docs/design/2026_05_29_proposed_composed1_cross_group_commit_guard.md | ||
| // §4.2(a) + §4.4. Runs two checks before the txn's writes land: | ||
| // | ||
| // (a) Observed-version owner — the txn's read-set was captured | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🚫 [golangci] reported by reviewdog 🐶 |
||
| // at routes[observedVer], so every write key must be owned | ||
| // by THIS Raft group at that historical version. Matches | ||
| // the spec-level Commit precondition in tla/composed/Composed.tla. | ||
| // | ||
| // (b) Current-version owner — even when (a) passes, a route | ||
| // shift between BeginTxn and Commit can leave the write | ||
| // landing on the OLD owner while readers at the new | ||
| // version route to the NEW owner and miss the write (the | ||
| // §3 codex P1 G1c trace). The current-version fence | ||
| // refuses the commit when this group no longer owns the | ||
| // key, forcing a coordinator retry on the new owner. | ||
| // | ||
| // Short-circuits cleanly in three legacy / not-applicable cases: | ||
| // - FSM was constructed without WithRouteHistory (legacy / test | ||
| // seam): routes == nil, return nil. | ||
| // - Request carries ObservedRouteVersion == 0 (unpinned — | ||
| // pre-M1 caller, or ABORT request that doesn't carry the | ||
| // version): return nil. | ||
| // - Engine.Current returns (zero, false) — the engine has no | ||
| // history (bare-struct test seam): return nil at the (b) check. | ||
| // | ||
| // Returns ErrComposed1VersionGCd when the observed version is | ||
| // outside the ring (M4 retry), and ErrComposed1Violation wrapped | ||
| // with per-key context otherwise. | ||
| func (f *kvFSM) verifyComposed1(r *pb.Request) error { | ||
| if f.routes == nil { | ||
| return nil | ||
| } | ||
|
Comment on lines
+582
to
+585
Contributor
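There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are two critical issues here: (1) ABORT-phase requests are not exempted from the gate, and (2) the gate does not short-circuit when `f.shardGroupID` is 0. The suggested change below addresses both: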
func (f *kvFSM) verifyComposed1(r *pb.Request) error {
if r.Phase == pb.Phase_ABORT {
return nil
}
if f.routes == nil || f.shardGroupID == 0 {
return nil
} |
||
| observedVer := r.GetObservedRouteVersion() | ||
| if observedVer == 0 { | ||
| return nil | ||
| } | ||
|
Comment on lines
+582
to
+589
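There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Honor the documented `shardGroupID == 0` short-circuit: the gate should return nil when `f.shardGroupID` is 0, not only when `f.routes` is nil.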
Suggested fix func (f *kvFSM) verifyComposed1(r *pb.Request) error {
- if f.routes == nil {
+ if f.routes == nil || f.shardGroupID == 0 {
return nil
}
observedVer := r.GetObservedRouteVersion()
if observedVer == 0 {
return nil
🤖 Prompt for AI Agents |
||
|
|
||
| // (a) Observed-version check. | ||
| observedSnap, ok := f.routes.SnapshotAt(observedVer) | ||
| if !ok { | ||
| return errors.WithStack(ErrComposed1VersionGCd) | ||
| } | ||
| if err := f.verifyOwnerFromSnapshot(r.GetMutations(), observedSnap, observedVer, "observed"); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| // (b) Current-version cross-version-read fence. | ||
| currentSnap, ok := f.routes.Current() | ||
| if !ok { | ||
| // No current snapshot — engine has no history, nothing | ||
| // to compare against. Fall through (matches the | ||
| // short-circuit posture of an unwired FSM). | ||
| return nil | ||
| } | ||
| return f.verifyOwnerFromSnapshot(r.GetMutations(), currentSnap, currentSnap.Version(), "current") | ||
| } | ||
|
|
||
| // verifyOwnerFromSnapshot is the shared per-mutation owner-check | ||
| // loop used by verifyComposed1's observed-version and current- | ||
| // version passes. `phase` is the diagnostic label ("observed" / | ||
| // "current") that ends up in the wrapped error. isTxnInternalKey | ||
| // mutations (the TxnMeta marker prefix) are skipped — they are | ||
| // always on every shard and have no Composed-1 ownership. | ||
| func (f *kvFSM) verifyOwnerFromSnapshot(mutations []*pb.Mutation, snap RouteSnapshot, snapVer uint64, phase string) error { | ||
| for _, mut := range mutations { | ||
| if mut == nil || len(mut.Key) == 0 { | ||
| continue | ||
| } | ||
| if isTxnInternalKey(mut.Key) { | ||
| continue | ||
| } | ||
| owner, found := snap.OwnerOf(mut.Key) | ||
| if !found || owner != f.shardGroupID { | ||
| return errors.Wrapf(ErrComposed1Violation, | ||
| "%s-version v=%d: key %q owned by group %d (found=%v); this FSM serves group %d", | ||
| phase, snapVer, mut.Key, owner, found, f.shardGroupID) | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func (f *kvFSM) validateConflicts(ctx context.Context, muts []*pb.Mutation, startTS uint64) error { | ||
| seen := make(map[string]struct{}, len(muts)) | ||
| for _, mut := range muts { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If `SetHistoryDepthForTest` is called to reduce the history depth after the history ring has already been populated with more entries than the new depth, a subsequent call to `recordHistorySnapshotLocked` will trigger a runtime panic. This is because `make([]uint64, len(e.historyOrder)-1, e.historyDepth)` will be invoked with a length greater than its capacity. To prevent this, truncate the excess history entries immediately when updating the depth.