From 4f1c761f5286b4d64f128dff9857b9483aeefe17 Mon Sep 17 00:00:00 2001 From: corey Date: Wed, 24 Jun 2026 10:38:53 +0800 Subject: [PATCH 1/3] test(node): force derivation self-heal reorg once for QA Test-only hook (test/derivation-force-reorg branch) that forces the local-verify self-heal path to fire exactly once, on the first committed batch this process derives, regardless of whether the local blobs actually diverge. This reuses the real batch-granular reorg mechanism (deriveForce on the whole batch -> EL SetCanonical, wrapped in reactor quiesce/restart) so reorg trigger + handling can be observed on a live QA node. Adds L2 head BEFORE/AFTER logging alongside geth's "Chain reorg detected". QA verification only; must NOT be merged to main. Co-authored-by: Cursor --- node/derivation/derivation.go | 93 ++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 24 deletions(-) diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 19b524bb7..ebca9980b 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -75,6 +75,15 @@ type Derivation struct { tagAdvancer *tagAdvancer + // forceReorgDone gates the QA-only reorg test hook: it forces the local- + // verify self-heal path (deriveForce on a whole batch — the real reorg + // mechanism) to fire exactly once, on the first committed batch this + // process derives, so we can observe reorg handling (EL SetCanonical + + // reactor quiesce/restart) on a live node without re-reorging every batch. + // This hook only exists on the test/derivation-force-reorg branch and + // must never be merged to main. 
+ forceReorgDone bool + stop chan struct{} } @@ -415,35 +424,71 @@ func (d *Derivation) derivationBlock(ctx context.Context) { d.logger.Error("local verify local last-header fetch failed", "batchIndex", batchInfo.batchIndex, "error", err) return } + // QA-only (test branch): force the self-heal reorg path on the + // first batch this process derives, so the real batch-granular + // reorg mechanism (deriveForce → EL SetCanonical, wrapped in + // reactor quiesce/restart) runs on a live node regardless of + // whether the local blobs actually diverge. Off after one batch. + forceReorg := !d.forceReorgDone + mismatchIdx := -1 for i := range rebuilt { if rebuilt[i] != batchInfo.blobHashes[i] { + mismatchIdx = i + break + } + } + if forceReorg || mismatchIdx >= 0 { + if forceReorg { + d.forceReorgDone = true + d.logger.Info("FORCE-REORG test: forcing self-heal reorg on this batch (QA only, test branch)", + "batchIndex", batchInfo.batchIndex, + "firstBlockNumber", batchInfo.firstBlockNumber, + "lastBlockNumber", batchInfo.lastBlockNumber) + } else { d.logger.Info("blob hash mismatch; triggering self-heal reorg", "batchIndex", batchInfo.batchIndex, - "expected", batchInfo.blobHashes[i].Hex(), - "rebuilt", rebuilt[i].Hex()) - - batchInfoFull, fetchErr := d.fetchRollupDataByTxHash(lg.TxHash, lg.BlockNumber) - if fetchErr != nil { - d.logger.Error("local verify self-heal: fetch real batch failed", - "batchIndex", batchInfo.batchIndex, "error", fetchErr) - return - } + "expected", batchInfo.blobHashes[mismatchIdx].Hex(), + "rebuilt", rebuilt[mismatchIdx].Hex()) + } - // Quiesce blocksync + broadcast reactors via withReactorsQuiesced - // so the deferred Start runs whether deriveForce succeeds - // or fails — without it, a deriveForce error would leave - // reactors stopped indefinitely. 
- err = d.withReactorsQuiesced(ctx, batchInfo.batchIndex, func() error { - var derErr error - lastHeader, derErr = d.deriveForce(batchInfoFull, 0) - return derErr - }) - if err != nil { - d.logger.Error("local verify self-heal: derive failed", - "batchIndex", batchInfo.batchIndex, "error", err) - return - } - break + batchInfoFull, fetchErr := d.fetchRollupDataByTxHash(lg.TxHash, lg.BlockNumber) + if fetchErr != nil { + d.logger.Error("local verify self-heal: fetch real batch failed", + "batchIndex", batchInfo.batchIndex, "error", fetchErr) + return + } + + // Log L2 head before the forced reorg so the before/after + // transition is visible alongside geth's "Chain reorg detected". + if headBefore, hErr := d.l2Client.BlockByNumber(ctx, nil); hErr == nil { + d.logger.Info("self-heal reorg: L2 head BEFORE", + "batchIndex", batchInfo.batchIndex, + "headNumber", headBefore.NumberU64(), + "headHash", headBefore.Hash().Hex(), + "headParent", headBefore.ParentHash().Hex()) + } + + // Quiesce blocksync + broadcast reactors via withReactorsQuiesced + // so the deferred Start runs whether deriveForce succeeds + // or fails — without it, a deriveForce error would leave + // reactors stopped indefinitely. 
+ err = d.withReactorsQuiesced(ctx, batchInfo.batchIndex, func() error { + var derErr error + lastHeader, derErr = d.deriveForce(batchInfoFull, 0) + return derErr + }) + if err != nil { + d.logger.Error("local verify self-heal: derive failed", + "batchIndex", batchInfo.batchIndex, "error", err) + return + } + + if headAfter, hErr := d.l2Client.BlockByNumber(ctx, nil); hErr == nil { + d.logger.Info("self-heal reorg: L2 head AFTER", + "batchIndex", batchInfo.batchIndex, + "headNumber", headAfter.NumberU64(), + "headHash", headAfter.Hash().Hex(), + "headParent", headAfter.ParentHash().Hex()) } } From d9b66d316d9563c0901c76989640d342df799fbf Mon Sep 17 00:00:00 2001 From: corey Date: Wed, 24 Jun 2026 10:45:40 +0800 Subject: [PATCH 2/3] test(node): minimize to condition + log only Revert the loop restructuring. Now the only behavior change is the self-heal trigger condition (forced || blob mismatch) plus a one-shot flag; the self-heal body is moved back inside the per-blob loop, and the BEFORE/AFTER head reads are removed. Rely on deriveForce's existing per-block log and geth's "Chain reorg detected". Co-authored-by: Cursor --- node/derivation/derivation.go | 101 ++++++++++++---------------------- 1 file changed, 34 insertions(+), 67 deletions(-) diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index ebca9980b..1b8d1bd90 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -75,13 +75,9 @@ type Derivation struct { tagAdvancer *tagAdvancer - // forceReorgDone gates the QA-only reorg test hook: it forces the local- - // verify self-heal path (deriveForce on a whole batch — the real reorg - // mechanism) to fire exactly once, on the first committed batch this - // process derives, so we can observe reorg handling (EL SetCanonical + - // reactor quiesce/restart) on a live node without re-reorging every batch. - // This hook only exists on the test/derivation-force-reorg branch and - // must never be merged to main. + // forceReorgDone: QA test-branch one-shot.
Forces the local-verify + // self-heal path (deriveForce) on the first batch derived, so reorg + // handling can be observed on a live node. Test-only; never merge to main. forceReorgDone bool stop chan struct{} @@ -424,71 +420,42 @@ func (d *Derivation) derivationBlock(ctx context.Context) { d.logger.Error("local verify local last-header fetch failed", "batchIndex", batchInfo.batchIndex, "error", err) return } - // QA-only (test branch): force the self-heal reorg path on the - // first batch this process derives, so the real batch-granular - // reorg mechanism (deriveForce → EL SetCanonical, wrapped in - // reactor quiesce/restart) runs on a live node regardless of - // whether the local blobs actually diverge. Off after one batch. - forceReorg := !d.forceReorgDone - mismatchIdx := -1 for i := range rebuilt { - if rebuilt[i] != batchInfo.blobHashes[i] { - mismatchIdx = i - break - } - } - if forceReorg || mismatchIdx >= 0 { - if forceReorg { + // QA test branch: force the first batch this process derives + // into the self-heal path once (forceReorgDone), so deriveForce + // runs even when the local blob actually matches. Production + // behavior on a real blob mismatch is unchanged. 
+ forced := !d.forceReorgDone + if forced || rebuilt[i] != batchInfo.blobHashes[i] { d.forceReorgDone = true - d.logger.Info("FORCE-REORG test: forcing self-heal reorg on this batch (QA only, test branch)", - "batchIndex", batchInfo.batchIndex, - "firstBlockNumber", batchInfo.firstBlockNumber, - "lastBlockNumber", batchInfo.lastBlockNumber) - } else { d.logger.Info("blob hash mismatch; triggering self-heal reorg", "batchIndex", batchInfo.batchIndex, - "expected", batchInfo.blobHashes[mismatchIdx].Hex(), - "rebuilt", rebuilt[mismatchIdx].Hex()) - } - - batchInfoFull, fetchErr := d.fetchRollupDataByTxHash(lg.TxHash, lg.BlockNumber) - if fetchErr != nil { - d.logger.Error("local verify self-heal: fetch real batch failed", - "batchIndex", batchInfo.batchIndex, "error", fetchErr) - return - } - - // Log L2 head before the forced reorg so the before/after - // transition is visible alongside geth's "Chain reorg detected". - if headBefore, hErr := d.l2Client.BlockByNumber(ctx, nil); hErr == nil { - d.logger.Info("self-heal reorg: L2 head BEFORE", - "batchIndex", batchInfo.batchIndex, - "headNumber", headBefore.NumberU64(), - "headHash", headBefore.Hash().Hex(), - "headParent", headBefore.ParentHash().Hex()) - } - - // Quiesce blocksync + broadcast reactors via withReactorsQuiesced - // so the deferred Start runs whether deriveForce succeeds - // or fails — without it, a deriveForce error would leave - // reactors stopped indefinitely. 
- err = d.withReactorsQuiesced(ctx, batchInfo.batchIndex, func() error { - var derErr error - lastHeader, derErr = d.deriveForce(batchInfoFull, 0) - return derErr - }) - if err != nil { - d.logger.Error("local verify self-heal: derive failed", - "batchIndex", batchInfo.batchIndex, "error", err) - return - } + "forced", forced, + "expected", batchInfo.blobHashes[i].Hex(), + "rebuilt", rebuilt[i].Hex()) + + batchInfoFull, fetchErr := d.fetchRollupDataByTxHash(lg.TxHash, lg.BlockNumber) + if fetchErr != nil { + d.logger.Error("local verify self-heal: fetch real batch failed", + "batchIndex", batchInfo.batchIndex, "error", fetchErr) + return + } - if headAfter, hErr := d.l2Client.BlockByNumber(ctx, nil); hErr == nil { - d.logger.Info("self-heal reorg: L2 head AFTER", - "batchIndex", batchInfo.batchIndex, - "headNumber", headAfter.NumberU64(), - "headHash", headAfter.Hash().Hex(), - "headParent", headAfter.ParentHash().Hex()) + // Quiesce blocksync + broadcast reactors via withReactorsQuiesced + // so the deferred Start runs whether deriveForce succeeds + // or fails — without it, a deriveForce error would leave + // reactors stopped indefinitely. 
+ err = d.withReactorsQuiesced(ctx, batchInfo.batchIndex, func() error { + var derErr error + lastHeader, derErr = d.deriveForce(batchInfoFull, 0) + return derErr + }) + if err != nil { + d.logger.Error("local verify self-heal: derive failed", + "batchIndex", batchInfo.batchIndex, "error", err) + return + } + break } } From 33f8474962c88814b8922065260b9d107acc6765 Mon Sep 17 00:00:00 2001 From: corey Date: Wed, 24 Jun 2026 10:51:32 +0800 Subject: [PATCH 3/3] test(node): add detailed reorg before/after logging in deriveForce Make the reorg visible end-to-end from node logs: - snapshot canonical hashes of the blocks about to be rewritten - log EL head before the rewrite (batch tip) - per block: oldHash -> newHash + live EL head after the write (the head drops to the pinned parent then climbs back -- the reorg) - log EL head after the rewrite completes On a healthy node the rewritten content is identical so the per-block hash is unchanged; the EL head drop/climb plus geth's "Chain reorg detected" are the proof the reorg actually happened. Co-authored-by: Cursor --- node/derivation/derivation.go | 49 ++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 1b8d1bd90..c8a41973a 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -956,6 +956,33 @@ func (d *Derivation) deriveForce(rollupData *BatchInfo, skipNumber uint64) (*eth return nil, fmt.Errorf("parent header at %d missing", parentNum) } + // Reorg observability: snapshot the canonical hashes of the blocks we are + // about to rewrite (read now, before any write, so they are still intact) + // and the EL head before the rewrite. Pairing these with the per-block and + // post-rewrite logs below makes the reorg visible end-to-end: the EL head + // drops from the batch tip down to the pinned parent and then climbs back. 
+ oldHashes := make(map[uint64]common.Hash, len(rollupData.blockContexts)) + for _, bd := range rollupData.blockContexts { + n := bd.SafeL2Data.Number + if n <= skipNumber { + continue + } + if h, e := d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(n))); e == nil && h != nil { + oldHashes[n] = h.Hash() + } + } + if elHeadBefore, e := d.l2Client.BlockByNumber(d.ctx, nil); e == nil { + d.logger.Info("deriveForce: REORG begin — rewriting batch on pinned parent", + "batchIndex", rollupData.batchIndex, + "rewriteFrom", parentNum+1, + "rewriteTo", rollupData.lastBlockNumber, + "pinnedParentNumber", parentNum, + "pinnedParentHash", lastHeader.Hash().Hex(), + "elHeadNumberBefore", elHeadBefore.NumberU64(), + "elHeadHashBefore", elHeadBefore.Hash().Hex(), + ) + } + for _, blockData := range rollupData.blockContexts { // Skip blocks already present locally (scenario C). For scenario B // skipNumber == 0 means this branch is never taken. @@ -995,10 +1022,30 @@ func (d *Derivation) deriveForce(rollupData *BatchInfo, skipNumber uint64) (*eth return nil, fmt.Errorf("apply block %d: %w", safeData.Number, err) } + // Read the live EL head right after the write. On the first rewritten + // block this is the proof of reorg: the head has dropped from the old + // batch tip down to this freshly-applied block (SetCanonical switched + // the canonical chain); subsequent blocks climb it back up. 
+ var elHeadNum uint64 + if h, e := d.l2Client.BlockNumber(d.ctx); e == nil { + elHeadNum = h + } + oldHash := oldHashes[safeData.Number] d.logger.Info("block written via NewSafeL2Block", "batchIndex", rollupData.batchIndex, "blockNumber", safeData.Number, - "hash", lastHeader.Hash().Hex(), + "oldHash", oldHash.Hex(), + "newHash", lastHeader.Hash().Hex(), + "hashChanged", oldHash != lastHeader.Hash(), + "elHeadAfterWrite", elHeadNum, + ) + } + + if elHeadAfter, e := d.l2Client.BlockByNumber(d.ctx, nil); e == nil { + d.logger.Info("deriveForce: REORG complete — batch reapplied", + "batchIndex", rollupData.batchIndex, + "elHeadNumberAfter", elHeadAfter.NumberU64(), + "elHeadHashAfter", elHeadAfter.Hash().Hex(), ) } return lastHeader, nil