Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/scripts/map_consensus_inventory.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
#
# map_consensus_inventory.sh
#
# Prints an inventory of map-bearing surfaces that may affect consensus:
#   1. `map<` fields found under proto/lumera,
#   2. generated map marshal loops found in x/*/v1/types/*.pb.go,
# warns (non-fatally) when the cascade-client-failure evidence path is neither
# reserved in the message handler nor backed by the deterministic encoder,
# warns about missing determinism-related test files, and fails when no
# deterministic integration test exists under tests/integration.
#
# Usage:   .github/scripts/map_consensus_inventory.sh   (takes no arguments)
# Output:  inventory report on stdout; WARN/ERROR diagnostics on stderr.
# Exit:    0 when the hard floor is satisfied, 1 otherwise.
set -euo pipefail

# Diagnostics go to stderr so stdout carries only the inventory report.
warn() { printf 'WARN: %s\n' "$*" >&2; }
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# The script lives in .github/scripts, so the repository root is two levels up.
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
cd "$ROOT"

# 1) Find map fields in proto definitions.
# `|| true`: "no match" (grep exit status 1) must not abort the script.
proto_maps=$(grep -RIn "map<" proto/lumera || true)

# 2) Find map-bearing generated message hints in module types.
pb_maps=$(grep -RIn "for k := range m\." x/*/v1/types/*.pb.go || true)

# 3) Guard cascade-client-failure path: it must be either reserved in msg path
#    OR canonicalized in keeper path. This check only warns; it never fails.
reserved_in_msg="false"
if grep -q "EVIDENCE_TYPE_CASCADE_CLIENT_FAILURE" x/audit/v1/keeper/msg_submit_evidence.go; then
  reserved_in_msg="true"
fi

has_canonical_encoder="false"
if grep -q "marshalCascadeClientFailureEvidenceMetadataDeterministic" x/audit/v1/keeper/evidence.go; then
  has_canonical_encoder="true"
fi

if [ "$reserved_in_msg" != "true" ] && [ "$has_canonical_encoder" != "true" ]; then
  warn "cascade-client-failure is not reserved in MsgSubmitEvidence and no canonical deterministic encoder was found"
fi

# 4) Determinism coverage checks.
# Keep these paths aligned with committed determinism suites.
for f in \
  tests/integration/bank/deterministic_test.go \
  tests/integration/staking/determinstic_test.go \
  tests/systemtests/supernode_metrics_test.go \
  tests/systemtests/supernode_metrics_staleness_test.go; do
  if [ ! -f "$f" ]; then
    warn "missing determinism-related test file: $f"
  fi
done

# Hard floor: repo must keep at least one deterministic integration test.
# A missing tests/integration directory counts as zero tests; without the
# directory guard, `find` would fail and `set -e`/`pipefail` would abort the
# script before the explanatory error below could be printed.
deterministic_integration_count=0
if [ -d tests/integration ]; then
  deterministic_integration_count=$(find tests/integration -type f -name '*determin*test.go' | wc -l | tr -d ' ')
fi
if [ "$deterministic_integration_count" -lt 1 ]; then
  die "no deterministic integration tests found under tests/integration"
fi

printf '%s\n' "Proto map fields:"
printf '%s\n' "$proto_maps"
printf '\n'
printf '%s\n' "Generated map marshal loops:"
printf '%s\n' "$pb_maps"
printf '\n'
printf '%s\n' "Map-bearing consensus inventory check passed"
262 changes: 262 additions & 0 deletions .github/workflows/consensus-determinism.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
# Workflow: builds the chain binary, runs the map-bearing consensus inventory
# script, then runs a local multi-node canary that submits transactions,
# restarts nodes 1/3/5 and compares app hashes across nodes.
name: consensus-determinism

# Runs for pull requests targeting master and for pushes to master.
# Markdown files, docs/ and .gitignore are listed under paths-ignore.
on:
pull_request:
branches: [ master ]
paths-ignore:
- '**.md'
- 'docs/**'
- '.gitignore'
push:
branches: [ master ]
paths-ignore:
- '**.md'
- 'docs/**'
- '.gitignore'

jobs:
# Single job; the whole pipeline is capped at 45 minutes.
determinism-pipeline:
name: determinism-pipeline (canary + restart 1/3/5)
runs-on: ubuntu-latest
timeout-minutes: 45

steps:
# fetch-depth: 0 requests the full history rather than a shallow clone.
- name: Check out repository
uses: actions/checkout@v6.0.1
with:
fetch-depth: 0

- name: Configure Git Safe Directory
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"

# Go toolchain setup is delegated to the repository-local composite action.
- name: Set up Go
uses: ./.github/actions/setup-go

# jq is used by the canary step below to parse RPC and CLI JSON output.
- name: Install jq
run: |
sudo apt-get update
sudo apt-get install -y jq

- name: Install Specific Ignite CLI Version
run: |
IGNITE_VERSION="v29.2.0"
ARCH="linux_amd64"

# Look up the published SHA-256 for the release tarball.
curl -L "https://github.com/ignite/cli/releases/download/${IGNITE_VERSION}/ignite_${IGNITE_VERSION#v}_checksums.txt" -o checksums.txt
EXPECTED_CHECKSUM=$(grep "ignite_${IGNITE_VERSION#v}_${ARCH}.tar.gz" checksums.txt | awk '{print $1}')

# Download the tarball and refuse to unpack it unless the checksum matches.
curl -L "https://github.com/ignite/cli/releases/download/${IGNITE_VERSION}/ignite_${IGNITE_VERSION#v}_${ARCH}.tar.gz" -o ignite.tar.gz
ACTUAL_CHECKSUM=$(sha256sum ignite.tar.gz | awk '{print $1}')
if [ "$ACTUAL_CHECKSUM" != "$EXPECTED_CHECKSUM" ]; then
echo "Error: Checksum mismatch!"
exit 1
fi

tar -xzf ignite.tar.gz
chmod +x ignite

# Uses the ./ignite binary unpacked by the previous step.
- name: Build chain binary
run: |
./ignite chain build --build.tags "ledger" -y -t linux:amd64
env:
DO_NOT_TRACK: 1
GOFLAGS: "-buildvcs=false"

# Static inventory/guard script; it exits non-zero only when no deterministic
# integration test is found (its other findings are warnings).
- name: Map-bearing consensus risk gate
run: .github/scripts/map_consensus_inventory.sh

# Dynamic check: six local nodes, canary transactions, restarts of nodes 1/3/5.
- name: Run determinism canary + restart replay
shell: bash
run: |
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

# jq parses every RPC/CLI JSON response below, so fail early without it.
if ! command -v jq >/dev/null; then
  echo "jq is required but not found"
  exit 1
fi

# Locate the chain binary: prefer the freshly built ./build/lumerad and fall
# back to whatever `lumerad` is on PATH.
repo_dir="$(pwd)"
BIN="${repo_dir}/build/lumerad"
[ -x "$BIN" ] || BIN="$(command -v lumerad || true)"
if [[ -z "$BIN" || ! -x "$BIN" ]]; then
  echo "lumerad binary not found after build"
  # Best-effort diagnostics; neither command may fail the step by itself.
  ls -la "${repo_dir}/build" || true
  command -v lumerad || true
  exit 1
fi

# Scratch area for logs and tx responses; the testnet home dirs live below it.
WORK="${repo_dir}/.ci-determinism"
OUT="$WORK/testnet"
CHAIN_ID="testing"

# Always start from an empty testnet directory.
mkdir -p "$WORK"
rm -rf "$OUT"

# Best-effort teardown: signal every process whose command line matches
# "<BIN> start". Always returns 0, even when pkill matches nothing.
# Globals: BIN (read)
cleanup() {
  local node_cmd="${BIN} start"
  pkill -f "$node_cmd" || :
}
# Make sure the nodes are stopped on every exit path of this step.
trap cleanup EXIT

# Generate home directories for the local testnet under $OUT.
"$BIN" testnet init-files \
  --chain-id="$CHAIN_ID" \
  --output-dir="$OUT" \
  --v=6 \
  --keyring-backend=test \
  --commit-timeout=900ms \
  --minimum-gas-prices=0.000001ulume \
  --single-host

# Start all six nodes in the background, one log file per node.
for i in 0 1 2 3 4 5; do
  "$BIN" start --trace --log_level=info --home "$OUT/node${i}/lumerad" >"$WORK/node${i}.log" 2>&1 &
done

# Wait up to 60s per node for its RPC endpoint to answer /status.
for p in 26657 26658 26659 26660 26661 26662; do
  for _ in $(seq 1 60); do
    if curl -sf "http://127.0.0.1:${p}/status" >/dev/null; then
      break
    fi
    sleep 1
  done
  # Final probe decides; say which port failed instead of exiting silently.
  if ! curl -sf "http://127.0.0.1:${p}/status" >/dev/null; then
    echo "RPC endpoint on port ${p} did not become ready"
    exit 1
  fi
done

# Wait until chain has produced first blocks before CLI/account queries.
# `|| h=0`: with errexit+pipefail a transient curl/jq failure would otherwise
# abort the script on the first attempt instead of retrying.
h=0
for _ in $(seq 1 90); do
  h=$(curl -sf "http://127.0.0.1:26657/status" | jq -r '.result.sync_info.latest_block_height // "0"') || h=0
  if [[ "$h" =~ ^[0-9]+$ ]] && [ "$h" -ge 2 ]; then
    break
  fi
  sleep 1
done
# Re-query once more; an unreachable node yields an empty height and is
# reported through the "chain not ready" message below.
h=$(curl -sf "http://127.0.0.1:26657/status" | jq -r '.result.sync_info.latest_block_height // "0"') || h=""
if ! [[ "$h" =~ ^[0-9]+$ ]] || [ "$h" -lt 2 ]; then
  echo "chain not ready, latest_block_height=$h"
  exit 1
fi

# Submit one cascade-client-failure evidence transaction and verify it.
# Globals:   BIN, SUBJECT, OUT, WORK, CHAIN_ID (all read)
# Arguments: $1 - action id; also used as the response file name
#            $2 - evidence metadata as a JSON string
# Outputs:   writes the CLI JSON response to $WORK/<action id>.json
# Exits:     with status 1 when the response carries a non-zero code
submit_evidence_tx() {
local action_id="$1"
local meta_json="$2"
# Signed by node0 using the keyring in node0's home directory.
"$BIN" tx audit submit-evidence "$SUBJECT" cascade-client-failure "$action_id" "$meta_json" \
--from node0 \
--home "$OUT/node0/lumerad" \
--keyring-backend test \
--chain-id "$CHAIN_ID" \
--node tcp://127.0.0.1:26657 \
--fees 1ulume \
--broadcast-mode block \
--yes -o json > "$WORK/${action_id}.json"
# NOTE(review): the text from "Comment on lines" through "Fix it with Roo Code"
# below looks like a review comment captured together with the diff, not part
# of the script. The call above already uses --home "$OUT/node0/lumerad" as the
# comment suggests, but keeps --broadcast-mode block where the suggestion shows
# sync -- TODO confirm which broadcast mode is intended.
Comment on lines +138 to +146
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same --home issue as the keys show call above: $OUT is the testnet root, but the keyring for node0 is at $OUT/node0/lumerad. The --from node0 lookup will fail because there is no keyring at $OUT.

Suggested change
"$BIN" tx audit submit-evidence "$SUBJECT" cascade-client-failure "$action_id" "$meta_json" \
--from node0 \
--home "$OUT" \
--keyring-backend test \
--chain-id "$CHAIN_ID" \
--node tcp://127.0.0.1:26657 \
--fees 1ulume \
--broadcast-mode sync \
--yes -o json > "$WORK/${action_id}.json"
"$BIN" tx audit submit-evidence "$SUBJECT" cascade-client-failure "$action_id" "$meta_json" \
--from node0 \
--home "$OUT/node0/lumerad" \
--keyring-backend test \
--chain-id "$CHAIN_ID" \
--node tcp://127.0.0.1:26657 \
--fees 1ulume \
--broadcast-mode sync \
--yes -o json > "$WORK/${action_id}.json"

Fix it with Roo Code or mention @roomote and request a fix.


# Pull the tx hash and result code out of the saved response; a missing
# code field is treated as 0.
local txhash code
txhash=$(jq -r '.txhash // ""' "$WORK/${action_id}.json")
code=$(jq -r '.code // 0' "$WORK/${action_id}.json")
# An empty tx hash makes `test` fail, which aborts the script under `set -e`.
test -n "$txhash"
if [ "$code" != "0" ]; then
echo "submit-evidence failed for ${action_id}:"
cat "$WORK/${action_id}.json"
exit 1
fi
}

# Send 1ulume from node0 to node2 and verify the broadcast response.
# Globals:   BIN, OUT, WORK, CHAIN_ID (all read)
# Arguments: $1 - tag; used as the response file name and in error messages
# Outputs:   writes the CLI JSON response to $WORK/<tag>.json
# Exits:     with status 1 when an address cannot be resolved, the response
#            has no tx hash, or the response carries a non-zero code
submit_bank_tx() {
  local tag="$1"
  local from_addr to_addr

  # Resolve both addresses before building the send command: a command
  # substitution in argument position hides a failing `keys show` and would
  # pass an empty address on. "$BIN" is quoted so paths with spaces work.
  from_addr=$("$BIN" keys show node0 -a --home "$OUT/node0/lumerad" --keyring-backend test) || {
    echo "bank send failed for ${tag}: cannot resolve node0 address"
    exit 1
  }
  to_addr=$("$BIN" keys show node2 -a --home "$OUT/node2/lumerad" --keyring-backend test) || {
    echo "bank send failed for ${tag}: cannot resolve node2 address"
    exit 1
  }
  if [ -z "$from_addr" ] || [ -z "$to_addr" ]; then
    echo "bank send failed for ${tag}: empty address (from='${from_addr}' to='${to_addr}')"
    exit 1
  fi

  "$BIN" tx bank send \
    "$from_addr" \
    "$to_addr" \
    1ulume \
    --from node0 \
    --home "$OUT/node0/lumerad" \
    --keyring-backend test \
    --chain-id "$CHAIN_ID" \
    --node tcp://127.0.0.1:26657 \
    --fees 1ulume \
    --broadcast-mode block \
    --yes -o json > "$WORK/${tag}.json"

  # A missing code field is treated as 0 (success).
  local txhash code
  txhash=$(jq -r '.txhash // ""' "$WORK/${tag}.json")
  code=$(jq -r '.code // 0' "$WORK/${tag}.json")
  if [ -z "$txhash" ]; then
    echo "bank send returned no txhash for ${tag}:"
    cat "$WORK/${tag}.json"
    exit 1
  fi
  if [ "$code" != "0" ]; then
    echo "bank send failed for ${tag}:"
    cat "$WORK/${tag}.json"
    exit 1
  fi
}

# Observe the six local nodes for a number of rounds and fail on any sign of
# a stall, excessive height skew or app-hash divergence.
#
# Each round: read every node's latest height, require the minimum height to
# have advanced since the previous round, require max-min <= 3, then compare
# the header app_hash of the block at the minimum height across all nodes.
#
# Arguments: $1 - number of rounds (the function sleeps 1s after each round)
# Outputs:   a diagnostic line on stdout before failing
# Exits:     with status 1 on no progress, skew > 3, divergence, or when a
#            node reports a non-numeric height or a missing app hash
check_consensus_window() {
  local rounds="$1"
  local prev_min_h=0
  # Keep all working state local so the caller's variables are not clobbered.
  local min_h max_h h ah baseline p
  local -a rpc_ports=(26657 26658 26659 26660 26661 26662)

  for _ in $(seq 1 "$rounds"); do
    min_h=999999999
    max_h=0

    for p in "${rpc_ports[@]}"; do
      h=$(curl -sf "http://127.0.0.1:${p}/status" | jq -r '.result.sync_info.latest_block_height')
      # A non-numeric value (e.g. "null") would only make `[ -lt ]` error out
      # inside the `if`, silently dropping this node from the comparison.
      if ! [[ "$h" =~ ^[0-9]+$ ]]; then
        echo "Invalid latest_block_height on port=$p: '$h'"
        exit 1
      fi
      if [ "$h" -lt "$min_h" ]; then min_h="$h"; fi
      if [ "$h" -gt "$max_h" ]; then max_h="$h"; fi
    done

    if [ "$prev_min_h" -gt 0 ] && [ "$min_h" -le "$prev_min_h" ]; then
      echo "No progress detected: prev_min_h=$prev_min_h current_min_h=$min_h"
      exit 1
    fi
    prev_min_h="$min_h"

    if [ $((max_h - min_h)) -gt 3 ]; then
      echo "Height skew too large: min_h=$min_h max_h=$max_h"
      exit 1
    fi

    # min_h is a height every node has reached, so each can serve that block.
    baseline=""
    for p in "${rpc_ports[@]}"; do
      ah=$(curl -sf "http://127.0.0.1:${p}/block?height=${min_h}" | jq -r '.result.block.header.app_hash')
      # Identical "null"/empty answers must not count as agreement.
      if [ -z "$ah" ] || [ "$ah" = "null" ]; then
        echo "Missing app_hash at height=${min_h} on port=$p"
        exit 1
      fi
      if [ -z "$baseline" ]; then
        baseline="$ah"
      elif [ "$ah" != "$baseline" ]; then
        echo "App-hash divergence at height=${min_h}: baseline=$baseline got=$ah on port=$p"
        exit 1
      fi
    done

    sleep 1
  done
}

# Evidence subject: node1's account address.
SUBJECT=$("$BIN" keys show node1 -a --home "$OUT/node1/lumerad" --keyring-backend test)

# Four metadata payloads carrying the same fields and values; only the JSON
# key order differs between them (top-level keys and the keys of "details").
META1='{"reporter_component":2,"target_supernode_accounts":["lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6"],"details":{"action_id":"123637","error":"download failed: insufficient symbols","iteration":"1","operation":"download","supernode_account":"lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6","supernode_endpoint":"18.190.53.108:4444","task_id":"9700ec8a"}}'
META2='{"reporter_component":2,"target_supernode_accounts":["lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6"],"details":{"task_id":"9700ec8a","supernode_endpoint":"18.190.53.108:4444","supernode_account":"lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6","operation":"download","iteration":"1","error":"download failed: insufficient symbols","action_id":"123637"}}'
META3='{"target_supernode_accounts":["lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6"],"details":{"supernode_endpoint":"18.190.53.108:4444","task_id":"9700ec8a","operation":"download","error":"download failed: insufficient symbols","supernode_account":"lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6","action_id":"123637","iteration":"1"},"reporter_component":2}'
META4='{"details":{"iteration":"1","action_id":"123637","supernode_account":"lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6","error":"download failed: insufficient symbols","operation":"download","task_id":"9700ec8a","supernode_endpoint":"18.190.53.108:4444"},"reporter_component":2,"target_supernode_accounts":["lumera1mfldjaqc7ec5rlh4k58yttv3cd978gzl070zk6"]}'

# Canary phase: evidence txs first, then plain bank sends, then observe.
submit_evidence_tx ci-canary-1 "$META1"
submit_evidence_tx ci-canary-2 "$META2"
submit_evidence_tx ci-canary-3 "$META3"
submit_evidence_tx ci-canary-4 "$META4"

submit_bank_tx ci-bank-1
submit_bank_tx ci-bank-2
submit_bank_tx ci-bank-3
check_consensus_window 15

# Restart phase: bounce nodes 1, 3 and 5 one at a time and re-check agreement.
for restart_node in 1 3 5; do
  # Node N serves RPC on 26657+N (same ports as the readiness checks above).
  restart_port=$((26657 + restart_node))
  echo "Restarting node${restart_node} on port ${restart_port}"

  pkill -f "$OUT/node${restart_node}/lumerad" || true
  sleep 2
  # A fixed pause does not guarantee the old process is gone. Starting a second
  # process on the same home directory while the first is still shutting down
  # races with it, and the readiness probe below could be answered by the dying
  # process. Wait (up to ~30s more) until it has actually exited.
  for _ in $(seq 1 30); do
    if ! pgrep -f "$OUT/node${restart_node}/lumerad" >/dev/null; then
      break
    fi
    sleep 1
  done
  if pgrep -f "$OUT/node${restart_node}/lumerad" >/dev/null; then
    echo "node${restart_node} did not exit after being signalled"
    exit 1
  fi
  "$BIN" start --trace --log_level=info --home "$OUT/node${restart_node}/lumerad" >"$WORK/node${restart_node}-restart.log" 2>&1 &

  # Wait up to 60s for the restarted node's RPC endpoint.
  for _ in $(seq 1 60); do
    if curl -sf "http://127.0.0.1:${restart_port}/status" >/dev/null; then
      break
    fi
    sleep 1
  done
  if ! curl -sf "http://127.0.0.1:${restart_port}/status" >/dev/null; then
    echo "node${restart_node} RPC on port ${restart_port} did not come back after restart"
    exit 1
  fi

  submit_bank_tx "post-restart-${restart_node}-1"
  submit_bank_tx "post-restart-${restart_node}-2"
  check_consensus_window 10
done

echo "Consensus determinism pipeline passed"
6 changes: 6 additions & 0 deletions .github/workflows/systemtests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ on:
- '**.md'
- 'docs/**'
- '.gitignore'
pull_request:
branches: [ master ]
paths-ignore:
- '**.md'
- 'docs/**'
- '.gitignore'

jobs:
system-tests:
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ on:
- '**.md'
- 'docs/**'
- '.gitignore'
pull_request:
branches: [ master ]
paths-ignore:
- '**.md'
- 'docs/**'
- '.gitignore'

jobs:
unit-tests:
Expand Down
Loading