From ebc45dd133a128766738b1199b45769f647cc1b9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 19 Feb 2026 19:42:12 +0100 Subject: [PATCH 1/4] GPU: Improve some error messages --- GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx | 4 ++-- GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx index cd1717faf178d..51a896c2baf6a 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx @@ -43,10 +43,10 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClustersFlat* cluster int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec) { if (clustersCompressed->nTracks && clustersCompressed->solenoidBz != -1e6f && clustersCompressed->solenoidBz != param.bzkG) { - throw std::runtime_error("Configured solenoid Bz does not match value used for track model encoding"); + throw std::runtime_error("Configured solenoid Bz " + std::to_string(param.bzkG) + " does not match value used for track model encoding " + std::to_string(clustersCompressed->solenoidBz)); } if (clustersCompressed->nTracks && clustersCompressed->maxTimeBin != -1e6 && clustersCompressed->maxTimeBin != param.continuousMaxTimeBin) { - throw std::runtime_error("Configured max time bin does not match value used for track model encoding"); + throw std::runtime_error("Configured max time bin " + std::to_string(param.continuousMaxTimeBin) + " does not match value used for track model encoding " + std::to_string(clustersCompressed->maxTimeBin)); } std::vector clusters[NSECTORS][GPUCA_ROW_COUNT]; std::atomic_flag locks[NSECTORS][GPUCA_ROW_COUNT]; diff --git 
a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index ca1352b3bda1b..89d47d0e1b86c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -273,10 +273,10 @@ int32_t GPUChainTracking::RunTPCDecompression() CompressedClusters& inputGPUShadow = DecompressorShadow.mInputGPU; if (cmprClsHost.nTracks && cmprClsHost.solenoidBz != -1e6f && cmprClsHost.solenoidBz != param().bzkG) { - throw std::runtime_error("Configured solenoid Bz does not match value used for track model encoding"); + throw std::runtime_error("Configured solenoid Bz " + std::to_string(param().bzkG) + " does not match value used for track model encoding " + std::to_string(cmprClsHost.solenoidBz)); } if (cmprClsHost.nTracks && cmprClsHost.maxTimeBin != -1e6 && cmprClsHost.maxTimeBin != param().continuousMaxTimeBin) { - throw std::runtime_error("Configured max time bin does not match value used for track model encoding"); + throw std::runtime_error("Configured max time bin " + std::to_string(param().continuousMaxTimeBin) + " does not match value used for track model encoding " + std::to_string(cmprClsHost.maxTimeBin)); } int32_t inputStream = 0; From b50c50cd4dd67155bdf83e03095513b3135e97cb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 15 Oct 2025 09:44:10 +0200 Subject: [PATCH 2/4] GPU: Improve existing debug dumps --- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 2 + GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 74 ++++++++++--------- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 14974bdec2303..813e3df29e82e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -199,6 +199,8 @@ class GPUTPCGMMerger : public GPUProcessor void DumpRefit(std::ostream& out) const; void DumpFinal(std::ostream& out) const; void 
DumpLoopers(std::ostream& out) const; + void DumpTrackParam(std::ostream& out) const; + void DumpTrackClusters(std::ostream& out, bool non0StateOnly = false, bool noNDF0 = false) const; template void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 0a83bf47f5725..f6afc46609a11 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -43,10 +43,10 @@ using namespace gputpcgmmergertypes; void GPUTPCGMMerger::DumpSectorTracks(std::ostream& out) const { std::streamsize ss = out.precision(); - out << std::setprecision(2); + out << std::setprecision(10); out << "\nTPC Merger Sector Tracks\n"; for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { - out << "Sector Track Info Index " << (mSectorTrackInfoIndex[iSector + 1] - mSectorTrackInfoIndex[iSector]) << " / " << (mSectorTrackInfoIndex[NSECTORS + iSector + 1] - mSectorTrackInfoIndex[NSECTORS + iSector]) << "\n"; + out << "Sector Track Info Sector " << iSector << " Index " << (mSectorTrackInfoIndex[iSector + 1] - mSectorTrackInfoIndex[iSector]) << " / " << (mSectorTrackInfoIndex[NSECTORS + iSector + 1] - mSectorTrackInfoIndex[NSECTORS + iSector]) << "\n"; for (int32_t iGlobal = 0; iGlobal < 2; iGlobal++) { out << " Track type " << iGlobal << "\n"; for (int32_t j = mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal]; j < mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal + 1]; j++) { @@ -134,9 +134,14 @@ void GPUTPCGMMerger::DumpMergedBetweenSectors(std::ostream& out) const void GPUTPCGMMerger::DumpCollected(std::ostream& out) const { - std::streamsize ss = out.precision(); - out << std::setprecision(6); out << "\nTPC Merger Collected Tracks\n"; + DumpTrackParam(out); +} + +void 
GPUTPCGMMerger::DumpTrackParam(std::ostream& out) const +{ + std::streamsize ss = out.precision(); + out << std::setprecision(10); for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); @@ -157,33 +162,42 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const } } -void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const +void GPUTPCGMMerger::DumpTrackClusters(std::ostream& out, bool non0StateOnly, bool noNDF0) const { - out << "\nTPC Merger Refit Prepare\n"; - out << " Sort\n"; - for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - out << " " << i << ": " << mTrackOrderAttach[i] << "\n"; - } - out << " Clusters\n"; for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { const auto& trk = mMergedTracks[j]; - out << " Track " << j << ": "; + if (trk.NClusters() == 0) { + continue; + } + if (noNDF0 && (!trk.OK() || trk.GetParam().GetNDF() < 0)) { + continue; + } + out << " Track " << j << ": (" << trk.NClusters() << "): "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { - out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; + if (!non0StateOnly || mClusters[i].state != 0) { + out << j << "/" << (i - trk.FirstClusterRef()) << ": " << (int32_t)mClusters[i].row << "/" << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; + } } out << "\n"; } - uint32_t maxId = mNMaxClusters; +} + +void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const +{ + out << "\nTPC Merger Refit Prepare\n"; + out << " Sort\n"; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + out << " " << i << ": " << mTrackOrderAttach[i] << "\n"; + } + out << " Track Clusters"; + DumpTrackClusters(out); uint32_t j = 0; - for (uint32_t i = 0; i < maxId; i++) { + for (uint32_t i = 0; i < mNMaxClusters; i++) { if ((mClusterAttachment[i] & attachFlagMask) != 0) { - if (++j % 10 == 0) { - out << " 
Cluster attachment "; + if (j++ % 10 == 0) { + out << "\n Cluster attachment "; } out << i << ": " << (mClusterAttachment[i] & attachTrackMask) << " / " << (mClusterAttachment[i] & attachFlagMask) << " - "; - if (j % 10 == 0) { - out << "\n"; - } } } out << "\n"; @@ -192,7 +206,7 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const void GPUTPCGMMerger::DumpRefit(std::ostream& out) const { std::streamsize ss = out.precision(); - out << std::setprecision(2); + out << std::setprecision(10); out << "\nTPC Merger Refit\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mMergedTracks[i]; @@ -224,22 +238,10 @@ void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; - for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { - const auto& trk = mMergedTracks[j]; - if (trk.NClusters() == 0) { - continue; - } - out << " Track " << j << ": "; - for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { - if (mClusters[i].state != 0) { - out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; - } - } - out << "\n"; - } - uint32_t maxId = mNMaxClusters; + out << "Track Clusters\n"; + DumpTrackClusters(out, true); uint32_t j = 0; - for (uint32_t i = 0; i < maxId; i++) { + for (uint32_t i = 0; i < mNMaxClusters; i++) { if ((mClusterAttachment[i] & attachFlagMask) != 0) { if (++j % 10 == 0) { out << " Cluster attachment "; From 956b45ca71bed3fee14d5a88b41914df0f9a8bd1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 19 Feb 2026 22:27:38 +0100 Subject: [PATCH 3/4] GPU TPC: Fix deterministic mode with new cluster removal protection --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 
260781c17406b..eaf181b741918 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1852,7 +1852,7 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in if (CAMath::Abs(trk.GetParam().GetQPt() * Param().qptB5Scaler) <= Param().rec.tpc.rejectQPtB5 && !trk.MergedLooper() && trk.Leg() == 0) { weight |= attachProtect; } - mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = weight; + CAMath::AtomicMax(&mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num], weight); CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u); } if (!trk.CCE() && !trk.MergedLooper()) { From f638a7f6e3fcbf4f5f8a1e31e250c96c0540c965 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 20 Feb 2026 00:03:17 +0100 Subject: [PATCH 4/4] GPU TPC: Do not use fitWithoutProjection for now, sometimes broken --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 21 +++++++++----------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index eaf181b741918..3622e51bd663f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -555,7 +555,7 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack prop.SetMaterialTPC(); prop.SetMaxSinPhi(GPUCA_MAX_SIN_PHI); prop.SetSeedingErrors(true); // Larger errors for seeds, better since we don't start with good hypothesis - prop.SetFitInProjections(false); + prop.SetFitInProjections(true); // TODO: Was false; consider switching back to false once the fit without projections is fixed prop.SetPolynomialField(&Param().polynomialField); GPUTPCGMTrackParam trk; trk.X() = inTrack->Param().GetX(); @@ -718,9 +718,6 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT } else if (iBorder 
== 3) { // transport to the middle of the sector and rotate vertically to the border on the right dAlpha = -dAlpha; x0 = GPUTPCGeometry::Row2X(63); - } else if (iBorder == 4) { // transport to the middle of the sßector, w/o rotation - dAlpha = 0; - x0 = GPUTPCGeometry::Row2X(63); } const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); @@ -783,14 +780,14 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) { CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode)); GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks(); bool sameSector = (iSector1 == iSector2); for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) { - GPUTPCGMBorderTrack& b = B1[itr]; + const GPUTPCGMBorderTrack& b = B1[itr]; float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1])); if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) { d *= 2; @@ -809,7 +806,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea } if (!sameSector) { for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) { - GPUTPCGMBorderTrack& b = B2[itr]; + const GPUTPCGMBorderTrack& b = B2[itr]; float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1])); if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) { d *= 2; @@ 
-827,7 +824,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) { #if !defined(GPUCA_GPUCODE_COMPILEKERNELS) GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; @@ -860,7 +857,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThrea } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) { // int32_t statAll = 0, statMerged = 0; float factor2ys = Param().rec.tpc.trackMergerFactor2YS; @@ -887,7 +884,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea i2++; } - GPUTPCGMBorderTrack& b1 = B1[r1.fId]; + const GPUTPCGMBorderTrack& b1 = B1[r1.fId]; if (b1.NClusters() < minNPartHits) { continue; } @@ -904,7 +901,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea } // do check - GPUTPCGMBorderTrack& b2 = B2[r2.fId]; + const GPUTPCGMBorderTrack& b2 = B2[r2.fId]; #if defined(GPUCA_MERGER_BY_MC_LABEL) && !defined(GPUCA_GPUCODE) int64_t label1 = GetTrackLabel(b1); int64_t label2 = 
GetTrackLabel(b2); @@ -1019,7 +1016,7 @@ GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t n const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; + const GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; int32_t iSector = track.Sector(); GPUTPCGMBorderTrack b; if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 813e3df29e82e..8f554c24c1d8c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -226,7 +226,7 @@ class GPUTPCGMMerger : public GPUProcessor private: GPUd() void MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam = false); template - GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0); + GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0); GPUd() void MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, int32_t itr); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h index 60febbb4428f6..84102cd14ce5c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h @@ -54,6 +54,7 @@ class GPUTPCGMSectorTrack GPUd() float SecPhi() const { return mParam.mSecPhi; } GPUd() float 
DzDs() const { return mParam.mDzDs; } GPUd() float QPt() const { return mParam.mQPt; } + GPUd() const auto& Param() const { return mParam; } GPUd() float TOffset() const { return mTOffset; } GPUd() int32_t LocalTrackId() const { return mLocalTrackId; } @@ -75,7 +76,7 @@ class GPUTPCGMSectorTrack GPUd() void Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sectorTr, float alpha, int32_t sector); GPUd() void SetParam2(const GPUTPCGMTrackParam& trk); GPUd() void Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sectorTr, float alpha, int32_t sector); - GPUd() void UseParam2() { mParam = mParam2; } + GPUd() void UseParam2() { mParam = mParam2; } // TODO: Clean this up! GPUd() void SetX2(float v) { mParam2.mX = v; } GPUd() float X2() const { return mParam2.mX; }