AliceO2Group · davidrohr · Feb 19, 2026 · Oct 15, 2025 · Feb 19, 2026 · Feb 19, 2026
@@ -43,10 +43,10 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClustersFlat* cluster
 int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function<o2::tpc::ClusterNative*(size_t)> allocator, const GPUParam& param, bool deterministicRec)
 {
   if (clustersCompressed->nTracks && clustersCompressed->solenoidBz != -1e6f && clustersCompressed->solenoidBz != param.bzkG) {
-    throw std::runtime_error("Configured solenoid Bz does not match value used for track model encoding");
+    throw std::runtime_error("Configured solenoid Bz " + std::to_string(param.bzkG) + " does not match value used for track model encoding " + std::to_string(clustersCompressed->solenoidBz));
   }
   if (clustersCompressed->nTracks && clustersCompressed->maxTimeBin != -1e6 && clustersCompressed->maxTimeBin != param.continuousMaxTimeBin) {
-    throw std::runtime_error("Configured max time bin does not match value used for track model encoding");
+    throw std::runtime_error("Configured max time bin " + std::to_string(param.continuousMaxTimeBin) + " does not match value used for track model encoding " + std::to_string(clustersCompressed->maxTimeBin));
   }
   std::vector<ClusterNative> clusters[NSECTORS][GPUCA_ROW_COUNT];
   std::atomic_flag locks[NSECTORS][GPUCA_ROW_COUNT];

@@ -273,10 +273,10 @@ int32_t GPUChainTracking::RunTPCDecompression()
     CompressedClusters& inputGPUShadow = DecompressorShadow.mInputGPU;
 
     if (cmprClsHost.nTracks && cmprClsHost.solenoidBz != -1e6f && cmprClsHost.solenoidBz != param().bzkG) {
-      throw std::runtime_error("Configured solenoid Bz does not match value used for track model encoding");
+      throw std::runtime_error("Configured solenoid Bz " + std::to_string(param().bzkG) + " does not match value used for track model encoding " + std::to_string(cmprClsHost.solenoidBz));
     }
     if (cmprClsHost.nTracks && cmprClsHost.maxTimeBin != -1e6 && cmprClsHost.maxTimeBin != param().continuousMaxTimeBin) {
-      throw std::runtime_error("Configured max time bin does not match value used for track model encoding");
+      throw std::runtime_error("Configured max time bin " + std::to_string(param().continuousMaxTimeBin) + " does not match value used for track model encoding " + std::to_string(cmprClsHost.maxTimeBin));
     }
 
     int32_t inputStream = 0;

@@ -555,7 +555,7 @@ GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack
   prop.SetMaterialTPC();
   prop.SetMaxSinPhi(GPUCA_MAX_SIN_PHI);
   prop.SetSeedingErrors(true); // Larger errors for seeds, better since we don't start with good hypothesis
-  prop.SetFitInProjections(false);
+  prop.SetFitInProjections(true); // TODO: Was false, consider reenabling after fitInProjection is fixed
   prop.SetPolynomialField(&Param().polynomialField);
   GPUTPCGMTrackParam trk;
   trk.X() = inTrack->Param().GetX();
@@ -718,9 +718,6 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT
   } else if (iBorder == 3) { // transport to the middle of the sector and rotate vertically to the border on the right
     dAlpha = -dAlpha;
     x0 = GPUTPCGeometry::Row2X(63);
-  } else if (iBorder == 4) { // transport to the middle of the sßector, w/o rotation
-    dAlpha = 0;
-    x0 = GPUTPCGeometry::Row2X(63);
   }
 
   const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
@@ -783,14 +780,14 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT
 }
 
 template <>
-GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
+GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
 {
   CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode));
   GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
   GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks();
   bool sameSector = (iSector1 == iSector2);
   for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) {
-    GPUTPCGMBorderTrack& b = B1[itr];
+    const GPUTPCGMBorderTrack& b = B1[itr];
     float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
     if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
       d *= 2;
@@ -809,7 +806,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea
   }
   if (!sameSector) {
     for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) {
-      GPUTPCGMBorderTrack& b = B2[itr];
+      const GPUTPCGMBorderTrack& b = B2[itr];
       float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1]));
       if (CAMath::Abs(b.Par()[4]) * Param().qptB5Scaler >= 20) {
         d *= 2;
@@ -827,7 +824,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea
 }
 
 template <>
-GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
+GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
 {
 #if !defined(GPUCA_GPUCODE_COMPILEKERNELS)
   GPUTPCGMBorderRange* range1 = mBorderRange[iSector1];
@@ -860,7 +857,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThrea
 }
 
 template <>
-GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
+GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode)
 {
   // int32_t statAll = 0, statMerged = 0;
   float factor2ys = Param().rec.tpc.trackMergerFactor2YS;
@@ -887,7 +884,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea
       i2++;
     }
 
-    GPUTPCGMBorderTrack& b1 = B1[r1.fId];
+    const GPUTPCGMBorderTrack& b1 = B1[r1.fId];
     if (b1.NClusters() < minNPartHits) {
       continue;
     }
@@ -904,7 +901,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea
       }
       // do check
 
-      GPUTPCGMBorderTrack& b2 = B2[r2.fId];
+      const GPUTPCGMBorderTrack& b2 = B2[r2.fId];
 #if defined(GPUCA_MERGER_BY_MC_LABEL) && !defined(GPUCA_GPUCODE)
       int64_t label1 = GetTrackLabel(b1);
       int64_t label2 = GetTrackLabel(b2);
@@ -1019,7 +1016,7 @@ GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t n
   const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi());
 
   for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) {
-    GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
+    const GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr];
     int32_t iSector = track.Sector();
     GPUTPCGMBorderTrack b;
     if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) {
@@ -1852,7 +1849,7 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in
         if (CAMath::Abs(trk.GetParam().GetQPt() * Param().qptB5Scaler) <= Param().rec.tpc.rejectQPtB5 && !trk.MergedLooper() && trk.Leg() == 0) {
           weight |= attachProtect;
         }
-        mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = weight;
+        CAMath::AtomicMax(&mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num], weight); // replaces the plain store: keep the largest weight rather than whichever write lands last; presumably several tracks can reference the same cluster (cf. the shared counter on the next line) - TODO confirm
         CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u);
       }
       if (!trk.CCE() && !trk.MergedLooper()) {

@@ -199,6 +199,8 @@ class GPUTPCGMMerger : public GPUProcessor
   void DumpRefit(std::ostream& out) const;
   void DumpFinal(std::ostream& out) const;
   void DumpLoopers(std::ostream& out) const;
+  void DumpTrackParam(std::ostream& out) const; // loops over the merged tracks and their GetParam(); called by DumpCollected()
+  void DumpTrackClusters(std::ostream& out, bool non0StateOnly = false, bool noNDF0 = false) const; // per-track cluster listing; non0StateOnly: print only clusters whose state != 0; noNDF0: skip tracks that are not OK() or have NDF < 0
 
   template <int32_t mergeType>
   void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const;
@@ -224,7 +226,7 @@ class GPUTPCGMMerger : public GPUProcessor
  private:
   GPUd() void MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam = false);
   template <int32_t I>
-  GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0);
+  GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, const GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, const GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0);
 
   GPUd() void MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, int32_t itr);
 

@@ -43,10 +43,10 @@ using namespace gputpcgmmergertypes;
 void GPUTPCGMMerger::DumpSectorTracks(std::ostream& out) const
 {
   std::streamsize ss = out.precision();
-  out << std::setprecision(2);
+  out << std::setprecision(10);
   out << "\nTPC Merger Sector Tracks\n";
   for (int32_t iSector = 0; iSector < NSECTORS; iSector++) {
-    out << "Sector Track Info Index " << (mSectorTrackInfoIndex[iSector + 1] - mSectorTrackInfoIndex[iSector]) << " / " << (mSectorTrackInfoIndex[NSECTORS + iSector + 1] - mSectorTrackInfoIndex[NSECTORS + iSector]) << "\n";
+    out << "Sector Track Info Sector " << iSector << " Index " << (mSectorTrackInfoIndex[iSector + 1] - mSectorTrackInfoIndex[iSector]) << " / " << (mSectorTrackInfoIndex[NSECTORS + iSector + 1] - mSectorTrackInfoIndex[NSECTORS + iSector]) << "\n";
     for (int32_t iGlobal = 0; iGlobal < 2; iGlobal++) {
       out << "  Track type " << iGlobal << "\n";
       for (int32_t j = mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal]; j < mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal + 1]; j++) {
@@ -134,9 +134,14 @@ void GPUTPCGMMerger::DumpMergedBetweenSectors(std::ostream& out) const
 
 void GPUTPCGMMerger::DumpCollected(std::ostream& out) const
 {
-  std::streamsize ss = out.precision();
-  out << std::setprecision(6);
   out << "\nTPC Merger Collected Tracks\n";
+  DumpTrackParam(out); // the per-track listing (and the precision handling) now lives in DumpTrackParam()
+}
+
+void GPUTPCGMMerger::DumpTrackParam(std::ostream& out) const // walks mMergedTracks[0 .. mMemory->nMergedTracks) and reads each track's GetParam()
+{
+  std::streamsize ss = out.precision(); // remember the stream's current precision; presumably restored after the loop (outside this hunk) - TODO confirm
+  out << std::setprecision(10); // precision 10 here; the code removed from DumpCollected() used 6
   for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) {
     const auto& trk = mMergedTracks[i];
     const auto& p = trk.GetParam();
@@ -157,33 +162,42 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const
   }
 }
 
-void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const
+void GPUTPCGMMerger::DumpTrackClusters(std::ostream& out, bool non0StateOnly, bool noNDF0) const // writes one line per merged track that has clusters, listing its cluster references
 {
-  out << "\nTPC Merger Refit Prepare\n";
-  out << "  Sort\n";
-  for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) {
-    out << "    " << i << ": " << mTrackOrderAttach[i] << "\n";
-  }
-  out << "  Clusters\n";
   for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) {
     const auto& trk = mMergedTracks[j];
-    out << "  Track " << j << ": ";
+    if (trk.NClusters() == 0) { // nothing to list for this track
+      continue;
+    }
+    if (noNDF0 && (!trk.OK() || trk.GetParam().GetNDF() < 0)) { // optional filter: drop tracks that are not OK() or report NDF < 0
+      continue;
+    }
+    out << "  Track " << j << ": (" << trk.NClusters() << "): ";
     for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) {
-      out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", ";
+      if (!non0StateOnly || mClusters[i].state != 0) { // optional filter: only clusters with a non-zero state
+        out << j << "/" << (i - trk.FirstClusterRef()) << ": " << (int32_t)mClusters[i].row << "/" << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; // format: track/index-in-track: row/num/state
+      }
     }
     out << "\n";
   }
-  uint32_t maxId = mNMaxClusters;
+}
+
+void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const // dumps the track order (mTrackOrderAttach), the per-track clusters and the cluster attachment flags
+{
+  out << "\nTPC Merger Refit Prepare\n";
+  out << "  Sort\n";
+  for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) {
+    out << "    " << i << ": " << mTrackOrderAttach[i] << "\n";
+  }
+  out << "  Track Clusters\n"; // heading gets its own line; without the newline the first "  Track N: ..." entry was glued onto it
+  DumpTrackClusters(out);
   uint32_t j = 0;
-  for (uint32_t i = 0; i < maxId; i++) {
+  for (uint32_t i = 0; i < mNMaxClusters; i++) {
     if ((mClusterAttachment[i] & attachFlagMask) != 0) {
-      if (++j % 10 == 0) {
-        out << "    Cluster attachment ";
+      if (j++ % 10 == 0) { // start a new output line before the 1st, 11th, 21st, ... flagged cluster
+        out << "\n    Cluster attachment ";
       }
       out << i << ": " << (mClusterAttachment[i] & attachTrackMask) << " / " << (mClusterAttachment[i] & attachFlagMask) << " - ";
-      if (j % 10 == 0) {
-        out << "\n";
-      }
     }
   }
   out << "\n";
@@ -192,7 +206,7 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const
 void GPUTPCGMMerger::DumpRefit(std::ostream& out) const
 {
   std::streamsize ss = out.precision();
-  out << std::setprecision(2);
+  out << std::setprecision(10);
   out << "\nTPC Merger Refit\n";
   for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) {
     const auto& trk = mMergedTracks[i];
@@ -224,22 +238,10 @@ void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const
 void GPUTPCGMMerger::DumpFinal(std::ostream& out) const
 {
   out << "\nTPC Merger Finalized\n";
-  for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) {
-    const auto& trk = mMergedTracks[j];
-    if (trk.NClusters() == 0) {
-      continue;
-    }
-    out << "  Track " << j << ": ";
-    for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) {
-      if (mClusters[i].state != 0) {
-        out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", ";
-      }
-    }
-    out << "\n";
-  }
-  uint32_t maxId = mNMaxClusters;
+  out << "Track Clusters\n";
+  DumpTrackClusters(out, true);
   uint32_t j = 0;
-  for (uint32_t i = 0; i < maxId; i++) {
+  for (uint32_t i = 0; i < mNMaxClusters; i++) {
     if ((mClusterAttachment[i] & attachFlagMask) != 0) {
       if (++j % 10 == 0) {
         out << "    Cluster attachment ";

@@ -54,6 +54,7 @@ class GPUTPCGMSectorTrack
   GPUd() float SecPhi() const { return mParam.mSecPhi; }
   GPUd() float DzDs() const { return mParam.mDzDs; }
   GPUd() float QPt() const { return mParam.mQPt; }
+  GPUd() const auto& Param() const { return mParam; } // read-only access to the whole mParam object; the accessors above return single fields of it
   GPUd() float TOffset() const { return mTOffset; }
 
   GPUd() int32_t LocalTrackId() const { return mLocalTrackId; }
@@ -75,7 +76,7 @@ class GPUTPCGMSectorTrack
   GPUd() void Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sectorTr, float alpha, int32_t sector);
   GPUd() void SetParam2(const GPUTPCGMTrackParam& trk);
   GPUd() void Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sectorTr, float alpha, int32_t sector);
-  GPUd() void UseParam2() { mParam = mParam2; }
+  GPUd() void UseParam2() { mParam = mParam2; } // TODO: Clean this up!
   GPUd() void SetX2(float v) { mParam2.mX = v; }
   GPUd() float X2() const { return mParam2.mX; }