From 9a0a411449ccb1e4d54898581c5a4eab8a33a4e9 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sat, 13 Jun 2026 08:54:25 -0700
Subject: [PATCH 1/6] Rebase aman/two-code-heaps onto upstream/main

Squash-merge of amanasifkhalid:two-code-heaps#a676f29 onto current main.
Single WIP commit, 13 months stale.

Adds a per-LoaderAllocator separation between optimized (Tier1+) and
non-optimized (Tier0/MinOpts/Instrumented) code heaps:

* New CodeHeapRequestInfo::m_isOptimizedCode flag + IsOptimizedCode/
  SetOptimizedCode accessors (PascalCase, matching HEAD's existing
  accessor convention).
* Set via !pMD->IsJitOptimizationDisabled() in AllocCode (so Tier1,
  fully-optimized non-tiered, etc. land in optimized heaps; Tier0,
  Tier0-Instrumented, MinOpts in regular).
* RangeSection::RANGE_SECTION_OPTIMIZEDCODE flag (assigned 0x20 since
  HEAD took 0x10 for VIRTUALIP); stamped on heaps in NewCodeHeap.
* New LoaderAllocator cache pointers m_pLastUsedOptimizedCodeHeap and
  m_pLastUsedDynamicOptimizedCodeHeap; optimized-code allocations use
  the new caches.
* CanUseCodeHeap rejects a heap up front if its OPTIMIZEDCODE flag
  doesn't match the request. aman's tail-of-function retVal-based check
  became a guard at the start of the unconstrained-range branch
  (loAddr == 0 && hiAddr == 0), since HEAD's version returns directly
  from each success branch rather than using retVal. This patch adds no
  equivalent check for range-constrained requests.
* m_isOptimizedCode initialized to false in the
  CodeHeapRequestInfo(MethodDesc*, LoaderAllocator*, BYTE*, BYTE*)
  ctor (the others delegate to it).

Aman-side fixups during merge:
* aman's lowercase getRequestSize/setRequestSize -> HEAD's PascalCase
  GetRequestSize/SetRequestSize.
* aman's inline-defined ctors in codeman.h dropped; HEAD's separate
  ctor declarations + definitions in codeman.cpp are used and a single
  initializer added for m_isOptimizedCode.
* aman's pInfo->m_pAllocator-> direct field access rewritten as
  pInfo->GetAllocator()->... (m_pAllocator is private in HEAD).
* aman's RANGE_SECTION_OPTIMIZEDCODE value 0x10 collides with HEAD's
  new RANGE_SECTION_VIRTUALIP=0x10; moved to 0x20.

Status carryovers from the original WIP:
* The dynamic (LCG) optimized-heap path remains commented out -
  m_pLastUsedDynamicOptimizedCodeHeap is declared and zero-initialized
  but never read/written.
* CanUseCodeHeap does a FindCodeRange lookup per allocation; could be
  optimized by storing the flag directly on HeapList.
* No tests, no telemetry, no policy knob.

Not yet built.
---
 src/coreclr/vm/codeman.cpp         | 45 +++++++++++++++++++++++++++++-
 src/coreclr/vm/codeman.h           |  5 ++++
 src/coreclr/vm/loaderallocator.cpp |  2 ++
 src/coreclr/vm/loaderallocator.hpp |  2 ++
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index 9cf0efa976490a..b0311435b264f3 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -2840,6 +2840,7 @@ CodeHeapRequestInfo::CodeHeapRequestInfo(MethodDesc* pMD, LoaderAllocator* pAllo
     , m_isCollectible{ false }
     , m_isInterpreted{ false }
     , m_throwOnOutOfMemoryWithinRange{ true }
+    , m_isOptimizedCode{ false }
 {
     CONTRACTL
     {
@@ -2947,7 +2948,12 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe
 
     DWORD flags = RangeSection::RANGE_SECTION_CODEHEAP;
 
-    if (pInfo->IsInterpreted())
+    if (pInfo->IsOptimizedCode())
+    {
+        _ASSERTE(!pInfo->IsInterpreted());
+        flags |= RangeSection::RANGE_SECTION_OPTIMIZEDCODE;
+    }
+    else if (pInfo->IsInterpreted())
     {
         flags |= RangeSection::RANGE_SECTION_INTERPRETER;
     }
@@ -3055,6 +3061,12 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo,
         }
         else
 #endif // FEATURE_INTERPRETER
+        // if (pInfo->IsOptimizedCode())
+        // {
+        //     pCodeHeap = (HeapList *)pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap;
+        //     pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = NULL;
+        // }
+        // else
         {
             pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap;
             pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = NULL;
@@ -3070,6 +3082,12 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo,
         }
         else
 #endif // FEATURE_INTERPRETER
+        if (pInfo->IsOptimizedCode())
+        {
+            pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap;
+            pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap = NULL;
+        }
+        else
         {
             pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedCodeHeap;
             pInfo->GetAllocator()->m_pLastUsedCodeHeap = NULL;
@@ -3135,6 +3153,11 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo,
         }
         else
 #endif // FEATURE_INTERPRETER
+        // if (pInfo->IsOptimizedCode())
+        // {
+        //     pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = pCodeHeap;
+        // }
+        // else
         {
             pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = pCodeHeap;
         }
@@ -3148,6 +3171,11 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo,
         }
         else
 #endif // FEATURE_INTERPRETER
+        if (pInfo->IsOptimizedCode())
+        {
+            pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap = pCodeHeap;
+        }
+        else
         {
             pInfo->GetAllocator()->m_pLastUsedCodeHeap = pCodeHeap;
         }
@@ -3228,6 +3256,11 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser
         static_assert(CODE_SIZE_ALIGN >= sizeof(void*));
     }
 
+    if (!pMD->IsJitOptimizationDisabled())
+    {
+        requestInfo.SetOptimizedCode();
+    }
+
     // Scope the lock
     {
         CrstHolder ch(&m_CodeHeapLock);
@@ -3380,6 +3413,16 @@ bool EECodeGenManager::CanUseCodeHeap(CodeHeapRequestInfo *pInfo, HeapList *pCod
 
     if ((pInfo->GetLoAddr() == 0) && (pInfo->GetHiAddr() == 0))
     {
+        // Don't mix optimized and non-optimized code in the same heap.
+        // The flag is recorded on the heap's RangeSection at creation time.
+        const RangeSection* pRS = ExecutionManager::FindCodeRange(pCodeHeap->startAddress, ExecutionManager::GetScanFlags());
+        _ASSERTE(pRS != NULL);
+        const bool isOptimizedHeap = (pRS->_flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0;
+        if (isOptimizedHeap != pInfo->IsOptimizedCode())
+        {
+            return false;
+        }
+
         // We have no constraint so this non empty heap will be able to satisfy our request
         if (pInfo->IsDynamicDomain())
         {
diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h
index 02909048ffc031..5045b6baee0535 100644
--- a/src/coreclr/vm/codeman.h
+++ b/src/coreclr/vm/codeman.h
@@ -411,6 +411,7 @@ class CodeHeapRequestInfo final
     bool         m_isCollectible;
     bool         m_isInterpreted;
     bool         m_throwOnOutOfMemoryWithinRange;
+    bool         m_isOptimizedCode;
 
 public:
     CodeHeapRequestInfo(MethodDesc* pMD);
@@ -430,6 +431,9 @@ class CodeHeapRequestInfo final
     bool   IsInterpreted()                      { return m_isInterpreted;      }
     void   SetInterpreted()                     { m_isInterpreted = true;      }
 
+    bool   IsOptimizedCode()                    { return m_isOptimizedCode;    }
+    void   SetOptimizedCode()                   { m_isOptimizedCode = true;    }
+
     size_t GetRequestSize()                     { return m_requestSize;        }
     void   SetRequestSize(size_t requestSize)   { m_requestSize = requestSize; }
 
@@ -735,6 +739,7 @@ struct RangeSection
         RANGE_SECTION_RANGELIST     = 0x4,
         RANGE_SECTION_INTERPRETER   = 0x8,
         RANGE_SECTION_VIRTUALIP     = 0x10, // This range section contains virtual IPs (e.g. for ReadyToRun code) instead of actual code addresses in linear memory
+        RANGE_SECTION_OPTIMIZEDCODE = 0x20,
     };
 
 #ifdef FEATURE_READYTORUN
diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp
index 92aeaa9c28679c..1189b38e102b7f 100644
--- a/src/coreclr/vm/loaderallocator.cpp
+++ b/src/coreclr/vm/loaderallocator.cpp
@@ -81,7 +81,9 @@ LoaderAllocator::LoaderAllocator(bool collectible) :
     m_pCodeHeapInitialAlloc = NULL;
     m_pVSDHeapInitialAlloc = NULL;
     m_pLastUsedCodeHeap = NULL;
+    m_pLastUsedOptimizedCodeHeap = NULL;
     m_pLastUsedDynamicCodeHeap = NULL;
+    m_pLastUsedDynamicOptimizedCodeHeap = NULL;
 #ifdef FEATURE_INTERPRETER
     m_pLastUsedInterpreterCodeHeap = NULL;
     m_pLastUsedInterpreterDynamicCodeHeap = NULL;
diff --git a/src/coreclr/vm/loaderallocator.hpp b/src/coreclr/vm/loaderallocator.hpp
index 525eb06d5ddb8f..4d258ed24c1888 100644
--- a/src/coreclr/vm/loaderallocator.hpp
+++ b/src/coreclr/vm/loaderallocator.hpp
@@ -404,6 +404,8 @@ class LoaderAllocator
     // ExecutionManager caches
     void * m_pLastUsedCodeHeap;
     void * m_pLastUsedDynamicCodeHeap;
+    void * m_pLastUsedOptimizedCodeHeap;
+    void * m_pLastUsedDynamicOptimizedCodeHeap;
 #ifdef FEATURE_INTERPRETER
     void * m_pLastUsedInterpreterCodeHeap;
     void * m_pLastUsedInterpreterDynamicCodeHeap;

From 77c23c843e6c8905ba6182e836bab99090a6a745 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sat, 13 Jun 2026 09:05:27 -0700
Subject: [PATCH 2/6] Clean up: LCG is not tiering-eligible, drop the
 dynamic-optimized cache slot

LCG / DynamicMethod methods are not tiering-eligible: MethodDesc::
IsJitOptimizationDisabled() returns the same answer for every LCG method
within a single process (the per-method branch is gated on !IsNoMetadata()
which is false for LCG; the chunk-wide branch is process- or module-global).
There's no way to end up with mixed-optimization LCG code in one process,
so splitting the dynamic code heap by optimization level would only ever
populate one of the two pools.

Cleanup:

* In AllocCode, also gate SetOptimizedCode() on !IsDynamicDomain() so LCG
  requests never get the optimized classification. The dynamic code heap
  is therefore never tagged with RANGE_SECTION_OPTIMIZEDCODE.

* Drop the m_pLastUsedDynamicOptimizedCodeHeap field on LoaderAllocator
  and its initialization, plus the two commented-out blocks in
  AllocCodeWorker that would have used it.

* Strengthen the NewCodeHeap assertion: with the gating above, optimized
  code is mutually exclusive with both interpreter and dynamic-domain.
  Asserting both makes the invariant explicit.

* Add a TODO note in CanUseCodeHeap explaining why the optimized-flag
  check is harmless for interpreter/LCG (they never carry the flag) and
  pointing at the FindCodeRange-lookup-per-allocation optimization for
  future work (caching the bit on HeapList itself).

Build: build.cmd -s clr.runtime+clr.corelib -c Checked -> 0 errors,
0 warnings (1m23s).
---
 src/coreclr/vm/codeman.cpp         | 30 ++++++++++++++++--------------
 src/coreclr/vm/loaderallocator.cpp |  1 -
 src/coreclr/vm/loaderallocator.hpp |  1 -
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index b0311435b264f3..29e32c08a7ce4d 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -2950,7 +2950,13 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe
 
     if (pInfo->IsOptimizedCode())
     {
+        // Optimized code is mutually exclusive with both interpreter and dynamic
+        // (LCG) domain because callers gate SetOptimizedCode() on neither being
+        // set. Tagging the RangeSection lets CanUseCodeHeap reject mismatched
+        // requests, keeping optimized and non-optimized JIT'd code in separate
+        // per-LoaderAllocator heaps.
         _ASSERTE(!pInfo->IsInterpreted());
+        _ASSERTE(!pInfo->IsDynamicDomain());
         flags |= RangeSection::RANGE_SECTION_OPTIMIZEDCODE;
     }
     else if (pInfo->IsInterpreted())
@@ -3061,12 +3067,6 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo,
         }
         else
 #endif // FEATURE_INTERPRETER
-        // if (pInfo->IsOptimizedCode())
-        // {
-        //     pCodeHeap = (HeapList *)pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap;
-        //     pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = NULL;
-        // }
-        // else
         {
             pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap;
             pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = NULL;
@@ -3153,11 +3153,6 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo,
         }
         else
 #endif // FEATURE_INTERPRETER
-        // if (pInfo->IsOptimizedCode())
-        // {
-        //     pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = pCodeHeap;
-        // }
-        // else
         {
             pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = pCodeHeap;
         }
@@ -3256,7 +3251,11 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser
         static_assert(CODE_SIZE_ALIGN >= sizeof(void*));
     }
 
-    if (!pMD->IsJitOptimizationDisabled())
+    // LCG (dynamic domain) methods are not tiering-eligible: every LCG method
+    // within a single process uses the same JIT optimization level. Splitting
+    // their heap by optimization level would create at most one pool of each
+    // kind, so we skip the classification and let all LCG code share one heap.
+    if (!requestInfo.IsDynamicDomain() && !pMD->IsJitOptimizationDisabled())
     {
         requestInfo.SetOptimizedCode();
     }
@@ -3413,8 +3412,11 @@ bool EECodeGenManager::CanUseCodeHeap(CodeHeapRequestInfo *pInfo, HeapList *pCod
 
     if ((pInfo->GetLoAddr() == 0) && (pInfo->GetHiAddr() == 0))
     {
-        // Don't mix optimized and non-optimized code in the same heap.
-        // The flag is recorded on the heap's RangeSection at creation time.
+        // Don't mix optimized and non-optimized code in the same heap. LCG and
+        // interpreter requests never set IsOptimizedCode(), so dynamic-domain
+        // and interpreter heaps don't carry the flag either, and this check
+        // is a no-op for them. TODO: cache the optimized bit on HeapList
+        // itself to avoid the RangeSection lookup on every cache check.
         const RangeSection* pRS = ExecutionManager::FindCodeRange(pCodeHeap->startAddress, ExecutionManager::GetScanFlags());
         _ASSERTE(pRS != NULL);
         const bool isOptimizedHeap = (pRS->_flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0;
diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp
index 1189b38e102b7f..07f734a14b19f8 100644
--- a/src/coreclr/vm/loaderallocator.cpp
+++ b/src/coreclr/vm/loaderallocator.cpp
@@ -83,7 +83,6 @@ LoaderAllocator::LoaderAllocator(bool collectible) :
     m_pLastUsedCodeHeap = NULL;
     m_pLastUsedOptimizedCodeHeap = NULL;
     m_pLastUsedDynamicCodeHeap = NULL;
-    m_pLastUsedDynamicOptimizedCodeHeap = NULL;
 #ifdef FEATURE_INTERPRETER
     m_pLastUsedInterpreterCodeHeap = NULL;
     m_pLastUsedInterpreterDynamicCodeHeap = NULL;
diff --git a/src/coreclr/vm/loaderallocator.hpp b/src/coreclr/vm/loaderallocator.hpp
index 4d258ed24c1888..fef9ca91116a65 100644
--- a/src/coreclr/vm/loaderallocator.hpp
+++ b/src/coreclr/vm/loaderallocator.hpp
@@ -405,7 +405,6 @@ class LoaderAllocator
     void * m_pLastUsedCodeHeap;
     void * m_pLastUsedDynamicCodeHeap;
     void * m_pLastUsedOptimizedCodeHeap;
-    void * m_pLastUsedDynamicOptimizedCodeHeap;
 #ifdef FEATURE_INTERPRETER
     void * m_pLastUsedInterpreterCodeHeap;
     void * m_pLastUsedInterpreterDynamicCodeHeap;

From 92d6f3addb0392a113eff9e6f4f99f2d568f7cd6 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sat, 13 Jun 2026 09:10:44 -0700
Subject: [PATCH 3/6] Gate two-code-heaps behind
 DOTNET_SeparateOptimizedCodeHeaps (default off)

Adds a new INTERNAL retail config knob:
  DOTNET_SeparateOptimizedCodeHeaps  (default 0)

When set to a non-zero value, JIT'd methods that are not optimization-
disabled get their own per-LoaderAllocator code heap, separate from
Tier0/MinOpts/Tier0Instrumented code. When 0 (the default), all JIT'd
code shares a single heap per LoaderAllocator, matching pre-change
behavior. LCG and interpreter heaps are unaffected either way.

In AllocCode, SetOptimizedCode() is now gated on three conditions:
  * !requestInfo.IsDynamicDomain()  (LCG isn't tiering-eligible)
  * !pMD->IsJitOptimizationDisabled()  (the actual opt classification)
  * DOTNET_SeparateOptimizedCodeHeaps != 0  (opt-in)

Build clean; smoke tests pass in both modes:
  default       -> ArrBoundBinaryOp PASSED, smoke exit=100
  opt-in (=1)   -> ArrBoundBinaryOp PASSED, smoke exit=100
---
 src/coreclr/inc/clrconfigvalues.h |  6 ++++++
 src/coreclr/vm/codeman.cpp        | 13 ++++++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index cf3d63e0b3757a..2b310fc9733623 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -524,6 +524,12 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_ReadPGOData, W("ReadPGOData"), 0, "Read PGO da
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_WritePGOData, W("WritePGOData"), 0, "Write PGO data")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredPGO, W("TieredPGO"), 1, "Instrument Tier0 code and make counts available to Tier1")
 
+// When set, JIT'd methods that are not optimization-disabled use a separate
+// per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.).
+// LCG and interpreter heaps are unaffected. Default 0: all JIT'd code shares
+// one heap per LoaderAllocator.
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 0, "When non-zero, use a separate code heap for optimized JIT'd code")
+
 // TieredPGO_InstrumentOnlyHotCode values:
 //
 // 0) Instrument all IL-only code, R2R'd code is never instrumented
diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index 29e32c08a7ce4d..50fdb549334d71 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -3251,11 +3251,14 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser
         static_assert(CODE_SIZE_ALIGN >= sizeof(void*));
     }
 
-    // LCG (dynamic domain) methods are not tiering-eligible: every LCG method
-    // within a single process uses the same JIT optimization level. Splitting
-    // their heap by optimization level would create at most one pool of each
-    // kind, so we skip the classification and let all LCG code share one heap.
-    if (!requestInfo.IsDynamicDomain() && !pMD->IsJitOptimizationDisabled())
+    // Optionally route optimized (Tier1+) code to its own per-LoaderAllocator
+    // heap. LCG (dynamic-domain) methods are excluded because they are not
+    // tiering-eligible: every LCG method within a single process uses the same
+    // JIT optimization level, so splitting their heap would create at most one
+    // pool of each kind anyway. Gated off by default (DOTNET_SeparateOptimizedCodeHeaps).
+    if (!requestInfo.IsDynamicDomain()
+        && !pMD->IsJitOptimizationDisabled()
+        && CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SeparateOptimizedCodeHeaps) != 0)
     {
         requestInfo.SetOptimizedCode();
     }

From c45b932f7cb7f95620bcc42047a049ce9805aad6 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sat, 13 Jun 2026 09:21:02 -0700
Subject: [PATCH 4/6] Cache OPTIMIZEDCODE flag on HeapList to avoid
 per-allocation lookup

Previously CanUseCodeHeap consulted ExecutionManager::FindCodeRange on
every cache check to read the RangeSection's RANGE_SECTION_OPTIMIZEDCODE
bit. The flag is set once at heap creation in NewCodeHeap and never
changes, so cache it directly on the HeapList instead.

* HeapList gets a new trailing bool isOptimizedCode field. Trailing
  placement means FakeHeapList/cDAC layout assertions are undisturbed
  (none of the mirrored consumers read this field).
* NewCodeHeap stamps the cached flag from the same expression that
  builds the RangeSection flags.
* Both HeapList creation sites (LoaderCodeHeap::CreateCodeHeap and
  HostCodeHeap::InitializeHeapList) set the field to false right after
  `new HeapList`, so it holds a defined value before NewCodeHeap assigns
  the real one, including for heaps created without the optimized flag.
* CanUseCodeHeap drops the FindCodeRange lookup and reads
  pCodeHeap->isOptimizedCode directly.

Build clean; smoke + ArrBoundBinaryOp PASSED in both default and
DOTNET_SeparateOptimizedCodeHeaps=1 modes.
---
 src/coreclr/vm/codeman.cpp       | 13 +++++++------
 src/coreclr/vm/codeman.h         |  6 ++++++
 src/coreclr/vm/dynamicmethod.cpp |  1 +
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index 50fdb549334d71..3e805225b21f2b 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -2735,6 +2735,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap
 
     // this first allocation is critical as it sets up correctly the loader heap info
     HeapList *pHp = new HeapList;
+    pHp->isOptimizedCode = false;
 
 #if defined(TARGET_64BIT)
     if (pInfo->IsInterpreted())
@@ -2987,6 +2988,10 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe
     _ASSERTE (pHp != NULL);
     _ASSERTE (pHp->maxCodeHeapSize >= initialRequestSize);
 
+    // Cache the optimized-code bit on the HeapList so CanUseCodeHeap
+    // doesn't have to do a FindCodeRange lookup on every cache check.
+    pHp->isOptimizedCode = (flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0;
+
     // Append the current code heap to the new code heap element.
     pHp->SetNext(m_pAllCodeHeaps);
 
@@ -3418,12 +3423,8 @@ bool EECodeGenManager::CanUseCodeHeap(CodeHeapRequestInfo *pInfo, HeapList *pCod
         // Don't mix optimized and non-optimized code in the same heap. LCG and
         // interpreter requests never set IsOptimizedCode(), so dynamic-domain
         // and interpreter heaps don't carry the flag either, and this check
-        // is a no-op for them. TODO: cache the optimized bit on HeapList
-        // itself to avoid the RangeSection lookup on every cache check.
-        const RangeSection* pRS = ExecutionManager::FindCodeRange(pCodeHeap->startAddress, ExecutionManager::GetScanFlags());
-        _ASSERTE(pRS != NULL);
-        const bool isOptimizedHeap = (pRS->_flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0;
-        if (isOptimizedHeap != pInfo->IsOptimizedCode())
+        // is a no-op for them.
+        if (pCodeHeap->isOptimizedCode != pInfo->IsOptimizedCode())
         {
             return false;
         }
diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h
index 5045b6baee0535..492a9c536234fa 100644
--- a/src/coreclr/vm/codeman.h
+++ b/src/coreclr/vm/codeman.h
@@ -536,6 +536,12 @@ struct HeapList
     BYTE*               CLRPersonalityRoutine;  // jump thunk to personality routine, NULL if there is no personality routine (e.g. interpreter code heap)
 #endif
 
+    // Cached copy of the RANGE_SECTION_OPTIMIZEDCODE bit on the heap's
+    // RangeSection. Lets CanUseCodeHeap reject heap/request mismatches
+    // without a per-allocation FindCodeRange lookup. Set at heap creation
+    // time in NewCodeHeap; never changes afterwards.
+    bool                isOptimizedCode;
+
     TADDR GetModuleBase()
     {
 #if defined(TARGET_64BIT)
diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp
index b91382bff21dd8..9f955bbc450886 100644
--- a/src/coreclr/vm/dynamicmethod.cpp
+++ b/src/coreclr/vm/dynamicmethod.cpp
@@ -433,6 +433,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo)
     m_pAllocator = pInfo->GetAllocator();
 
     HeapList* pHp = new HeapList;
+    pHp->isOptimizedCode = false;
 
     TrackAllocation *pTracker = NULL;
 

From 996339a4269a57cd2f8a2bbe1bb3d82ece1afeda Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sat, 13 Jun 2026 09:29:45 -0700
Subject: [PATCH 5/6] Flip default of DOTNET_SeparateOptimizedCodeHeaps from 0
 to 1

Predicate (!= 0 enables) unchanged; only the default value moves.
---
 src/coreclr/inc/clrconfigvalues.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index 2b310fc9733623..220d5d2a687314 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -526,9 +526,8 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredPGO, W("TieredPGO"), 1, "Instrument Tier
 
 // When set, JIT'd methods that are not optimization-disabled use a separate
 // per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.).
-// LCG and interpreter heaps are unaffected. Default 0: all JIT'd code shares
-// one heap per LoaderAllocator.
-RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 0, "When non-zero, use a separate code heap for optimized JIT'd code")
+// LCG and interpreter heaps are unaffected.
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 1, "When non-zero, use a separate code heap for optimized JIT'd code")
 
 // TieredPGO_InstrumentOnlyHotCode values:
 //

From 023ec3d49383fc4f93ad668890098adb1eef0c19 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sat, 13 Jun 2026 09:58:36 -0700
Subject: [PATCH 6/6] Fix Copilot-review findings on SeparateOptimizedCodeHeaps

* Move INTERNAL_SeparateOptimizedCodeHeaps declaration outside the
  #ifdef FEATURE_PGO block. Browser-wasm and iOS simulator builds
  disable FEATURE_PGO, so the unconditional reference from codeman.cpp
  was breaking those builds:
    src/coreclr/vm/codeman.cpp:3266:49: error: no member named
    'INTERNAL_SeparateOptimizedCodeHeaps' in 'CLRConfig'

* Exclude interpreted requests from SetOptimizedCode(). When
  FEATURE_INTERPRETER is on, AllocCode<InterpreterCodeHeader> sets
  requestInfo.SetInterpreted() earlier; the new gate then allowed
  SetOptimizedCode() to also be set, which violates the mutual-exclusion
  invariant asserted in NewCodeHeap (_ASSERTE(!pInfo->IsInterpreted())
  inside the IsOptimizedCode() branch).

* Reword the comment: the split is keyed off
  MethodDesc::IsJitOptimizationDisabled() (attributes / global debug
  flags / minopts), not the current compilation tier. The previous
  'Tier1+' wording was misleading.

Build clean.
---
 src/coreclr/inc/clrconfigvalues.h | 11 ++++++-----
 src/coreclr/vm/codeman.cpp        | 15 ++++++++++-----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index 220d5d2a687314..ffd48f0a0f6886 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -524,11 +524,6 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_ReadPGOData, W("ReadPGOData"), 0, "Read PGO da
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_WritePGOData, W("WritePGOData"), 0, "Write PGO data")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredPGO, W("TieredPGO"), 1, "Instrument Tier0 code and make counts available to Tier1")
 
-// When set, JIT'd methods that are not optimization-disabled use a separate
-// per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.).
-// LCG and interpreter heaps are unaffected.
-RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 1, "When non-zero, use a separate code heap for optimized JIT'd code")
-
 // TieredPGO_InstrumentOnlyHotCode values:
 //
 // 0) Instrument all IL-only code, R2R'd code is never instrumented
@@ -543,6 +538,12 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredPGO_ScalableCountThreshold, W("Tiered
 
 #endif
 
+// When set, JIT'd methods that are not optimization-disabled use a separate
+// per-LoaderAllocator code heap from optimization-disabled methods (MinOpts,
+// NoOptimization, etc.). LCG and interpreter heaps are unaffected.
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 1, "When non-zero, use a separate code heap for optimized JIT'd code")
+
+
 ///
 /// Entry point slot backpatch
 ///
diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index 3e805225b21f2b..fd4c31de57f58e 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -3256,12 +3256,17 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser
         static_assert(CODE_SIZE_ALIGN >= sizeof(void*));
     }
 
-    // Optionally route optimized (Tier1+) code to its own per-LoaderAllocator
-    // heap. LCG (dynamic-domain) methods are excluded because they are not
-    // tiering-eligible: every LCG method within a single process uses the same
-    // JIT optimization level, so splitting their heap would create at most one
-    // pool of each kind anyway. Gated off by default (DOTNET_SeparateOptimizedCodeHeaps).
+    // Optionally route code for methods with JIT optimizations enabled to its
+    // own per-LoaderAllocator heap, separate from methods with them disabled
+    // (global MinOpts, /clr DisableOpts, MethodImplOptions.NoOptimization).
+    // The split is keyed off MethodDesc::IsJitOptimizationDisabled(), not the
+    // current compilation tier, so every tier of an optimization-enabled
+    // method lands in the optimized pool. LCG (dynamic-domain) is excluded
+    // because every LCG method within a single process uses the same
+    // optimization level, and interpreter requests are excluded because
+    // their heaps use a separate code path entirely.
     if (!requestInfo.IsDynamicDomain()
+        && !requestInfo.IsInterpreted()
         && !pMD->IsJitOptimizationDisabled()
         && CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SeparateOptimizedCodeHeaps) != 0)
     {