From 9a0a411449ccb1e4d54898581c5a4eab8a33a4e9 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 13 Jun 2026 08:54:25 -0700 Subject: [PATCH 1/6] Rebase aman/two-code-heaps onto upstream/main Squash-merge of amanasifkhalid:two-code-heaps#a676f29 onto current main. Single WIP commit, 13 months stale. Adds a per-LoaderAllocator separation between optimized (Tier1+) and non-optimized (Tier0/MinOpts/Instrumented) code heaps: * New CodeHeapRequestInfo::m_isOptimizedCode flag + IsOptimizedCode/ SetOptimizedCode accessors (PascalCase, matching HEAD's existing accessor convention). * Set via !pMD->IsJitOptimizationDisabled() in AllocCode (so Tier1, fully-optimized non-tiered, etc. land in optimized heaps; Tier0, Tier0-Instrumented, MinOpts in regular). * RangeSection::RANGE_SECTION_OPTIMIZEDCODE flag (assigned 0x20 since HEAD took 0x10 for VIRTUALIP); stamped on heaps in NewCodeHeap. * New LoaderAllocator cache pointers m_pLastUsedOptimizedCodeHeap and m_pLastUsedDynamicOptimizedCodeHeap; optimized-code allocations use the new caches. * CanUseCodeHeap rejects a heap up front if its OPTIMIZEDCODE flag doesn't match the request (moved from aman's tail-of-function retVal-based check to a guard at the top of the unconstrained branch, i.e. when loAddr and hiAddr are both 0, since HEAD's version returns directly from each success branch rather than using retVal; range-constrained requests do not get this check in this patch). * m_isOptimizedCode initialized to false in the CodeHeapRequestInfo(MethodDesc*, LoaderAllocator*, BYTE*, BYTE*) ctor (the others delegate to it). Aman-side fixups during merge: * aman's lowercase getRequestSize/setRequestSize -> HEAD's PascalCase GetRequestSize/SetRequestSize. * aman's inline-defined ctors in codeman.h dropped; HEAD's separate ctor declarations + definitions in codeman.cpp are used and a single initializer added for m_isOptimizedCode. * aman's pInfo->m_pAllocator-> direct field access rewritten as pInfo->GetAllocator()->... (m_pAllocator is private in HEAD). 
* aman's RANGE_SECTION_OPTIMIZEDCODE value 0x10 collides with HEAD's new RANGE_SECTION_VIRTUALIP=0x10; moved to 0x20. Status carryovers from the original WIP: * The dynamic (LCG) optimized-heap path remains commented out - m_pLastUsedDynamicOptimizedCodeHeap is declared and zero-initialized but never read/written. * CanUseCodeHeap does a FindCodeRange lookup per allocation; could be optimized by storing the flag directly on HeapList. * No tests, no telemetry, no policy knob. Not yet built. --- src/coreclr/vm/codeman.cpp | 45 +++++++++++++++++++++++++++++- src/coreclr/vm/codeman.h | 5 ++++ src/coreclr/vm/loaderallocator.cpp | 2 ++ src/coreclr/vm/loaderallocator.hpp | 2 ++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 9cf0efa976490a..b0311435b264f3 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2840,6 +2840,7 @@ CodeHeapRequestInfo::CodeHeapRequestInfo(MethodDesc* pMD, LoaderAllocator* pAllo , m_isCollectible{ false } , m_isInterpreted{ false } , m_throwOnOutOfMemoryWithinRange{ true } + , m_isOptimizedCode{ false } { CONTRACTL { @@ -2947,7 +2948,12 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe DWORD flags = RangeSection::RANGE_SECTION_CODEHEAP; - if (pInfo->IsInterpreted()) + if (pInfo->IsOptimizedCode()) + { + _ASSERTE(!pInfo->IsInterpreted()); + flags |= RangeSection::RANGE_SECTION_OPTIMIZEDCODE; + } + else if (pInfo->IsInterpreted()) { flags |= RangeSection::RANGE_SECTION_INTERPRETER; } @@ -3055,6 +3061,12 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER + // if (pInfo->IsOptimizedCode()) + // { + // pCodeHeap = (HeapList *)pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap; + // pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = NULL; + // } + // else { pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap; 
pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = NULL; @@ -3070,6 +3082,12 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER + if (pInfo->IsOptimizedCode()) + { + pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap; + pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap = NULL; + } + else { pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedCodeHeap; pInfo->GetAllocator()->m_pLastUsedCodeHeap = NULL; @@ -3135,6 +3153,11 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER + // if (pInfo->IsOptimizedCode()) + // { + // pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = pCodeHeap; + // } + // else { pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = pCodeHeap; } @@ -3148,6 +3171,11 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER + if (pInfo->IsOptimizedCode()) + { + pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap = pCodeHeap; + } + else { pInfo->GetAllocator()->m_pLastUsedCodeHeap = pCodeHeap; } @@ -3228,6 +3256,11 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser static_assert(CODE_SIZE_ALIGN >= sizeof(void*)); } + if (!pMD->IsJitOptimizationDisabled()) + { + requestInfo.SetOptimizedCode(); + } + // Scope the lock { CrstHolder ch(&m_CodeHeapLock); @@ -3380,6 +3413,16 @@ bool EECodeGenManager::CanUseCodeHeap(CodeHeapRequestInfo *pInfo, HeapList *pCod if ((pInfo->GetLoAddr() == 0) && (pInfo->GetHiAddr() == 0)) { + // Don't mix optimized and non-optimized code in the same heap. + // The flag is recorded on the heap's RangeSection at creation time. 
+ const RangeSection* pRS = ExecutionManager::FindCodeRange(pCodeHeap->startAddress, ExecutionManager::GetScanFlags()); + _ASSERTE(pRS != NULL); + const bool isOptimizedHeap = (pRS->_flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0; + if (isOptimizedHeap != pInfo->IsOptimizedCode()) + { + return false; + } + // We have no constraint so this non empty heap will be able to satisfy our request if (pInfo->IsDynamicDomain()) { diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 02909048ffc031..5045b6baee0535 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -411,6 +411,7 @@ class CodeHeapRequestInfo final bool m_isCollectible; bool m_isInterpreted; bool m_throwOnOutOfMemoryWithinRange; + bool m_isOptimizedCode; public: CodeHeapRequestInfo(MethodDesc* pMD); @@ -430,6 +431,9 @@ class CodeHeapRequestInfo final bool IsInterpreted() { return m_isInterpreted; } void SetInterpreted() { m_isInterpreted = true; } + bool IsOptimizedCode() { return m_isOptimizedCode; } + void SetOptimizedCode() { m_isOptimizedCode = true; } + size_t GetRequestSize() { return m_requestSize; } void SetRequestSize(size_t requestSize) { m_requestSize = requestSize; } @@ -735,6 +739,7 @@ struct RangeSection RANGE_SECTION_RANGELIST = 0x4, RANGE_SECTION_INTERPRETER = 0x8, RANGE_SECTION_VIRTUALIP = 0x10, // This range section contains virtual IPs (e.g. 
for ReadyToRun code) instead of actual code addresses in linear memory + RANGE_SECTION_OPTIMIZEDCODE = 0x20, }; #ifdef FEATURE_READYTORUN diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 92aeaa9c28679c..1189b38e102b7f 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -81,7 +81,9 @@ LoaderAllocator::LoaderAllocator(bool collectible) : m_pCodeHeapInitialAlloc = NULL; m_pVSDHeapInitialAlloc = NULL; m_pLastUsedCodeHeap = NULL; + m_pLastUsedOptimizedCodeHeap = NULL; m_pLastUsedDynamicCodeHeap = NULL; + m_pLastUsedDynamicOptimizedCodeHeap = NULL; #ifdef FEATURE_INTERPRETER m_pLastUsedInterpreterCodeHeap = NULL; m_pLastUsedInterpreterDynamicCodeHeap = NULL; diff --git a/src/coreclr/vm/loaderallocator.hpp b/src/coreclr/vm/loaderallocator.hpp index 525eb06d5ddb8f..4d258ed24c1888 100644 --- a/src/coreclr/vm/loaderallocator.hpp +++ b/src/coreclr/vm/loaderallocator.hpp @@ -404,6 +404,8 @@ class LoaderAllocator // ExecutionManager caches void * m_pLastUsedCodeHeap; void * m_pLastUsedDynamicCodeHeap; + void * m_pLastUsedOptimizedCodeHeap; + void * m_pLastUsedDynamicOptimizedCodeHeap; #ifdef FEATURE_INTERPRETER void * m_pLastUsedInterpreterCodeHeap; void * m_pLastUsedInterpreterDynamicCodeHeap; From 77c23c843e6c8905ba6182e836bab99090a6a745 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 13 Jun 2026 09:05:27 -0700 Subject: [PATCH 2/6] Clean up: LCG is not tiering-eligible, drop the dynamic-optimized cache slot LCG / DynamicMethod methods are not tiering-eligible: MethodDesc:: IsJitOptimizationDisabled() returns the same answer for every LCG method within a single process (the per-method branch is gated on !IsNoMetadata() which is false for LCG; the chunk-wide branch is process- or module-global). There's no way to end up with mixed-optimization LCG code in one process, so splitting the dynamic code heap by optimization level would create at most one pool of each kind anyway. 
Cleanup: * In AllocCode, also gate SetOptimizedCode() on !IsDynamicDomain() so LCG requests never get the optimized classification. The dynamic code heap is therefore never tagged with RANGE_SECTION_OPTIMIZEDCODE. * Drop the m_pLastUsedDynamicOptimizedCodeHeap field on LoaderAllocator and its initialization, plus the two commented-out blocks in AllocCodeWorker that would have used it. * Strengthen the NewCodeHeap assertion: with the gating above, optimized code is mutually exclusive with both interpreter and dynamic-domain. Asserting both makes the invariant explicit. * Add a TODO note in CanUseCodeHeap explaining why the optimized-flag check is harmless for interpreter/LCG (they never carry the flag) and pointing at the FindCodeRange-lookup-per-allocation optimization for future work (caching the bit on HeapList itself). Build: build.cmd -s clr.runtime+clr.corelib -c Checked -> 0 errors, 0 warnings (1m23s). --- src/coreclr/vm/codeman.cpp | 30 ++++++++++++++++-------------- src/coreclr/vm/loaderallocator.cpp | 1 - src/coreclr/vm/loaderallocator.hpp | 1 - 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index b0311435b264f3..29e32c08a7ce4d 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2950,7 +2950,13 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe if (pInfo->IsOptimizedCode()) { + // Optimized code is mutually exclusive with both interpreter and dynamic + // (LCG) domain because callers gate SetOptimizedCode() on neither being + // set. Tagging the RangeSection lets CanUseCodeHeap reject mismatched + // requests, keeping optimized and non-optimized JIT'd code in separate + // per-LoaderAllocator heaps. 
_ASSERTE(!pInfo->IsInterpreted()); + _ASSERTE(!pInfo->IsDynamicDomain()); flags |= RangeSection::RANGE_SECTION_OPTIMIZEDCODE; } else if (pInfo->IsInterpreted()) @@ -3061,12 +3067,6 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER - // if (pInfo->IsOptimizedCode()) - // { - // pCodeHeap = (HeapList *)pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap; - // pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = NULL; - // } - // else { pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap; pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = NULL; @@ -3153,11 +3153,6 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER - // if (pInfo->IsOptimizedCode()) - // { - // pInfo->m_pAllocator->m_pLastUsedDynamicOptimizedCodeHeap = pCodeHeap; - // } - // else { pInfo->GetAllocator()->m_pLastUsedDynamicCodeHeap = pCodeHeap; } @@ -3256,7 +3251,11 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser static_assert(CODE_SIZE_ALIGN >= sizeof(void*)); } - if (!pMD->IsJitOptimizationDisabled()) + // LCG (dynamic domain) methods are not tiering-eligible: every LCG method + // within a single process uses the same JIT optimization level. Splitting + // their heap by optimization level would create at most one pool of each + // kind, so we skip the classification and let all LCG code share one heap. + if (!requestInfo.IsDynamicDomain() && !pMD->IsJitOptimizationDisabled()) { requestInfo.SetOptimizedCode(); } @@ -3413,8 +3412,11 @@ bool EECodeGenManager::CanUseCodeHeap(CodeHeapRequestInfo *pInfo, HeapList *pCod if ((pInfo->GetLoAddr() == 0) && (pInfo->GetHiAddr() == 0)) { - // Don't mix optimized and non-optimized code in the same heap. - // The flag is recorded on the heap's RangeSection at creation time. + // Don't mix optimized and non-optimized code in the same heap. 
LCG and + // interpreter requests never set IsOptimizedCode(), so dynamic-domain + // and interpreter heaps don't carry the flag either, and this check + // is a no-op for them. TODO: cache the optimized bit on HeapList + // itself to avoid the RangeSection lookup on every cache check. const RangeSection* pRS = ExecutionManager::FindCodeRange(pCodeHeap->startAddress, ExecutionManager::GetScanFlags()); _ASSERTE(pRS != NULL); const bool isOptimizedHeap = (pRS->_flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0; diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 1189b38e102b7f..07f734a14b19f8 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -83,7 +83,6 @@ LoaderAllocator::LoaderAllocator(bool collectible) : m_pLastUsedCodeHeap = NULL; m_pLastUsedOptimizedCodeHeap = NULL; m_pLastUsedDynamicCodeHeap = NULL; - m_pLastUsedDynamicOptimizedCodeHeap = NULL; #ifdef FEATURE_INTERPRETER m_pLastUsedInterpreterCodeHeap = NULL; m_pLastUsedInterpreterDynamicCodeHeap = NULL; diff --git a/src/coreclr/vm/loaderallocator.hpp b/src/coreclr/vm/loaderallocator.hpp index 4d258ed24c1888..fef9ca91116a65 100644 --- a/src/coreclr/vm/loaderallocator.hpp +++ b/src/coreclr/vm/loaderallocator.hpp @@ -405,7 +405,6 @@ class LoaderAllocator void * m_pLastUsedCodeHeap; void * m_pLastUsedDynamicCodeHeap; void * m_pLastUsedOptimizedCodeHeap; - void * m_pLastUsedDynamicOptimizedCodeHeap; #ifdef FEATURE_INTERPRETER void * m_pLastUsedInterpreterCodeHeap; void * m_pLastUsedInterpreterDynamicCodeHeap; From 92d6f3addb0392a113eff9e6f4f99f2d568f7cd6 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 13 Jun 2026 09:10:44 -0700 Subject: [PATCH 3/6] Gate two-code-heaps behind DOTNET_SeparateOptimizedCodeHeaps (default off) Adds a new INTERNAL retail config knob: DOTNET_SeparateOptimizedCodeHeaps (default 0) When set to a non-zero value, JIT'd methods that are not optimization- disabled get their own per-LoaderAllocator code 
heap, separate from Tier0/MinOpts/Tier0Instrumented code. When 0 (the default), all JIT'd code shares a single heap per LoaderAllocator, matching pre-change behavior. LCG and interpreter heaps are unaffected either way. In AllocCode, SetOptimizedCode() is now gated on three conditions: * !requestInfo.IsDynamicDomain() (LCG isn't tiering-eligible) * !pMD->IsJitOptimizationDisabled() (the actual opt classification) * DOTNET_SeparateOptimizedCodeHeaps != 0 (opt-in) Build clean; smoke tests pass in both modes: default -> ArrBoundBinaryOp PASSED, smoke exit=100 opt-in (=1) -> ArrBoundBinaryOp PASSED, smoke exit=100 --- src/coreclr/inc/clrconfigvalues.h | 6 ++++++ src/coreclr/vm/codeman.cpp | 13 ++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index cf3d63e0b3757a..2b310fc9733623 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -524,6 +524,12 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_ReadPGOData, W("ReadPGOData"), 0, "Read PGO da RETAIL_CONFIG_DWORD_INFO(INTERNAL_WritePGOData, W("WritePGOData"), 0, "Write PGO data") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredPGO, W("TieredPGO"), 1, "Instrument Tier0 code and make counts available to Tier1") +// When set, JIT'd methods that are not optimization-disabled use a separate +// per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.). +// LCG and interpreter heaps are unaffected. Default 0: all JIT'd code shares +// one heap per LoaderAllocator. 
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 0, "When non-zero, use a separate code heap for optimized JIT'd code") + // TieredPGO_InstrumentOnlyHotCode values: // // 0) Instrument all IL-only code, R2R'd code is never instrumented diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 29e32c08a7ce4d..50fdb549334d71 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -3251,11 +3251,14 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser static_assert(CODE_SIZE_ALIGN >= sizeof(void*)); } - // LCG (dynamic domain) methods are not tiering-eligible: every LCG method - // within a single process uses the same JIT optimization level. Splitting - // their heap by optimization level would create at most one pool of each - // kind, so we skip the classification and let all LCG code share one heap. - if (!requestInfo.IsDynamicDomain() && !pMD->IsJitOptimizationDisabled()) + // Optionally route optimized (Tier1+) code to its own per-LoaderAllocator + // heap. LCG (dynamic-domain) methods are excluded because they are not + // tiering-eligible: every LCG method within a single process uses the same + // JIT optimization level, so splitting their heap would create at most one + // pool of each kind anyway. Gated off by default (DOTNET_SeparateOptimizedCodeHeaps). + if (!requestInfo.IsDynamicDomain() + && !pMD->IsJitOptimizationDisabled() + && CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SeparateOptimizedCodeHeaps) != 0) { requestInfo.SetOptimizedCode(); } From c45b932f7cb7f95620bcc42047a049ce9805aad6 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 13 Jun 2026 09:21:02 -0700 Subject: [PATCH 4/6] Cache OPTIMIZEDCODE flag on HeapList to avoid per-allocation lookup Previously CanUseCodeHeap consulted ExecutionManager::FindCodeRange on every cache check to read the RangeSection's RANGE_SECTION_OPTIMIZEDCODE bit. 
The flag is set once at heap creation in NewCodeHeap and never changes, so cache it directly on the HeapList instead. * HeapList gets a new trailing bool isOptimizedCode field. Trailing placement means FakeHeapList/cDAC layout assertions are undisturbed (none of the mirrored consumers read this field). * NewCodeHeap stamps the cached flag from the same expression that builds the RangeSection flags. * Both HeapList creation sites (LoaderCodeHeap::CreateCodeHeap and HostCodeHeap::InitializeHeapList) explicitly set the field to false right after `new HeapList`, so it has a defined value even before NewCodeHeap assigns the final one, including on the path where the heap is created without the optimized flag. * CanUseCodeHeap drops the FindCodeRange lookup and reads pCodeHeap->isOptimizedCode directly. Build clean; smoke + ArrBoundBinaryOp PASSED in both default and DOTNET_SeparateOptimizedCodeHeaps=1 modes. --- src/coreclr/vm/codeman.cpp | 13 +++++++------ src/coreclr/vm/codeman.h | 6 ++++++ src/coreclr/vm/dynamicmethod.cpp | 1 + 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 50fdb549334d71..3e805225b21f2b 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2735,6 +2735,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap // this first allocation is critical as it sets up correctly the loader heap info HeapList *pHp = new HeapList; + pHp->isOptimizedCode = false; #if defined(TARGET_64BIT) if (pInfo->IsInterpreted()) @@ -2987,6 +2988,10 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe _ASSERTE (pHp != NULL); _ASSERTE (pHp->maxCodeHeapSize >= initialRequestSize); + // Cache the optimized-code bit on the HeapList so CanUseCodeHeap + // doesn't have to do a FindCodeRange lookup on every cache check. + pHp->isOptimizedCode = (flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0; + // Append the current code heap to the new code heap element. 
pHp->SetNext(m_pAllCodeHeaps); @@ -3418,12 +3423,8 @@ bool EECodeGenManager::CanUseCodeHeap(CodeHeapRequestInfo *pInfo, HeapList *pCod // Don't mix optimized and non-optimized code in the same heap. LCG and // interpreter requests never set IsOptimizedCode(), so dynamic-domain // and interpreter heaps don't carry the flag either, and this check - // is a no-op for them. TODO: cache the optimized bit on HeapList - // itself to avoid the RangeSection lookup on every cache check. - const RangeSection* pRS = ExecutionManager::FindCodeRange(pCodeHeap->startAddress, ExecutionManager::GetScanFlags()); - _ASSERTE(pRS != NULL); - const bool isOptimizedHeap = (pRS->_flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0; - if (isOptimizedHeap != pInfo->IsOptimizedCode()) + // is a no-op for them. + if (pCodeHeap->isOptimizedCode != pInfo->IsOptimizedCode()) { return false; } diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 5045b6baee0535..492a9c536234fa 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -536,6 +536,12 @@ struct HeapList BYTE* CLRPersonalityRoutine; // jump thunk to personality routine, NULL if there is no personality routine (e.g. interpreter code heap) #endif + // Cached copy of the RANGE_SECTION_OPTIMIZEDCODE bit on the heap's + // RangeSection. Lets CanUseCodeHeap reject heap/request mismatches + // without a per-allocation FindCodeRange lookup. Set at heap creation + // time in NewCodeHeap; never changes afterwards. 
+ bool isOptimizedCode; + TADDR GetModuleBase() { #if defined(TARGET_64BIT) diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index b91382bff21dd8..9f955bbc450886 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -433,6 +433,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) m_pAllocator = pInfo->GetAllocator(); HeapList* pHp = new HeapList; + pHp->isOptimizedCode = false; TrackAllocation *pTracker = NULL; From 996339a4269a57cd2f8a2bbe1bb3d82ece1afeda Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 13 Jun 2026 09:29:45 -0700 Subject: [PATCH 5/6] Flip default of DOTNET_SeparateOptimizedCodeHeaps from 0 to 1 Predicate (!= 0 enables) unchanged; only the default value moves. --- src/coreclr/inc/clrconfigvalues.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 2b310fc9733623..220d5d2a687314 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -526,9 +526,8 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredPGO, W("TieredPGO"), 1, "Instrument Tier // When set, JIT'd methods that are not optimization-disabled use a separate // per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.). -// LCG and interpreter heaps are unaffected. Default 0: all JIT'd code shares -// one heap per LoaderAllocator. -RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 0, "When non-zero, use a separate code heap for optimized JIT'd code") +// LCG and interpreter heaps are unaffected. 
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 1, "When non-zero, use a separate code heap for optimized JIT'd code") // TieredPGO_InstrumentOnlyHotCode values: // From 023ec3d49383fc4f93ad668890098adb1eef0c19 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 13 Jun 2026 09:58:36 -0700 Subject: [PATCH 6/6] Fix Copilot-review findings on SeparateOptimizedCodeHeaps * Move INTERNAL_SeparateOptimizedCodeHeaps declaration outside the #ifdef FEATURE_PGO block. Browser-wasm and iOS simulator builds disable FEATURE_PGO, so the unconditional reference from codeman.cpp was breaking those builds: src/coreclr/vm/codeman.cpp:3266:49: error: no member named 'INTERNAL_SeparateOptimizedCodeHeaps' in 'CLRConfig' * Exclude interpreted requests from SetOptimizedCode(). When FEATURE_INTERPRETER is on, AllocCode sets requestInfo.SetInterpreted() earlier; the new gate then allowed SetOptimizedCode() to also be set, which violates the mutual-exclusion invariant asserted in NewCodeHeap (_ASSERTE(!pInfo->IsInterpreted()) inside the IsOptimizedCode() branch). * Reword the comment: the split is keyed off MethodDesc::IsJitOptimizationDisabled() (attributes / global debug flags / minopts), not the current compilation tier. The previous 'Tier1+' wording was misleading. Build clean. 
--- src/coreclr/inc/clrconfigvalues.h | 11 ++++++----- src/coreclr/vm/codeman.cpp | 15 ++++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 220d5d2a687314..ffd48f0a0f6886 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -524,11 +524,6 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_ReadPGOData, W("ReadPGOData"), 0, "Read PGO da RETAIL_CONFIG_DWORD_INFO(INTERNAL_WritePGOData, W("WritePGOData"), 0, "Write PGO data") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredPGO, W("TieredPGO"), 1, "Instrument Tier0 code and make counts available to Tier1") -// When set, JIT'd methods that are not optimization-disabled use a separate -// per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.). -// LCG and interpreter heaps are unaffected. -RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 1, "When non-zero, use a separate code heap for optimized JIT'd code") - // TieredPGO_InstrumentOnlyHotCode values: // // 0) Instrument all IL-only code, R2R'd code is never instrumented @@ -543,6 +538,12 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredPGO_ScalableCountThreshold, W("Tiered #endif +// When set, JIT'd methods that are not optimization-disabled use a separate +// per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.). +// LCG and interpreter heaps are unaffected. 
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 1, "When non-zero, use a separate code heap for optimized JIT'd code") + + /// /// Entry point slot backpatch /// diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 3e805225b21f2b..fd4c31de57f58e 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -3256,12 +3256,17 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser static_assert(CODE_SIZE_ALIGN >= sizeof(void*)); } - // Optionally route optimized (Tier1+) code to its own per-LoaderAllocator - // heap. LCG (dynamic-domain) methods are excluded because they are not - // tiering-eligible: every LCG method within a single process uses the same - // JIT optimization level, so splitting their heap would create at most one - // pool of each kind anyway. Gated off by default (DOTNET_SeparateOptimizedCodeHeaps). + // Optionally route JIT-optimized code to its own per-LoaderAllocator + // heap, separate from methods with JIT optimizations disabled (global + // MinOpts, /clr DisableOpts, MethodImplOptions.NoOptimization). The split + // is keyed off MethodDesc::IsJitOptimizationDisabled(), not the current + // compilation tier, so e.g. fully-optimized non-tiered methods land in + // the optimized pool too (NOTE(review): presumably Tier0 code of tiered + // methods does as well - confirm). LCG (dynamic-domain) is excluded + // because every LCG method within a single process uses the same optimization + // level, and interpreter requests are excluded because their heaps use a separate code path entirely. if (!requestInfo.IsDynamicDomain() + && !requestInfo.IsInterpreted() && !pMD->IsJitOptimizationDisabled() && CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SeparateOptimizedCodeHeaps) != 0) {