diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index cf3d63e0b3757a..ffd48f0a0f6886 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -538,6 +538,12 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredPGO_ScalableCountThreshold, W("Tiered #endif +// When set, JIT'd methods that are not optimization-disabled use a separate +// per-LoaderAllocator code heap from non-optimized code (Tier0, MinOpts, etc.). +// LCG and interpreter heaps are unaffected. +RETAIL_CONFIG_DWORD_INFO(INTERNAL_SeparateOptimizedCodeHeaps, W("SeparateOptimizedCodeHeaps"), 1, "When non-zero, use a separate code heap for optimized JIT'd code") + + /// /// Entry point slot backpatch /// diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 9cf0efa976490a..fd4c31de57f58e 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2735,6 +2735,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap // this first allocation is critical as it sets up correctly the loader heap info HeapList *pHp = new HeapList; + pHp->isOptimizedCode = false; #if defined(TARGET_64BIT) if (pInfo->IsInterpreted()) @@ -2840,6 +2841,7 @@ CodeHeapRequestInfo::CodeHeapRequestInfo(MethodDesc* pMD, LoaderAllocator* pAllo , m_isCollectible{ false } , m_isInterpreted{ false } , m_throwOnOutOfMemoryWithinRange{ true } + , m_isOptimizedCode{ false } { CONTRACTL { @@ -2947,7 +2949,18 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe DWORD flags = RangeSection::RANGE_SECTION_CODEHEAP; - if (pInfo->IsInterpreted()) + if (pInfo->IsOptimizedCode()) + { + // Optimized code is mutually exclusive with both interpreter and dynamic + // (LCG) domain because callers gate SetOptimizedCode() on neither being + // set. 
Tagging the RangeSection lets CanUseCodeHeap reject mismatched + // requests, keeping optimized and non-optimized JIT'd code in separate + // per-LoaderAllocator heaps. + _ASSERTE(!pInfo->IsInterpreted()); + _ASSERTE(!pInfo->IsDynamicDomain()); + flags |= RangeSection::RANGE_SECTION_OPTIMIZEDCODE; + } + else if (pInfo->IsInterpreted()) { flags |= RangeSection::RANGE_SECTION_INTERPRETER; } @@ -2975,6 +2988,10 @@ HeapList* EECodeGenManager::NewCodeHeap(CodeHeapRequestInfo *pInfo, DomainCodeHe _ASSERTE (pHp != NULL); _ASSERTE (pHp->maxCodeHeapSize >= initialRequestSize); + // Cache the optimized-code bit on the HeapList so CanUseCodeHeap + // doesn't have to do a FindCodeRange lookup on every cache check. + pHp->isOptimizedCode = (flags & RangeSection::RANGE_SECTION_OPTIMIZEDCODE) != 0; + // Append the current code heap to the new code heap element. pHp->SetNext(m_pAllCodeHeaps); @@ -3070,6 +3087,12 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER + if (pInfo->IsOptimizedCode()) + { + pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap; + pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap = NULL; + } + else { pCodeHeap = (HeapList *)pInfo->GetAllocator()->m_pLastUsedCodeHeap; pInfo->GetAllocator()->m_pLastUsedCodeHeap = NULL; @@ -3148,6 +3171,11 @@ void* EECodeGenManager::AllocCodeWorker(CodeHeapRequestInfo *pInfo, } else #endif // FEATURE_INTERPRETER + if (pInfo->IsOptimizedCode()) + { + pInfo->GetAllocator()->m_pLastUsedOptimizedCodeHeap = pCodeHeap; + } + else { pInfo->GetAllocator()->m_pLastUsedCodeHeap = pCodeHeap; } @@ -3228,6 +3256,23 @@ void EECodeGenManager::AllocCode(MethodDesc* pMD, size_t blockSize, size_t reser static_assert(CODE_SIZE_ALIGN >= sizeof(void*)); } + // Optionally route JIT-optimized code to its own per-LoaderAllocator + // heap, separate from code where the JIT can't optimize (Tier0, + // global MinOpts, /clr DisableOpts, MethodImplOptions.NoOptimization). 
+ // The split is keyed off MethodDesc::IsJitOptimizationDisabled(), not the + // current compilation tier: fully-optimized non-tiered methods land in the + // optimized pool, and so does every tier of a method that is not + // optimization-disabled. LCG (dynamic-domain) is excluded because every + // LCG method within a single process uses the same optimization level, and + // interpreter requests are excluded because their heaps use a separate code path entirely. + if (!requestInfo.IsDynamicDomain() + && !requestInfo.IsInterpreted() + && !pMD->IsJitOptimizationDisabled() + && CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SeparateOptimizedCodeHeaps) != 0) + { + requestInfo.SetOptimizedCode(); + } + // Scope the lock { CrstHolder ch(&m_CodeHeapLock); @@ -3380,6 +3425,15 @@ bool EECodeGenManager::CanUseCodeHeap(CodeHeapRequestInfo *pInfo, HeapList *pCod if ((pInfo->GetLoAddr() == 0) && (pInfo->GetHiAddr() == 0)) { + // Don't mix optimized and non-optimized code in the same heap. LCG and + // interpreter requests never set IsOptimizedCode(), so dynamic-domain + // and interpreter heaps don't carry the flag either, and this check + // is a no-op for them.
+ if (pCodeHeap->isOptimizedCode != pInfo->IsOptimizedCode()) + { + return false; + } + // We have no constraint so this non empty heap will be able to satisfy our request if (pInfo->IsDynamicDomain()) { diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 02909048ffc031..492a9c536234fa 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -411,6 +411,7 @@ class CodeHeapRequestInfo final bool m_isCollectible; bool m_isInterpreted; bool m_throwOnOutOfMemoryWithinRange; + bool m_isOptimizedCode; public: CodeHeapRequestInfo(MethodDesc* pMD); @@ -430,6 +431,9 @@ class CodeHeapRequestInfo final bool IsInterpreted() { return m_isInterpreted; } void SetInterpreted() { m_isInterpreted = true; } + bool IsOptimizedCode() { return m_isOptimizedCode; } + void SetOptimizedCode() { m_isOptimizedCode = true; } + size_t GetRequestSize() { return m_requestSize; } void SetRequestSize(size_t requestSize) { m_requestSize = requestSize; } @@ -532,6 +536,12 @@ struct HeapList BYTE* CLRPersonalityRoutine; // jump thunk to personality routine, NULL if there is no personality routine (e.g. interpreter code heap) #endif + // Cached copy of the RANGE_SECTION_OPTIMIZEDCODE bit on the heap's + // RangeSection. Lets CanUseCodeHeap reject heap/request mismatches + // without a per-allocation FindCodeRange lookup. Set at heap creation + // time in NewCodeHeap; never changes afterwards. + bool isOptimizedCode; + TADDR GetModuleBase() { #if defined(TARGET_64BIT) @@ -735,6 +745,7 @@ struct RangeSection RANGE_SECTION_RANGELIST = 0x4, RANGE_SECTION_INTERPRETER = 0x8, RANGE_SECTION_VIRTUALIP = 0x10, // This range section contains virtual IPs (e.g. 
for ReadyToRun code) instead of actual code addresses in linear memory + RANGE_SECTION_OPTIMIZEDCODE = 0x20, }; #ifdef FEATURE_READYTORUN diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index b91382bff21dd8..9f955bbc450886 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -433,6 +433,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) m_pAllocator = pInfo->GetAllocator(); HeapList* pHp = new HeapList; + pHp->isOptimizedCode = false; TrackAllocation *pTracker = NULL; diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 92aeaa9c28679c..07f734a14b19f8 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -81,6 +81,7 @@ LoaderAllocator::LoaderAllocator(bool collectible) : m_pCodeHeapInitialAlloc = NULL; m_pVSDHeapInitialAlloc = NULL; m_pLastUsedCodeHeap = NULL; + m_pLastUsedOptimizedCodeHeap = NULL; m_pLastUsedDynamicCodeHeap = NULL; #ifdef FEATURE_INTERPRETER m_pLastUsedInterpreterCodeHeap = NULL; diff --git a/src/coreclr/vm/loaderallocator.hpp b/src/coreclr/vm/loaderallocator.hpp index 525eb06d5ddb8f..fef9ca91116a65 100644 --- a/src/coreclr/vm/loaderallocator.hpp +++ b/src/coreclr/vm/loaderallocator.hpp @@ -404,6 +404,7 @@ class LoaderAllocator // ExecutionManager caches void * m_pLastUsedCodeHeap; void * m_pLastUsedDynamicCodeHeap; + void * m_pLastUsedOptimizedCodeHeap; #ifdef FEATURE_INTERPRETER void * m_pLastUsedInterpreterCodeHeap; void * m_pLastUsedInterpreterDynamicCodeHeap;