From cf20a2a26c4089f1ae31162ebad810cab87a02cb Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Sat, 7 Mar 2026 13:51:55 -0500 Subject: [PATCH] Add DirectX 12 GPU backend for automated unit testing on Windows Introduce a DirectX 12 / HLSL rendering backend alongside the existing OpenGL / GLSL and Metal / MSL backends, enabling the GPU unit test suite to run natively on Windows without requiring an OpenGL context. Key changes: GraphicalApp abstract interface (graphicalapp.h/cpp) Backend-agnostic base class extracted from OglApp. OglApp and MetalApp now inherit from it. DxApp (dxapp.h/cpp) -- DirectX 12 backend Off-screen RGBA32F render target, full-screen triangle via SV_VertexID, staging readback, SM 6.0 DXC shader compilation. HLSLBuilder (hlsl.h/cpp) -- HLSL shader generation Translates GpuShaderDesc into HLSL pixel shaders with 1D and 3D LUT texture uploads in RGBA32F format. CMake integration OCIO_DIRECTX_ENABLED option, FetchContent for DirectX-Headers, auto-copy of DXC runtime DLLs to the test output directory. Test tolerance adjustments Minor epsilon increases for 4 tests due to DX12/SM6.0 FMA and pow() precision differences. All 263 GPU tests pass on the DirectX 12 backend. Build and run: # Configure (OCIO_DIRECTX_ENABLED defaults to ON on Windows) cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release # Build the GPU test binary cmake --build build --target test_gpu_exec --config Release # Run GPU tests with the DX12 backend ctest --test-dir build -C Release -R test_dx Signed-off-by: Eric Renaud-Houde --- CMakeLists.txt | 8 + .../install/InstallDirectXHeaders.cmake | 19 + src/apps/ociochecklut/main.cpp | 12 +- src/apps/ocioconvert/main.cpp | 16 +- src/apps/ociodisplay/main.cpp | 14 +- src/libutils/oglapphelpers/CMakeLists.txt | 131 ++- src/libutils/oglapphelpers/dxapp.cpp | 1038 +++++++++++++++++ src/libutils/oglapphelpers/dxapp.h | 105 ++ src/libutils/oglapphelpers/dxutils.h | 37 + src/libutils/oglapphelpers/graphicalapp.cpp | 15 + src/libutils/oglapphelpers/graphicalapp.h | 81 ++ src/libutils/oglapphelpers/hlsl.cpp | 498 ++++++++ src/libutils/oglapphelpers/hlsl.h | 95 ++ src/libutils/oglapphelpers/metalapp.h | 4 +- src/libutils/oglapphelpers/metalapp.mm | 6 +- src/libutils/oglapphelpers/oglapp.cpp | 63 +- src/libutils/oglapphelpers/oglapp.h | 102 +- tests/gpu/CMakeLists.txt | 58 +- tests/gpu/FixedFunctionOp_test.cpp | 11 +- tests/gpu/GPUUnitTest.cpp | 76 +- tests/gpu/MatrixOp_test.cpp | 3 +- 21 files changed, 2207 insertions(+), 185 deletions(-) create mode 100644 share/cmake/modules/install/InstallDirectXHeaders.cmake create mode 100644 src/libutils/oglapphelpers/dxapp.cpp create mode 100644 src/libutils/oglapphelpers/dxapp.h create mode 100644 src/libutils/oglapphelpers/dxutils.h create mode 100644 src/libutils/oglapphelpers/graphicalapp.cpp create mode 100644 src/libutils/oglapphelpers/graphicalapp.h create mode 100644 src/libutils/oglapphelpers/hlsl.cpp create mode 100644 src/libutils/oglapphelpers/hlsl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b1fce3398..a57649ac9d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -287,6 +287,14 @@ message(STATUS "") message(STATUS "Checking for GPU configuration...") include(CheckSupportGL) +# DirectX 12 is only available on Windows. 
+if(WIN32) + option(OCIO_DIRECTX_ENABLED "Enable DirectX 12 GPU rendering support" ON) +else() + set(OCIO_DIRECTX_ENABLED OFF CACHE BOOL "Enable DirectX 12 GPU rendering support" FORCE) +endif() +mark_as_advanced(OCIO_DIRECTX_ENABLED) + ############################################################################### # Check for ARM neon here because we need to know if ARM NEON is supported diff --git a/share/cmake/modules/install/InstallDirectXHeaders.cmake b/share/cmake/modules/install/InstallDirectXHeaders.cmake new file mode 100644 index 0000000000..7d2fe91d55 --- /dev/null +++ b/share/cmake/modules/install/InstallDirectXHeaders.cmake @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright Contributors to the OpenColorIO Project. +# +# Install DirectX-Headers (header-only, Windows only) +# https://github.com/microsoft/DirectX-Headers +# +############################################################################### + +include(FetchContent) + +set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/ext/build/DirectX-Headers") +set(DIRECTX_HEADERS_BUILD_TEST OFF CACHE BOOL "" FORCE) + +FetchContent_Declare(DirectX-Headers + GIT_REPOSITORY https://github.com/microsoft/DirectX-Headers.git + GIT_TAG v1.619.1 +) + +FetchContent_MakeAvailable(DirectX-Headers) diff --git a/src/apps/ociochecklut/main.cpp b/src/apps/ociochecklut/main.cpp index 9469090a15..593d676236 100644 --- a/src/apps/ociochecklut/main.cpp +++ b/src/apps/ociochecklut/main.cpp @@ -52,18 +52,18 @@ class ProcessorWrapper m_gpu = gpu; if (!m_oglApp) { - m_oglApp = OCIO::OglApp::CreateOglApp("ociochecklut", 256, 20); + m_oglApp = OCIO::GraphicalApp::CreateApp("ociochecklut", 256, 20); if (m_verbose) { - m_oglApp->printGLInfo(); + m_oglApp->printGraphicsInfo(); } } - m_oglApp->setPrintShader(m_verbose); + m_oglApp->setShaderVerbose(m_verbose); float image[4]{ 0.f, 0.f, 0.f, 0.f }; - m_oglApp->initImage(1, 1, OCIO::OglApp::COMPONENTS_RGBA, image); - m_oglApp->createGLBuffers(); + m_oglApp->initImage(1, 
1, OCIO::GraphicalApp::COMPONENTS_RGBA, image); + m_oglApp->createBuffers(); OCIO::GpuShaderDescRcPtr shaderDesc = OCIO::GpuShaderDesc::CreateShaderDesc(); shaderDesc->setLanguage(OCIO::GPU_LANGUAGE_GLSL_1_2); m_gpu->extractGpuShaderInfo(shaderDesc); @@ -98,7 +98,7 @@ class ProcessorWrapper m_oglApp->redisplay(); m_oglApp->readImage(pixel.data()); } - OCIO::OglAppRcPtr m_oglApp; + OCIO::GraphicalAppRcPtr m_oglApp; #else void applyGPU(std::vector &) { diff --git a/src/apps/ocioconvert/main.cpp b/src/apps/ocioconvert/main.cpp index 31a5ed3542..165aefc4a8 100644 --- a/src/apps/ocioconvert/main.cpp +++ b/src/apps/ocioconvert/main.cpp @@ -361,18 +361,18 @@ int main(int argc, const char **argv) #ifdef OCIO_GPU_ENABLED // Initialize GPU. - OCIO::OglAppRcPtr oglApp; + OCIO::GraphicalAppRcPtr oglApp; if (usegpu || usegpuLegacy) { - OCIO::OglApp::Components comp = OCIO::OglApp::COMPONENTS_RGBA; + OCIO::GraphicalApp::Components comp = OCIO::GraphicalApp::COMPONENTS_RGBA; if (imgInput.getNumChannels() == 4) { - comp = OCIO::OglApp::COMPONENTS_RGBA; + comp = OCIO::GraphicalApp::COMPONENTS_RGBA; } else if (imgInput.getNumChannels() == 3) { - comp = OCIO::OglApp::COMPONENTS_RGB; + comp = OCIO::GraphicalApp::COMPONENTS_RGB; } else { @@ -383,7 +383,7 @@ int main(int argc, const char **argv) try { - oglApp = OCIO::OglApp::CreateOglApp("ocioconvert", 256, 20); + oglApp = OCIO::GraphicalApp::CreateApp("ocioconvert", 256, 20); } catch (const OCIO::Exception & e) { @@ -393,14 +393,14 @@ int main(int argc, const char **argv) if (verbose) { - oglApp->printGLInfo(); + oglApp->printGraphicsInfo(); } - oglApp->setPrintShader(outputgpuInfo); + oglApp->setShaderVerbose(outputgpuInfo); oglApp->initImage(imgInput.getWidth(), imgInput.getHeight(), comp, (float *)imgInput.getData()); - oglApp->createGLBuffers(); + oglApp->createBuffers(); } #endif // OCIO_GPU_ENABLED diff --git a/src/apps/ociodisplay/main.cpp b/src/apps/ociodisplay/main.cpp index ed7f09759a..6eebc3b6a3 100644 --- 
a/src/apps/ociodisplay/main.cpp +++ b/src/apps/ociodisplay/main.cpp @@ -64,7 +64,7 @@ float g_display_gamma{1.0f}; int g_channelHot[4]{1, 1, 1, 1}; // show rgb int g_viewsMenuID; -OCIO::OglAppRcPtr g_oglApp; +OCIO::GraphicalAppRcPtr g_oglApp; void UpdateOCIOGLState(); @@ -115,14 +115,14 @@ static void InitImageTexture(const char * filename) } } - OCIO::OglApp::Components comp = OCIO::OglApp::COMPONENTS_RGBA; + OCIO::GraphicalApp::Components comp = OCIO::GraphicalApp::COMPONENTS_RGBA; if (img.getNumChannels() == 4) { - comp = OCIO::OglApp::COMPONENTS_RGBA; + comp = OCIO::GraphicalApp::COMPONENTS_RGBA; } else if (img.getNumChannels() == 3) { - comp = OCIO::OglApp::COMPONENTS_RGB; + comp = OCIO::GraphicalApp::COMPONENTS_RGB; } else { @@ -658,7 +658,7 @@ int main(int argc, char **argv) else #endif { - g_oglApp = std::make_shared("ociodisplay", 512, 512); + g_oglApp = std::make_shared("ociodisplay", 512, 512); } } catch (const OCIO::Exception &e) @@ -669,11 +669,11 @@ int main(int argc, char **argv) if (g_verbose) { - g_oglApp->printGLInfo(); + g_oglApp->printGraphicsInfo(); } g_oglApp->setYMirror(); - g_oglApp->setPrintShader(g_gpuinfo); + g_oglApp->setShaderVerbose(g_gpuinfo); glutReshapeFunc(Reshape); glutKeyboardFunc(Key); diff --git a/src/libutils/oglapphelpers/CMakeLists.txt b/src/libutils/oglapphelpers/CMakeLists.txt index cef50ede1c..4eda17d4b7 100644 --- a/src/libutils/oglapphelpers/CMakeLists.txt +++ b/src/libutils/oglapphelpers/CMakeLists.txt @@ -1,20 +1,41 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright Contributors to the OpenColorIO Project. -if(NOT OCIO_GL_ENABLED) - message(WARNING "GL component missing. Skipping oglapphelpers.") +if(NOT OCIO_GL_ENABLED AND NOT (WIN32 AND OCIO_DIRECTX_ENABLED)) + message(WARNING "GL component missing and DirectX disabled. 
Skipping oglapphelpers.") return() endif() set(SOURCES - glsl.cpp - oglapp.cpp + graphicalapp.cpp ) set(INCLUDES - glsl.h - oglapp.h + graphicalapp.h ) +if(OCIO_GL_ENABLED) + list(APPEND SOURCES + glsl.cpp + oglapp.cpp + ) + list(APPEND INCLUDES + glsl.h + oglapp.h + ) +endif() + +if(WIN32 AND OCIO_DIRECTX_ENABLED) + list(APPEND SOURCES + dxapp.cpp + hlsl.cpp + ) + list(APPEND INCLUDES + dxapp.h + dxutils.h + hlsl.h + ) +endif() + if(APPLE) list(APPEND SOURCES @@ -31,7 +52,7 @@ if(APPLE) endif() -add_library(oglapphelpers STATIC ${SOURCES}) +add_library(oglapphelpers STATIC ${INCLUDES} ${SOURCES}) set_target_properties(oglapphelpers PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(oglapphelpers PROPERTIES OUTPUT_NAME OpenColorIOoglapphelpers) @@ -56,47 +77,55 @@ set_target_properties(oglapphelpers PROPERTIES target_include_directories(oglapphelpers PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE - ${OPENGL_INCLUDE_DIR} - ${GLEW_INCLUDE_DIRS} - ${GLUT_INCLUDE_DIR} ) +if(OCIO_GL_ENABLED) + target_include_directories(oglapphelpers + PRIVATE + ${OPENGL_INCLUDE_DIR} + ${GLEW_INCLUDE_DIRS} + ${GLUT_INCLUDE_DIR} + ) +endif() -if(${OCIO_USE_GLVND}) - if(${OCIO_EGL_HEADLESS}) - target_include_directories(oglapphelpers - PRIVATE - ${OPENGL_EGL_INCLUDE_DIRS} - ) - target_link_libraries(oglapphelpers - PRIVATE - OpenColorIO - OpenGL::OpenGL - OpenGL::GLU - ${GLEW_LIBRARIES} - ${GLUT_LIBRARIES} - OpenGL::EGL +target_link_libraries(oglapphelpers + PRIVATE + OpenColorIO +) + +if(OCIO_GL_ENABLED) + if(${OCIO_USE_GLVND}) + if(${OCIO_EGL_HEADLESS}) + target_include_directories(oglapphelpers + PRIVATE + ${OPENGL_EGL_INCLUDE_DIRS} ) + target_link_libraries(oglapphelpers + PRIVATE + OpenGL::OpenGL + OpenGL::GLU + ${GLEW_LIBRARIES} + ${GLUT_LIBRARIES} + OpenGL::EGL + ) + else() + target_link_libraries(oglapphelpers + PRIVATE + OpenGL::OpenGL + OpenGL::GLU + ${GLEW_LIBRARIES} + ${GLUT_LIBRARIES} + ) + endif() else() + # if OCIO_USE_GLVND is OFF, OCIO_EGL_HEADLESS is also OFF 
target_link_libraries(oglapphelpers PRIVATE - OpenColorIO - OpenGL::OpenGL - OpenGL::GLU + ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES} ${GLUT_LIBRARIES} - ) - endif() -else() - # if OCIO_USE_GLVND is OFF, OCIO_EGL_HEADLESS is also OFF - target_link_libraries(oglapphelpers - PRIVATE - OpenColorIO - ${OPENGL_LIBRARIES} - ${GLEW_LIBRARIES} - ${GLUT_LIBRARIES} - ) + ) + endif() endif() if(APPLE) @@ -111,6 +140,30 @@ if(APPLE) ) endif() +if(OCIO_GL_ENABLED) + target_compile_definitions(oglapphelpers + PUBLIC + OCIO_GL_ENABLED + ) +endif() + +if(WIN32 AND OCIO_DIRECTX_ENABLED) + include(InstallDirectXHeaders) + target_compile_definitions(oglapphelpers + PUBLIC + OCIO_DIRECTX_ENABLED + ) + target_link_libraries(oglapphelpers + PUBLIC + Microsoft::DirectX-Headers + PRIVATE + d3d12 + dxgi + dxcompiler + dxguid + ) +endif() + if(${OCIO_EGL_HEADLESS}) target_include_directories(oglapphelpers PRIVATE diff --git a/src/libutils/oglapphelpers/dxapp.cpp b/src/libutils/oglapphelpers/dxapp.cpp new file mode 100644 index 0000000000..4859ba600d --- /dev/null +++ b/src/libutils/oglapphelpers/dxapp.cpp @@ -0,0 +1,1038 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#include +#include +#include + +#include + +#include "dxapp.h" +#include "dxutils.h" + +#include + +namespace OCIO_NAMESPACE +{ + namespace + { + LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) + { + switch (message) + { + case WM_DESTROY: + PostQuitMessage(0); + return 0; + } + + return DefWindowProc(hWnd, message, wParam, lParam); + } + + void GetHardwareAdapter( + IDXGIFactory1* pFactory, + IDXGIAdapter1** ppAdapter, + bool requestHighPerformanceAdapter = true) + { + *ppAdapter = nullptr; + + ComPtr adapter; + + ComPtr factory6; + if (SUCCEEDED(pFactory->QueryInterface(IID_PPV_ARGS(&factory6)))) + { + for ( + UINT adapterIndex = 0; + SUCCEEDED(factory6->EnumAdapterByGpuPreference( + adapterIndex, + requestHighPerformanceAdapter ? 
DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE : DXGI_GPU_PREFERENCE_UNSPECIFIED, + IID_PPV_ARGS(&adapter))); + ++adapterIndex) + { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + { + continue; + } + + if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) + { + break; + } + } + } + + if (adapter.Get() == nullptr) + { + for (UINT adapterIndex = 0; SUCCEEDED(pFactory->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) + { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + { + continue; + } + + if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) + { + break; + } + } + } + + *ppAdapter = adapter.Detach(); + } + } + +DxApp::DxApp(const char* winTitle, int winWidth, int winHeight) + : m_viewportWidth{ winWidth } + , m_viewportHeight{ winHeight } +{ + // Initialize the window class. + WNDCLASSEXA windowClass = { 0 }; + windowClass.cbSize = sizeof(WNDCLASSEXA); + windowClass.style = CS_HREDRAW | CS_VREDRAW; + windowClass.lpfnWndProc = WindowProc; + windowClass.hInstance = NULL; + windowClass.hCursor = LoadCursor(NULL, IDC_ARROW); + windowClass.lpszClassName = winTitle; + RegisterClassExA(&windowClass); + m_windowClassName = winTitle; + + RECT windowRect = { 0, 0, static_cast(m_viewportWidth), static_cast(m_viewportHeight) }; + AdjustWindowRect(&windowRect, WS_OVERLAPPEDWINDOW, FALSE); + + // Create the window and store a handle to it. + m_hwnd = CreateWindowA( + windowClass.lpszClassName, + winTitle, + WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, + CW_USEDEFAULT, + windowRect.right - windowRect.left, + windowRect.bottom - windowRect.top, + NULL, // We have no parent window. + NULL, // We aren't using menus. 
+ NULL, + NULL); + + ShowWindow(m_hwnd, SW_RESTORE); + + UINT dxgiFactoryFlags = 0; +#if defined(_DEBUG) + { + ComPtr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) + { + debugController->EnableDebugLayer(); + + // Enable additional debug layers. + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + } + } +#endif + + ComPtr factory; + ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory))); + + ComPtr hardwareAdapter; + GetHardwareAdapter(factory.Get(), &hardwareAdapter); + + ThrowIfFailed(D3D12CreateDevice( + hardwareAdapter.Get(), + D3D_FEATURE_LEVEL_11_0, // Standard minimum for D3D12, maximize compatibility + IID_PPV_ARGS(&m_device) + )); + + // Describe and create the command queue. + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue))); + + DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.BufferCount = FrameCount; + swapChainDesc.Width = m_viewportWidth; + swapChainDesc.Height = m_viewportHeight; + swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + swapChainDesc.SampleDesc.Count = 1; + + ComPtr swapChain; + ThrowIfFailed(factory->CreateSwapChainForHwnd( + m_commandQueue.Get(), // Swap chain needs the queue so that it can force a flush on it. + m_hwnd, + &swapChainDesc, + nullptr, + nullptr, + &swapChain + )); + + // This sample does not support fullscreen transitions. + ThrowIfFailed(factory->MakeWindowAssociation(m_hwnd, DXGI_MWA_NO_ALT_ENTER)); + + ThrowIfFailed(swapChain.As(&m_swapChain)); + + // Create descriptor heaps. + { + // Describe and create a render target view (RTV) descriptor heap. 
+ D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; + rtvHeapDesc.NumDescriptors = FrameCount; + rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + ThrowIfFailed(m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&m_rtvHeap))); + + m_rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + m_cbvSrvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Create frame resources. + { + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart()); + + // Create a RTV for each frame. + for (UINT n = 0; n < FrameCount; n++) + { + ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n]))); + m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle); + rtvHandle.Offset(1, m_rtvDescriptorSize); + } + } + + ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator))); + // Create the command list. + ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator.Get(), m_pipelineState.Get(), IID_PPV_ARGS(&m_commandList))); + // Close the command list and execute it to begin the initial GPU setup. + ThrowIfFailed(m_commandList->Close()); + + // Create fence + ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence))); + m_fenceValue = 1; + m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + if (!m_fenceEvent) + { + throw Exception("DxApp: CreateEvent failed."); + } +} + +DxApp::~DxApp() +{ + // Ensure that the GPU is no longer referencing resources that are about to be + // cleaned up by the destructor. 
+ waitForPreviousFrame(); + + CloseHandle(m_fenceEvent); + + if (m_hwnd) + { + DestroyWindow(m_hwnd); + } + if (!m_windowClassName.empty()) + { + UnregisterClassA(m_windowClassName.c_str(), NULL); + } +} + +void DxApp::initImage(int imageWidth, int imageHeight, Components comp, const float* imageBuffer) +{ + if (comp != COMPONENTS_RGBA) + { + throw Exception("DxApp: COMPONENTS_RGB is unused and not currently implemented."); + } + + m_imageWidth = imageWidth; + m_imageHeight = imageHeight; + m_comp = comp; + + // Create CBV/SRV heap if not already created (room for image + LUT textures) + if (!m_cbvSrvHeap) + { + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.NumDescriptors = 16; // Slot 0 for image, remaining for LUT textures + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + ThrowIfFailed(m_device->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(&m_cbvSrvHeap))); + } + + // Create the image texture in default heap + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + textureDesc.Width = m_imageWidth; + textureDesc.Height = m_imageHeight; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(m_device->CreateCommittedResource( + &defaultHeapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_imageTexture))); + + // Create the upload buffer with row-pitch aligned size + const UINT pixelSize = 4 * sizeof(float); // RGBA32F + const UINT rowPitch = AlignRowPitch(m_imageWidth, pixelSize); + const UINT64 uploadBufferSize = rowPitch * m_imageHeight; + + CD3DX12_HEAP_PROPERTIES 
uploadHeapProps(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC uploadBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_imageUploadBuffer))); + + // Perform the initial upload + // Map the upload buffer and copy image data + const UINT srcRowPitch = m_imageWidth * pixelSize; // Source data is tightly packed + BYTE* pData = nullptr; + ThrowIfFailed(m_imageUploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + + for (int y = 0; y < m_imageHeight; ++y) + { + const BYTE* srcRow = reinterpret_cast(imageBuffer) + y * srcRowPitch; + BYTE* dstRow = pData + y * rowPitch; + memcpy(dstRow, srcRow, srcRowPitch); + } + + m_imageUploadBuffer->Unmap(0, nullptr); + + // Record commands to copy from upload buffer to the image texture + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Offset = 0; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = m_imageWidth; + footprint.Footprint.Height = m_imageHeight; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = rowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = m_imageUploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = m_imageTexture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + m_commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to PIXEL_SHADER_RESOURCE + auto barrierInit = CD3DX12_RESOURCE_BARRIER::Transition( + m_imageTexture.Get(), 
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_commandList->ResourceBarrier(1, &barrierInit); + + ThrowIfFailed(m_commandList->Close()); + + // Execute the command list + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // Wait for GPU to finish + waitForPreviousFrame(); + + // Create SRV for the image texture in slot 0 + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_cbvSrvHeap->GetCPUDescriptorHandleForHeapStart()); + m_device->CreateShaderResourceView(m_imageTexture.Get(), &srvDesc, srvHandle); +} + +void DxApp::updateImage(const float* imageBuffer) +{ + // Map the upload buffer and copy image data row-by-row with proper pitch alignment + const UINT pixelSize = 4 * sizeof(float); // RGBA32F + const UINT srcRowPitch = m_imageWidth * pixelSize; + const UINT dstRowPitch = AlignRowPitch(m_imageWidth, pixelSize); + + BYTE* pData = nullptr; + ThrowIfFailed(m_imageUploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + + for (int y = 0; y < m_imageHeight; ++y) + { + const BYTE* srcRow = reinterpret_cast(imageBuffer) + y * srcRowPitch; + BYTE* dstRow = pData + y * dstRowPitch; + memcpy(dstRow, srcRow, srcRowPitch); + } + + m_imageUploadBuffer->Unmap(0, nullptr); + + // Record commands to copy from upload buffer to the image texture + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + // Transition texture from PIXEL_SHADER_RESOURCE to COPY_DEST for the update + auto barrierUpdate1 = CD3DX12_RESOURCE_BARRIER::Transition( + m_imageTexture.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, 
D3D12_RESOURCE_STATE_COPY_DEST); + m_commandList->ResourceBarrier(1, &barrierUpdate1); + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Offset = 0; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = m_imageWidth; + footprint.Footprint.Height = m_imageHeight; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = dstRowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = m_imageUploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = m_imageTexture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + m_commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to PIXEL_SHADER_RESOURCE + auto barrierUpdate2 = CD3DX12_RESOURCE_BARRIER::Transition( + m_imageTexture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_commandList->ResourceBarrier(1, &barrierUpdate2); + + ThrowIfFailed(m_commandList->Close()); + + // Execute the command list + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // Wait for GPU to finish + waitForPreviousFrame(); +} + +void DxApp::createBuffers() +{ + // Create readback buffer for copying GPU results back to CPU + const UINT pixelSize = 4 * sizeof(float); // RGBA32F + m_readbackRowPitch = AlignRowPitch(m_viewportWidth, pixelSize); + const UINT64 readbackBufferSize = m_readbackRowPitch * m_viewportHeight; + + CD3DX12_HEAP_PROPERTIES readbackHeapProps(D3D12_HEAP_TYPE_READBACK); + CD3DX12_RESOURCE_DESC readbackBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(readbackBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &readbackHeapProps, + D3D12_HEAP_FLAG_NONE, + 
&readbackBufferDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_readbackBuffer))); +} + +void DxApp::setShader(GpuShaderDescRcPtr& shaderDesc) +{ + // Reset command list for LUT texture uploads + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + // Store shader desc so redisplay() can update uniform values each frame. + m_currentShaderDesc = shaderDesc; + + // Create HLSLBuilder to allocate all LUT textures. + // Each texture is placed at the descriptor heap slot matching its HLSL register + // (derived from shaderDesc->get3DTextureShaderBindingIndex / getTextureShaderBindingIndex). + m_hlslBuilder = HLSLBuilder::Create(shaderDesc, m_device.Get(), m_commandList.Get(), + m_cbvSrvHeap.Get()); + + // Execute command list to upload LUT textures and wait for completion + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + waitForPreviousFrame(); + + // Create constant buffer for OCIO uniform variables (dynamic properties). + // D3D12 constant buffers must be 256-byte aligned. We always create one so the + // root signature can unconditionally declare a CBV at b0. 
+ { + m_constantBuffer.Reset(); + m_cbMappedData = nullptr; + + const UINT rawSize = static_cast(shaderDesc->getUniformBufferSize()); + m_cbufferAlignedSize = ((rawSize + 255u) & ~255u); + if (m_cbufferAlignedSize == 0) + m_cbufferAlignedSize = 256u; + + auto heapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + auto cbDesc = CD3DX12_RESOURCE_DESC::Buffer(m_cbufferAlignedSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &heapProps, D3D12_HEAP_FLAG_NONE, &cbDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&m_constantBuffer))); + + CD3DX12_RANGE readRange(0, 0); + ThrowIfFailed(m_constantBuffer->Map(0, &readRange, + reinterpret_cast(&m_cbMappedData))); + memset(m_cbMappedData, 0, m_cbufferAlignedSize); + } + + // Get the OCIO HLSL shader text + std::string ocioShader = m_hlslBuilder->getShaderText(); + + // Build the full HLSL shader source with a full-screen triangle vertex shader + // and pixel shader that applies OCIO color transform + std::ostringstream hlslSource; + + // Add the OCIO shader functions first + hlslSource << ocioShader << "\n\n"; + + // Declare the image texture and its sampler. + // OCIO's generated shader always starts LUT texture bindings at t1+ (textureBindingStart=1), + // so t0/s0 are always free for the input image. 
+ hlslSource << "Texture2D img : register(t0);\n"; + hlslSource << "SamplerState linearSampler : register(s0);\n\n"; + + // Vertex shader: full-screen triangle using SV_VertexID (no vertex buffer needed) + hlslSource << "struct VSOutput {\n"; + hlslSource << " float4 position : SV_Position;\n"; + hlslSource << " float2 texcoord : TEXCOORD0;\n"; + hlslSource << "};\n\n"; + + hlslSource << "VSOutput VSMain(uint vertexID : SV_VertexID) {\n"; + hlslSource << " VSOutput output;\n"; + hlslSource << " // Full-screen triangle: vertices at (-1,-1), (3,-1), (-1,3)\n"; + hlslSource << " float2 texcoord = float2((vertexID << 1) & 2, vertexID & 2);\n"; + hlslSource << " output.position = float4(texcoord * float2(2, -2) + float2(-1, 1), 0, 1);\n"; + hlslSource << " output.texcoord = texcoord;\n"; + hlslSource << " return output;\n"; + hlslSource << "}\n\n"; + + // Pixel shader: sample image and apply OCIO color transform + hlslSource << "float4 PSMain(VSOutput input) : SV_Target {\n"; + hlslSource << " float4 col = img.Sample(linearSampler, input.texcoord);\n"; + hlslSource << " return " << shaderDesc->getFunctionName() << "(col);\n"; + hlslSource << "}\n"; + + std::string fullShader = hlslSource.str(); + + if (isShaderVerbose()) + { + std::cout << std::endl; + std::cout << "GPU Shader Program:" << std::endl; + std::cout << std::endl; + std::cout << fullShader << std::endl; + std::cout << std::endl; + } + + // Compile shaders with DXC (DirectX Shader Compiler) for SM 6.0 + ComPtr dxcUtils; + ComPtr dxcCompiler; + ThrowIfFailed(DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&dxcUtils))); + ThrowIfFailed(DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler))); + + // Create a source blob from the shader string + ComPtr sourceBlob; + ThrowIfFailed(dxcUtils->CreateBlobFromPinned( + fullShader.c_str(), static_cast(fullShader.size()), + DXC_CP_UTF8, &sourceBlob)); + + DxcBuffer sourceBuffer; + sourceBuffer.Ptr = sourceBlob->GetBufferPointer(); + sourceBuffer.Size = 
sourceBlob->GetBufferSize(); + sourceBuffer.Encoding = DXC_CP_ACP; + + // Compile vertex shader (vs_6_0) + LPCWSTR vsArgs[] = { L"-T", L"vs_6_0", L"-E", L"VSMain" }; + ComPtr vsResult; + ThrowIfFailed(dxcCompiler->Compile(&sourceBuffer, vsArgs, _countof(vsArgs), + nullptr, IID_PPV_ARGS(&vsResult))); + HRESULT vsHr; + vsResult->GetStatus(&vsHr); + if (FAILED(vsHr)) + { + ComPtr errors; + vsResult->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); + std::ostringstream oss; + oss << "Vertex shader compilation failed (" << HrToString(vsHr) << ")"; + if (errors && errors->GetStringLength()) + oss << ":\n" << errors->GetStringPointer(); + std::cerr << oss.str() << std::endl; + throw Exception(oss.str().c_str()); + } + ComPtr vertexShaderBlob; + ThrowIfFailed(vsResult->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&vertexShaderBlob), nullptr)); + + // Compile pixel shader (ps_6_0). + LPCWSTR psArgs[] = { L"-T", L"ps_6_0", L"-E", L"PSMain" }; + ComPtr psResult; + ThrowIfFailed(dxcCompiler->Compile(&sourceBuffer, psArgs, _countof(psArgs), + nullptr, IID_PPV_ARGS(&psResult))); + HRESULT psHr; + psResult->GetStatus(&psHr); + if (FAILED(psHr)) + { + ComPtr errors; + psResult->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); + std::ostringstream oss; + oss << "Pixel shader compilation failed (" << HrToString(psHr) << ")"; + if (errors && errors->GetStringLength()) + oss << ":\n" << errors->GetStringPointer(); + std::cerr << oss.str() << std::endl; + throw Exception(oss.str().c_str()); + } + ComPtr pixelShaderBlob; + ThrowIfFailed(psResult->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&pixelShaderBlob), nullptr)); + + // Build root signature with a descriptor table for all SRVs and a static sampler. + // Total SRVs = 1 (image at t0) + total LUT count (at t1..tN). + // We use the total texture count from shaderDesc to cover all possible binding slots, + // since LUT heap slots are derived from binding indices and may not be sequential. 
+ const UINT numLUTs = shaderDesc->getNumTextures() + shaderDesc->getNum3DTextures(); + UINT totalSRVs = 1 + numLUTs; + + CD3DX12_DESCRIPTOR_RANGE1 srvRange; + srvRange.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, totalSRVs, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC); + + // Root parameter 0: descriptor table for all SRVs (image + LUTs). + // Root parameter 1: root CBV at b0 for OCIO uniform variables. + CD3DX12_ROOT_PARAMETER1 rootParameters[2]; + rootParameters[0].InitAsDescriptorTable(1, &srvRange, D3D12_SHADER_VISIBILITY_PIXEL); + rootParameters[1].InitAsConstantBufferView(0, 0, + D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, + D3D12_SHADER_VISIBILITY_PIXEL); + + // Build static samplers: s0 for the input image, s1..sN for each OCIO LUT texture. + std::vector staticSamplers(1 + numLUTs); + for (UINT i = 0; i <= numLUTs; ++i) + { + staticSamplers[i] = {}; + staticSamplers[i].Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + staticSamplers[i].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + staticSamplers[i].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + staticSamplers[i].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + staticSamplers[i].MipLODBias = 0; + staticSamplers[i].MaxAnisotropy = 0; + staticSamplers[i].ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + staticSamplers[i].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + staticSamplers[i].MinLOD = 0.0f; + staticSamplers[i].MaxLOD = D3D12_FLOAT32_MAX; + staticSamplers[i].ShaderRegister = i; // s0 = image, s1..sN = OCIO LUT samplers + staticSamplers[i].RegisterSpace = 0; + staticSamplers[i].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + } + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_1(_countof(rootParameters), rootParameters, + static_cast(staticSamplers.size()), staticSamplers.data(), + D3D12_ROOT_SIGNATURE_FLAG_NONE); + + ComPtr signature; + ComPtr sigErrorBlob; + HRESULT hr = D3DX12SerializeVersionedRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_1, + 
&signature, &sigErrorBlob); + if (FAILED(hr)) + { + if (sigErrorBlob) + { + std::cerr << "Root signature serialization error:\n" + << static_cast(sigErrorBlob->GetBufferPointer()) << std::endl; + } + ThrowIfFailed(hr); + } + + // Release previous root signature if it exists + m_rootSignature.Reset(); + + hr = m_device->CreateRootSignature(0, signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(&m_rootSignature)); + if (FAILED(hr)) + { + throw Exception(("CreateRootSignature failed: " + HrToString(hr)).c_str()); + } + + // Create the pipeline state object (PSO) + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = { nullptr, 0 }; // No vertex input layout (using SV_VertexID) + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.VS = { vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize() }; + psoDesc.PS = { pixelShaderBlob->GetBufferPointer(), pixelShaderBlob->GetBufferSize() }; + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R32G32B32A32_FLOAT; + psoDesc.SampleDesc.Count = 1; + + // Release previous pipeline state if it exists + m_pipelineState.Reset(); + + hr = m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState)); + if (FAILED(hr)) + { + std::cerr << "CreateGraphicsPipelineState failed (" << HrToString(hr) << ")\n" + << "Full HLSL shader:\n" << fullShader << std::endl; + throw Exception(("CreateGraphicsPipelineState failed: " + HrToString(hr)).c_str()); + } +} + +void DxApp::reshape(int width, int height) +{ + // Skip if nothing changed and resources are already initialized + if (m_viewportWidth == width && 
m_viewportHeight == height && m_floatRenderTarget) + return; + + // Wait for any in-flight GPU work before resizing resources + waitForPreviousFrame(); + + m_viewportWidth = width; + m_viewportHeight = height; + + // Release swap chain render target references before ResizeBuffers + for (UINT n = 0; n < FrameCount; n++) + m_renderTargets[n].Reset(); + + // Resize swap chain back buffers to the new dimensions + ThrowIfFailed(m_swapChain->ResizeBuffers( + FrameCount, + static_cast(width), + static_cast(height), + DXGI_FORMAT_R8G8B8A8_UNORM, + 0)); + + // Recreate RTVs for the resized swap chain back buffers + { + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart()); + for (UINT n = 0; n < FrameCount; n++) + { + ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n]))); + m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle); + rtvHandle.Offset(1, m_rtvDescriptorSize); + } + } + + // Create (or recreate) the off-screen R32G32B32A32_FLOAT render target for OCIO rendering. + // This avoids 8-bit UNORM quantization errors when comparing GPU vs CPU results. 
+ m_floatRenderTarget.Reset(); + + D3D12_RESOURCE_DESC floatRtDesc = {}; + floatRtDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + floatRtDesc.Width = static_cast(width); + floatRtDesc.Height = static_cast(height); + floatRtDesc.DepthOrArraySize = 1; + floatRtDesc.MipLevels = 1; + floatRtDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + floatRtDesc.SampleDesc.Count = 1; + floatRtDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + D3D12_CLEAR_VALUE floatClearValue = {}; + floatClearValue.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(m_device->CreateCommittedResource( + &defaultHeapProps, + D3D12_HEAP_FLAG_NONE, + &floatRtDesc, + D3D12_RESOURCE_STATE_RENDER_TARGET, + &floatClearValue, + IID_PPV_ARGS(&m_floatRenderTarget))); + + // Create a single-slot RTV heap for the float render target (created once) + if (!m_floatRtvHeap) + { + D3D12_DESCRIPTOR_HEAP_DESC floatRtvHeapDesc = {}; + floatRtvHeapDesc.NumDescriptors = 1; + floatRtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + floatRtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + ThrowIfFailed(m_device->CreateDescriptorHeap(&floatRtvHeapDesc, IID_PPV_ARGS(&m_floatRtvHeap))); + } + + // Create the float RTV + D3D12_RENDER_TARGET_VIEW_DESC floatRtvDesc = {}; + floatRtvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + floatRtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + m_device->CreateRenderTargetView(m_floatRenderTarget.Get(), &floatRtvDesc, + m_floatRtvHeap->GetCPUDescriptorHandleForHeapStart()); + + // Recreate readback buffer with updated dimensions (if already allocated by createBuffers) + if (m_readbackBuffer) + { + m_readbackBuffer.Reset(); + const UINT pixelSize = 4 * sizeof(float); // R32G32B32A32_FLOAT = 16 bytes per pixel + m_readbackRowPitch = AlignRowPitch(static_cast(width), pixelSize); + const UINT64 readbackBufferSize = m_readbackRowPitch * static_cast(height); + + CD3DX12_HEAP_PROPERTIES 
readbackHeapProps(D3D12_HEAP_TYPE_READBACK); + CD3DX12_RESOURCE_DESC readbackBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(readbackBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &readbackHeapProps, + D3D12_HEAP_FLAG_NONE, + &readbackBufferDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_readbackBuffer))); + } +} + +void DxApp::redisplay() +{ + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), m_pipelineState.Get())); + + UINT frameIndex = m_swapChain->GetCurrentBackBufferIndex(); + + // Transition swap chain back buffer to render target for clearing + auto barrierPresentToRt = CD3DX12_RESOURCE_BARRIER::Transition( + m_renderTargets[frameIndex].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET); + m_commandList->ResourceBarrier(1, &barrierPresentToRt); + + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle( + m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), frameIndex, m_rtvDescriptorSize); + + // Clear the swap chain back buffer (used for display only) + const float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + m_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr); + + // If pipeline state is ready, render the OCIO transform to the off-screen float render target. + // The float RT preserves full precision for accurate GPU vs CPU comparison in tests. 
+ if (m_pipelineState && m_floatRenderTarget) + { + CD3DX12_CPU_DESCRIPTOR_HANDLE floatRtvHandle( + m_floatRtvHeap->GetCPUDescriptorHandleForHeapStart()); + + // Clear the float render target + m_commandList->ClearRenderTargetView(floatRtvHandle, clearColor, 0, nullptr); + + // Set descriptor heap for shader resources + ID3D12DescriptorHeap* descriptorHeaps[] = { m_cbvSrvHeap.Get() }; + m_commandList->SetDescriptorHeaps(_countof(descriptorHeaps), descriptorHeaps); + + // Set the root signature and pipeline state + m_commandList->SetGraphicsRootSignature(m_rootSignature.Get()); + m_commandList->SetPipelineState(m_pipelineState.Get()); + + // Set the root descriptor table to the start of the SRV heap (image + LUTs) + CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandle(m_cbvSrvHeap->GetGPUDescriptorHandleForHeapStart()); + m_commandList->SetGraphicsRootDescriptorTable(0, srvHandle); + + // Update and bind the constant buffer (root parameter 1, b0). + // This fills in any OCIO uniform variables (dynamic properties like exposure, curves). + if (m_constantBuffer && m_cbMappedData && m_currentShaderDesc) + { + memset(m_cbMappedData, 0, m_cbufferAlignedSize); + + const unsigned numUniforms = m_currentShaderDesc->getNumUniforms(); + for (unsigned i = 0; i < numUniforms; ++i) + { + GpuShaderDesc::UniformData data; + m_currentShaderDesc->getUniform(i, data); + UINT8* dst = m_cbMappedData + data.m_bufferOffset; + + switch (data.m_type) + { + case UNIFORM_DOUBLE: + if (data.m_getDouble) + { + const float val = static_cast(data.m_getDouble()); + memcpy(dst, &val, sizeof(float)); + } + break; + case UNIFORM_BOOL: + if (data.m_getBool) + { + const int val = data.m_getBool() ? 
1 : 0; + memcpy(dst, &val, sizeof(int)); + } + break; + case UNIFORM_FLOAT3: + if (data.m_getFloat3) + { + const Float3& f3 = data.m_getFloat3(); + memcpy(dst, f3.data(), 3 * sizeof(float)); + } + break; + case UNIFORM_VECTOR_FLOAT: + if (data.m_vectorFloat.m_getSize && data.m_vectorFloat.m_getVector) + { + const int sz = data.m_vectorFloat.m_getSize(); + const float* vals = data.m_vectorFloat.m_getVector(); + for (int j = 0; j < sz; ++j) + memcpy(dst + j * 16, &vals[j], sizeof(float)); + } + break; + case UNIFORM_VECTOR_INT: + if (data.m_vectorInt.m_getSize && data.m_vectorInt.m_getVector) + { + const int sz = data.m_vectorInt.m_getSize(); + const int* vals = data.m_vectorInt.m_getVector(); + for (int j = 0; j < sz; ++j) + memcpy(dst + j * 16, &vals[j], sizeof(int)); + } + break; + default: + break; + } + } + + m_commandList->SetGraphicsRootConstantBufferView( + 1, m_constantBuffer->GetGPUVirtualAddress()); + } + + // Set viewport and scissor rect + D3D12_VIEWPORT viewport = {}; + viewport.TopLeftX = 0.0f; + viewport.TopLeftY = 0.0f; + viewport.Width = static_cast(m_viewportWidth); + viewport.Height = static_cast(m_viewportHeight); + viewport.MinDepth = 0.0f; + viewport.MaxDepth = 1.0f; + m_commandList->RSSetViewports(1, &viewport); + + D3D12_RECT scissorRect = { 0, 0, m_viewportWidth, m_viewportHeight }; + m_commandList->RSSetScissorRects(1, &scissorRect); + + // Render to the off-screen float render target + m_commandList->OMSetRenderTargets(1, &floatRtvHandle, FALSE, nullptr); + m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + m_commandList->DrawInstanced(3, 1, 0, 0); + } + + // Transition swap chain back buffer back to PRESENT state + auto barrierRtToPresent = CD3DX12_RESOURCE_BARRIER::Transition( + m_renderTargets[frameIndex].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT); + m_commandList->ResourceBarrier(1, &barrierRtToPresent); + + ThrowIfFailed(m_commandList->Close()); + + ID3D12CommandList* 
ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + ThrowIfFailed(m_swapChain->Present(1, 0)); + + waitForPreviousFrame(); +} + +void DxApp::readImage(float* imageBuffer) +{ + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + // Transition the float render target from RENDER_TARGET to COPY_SOURCE for readback + auto barrierReadback1 = CD3DX12_RESOURCE_BARRIER::Transition( + m_floatRenderTarget.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + m_commandList->ResourceBarrier(1, &barrierReadback1); + + // Copy from float render target to readback buffer + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Offset = 0; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = static_cast(m_viewportWidth); + footprint.Footprint.Height = static_cast(m_viewportHeight); + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = m_readbackRowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = m_floatRenderTarget.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLocation.SubresourceIndex = 0; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = m_readbackBuffer.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLocation.PlacedFootprint = footprint; + + m_commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition the float render target back to RENDER_TARGET for the next frame + auto barrierReadback2 = CD3DX12_RESOURCE_BARRIER::Transition( + m_floatRenderTarget.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + m_commandList->ResourceBarrier(1, &barrierReadback2); + + ThrowIfFailed(m_commandList->Close()); + + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + 
m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + waitForPreviousFrame(); + + // Map the readback buffer and copy float data row-by-row to caller's buffer, + // stripping D3D12 row-pitch alignment padding + BYTE* pData = nullptr; + ThrowIfFailed(m_readbackBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + + const UINT dstRowPitch = static_cast(m_viewportWidth) * 4 * sizeof(float); + for (int y = 0; y < m_viewportHeight; ++y) + { + const BYTE* srcRow = pData + y * m_readbackRowPitch; + BYTE* dstRow = reinterpret_cast(imageBuffer) + y * dstRowPitch; + memcpy(dstRow, srcRow, dstRowPitch); + } + + m_readbackBuffer->Unmap(0, nullptr); +} + +void DxApp::printGraphicsInfo() const noexcept +{ + try + { + // Query IDXGIDevice from the D3D12 device + ComPtr dxgiDevice; + if (FAILED(m_device.As(&dxgiDevice))) + { + return; // Silently ignore failure + } + + // Get the adapter from the DXGI device + ComPtr adapter; + if (FAILED(dxgiDevice->GetAdapter(&adapter))) + { + return; // Silently ignore failure + } + + // Get adapter description + DXGI_ADAPTER_DESC desc; + if (FAILED(adapter->GetDesc(&desc))) + { + return; // Silently ignore failure + } + + // Convert wide string to narrow string for Description + char narrowDesc[128]; + WideCharToMultiByte(CP_UTF8, 0, desc.Description, -1, narrowDesc, sizeof(narrowDesc), nullptr, nullptr); + + // Print adapter name and dedicated video memory + std::cout << "Adapter: " << narrowDesc << std::endl; + std::cout << "Dedicated Video Memory: " << (desc.DedicatedVideoMemory / (1024 * 1024)) << " MB" << std::endl; + } + catch (...) + { + // Silently ignore any exceptions + } +} + +void DxApp::waitForPreviousFrame() +{ + // Signal and increment the fence value. + const UINT64 fence = m_fenceValue; + ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), fence)); + m_fenceValue++; + + // Wait until the previous frame is finished. 
+ if (m_fence->GetCompletedValue() < fence) + { + ThrowIfFailed(m_fence->SetEventOnCompletion(fence, m_fenceEvent)); + WaitForSingleObject(m_fenceEvent, INFINITE); + } +} + +} // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/dxapp.h b/src/libutils/oglapphelpers/dxapp.h new file mode 100644 index 0000000000..26f3beb4c4 --- /dev/null +++ b/src/libutils/oglapphelpers/dxapp.h @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + + +#pragma once +#include +#include + +#include "graphicalapp.h" +#include "hlsl.h" + +#include + +using Microsoft::WRL::ComPtr; + +namespace OCIO_NAMESPACE +{ + +class DxApp : public GraphicalApp +{ +public: + DxApp() = delete; + DxApp(const DxApp&) = delete; + DxApp& operator=(const DxApp&) = delete; + + DxApp(const char* winTitle, int winWidth, int winHeight); + ~DxApp(); + + virtual void initImage(int imageWidth, int imageHeight, + Components comp, const float* imageBuffer) override; + + virtual void updateImage(const float* imageBuffer) override; + + virtual void createBuffers() override; + + virtual void setShader(GpuShaderDescRcPtr& shaderDesc) override; + + virtual void reshape(int width, int height) override; + + virtual void redisplay() override; + + virtual void readImage(float* imageBuffer) override; + + virtual void printGraphicsInfo() const noexcept override; + +private: + void waitForPreviousFrame(); + + static const UINT FrameCount = 2; + + int m_viewportWidth{ 0 }; + int m_viewportHeight{ 0 }; + + int m_imageWidth{ 0 }; + int m_imageHeight{ 0 }; + Components m_comp{ COMPONENTS_RGBA }; + + ComPtr m_swapChain; + ComPtr m_device; + ComPtr m_renderTargets[FrameCount]; + ComPtr m_commandAllocator; + ComPtr m_commandQueue; + ComPtr m_rootSignature; + ComPtr m_rtvHeap; + ComPtr m_cbvSrvHeap; + ComPtr m_pipelineState; + ComPtr m_commandList; + UINT m_rtvDescriptorSize; + UINT m_cbvSrvDescriptorSize; + + // Synchronization objects. 
+ HANDLE m_fenceEvent; + ComPtr m_fence; + UINT64 m_fenceValue; + + // Image texture and upload resources. + ComPtr m_imageTexture; + ComPtr m_imageUploadBuffer; + ComPtr m_readbackBuffer; + UINT m_readbackRowPitch{ 0 }; + + // Off-screen float render target (R32G32B32A32_FLOAT) for OCIO rendering and readback. + // The swap chain back buffers (UNORM) are required for windowing but unused in tests. + ComPtr m_floatRenderTarget; + ComPtr m_floatRtvHeap; + + // HLSL shader builder + HLSLBuilderRcPtr m_hlslBuilder; + + // Constant buffer for OCIO uniform variables (dynamic properties like exposure, curves). + // Always allocated; root signature always includes a CBV at b0. + ComPtr m_constantBuffer; + UINT8* m_cbMappedData{ nullptr }; + UINT m_cbufferAlignedSize{ 0 }; + + // Current shader description, retained so redisplay() can update uniform values. + GpuShaderDescRcPtr m_currentShaderDesc; + + // Window handle and class name for cleanup. + HWND m_hwnd{ nullptr }; + std::string m_windowClassName; +}; + +} + diff --git a/src/libutils/oglapphelpers/dxutils.h b/src/libutils/oglapphelpers/dxutils.h new file mode 100644 index 0000000000..f7e4ec84a6 --- /dev/null +++ b/src/libutils/oglapphelpers/dxutils.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#pragma once + +#include + +#include + +#include + +namespace OCIO_NAMESPACE +{ + +inline std::string HrToString(HRESULT hr) +{ + char s_str[64] = {}; + sprintf_s(s_str, "HRESULT of 0x%08X", static_cast(hr)); + return std::string(s_str); +} + +inline void ThrowIfFailed(HRESULT hr) +{ + if (FAILED(hr)) + { + throw Exception(HrToString(hr).c_str()); + } +} + +// Align a row pitch to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256 bytes). 
+inline UINT AlignRowPitch(UINT width, UINT pixelSize) +{ + return (width * pixelSize + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1) + & ~(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1); +} + +} // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/graphicalapp.cpp b/src/libutils/oglapphelpers/graphicalapp.cpp new file mode 100644 index 0000000000..3baefdf36c --- /dev/null +++ b/src/libutils/oglapphelpers/graphicalapp.cpp @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#include "graphicalapp.h" +#include "oglapp.h" + +namespace OCIO_NAMESPACE +{ + +GraphicalAppRcPtr GraphicalApp::CreateApp(const char * winTitle, int winWidth, int winHeight) +{ + return OglApp::CreateApp(winTitle, winWidth, winHeight); +} + +} // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/graphicalapp.h b/src/libutils/oglapphelpers/graphicalapp.h new file mode 100644 index 0000000000..a1b55c4aac --- /dev/null +++ b/src/libutils/oglapphelpers/graphicalapp.h @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#pragma once + +#include + +namespace OCIO_NAMESPACE +{ +// Forward declaration of GraphicalApp. +class GraphicalApp; +typedef OCIO_SHARED_PTR GraphicalAppRcPtr; + +class GraphicalApp +{ +public: + virtual ~GraphicalApp() = default; + enum Components + { + COMPONENTS_RGB = 0, + COMPONENTS_RGBA + }; + + // Shader code will be printed when generated. + void setShaderVerbose(bool print) + { + m_verboseShader = print; + } + + bool isShaderVerbose() const { return m_verboseShader; } + + // When displaying the processed image in a window, enable Y-axis mirroring. + void setYMirror() + { + m_yMirror = true; + } + + bool isYMirror() const { return m_yMirror; } + + // Initialize the image. + virtual void initImage(int imageWidth, int imageHeight, + Components comp, const float* imageBuffer) = 0; + + // Update the image if it changes. 
+ virtual void updateImage(const float* imageBuffer) = 0; + + // Create frame and rendering buffers. Needed if readImage will be used. + virtual void createBuffers() = 0; + + // Set the shader code. + virtual void setShader(GpuShaderDescRcPtr& shaderDesc) = 0; + + // Update the size of the buffer of the viewport that will be used to process the image + // (it does not modify the UI). To be called at least one time. Use image size if we want to + // read back the processed image. To process another image with the same size or using a + // different shader, reshape does not need to be called again. In case of an interactive + // application it should be called by the glutReshapeFunc callback using the windows size. + virtual void reshape(int width, int height) = 0; + + // Process the image. + virtual void redisplay() = 0; + + // Read the image from the rendering buffer. It is not meant to be used by interactive + // applications used to display the image. + virtual void readImage(float* imageBuffer) = 0; + + // Helper to print graphics info. + void virtual printGraphicsInfo() const noexcept = 0; + + // Factory: returns a platform-appropriate GraphicalApp (OGL or DX). + static GraphicalAppRcPtr CreateApp(const char * winTitle, int winWidth, int winHeight); + +private: + // Will shader code be outputed when setShader is called. + bool m_verboseShader{ false }; + // For interactive applications displaying the processed image. + bool m_yMirror{ false }; +}; + +} // namespace OCIO_NAMESPACE + diff --git a/src/libutils/oglapphelpers/hlsl.cpp b/src/libutils/oglapphelpers/hlsl.cpp new file mode 100644 index 0000000000..e165e420a5 --- /dev/null +++ b/src/libutils/oglapphelpers/hlsl.cpp @@ -0,0 +1,498 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. 
+ +#include +#include + +#include + +#include "hlsl.h" +#include "dxutils.h" + +namespace OCIO_NAMESPACE +{ + +namespace +{ + +// Upload a TEXTURE_1D LUT as a real D3D12 1D texture with RGBA32F format. +// RGBA32F is used instead of RGB32F because DXGI_FORMAT_R32G32B32_FLOAT has +// limited hardware support in D3D12 (optional for most resource types). +void AllocateTexture1D(ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + UINT width, + UINT numChannels, + const float* values, + Microsoft::WRL::ComPtr& texture, + Microsoft::WRL::ComPtr& uploadBuffer) +{ + if (!values) + { + throw Exception("Missing 1D LUT texture data"); + } + + // Create a 1D texture resource in default heap. + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + textureDesc.Width = width; + textureDesc.Height = 1; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(device->CreateCommittedResource( + &defaultHeapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&texture))); + + // Create upload buffer with row-pitch alignment + const UINT dstPixelSize = 4 * sizeof(float); // RGBA32F + const UINT rowPitch = AlignRowPitch(width, dstPixelSize); + const UINT uploadBufferSize = rowPitch; // Only 1 row + + CD3DX12_HEAP_PROPERTIES uploadHeapProps(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC uploadBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(device->CreateCommittedResource( + &uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadBuffer))); + + // Map and expand source data (numChannels floats/pixel) 
to RGBA32F (4 floats/pixel) + BYTE* pData = nullptr; + ThrowIfFailed(uploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + float* dst = reinterpret_cast(pData); + for (UINT i = 0; i < width; ++i) + { + dst[i * 4 + 0] = (numChannels > 0) ? values[i * numChannels + 0] : 0.0f; + dst[i * 4 + 1] = (numChannels > 1) ? values[i * numChannels + 1] : 0.0f; + dst[i * 4 + 2] = (numChannels > 2) ? values[i * numChannels + 2] : 0.0f; + dst[i * 4 + 3] = 1.0f; + } + uploadBuffer->Unmap(0, nullptr); + + // Copy from upload buffer to texture + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = width; + footprint.Footprint.Height = 1; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = rowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = uploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = texture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to shader resource state + auto barrier1D = CD3DX12_RESOURCE_BARRIER::Transition( + texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier1D); +} + +// Upload a TEXTURE_2D (folded) 1D LUT as a 2D texture of width x height with RGBA32F format. +void AllocateTexture2D( ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + UINT width, + UINT height, + UINT numChannels, + const float* values, + Microsoft::WRL::ComPtr& texture, + Microsoft::WRL::ComPtr& uploadBuffer) +{ + if (!values) + { + throw Exception("Missing 2D LUT texture data"); + } + + // Create a 2D texture resource in default heap. 
+ D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + textureDesc.Width = width; + textureDesc.Height = height; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps2D(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(device->CreateCommittedResource( + &defaultHeapProps2D, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&texture))); + + // Create upload buffer: each row is padded to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT + const UINT dstPixelSize = 4 * sizeof(float); // RGBA32F + const UINT rowPitch = AlignRowPitch(width, dstPixelSize); + const UINT uploadBufferSize = rowPitch * height; + + CD3DX12_HEAP_PROPERTIES uploadHeapProps2D(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC uploadBufferDesc2D = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(device->CreateCommittedResource( + &uploadHeapProps2D, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc2D, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadBuffer))); + + // Map and expand all rows: source is row-major (width * numChannels floats per row) + BYTE* pData = nullptr; + ThrowIfFailed(uploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + for (UINT y = 0; y < height; ++y) + { + const float* srcRow = values + y * width * numChannels; + float* dstRow = reinterpret_cast(pData + y * rowPitch); + for (UINT x = 0; x < width; ++x) + { + dstRow[x * 4 + 0] = (numChannels > 0) ? srcRow[x * numChannels + 0] : 0.0f; + dstRow[x * 4 + 1] = (numChannels > 1) ? srcRow[x * numChannels + 1] : 0.0f; + dstRow[x * 4 + 2] = (numChannels > 2) ? 
srcRow[x * numChannels + 2] : 0.0f; + dstRow[x * 4 + 3] = 1.0f; + } + } + uploadBuffer->Unmap(0, nullptr); + + // Copy from upload buffer to texture + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = width; + footprint.Footprint.Height = height; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = rowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = uploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = texture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to shader resource state + auto barrier2D = CD3DX12_RESOURCE_BARRIER::Transition( + texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier2D); +} + +// Upload a 3D LUT as a 3D texture with RGBA32F format. 
+// Create an RGBA32F 3D texture with edgelen texels per side and record on commandList the
+// upload of an OCIO 3D LUT into it, followed by the transition of the texture to the pixel
+// shader resource state.
+//
+// values must hold edgelen^3 tightly packed RGB triplets with the x index varying fastest;
+// every texel is expanded to RGBA with alpha set to 1. The copy is only recorded here, so
+// uploadBuffer is handed back to the caller to keep it alive until the command list ran.
+//
+// Throws an Exception when values is null or edgelen is not a valid 3D texture size; failed
+// D3D12 calls are reported through ThrowIfFailed.
+void AllocateTexture3D(ID3D12Device* device,
+                       ID3D12GraphicsCommandList* commandList,
+                       UINT edgelen,
+                       const float* values,
+                       Microsoft::WRL::ComPtr<ID3D12Resource>& texture,
+                       Microsoft::WRL::ComPtr<ID3D12Resource>& uploadBuffer)
+{
+    if (!values)
+    {
+        throw Exception("Missing 3D LUT texture data");
+    }
+
+    // DepthOrArraySize is only 16 bits wide and D3D12 caps the extent of a 3D texture, so
+    // reject sizes that would otherwise be silently truncated.
+    if (edgelen == 0 || edgelen > D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION)
+    {
+        throw Exception("Invalid 3D LUT edge length");
+    }
+
+    // Create 3D texture in default heap
+    D3D12_RESOURCE_DESC textureDesc = {};
+    textureDesc.MipLevels = 1;
+    textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+    textureDesc.Width = edgelen;
+    textureDesc.Height = edgelen;
+    textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+    textureDesc.DepthOrArraySize = static_cast<UINT16>(edgelen);
+    textureDesc.SampleDesc.Count = 1;
+    textureDesc.SampleDesc.Quality = 0;
+    textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
+
+    CD3DX12_HEAP_PROPERTIES defaultHeapProps3D(D3D12_HEAP_TYPE_DEFAULT);
+    ThrowIfFailed(device->CreateCommittedResource(
+        &defaultHeapProps3D,
+        D3D12_HEAP_FLAG_NONE,
+        &textureDesc,
+        D3D12_RESOURCE_STATE_COPY_DEST,
+        nullptr,
+        IID_PPV_ARGS(&texture)));
+
+    // Create upload buffer with row-pitch alignment
+    // Source data is always RGB (3 floats/voxel) for OCIO 3D LUTs.
+    const UINT srcNumChannels = 3;                       // OCIO 3D LUTs always provide RGB data
+    const UINT dstPixelSize = 4 * sizeof(float);         // RGBA32F
+    const UINT rowPitch = AlignRowPitch(edgelen, dstPixelSize);
+    // Sizes are computed with 64 bits: rowPitch * edgelen^2 does not fit in a UINT for the
+    // larger edge lengths accepted above.
+    const UINT64 slicePitch = static_cast<UINT64>(rowPitch) * edgelen;
+    const UINT64 uploadBufferSize = slicePitch * edgelen;
+
+    CD3DX12_HEAP_PROPERTIES uploadHeapProps3D(D3D12_HEAP_TYPE_UPLOAD);
+    CD3DX12_RESOURCE_DESC uploadBufferDesc3D = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize);
+    ThrowIfFailed(device->CreateCommittedResource(
+        &uploadHeapProps3D,
+        D3D12_HEAP_FLAG_NONE,
+        &uploadBufferDesc3D,
+        D3D12_RESOURCE_STATE_GENERIC_READ,
+        nullptr,
+        IID_PPV_ARGS(&uploadBuffer)));
+
+    // Map RGB source data and expand to RGBA32F
+    BYTE* pData = nullptr;
+    ThrowIfFailed(uploadBuffer->Map(0, nullptr, reinterpret_cast<void**>(&pData)));
+
+    for (UINT z = 0; z < edgelen; ++z)
+    {
+        for (UINT y = 0; y < edgelen; ++y)
+        {
+            // Source rows are tightly packed; destination rows honor the aligned row pitch.
+            const UINT64 srcRowStart
+                = (static_cast<UINT64>(z) * edgelen + y) * edgelen * srcNumChannels;
+            const float* srcRow = values + srcRowStart;
+            float* dstRow = reinterpret_cast<float*>(
+                pData + z * slicePitch + static_cast<UINT64>(y) * rowPitch);
+            for (UINT x = 0; x < edgelen; ++x)
+            {
+                dstRow[x * 4 + 0] = srcRow[x * srcNumChannels + 0]; // R
+                dstRow[x * 4 + 1] = srcRow[x * srcNumChannels + 1]; // G
+                dstRow[x * 4 + 2] = srcRow[x * srcNumChannels + 2]; // B
+                dstRow[x * 4 + 3] = 1.0f;                           // A
+            }
+        }
+    }
+
+    uploadBuffer->Unmap(0, nullptr);
+
+    // Copy from upload buffer to texture
+    D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {};
+    footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+    footprint.Footprint.Width = edgelen;
+    footprint.Footprint.Height = edgelen;
+    footprint.Footprint.Depth = edgelen;
+    footprint.Footprint.RowPitch = rowPitch;
+
+    D3D12_TEXTURE_COPY_LOCATION srcLocation = {};
+    srcLocation.pResource = uploadBuffer.Get();
+    srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+    srcLocation.PlacedFootprint = footprint;
+
+    D3D12_TEXTURE_COPY_LOCATION dstLocation = {};
+    dstLocation.pResource = texture.Get();
+    dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+    dstLocation.SubresourceIndex = 0;
+
+    commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr);
+
+    // Transition to shader resource state
+    auto barrier3D = CD3DX12_RESOURCE_BARRIER::Transition(
+        texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
+    commandList->ResourceBarrier(1, &barrier3D);
+}
+
+} // anonymous namespace
+
+
+//////////////////////////////////////////////////////////
+
+// Build an HLSLBuilder for shaderDesc: allocate every LUT texture, create its SRV in
+// cbvSrvHeap and record the uploads on commandList. The command list is closed before
+// returning but not executed: the caller must execute it and wait for completion.
+// Throws an Exception when any argument is null or when a texture cannot be created.
+HLSLBuilderRcPtr HLSLBuilder::Create(const GpuShaderDescRcPtr& shaderDesc,
+                                     ID3D12Device* device,
+                                     ID3D12GraphicsCommandList* commandList,
+                                     ID3D12DescriptorHeap* cbvSrvHeap)
+{
+    if (!shaderDesc)
+    {
+        throw Exception("HLSLBuilder: shaderDesc is null");
+    }
+    if (!device)
+    {
+        throw Exception("HLSLBuilder: device is null");
+    }
+    if (!commandList)
+    {
+        throw Exception("HLSLBuilder: commandList is null");
+    }
+    if (!cbvSrvHeap)
+    {
+        throw Exception("HLSLBuilder: cbvSrvHeap is null");
+    }
+
+    HLSLBuilderRcPtr builder(new HLSLBuilder(shaderDesc, device, commandList, cbvSrvHeap));
+    builder->allocateAllTextures();
+
+    // Close the command list; the caller executes and fence-waits.
+    ThrowIfFailed(commandList->Close());
+
+    return builder;
+}
+
+// Store the (non-owned) D3D12 objects and cache the CBV/SRV/UAV descriptor increment size.
+HLSLBuilder::HLSLBuilder(const GpuShaderDescRcPtr& shaderDesc,
+                         ID3D12Device* device,
+                         ID3D12GraphicsCommandList* commandList,
+                         ID3D12DescriptorHeap* cbvSrvHeap)
+    : m_shaderDesc(shaderDesc)
+    , m_device(device)
+    , m_commandList(commandList)
+    , m_cbvSrvHeap(cbvSrvHeap)
+    , m_srvDescriptorSize(0)
+    , m_verbose(false)
+{
+    m_srvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+}
+
+HLSLBuilder::~HLSLBuilder()
+{
+    deleteAllTextures();
+}
+
+// Allocate one texture (and its upload buffer) per 3D and 1D/2D LUT of the shader
+// description, record the uploads on the command list and create each SRV at the heap
+// slot given by the texture's shader binding index.
+void HLSLBuilder::allocateAllTextures()
+{
+    deleteAllTextures();
+
+    // Create the SRV of a LUT texture at the given slot of the descriptor heap. The slot
+    // is validated first: writing a descriptor past the end of the heap is undefined.
+    const UINT numDescriptors = m_cbvSrvHeap->GetDesc().NumDescriptors;
+    const auto createSrv = [this, numDescriptors](ID3D12Resource* resource,
+                                                  const D3D12_SHADER_RESOURCE_VIEW_DESC& srvDesc,
+                                                  UINT heapSlot)
+    {
+        if (heapSlot >= numDescriptors)
+        {
+            throw Exception("HLSLBuilder: texture binding index exceeds the descriptor heap size");
+        }
+
+        CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(
+            m_cbvSrvHeap->GetCPUDescriptorHandleForHeapStart(),
+            heapSlot,
+            m_srvDescriptorSize);
+
+        m_device->CreateShaderResourceView(resource, &srvDesc, srvHandle);
+    };
+
+    // Process 3D LUTs
+    const unsigned maxTexture3D = m_shaderDesc->getNum3DTextures();
+    for (unsigned idx = 0; idx < maxTexture3D; ++idx)
+    {
+        // Get 3D texture information
+        const char* textureName = nullptr;
+        const char* samplerName = nullptr;
+        unsigned edgelen = 0;
+        Interpolation interpolation = INTERP_LINEAR;
+        m_shaderDesc->get3DTexture(idx, textureName, samplerName, edgelen, interpolation);
+
+        if (!textureName || !*textureName || !samplerName || !*samplerName || edgelen == 0)
+        {
+            throw Exception("HLSLBuilder: 3D texture data is corrupted");
+        }
+
+        const float* values = nullptr;
+        m_shaderDesc->get3DTextureValues(idx, values);
+        if (!values)
+        {
+            throw Exception("HLSLBuilder: 3D texture values are missing");
+        }
+
+        // Allocate the 3D texture
+        TextureResource texRes(textureName, samplerName);
+        AllocateTexture3D(m_device, m_commandList, edgelen, values, texRes.m_texture, texRes.m_uploadBuffer);
+
+        // Create SRV at the slot matching the HLSL register assignment
+        const UINT heapSlot = m_shaderDesc->get3DTextureShaderBindingIndex(idx);
+
+        D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+        srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+        srvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+        srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
+        srvDesc.Texture3D.MipLevels = 1;
+
+        createSrv(texRes.m_texture.Get(), srvDesc, heapSlot);
+
+        m_textures.push_back(texRes);
+    }
+
+    // Process 1D LUTs (uploaded as 1D or 2D textures, as requested by the shader description)
+    const unsigned maxTexture1D = m_shaderDesc->getNumTextures();
+    for (unsigned idx = 0; idx < maxTexture1D; ++idx)
+    {
+        // Get 1D texture information
+        const char* textureName = nullptr;
+        const char* samplerName = nullptr;
+        unsigned width = 0;
+        unsigned height = 0;
+        GpuShaderDesc::TextureType channel = GpuShaderDesc::TEXTURE_RGB_CHANNEL;
+        Interpolation interpolation = INTERP_LINEAR;
+        GpuShaderDesc::TextureDimensions dimensions = GpuShaderDesc::TEXTURE_1D;
+        m_shaderDesc->getTexture(idx, textureName, samplerName, width, height, channel, dimensions, interpolation);
+
+        if (!textureName || !*textureName || !samplerName || !*samplerName || width == 0)
+        {
+            throw Exception("HLSLBuilder: 1D texture data is corrupted");
+        }
+
+        const float* values = nullptr;
+        m_shaderDesc->getTextureValues(idx, values);
+        if (!values)
+        {
+            throw Exception("HLSLBuilder: 1D texture values are missing");
+        }
+
+        // Determine source channel count: RED=1, RGB=3
+        const UINT numChannels = (channel == GpuShaderDesc::TEXTURE_RED_CHANNEL) ? 1u : 3u;
+        TextureResource texRes(textureName, samplerName);
+
+        // Create SRV at the slot matching the HLSL register assignment
+        const UINT heapSlot = m_shaderDesc->getTextureShaderBindingIndex(idx);
+
+        D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+        srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+        srvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
+
+        if (dimensions == GpuShaderDesc::TEXTURE_1D)
+        {
+            AllocateTexture1D(m_device, m_commandList, width, numChannels, values,
+                              texRes.m_texture, texRes.m_uploadBuffer);
+            srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
+            srvDesc.Texture1D.MipLevels = 1;
+        }
+        else
+        {
+            // The height is only meaningful for 2D textures; a zero height cannot be uploaded.
+            if (height == 0)
+            {
+                throw Exception("HLSLBuilder: 1D texture data is corrupted");
+            }
+
+            AllocateTexture2D(m_device, m_commandList, width, height, numChannels, values,
+                              texRes.m_texture, texRes.m_uploadBuffer);
+            srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+            srvDesc.Texture2D.MipLevels = 1;
+        }
+
+        createSrv(texRes.m_texture.Get(), srvDesc, heapSlot);
+
+        m_textures.push_back(texRes);
+    }
+}
+
+// Drop the references held on all LUT textures and upload buffers.
+void HLSLBuilder::deleteAllTextures()
+{
+    m_textures.clear();
+}
+
+// Return the shader text of the shader description (empty when there is none); the text
+// is also printed to the standard output when the builder is verbose.
+std::string HLSLBuilder::getShaderText() const
+{
+    if (!m_shaderDesc)
+    {
+        return "";
+    }
+
+    const char* shaderText = m_shaderDesc->getShaderText();
+
+    if (m_verbose && shaderText && *shaderText)
+    {
+        std::cout << "\nOCIO HLSL Shader:\n\n" << shaderText << std::endl;
+    }
+
+    return shaderText ? std::string(shaderText) : std::string();
+}
+
+// Return the number of LUT textures currently allocated by the builder.
+UINT HLSLBuilder::getNumSRVs() const
+{
+    return static_cast<UINT>(m_textures.size());
+}
+
+} // namespace OCIO_NAMESPACE
diff --git a/src/libutils/oglapphelpers/hlsl.h b/src/libutils/oglapphelpers/hlsl.h
new file mode 100644
index 0000000000..a0338de3f9
--- /dev/null
+++ b/src/libutils/oglapphelpers/hlsl.h
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright Contributors to the OpenColorIO Project.
+
+#ifndef INCLUDED_OCIO_HLSL_H
+#define INCLUDED_OCIO_HLSL_H
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+
+#include <windows.h>
+
+#include <d3d12.h>
+#include <wrl/client.h>
+
+#include <OpenColorIO/OpenColorIO.h>
+
+namespace OCIO_NAMESPACE
+{
+
+class HLSLBuilder;
+typedef OCIO_SHARED_PTR<HLSLBuilder> HLSLBuilderRcPtr;
+
+// This is a DirectX implementation showing how to do texture upload & allocation
+// for HLSL shaders, mirroring the role of OpenGLBuilder.
+
+class HLSLBuilder
+{
+public:
+    // Create an HLSL builder using GPU shader information from a specific processor.
+    // Allocates the LUT textures and their SRVs at the shaderDesc binding indices. Uploads are
+    // only recorded: commandList is closed on return, the caller must execute it and fence-wait.
+    static HLSLBuilderRcPtr Create(const GpuShaderDescRcPtr& shaderDesc,
+                                   ID3D12Device* device,
+                                   ID3D12GraphicsCommandList* commandList,
+                                   ID3D12DescriptorHeap* cbvSrvHeap);
+
+    ~HLSLBuilder();
+
+    inline void setVerbose(bool verbose) { m_verbose = verbose; }
+    inline bool isVerbose() const { return m_verbose; }
+
+    // Get the OCIO-generated HLSL shader text (also printed when the builder is verbose).
+    std::string getShaderText() const;
+
+    // Get the number of SRVs (texture slots) allocated for LUTs.
+    // This does not include the image texture at slot 0.
+    UINT getNumSRVs() const;
+
+protected:
+    HLSLBuilder(const GpuShaderDescRcPtr& shaderDesc,
+                ID3D12Device* device,
+                ID3D12GraphicsCommandList* commandList,
+                ID3D12DescriptorHeap* cbvSrvHeap);
+
+    void allocateAllTextures();
+    void deleteAllTextures();
+
+private:
+    HLSLBuilder() = delete;
+    HLSLBuilder(const HLSLBuilder&) = delete;
+    HLSLBuilder& operator=(const HLSLBuilder&) = delete;
+
+    struct TextureResource
+    {
+        Microsoft::WRL::ComPtr<ID3D12Resource> m_texture;      // LUT texture sampled by the shader
+        Microsoft::WRL::ComPtr<ID3D12Resource> m_uploadBuffer; // Staging data of the recorded copy
+        std::string m_textureName;
+        std::string m_samplerName;
+
+        TextureResource(const std::string& textureName,
+                        const std::string& samplerName)
+            : m_textureName(textureName)
+            , m_samplerName(samplerName)
+        {}
+    };
+
+    typedef std::vector<TextureResource> TextureResources;
+
+    const GpuShaderDescRcPtr m_shaderDesc;      // Description of the shader
+    ID3D12Device* m_device;                     // D3D12 device (not owned)
+    ID3D12GraphicsCommandList* m_commandList;   // Command list for uploads (not owned)
+    ID3D12DescriptorHeap* m_cbvSrvHeap;         // Descriptor heap for SRVs (not owned)
+    UINT m_srvDescriptorSize;                   // Size of one SRV descriptor
+    TextureResources m_textures;                // All allocated texture resources
+    bool m_verbose;                             // Print shader text for debugging
+};
+
+} // namespace OCIO_NAMESPACE
+
+#endif // INCLUDED_OCIO_HLSL_H
diff --git a/src/libutils/oglapphelpers/metalapp.h b/src/libutils/oglapphelpers/metalapp.h
index 37fba14018..cb8c7acc42 100644
--- a/src/libutils/oglapphelpers/metalapp.h
+++ b/src/libutils/oglapphelpers/metalapp.h
@@ -24,7 +24,7 @@ typedef OCIO_SHARED_PTR<MetalApp> MetalAppRcPtr;
 class MtlTexture;
 typedef OCIO_SHARED_PTR<MtlTexture> MtlTextureRcPtr;
 
-class MetalApp : public ScreenApp
+class MetalApp : public ScreenOglApp
 {
 public:
     MetalApp() = delete;
@@ -53,7 +53,7 @@ class MetalApp : public ScreenApp
 
     // Process the image.
void redisplay() override; - // Return a pointer of either ScreenApp or HeadlessApp depending on the + // Return a pointer of either ScreenOglApp or HeadlessOglApp depending on the // OCIO_HEADLESS_ENABLED preprocessor. static MetalAppRcPtr CreateMetalGlApp(const char * winTitle, int winWidth, int winHeight); diff --git a/src/libutils/oglapphelpers/metalapp.mm b/src/libutils/oglapphelpers/metalapp.mm index be46d4e1b6..4114108848 100644 --- a/src/libutils/oglapphelpers/metalapp.mm +++ b/src/libutils/oglapphelpers/metalapp.mm @@ -43,7 +43,7 @@ }; MetalApp::MetalApp(const char * winTitle, int winWidth, int winHeight) - : ScreenApp(winTitle, winWidth, winHeight) + : ScreenOglApp(winTitle, winWidth, winHeight) { initContext(); } @@ -346,7 +346,7 @@ vertex VertexOut ColorCorrectionVS(unsigned int vId [[ vertex_id ]]) throw Exception("Metal renderer can only consume MSL shaders"); } - if(printShader()) + if(isShaderVerbose()) { std::cout << std::endl; std::cout << "GPU Shader Program:" << std::endl; @@ -380,7 +380,7 @@ vertex VertexOut ColorCorrectionVS(unsigned int vId [[ vertex_id ]]) prepareAndBindOpenGLState(); } - ScreenApp::redisplay(); + ScreenOglApp::redisplay(); } MetalAppRcPtr MetalApp::CreateMetalGlApp(const char * winTitle, int winWidth, int winHeight) diff --git a/src/libutils/oglapphelpers/oglapp.cpp b/src/libutils/oglapphelpers/oglapp.cpp index 32528a55cc..7205cc2b75 100644 --- a/src/libutils/oglapphelpers/oglapp.cpp +++ b/src/libutils/oglapphelpers/oglapp.cpp @@ -12,10 +12,17 @@ #include #elif _WIN32 - #include -#include - +#ifndef NDEBUG + // freeglut's header uses a #pragma comment(lib) that links freeglutd.lib in + // debug builds, which we don't ship. Temporarily define NDEBUG so the header + // selects freeglut.lib instead, then restore the original state. 
+ #define NDEBUG + #include + #undef NDEBUG +#else + #include +#endif // !NDEBUG #else #include @@ -33,6 +40,15 @@ namespace OCIO_NAMESPACE { +GraphicalAppRcPtr OglApp::CreateApp(const char* winTitle, int winWidth, int winHeight) +{ +#ifdef OCIO_HEADLESS_ENABLED + return std::make_shared(winTitle, winWidth, winHeight); +#else + return std::make_shared(winTitle, winWidth, winHeight); +#endif +} + OglApp::OglApp(int winWidth, int winHeight) : m_viewportWidth(winWidth) , m_viewportHeight(winHeight) @@ -113,7 +129,7 @@ void OglApp::redisplay() pts[3] = (float)m_viewportHeight * 0.5f + imgHeightScreenSpace * 0.5f; } - if (m_yMirror) + if (isYMirror()) { std::swap(pts[1], pts[3]); } @@ -162,7 +178,7 @@ void OglApp::reshape(int width, int height) glLoadIdentity(); } -void OglApp::createGLBuffers() +void OglApp::createBuffers() { // Create a framebuffer object, you need to delete them when program exits. GLuint fboId; @@ -197,7 +213,7 @@ void OglApp::setShader(GpuShaderDescRcPtr & shaderDesc) { // Create oglBuilder using the shaderDesc. m_oglBuilder = OpenGLBuilder::Create(shaderDesc); - m_oglBuilder->setVerbose(m_printShader); + m_oglBuilder->setVerbose(isShaderVerbose()); // Allocate & upload all the LUTs in a dedicated GPU texture. 
// Note: The start index for the texture indices is 1 as one texture @@ -227,7 +243,7 @@ void OglApp::setShader(GpuShaderDescRcPtr & shaderDesc) m_oglBuilder->useAllUniforms(); } -void OglApp::printGLInfo() const noexcept +void OglApp::printGraphicsInfo() const noexcept { std::cout << std::endl << "GL Vendor: " << glGetString(GL_VENDOR) << std::endl @@ -262,16 +278,7 @@ void OglApp::setupCommon() glEnable(GL_TEXTURE_2D); } -OglAppRcPtr OglApp::CreateOglApp(const char * winTitle, int winWidth, int winHeight) -{ -#ifdef OCIO_HEADLESS_ENABLED - return std::make_shared(winTitle, winWidth, winHeight); -#else - return std::make_shared(winTitle, winWidth, winHeight); -#endif -} - -ScreenApp::ScreenApp(const char * winTitle, int winWidth, int winHeight): +ScreenOglApp::ScreenOglApp(const char * winTitle, int winWidth, int winHeight): OglApp(winWidth, winHeight) { int argc = 2; @@ -288,25 +295,25 @@ ScreenApp::ScreenApp(const char * winTitle, int winWidth, int winHeight): setupCommon(); } -ScreenApp::~ScreenApp() +ScreenOglApp::~ScreenOglApp() { glutDestroyWindow(m_mainWin); } -void ScreenApp::redisplay() +void ScreenOglApp::redisplay() { OglApp::redisplay(); glutSwapBuffers(); } -void ScreenApp::printGLInfo() const noexcept +void ScreenOglApp::printGraphicsInfo() const noexcept { - OglApp::printGLInfo(); + OglApp::printGraphicsInfo(); } #ifdef OCIO_HEADLESS_ENABLED -HeadlessApp::HeadlessApp(const char * /* winTitle */, int bufWidth, int bufHeight) +HeadlessOglApp::HeadlessOglApp(const char * /* winTitle */, int bufWidth, int bufHeight) : OglApp(bufWidth, bufHeight) , m_pixBufferWidth(bufWidth) , m_pixBufferHeight(bufHeight) @@ -361,27 +368,27 @@ HeadlessApp::HeadlessApp(const char * /* winTitle */, int bufWidth, int bufHeigh setupCommon(); } -HeadlessApp::~HeadlessApp() +HeadlessOglApp::~HeadlessOglApp() { eglTerminate(m_eglDisplay); } -void HeadlessApp::printGLInfo() const noexcept +void HeadlessOglApp::printGraphicsInfo() const noexcept { - OglApp::printGLInfo(); + 
GraphicalApp::printGraphicsInfo(); printEGLInfo(); } -void HeadlessApp::printEGLInfo() const noexcept +void HeadlessOglApp::printEGLInfo() const noexcept { std::cout << std::endl << "EGL Vendor: " << eglQueryString(m_eglDisplay, EGL_VENDOR) << std::endl << "EGL Version: " << eglQueryString(m_eglDisplay, EGL_VERSION) << std::endl; } -void HeadlessApp::redisplay() +void HeadlessOglApp::redisplay() { - OglApp::redisplay(); + GraphicalApp::redisplay(); eglSwapBuffers(m_eglDisplay, m_eglSurface); } diff --git a/src/libutils/oglapphelpers/oglapp.h b/src/libutils/oglapphelpers/oglapp.h index 34b2f7d071..51de4c2281 100644 --- a/src/libutils/oglapphelpers/oglapp.h +++ b/src/libutils/oglapphelpers/oglapp.h @@ -9,29 +9,30 @@ #include #include "glsl.h" +#include "graphicalapp.h" namespace OCIO_NAMESPACE { // Here is some sample code to demonstrate how to use this in a simple app that wants to process // using the GPU and OpenGL. Processed image is expected to have the same size as the input image. -// For an interactive application, OglApp can be used, but other OGL code is required. +// For an interactive application, GraphicalApp can be used, but other OGL code is required. // // See ociodisplay for an example of an interactive app that displays an image in the UI and // ocioconvert and ociochecklut for examples of non-interactive apps that just process values with // the GPU. /* -// Create and initialize OglAppRcPtr by creating a shared pointer to ScreenApp. You have to -// specify the name of the window and its size. OglAppRcPtr that points to HeadlessApp object +// Create and initialize OglAppRcPtr by creating a shared pointer to ScreenOglApp. You have to +// specify the name of the window and its size. OglAppRcPtr that points to HeadlessOglApp object // can be created and used in the same way. 
-OglAppRcPtr scrApp = std::make_shared("Window Name", windowWidth, windowHeight); +OglAppRcPtr scrApp = std::make_shared("Window Name", windowWidth, windowHeight); float * imageBuffer = GetImageBuffer(); int imageWidth = GetImageWidth(); int imageHeight = GetImageHeight(); -scrApp->initImage(imagewidth, imageheight, OglApp::COMPONENTS_RGB, imageBuffer); -scrApp->createGLBuffers(); +scrApp->initImage(imagewidth, imageheight, GraphicalApp::COMPONENTS_RGB, imageBuffer); +scrApp->createBuffers(); // Set (or change) shader. GpuShaderDescRcPtr shader = GpuShaderDesc::CreateShaderDesc(); @@ -51,73 +52,49 @@ scrApp->readImage(imageBufferOut.data()); */ -// Forward declaration of OglApp. -class OglApp; -typedef OCIO_SHARED_PTR OglAppRcPtr; - -class OglApp +class OglApp : public GraphicalApp { public: OglApp() = delete; - OglApp(const OglApp &) = delete; - OglApp & operator=(const OglApp &) = delete; + OglApp(const OglApp&) = delete; + OglApp& operator=(const OglApp&) = delete; // Initialize the app with given window name & client rect size. OglApp(int winWidth, int winHeight); virtual ~OglApp(); - // When displaying the processed image in a window this needs to be done. - // In that case, when image is read, the result will be mirrored on Y. - void setYMirror() - { - m_yMirror = true; - } - - // Shader code will be printed when generated. - void setPrintShader(bool print) - { - m_printShader = print; - } - - enum Components - { - COMPONENTS_RGB = 0, - COMPONENTS_RGBA - }; - - // Initialize the image. virtual void initImage(int imageWidth, int imageHeight, - Components comp, const float * imageBuffer); - // Update the image if it changes. - virtual void updateImage(const float * imageBuffer); + Components comp, const float* imageBuffer) override; + + virtual void updateImage(const float* imageBuffer) override; - // Create GL frame and rendering buffers. Needed if readImage will be used. - void createGLBuffers(); + // Create frame and rendering buffers. 
Needed if readImage will be used. + void createBuffers() override; // Set the shader code. - virtual void setShader(GpuShaderDescRcPtr & shaderDesc); + virtual void setShader(GpuShaderDescRcPtr& shaderDesc) override; // Update the size of the buffer of the OpenGL viewport that will be used to process the image // (it does not modify the UI). To be called at least one time. Use image size if we want to // read back the processed image. To process another image with the same size or using a // different shader, reshape does not need to be called again. In case of an interactive // application it should be called by the glutReshapeFunc callback using the windows size. - void reshape(int width, int height); + void reshape(int width, int height) override; // Process the image. - void virtual redisplay(); + void virtual redisplay() override; // Read the image from the rendering buffer. It is not meant to be used by interactive // applications used to display the image. - virtual void readImage(float * imageBuffer); + virtual void readImage(float* imageBuffer) override; - // Helper to print GL info. - void virtual printGLInfo() const noexcept; + // Helper to print graphics info. + void virtual printGraphicsInfo() const noexcept override; - // Return a pointer of either ScreenApp or HeadlessApp depending on the + // Return a pointer of either ScreenOglApp or HeadlessOglApp depending on the // OCIO_HEADLESS_ENABLED preprocessor. - static OglAppRcPtr CreateOglApp(const char * winTitle, int winWidth, int winHeight); + static GraphicalAppRcPtr CreateApp(const char* winTitle, int winWidth, int winHeight); protected: // Window or output image size (set using reshape). 
@@ -133,8 +110,6 @@ class OglApp void setImageDimensions(int imgWidth, int imgHeight, Components comp); Components getImageComponents() const { return m_components; } - - bool printShader() const { return m_printShader; } OpenGLBuilderRcPtr m_oglBuilder; @@ -142,32 +117,27 @@ class OglApp // Keep track of the original image ratio. float m_imageAspect{ 1.0f }; - // For interactive application displaying the processed image, this needs to be true. - bool m_yMirror{ false }; - - // Will shader code be outputed when setShader is called. - bool m_printShader{ false }; - // Image information. int m_imageWidth{ 0 }; int m_imageHeight{ 0 }; Components m_components{ COMPONENTS_RGBA }; + unsigned int m_imageTexID; }; -class ScreenApp: public OglApp +class ScreenOglApp: public OglApp { public: - ScreenApp() = delete; - ScreenApp(const ScreenApp &) = delete; - ScreenApp & operator=(const ScreenApp &) = delete; + ScreenOglApp() = delete; + ScreenOglApp(const ScreenOglApp &) = delete; + ScreenOglApp & operator=(const ScreenOglApp &) = delete; - ScreenApp(const char * winTitle, int winWidth, int winHeight); + ScreenOglApp(const char * winTitle, int winWidth, int winHeight); - ~ScreenApp(); + ~ScreenOglApp(); void redisplay() override; - void printGLInfo() const noexcept override; + void printGraphicsInfo() const noexcept override; private: // Window identifier returned by glutCreateWindow. 
@@ -178,16 +148,16 @@ class ScreenApp: public OglApp #include -class HeadlessApp: public OglApp +class HeadlessOglApp: public OglApp { public: - HeadlessApp() = delete; + HeadlessOglApp() = delete; - HeadlessApp(const char * winTitle, int bufWidth, int bufHeight); + HeadlessOglApp(const char * winTitle, int bufWidth, int bufHeight); - ~HeadlessApp(); + ~HeadlessOglApp(); - void printGLInfo() const noexcept override; + void printGraphicsInfo() const noexcept override; void redisplay() override; protected: diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index 5f1c0379cf..b39a1d87aa 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -1,8 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright Contributors to the OpenColorIO Project. -if(NOT OCIO_GL_ENABLED) - message(WARNING "GL component missing. Skipping the GPU unit tests.") +if(NOT OCIO_GL_ENABLED AND NOT (WIN32 AND OCIO_DIRECTX_ENABLED)) + message(WARNING "GL or DirectX component missing. Skipping the GPU unit tests.") return() endif() @@ -41,10 +41,55 @@ target_link_libraries(test_gpu_exec testutils ) -add_test(NAME test_gpu COMMAND test_gpu_exec) +if(OCIO_GL_ENABLED) + add_test(NAME test_gpu COMMAND test_gpu_exec) +endif() if(APPLE) add_test(NAME test_metal COMMAND test_gpu_exec -metal) endif() +if(WIN32 AND OCIO_DIRECTX_ENABLED) + add_test(NAME test_dx COMMAND test_gpu_exec --dx) +endif() + +# Copy dxcompiler.dll and dxil.dll to the test output directory. +# These are required at runtime when DXC (IDxcCompiler3) is used for SM6.0 shader compilation. +# The Redist/D3D path is the stable, version-independent redistribution location. +if(WIN32 AND OCIO_DIRECTX_ENABLED) + find_file(DXCOMPILER_DLL + NAMES dxcompiler.dll + PATHS + # Note: x64 hardcoded; update if ARM64 Windows support is needed. 
+ "$ENV{WindowsSdkDir}Redist/D3D/x64" + "C:/Program Files (x86)/Windows Kits/10/Redist/D3D/x64" + NO_DEFAULT_PATH + DOC "Path to dxcompiler.dll from Windows SDK" + ) + if(DXCOMPILER_DLL) + get_filename_component(_dxc_dll_dir "${DXCOMPILER_DLL}" DIRECTORY) + find_file(DXIL_DLL + NAMES dxil.dll + HINTS "${_dxc_dll_dir}" + NO_DEFAULT_PATH + ) + add_custom_command(TARGET test_gpu_exec POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXCOMPILER_DLL}" "$" + COMMENT "Copying dxcompiler.dll to test output directory" + ) + if(DXIL_DLL) + add_custom_command(TARGET test_gpu_exec POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL}" "$" + COMMENT "Copying dxil.dll to test output directory" + ) + endif() + else() + message(STATUS + "dxcompiler.dll not found in Windows SDK Redist/D3D. " + "Add its directory to PATH before running test_dx." + ) + endif() +endif() # Note: To avoid changing PATH from outside the cmake files. if(MSVC AND BUILD_SHARED_LIBS) @@ -58,6 +103,11 @@ if(MSVC AND BUILD_SHARED_LIBS) set(NEW_PATH "${NEW_PATH}\\\;${GLUT_INCLUDE_DIR}/../bin") set(NEW_PATH "${NEW_PATH}\\\;${GLEW_INCLUDE_DIRS}/../bin") - set_tests_properties(test_gpu PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + if(OCIO_GL_ENABLED) + set_tests_properties(test_gpu PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + endif() + if(WIN32 AND OCIO_DIRECTX_ENABLED) + set_tests_properties(test_dx PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + endif() endif() diff --git a/tests/gpu/FixedFunctionOp_test.cpp b/tests/gpu/FixedFunctionOp_test.cpp index 6c3879d858..6c5130bbda 100644 --- a/tests/gpu/FixedFunctionOp_test.cpp +++ b/tests/gpu/FixedFunctionOp_test.cpp @@ -303,7 +303,8 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces_gamutcomp13_inv) }; test.setCustomValues(values); - test.setErrorThreshold(3e-6f); + // 3e-5 accommodates GPU pow() precision at large output values (~3.08) on DirectX. 
+ test.setErrorThreshold(3e-5f); } OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_fwd) @@ -618,8 +619,8 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_1000nit_p3_rndtrip) // TODO: Investigate why this is not closer. // Setting the CPUProcessor to OPTIMIZATION_NONE helps slightly, but is not the main - // cause of the error. - test.setErrorThreshold(0.012f); + // cause of the error. 0.014 accommodates DirectX floating point differences. + test.setErrorThreshold(0.014f); } OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_1000nit_p3_inv) @@ -692,8 +693,8 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_rec2020_rndtrip) GenerateIdentityLut3D(values, lut_size, lum_scale); test.setCustomValues(values); - // TODO: Investigate why this is not closer. - test.setErrorThreshold(0.03f); + // TODO: Investigate why this is not closer. 0.034 accommodates DirectX floating point differences. + test.setErrorThreshold(0.034f); } OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_rec2020_inv) diff --git a/tests/gpu/GPUUnitTest.cpp b/tests/gpu/GPUUnitTest.cpp index 6508131218..838954e128 100644 --- a/tests/gpu/GPUUnitTest.cpp +++ b/tests/gpu/GPUUnitTest.cpp @@ -15,7 +15,16 @@ #include "apputils/argparse.h" #include "utils/StringUtils.h" +#include "graphicalapp.h" + +#ifdef OCIO_GL_ENABLED #include "oglapp.h" +#endif + +#ifdef OCIO_DIRECTX_ENABLED +#include "dxapp.h" +#endif + #if __APPLE__ #include "metalapp.h" #endif @@ -194,12 +203,12 @@ namespace constexpr unsigned g_winHeight = 256; constexpr unsigned g_components = 4; - void AllocateImageTexture(OCIO::OglAppRcPtr & app) + void AllocateImageTexture(OCIO::GraphicalAppRcPtr& app) { const unsigned numEntries = g_winWidth * g_winHeight * g_components; OCIOGPUTest::CustomValues::Values image(numEntries, 0.0f); - app->initImage(g_winWidth, g_winHeight, OCIO::OglApp::COMPONENTS_RGBA, &image[0]); + app->initImage(g_winWidth, g_winHeight, OCIO::GraphicalApp::COMPONENTS_RGBA, &image[0]); } void SetTestValue(float * 
image, float val, unsigned numComponents) @@ -214,7 +223,7 @@ namespace } } - void UpdateImageTexture(OCIO::OglAppRcPtr & app, OCIOGPUTestRcPtr & test) + void UpdateImageTexture(OCIO::GraphicalAppRcPtr & app, OCIOGPUTestRcPtr & test) { // Note: User-specified custom values are padded out // to the preferred size (g_winWidth x g_winHeight). @@ -328,9 +337,9 @@ namespace app->updateImage(&values.m_inputValues[0]); } - void UpdateOCIOGLState(OCIO::OglAppRcPtr & app, OCIOGPUTestRcPtr & test) + void UpdateOCIOGPUState(OCIO::GraphicalAppRcPtr & app, OCIOGPUTestRcPtr & test) { - app->setPrintShader(test->isVerbose()); + app->setShaderVerbose(test->isVerbose()); OCIO::ConstProcessorRcPtr & processor = test->getProcessor(); OCIO::GpuShaderDescRcPtr & shaderDesc = test->getShaderDesc(); @@ -385,7 +394,7 @@ namespace constexpr size_t invalidIndex = std::numeric_limits::max(); // Validate the GPU processing against the CPU one. - void ValidateImageTexture(OCIO::OglAppRcPtr & app, OCIOGPUTestRcPtr & test) + void ValidateImageTexture(OCIO::GraphicalAppRcPtr & app, OCIOGPUTestRcPtr & test) { // Each retest is rebuilding a cpu proc. 
OCIO::ConstCPUProcessorRcPtr processor = test->getProcessor()->getDefaultCPUProcessor(); @@ -536,6 +545,7 @@ int main(int argc, const char ** argv) bool printHelp = false; bool useMetalRenderer = false; + bool useDxRenderer = false; bool verbose = false; bool stopOnFirstError = false; @@ -546,6 +556,9 @@ int main(int argc, const char ** argv) ap.options("\nCommand line arguments:\n", "--help", &printHelp, "Print help message", "--metal", &useMetalRenderer, "Run the GPU unit test with Metal", +#ifdef OCIO_DIRECTX_ENABLED + "--dx", &useDxRenderer, "Run the GPU unit test with DirectX 12", +#endif "-v", &verbose, "Output the GPU shader program", "--stop_on_error", &stopOnFirstError, "Stop on the first error", "--run_only %s", &filter, "Run only some unit tests\n" @@ -588,8 +601,8 @@ int main(int argc, const char ** argv) } // Step 1: Initialize the graphic library engines. - OCIO::OglAppRcPtr app; - + OCIO::GraphicalAppRcPtr app; + try { if(useMetalRenderer) @@ -601,9 +614,20 @@ int main(int argc, const char ** argv) return 1; #endif } +#ifdef OCIO_DIRECTX_ENABLED + else if(useDxRenderer) + { + app = std::make_shared("GPU tests - DirectX 12", 10, 10); + } +#endif else { - app = OCIO::OglApp::CreateOglApp("GPU tests", 10, 10); +#ifdef OCIO_GL_ENABLED + app = OCIO::OglApp::CreateApp("GPU tests - OpenGL", 10, 10); +#else + std::cerr << std::endl << "No GPU backend available." << std::endl; + return 1; +#endif } } catch (const OCIO::Exception & e) @@ -612,13 +636,13 @@ int main(int argc, const char ** argv) return 1; } - app->printGLInfo(); + app->printGraphicsInfo(); // Step 2: Allocate the texture that holds the image. AllocateImageTexture(app); // Step 3: Create the frame buffer and render buffer. - app->createGLBuffers(); + app->createBuffers(); app->reshape(g_winWidth, g_winHeight); @@ -661,12 +685,32 @@ int main(int argc, const char ** argv) // Prepare the unit test. 
test->setVerbose(verbose); - test->setShadingLanguage( + + // Select the appropriate shading language based on the renderer + OCIO::GpuLanguage shadingLanguage; #if __APPLE__ - useMetalRenderer ? - OCIO::GPU_LANGUAGE_MSL_2_0 : + if (useMetalRenderer) + { + shadingLanguage = OCIO::GPU_LANGUAGE_MSL_2_0; + } + else +#endif +#ifdef OCIO_DIRECTX_ENABLED + if (useDxRenderer) + { + shadingLanguage = OCIO::GPU_LANGUAGE_HLSL_SM_5_0; + } + else +#endif + { +#ifdef OCIO_GL_ENABLED + shadingLanguage = OCIO::GPU_LANGUAGE_GLSL_1_2; +#else + // This should never happen since we check for available backends earlier + shadingLanguage = OCIO::GPU_LANGUAGE_GLSL_1_2; #endif - OCIO::GPU_LANGUAGE_GLSL_1_2); + } + test->setShadingLanguage(shadingLanguage); bool enabledTest = true; try @@ -697,7 +741,7 @@ int main(int argc, const char ** argv) UpdateImageTexture(app, test); // Update the GPU shader program. - UpdateOCIOGLState(app, test); + UpdateOCIOGPUState(app, test); const size_t numRetest = test->getNumRetests(); // Need to run once and for each retest. diff --git a/tests/gpu/MatrixOp_test.cpp b/tests/gpu/MatrixOp_test.cpp index 6f47362d8e..6da6ea8a64 100644 --- a/tests/gpu/MatrixOp_test.cpp +++ b/tests/gpu/MatrixOp_test.cpp @@ -9,7 +9,8 @@ namespace OCIO = OCIO_NAMESPACE; -const float g_epsilon = 5e-7f; +// 1e-6 accommodates 1-2 ULP FMA rounding with DirectX tests (matrix output values ~7). +const float g_epsilon = 1e-6f; // Helper method to build unit tests