diff --git a/antora/modules/ROOT/nav.adoc b/antora/modules/ROOT/nav.adoc index 89c2dc6fd..37673e11a 100644 --- a/antora/modules/ROOT/nav.adoc +++ b/antora/modules/ROOT/nav.adoc @@ -128,6 +128,16 @@ *** xref:Building_a_Simple_Engine/Mobile_Development/04_rendering_approaches.adoc[Rendering approaches] *** xref:Building_a_Simple_Engine/Mobile_Development/05_vulkan_extensions.adoc[Vulkan extensions] *** xref:Building_a_Simple_Engine/Mobile_Development/06_conclusion.adoc[Conclusion] +** Extra Courses +*** Opacity Micromaps +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/00_introduction.adoc[Introduction] +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/01_the_shadow_problem.adoc[The shadow problem] +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/02_why_alpha_testing_is_expensive.adoc[Why alpha testing is expensive] +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/03_what_are_micromaps.adoc[What are micromaps] +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/04_hardware_traversal_with_omm.adoc[Hardware traversal with OMM] +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc[Implementation overview] +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/06_results_guidance_and_tradeoffs.adoc[Results, guidance and tradeoffs] +**** xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/07_conclusion.adoc[Conclusion] ** Advanced Topics *** xref:Building_a_Simple_Engine/Advanced_Topics/01_introduction.adoc[Introduction] *** xref:Building_a_Simple_Engine/Advanced_Topics/Rendering_Pipeline_Overview.adoc[Rendering pipeline overview] diff --git a/attachments/simple_engine/CMake/FindKTX.cmake b/attachments/simple_engine/CMake/FindKTX.cmake index e9d77d695..2fbea7c6a 100644 --- a/attachments/simple_engine/CMake/FindKTX.cmake +++ b/attachments/simple_engine/CMake/FindKTX.cmake @@ -13,99 +13,92 @@ # KTX::ktx # -# Check if we're on Linux - if so, we'll skip the search 
and directly use FetchContent -if(UNIX AND NOT APPLE) - # On Linux, we assume KTX is not installed and proceed directly to fetching it - set(KTX_FOUND FALSE) -else() - # On non-Linux platforms, try to find KTX using pkg-config first - find_package(PkgConfig QUIET) - if(PKG_CONFIG_FOUND) +# Try to find KTX using pkg-config first +find_package(PkgConfig QUIET) +if (PKG_CONFIG_FOUND) pkg_check_modules(PC_KTX QUIET ktx libktx ktx2 libktx2) - endif() +endif () - # Try to find KTX using standard find_package - find_path(KTX_INCLUDE_DIR - NAMES ktx.h - PATH_SUFFIXES include ktx KTX ktx2 KTX2 - HINTS - ${PC_KTX_INCLUDEDIR} - /usr/include - /usr/local/include - $ENV{KTX_DIR}/include - $ENV{VULKAN_SDK}/include - ${CMAKE_SOURCE_DIR}/external/ktx/include - ) - - find_library(KTX_LIBRARY - NAMES ktx ktx2 libktx libktx2 - PATH_SUFFIXES lib lib64 - HINTS - ${PC_KTX_LIBDIR} - /usr/lib - /usr/lib64 - /usr/local/lib - /usr/local/lib64 - $ENV{KTX_DIR}/lib - $ENV{VULKAN_SDK}/lib - ${CMAKE_SOURCE_DIR}/external/ktx/lib - ) - - if (KTX_INCLUDE_DIR AND KTX_LIBRARY) - set(KTX_FOUND TRUE) - else () - set(KTX_FOUND FALSE) - endif() -endif() +# Try to find KTX using standard find_path and find_library +find_path(KTX_INCLUDE_DIR + NAMES ktx.h + PATH_SUFFIXES include ktx KTX ktx2 KTX2 + HINTS + ${PC_KTX_INCLUDEDIR} + /usr/include + /usr/local/include + $ENV{KTX_DIR}/include + $ENV{VULKAN_SDK}/include + ${CMAKE_SOURCE_DIR}/external/ktx/include +) -if(KTX_FOUND) - set(KTX_INCLUDE_DIRS ${KTX_INCLUDE_DIR}) - set(KTX_LIBRARIES ${KTX_LIBRARY}) +find_library(KTX_LIBRARY + NAMES ktx ktx2 libktx libktx2 + PATH_SUFFIXES lib lib64 + HINTS + ${PC_KTX_LIBDIR} + /usr/lib + /usr/lib64 + /usr/local/lib + /usr/local/lib64 + $ENV{KTX_DIR}/lib + $ENV{VULKAN_SDK}/lib + ${CMAKE_SOURCE_DIR}/external/ktx/lib +) - if(NOT TARGET KTX::ktx) - add_library(KTX::ktx UNKNOWN IMPORTED) - set_target_properties(KTX::ktx PROPERTIES - IMPORTED_LOCATION "${KTX_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${KTX_INCLUDE_DIRS}" - ) - 
endif() -else() - # If not found, use FetchContent to download and build - include(FetchContent) +# If not found in the system, use FetchContent to download and build +if (NOT KTX_INCLUDE_DIR OR NOT KTX_LIBRARY) + include(FetchContent) - # Only show the message on non-Linux platforms - if(NOT (UNIX AND NOT APPLE)) message(STATUS "KTX not found, fetching from GitHub...") - endif() - FetchContent_Declare( - ktx - GIT_REPOSITORY https://github.com/KhronosGroup/KTX-Software.git - GIT_TAG v4.4.2 # Use a specific tag for stability - ) - - # Set options to minimize build time and dependencies - set(KTX_FEATURE_TOOLS OFF CACHE BOOL "Build KTX tools" FORCE) - set(KTX_FEATURE_DOC OFF CACHE BOOL "Build KTX documentation" FORCE) - set(KTX_FEATURE_TESTS OFF CACHE BOOL "Build KTX tests" FORCE) + FetchContent_Declare( + ktx + GIT_REPOSITORY https://github.com/KhronosGroup/KTX-Software.git + GIT_TAG v4.4.2 + ) - FetchContent_MakeAvailable(ktx) + # Minimize build time and dependencies + set(KTX_FEATURE_TOOLS OFF CACHE BOOL "Build KTX tools" FORCE) + set(KTX_FEATURE_DOC OFF CACHE BOOL "Build KTX documentation" FORCE) + set(KTX_FEATURE_TESTS OFF CACHE BOOL "Build KTX tests" FORCE) + set(KTX_FEATURE_STATIC_LIBRARY ON CACHE BOOL "Build KTX as static library" FORCE) - # Create an alias to match the expected target name - if(NOT TARGET KTX::ktx) - add_library(KTX::ktx ALIAS ktx) - endif() + FetchContent_MakeAvailable(ktx) - # Set variables to indicate that KTX was found and to satisfy find_package_handle_standard_args - set(KTX_FOUND TRUE) - FetchContent_GetProperties(ktx SOURCE_DIR ktx_SOURCE_DIR) - set(KTX_INCLUDE_DIR "${ktx_SOURCE_DIR}/include") - set(KTX_LIBRARY ktx) - set(KTX_INCLUDE_DIRS ${KTX_INCLUDE_DIR}) - set(KTX_LIBRARIES ${KTX_LIBRARY}) -endif() + if (TARGET ktx) + get_target_property(KTX_TARGET_INCLUDE_DIR ktx INTERFACE_INCLUDE_DIRECTORIES) + if (KTX_TARGET_INCLUDE_DIR) + set(KTX_INCLUDE_DIR ${KTX_TARGET_INCLUDE_DIR}) + else () + FetchContent_GetProperties(ktx SOURCE_DIR 
ktx_SOURCE_DIR) + set(KTX_INCLUDE_DIR ${ktx_SOURCE_DIR}/include) + endif () + set(KTX_LIBRARY ktx) + endif () +endif () include(FindPackageHandleStandardArgs) find_package_handle_standard_args(KTX REQUIRED_VARS KTX_INCLUDE_DIR KTX_LIBRARY ) + +if (KTX_FOUND) + set(KTX_INCLUDE_DIRS ${KTX_INCLUDE_DIR}) + set(KTX_LIBRARIES ${KTX_LIBRARY}) + + if (NOT TARGET KTX::ktx) + if (TARGET ktx) + # FetchContent target already exists — alias it + add_library(KTX::ktx ALIAS ktx) + else () + add_library(KTX::ktx UNKNOWN IMPORTED) + set_target_properties(KTX::ktx PROPERTIES + IMPORTED_LOCATION "${KTX_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${KTX_INCLUDE_DIR}" + ) + endif () + endif () +endif () + +mark_as_advanced(KTX_INCLUDE_DIR KTX_LIBRARY) diff --git a/attachments/simple_engine/CMake/FindVulkanHpp.cmake b/attachments/simple_engine/CMake/FindVulkanHpp.cmake index 2c0e23abf..11fd67d6b 100644 --- a/attachments/simple_engine/CMake/FindVulkanHpp.cmake +++ b/attachments/simple_engine/CMake/FindVulkanHpp.cmake @@ -71,6 +71,10 @@ find_path(VulkanProfiles_INCLUDE_DIR # Function to extract Vulkan version from vulkan_core.h function(extract_vulkan_version VULKAN_CORE_H_PATH OUTPUT_VERSION_TAG) + if (NOT EXISTS ${VULKAN_CORE_H_PATH}) + set(${OUTPUT_VERSION_TAG} "v1.3.275" PARENT_SCOPE) + return() + endif () # Extract the version information from vulkan_core.h file(STRINGS ${VULKAN_CORE_H_PATH} VULKAN_VERSION_MAJOR_LINE REGEX "^#define VK_VERSION_MAJOR") file(STRINGS ${VULKAN_CORE_H_PATH} VULKAN_VERSION_MINOR_LINE REGEX "^#define VK_VERSION_MINOR") @@ -137,6 +141,7 @@ elseif(DEFINED ENV{VULKAN_SDK}) find_file(VULKAN_CORE_H vulkan_core.h PATHS $ENV{VULKAN_SDK}/include/vulkan + $ENV{VULKAN_SDK}/x86_64/include/vulkan NO_DEFAULT_PATH ) @@ -177,6 +182,162 @@ else() endif() endif() +# Check if the detected version is less than 1.4.351 +string(REGEX REPLACE "^v" "" VULKAN_VERSION_NUM "${VULKAN_VERSION_TAG}") +if (VULKAN_VERSION_NUM VERSION_LESS "1.4.351") + message(STATUS "Vulkan version 
${VULKAN_VERSION_NUM} is less than 1.4.351. Fetching latest Vulkan-Headers from git...") + include(FetchContent) + FetchContent_Declare( + VulkanHeaders + GIT_REPOSITORY https://github.com/KhronosGroup/Vulkan-Headers.git + GIT_TAG main + ) + FetchContent_Populate(VulkanHeaders) + + # Override Vulkan_INCLUDE_DIR to use the git headers + set(Vulkan_INCLUDE_DIR "${vulkanheaders_SOURCE_DIR}/include" CACHE PATH "Path to Vulkan headers" FORCE) + set(VULKAN_VERSION_TAG "main") + + # Force fetching of Vulkan-Hpp and Vulkan-Profiles + set(VulkanHpp_INCLUDE_DIR "VulkanHpp_INCLUDE_DIR-NOTFOUND" CACHE PATH "" FORCE) + set(VulkanHpp_CPPM_DIR "VulkanHpp_CPPM_DIR-NOTFOUND" CACHE PATH "" FORCE) + set(VulkanProfiles_INCLUDE_DIR "VulkanProfiles_INCLUDE_DIR-NOTFOUND" CACHE PATH "" FORCE) + + message(STATUS "Using Vulkan-Headers from git: ${Vulkan_INCLUDE_DIR}") + + # Update the existing Vulkan::Headers target if it exists + if (TARGET Vulkan::Headers) + set_target_properties(Vulkan::Headers PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Vulkan_INCLUDE_DIR}" + ) + endif () + if (TARGET Vulkan::Vulkan) + set_target_properties(Vulkan::Vulkan PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Vulkan_INCLUDE_DIR}" + ) + endif () +endif () + +# ── Minimum version check for VK_KHR_opacity_micromap ───────────────────────── +# VkAccelerationStructureTrianglesOpacityMicromapKHR first appeared in Vulkan +# headers 1.4.351 (VK_KHR_opacity_micromap promotion from EXT to KHR). +# If the installed SDK/system headers are older, we fetch both Vulkan-Headers +# (C structs) and Vulkan-Hpp (C++ wrappers) from the GitHub main branch so the +# project can compile. The fetched C headers are prepended to all include +# paths at the end of this file so they shadow the too-old SDK headers. +# This block of code should be removed once SDK 351 is released. 
+set(VULKAN_KHR_OMM_MIN_HEADER_VERSION 351)
+set(VULKAN_HEADERS_SUFFICIENT FALSE)
+
+if (VULKAN_CORE_H AND EXISTS "${VULKAN_CORE_H}")
+    file(STRINGS "${VULKAN_CORE_H}" _vk_hdr_line REGEX "^#define VK_HEADER_VERSION ")
+    file(STRINGS "${VULKAN_CORE_H}" _vk_cmp_line REGEX "^#define VK_HEADER_VERSION_COMPLETE")
+    string(REGEX MATCH "[0-9]+" _vk_patch "${_vk_hdr_line}")
+    if (_vk_cmp_line MATCHES "VK_MAKE_API_VERSION\\([^,]+,[ \t]*([0-9]+),[ \t]*([0-9]+),")
+        set(_vk_major "${CMAKE_MATCH_1}")
+        set(_vk_minor "${CMAKE_MATCH_2}")
+    else ()
+        set(_vk_major 0)
+        set(_vk_minor 0)
+    endif ()
+    if ((_vk_major GREATER 1) OR
+        (_vk_major EQUAL 1 AND _vk_minor GREATER 4) OR
+        (_vk_major EQUAL 1 AND _vk_minor EQUAL 4 AND
+         _vk_patch GREATER_EQUAL VULKAN_KHR_OMM_MIN_HEADER_VERSION))  # an empty patch compares false
+        set(VULKAN_HEADERS_SUFFICIENT TRUE)
+    endif ()
+    message(STATUS "Installed Vulkan headers: ${_vk_major}.${_vk_minor}.${_vk_patch} — need >= 1.4.${VULKAN_KHR_OMM_MIN_HEADER_VERSION} for VK_KHR_opacity_micromap")
+    unset(_vk_hdr_line)
+    unset(_vk_cmp_line)
+    unset(_vk_patch)
+    unset(_vk_major)
+    unset(_vk_minor)
+else ()
+    message(STATUS "Could not verify Vulkan header version — will fetch latest for VK_KHR_opacity_micromap safety")
+endif ()
+
+if (NOT VULKAN_HEADERS_SUFFICIENT)
+    message(STATUS "Installed Vulkan headers too old for VK_KHR_opacity_micromap — fetching from GitHub...")
+
+    include(FetchContent)
+    if (POLICY CMP0169)
+        cmake_policy(SET CMP0169 OLD)
+    endif ()
+
+    # ── Step 1: Fetch Vulkan-Hpp at main ──────────────────────────────────────
+    # Use a distinct content name (VulkanHppMain, not VulkanHpp) so a stale
+    # FetchContent cache entry from a prior run at an older versioned tag is
+    # never silently reused here.
+    FetchContent_Declare(
+        VulkanHppMain
+        GIT_REPOSITORY https://github.com/KhronosGroup/Vulkan-Hpp.git
+        GIT_TAG main
+    )
+    FetchContent_GetProperties(VulkanHppMain SOURCE_DIR VulkanHppMain_SOURCE_DIR POPULATED VulkanHppMain_POPULATED)
+    if (NOT VulkanHppMain_POPULATED)  # explicit POPULATED output above; it is not set implicitly
+        FetchContent_Populate(VulkanHppMain)
+        FetchContent_GetProperties(VulkanHppMain SOURCE_DIR VulkanHppMain_SOURCE_DIR)
+    endif ()
+    message(STATUS "Fetched Vulkan-Hpp (main): ${VulkanHppMain_SOURCE_DIR}")
+
+    # ── Step 2: Determine the exact VK_HEADER_VERSION Vulkan-Hpp expects ─────
+    # vulkan.hpp contains a static_assert that pins the required C header
+    # version, e.g.:
+    #   static_assert( VK_HEADER_VERSION == 352, "Wrong VK_HEADER_VERSION!" );
+    # Vulkan-Hpp and Vulkan-Headers advance separately on 'main' and can diverge
+    # by one revision, which trips that assertion at compile time. Parse the
+    # expected version (matching the comparison itself, so the lower-case
+    # static_assert spelling is found) and fetch Vulkan-Headers at that tag.
+    set(_vk_headers_tag "main")
+    set(_vk_headers_content_name "VulkanHeadersC_main")
+    if (EXISTS "${VulkanHppMain_SOURCE_DIR}/vulkan/vulkan.hpp")
+        file(STRINGS "${VulkanHppMain_SOURCE_DIR}/vulkan/vulkan.hpp" _assert_line
+             REGEX "VK_HEADER_VERSION[ \t]*==[ \t]*[0-9]+")
+        if (_assert_line MATCHES "==[ \t]*([0-9]+)")
+            set(_vk_expected_patch "${CMAKE_MATCH_1}")
+            set(_vk_headers_tag "v1.4.${_vk_expected_patch}")
+            # Encode the patch version in the content name so FetchContent's
+            # cache is invalidated automatically when Vulkan-Hpp advances to a
+            # new header version — no manual cache wipe needed.
+            set(_vk_headers_content_name "VulkanHeadersC_v1_4_${_vk_expected_patch}")
+            message(STATUS "Vulkan-Hpp (main) asserts VK_HEADER_VERSION == ${_vk_expected_patch} — fetching Vulkan-Headers at ${_vk_headers_tag}")
+            unset(_vk_expected_patch)
+        else ()
+            message(STATUS "Could not parse VK_HEADER_VERSION assertion from vulkan.hpp — falling back to Vulkan-Headers main")
+        endif ()
+        unset(_assert_line)
+    endif ()
+
+    # ── Step 3: Fetch Vulkan-Headers at the matched versioned tag ─────────────
+    FetchContent_Declare(
+        ${_vk_headers_content_name}
+        GIT_REPOSITORY https://github.com/KhronosGroup/Vulkan-Headers.git
+        GIT_TAG "${_vk_headers_tag}"
+    )
+    FetchContent_GetProperties(${_vk_headers_content_name} SOURCE_DIR _VulkanHeadersC_srcdir POPULATED ${_vk_headers_content_name}_POPULATED)
+    if (NOT ${_vk_headers_content_name}_POPULATED)  # variable named by the POPULATED output above
+        FetchContent_Populate(${_vk_headers_content_name})
+        FetchContent_GetProperties(${_vk_headers_content_name} SOURCE_DIR _VulkanHeadersC_srcdir)
+    endif ()
+    # Vulkan-Headers lays out its headers under include/vulkan/
+    set(VULKAN_FETCHED_HEADERS_INCLUDE "${_VulkanHeadersC_srcdir}/include")
+    message(STATUS "Fetched Vulkan-Headers (${_vk_headers_tag}): ${VULKAN_FETCHED_HEADERS_INCLUDE}")
+    unset(_vk_headers_tag)
+    unset(_vk_headers_content_name)
+    unset(_VulkanHeadersC_srcdir)
+
+    # Set VulkanHpp_INCLUDE_DIR (FORCE cache) so the existing versioned-fetch
+    # block below sees the directory as already satisfied and does not re-fetch
+    # at the old SDK-matched tag.
+ set(VulkanHpp_INCLUDE_DIR "${VulkanHppMain_SOURCE_DIR}" CACHE PATH + "Vulkan-Hpp include directory (fetched at main for VK_KHR_opacity_micromap)" FORCE) + if (EXISTS "${VulkanHppMain_SOURCE_DIR}/vulkan/vulkan.cppm") + set(VulkanHpp_CPPM_DIR "${VulkanHppMain_SOURCE_DIR}" CACHE PATH + "Vulkan-Hpp cppm directory (fetched at main)" FORCE) + endif () +endif () +# ── End of minimum version check ────────────────────────────────────────────── + # If the include directory wasn't found, use FetchContent to download and build if(NOT VulkanHpp_INCLUDE_DIR OR NOT VulkanHpp_CPPM_DIR) # If not found, use FetchContent to download @@ -207,6 +368,10 @@ if(NOT VulkanHpp_INCLUDE_DIR OR NOT VulkanHpp_CPPM_DIR) # Set the include directory to the source directory set(VulkanHpp_INCLUDE_DIR ${VulkanHpp_SOURCE_DIR}) + # Ensure we also include the Vulkan-Headers if we fetched them + if (vulkanheaders_SOURCE_DIR) + list(APPEND VulkanHpp_INCLUDE_DIR "${vulkanheaders_SOURCE_DIR}/include") + endif () message(STATUS "VulkanHpp_SOURCE_DIR: ${VulkanHpp_SOURCE_DIR}") message(STATUS "VulkanHpp_INCLUDE_DIR: ${VulkanHpp_INCLUDE_DIR}") @@ -300,6 +465,18 @@ message(STATUS "VULKANHPP_FOUND: ${VULKANHPP_FOUND}") if(VulkanHpp_FOUND) set(VulkanHpp_INCLUDE_DIRS ${VulkanHpp_INCLUDE_DIR}) + # Force git headers to the front to avoid conflicts with system headers + if (vulkanheaders_SOURCE_DIR) + include_directories(BEFORE SYSTEM "${vulkanheaders_SOURCE_DIR}/include") + endif () + include_directories(BEFORE SYSTEM "${VulkanHpp_INCLUDE_DIR}") + # The pinned C headers (VulkanHeadersC_v1_4_XXX) must shadow any unpinned + # main-branch headers added above. Add them last so BEFORE prepends them + # to the front of the global include path, ahead of vulkanheaders_SOURCE_DIR. 
+ if (DEFINED VULKAN_FETCHED_HEADERS_INCLUDE AND EXISTS "${VULKAN_FETCHED_HEADERS_INCLUDE}") + include_directories(BEFORE SYSTEM "${VULKAN_FETCHED_HEADERS_INCLUDE}") + endif () + # Make sure VulkanHpp_CPPM_DIR is set if(NOT DEFINED VulkanHpp_CPPM_DIR) # Check if vulkan.cppm exists in the include directory @@ -423,4 +600,41 @@ export namespace vk { message(STATUS "Final VulkanHpp_CPPM_DIR: ${VulkanHpp_CPPM_DIR}") endif() +# ── Prepend fetched C headers so they shadow the too-old SDK headers ────────── +# This must run after all the VulkanHpp_INCLUDE_DIRS / target setup above so +# that we can insert at position 0 regardless of which code path ran. +# This section should be removed when SDK 351 is released. +if (DEFINED VULKAN_FETCHED_HEADERS_INCLUDE AND EXISTS "${VULKAN_FETCHED_HEADERS_INCLUDE}") + if (DEFINED VulkanHpp_INCLUDE_DIRS) + list(INSERT VulkanHpp_INCLUDE_DIRS 0 "${VULKAN_FETCHED_HEADERS_INCLUDE}") + else () + set(VulkanHpp_INCLUDE_DIRS "${VULKAN_FETCHED_HEADERS_INCLUDE}") + endif () + # Update the imported target so any consumer that links VulkanHpp::VulkanHpp + # also gets the fetched headers first in its include path. + if (TARGET VulkanHpp::VulkanHpp) + set_target_properties(VulkanHpp::VulkanHpp PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${VulkanHpp_INCLUDE_DIRS}") + endif () + # Patch the Vulkan::Headers / Vulkan::Vulkan targets that FindVulkan.cmake + # created — they point to the old SDK, so prepend the fetched headers there + # too so that direct users of those targets also see the newer definitions. 
+ foreach (_vk_tgt Vulkan::Headers Vulkan::Vulkan) + if (TARGET "${_vk_tgt}") + get_target_property(_vk_incdirs "${_vk_tgt}" INTERFACE_INCLUDE_DIRECTORIES) + if (_vk_incdirs) + list(INSERT _vk_incdirs 0 "${VULKAN_FETCHED_HEADERS_INCLUDE}") + else () + set(_vk_incdirs "${VULKAN_FETCHED_HEADERS_INCLUDE}") + endif () + set_target_properties("${_vk_tgt}" PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_vk_incdirs}") + endif () + endforeach () + unset(_vk_tgt) + unset(_vk_incdirs) + message(STATUS "Prepended fetched Vulkan-Headers to all include paths for VK_KHR_opacity_micromap") +endif () +# ── End of fetched-headers prepend ──────────────────────────────────────────── + mark_as_advanced(VulkanHpp_INCLUDE_DIR VulkanHpp_CPPM_DIR) diff --git a/attachments/simple_engine/CMakeLists.txt b/attachments/simple_engine/CMakeLists.txt index 6b6b6cb86..55f2fd1e3 100644 --- a/attachments/simple_engine/CMakeLists.txt +++ b/attachments/simple_engine/CMakeLists.txt @@ -2,6 +2,13 @@ cmake_minimum_required(VERSION 3.29) project(SimpleEngine VERSION 1.0.0 LANGUAGES CXX C) +# ───────────────────────────────────────────────────────────────────────────── +# Course modules — optional self-contained feature extensions +# Each course lives entirely in Courses/ and is linked into the engine target. +# Disable globally with: cmake -DENABLE_COURSES=OFF .. 
+# ───────────────────────────────────────────────────────────────────────────── +option(ENABLE_COURSES "Build the Simple Engine course modules" OFF) + # Option to enable/disable Vulkan C++20 module support for this standalone project option(ENABLE_CPP20_MODULE "Enable C++ 20 module support for Vulkan in SimpleEngine" OFF) @@ -30,11 +37,16 @@ if(ENABLE_CPP20_MODULE) target_compile_definitions(VulkanCppModule PUBLIC VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1 VULKAN_HPP_NO_STRUCT_CONSTRUCTORS=1 ) - target_include_directories(VulkanCppModule - PUBLIC - "${Vulkan_INCLUDE_DIR}" - "${VulkanHpp_INCLUDE_DIRS}" - ) + # Build include list — fetched Vulkan-Headers must come before the SDK + # include dir when the installed SDK is too old for VK_KHR_opacity_micromap. + # This should be reverted when SDK 351 is released. + set(_vkmod_incdirs) + if (DEFINED VULKAN_FETCHED_HEADERS_INCLUDE AND EXISTS "${VULKAN_FETCHED_HEADERS_INCLUDE}") + list(APPEND _vkmod_incdirs "${VULKAN_FETCHED_HEADERS_INCLUDE}") + endif () + list(APPEND _vkmod_incdirs "${Vulkan_INCLUDE_DIR}" "${VulkanHpp_INCLUDE_DIRS}") + target_include_directories(VulkanCppModule PUBLIC ${_vkmod_incdirs}) + unset(_vkmod_incdirs) target_link_libraries(VulkanCppModule PUBLIC Vulkan::Vulkan @@ -68,7 +80,17 @@ else() target_compile_definitions(VulkanCppModule INTERFACE VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1 VULKAN_HPP_NO_STRUCT_CONSTRUCTORS=1 ) - target_include_directories(VulkanCppModule INTERFACE "${VulkanHpp_INCLUDE_DIRS}") + # Same header-ordering logic as the module path: fetched Vulkan-Headers must + # come before VulkanHpp_INCLUDE_DIRS so the newer vulkan_core.h is found + # first when the installed SDK is too old for VK_KHR_opacity_micromap. + # This section should be reverted when SDK 351 is released. 
+ set(_vkmod_incdirs) + if (DEFINED VULKAN_FETCHED_HEADERS_INCLUDE AND EXISTS "${VULKAN_FETCHED_HEADERS_INCLUDE}") + list(APPEND _vkmod_incdirs "${VULKAN_FETCHED_HEADERS_INCLUDE}") + endif () + list(APPEND _vkmod_incdirs "${VulkanHpp_INCLUDE_DIRS}") + target_include_directories(VulkanCppModule INTERFACE ${_vkmod_incdirs}) + unset(_vkmod_incdirs) endif() @@ -92,6 +114,7 @@ list(FILTER SLANG_SHADER_SOURCES EXCLUDE REGEX ".*/(common_types|pbr_utils|light # Find slangc executable (optional) find_program(SLANGC_EXECUTABLE slangc HINTS $ENV{VULKAN_SDK}/bin) +find_program(SPIRV_OPT_EXECUTABLE spirv-opt HINTS $ENV{VULKAN_SDK}/bin) if(SLANGC_EXECUTABLE) # Ensure the output directory for compiled shaders exists @@ -102,16 +125,49 @@ if(SLANGC_EXECUTABLE) get_filename_component(SHADER_NAME ${SHADER} NAME) get_filename_component(SHADER_NAME_WE ${SHADER_NAME} NAME_WE) string(REGEX REPLACE "\.slang$" "" OUTPUT_NAME ${SHADER_NAME}) - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/shaders/${OUTPUT_NAME}.spv - COMMAND ${SLANGC_EXECUTABLE} ${SHADER} -target spirv -profile spirv_1_4 -emit-spirv-directly -o ${CMAKE_CURRENT_BINARY_DIR}/shaders/${OUTPUT_NAME}.spv - DEPENDS ${SHADER} - COMMENT "Compiling Slang shader ${SHADER_NAME} with slangc" - ) - list(APPEND SHADER_SPVS ${CMAKE_CURRENT_BINARY_DIR}/shaders/${OUTPUT_NAME}.spv) + set(OUTPUT_SPV ${CMAKE_CURRENT_BINARY_DIR}/shaders/${OUTPUT_NAME}.spv) + + set(SLANGC_FLAGS -target spirv -profile spirv_1_3 -g0 -O3) + if (ANDROID) + list(APPEND SLANGC_FLAGS -DPLATFORM_ANDROID=1) + endif () + + if (SPIRV_OPT_EXECUTABLE) + add_custom_command( + OUTPUT ${OUTPUT_SPV} + COMMAND ${SLANGC_EXECUTABLE} ${SHADER} ${SLANGC_FLAGS} -o ${OUTPUT_SPV}.tmp + COMMAND ${SPIRV_OPT_EXECUTABLE} --strip-debug --strip-nonsemantic -Os ${OUTPUT_SPV}.tmp -o ${OUTPUT_SPV} + COMMAND ${CMAKE_COMMAND} -E remove ${OUTPUT_SPV}.tmp + DEPENDS ${SHADER} + COMMENT "Compiling and optimizing Slang shader ${SHADER_NAME}" + ) + else () + add_custom_command( + OUTPUT 
${OUTPUT_SPV} + COMMAND ${SLANGC_EXECUTABLE} ${SHADER} ${SLANGC_FLAGS} -o ${OUTPUT_SPV} + DEPENDS ${SHADER} + COMMENT "Compiling Slang shader ${SHADER_NAME} with slangc" + ) + endif () + list(APPEND SHADER_SPVS ${OUTPUT_SPV}) endforeach() add_custom_target(shaders DEPENDS ${SHADER_SPVS}) + + # Copy shaders to the source directory's shaders folder so the app can find them when run from root + add_custom_command(TARGET shaders POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${SHADER_SPVS} ${CMAKE_CURRENT_SOURCE_DIR}/shaders/ + COMMENT "Copying compiled shaders to source shaders directory" + ) + + if (ANDROID) + # On Android, copy shaders to the Assets directory so Gradle can package them + add_custom_command(TARGET shaders POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/Assets/shaders + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${SHADER_SPVS} ${CMAKE_CURRENT_SOURCE_DIR}/Assets/shaders/ + COMMENT "Copying compiled shaders to Assets/shaders for Android packaging" + ) + endif () else() message(STATUS "slangc not found. Skipping shader compilation step.") add_custom_target(shaders) @@ -161,7 +217,7 @@ set(SOURCES_DESKTOP # Create target if (ANDROID) # Android: build the engine as a library to be linked into the app's `simple_engine_android` SHARED library. - add_library(SimpleEngine STATIC ${SOURCES_COMMON}) + add_library(SimpleEngine STATIC ${SOURCES_COMMON} ${SOURCES_DESKTOP}) else () # Desktop: build the runnable executable (unchanged behavior vs `HEAD`). add_executable(SimpleEngine ${SOURCES_COMMON} ${SOURCES_DESKTOP}) @@ -170,6 +226,19 @@ endif () add_dependencies(SimpleEngine shaders) set_target_properties (SimpleEngine PROPERTIES CXX_STANDARD 20) +# ── Course modules ──────────────────────────────────────────────────────────── +# Each course is a static library defined in Courses/CMakeLists.txt. +# The SIMPLE_ENGINE_COURSES_LIB variable is set in that file; it is empty when +# all course modules are disabled. 
+if(ENABLE_COURSES) + add_subdirectory(Courses) + if(SIMPLE_ENGINE_COURSES_LIB) + target_link_libraries(SimpleEngine PUBLIC ${SIMPLE_ENGINE_COURSES_LIB}) + target_include_directories(SimpleEngine PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/Courses) + message(STATUS "[SimpleEngine] Course modules linked: ${SIMPLE_ENGINE_COURSES_LIB}") + endif() +endif() + # Enable required defines for GLM experimental extensions and MSVC math constants target_compile_definitions(SimpleEngine PRIVATE GLM_ENABLE_EXPERIMENTAL diff --git a/attachments/simple_engine/Courses/CMakeLists.txt b/attachments/simple_engine/Courses/CMakeLists.txt new file mode 100644 index 000000000..ab3a2dddf --- /dev/null +++ b/attachments/simple_engine/Courses/CMakeLists.txt @@ -0,0 +1,143 @@ +# ============================================================================= +# Courses/CMakeLists.txt +# ============================================================================= +# +# This subdirectory contains self-contained course modules that extend the +# Simple Engine with new rendering features. Each module is optional — if +# the parent CMakeLists.txt includes this directory with: +# +# option(ENABLE_COURSES "Build the Simple Engine course modules" OFF) +# if(ENABLE_COURSES) +# add_subdirectory(Courses) +# endif() +# +# then all enabled courses are compiled as a STATIC library that is linked +# into the SimpleEngine target. No existing engine source files are modified +# by this build; the modules attach to the engine via the public extension +# points added to renderer.h. +# +# ADDING A NEW COURSE +# ------------------- +# 1. Create a subdirectory under Courses/ (e.g. Courses/My_New_Topic/). +# 2. Add its source files to the COURSE_SOURCES list below. +# 3. If the course has a Slang shader, add it to COURSE_SLANG_SHADERS below. +# 4. Document the course in en/Building_a_Simple_Engine/Courses/. 
+# +# The course modules in this file: +# * Opacity Micromaps — VK_KHR_opacity_micromap hardware shadow acceleration +# ============================================================================= + +cmake_minimum_required(VERSION 3.29) + +# ───────────────────────────────────────────────────────────────────────────── +# Per-course opt-in options +# ───────────────────────────────────────────────────────────────────────────── +option(ENABLE_COURSE_OPACITY_MICROMAPS + "Build the Opacity Micromaps course module (VK_KHR_opacity_micromap)" + ON) + +# ───────────────────────────────────────────────────────────────────────────── +# Collect sources from enabled course modules +# ───────────────────────────────────────────────────────────────────────────── +set(COURSE_SOURCES "") +set(COURSE_SLANG_SHADERS "") + +if(ENABLE_COURSE_OPACITY_MICROMAPS) + list(APPEND COURSE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/opacity_micromap_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/omm_integration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/omm_imgui_panel.cpp + ) + # No standalone shader needed: VK_KHR_opacity_micromap is transparent to + # the existing ray_query.slang shader — the hardware handles everything. + message(STATUS "[Courses] Opacity Micromaps module ENABLED") +else() + message(STATUS "[Courses] Opacity Micromaps module disabled") +endif() + +# ───────────────────────────────────────────────────────────────────────────── +# Build the course modules library (only if there are sources) +# ───────────────────────────────────────────────────────────────────────────── +if(COURSE_SOURCES) + add_library(SimpleEngineCourses STATIC ${COURSE_SOURCES}) + + set_target_properties(SimpleEngineCourses PROPERTIES CXX_STANDARD 20) + + if(ENABLE_COURSE_OPACITY_MICROMAPS) + target_compile_definitions(SimpleEngineCourses PUBLIC ENABLE_COURSE_OPACITY_MICROMAPS) + endif() + + # The course modules include engine headers directly (../renderer.h etc.) 
+ target_include_directories(SimpleEngineCourses PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/.. # engine root (renderer.h, etc.) + ) + + # Inherit the same compile definitions as the main engine target so that + # the Vulkan-Hpp dispatch loader and GLM settings are consistent. + target_compile_definitions(SimpleEngineCourses PRIVATE + GLM_ENABLE_EXPERIMENTAL + _USE_MATH_DEFINES + VULKAN_HPP_NO_STRUCT_CONSTRUCTORS + VULKAN_HPP_DISPATCH_LOADER_DYNAMIC + ) + + # MSVC portability + if(MSVC) + target_compile_definitions(SimpleEngineCourses PRIVATE + NOMINMAX + WIN32_LEAN_AND_MEAN + _CRT_SECURE_NO_WARNINGS + ) + target_compile_options(SimpleEngineCourses PRIVATE + /permissive- + /Zc:__cplusplus + /EHsc + /W3 + /MP + ) + endif() + + # Link against Vulkan (same path as the engine). + if(TARGET Vulkan::cppm) + target_link_libraries(SimpleEngineCourses PUBLIC Vulkan::cppm) + else() + target_link_libraries(SimpleEngineCourses PUBLIC Vulkan::Vulkan) + endif() + + target_link_libraries(SimpleEngineCourses PUBLIC glm::glm) + + if (ANDROID) + target_link_libraries(SimpleEngineCourses PUBLIC game-activity::game-activity) + endif () + + # Export the library name to the parent scope so CMakeLists.txt can link it. 
+ set(SIMPLE_ENGINE_COURSES_LIB SimpleEngineCourses PARENT_SCOPE) + + # ── Shader compilation (future courses that DO add shaders) ────────────── + if(COURSE_SLANG_SHADERS AND SLANGC_EXECUTABLE) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/shaders) + foreach(SHADER ${COURSE_SLANG_SHADERS}) + get_filename_component(SHADER_NAME ${SHADER} NAME) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/shaders/${SHADER_NAME}.spv + COMMAND ${SLANGC_EXECUTABLE} ${SHADER} + -target spirv -profile spirv_1_4 -emit-spirv-directly + -o ${CMAKE_CURRENT_BINARY_DIR}/shaders/${SHADER_NAME}.spv + DEPENDS ${SHADER} + COMMENT "Compiling course shader ${SHADER_NAME}" + ) + list(APPEND COURSE_SHADER_SPVS + ${CMAKE_CURRENT_BINARY_DIR}/shaders/${SHADER_NAME}.spv) + endforeach() + + if(COURSE_SHADER_SPVS) + add_custom_target(CourseShaders DEPENDS ${COURSE_SHADER_SPVS}) + add_dependencies(SimpleEngineCourses CourseShaders) + endif() + endif() + +else() + # No course sources enabled — export an empty library name. + set(SIMPLE_ENGINE_COURSES_LIB "" PARENT_SCOPE) + message(STATUS "[Courses] No course modules enabled; skipping library build.") +endif() diff --git a/attachments/simple_engine/Courses/README.adoc b/attachments/simple_engine/Courses/README.adoc new file mode 100644 index 000000000..dd27f30fb --- /dev/null +++ b/attachments/simple_engine/Courses/README.adoc @@ -0,0 +1,120 @@ += Simple Engine — Course Modules +:toc: left +:toclevels: 3 +:sectnums: + +Course modules are self-contained feature extensions that live entirely inside this `Courses/` directory. +Each module is compiled as a static library (`SimpleEngineCourses`) that is linked into the main `SimpleEngine` target — no existing engine source files are modified. 
+ +== Available Courses + +[cols="2,5",options="header"] +|=== +| Module | Description + +| xref:../en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/00_introduction.adoc[Opacity Micromaps] +| Hardware-accelerated shadow rays through alpha-masked geometry using +`VK_KHR_opacity_micromap`. Reduces any-hit shader invocations to only the +micro-triangles at the exact alpha boundary. +|=== + +== Building with Course Modules + +=== Prerequisites + +* CMake 3.29 or newer +* Vulkan headers 1.4.351 or newer for `VK_KHR_opacity_micromap` — if the installed Vulkan SDK is older, the build fetches matching Vulkan-Headers and Vulkan-Hpp from GitHub at configure time (network access required) +* A GPU and driver that advertise `VK_KHR_opacity_micromap` + +=== Quick Start + +[source,shell] +---- +# Configure — enable the course modules umbrella option plus the +# Opacity Micromaps module specifically +cmake -B build \ + -DENABLE_COURSES=ON \ + -DENABLE_COURSE_OPACITY_MICROMAPS=ON \ + .. + +# Build +cmake --build build --parallel +---- + +=== CMake Options + +[cols="3,1,4",options="header"] +|=== +| Option | Default | Effect + +| `ENABLE_COURSES` +| `OFF` +| Master switch. Must be `ON` for any course module to be compiled. +When `OFF`, the `Courses/` subdirectory is skipped entirely and the +build is identical to the base engine. + +| `ENABLE_COURSE_OPACITY_MICROMAPS` +| `ON` +| Build the Opacity Micromaps module. Has no effect unless +`ENABLE_COURSES=ON`. When enabled, adds the +`ENABLE_COURSE_OPACITY_MICROMAPS` compile definition so the engine +startup code activates `OmmIntegration`. +|=== + +=== Enabling Only Courses, Disabling OMM + +[source,shell] +---- +# Build courses infrastructure but skip the OMM module +cmake -B build \ + -DENABLE_COURSES=ON \ + -DENABLE_COURSE_OPACITY_MICROMAPS=OFF \ + .. +---- + +When all per-course options are disabled, no `SimpleEngineCourses` library is produced and the linker step is a no-op. + +=== Disabling Everything (Base Engine Only) + +[source,shell] +---- +cmake -B build -DENABLE_COURSES=OFF .. 
+---- + +=== Reconfiguring an Existing Build Directory + +Pass the same flags to the existing build tree — CMake will re-run the configure step and update only the affected targets: + +[source,shell] +---- +cmake -B build -DENABLE_COURSES=ON -DENABLE_COURSE_OPACITY_MICROMAPS=ON +cmake --build build --parallel +---- + +=== IDE / GUI Configuration (cmake-gui / ccmake) + +1. Open `cmake-gui` (or run `ccmake build/`). +2. Set `ENABLE_COURSES` to `ON` and click **Configure**. +3. Set `ENABLE_COURSE_OPACITY_MICROMAPS` to `ON` (it appears after the first configure pass). +4. Click **Configure** again, then **Generate**. +5. Build the generated project as normal. + +== Verifying the Build + +A successful configure with OMM enabled prints: + +---- +[Courses] Opacity Micromaps module ENABLED +---- + +A successful build will produce `libSimpleEngineCourses.a` (Linux/macOS) or +`SimpleEngineCourses.lib` (Windows) in the build tree, linked automatically into `SimpleEngine`. + +At runtime the ImGui panel shows an **Opacity Micromaps** section with live statistics (micromap count, build time, any-hit invocations saved). + +== Adding a New Course Module + +1. Create a subdirectory under `Courses/` (e.g. `Courses/My_Feature/`). +2. Add its `.cpp` files to the `COURSE_SOURCES` list in `Courses/CMakeLists.txt`. +3. Add a corresponding `option(ENABLE_COURSE_MY_FEATURE ...)` entry. +4. Document the course in `en/Building_a_Simple_Engine/Courses/`. diff --git a/attachments/simple_engine/Courses/omm_imgui_panel.cpp b/attachments/simple_engine/Courses/omm_imgui_panel.cpp new file mode 100644 index 000000000..b88e16c8d --- /dev/null +++ b/attachments/simple_engine/Courses/omm_imgui_panel.cpp @@ -0,0 +1,223 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// ============================================================================= +// OmmImGuiPanel — implementation +// ============================================================================= +// +// This panel is registered via Renderer::RegisterImGuiPanel() and injected +// directly into the engine's "Renderer" ImGui window. It is completely +// self-contained and only reads public accessors on OmmIntegration and +// Renderer. +// +// LAYOUT +// ------ +// The panel opens a collapsing header "Opacity Micromaps (Course Module)" +// inside the existing "Renderer" ImGui window — no extra window needed. +// Inside it shows: +// • Hardware support status (green/red) +// • Build summary (meshes built, GPU memory) +// • Coloured progress bars: opaque / transparent / unknown percentages +// • Live config controls: subdivision level, thresholds, unknown-state toggle +// • A "Rebuild Micromaps" button that calls OmmIntegration::buildMicromaps() +// ============================================================================= + +#include "omm_imgui_panel.h" +#include "../imgui/imgui.h" +#include "../renderer.h" + +namespace { + +// Static config instance — survives across frames. +OmmConfig s_config{}; + +// Colourful helpers matching the engine ImGui style (dark theme). 
+const ImVec4 kGreen { 0.30f, 0.85f, 0.40f, 1.f }; +const ImVec4 kRed { 0.95f, 0.30f, 0.30f, 1.f }; +const ImVec4 kYellow { 0.98f, 0.82f, 0.25f, 1.f }; +const ImVec4 kBlue { 0.40f, 0.65f, 1.00f, 1.f }; +const ImVec4 kGrey { 0.60f, 0.60f, 0.60f, 1.f }; +const ImVec4 kWhite { 1.00f, 1.00f, 1.00f, 1.f }; + +void coloredBar(float fraction, ImVec4 colour, const char* label) { + const float barW = ImGui::GetContentRegionAvail().x * 0.60f; + ImDrawList* dl = ImGui::GetWindowDrawList(); + ImVec2 pos = ImGui::GetCursorScreenPos(); + + // Background + dl->AddRectFilled(pos, + ImVec2(pos.x + barW, pos.y + 12.f), + IM_COL32(50, 50, 50, 200), 3.f); + // Fill + const float fillW = barW * std::max(0.f, std::min(fraction, 1.f)); + if (fillW > 0.f) + dl->AddRectFilled(pos, + ImVec2(pos.x + fillW, pos.y + 12.f), + ImGui::ColorConvertFloat4ToU32(colour), 3.f); + ImGui::Dummy(ImVec2(barW, 14.f)); + ImGui::SameLine(); + ImGui::TextColored(colour, "%s %.1f%%", label, fraction * 100.f); +} + +} // namespace + + +// ============================================================================= +// OmmImGuiPanel +// ============================================================================= + +OmmConfig& OmmImGuiPanel::mutableConfig() { + return s_config; +} + +void OmmImGuiPanel::draw(OmmIntegration& omm, Renderer* /*renderer*/) { + ImGui::Spacing(); + ImGui::Separator(); + + // ── Collapsing header ───────────────────────────────────────────────────── + const bool open = ImGui::CollapsingHeader("Opacity Micromaps (Course Module)"); + if (!open) return; + + ImGui::Spacing(); + + // ── Hardware support ────────────────────────────────────────────────────── + if (omm.isSupported()) { + ImGui::TextColored(kGreen, "[OK] VK_KHR_opacity_micromap: ENABLED"); + } else { + ImGui::TextColored(kRed, "[--] VK_KHR_opacity_micromap: NOT SUPPORTED"); + ImGui::TextWrapped("This GPU does not support opacity micromaps. 
" + "Shadow rays will continue to use the any-hit shader path " + "for alpha-masked geometry, which is correct but slower. " + "A GPU with NVIDIA Ada Lovelace or newer architecture is required."); + ImGui::Spacing(); + return; + } + + // ── Build summary ───────────────────────────────────────────────────────── + const OmmSceneStats& st = omm.stats(); + + ImGui::TextColored(kBlue, "Build summary"); + ImGui::Indent(); + ImGui::Text("Alpha-masked meshes examined : %u", st.meshesConsidered); + ImGui::Text("Micromaps built : %u", st.micromapsBuilt); + if (st.totalGpuBytes > 0) { + if (st.totalGpuBytes < 1024 * 1024) + ImGui::Text("GPU memory : %u KiB", + static_cast(st.totalGpuBytes / 1024)); + else + ImGui::Text("GPU memory : %.2f MiB", + static_cast(st.totalGpuBytes) / (1024.0 * 1024.0)); + } else { + ImGui::TextColored(kGrey, "GPU memory : (none built yet)"); + } + ImGui::Unindent(); + + // ── Average micro-triangle classification breakdown ──────────────────────── + if (st.micromapsBuilt > 0) { + ImGui::Spacing(); + ImGui::TextColored(kBlue, "Average micro-triangle breakdown (across %u meshes)", st.micromapsBuilt); + ImGui::Indent(); + coloredBar(st.avgPctOpaque, kGreen, "Opaque (hardware blocks — no shader)"); + coloredBar(st.avgPctTransparent, kBlue, "Transparent (hardware passes — no shader)"); + coloredBar(st.avgPctUnknown, kYellow, "Unknown (any-hit shader fires)"); + + const float shaderSaved = st.avgPctOpaque + st.avgPctTransparent; + ImGui::Spacing(); + ImGui::TextColored(shaderSaved > 0.8f ? 
kGreen : kYellow, + "Shader invocations avoided: ~%.0f%%", shaderSaved * 100.f); + ImGui::Unindent(); + } else if (st.meshesConsidered > 0) { + ImGui::TextColored(kYellow, "No micromaps were built yet — click 'Rebuild Micromaps' below."); + } else { + ImGui::TextColored(kGrey, "No alpha-masked meshes found in the current scene."); + } + + // ── Configuration controls ──────────────────────────────────────────────── + ImGui::Spacing(); + ImGui::Separator(); + ImGui::TextColored(kBlue, "Configuration"); + ImGui::Indent(); + + // Subdivision level selector (0-4 as radio-style buttons) + ImGui::Text("Subdivision level:"); + ImGui::SameLine(); + ImGui::TextColored(kGrey, " (higher = more accurate, more GPU memory)"); + for (int lvl = 0; lvl <= 4; ++lvl) { + if (lvl > 0) ImGui::SameLine(); + char lbl[12]; + std::snprintf(lbl, sizeof(lbl), " %d ", lvl); + const bool active = (static_cast(s_config.subdivisionLevel) == lvl); + if (active) ImGui::PushStyleColor(ImGuiCol_Button, ImVec4(0.25f, 0.55f, 0.90f, 1.f)); + if (ImGui::SmallButton(lbl)) + s_config.subdivisionLevel = static_cast(lvl); + if (active) ImGui::PopStyleColor(); + } + // Show micro-tri count for the chosen level + uint32_t microCount = 1; + for (uint32_t i = 0; i < s_config.subdivisionLevel; ++i) microCount *= 4; + ImGui::SameLine(); + ImGui::TextColored(kGrey, " = %u micro-tris/triangle", microCount); + + ImGui::Spacing(); + ImGui::Checkbox("Allow Unknown state (edge micro-tris → any-hit shader)", + &s_config.allowUnknownState); + ImGui::SliderFloat("Opaque threshold", &s_config.opaqueThreshold, + 0.50f, 1.00f, "%.2f"); + ImGui::SliderFloat("Transparent threshold", &s_config.transparentThreshold, + 0.00f, 0.49f, "%.2f"); + int spp = static_cast(s_config.samplesPerMicroTriangle); + if (ImGui::SliderInt("Samples per micro-tri (build quality)", &spp, 1, 8)) + s_config.samplesPerMicroTriangle = static_cast(std::max(1, spp)); + + ImGui::Unindent(); + + // ── Rebuild button
──────────────────────────────────────────────────────── + ImGui::Spacing(); + if (omm.isBuildInProgress()) { + ImGui::TextColored(kYellow, "Build in progress... please wait."); + } else { + ImGui::PushStyleColor(ImGuiCol_Button, ImVec4(0.20f, 0.55f, 0.20f, 1.f)); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, ImVec4(0.25f, 0.75f, 0.25f, 1.f)); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, ImVec4(0.15f, 0.40f, 0.15f, 1.f)); + if (ImGui::Button(" Rebuild Micromaps ", ImVec2(-1.f, 0.f))) { + omm.buildMicromaps(s_config); + } + ImGui::PopStyleColor(3); + } + + // ── Conceptual explanation (collapsed by default) ───────────────────────── + ImGui::Spacing(); + if (ImGui::TreeNode("What are Opacity Micromaps?")) { + ImGui::TextWrapped( + "Every triangle in an alpha-masked mesh (foliage, fences, curtains) " + "is subdivided into a grid of micro-triangles. Each micro-triangle is " + "pre-classified as Opaque, Transparent, or Unknown before any frame is drawn."); + ImGui::Spacing(); + ImGui::TextWrapped( + "During shadow-ray traversal the GPU hardware reads this classification " + "table directly — no shader code runs. Only the narrow Unknown band at " + "alpha-gradient edges still invokes the any-hit shader. For a typical " + "tree canopy this eliminates over 90%% of shadow-ray shader invocations."); + ImGui::Spacing(); + ImGui::TextColored(kBlue, + "Course reference: " + "en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/"); + ImGui::TreePop(); + } + + ImGui::Spacing(); +} diff --git a/attachments/simple_engine/Courses/omm_imgui_panel.h b/attachments/simple_engine/Courses/omm_imgui_panel.h new file mode 100644 index 000000000..fc1b8aa90 --- /dev/null +++ b/attachments/simple_engine/Courses/omm_imgui_panel.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +// ============================================================================= +// OmmImGuiPanel — in-engine UI for the Opacity Micromaps course module +// ============================================================================= +// +// This panel is injected into the engine's "Renderer" ImGui window via +// Renderer::RegisterImGuiPanel(). It shows: +// +// ┌─────────────────────────────────────────────────────┐ +// │ Opacity Micromaps (Course Module) │ +// │ Hardware support: ✓ / ✗ │ +// │ Micromaps built: 12 / 15 alpha-masked meshes │ +// │ GPU memory: 384 KiB │ +// │ │ +// │ [Progress bars] Opaque / Transparent / Unknown │ +// │ │ +// │ Subdivision level [ 0 ][ 1 ][●2][ 3 ][ 4 ] │ +// │ ☑ Allow Unknown state │ +// │ Opaque threshold ───●──── 0.95 │ +// │ Transparent thresh ─●────── 0.05 │ +// │ │ +// │ [ Rebuild Micromaps ] │ +// └─────────────────────────────────────────────────────┘ +// +// The panel is entirely self-contained and does not modify any engine source. +// ============================================================================= + +#include "omm_integration.h" + +class Renderer; + +namespace OmmImGuiPanel { + + /// Draw the panel into the currently-open ImGui window. + /// Called automatically from the Renderer::RegisterImGuiPanel() callback. + void draw(OmmIntegration& omm, Renderer* renderer); + + /// The config that the "Rebuild" button will use. Modified in-place by the + /// panel controls. External code can read it back after calling draw(). 
+ OmmConfig& mutableConfig(); + +} // namespace OmmImGuiPanel diff --git a/attachments/simple_engine/Courses/omm_integration.cpp b/attachments/simple_engine/Courses/omm_integration.cpp new file mode 100644 index 000000000..de7f193aa --- /dev/null +++ b/attachments/simple_engine/Courses/omm_integration.cpp @@ -0,0 +1,225 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// ============================================================================= +// OmmIntegration — implementation +// ============================================================================= +// +// This file wires OpacityMicromapBuilder into the engine without touching any +// of the core engine source files. The integration path is: +// +// OmmIntegration::init() — store pointers, register the micromap provider and the ImGui panel +// OmmIntegration::buildMicromaps() — iterate alpha-masked meshes, +// call buildForMesh() on a temporary builder for each, +// then swap the results into the active builder and +// request an acceleration-structure rebuild. +// OmmIntegration::getPNextForMesh()— called by renderer_ray_query.cpp +// (or any future patched build loop) +// to retrieve the per-mesh pNext chain. +// OmmIntegration::shutdown() — unregister panel and provider, builder.reset(). +// +// TEXTURE ACCESS STRATEGY +// ----------------------- +// The engine uploads textures to the GPU via stb_image + ModelLoader.
The CPU +// pixel data lives in the tinygltf image buffers that ModelLoader holds +// internally. We query the model loader for the Material's albedoTexturePath, +// then ask the renderer to resolve that path back to its raw pixel data via +// the helper Renderer::GetRawTexturePixels() which we add only as a protected +// accessor (already lives in the same translation unit family via renderer.h). +// +// If raw pixels are not accessible (e.g., the texture was streamed from disk), +// we fall back to skipping OMM for that mesh — the any-hit shader continues +// to handle it as before. +// ============================================================================= + +#include "omm_integration.h" +#include "omm_imgui_panel.h" + +#include "../renderer.h" +#include "../model_loader.h" +#include "../mesh_component.h" + +#include +#include +#include +#include + +void OmmIntegration::init(Renderer& renderer, ModelLoader& modelLoader) { + m_renderer = &renderer; + m_modelLoader = &modelLoader; + m_builder.init(renderer); + renderer.RegisterMicromapProvider([this](const MeshComponent* m) { return getPNextForMesh(m); }); + m_initialised = true; + + // We register the ImGui panel immediately so it's always available, + // even if no micromaps are built yet. + m_renderer->RegisterImGuiPanel( + [this](Renderer* r) { OmmImGuiPanel::draw(*this, r); }); + + if (m_builder.isSupported()) { + std::cout << "[OMM] Integration layer initialised — ready to build micromaps.\n"; + } else { + std::cout << "[OMM] Hardware does not support VK_KHR_opacity_micromap.\n" + << " Shadow rays will continue to use the any-hit shader path.\n"; + } +} + +// --------------------------------------------------------------------------- +// buildMicromaps +// --------------------------------------------------------------------------- +// Iterates every mesh registered with the renderer, identifies those with +// alpha-masked materials, and calls buildForMesh() for each one.
+// +// The build runs on a detached background thread. When it finishes, the new +// results are swapped into the active builder and an AS rebuild is requested. +// --------------------------------------------------------------------------- +void OmmIntegration::buildMicromaps(const OmmConfig& config) { + if (!m_initialised) { + std::cerr << "[OMM] buildMicromaps() called before init().\n"; + return; + } + // Claim the build slot in one atomic step: a separate load() + store() pair + // would let two concurrent callers both pass the check and start a build. + if (m_buildInProgress.exchange(true)) { + return; // Already building + } + + // We use a separate thread for the build loop so the engine UI stays responsive. + // The classification tasks within the builder will still use the renderer's thread pool. + std::thread([this, config]() { + try { + if (!m_builder.isSupported()) { + m_buildInProgress.store(false); + return; + } + + // Create a temporary builder to hold the new results without destroying the old ones yet. + // This prevents "Lost Device" crashes caused by destroying micromaps currently in use by the GPU. + OpacityMicromapBuilder tempBuilder; + tempBuilder.init(*m_renderer); + + // Wait for textures, then wait for the render thread to drain deferred + // mesh uploads (which populates meshResources). OMM must not scan + // before meshResources is fully populated or it will find 0 meshes. + // WaitForAllTextureTasks only confirms jobs are ENQUEUED to upload workers, + // not that they have been PROCESSED (StoreRawTexturePixels called). + // We must also wait for the raw pixel cache to stabilise before scanning.
+ std::cout << "[OMM] Waiting for texture jobs to enqueue...\n"; + m_renderer->WaitForAllTextureTasks(); + std::cout << "[OMM] Waiting for mesh resources to settle...\n"; + if (!m_renderer->WaitForMeshResourcesToSettle()) { + std::cout << "[OMM] Timed out waiting for mesh resources; proceeding with partial mesh list.\n"; + } + std::cout << "[OMM] Waiting for raw pixel cache to settle...\n"; + if (!m_renderer->WaitForRawPixelCacheToSettle()) { + std::cout << "[OMM] Timed out waiting for pixel cache; some textures may be missing.\n"; + } + + std::vector meshes = m_renderer->GetRegisteredMeshes(); + std::cout << "[OMM] Scanning " << meshes.size() << " registered meshes.\n"; + OmmSceneStats tempStats{}; + + float sumOpaque = 0.f, sumTransparent = 0.f, sumUnknown = 0.f; + + for (const MeshComponent* mesh : meshes) { + if (!mesh) continue; + + const uint32_t matIdx = mesh->GetInstanceCount() > 0 + ? mesh->GetInstance(0).materialIndex + : 0; + + const Material* mat = m_modelLoader->GetMaterialByIndex(matIdx); + if (!mat) continue; + if (mat->alphaMode != "MASK") continue; + + ++tempStats.meshesConsidered; + + uint32_t texW = 0, texH = 0, texCh = 0; + const uint8_t* pixels = m_renderer->GetRawTexturePixels( + mat->albedoTexturePath, &texW, &texH, &texCh); + + if (!pixels || texW == 0 || texH == 0) { + std::cout << "[OMM] No CPU pixel data for texture '" + << mat->albedoTexturePath << "' (alpha masked). 
Skipping mesh.\n"; + continue; + } + + std::cout << "[OMM] Building for mesh with texture '" << mat->albedoTexturePath + << "' (" << texW << "x" << texH << ")\n"; + + OmmMeshInfo info{}; + try { + info = tempBuilder.buildForMesh(mesh, pixels, texW, texH, texCh, config); + } catch (const std::exception& e) { + std::cerr << "[OMM] buildForMesh failed for '" << mat->albedoTexturePath + << "': " << e.what() << "\n"; + continue; + } + + if (info.built) { + ++tempStats.micromapsBuilt; + sumOpaque += info.pctOpaque; + sumTransparent += info.pctTransparent; + sumUnknown += info.pctUnknown; + } + } + + tempStats.totalGpuBytes = tempBuilder.totalGpuBytes(); + if (tempStats.micromapsBuilt > 0) { + const float inv = 1.f / static_cast(tempStats.micromapsBuilt); + tempStats.avgPctOpaque = sumOpaque * inv; + tempStats.avgPctTransparent = sumTransparent * inv; + tempStats.avgPctUnknown = sumUnknown * inv; + } + + // Swap the new builder content into the active builder. + m_builder.swap(tempBuilder); + m_stats = tempStats; + + std::cout << "[OMM] Build complete: " + << m_stats.micromapsBuilt << "/" << m_stats.meshesConsidered + << " alpha-masked meshes have micromaps.\n" + << " Total GPU: " << m_stats.totalGpuBytes / 1024 << " KiB\n"; + + // Trigger an acceleration structure rebuild. + m_renderer->RequestAccelerationStructureBuild("OMM rebuild complete"); + + // Give the GPU and renderer some time to build the new AS and finish current frames + // before letting tempBuilder (which now holds the OLD resources) be destroyed. 
+ std::this_thread::sleep_for(std::chrono::seconds(2)); + + } catch (const std::exception& e) { + std::cerr << "[OMM] Error during background build: " << e.what() << std::endl; + } + + m_buildInProgress.store(false); + }).detach(); +} + +void OmmIntegration::shutdown() { + if (m_renderer) { + m_renderer->UnregisterImGuiPanel(); + m_renderer->RegisterMicromapProvider(nullptr); + } + m_builder.reset(); + m_initialised = false; +} + +void* OmmIntegration::getPNextForMesh(const MeshComponent* mesh) const { + const OmmMeshInfo* info = m_builder.getInfo(mesh); + return info ? info->pNextChain : nullptr; +} diff --git a/attachments/simple_engine/Courses/omm_integration.h b/attachments/simple_engine/Courses/omm_integration.h new file mode 100644 index 000000000..b632b71a6 --- /dev/null +++ b/attachments/simple_engine/Courses/omm_integration.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +// ============================================================================= +// OmmIntegration — wires OpacityMicromapBuilder into the engine startup +// ============================================================================= +// +// This thin wrapper class is the single "seam" between the course module and +// the engine. It: +// +// 1. Builds micromaps for every alpha-masked mesh after textures are loaded. +// 2. 
Patches the geometry pNext chain just before each BLAS is constructed. +// 3. Registers the ImGui panel so the engine window shows OMM controls. +// 4. Cleans everything up when the scene is replaced or the engine shuts down. +// +// Usage in main.cpp (or engine initialisation code): +// +// OmmIntegration omm; +// omm.init(renderer, modelLoader); +// // ... load scene ... +// omm.buildMicromaps(ommConfig); // call once after textures are ready +// +// Once the background build finishes, whenever the engine calls buildAccelerationStructures(), +// it will find the micromap pNext chains already attached to the geometry. +// +// See: en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc +// ============================================================================= + +#include "opacity_micromap_builder.h" +#include +#include +#include + +// Forward declarations +class Renderer; +class Engine; +class ModelLoader; +class MeshComponent; + +// --------------------------------------------------------------------------- +// OmmSceneStats — reported to the ImGui panel +// --------------------------------------------------------------------------- +struct OmmSceneStats { + uint32_t meshesConsidered = 0; // alpha-masked meshes examined + uint32_t micromapsBuilt = 0; // VkAccelerationStructureKHR micromap objects created + uint64_t totalGpuBytes = 0; + float avgPctOpaque = 0.f; + float avgPctTransparent = 0.f; + float avgPctUnknown = 0.f; +}; + +// --------------------------------------------------------------------------- +// OmmIntegration +// --------------------------------------------------------------------------- +class OmmIntegration { +public: + OmmIntegration() = default; + ~OmmIntegration() { shutdown(); } + + // Non-copyable + OmmIntegration(const OmmIntegration&) = delete; + OmmIntegration& operator=(const OmmIntegration&) = delete; + + // ── Lifecycle ───────────────────────────────────────────────────────────── + + /// Initialise with
the engine subsystems. Call once after the renderer + /// and model loader are ready, before loading any scene. + void init(Renderer& renderer, ModelLoader& modelLoader); + + /// Build micromaps for all currently-loaded alpha-masked meshes and register + /// the BLAS pNext hooks. Call after all mesh textures are uploaded. + /// Safe to call multiple times (resets and rebuilds on each call). + void buildMicromaps(const OmmConfig& config = {}); + + /// Release all GPU resources and unregister the ImGui panel. + void shutdown(); + + // ── Accessors ───────────────────────────────────────────────────────────── + + [[nodiscard]] bool isSupported() const { return m_builder.isSupported(); } + [[nodiscard]] const OmmSceneStats& stats() const { return m_stats; } + [[nodiscard]] OpacityMicromapBuilder& builder() { return m_builder; } + + /// Returns the pNext chain to attach to a geometry triangles struct for the + /// given MeshComponent, or nullptr when no micromap was built for it. + [[nodiscard]] void* getPNextForMesh(const MeshComponent* mesh) const; + + [[nodiscard]] bool isBuildInProgress() const { return m_buildInProgress.load(); } + +private: + OpacityMicromapBuilder m_builder; + OmmSceneStats m_stats{}; + + Renderer* m_renderer = nullptr; + ModelLoader* m_modelLoader = nullptr; + + bool m_initialised = false; + std::atomic m_buildInProgress{false}; +}; diff --git a/attachments/simple_engine/Courses/opacity_micromap_builder.cpp b/attachments/simple_engine/Courses/opacity_micromap_builder.cpp new file mode 100644 index 000000000..1712025e2 --- /dev/null +++ b/attachments/simple_engine/Courses/opacity_micromap_builder.cpp @@ -0,0 +1,708 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// ============================================================================= +// OpacityMicromapBuilder — Implementation +// ============================================================================= +// +// Read alongside: +// en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc +// +// This module implements VK_KHR_opacity_micromap. +// +// THREE PHASES +// ------------ +// Phase 1 analyseVariation() Does this mesh have mixed opaque/transparent +// regions? If not, skip the build entirely. +// Phase 2 classify() Sample each micro-triangle centroid in +// texture space → assign a 2-bit opacity state. +// Phase 3 buildOnGpu() Pack states, upload to device, create a +// VkAccelerationStructureKHR (OPACITY_MICROMAP +// type) via vkCreateAccelerationStructure2KHR, +// build it with vkCmdBuildAccelerationStructuresKHR, +// then fill the pNext chain. +// +// KHR DESIGN NOTES +// ---------------- +// - Micromaps are VkAccelerationStructureKHR with type +// VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR. They are created via +// vkCreateAccelerationStructure2KHR (from VK_KHR_device_address_commands). +// vkCreateAccelerationStructureKHR cannot be used for micromaps. +// +// - Build is device-side only via vkCmdBuildAccelerationStructuresKHR with +// geometryType = VK_GEOMETRY_TYPE_MICROMAP_KHR. No host build path exists. +// +// - Size query is vkGetAccelerationStructureBuildSizesKHR with buildType = +// VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR. 
+// pMaxPrimitiveCounts[i] is the max triangle count for geometry[i]. +// The raw C dispatcher is used so we can directly control the call. +// +// - Input buffers require VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR. +// The micromap backing buffer requires VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR. +// +// - Synchronisation uses VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR. +// +// - The BLAS attachment is VkAccelerationStructureTrianglesOpacityMicromapKHR +// chained into pNext of VkAccelerationStructureGeometryTrianglesDataKHR. +// Its indexBuffer is VkDeviceAddress (device-only, no host address variant). +// Its micromap field is VkAccelerationStructureKHR. +// +// - The BLAS always holds a live reference to the micromap. There is no +// "discardable" property (removed from KHR). Destroy BLASes before micromaps. +// +// - Ray query shaders must declare the OpacityMicromapKHR execution mode via +// SPV_KHR_opacity_micromap for the hardware fast-path to activate. +// +// STATE ENCODING (VK_OPACITY_MICROMAP_FORMAT_4_STATE_KHR, 2 bits/entry) +// ---------------------------------------------------------------------- +// 0b00 TRANSPARENT Hardware passes the ray — no shader. +// 0b01 OPAQUE Hardware blocks the ray — no shader. +// 0b11 UNKNOWN_OPAQUE Hardware falls back to the any-hit shader. 
+// ============================================================================= + +#include "opacity_micromap_builder.h" +#include "../renderer.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +constexpr uint8_t STATE_TRANSPARENT = 0b00; +constexpr uint8_t STATE_OPAQUE = 0b01; +constexpr uint8_t STATE_UNKNOWN_OPAQUE = 0b11; + +constexpr uint32_t microTriCount(uint32_t level) { + uint32_t n = 1; + for (uint32_t i = 0; i < level; ++i) n *= 4; + return n; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Bird-curve centroid generation +// +// Produces barycentric centroids in the traversal order required by the spec. +// Both KHR and EXT use the same recursive Bird-curve layout. +// ───────────────────────────────────────────────────────────────────────────── +void generateRecursive(uint32_t targetLevel, uint32_t currentLevel, + glm::vec2 v0, glm::vec2 v1, glm::vec2 v2, + std::vector>& out) +{ + if (currentLevel == targetLevel) { + const glm::vec2 c = (v0 + v1 + v2) / 3.0f; + out.push_back({c.x, c.y}); + return; + } + const glm::vec2 m01 = (v0 + v1) * 0.5f; + const glm::vec2 m12 = (v1 + v2) * 0.5f; + const glm::vec2 m20 = (v2 + v0) * 0.5f; + generateRecursive(targetLevel, currentLevel + 1, v0, m01, m20, out); + generateRecursive(targetLevel, currentLevel + 1, m12, m20, m01, out); + generateRecursive(targetLevel, currentLevel + 1, m01, v1, m12, out); + generateRecursive(targetLevel, currentLevel + 1, m20, m12, v2, out); +} + +std::vector> generateCentroids(uint32_t level) { + std::vector> out; + out.reserve(microTriCount(level)); + generateRecursive(level, 0, {0.f, 0.f}, {1.f, 0.f}, {0.f, 1.f}, out); + assert(out.size() == static_cast(microTriCount(level))); + return out; +} + +// ───────────────────────────────────────────────────────────────────────────── +// packStates +// +// Converts one uint8_t per micro-triangle into the 2-bits-per-entry layout +// consumed 
by vkCmdBuildAccelerationStructuresKHR. +// ───────────────────────────────────────────────────────────────────────────── +std::vector packStates(const std::vector& unpacked, + uint32_t triangleCount, + uint32_t subdivisionLevel) +{ + const uint32_t uPerTri = microTriCount(subdivisionLevel); + const uint32_t bitsPerTri = uPerTri * 2u; + const uint32_t bytesPerTri = (bitsPerTri + 7u) / 8u; + + std::vector packed(static_cast(triangleCount) * bytesPerTri, 0u); + for (uint32_t t = 0; t < triangleCount; ++t) { + for (uint32_t m = 0; m < uPerTri; ++m) { + const uint8_t s = unpacked[t * uPerTri + m] & 0x3u; + const uint32_t bit = m * 2u; + packed[static_cast(t) * bytesPerTri + bit / 8u] |= + static_cast(s << (bit % 8u)); + } + } + return packed; +} + +uint32_t findMemType(const vk::raii::PhysicalDevice& pd, + uint32_t filter, + vk::MemoryPropertyFlags flags) +{ + const auto props = pd.getMemoryProperties(); + for (uint32_t i = 0; i < props.memoryTypeCount; ++i) + if ((filter & (1u << i)) && (props.memoryTypes[i].propertyFlags & flags) == flags) + return i; + throw std::runtime_error("[OMM] No suitable memory type found"); +} + +// Allocate a device-local buffer with the given usage and return {buffer, memory}. 
+struct BufMem { vk::raii::Buffer buf; vk::raii::DeviceMemory mem; }; + +BufMem makeDeviceBuffer(const vk::raii::Device& dev, + const vk::raii::PhysicalDevice& pd, + vk::DeviceSize size, + vk::BufferUsageFlags usage) +{ + vk::raii::Buffer buf(dev, vk::BufferCreateInfo{ + .size = size, + .usage = usage + }); + auto reqs = buf.getMemoryRequirements(); + vk::MemoryAllocateFlagsInfo flagsInfo{ .flags = vk::MemoryAllocateFlagBits::eDeviceAddress }; + vk::raii::DeviceMemory mem(dev, vk::MemoryAllocateInfo{ + .pNext = &flagsInfo, + .allocationSize = reqs.size, + .memoryTypeIndex = findMemType(pd, reqs.memoryTypeBits, + vk::MemoryPropertyFlagBits::eDeviceLocal) + }); + buf.bindMemory(*mem, 0); + return { std::move(buf), std::move(mem) }; +} + +BufMem makeStagingBuffer(const vk::raii::Device& dev, + const vk::raii::PhysicalDevice& pd, + const void* data, + vk::DeviceSize size) +{ + vk::raii::Buffer buf(dev, vk::BufferCreateInfo{ + .size = size, + .usage = vk::BufferUsageFlagBits::eTransferSrc + }); + auto reqs = buf.getMemoryRequirements(); + vk::raii::DeviceMemory mem(dev, vk::MemoryAllocateInfo{ + .allocationSize = reqs.size, + .memoryTypeIndex = findMemType(pd, reqs.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent) + }); + buf.bindMemory(*mem, 0); + void* p = mem.mapMemory(0, size); + std::memcpy(p, data, static_cast(size)); + mem.unmapMemory(); + return { std::move(buf), std::move(mem) }; +} + +} // namespace + + +// ============================================================================= +// Lifecycle +// ============================================================================= + +void OpacityMicromapBuilder::swap(OpacityMicromapBuilder& other) noexcept { + if (this == &other) return; + std::lock_guard l1(m_mutex); + std::lock_guard l2(other.m_mutex); + std::swap(m_initialised, other.m_initialised); + std::swap(m_supported, other.m_supported); + std::swap(m_renderer, other.m_renderer); + 
std::swap(m_device, other.m_device); + std::swap(m_physDev, other.m_physDev); + std::swap(m_gfxFamily, other.m_gfxFamily); + std::swap(m_entries, other.m_entries); + std::swap(m_meshToEntry, other.m_meshToEntry); + std::swap(m_infos, other.m_infos); + std::swap(m_totalGpuBytes, other.m_totalGpuBytes); +} + +void OpacityMicromapBuilder::init(Renderer& renderer) { + std::lock_guard lock(m_mutex); + m_renderer = &renderer; + m_device = &renderer.GetRaiiDevice(); + m_physDev = &renderer.GetPhysicalDevice(); + m_gfxFamily = renderer.GetGraphicsQueueFamilyIndex(); + m_supported = renderer.GetOpacityMicromapEnabled(); + m_initialised = true; + + if (m_supported) + std::cout << "[OMM] Initialised — VK_KHR_opacity_micromap is enabled.\n"; + else + std::cout << "[OMM] VK_KHR_opacity_micromap not supported on this device; " + "alpha-tested shadows will use the any-hit shader path.\n"; +} + +void OpacityMicromapBuilder::reset() { + std::lock_guard lock(m_mutex); + m_entries.clear(); + m_infos.clear(); + m_meshToEntry.clear(); + m_totalGpuBytes = 0; + m_initialised = false; +} + + +// ============================================================================= +// buildForMesh — public entry point; orchestrates all three phases +// ============================================================================= + +OmmMeshInfo OpacityMicromapBuilder::buildForMesh(const MeshComponent* mesh, + const uint8_t* texPixels, + uint32_t texW, + uint32_t texH, + uint32_t texChannels, + const OmmConfig& config) +{ + OmmMeshInfo result{}; + if (!m_initialised || !m_supported) return result; + if (!mesh || !texPixels || texW == 0 || texH == 0) return result; + + const auto& verts = mesh->GetVertices(); + const auto& indices = mesh->GetIndices(); + if (verts.empty() || indices.empty() || indices.size() % 3 != 0) return result; + + // Phase 1 + if (!analyseVariation(verts, indices, texPixels, texW, texH, texChannels, config)) { + std::cout << "[OMM] Skipping mesh — no meaningful alpha variation 
detected.\n"; + return result; + } + + // Phase 2 + std::vector unpacked; + classify(verts, indices, texPixels, texW, texH, texChannels, config, unpacked, result); + + // Phase 3 + const uint32_t triCount = static_cast(indices.size() / 3); + result.pNextChain = buildOnGpu(mesh, unpacked, triCount, + config.subdivisionLevel, config.lossyBuild, result); + result.built = (result.pNextChain != nullptr); + + if (result.built) { + std::lock_guard lock(m_mutex); + m_infos.push_back(result); + m_totalGpuBytes += result.gpuBytes; + + std::cout << "[OMM] Built — tris=" << triCount + << " opaque=" << static_cast(result.pctOpaque * 100.f) << "%" + << " transparent=" << static_cast(result.pctTransparent * 100.f) << "%" + << " unknown=" << static_cast(result.pctUnknown * 100.f) << "%" + << " GPU=" << result.gpuBytes / 1024 << " KiB\n"; + } + return result; +} + +const OmmMeshInfo* OpacityMicromapBuilder::getInfo(const MeshComponent* mesh) const { + std::lock_guard lock(m_mutex); + auto it = m_meshToEntry.find(mesh); + return (it == m_meshToEntry.end()) ? 
nullptr : &m_infos[it->second]; +} + +uint32_t OpacityMicromapBuilder::micromapCount() const { + std::lock_guard lock(m_mutex); + return static_cast(m_entries.size()); +} + +uint64_t OpacityMicromapBuilder::totalGpuBytes() const { + std::lock_guard lock(m_mutex); + return m_totalGpuBytes; +} + + +// ============================================================================= +// Phase 1 — analyseVariation +// ============================================================================= + +bool OpacityMicromapBuilder::analyseVariation( + const std::vector& verts, + const std::vector& indices, + const uint8_t* pixels, uint32_t w, uint32_t h, uint32_t ch, + const OmmConfig& cfg) const +{ + const uint32_t triCount = static_cast(indices.size() / 3); + const uint32_t stride = std::max(1u, triCount / std::min(triCount, 512u)); + bool foundOpaque = false, foundTransparent = false; + + for (uint32_t t = 0; t < triCount; t += stride) { + const uint32_t i0 = indices[t*3+0], i1 = indices[t*3+1], i2 = indices[t*3+2]; + if (i0 >= verts.size() || i1 >= verts.size() || i2 >= verts.size()) continue; + const glm::vec2 cen = (verts[i0].texCoord + verts[i1].texCoord + verts[i2].texCoord) / 3.f; + const float a = sampleAlpha(pixels, w, h, ch, cen.x, cen.y); + if (a < cfg.transparentThreshold) foundTransparent = true; + if (a >= cfg.opaqueThreshold) foundOpaque = true; + if (foundOpaque && foundTransparent) return true; + } + return false; +} + + +// ============================================================================= +// Phase 2 — classify +// ============================================================================= + +void OpacityMicromapBuilder::classify( + const std::vector& verts, + const std::vector& indices, + const uint8_t* pixels, uint32_t w, uint32_t h, uint32_t ch, + const OmmConfig& cfg, + std::vector& outStates, + OmmMeshInfo& outInfo) const +{ + const uint32_t triCount = static_cast(indices.size() / 3); + const uint32_t uPerTri = 
microTriCount(cfg.subdivisionLevel); + outStates.assign(static_cast(triCount) * uPerTri, STATE_UNKNOWN_OPAQUE); + + const auto centroids = generateCentroids(cfg.subdivisionLevel); + + std::atomic nOpaque{0}, nTrans{0}, nUnknown{0}; + + const uint32_t numThreads = std::thread::hardware_concurrency(); + const uint32_t chunkSize = std::max(1u, triCount / (numThreads * 4u)); + std::vector> futures; + + auto processTriangles = [&](uint32_t startTri, uint32_t endTri) { + uint32_t lO = 0, lT = 0, lU = 0; + for (uint32_t t = startTri; t < endTri; ++t) { + const uint32_t i0 = indices[t*3+0], i1 = indices[t*3+1], i2 = indices[t*3+2]; + if (i0 >= verts.size() || i1 >= verts.size() || i2 >= verts.size()) { + lU += uPerTri; continue; + } + const glm::vec2 uv0 = verts[i0].texCoord; + const glm::vec2 uv1 = verts[i1].texCoord; + const glm::vec2 uv2 = verts[i2].texCoord; + + for (uint32_t m = 0; m < uPerTri; ++m) { + const float bU = centroids[m][0], bV = centroids[m][1]; + float alphaSum = 0.f; + for (uint32_t s = 0; s < cfg.samplesPerMicroTriangle; ++s) { + const float jU = bU + (s==1 ? 0.04f:0.f) - (s==2 ? 0.02f:0.f) - (s==3 ? 0.02f:0.f); + const float jV = bV + (s==2 ? 0.04f:0.f) - (s==1 ? 0.02f:0.f) - (s==3 ? 
0.02f:0.f); + const float jW = std::max(0.f, 1.f - jU - jV); + const glm::vec2 uv = jW * uv0 + jU * uv1 + jV * uv2; + alphaSum += sampleAlpha(pixels, w, h, ch, uv.x, uv.y); + } + const float avg = alphaSum / static_cast(cfg.samplesPerMicroTriangle); + + uint8_t state; + if (avg < cfg.transparentThreshold) { state = STATE_TRANSPARENT; ++lT; } + else if (avg >= cfg.opaqueThreshold) { state = STATE_OPAQUE; ++lO; } + else if (cfg.allowUnknownState) { state = STATE_UNKNOWN_OPAQUE; ++lU; } + else { state = STATE_OPAQUE; ++lO; } + + outStates[static_cast(t) * uPerTri + m] = state; + } + } + nOpaque += lO; nTrans += lT; nUnknown += lU; + }; + + if (m_renderer && m_renderer->GetThreadPool()) { + for (uint32_t t = 0; t < triCount; t += chunkSize) { + futures.push_back(m_renderer->GetThreadPool()->enqueue( + processTriangles, t, std::min(t + chunkSize, triCount))); + } + for (auto& f : futures) { + while (f.wait_for(std::chrono::milliseconds(100)) != std::future_status::ready) + if (m_renderer) m_renderer->KickWatchdog("OMM classify"); + } + } else { + processTriangles(0, triCount); + } + + const uint32_t total = triCount * uPerTri; + outInfo.totalMicroTris = total; + if (total > 0) { + const float inv = 1.f / static_cast(total); + outInfo.pctOpaque = static_cast(nOpaque.load()) * inv; + outInfo.pctTransparent = static_cast(nTrans.load()) * inv; + outInfo.pctUnknown = static_cast(nUnknown.load()) * inv; + } +} + + +// ============================================================================= +// Phase 3 — buildOnGpu +// +// KHR micromap build path (VK_KHR_opacity_micromap): +// 1. Upload packed state data and VkMicromapTriangleKHR array to device-local +// buffers via staging copies. +// 2. Fill VkAccelerationStructureGeometryMicromapDataKHR (chained via pNext of +// VkAccelerationStructureGeometryKHR with geometryType=eMicromap). +// 3. Query build sizes via vkGetAccelerationStructureBuildSizesKHR. +// 4. Allocate storage buffer with eAccelerationStructureStorageKHR. 
+// 5. Create VkAccelerationStructureKHR (type=eOpacityMicromap) via +// vkCreateAccelerationStructure2KHR (VK_KHR_device_address_commands). +// 6. Fill device addresses, record vkCmdBuildAccelerationStructuresKHR, +// submit and wait. +// 7. Fill VkAccelerationStructureTrianglesOpacityMicromapKHR for the BLAS +// pNext chain. +// ============================================================================= + +void* OpacityMicromapBuilder::buildOnGpu( + const MeshComponent* mesh, + const std::vector& unpackedStates, + uint32_t triangleCount, + uint32_t subdivisionLevel, + bool lossyBuild, + OmmMeshInfo& outInfo) +{ +#if defined(PLATFORM_ANDROID) + return nullptr; // OMM build is not supported on Android yet due to missing KHR symbols +#else + const auto& dev = *m_device; + + // ── Pack state data ──────────────────────────────────────────────────────── + const std::vector packed = packStates(unpackedStates, triangleCount, subdivisionLevel); + const vk::DeviceSize dataSize = static_cast(packed.size()); + + // ── Build VkMicromapTriangleKHR array ───────────────────────────────────── + const uint32_t uPerTri = microTriCount(subdivisionLevel); + const uint32_t bytesPerTri = (uPerTri * 2u + 7u) / 8u; + + std::vector triArray(triangleCount); + for (uint32_t i = 0; i < triangleCount; ++i) { + triArray[i].dataOffset = i * bytesPerTri; + triArray[i].subdivisionLevel = static_cast(subdivisionLevel); + triArray[i].format = static_cast(VK_OPACITY_MICROMAP_FORMAT_4_STATE_KHR); + } + const vk::DeviceSize triArraySize = triangleCount * sizeof(VkMicromapTriangleKHR); + + // ── One-shot command buffer ──────────────────────────────────────────────── + vk::raii::CommandPool localPool(dev, vk::CommandPoolCreateInfo{ + .flags = vk::CommandPoolCreateFlagBits::eTransient, + .queueFamilyIndex = m_gfxFamily + }); + auto cb = beginOneShot(localPool); + + // ── Stage + upload state data ────────────────────────────────────────────── + auto [stagDataBuf, stagDataMem] = 
makeStagingBuffer(dev, *m_physDev, packed.data(), dataSize); + auto [dataBuf, dataMem] = makeDeviceBuffer(dev, *m_physDev, dataSize, + vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eShaderDeviceAddress | + vk::BufferUsageFlagBits::eAccelerationStructureBuildInputReadOnlyKHR); + + cb.copyBuffer(*stagDataBuf, *dataBuf, vk::BufferCopy{ .size = dataSize }); + + // ── Stage + upload triangle array ───────────────────────────────────────── + auto [stagTriBuf, stagTriMem] = makeStagingBuffer(dev, *m_physDev, triArray.data(), triArraySize); + auto [triBuf, triMem] = makeDeviceBuffer(dev, *m_physDev, triArraySize, + vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eShaderDeviceAddress | + vk::BufferUsageFlagBits::eAccelerationStructureBuildInputReadOnlyKHR); + + cb.copyBuffer(*stagTriBuf, *triBuf, vk::BufferCopy{ .size = triArraySize }); + + // ── Barrier: transfer write → AS build read ─────────────────────────────── + const std::array barriers{{ + { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAccelerationStructureBuildKHR, + .dstAccessMask = vk::AccessFlagBits2::eAccelerationStructureReadKHR, + .buffer = *dataBuf, .offset = 0, .size = dataSize + }, + { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAccelerationStructureBuildKHR, + .dstAccessMask = vk::AccessFlagBits2::eAccelerationStructureReadKHR, + .buffer = *triBuf, .offset = 0, .size = triArraySize + } + }}; + cb.pipelineBarrier2(vk::DependencyInfo{ + .bufferMemoryBarrierCount = 2, + .pBufferMemoryBarriers = barriers.data() + }); + + // ── Usage entry ──────────────────────────────────────────────────────────── + VkMicromapUsageKHR usage{}; + usage.count = triangleCount; + usage.subdivisionLevel = subdivisionLevel; + usage.format = 
VK_OPACITY_MICROMAP_FORMAT_4_STATE_KHR; + + // ── Micromap geometry data (chained via pNext of the geometry struct) ────── + VkAccelerationStructureGeometryMicromapDataKHR micromapData{}; + micromapData.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_MICROMAP_DATA_KHR; + micromapData.usageCountsCount = 1; + micromapData.pUsageCounts = &usage; + micromapData.triangleArrayStride = sizeof(VkMicromapTriangleKHR); + // data and triangleArray device addresses filled after size query + + VkAccelerationStructureGeometryKHR geometry{}; + geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + geometry.pNext = µmapData; + geometry.geometryType = VK_GEOMETRY_TYPE_MICROMAP_KHR; + + // ── Build info (size query phase) ───────────────────────────────────────── + vk::BuildAccelerationStructureFlagsKHR buildFlags = + vk::BuildAccelerationStructureFlagBitsKHR::ePreferFastTrace; + if (lossyBuild) + buildFlags |= vk::BuildAccelerationStructureFlagBitsKHR::eMicromapLossy; + + VkAccelerationStructureBuildGeometryInfoKHR buildInfo{}; + buildInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR; + buildInfo.flags = static_cast(buildFlags); + buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + buildInfo.geometryCount = 1; + buildInfo.pGeometries = &geometry; + + // ── Pre-flight: verify VK_KHR_device_address_commands function is loaded ─── + if (!dev.getDispatcher()->vkCreateAccelerationStructure2KHR) + throw std::runtime_error( + "[OMM] vkCreateAccelerationStructure2KHR is null — " + "VK_KHR_device_address_commands was not enabled at device creation."); + + // ── Size query ───────────────────────────────────────────────────────────── + // pMaxPrimitiveCounts[i] is the maximum number of micromap triangles for + // geometry[i] per the VK_KHR_opacity_micromap spec. 
+ VkAccelerationStructureBuildSizesInfoKHR sizeInfo{}; + sizeInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + dev.getDispatcher()->vkGetAccelerationStructureBuildSizesKHR( + *dev, + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &buildInfo, + &triangleCount, + &sizeInfo); + + if (sizeInfo.accelerationStructureSize == 0) + throw std::runtime_error( + "[OMM] vkGetAccelerationStructureBuildSizesKHR returned zero size. " + "Ensure VK_KHR_opacity_micromap and VK_KHR_device_address_commands are enabled."); + + // ── Storage buffer for the micromap AS ──────────────────────────────────── + auto [mmStoreBuf, mmStoreMem] = makeDeviceBuffer(dev, *m_physDev, + sizeInfo.accelerationStructureSize, + vk::BufferUsageFlagBits::eAccelerationStructureStorageKHR | + vk::BufferUsageFlagBits::eShaderDeviceAddress); + + // ── Scratch buffer ───────────────────────────────────────────────────────── + auto [scratchBuf, scratchMem] = makeDeviceBuffer(dev, *m_physDev, + std::max(sizeInfo.buildScratchSize, VkDeviceSize{4}), + vk::BufferUsageFlagBits::eStorageBuffer | + vk::BufferUsageFlagBits::eShaderDeviceAddress); + + // ── Create VkAccelerationStructureKHR (type = opacity micromap) ─────────── + const vk::DeviceAddress storageAddr = dev.getBufferAddress({.buffer = *mmStoreBuf}); + auto micromap = dev.createAccelerationStructure2KHR(vk::AccelerationStructureCreateInfo2KHR{ + .addressRange = {storageAddr, sizeInfo.accelerationStructureSize}, + .type = vk::AccelerationStructureTypeKHR::eOpacityMicromap + }); + + // ── Fill device addresses and record build ──────────────────────────────── + micromapData.data = dev.getBufferAddress({.buffer = *dataBuf}); + micromapData.triangleArray = dev.getBufferAddress({.buffer = *triBuf}); + + buildInfo.dstAccelerationStructure = *micromap; + buildInfo.scratchData.deviceAddress = dev.getBufferAddress({ .buffer = *scratchBuf }); + + const VkAccelerationStructureBuildRangeInfoKHR rangeInfo{ + .primitiveCount = triangleCount, 
+ .primitiveOffset = 0, + .firstVertex = 0, + .transformOffset = 0 + }; + const VkAccelerationStructureBuildRangeInfoKHR* pRangeInfo = &rangeInfo; + dev.getDispatcher()->vkCmdBuildAccelerationStructuresKHR(*cb, 1, &buildInfo, &pRangeInfo); + + submitOneShot(cb); + + // ── Build the pNext attachment chain ────────────────────────────────────── + // VkAccelerationStructureTrianglesOpacityMicromapKHR is chained into pNext + // of VkAccelerationStructureGeometryTrianglesDataKHR for the BLAS build. + auto pNextOwner = std::make_unique(); + pNextOwner->usageEntry = usage; + + VkAccelerationStructureTrianglesOpacityMicromapKHR& chain = pNextOwner->chain; + chain.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_TRIANGLES_OPACITY_MICROMAP_KHR; + chain.pNext = nullptr; + chain.indexType = VK_INDEX_TYPE_NONE_KHR; // identity: tri N → entry N + chain.indexBuffer = {}; + chain.indexStride = 0; + chain.baseTriangle = 0; + chain.micromap = *micromap; + + void* pNextPtr = &chain; + + outInfo.gpuBytes = sizeInfo.accelerationStructureSize + dataSize + triArraySize; + + // ── Store GPU resources ──────────────────────────────────────────────────── + GpuEntry ge; + ge.dataBuf = std::move(dataBuf); + ge.dataMem = std::move(dataMem); + ge.triBuf = std::move(triBuf); + ge.triMem = std::move(triMem); + ge.mmStoreBuf = std::move(mmStoreBuf); + ge.mmStoreMem = std::move(mmStoreMem); + ge.micromap = std::move(micromap); + ge.pNextOwner = std::move(pNextOwner); + + { + std::lock_guard lock(m_mutex); + m_meshToEntry[mesh] = m_entries.size(); + m_entries.push_back(std::move(ge)); + } + + return pNextPtr; +#endif // !defined(PLATFORM_ANDROID) +} + +// ============================================================================= +// Helpers +// ============================================================================= + +float OpacityMicromapBuilder::sampleAlpha(const uint8_t* pixels, + uint32_t w, uint32_t h, uint32_t ch, + float u, float v) const +{ + u -= std::floor(u); + v -= 
std::floor(v); + const uint32_t px = std::min(static_cast(u * static_cast(w)), w - 1u); + const uint32_t py = std::min(static_cast(v * static_cast(h)), h - 1u); + const size_t off = (static_cast(py) * w + px) * ch; + if (ch == 1) return static_cast(pixels[off]) / 255.f; + if (ch == 2) return static_cast(pixels[off + 1]) / 255.f; + if (ch >= 4) return static_cast(pixels[off + 3]) / 255.f; + return 1.f; +} + +vk::raii::CommandBuffer OpacityMicromapBuilder::beginOneShot(vk::raii::CommandPool& pool) const { + auto bufs = m_device->allocateCommandBuffers(vk::CommandBufferAllocateInfo{ + .commandPool = *pool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = 1 + }); + auto cb = std::move(bufs[0]); + cb.begin(vk::CommandBufferBeginInfo{ + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }); + return cb; +} + +void OpacityMicromapBuilder::submitOneShot(vk::raii::CommandBuffer& cb) const { + cb.end(); + vk::raii::Fence fence(*m_device, vk::FenceCreateInfo{}); + m_renderer->SubmitToGraphicsQueue(*cb, *fence); + auto _ = m_device->waitForFences(*fence, vk::True, UINT64_MAX); + (void)_; +} diff --git a/attachments/simple_engine/Courses/opacity_micromap_builder.h b/attachments/simple_engine/Courses/opacity_micromap_builder.h new file mode 100644 index 000000000..b9f9a7fc0 --- /dev/null +++ b/attachments/simple_engine/Courses/opacity_micromap_builder.h @@ -0,0 +1,258 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +// ============================================================================= +// OpacityMicromapBuilder — Engine-integrated header +// ============================================================================= +// +// This class is part of the "Opacity Micromaps" course module. +// See: en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/ +// +// WHAT THIS DOES +// -------------- +// Builds a VkAccelerationStructureKHR (type=eOpacityMicromap) for every +// alpha-masked mesh submitted to it, then attaches the result into the engine's +// BLAS build via VkAccelerationStructureTrianglesOpacityMicromapKHR in the +// pNext chain. +// +// After the BLAS is rebuilt the GPU hardware traversal unit resolves most +// shadow-ray hits against alpha-tested geometry without running any shader +// code. Only micro-triangles at the very edge of the alpha boundary still +// invoke the any-hit shader. +// +// This module requires VK_KHR_opacity_micromap and VK_KHR_device_address_commands: +// - Build sizes via vkGetAccelerationStructureBuildSizesKHR +// - Micromap AS via vkCreateAccelerationStructure2KHR (type=eOpacityMicromap) +// - Build recorded via vkCmdBuildAccelerationStructuresKHR +// - Attached to BLAS via VkAccelerationStructureTrianglesOpacityMicromapKHR +// - Ray query shaders must declare the OpacityMicromapKHR SPIR-V execution +// mode (via SPV_KHR_opacity_micromap) for the hardware optimisation to +// activate. Without it the traversal unit ignores all micromap data. +// +// INTEGRATION SEQUENCE +// -------------------- +// 1. OpacityMicromapBuilder::init(renderer) +// — call once after Renderer::Initialize(). +// 2. OpacityMicromapBuilder::buildForMesh(mesh, pixels, w, h, ch, config) +// — call for every alpha-masked mesh just after texture data is +// available, before buildAccelerationStructures(). +// 3. 
During BLAS construction, look up getInfo(meshComp)->pNextChain and +// attach it to the geometry struct (done in omm_integration.cpp). +// 4. OpacityMicromapBuilder::reset() +// — call on scene change / engine shutdown. +// +// COURSE NOTE +// ----------- +// Read alongside: +// en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc +// ============================================================================= + +#include +#include +#include +#include +#include +#include + +#include +#include "../vulkan_compatibility.h" + +// Engine types +#include "../mesh_component.h" +#include "../model_loader.h" +#include "../memory_pool.h" + +// Forward declarations +class Renderer; + +// --------------------------------------------------------------------------- +// OmmConfig — tuning knobs exposed to students and the ImGui panel +// --------------------------------------------------------------------------- +struct OmmConfig { + /// Subdivision level 0–4. Level 2 → 16 micro-triangles per triangle. + /// Capped by VkPhysicalDeviceOpacityMicromapPropertiesKHR::maxOpacity4StateSubdivisionLevel. + /// With lossyBuild, up to maxOpacityLossy4StateSubdivisionLevel may be used. + uint32_t subdivisionLevel = 2; + + /// Alpha values below this threshold → TRANSPARENT (hardware passes ray, no shader). + float transparentThreshold = 0.05f; + + /// Alpha values at or above this threshold → OPAQUE (hardware blocks ray, no shader). + float opaqueThreshold = 0.95f; + + /// Samples taken per micro-triangle during classification. + uint32_t samplesPerMicroTriangle = 4; + + /// When true, boundary micro-triangles are classified UNKNOWN — the any-hit + /// shader fires for them. When false, they are forced OPAQUE. + bool allowUnknownState = true; + + /// Request VK_BUILD_ACCELERATION_STRUCTURE_MICROMAP_LOSSY_BIT_KHR. 
+ /// Allows the driver to apply lossy compression, potentially supporting + /// higher subdivision levels at the cost of occasional Unknown substitution. + bool lossyBuild = false; +}; + +// --------------------------------------------------------------------------- +// OmmMeshInfo — per-mesh result returned by buildForMesh() +// --------------------------------------------------------------------------- +struct OmmMeshInfo { + /// True when a micromap was successfully built for this mesh. + bool built = false; + + /// Pointer to a VkAccelerationStructureTrianglesOpacityMicromapKHR that + /// must be placed in the pNext chain of VkAccelerationStructureGeometryTrianglesDataKHR. + /// Owned by OpacityMicromapBuilder; valid until reset() is called. + void* pNextChain = nullptr; + + // Diagnostics shown in the ImGui panel. + float pctOpaque = 0.f; + float pctTransparent = 0.f; + float pctUnknown = 0.f; + uint32_t totalMicroTris = 0; + uint64_t gpuBytes = 0; +}; + +// --------------------------------------------------------------------------- +// OpacityMicromapBuilder +// --------------------------------------------------------------------------- +class OpacityMicromapBuilder { +public: + OpacityMicromapBuilder() = default; + ~OpacityMicromapBuilder() { reset(); } + + OpacityMicromapBuilder(const OpacityMicromapBuilder&) = delete; + OpacityMicromapBuilder& operator=(const OpacityMicromapBuilder&) = delete; + OpacityMicromapBuilder(OpacityMicromapBuilder&&) = delete; + OpacityMicromapBuilder& operator=(OpacityMicromapBuilder&&) = delete; + + // ── Lifecycle ────────────────────────────────────────────────────────────── + + void swap(OpacityMicromapBuilder& other) noexcept; + + /// Bind engine resources. Safe to call even when the extension is absent. + void init(Renderer& renderer); + + /// Release all GPU resources. Must be called before the Renderer is destroyed. + void reset(); + + /// Returns true if VK_KHR_opacity_micromap is available and was enabled. 
+ [[nodiscard]] bool isSupported() const { return m_supported; } + + // ── Build ────────────────────────────────────────────────────────────────── + + /// Classify micro-triangles and build the micromap acceleration structure + /// for one alpha-masked mesh. Call once per mesh after CPU texture data is + /// available, before the BLAS build. + [[nodiscard]] OmmMeshInfo buildForMesh(const MeshComponent* mesh, + const uint8_t* texPixels, + uint32_t texW, + uint32_t texH, + uint32_t texChannels, + const OmmConfig& config = {}); + + /// Look up the OmmMeshInfo for a given mesh. Returns nullptr if none exists. + [[nodiscard]] const OmmMeshInfo* getInfo(const MeshComponent* mesh) const; + + // ── Statistics ───────────────────────────────────────────────────────────── + + [[nodiscard]] uint32_t micromapCount() const; + [[nodiscard]] uint64_t totalGpuBytes() const; + +private: + // ── Internal phase implementations ──────────────────────────────────────── + + // Phase 1: does this mesh have alpha variation worth encoding? + [[nodiscard]] bool analyseVariation(const std::vector& verts, + const std::vector& indices, + const uint8_t* pixels, + uint32_t w, uint32_t h, uint32_t ch, + const OmmConfig& cfg) const; + + // Phase 2: CPU classification — assign a 2-bit state per micro-triangle. + void classify(const std::vector& verts, + const std::vector& indices, + const uint8_t* pixels, + uint32_t w, uint32_t h, uint32_t ch, + const OmmConfig& cfg, + std::vector& outStates, + OmmMeshInfo& outInfo) const; + + // Phase 3: GPU construction — upload packed states, create and build the + // VkAccelerationStructureKHR micromap, fill the pNext attachment chain. 
+ [[nodiscard]] void* buildOnGpu(const MeshComponent* mesh, + const std::vector& unpackedStates, + uint32_t triangleCount, + uint32_t subdivisionLevel, + bool lossyBuild, + OmmMeshInfo& outInfo); + + // ── Helpers ──────────────────────────────────────────────────────────────── + + [[nodiscard]] float sampleAlpha(const uint8_t* pixels, + uint32_t w, uint32_t h, uint32_t ch, + float u, float v) const; + + [[nodiscard]] vk::raii::CommandBuffer beginOneShot(vk::raii::CommandPool& pool) const; + void submitOneShot(vk::raii::CommandBuffer& cb) const; + + // ── Per-micromap GPU resource ownership ─────────────────────────────────── + // + // Micromaps are VkAccelerationStructureKHR objects (type=eOpacityMicromap) + // built via VK_KHR_device_address_commands + VK_KHR_opacity_micromap. + + struct GpuEntry { + // State data and triangle-array buffers (build inputs) + vk::raii::Buffer dataBuf {nullptr}; + vk::raii::DeviceMemory dataMem {nullptr}; + vk::raii::Buffer triBuf {nullptr}; + vk::raii::DeviceMemory triMem {nullptr}; + + // Backing storage buffer for the micromap AS + vk::raii::Buffer mmStoreBuf {nullptr}; + vk::raii::DeviceMemory mmStoreMem {nullptr}; + + // The micromap acceleration structure (VK_KHR_opacity_micromap) + vk::raii::AccelerationStructureKHR micromap {nullptr}; + + // VkAccelerationStructureTrianglesOpacityMicromapKHR + usage entry. + // Heap-allocated and never moved — the BLAS build holds a raw pointer. 
+ struct PNextStorage { + VkMicromapUsageKHR usageEntry{}; + VkAccelerationStructureTrianglesOpacityMicromapKHR chain{}; + }; + std::unique_ptr pNextOwner; + }; + + // ── State ────────────────────────────────────────────────────────────────── + + mutable std::mutex m_mutex; + + bool m_initialised = false; + bool m_supported = false; + Renderer* m_renderer = nullptr; + const vk::raii::Device* m_device = nullptr; + const vk::raii::PhysicalDevice* m_physDev = nullptr; + uint32_t m_gfxFamily = 0; + + std::unordered_map m_meshToEntry; + std::vector m_entries; + std::vector m_infos; + + uint64_t m_totalGpuBytes = 0; +}; diff --git a/attachments/simple_engine/android/app/build.gradle b/attachments/simple_engine/android/app/build.gradle index 178b4eda0..fefc1e87f 100644 --- a/attachments/simple_engine/android/app/build.gradle +++ b/attachments/simple_engine/android/app/build.gradle @@ -1,10 +1,10 @@ plugins { - id 'com.android.application' + alias(libs.plugins.android.application) } android { namespace "com.simple_engine" - compileSdk 36 + compileSdk 37 defaultConfig { applicationId "com.simple_engine" minSdk 24 @@ -20,10 +20,14 @@ android { } buildTypes { - release { + debug { minifyEnabled false proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' } + release { + minifyEnabled true + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } } compileOptions { @@ -34,7 +38,7 @@ android { externalNativeBuild { cmake { path "src/main/cpp/CMakeLists.txt" - version "4.0.2+" + version "3.29.0+" } } @@ -58,7 +62,7 @@ android { } dependencies { - implementation 'androidx.appcompat:appcompat:1.7.1' - implementation 'com.google.android.material:material:1.12.0' - implementation 'androidx.games:games-activity:4.0.0' + implementation libs.androidx.appcompat + implementation libs.material + implementation libs.androidx.games.activity } diff --git a/attachments/simple_engine/android/app/proguard-rules.pro 
b/attachments/simple_engine/android/app/proguard-rules.pro new file mode 100644 index 000000000..7fbac332d --- /dev/null +++ b/attachments/simple_engine/android/app/proguard-rules.pro @@ -0,0 +1,3 @@ +# Keep the Android Game SDK and androidx.games classes from being stripped or renamed by R8 +-keep class com.google.androidgamesdk.** { *; } +-keep class androidx.games.** { *; } diff --git a/attachments/simple_engine/android/app/src/main/AndroidManifest.xml b/attachments/simple_engine/android/app/src/main/AndroidManifest.xml index 55a581a38..0ae81b46f 100644 --- a/attachments/simple_engine/android/app/src/main/AndroidManifest.xml +++ b/attachments/simple_engine/android/app/src/main/AndroidManifest.xml @@ -1,5 +1,6 @@ - + @@ -13,8 +14,10 @@ android:theme="@style/AppTheme"> diff --git a/attachments/simple_engine/android/app/src/main/cpp/CMakeLists.txt b/attachments/simple_engine/android/app/src/main/cpp/CMakeLists.txt index 1efb5de4f..4ec6aad7e 100644 --- a/attachments/simple_engine/android/app/src/main/cpp/CMakeLists.txt +++ b/attachments/simple_engine/android/app/src/main/cpp/CMakeLists.txt @@ -22,8 +22,8 @@ add_library(simple_engine_android SHARED # Link against libraries target_link_libraries(simple_engine_android - SimpleEngine - game-activity::game-activity + -Wl,--whole-archive SimpleEngine -Wl,--no-whole-archive + -Wl,--whole-archive game-activity::game-activity -Wl,--no-whole-archive android log ) diff --git a/attachments/simple_engine/android/app/src/main/java/com/simple_engine/VulkanActivity.java b/attachments/simple_engine/android/app/src/main/java/com/simple_engine/VulkanActivity.java index ee893e3b8..4e10bf589 100644 --- a/attachments/simple_engine/android/app/src/main/java/com/simple_engine/VulkanActivity.java +++ b/attachments/simple_engine/android/app/src/main/java/com/simple_engine/VulkanActivity.java @@ -8,12 +8,9 @@ public class VulkanActivity extends GameActivity { @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); - - // Keep the screen on while the app is running
getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON); } - // Load the native library static { System.loadLibrary("simple_engine_android"); } diff --git a/attachments/simple_engine/android/app/src/main/res/values/styles.xml b/attachments/simple_engine/android/app/src/main/res/values/styles.xml index c63a3a912..cfe9aac3d 100644 --- a/attachments/simple_engine/android/app/src/main/res/values/styles.xml +++ b/attachments/simple_engine/android/app/src/main/res/values/styles.xml @@ -1,6 +1,9 @@ - diff --git a/attachments/simple_engine/android/build.gradle b/attachments/simple_engine/android/build.gradle index 8f8f43a07..e72fa5228 100644 --- a/attachments/simple_engine/android/build.gradle +++ b/attachments/simple_engine/android/build.gradle @@ -1,18 +1,9 @@ // Top-level build file where you can add configuration options common to all sub-projects/modules. -buildscript { - repositories { - google() - mavenCentral() - } - dependencies { - classpath 'com.android.tools.build:gradle:8.13.2' - - // NOTE: Do not place your application dependencies here; they belong - // in the individual module build.gradle files - } +plugins { + id 'com.android.application' version '9.2.1' apply false } // For Gradle 9.0+, use the Delete interface instead of type tasks.register('clean', Delete) { - delete rootProject.buildDir + delete rootProject.layout.buildDirectory } diff --git a/attachments/simple_engine/android/gradle.properties b/attachments/simple_engine/android/gradle.properties index adeef536f..543b9eebe 100644 --- a/attachments/simple_engine/android/gradle.properties +++ b/attachments/simple_engine/android/gradle.properties @@ -11,3 +11,7 @@ org.gradle.configuration-cache=true # Disable Java toolchain auto-detection to use current JVM org.gradle.java.installations.auto-detect=false org.gradle.java.installations.auto-download=false +android.uniquePackageNames=false +android.dependency.useConstraints=true +android.r8.strictFullModeForKeepRules=false +android.newDsl=true 
diff --git a/attachments/simple_engine/android/gradle/libs.versions.toml b/attachments/simple_engine/android/gradle/libs.versions.toml new file mode 100644 index 000000000..a56425583 --- /dev/null +++ b/attachments/simple_engine/android/gradle/libs.versions.toml @@ -0,0 +1,17 @@ +[versions] +agp = "9.2.1" +appcompat = "1.7.1" +gamesActivity = "4.4.2" +gradle = "9.2.1" +gradleToolchainsFoojayResolverConvention = "1.0.0" +material = "1.14.0" + +[libraries] +androidx-appcompat = { group = "androidx.appcompat", name = "appcompat", version.ref = "appcompat" } +androidx-games-activity = { group = "androidx.games", name = "games-activity", version.ref = "gamesActivity" } +gradle = { group = "com.android.tools.build", name = "gradle", version.ref = "gradle" } +material = { group = "com.google.android.material", name = "material", version.ref = "material" } + +[plugins] +android-application = { id = "com.android.application", version.ref = "agp" } +foojay = { id = "org.gradle.toolchains.foojay-resolver-convention", version.ref = "gradleToolchainsFoojayResolverConvention" } diff --git a/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.jar b/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.jar index 980502d16..b1b8ef56b 100644 Binary files a/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.jar and b/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.jar differ diff --git a/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.properties b/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.properties index 128196a7a..df6a6ad76 100644 --- a/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.properties +++ b/attachments/simple_engine/android/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,9 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-9.0-milestone-1-bin.zip 
+distributionUrl=https\://services.gradle.org/distributions/gradle-9.5.1-bin.zip networkTimeout=10000 +retries=0 +retryBackOffMs=500 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/attachments/simple_engine/android/gradlew b/attachments/simple_engine/android/gradlew index faf93008b..b9bb139f7 100755 --- a/attachments/simple_engine/android/gradlew +++ b/attachments/simple_engine/android/gradlew @@ -1,7 +1,7 @@ #!/bin/sh # -# Copyright © 2015-2021 the original authors. +# Copyright © 2015 the original authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ # Darwin, MinGW, and NonStop. # # (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# https://github.com/gradle/gradle/blob/3d91ce3b8caaf77ad09f381f43615b715b53f72c/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt # within the Gradle project. # # You can find Gradle at https://github.com/gradle/gradle/. @@ -114,7 +114,6 @@ case "$( uname )" in #( NONSTOP* ) nonstop=true ;; esac -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar # Determine the Java command to use to start the JVM. @@ -172,7 +171,6 @@ fi # For Cygwin or MSYS, switch paths to Windows format before running java if "$cygwin" || "$msys" ; then APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) - CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) JAVACMD=$( cygpath --unix "$JAVACMD" ) @@ -212,8 +210,7 @@ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ - -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ + -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ "$@" # Stop when "xargs" is not available. 
diff --git a/attachments/simple_engine/android/gradlew.bat b/attachments/simple_engine/android/gradlew.bat index 9d21a2183..24c62d56f 100644 --- a/attachments/simple_engine/android/gradlew.bat +++ b/attachments/simple_engine/android/gradlew.bat @@ -23,8 +23,8 @@ @rem @rem ########################################################################## -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal +@rem Set local scope for the variables, and ensure extensions are enabled +setlocal EnableExtensions set DIRNAME=%~dp0 if "%DIRNAME%"=="" set DIRNAME=. @@ -51,7 +51,7 @@ echo. 1>&2 echo Please set the JAVA_HOME variable in your environment to match the 1>&2 echo location of your Java installation. 1>&2 -goto fail +"%COMSPEC%" /c exit 1 :findJavaFromJavaHome set JAVA_HOME=%JAVA_HOME:"=% @@ -65,30 +65,18 @@ echo. 1>&2 echo Please set the JAVA_HOME variable in your environment to match the 1>&2 echo location of your Java installation. 1>&2 -goto fail +"%COMSPEC%" /c exit 1 :execute @rem Setup the command line -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* +@rem endlocal doesn't take effect until after the line is parsed and variables are expanded +@rem which allows us to clear the local environment before executing the java command +endlocal & "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %* & call :exitWithErrorLevel -:end -@rem End local scope for the variables with windows NT shell -if %ERRORLEVEL% equ 0 goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! 
-set EXIT_CODE=%ERRORLEVEL% -if %EXIT_CODE% equ 0 set EXIT_CODE=1 -if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% -exit /b %EXIT_CODE% - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega +:exitWithErrorLevel +@rem Use "%COMSPEC%" /c exit to allow operators to work properly in scripts +"%COMSPEC%" /c exit %ERRORLEVEL% diff --git a/attachments/simple_engine/android/settings.gradle b/attachments/simple_engine/android/settings.gradle index aac78f001..8ce70cdd5 100644 --- a/attachments/simple_engine/android/settings.gradle +++ b/attachments/simple_engine/android/settings.gradle @@ -6,6 +6,9 @@ pluginManagement { gradlePluginPortal() } } +plugins { + id 'org.gradle.toolchains.foojay-resolver-convention' version '1.0.0' +} // For Gradle 9.0+, dependency repositories should be configured here dependencyResolutionManagement { diff --git a/attachments/simple_engine/crash_reporter.h b/attachments/simple_engine/crash_reporter.h index 34abef832..9c4c0d5e4 100644 --- a/attachments/simple_engine/crash_reporter.h +++ b/attachments/simple_engine/crash_reporter.h @@ -32,10 +32,13 @@ # include # include # pragma comment(lib, "dbghelp.lib") -#elif defined(__APPLE__) || defined(__linux__) +#elif defined(__APPLE__) || (defined(__linux__) && !defined(__ANDROID__)) # include # include # include +#elif defined(__ANDROID__) +# include +# include #endif #include "debug_system.h" @@ -220,6 +223,12 @@ class CrashReporter CloseHandle(hFile); } +#elif defined(__ANDROID__) + // Android implementation: use standard signals + signal(SIGSEGV, SIG_DFL); + signal(SIGABRT, SIG_DFL); + signal(SIGFPE, SIG_DFL); + signal(SIGILL, SIG_DFL); #else // Unix implementation std::ofstream file(filename, std::ios::out | std::ios::binary); @@ -372,6 +381,12 @@ class CrashReporter CrashReporter::GetInstance().HandleCrashInternal("Unhandled exception", exInfo); return EXCEPTION_EXECUTE_HANDLER; }); +#elif defined(__ANDROID__) + // Android implementation: use standard signals + signal(SIGSEGV, SIG_DFL); + 
signal(SIGABRT, SIG_DFL); + signal(SIGFPE, SIG_DFL); + signal(SIGILL, SIG_DFL); #else // Unix implementation signal(SIGSEGV, [](int sig) { @@ -409,6 +424,12 @@ class CrashReporter RemoveVectoredExceptionHandler(vectoredHandlerHandle); vectoredHandlerHandle = nullptr; } +#elif defined(__ANDROID__) + // Android implementation: use standard signals + signal(SIGSEGV, SIG_DFL); + signal(SIGABRT, SIG_DFL); + signal(SIGFPE, SIG_DFL); + signal(SIGILL, SIG_DFL); #else // Unix implementation signal(SIGSEGV, SIG_DFL); diff --git a/attachments/simple_engine/engine.cpp b/attachments/simple_engine/engine.cpp index 676845ae0..0bfec0b78 100644 --- a/attachments/simple_engine/engine.cpp +++ b/attachments/simple_engine/engine.cpp @@ -17,6 +17,7 @@ #include "engine.h" #include "mesh_component.h" #include "scene_loading.h" +#include #include #include @@ -115,6 +116,12 @@ bool Engine::Initialize(const std::string& appName, int width, int height, bool // Physics system via constructor (GPU enabled) physicsSystem = std::make_unique(renderer.get(), true); +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // OMM integration via constructor + ommIntegration = std::make_unique(); + ommIntegration->init(*renderer, *modelLoader); +#endif + // ImGui via constructor, then connect audio system imguiSystem = std::make_unique(renderer.get(), width, height); imguiSystem->SetAudioSystem(audioSystem.get()); @@ -204,6 +211,9 @@ void Engine::Cleanup() { // Clean up subsystems in reverse order of creation imguiSystem.reset(); +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + ommIntegration.reset(); +#endif physicsSystem.reset(); audioSystem.reset(); modelLoader.reset(); @@ -357,78 +367,81 @@ PhysicsSystem* Engine::GetPhysicsSystem() { return physicsSystem.get(); } +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS +OmmIntegration* Engine::GetOmmIntegration() { + return ommIntegration.get(); +} +#endif + const ImGuiSystem* Engine::GetImGuiSystem() const { return imguiSystem.get(); } void Engine::handleMouseInput(float x, float y, 
uint32_t buttons) { - // Check if ImGui wants to capture mouse input first - bool imguiWantsMouse = imguiSystem && imguiSystem->WantCaptureMouse(); - - // Suppress right-click while loading - if (renderer&& renderer - - -> - IsLoading() - ) { - buttons &= ~2u; // clear right button bit + // Update ImGui system with current mouse state immediately. + // This pushes events to the ImGui IO queue for processing in NewFrame(). + if (imguiSystem) { + imguiSystem->HandleMouse(x, y, buttons); } - if (!imguiWantsMouse) { - // Handle mouse click for ball throwing (right mouse button) - if (buttons & 2) { - // Right mouse button (bit 1) - if (!cameraControl.mouseRightPressed) { - cameraControl.mouseRightPressed = true; - // Throw a ball on mouse click - ThrowBall(x, y); - } - } else { - cameraControl.mouseRightPressed = false; + // Handle LEFT button (Touch DOWN/MOVE/UP) + if (buttons & 1) { + if (!cameraControl.mouseLeftPressed) { + // Finger just went down + cameraControl.mouseLeftPressed = true; + cameraControl.firstMouse = true; + cameraControl.touchTotalDistance = 0.0f; + cameraControl.touchDownX = x; + cameraControl.touchDownY = y; + cameraControl.touchStartTime = 0.0; // We'll increment this in Update } - // Handle camera rotation when left mouse button is pressed - if (buttons & 1) { - // Left mouse button (bit 0) - if (!cameraControl.mouseLeftPressed) { - cameraControl.mouseLeftPressed = true; - cameraControl.firstMouse = true; - } - - if (cameraControl.firstMouse) { - cameraControl.lastMouseX = x; - cameraControl.lastMouseY = y; - cameraControl.firstMouse = false; - } - - float xOffset = x - cameraControl.lastMouseX; - float yOffset = y - cameraControl.lastMouseY; + if (cameraControl.firstMouse) { cameraControl.lastMouseX = x; cameraControl.lastMouseY = y; + cameraControl.firstMouse = false; + } - xOffset *= cameraControl.mouseSensitivity; - yOffset *= cameraControl.mouseSensitivity; - - // Mouse look: positive X moves view to the right; positive Y moves view up. 
- // Platform mouse coordinates increase downward, so invert Y. - cameraControl.yaw -= xOffset; - cameraControl.pitch -= yOffset; + // Accumulate movement deltas. These will be applied in UpdateCameraControls + // AFTER ImGui has updated its capture state (post-NewFrame). + float dx = (x - cameraControl.lastMouseX); + float dy = (y - cameraControl.lastMouseY); + cameraControl.pendingXOffset += dx; + cameraControl.pendingYOffset += dy; + cameraControl.touchTotalDistance += std::sqrt(dx*dx + dy*dy); - // Constrain pitch to avoid gimbal lock - if (cameraControl.pitch > 89.0f) - cameraControl.pitch = 89.0f; - if (cameraControl.pitch < -89.0f) - cameraControl.pitch = -89.0f; - } else { - cameraControl.mouseLeftPressed = false; +#if defined(PLATFORM_ANDROID) + // On Android, we map SWIPE to MOVEMENT (Forward/Backward, Left/Right) + // if the touch didn't start on UI. + if (!cameraControl.isFirstFrameOfInteraction && !cameraControl.startedOnImGui) { + cameraControl.touchMoveX = dx; + cameraControl.touchMoveY = dy; } +#endif + + cameraControl.lastMouseX = x; + cameraControl.lastMouseY = y; + } else { + // Finger lifted + cameraControl.mouseLeftPressed = false; } - if (imguiSystem) { - imguiSystem->HandleMouse(x, y, buttons); + // Handle RIGHT button (Ball throwing) + if (buttons & 2) { + if (!cameraControl.mouseRightPressed) { + cameraControl.mouseRightPressed = true; + // Note: We check capture status in NewFrame/Update for consistent behavior + // but for discrete clicks, we use the stale capture status or wait. + // On Android, we don't currently generate right clicks easily. 
+ if (imguiSystem && !imguiSystem->WantCaptureMouse()) { + ThrowBall(x, y); + } + } + } else { + cameraControl.mouseRightPressed = false; } - // Always perform hover detection (even when ImGui is active) + // Update hover detection HandleMouseHover(x, y); } void Engine::handleKeyInput(uint32_t key, bool pressed) { @@ -461,15 +474,15 @@ void Engine::handleKeyInput(uint32_t key, bool pressed) { default: break; } - - if (imguiSystem) { - imguiSystem->HandleKeyboard(key, pressed); - } #else // Android uses different input handling via touch events (void) key; (void) pressed; #endif + + if (imguiSystem) { + imguiSystem->HandleKeyboard(key, pressed); + } } void Engine::Update(TimeDelta deltaTime) { @@ -480,12 +493,13 @@ void Engine::Update(TimeDelta deltaTime) { // list from the main thread. This lets the loading thread construct // entities/components safely while the main thread only drives the // UI/loading overlay. - if (renderer&& renderer - - -> - IsLoading() - ) { + if (renderer && renderer->IsLoading()) { if (imguiSystem) { + uint32_t rw, rh; + renderer->GetSwapChainExtent(&rw, &rh); + if (rw > 0 && rh > 0) { + imguiSystem->HandleResize(rw, rh); + } imguiSystem->NewFrame(); } return; @@ -506,7 +520,14 @@ void Engine::Update(TimeDelta deltaTime) { audioSystem->Update(deltaTime); // Update ImGui system - imguiSystem->NewFrame(); + if (imguiSystem) { + uint32_t rw, rh; + renderer->GetSwapChainExtent(&rw, &rh); + if (rw > 0 && rh > 0) { + imguiSystem->HandleResize(rw, rh); + } + imguiSystem->NewFrame(); + } // Update camera controls if (activeCamera) { @@ -586,6 +607,8 @@ void Engine::HandleResize(int width, int height) const { if (height <= 0 || width <= 0) { return; } + LOGI("Engine: HandleResize %dx%d", width, height); + // Update the active camera's aspect ratio if (activeCamera) { activeCamera->SetAspectRatio(static_cast(width) / static_cast(height)); @@ -650,6 +673,158 @@ void Engine::UpdateCameraControls(TimeDelta deltaTime) { // Calculate movement speed float 
velocity = cameraControl.cameraSpeed * deltaTime.count() * .001f; + // Check if ImGui wants to capture mouse input (updated in NewFrame) + bool imguiWantsMouse = imguiSystem && imguiSystem->WantCaptureMouse(); + +#if defined(PLATFORM_ANDROID) + // --- Android: Ambitious Controls --- + // 1. Accelerometer -> Rotation (Tilting) + float ax = 0.0f, ay = 0.0f, az = 0.0f; + float androidPitchOffset = 0.0f; + float androidYawOffset = 0.0f; + if (platform->GetAccelerometerData(&ax, &ay, &az)) { + // Correct for display rotation (Portrait vs Landscape vs Reversed) + ax = -ax; + ay = -ay; + float rawX = ax; + float rawY = ay; + int rotation = platform->GetDisplayRotation(); + switch (rotation) { + case 1: // ROTATION_90 (Landscape Left) + ax = -rawY; ay = rawX; break; + case 2: // ROTATION_180 (Portrait Upside Down) + ax = -rawX; ay = -rawY; break; + case 3: // ROTATION_270 (Landscape Right) + ax = rawY; ay = -rawX; break; + default: // ROTATION_0 (Portrait) + break; + } + + // If not calibrated, take current values as neutral + if (!cameraControl.tiltCalibrated) { + cameraControl.tiltCenterX = ax; + cameraControl.tiltCenterY = ay; + cameraControl.tiltCalibrated = true; + } + + float dax = ax - cameraControl.tiltCenterX; + float day = ay - cameraControl.tiltCenterY; + + // Auto-recalibration: If the phone is held steady (small delta from current center), + // we slowly drift the center point towards the current reading. + // This allows the "rest" position to adapt to the user's hands. + float distFromCenter = std::sqrt(dax*dax + day*day); + float dt = deltaTime.count() * 0.001f; + + if (distFromCenter < 1.5f) { + // Steady detection: If we're close to the center for a while, snap it.
+ cameraControl.tiltSteadyTime += dt; + if (cameraControl.tiltSteadyTime > 0.5f) { + // Drift the center towards current value to "establish a new deadzone" + float driftRate = 2.0f * dt; + cameraControl.tiltCenterX += dax * driftRate; + cameraControl.tiltCenterY += day * driftRate; + } + } else { + cameraControl.tiltSteadyTime = 0.0f; + } + + // Deadzone and immediate stop: if within deadzone, motion is ZERO. + // Increased deadzone to 0.8f for more stability. + if (std::abs(dax) < 0.8f) dax = 0.0f; + if (std::abs(day) < 0.8f) day = 0.0f; + + // We multiply by deltaTime to ensure consistent rotation speed across different frame rates. + const float tiltSensitivity = 20.0f; // Degrees per second at max tilt + androidYawOffset = dax * tiltSensitivity * dt; + androidPitchOffset = -day * tiltSensitivity * dt; + } + + // 2. Swipe -> Movement + float androidMoveForward = 0.0f; + float androidMoveRight = 0.0f; + if (cameraControl.mouseLeftPressed && !cameraControl.startedOnImGui) { + const float moveSensitivity = 0.15f; + androidMoveRight = cameraControl.touchMoveX * moveSensitivity; + androidMoveForward = -cameraControl.touchMoveY * moveSensitivity; + } + // Clear touch movement frame delta + cameraControl.touchMoveX = 0.0f; + cameraControl.touchMoveY = 0.0f; + + // 3. Tap and Hold -> Reset Camera & Recalibrate Tilt + bool isHoldingToReset = false; + if (cameraControl.mouseLeftPressed && !cameraControl.startedOnImGui) { + cameraControl.touchStartTime += deltaTime.count() * 0.001f; + // If held for more than 0.5s without moving more than 10 pixels + if (cameraControl.touchStartTime > 0.5f && cameraControl.touchTotalDistance < 10.0f) { + cameraControl.yaw = 0.0f; + cameraControl.pitch = 0.0f; + + // Force the camera to be level (horizon-aligned) during reset. + // We extract the yaw from the base orientation and discard pitch/roll. 
+ glm::vec3 euler = glm::eulerAngles(cameraControl.baseOrientation); + cameraControl.baseOrientation = glm::angleAxis(euler.y, glm::vec3(0.0f, 1.0f, 0.0f)); + + // Recalibrate: current physical orientation becomes the new "zero" + // We use the rotation-corrected values already calculated in ax/ay above. + cameraControl.tiltCenterX = ax; + cameraControl.tiltCenterY = ay; + + androidYawOffset = 0.0f; + androidPitchOffset = 0.0f; + isHoldingToReset = true; + } + } else { + cameraControl.touchStartTime = 0.0f; + } +#endif + + // INTERACTION LOCKING LOGIC: + // If a touch began, we wait until ImGui has processed the first DOWN event (in NewFrame) + // before deciding whether this drag belongs to the GUI or the 3D Scene. + if (cameraControl.mouseLeftPressed) { + if (cameraControl.isFirstFrameOfInteraction) { + // This is the first frame (Update call) where the finger is DOWN. + // ImGui's WantCaptureMouse now accurately reflects if the tap was on a window. + cameraControl.startedOnImGui = imguiWantsMouse; + cameraControl.isFirstFrameOfInteraction = false; + } + + // Only apply rotation if the interaction started on the scene background + if (!cameraControl.startedOnImGui) { +#if !defined(PLATFORM_ANDROID) + float xOffset = cameraControl.pendingXOffset * cameraControl.mouseSensitivity; + float yOffset = cameraControl.pendingYOffset * cameraControl.mouseSensitivity; + + cameraControl.yaw -= xOffset; + cameraControl.pitch -= yOffset; +#endif + } + } else { + // Reset locking state when finger is lifted + cameraControl.isFirstFrameOfInteraction = true; + cameraControl.startedOnImGui = false; + } + +#if defined(PLATFORM_ANDROID) + // Apply Android tilt and swiping + if (!isHoldingToReset) { + cameraControl.yaw += androidYawOffset; + cameraControl.pitch += androidPitchOffset; + } +#endif + + // Constrain pitch to avoid gimbal lock + if (cameraControl.pitch > 89.0f) + cameraControl.pitch = 89.0f; + if (cameraControl.pitch < -89.0f) + cameraControl.pitch = -89.0f; + + // Clear 
accumulated offsets after processing + cameraControl.pendingXOffset = 0.0f; + cameraControl.pendingYOffset = 0.0f; + // Capture base orientation from GLTF camera once and then apply mouse deltas relative to it if (!cameraControl.baseOrientationCaptured) { // TransformComponent stores Euler in radians; convert to quaternion @@ -701,6 +876,14 @@ void Engine::UpdateCameraControls(TimeDelta deltaTime) { position -= up * velocity; } +#if defined(PLATFORM_ANDROID) + // Apply Android swipe-to-walk displacement + // We use the same front/right vectors but apply the swipe deltas. + // Note: androidMoveForward/Right are already calculated in the Android block above. + position += front * androidMoveForward * cameraControl.cameraSpeed * 0.02f; + position += right * androidMoveRight * cameraControl.cameraSpeed * 0.02f; +#endif + // Update camera position cameraTransform->SetPosition(position); // Apply rotation to the camera transform based on GLTF base orientation plus mouse deltas @@ -930,8 +1119,19 @@ void Engine::HandleMouseHover(float mouseX, float mouseY) { #if defined(PLATFORM_ANDROID) // Android-specific implementation bool Engine::InitializeAndroid(android_app* app, const std::string& appName, bool enableValidationLayers) { + // Record main thread identity + mainThreadId = std::this_thread::get_id(); + // Create platform platform = CreatePlatform(app); + + // Wait for the window to be initialized before continuing + while (app->window == nullptr) { + if (!platform->ProcessEvents()) { + return false; // Exit requested + } + } + if (!platform->Initialize(appName, 0, 0)) { return false; } @@ -943,33 +1143,12 @@ bool Engine::InitializeAndroid(android_app* app, const std::string& appName, boo // Set mouse callback platform->SetMouseCallback([this](float x, float y, uint32_t buttons) { - // Check if ImGui wants to
capture mouse input first - bool imguiWantsMouse = imguiSystem && imguiSystem->WantCaptureMouse(); - - if (!imguiWantsMouse) { - // Handle mouse click for ball throwing (right mouse button) - if (buttons & 2) { - // Right mouse button (bit 1) - if (!cameraControl.mouseRightPressed) { - cameraControl.mouseRightPressed = true; - // Throw a ball on mouse click - ThrowBall(x, y); - } - } else { - cameraControl.mouseRightPressed = false; - } - } - - if (imguiSystem) { - imguiSystem->HandleMouse(x, y, buttons); - } + handleMouseInput(x, y, buttons); }); // Set keyboard callback platform->SetKeyboardCallback([this](uint32_t key, bool pressed) { - if (imguiSystem) { - imguiSystem->HandleKeyboard(key, pressed); - } + handleKeyInput(key, pressed); }); // Set char callback @@ -980,31 +1159,50 @@ bool Engine::InitializeAndroid(android_app* app, const std::string& appName, boo }); // Create renderer + LOGI("Engine: Initializing Renderer..."); renderer = std::make_unique(platform.get()); if (!renderer->Initialize(appName, enableValidationLayers)) { + LOGE("Engine: Renderer initialization failed"); return false; } + LOGI("Engine: Renderer initialized successfully"); // Get window dimensions from platform for ImGui initialization int width, height; platform->GetWindowSize(&width, &height); + LOGI("Engine: Initial window size: %dx%d", width, height); + + // Ensure initial size is applied to camera and renderer + HandleResize(width, height); try { // Model loader via constructor; also wire into renderer + LOGI("Engine: Initializing ModelLoader..."); modelLoader = std::make_unique(renderer.get()); renderer->SetModelLoader(modelLoader.get()); // Audio system via constructor + LOGI("Engine: Initializing AudioSystem..."); audioSystem = std::make_unique(this, renderer.get()); // Physics system via constructor (GPU enabled) + LOGI("Engine: Initializing PhysicsSystem..."); physicsSystem = std::make_unique(renderer.get(), true); +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // OMM integration 
via constructor +LOGI("Engine: Initializing OmmIntegration..."); +ommIntegration = std::make_unique(); + ommIntegration->init(*renderer, *modelLoader); +#endif + // ImGui via constructor, then connect audio system + LOGI("Engine: Initializing ImGuiSystem..."); imguiSystem = std::make_unique(renderer.get(), width, height); imguiSystem->SetAudioSystem(audioSystem.get()); + LOGI("Engine: Subsystems initialized successfully"); } catch (const std::exception& e) { - std::cerr << "Subsystem initialization failed: " << e.what() << std::endl; + LOGE("Subsystem initialization failed: %s", e.what()); return false; } @@ -1025,16 +1223,29 @@ void Engine::RunAndroid() { running = true; - // Main loop is handled by the platform - // We just need to update and render when the platform is ready + while (running) { + // Process Android events + if (!platform->ProcessEvents()) { + running = false; + break; + } - // Calculate delta time - deltaTimeMs = CalculateDeltaTimeMs(); + // Only update and render if we have a valid window size + int width, height; + platform->GetWindowSize(&width, &height); + if (width > 0 && height > 0) { + // Calculate delta time + deltaTimeMs = CalculateDeltaTimeMs(); - // Update - Update(deltaTimeMs); + // Update + Update(deltaTimeMs); - // Render - Render(); + // Render + Render(); + } else { + // If the window is not ready or minimized, yield to the system + std::this_thread::sleep_for(std::chrono::milliseconds(16)); + } + } } -#endif \ No newline at end of file +#endif diff --git a/attachments/simple_engine/engine.h b/attachments/simple_engine/engine.h index 6f58027b7..2e3cabc6e 100644 --- a/attachments/simple_engine/engine.h +++ b/attachments/simple_engine/engine.h @@ -35,6 +35,10 @@ #include "renderer.h" #include "resource_manager.h" +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS +#include "Courses/omm_integration.h" +#endif + /** * @brief Main engine class that manages the game loop and subsystems. 
* @@ -160,6 +164,14 @@ class Engine */ PhysicsSystem *GetPhysicsSystem(); +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + /** + * @brief Get the OMM integration system. + * @return A pointer to the OmmIntegration system. + */ + OmmIntegration *GetOmmIntegration(); +#endif + /** * @brief Get the ImGui system. * @return A pointer to the ImGui system. @@ -218,7 +230,10 @@ class Engine std::unique_ptr modelLoader; std::unique_ptr audioSystem; std::unique_ptr physicsSystem; - std::unique_ptr imguiSystem; + std::unique_ptr imguiSystem; +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + std::unique_ptr ommIntegration; +#endif // Entities // NOTE: Entities can be created from a background loading thread (see `main.cpp`). @@ -276,6 +291,26 @@ class Engine float mouseSensitivity = 0.1f; bool baseOrientationCaptured = false; glm::quat baseOrientation{1.0f, 0.0f, 0.0f, 0.0f}; + + // Touch/Mouse interaction locking + bool startedOnImGui = false; + bool isFirstFrameOfInteraction = true; + float pendingXOffset = 0.0f; + float pendingYOffset = 0.0f; + + // Mobile movement state + float touchMoveX = 0.0f; // Left/Right + float touchMoveY = 0.0f; // Forward/Backward + float tiltYaw = 0.0f; + float tiltPitch = 0.0f; + float tiltCenterX = 0.0f; + float tiltCenterY = 0.0f; + bool tiltCalibrated = false; + float tiltSteadyTime = 0.0f; + float touchTotalDistance = 0.0f; + float touchDownX = 0.0f; + float touchDownY = 0.0f; + double touchStartTime = 0.0; } cameraControl; // Mouse position tracking diff --git a/attachments/simple_engine/imgui_system.cpp b/attachments/simple_engine/imgui_system.cpp index 7ce7c0313..e7e5b2ae7 100644 --- a/attachments/simple_engine/imgui_system.cpp +++ b/attachments/simple_engine/imgui_system.cpp @@ -63,6 +63,12 @@ bool ImGuiSystem::Initialize(Renderer* renderer, uint32_t width, uint32_t height // Set up ImGui style ImGui::StyleColorsDark(); +#if defined(PLATFORM_ANDROID) + // Scale UI for high-DPI mobile screens to ensure buttons are touchable + 
ImGui::GetStyle().ScaleAllSizes(2.0f); + io.FontGlobalScale = 2.0f; +#endif + // Create Vulkan resources if (!createResources()) { std::cerr << "Failed to create ImGui Vulkan resources" << std::endl; @@ -163,6 +169,12 @@ void ImGuiSystem::NewFrame() { ImGui::SetNextWindowPos(ImVec2(0, 0)); ImGui::SetNextWindowSize(dispSize); + + // Override style for loading overlay to ensure visibility and contrast + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0.05f, 0.05f, 0.05f, 1.0f)); + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.9f, 0.9f, 0.9f, 1.0f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.0f); + ImGuiWindowFlags flags = ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | @@ -207,9 +219,11 @@ void ImGuiSystem::NewFrame() { ImGui::Text("%s (%u/%u, %.1fs)", renderer->GetASBuildStage(), done, total, renderer->GetASBuildElapsedSeconds()); } ImGui::EndGroup(); - ImGui::PopStyleVar(); + ImGui::PopStyleVar(); // WindowPadding } ImGui::End(); + ImGui::PopStyleVar(); // WindowBorderSize + ImGui::PopStyleColor(2); // WindowBg, Text return; } } @@ -577,15 +591,27 @@ void ImGuiSystem::HandleMouse(float x, float y, uint32_t buttons) { ImGuiIO& io = ImGui::GetIO(); + // Inform ImGui that this is a touch screen event to adjust behavior (e.g. 
no hover tooltips) +#if defined(PLATFORM_ANDROID) + io.AddMouseSourceEvent(ImGuiMouseSource_TouchScreen); +#endif + // Update mouse position (v1.87+ event API) io.AddMousePosEvent(x, y); // Update mouse buttons (v1.87+ event API) // We compare with current state to send events only on change static uint32_t lastButtons = 0; - if ((buttons & 0x01) != (lastButtons & 0x01)) io.AddMouseButtonEvent(0, (buttons & 0x01) != 0); - if ((buttons & 0x02) != (lastButtons & 0x02)) io.AddMouseButtonEvent(1, (buttons & 0x02) != 0); - if ((buttons & 0x04) != (lastButtons & 0x04)) io.AddMouseButtonEvent(2, (buttons & 0x04) != 0); + if ((buttons & 0x01) != (lastButtons & 0x01)) { + io.AddMouseButtonEvent(0, (buttons & 0x01) != 0); + } + if ((buttons & 0x02) != (lastButtons & 0x02)) { + io.AddMouseButtonEvent(1, (buttons & 0x02) != 0); + } + if ((buttons & 0x04) != (lastButtons & 0x04)) { + io.AddMouseButtonEvent(2, (buttons & 0x04) != 0); + } + lastButtons = buttons; } diff --git a/attachments/simple_engine/main.cpp b/attachments/simple_engine/main.cpp index 38b8233c6..5421c74e6 100644 --- a/attachments/simple_engine/main.cpp +++ b/attachments/simple_engine/main.cpp @@ -52,9 +52,9 @@ void SetupScene(Engine *engine) // Add a camera component to the camera entity auto *camera = cameraEntity->AddComponent(); - camera->SetAspectRatio(static_cast(WINDOW_WIDTH) / static_cast(WINDOW_HEIGHT)); + // Camera aspect ratio will be set by the engine during initialization or resize events. 
- // Set the camera as the active camera + // Set the camera as the active camera engine->SetActiveCamera(camera); // Kick off GLTF model loading on a background thread so the main loop @@ -67,8 +67,12 @@ void SetupScene(Engine *engine) renderer->SetLoadingPhase(Renderer::LoadingPhase::Textures); } std::thread([engine] { - LoadGLTFModel(engine, "../Assets/bistro/bistro.gltf"); - }).detach(); +#if defined(PLATFORM_ANDROID) + LoadGLTFModel(engine, "bistro/bistro.gltf"); +#else + LoadGLTFModel(engine, "../Assets/bistro/bistro.gltf"); +#endif + }).detach(); } #if defined(PLATFORM_ANDROID) @@ -76,8 +80,7 @@ void SetupScene(Engine *engine) * @brief Android entry point. * @param app The Android app. */ -void android_main(android_app *app) -{ +extern "C" void android_main(android_app* app) { try { // Create the engine diff --git a/attachments/simple_engine/model_loader.cpp b/attachments/simple_engine/model_loader.cpp index 1fae5b542..c71208b04 100644 --- a/attachments/simple_engine/model_loader.cpp +++ b/attachments/simple_engine/model_loader.cpp @@ -371,11 +371,11 @@ void ModelLoader::ProcessMaterials(const tinygltf::Model& gltfModel, const auto& image = gltfModel.images[imageIndex]; std::string textureId = "gltf_baseColor_" + std::to_string(texIndex); if (!image.image.empty()) { - renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component); + renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, true, material->alphaMode == "MASK"); material->albedoTexturePath = textureId; } else if (!image.uri.empty()) { std::string filePath = baseTexturePath + image.uri; - renderer->LoadTextureAsync(filePath); + renderer->LoadTextureAsync(filePath, true, material->alphaMode == "MASK"); material->albedoTexturePath = filePath; } } @@ -394,14 +394,14 @@ void ModelLoader::ProcessMaterials(const tinygltf::Model& gltfModel, const auto& image = gltfModel.images[texture.source]; if 
(!image.image.empty()) { // Embedded image data (already decoded by tinygltf image loader) - renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, false); + renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, false, material->alphaMode == "MASK"); material->specGlossTexturePath = textureId; material->metallicRoughnessTexturePath = textureId; // reuse binding 2 } else if (!image.uri.empty()) { // External KTX2 file: offload libktx decode + upload to renderer worker threads std::string filePath = baseTexturePath + image.uri; renderer->RegisterTextureAlias(textureId, filePath); - renderer->LoadTextureAsync(filePath); + renderer->LoadTextureAsync(filePath, false, material->alphaMode == "MASK"); material->specGlossTexturePath = textureId; material->metallicRoughnessTexturePath = textureId; // reuse binding 2 } @@ -439,13 +439,13 @@ void ModelLoader::ProcessMaterials(const tinygltf::Model& gltfModel, const auto& image = gltfModel.images[imageIndex]; if (!image.image.empty()) { // Always use memory-based upload (KTX2 already decoded by SetImageLoader) - renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, true); + renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, true, material->alphaMode == "MASK"); material->albedoTexturePath = textureId; } else if (!image.uri.empty()) { // Offload KTX2 file reading/upload to renderer thread pool std::string filePath = baseTexturePath + image.uri; renderer->RegisterTextureAlias(textureId, filePath); - renderer->LoadTextureAsync(filePath, true); + renderer->LoadTextureAsync(filePath, true, material->alphaMode == "MASK"); material->albedoTexturePath = textureId; } else { std::cerr << " Warning: No decoded image bytes for base color texture index " << texIndex << std::endl; @@ -620,7 +620,7 @@ void 
ModelLoader::ProcessMaterials(const tinygltf::Model& gltfModel, if (!image.uri.empty()) { texIdOrPath = baseTexturePath + image.uri; // Schedule async load; libktx decoding will occur on renderer worker threads - renderer->LoadTextureAsync(texIdOrPath, true); + renderer->LoadTextureAsync(texIdOrPath, true, mat->alphaMode == "MASK"); mat->albedoTexturePath = texIdOrPath; } if (mat->albedoTexturePath.empty() && !image.image.empty()) { @@ -669,9 +669,9 @@ void ModelLoader::ProcessMaterials(const tinygltf::Model& gltfModel, std::string cand = candidateBase; cand.replace(pos, normalLower[pos] == '_' && normalLower.compare(pos, 5, "_ddna") == 0 ? 5 : 2, suf); // Ensure the file exists before attempting to load - if (std::filesystem::exists(cand)) { + if (renderer->fileExists(cand)) { // Schedule async load; libktx decoding will occur on renderer worker threads - renderer->LoadTextureAsync(cand, true); + renderer->LoadTextureAsync(cand, true, mat->alphaMode == "MASK"); mat->albedoTexturePath = cand; break; } @@ -714,12 +714,12 @@ void ModelLoader::ProcessMaterials(const tinygltf::Model& gltfModel, std::string textureId = baseTexturePath + imageUri; // use path string as ID for cache if (!image.image.empty()) { - renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component); + renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, true, mat->alphaMode == "MASK"); mat->albedoTexturePath = textureId; break; } else { // Fallback: offload KTX2 file load to renderer threads - renderer->LoadTextureAsync(textureId); + renderer->LoadTextureAsync(textureId, true, mat->alphaMode == "MASK"); mat->albedoTexturePath = textureId; break; } @@ -899,7 +899,7 @@ bool ModelLoader::ParseGLTF(const std::string& filename, Model* model) { // Extract the directory path from the model file to use as a base path for textures std::filesystem::path modelPath(filename); - std::filesystem::path 
baseDir = std::filesystem::absolute(modelPath).parent_path(); + std::filesystem::path baseDir = modelPath.parent_path(); std::string baseTexturePath = baseDir.string(); if (!baseTexturePath.empty() && baseTexturePath.back() != '/') { baseTexturePath += "/"; @@ -912,6 +912,32 @@ bool ModelLoader::ParseGLTF(const std::string& filename, Model* model) { std::string err; std::string warn; + // Set up file system callbacks to use our cross-platform readFile (supports Android assets) + tinygltf::FsCallbacks fsCallbacks; + fsCallbacks.user_data = this->renderer; + fsCallbacks.FileExists = [](const std::string& path, void* userData) -> bool { + Renderer* renderer = static_cast(userData); + return renderer->fileExists(path); + }; + fsCallbacks.ExpandFilePath = [](const std::string& path, void*) -> std::string { + return path; + }; + fsCallbacks.ReadWholeFile = [](std::vector* out, std::string* err, const std::string& path, void* userData) -> bool { + Renderer* renderer = static_cast(userData); + try { + std::vector data = renderer->readFile(path); + out->assign(data.begin(), data.end()); + return true; + } catch (const std::exception& e) { + if (err) *err = e.what(); + return false; + } + }; + fsCallbacks.WriteWholeFile = [](std::string*, const std::string&, const std::vector&, void*) -> bool { + return false; // No write support needed + }; + loader.SetFsCallbacks(fsCallbacks); + // Set up image loader: prefer KTX2 via libktx; fallback to stb for other formats loader.SetImageLoader(LoadKTX2Image, nullptr); @@ -1390,7 +1416,7 @@ bool ModelLoader::ParseGLTF(const std::string& filename, Model* model) { const auto& image = gltfModel.images[imageIndex]; if (!image.image.empty()) { if (!loadedTextures.contains(textureId)) { - renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, true); + renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, true, 
gltfMaterial.alphaMode == "MASK"); loadedTextures.insert(textureId); } } else { @@ -1422,12 +1448,12 @@ bool ModelLoader::ParseGLTF(const std::string& filename, Model* model) { // Use the relative path from the GLTF directory std::string textureId = baseTexturePath + imageUri; if (!image.image.empty()) { - renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component); + renderer->LoadTextureFromMemoryAsync(textureId, image.image.data(), image.width, image.height, image.component, true, gltfMaterial.alphaMode == "MASK"); materialMesh.baseColorTexturePath = textureId; materialMesh.texturePath = textureId; } else { // Fallback: offload KTX2 file load to renderer worker threads - renderer->LoadTextureAsync(textureId, true); + renderer->LoadTextureAsync(textureId, true, gltfMaterial.alphaMode == "MASK"); materialMesh.baseColorTexturePath = textureId; materialMesh.texturePath = textureId; } diff --git a/attachments/simple_engine/physics_system.cpp b/attachments/simple_engine/physics_system.cpp index 115bcbde4..8a2dd9281 100644 --- a/attachments/simple_engine/physics_system.cpp +++ b/attachments/simple_engine/physics_system.cpp @@ -617,22 +617,6 @@ bool PhysicsSystem::Raycast(const glm::vec3& origin, } // Helper function to read a shader file -static std::vector readFile(const std::string& filename) { - std::ifstream file(filename, std::ios::ate | std::ios::binary); - if (!file.is_open()) { - throw std::runtime_error("Failed to open file: " + filename); - } - - size_t fileSize = file.tellg(); - std::vector buffer(fileSize); - - file.seekg(0); - file.read(buffer.data(), static_cast(fileSize)); - file.close(); - - return buffer; -} - // Helper function to create a shader module static vk::raii::ShaderModule createShaderModule(const vk::raii::Device& device, const std::vector& code) { vk::ShaderModuleCreateInfo createInfo; @@ -659,7 +643,7 @@ bool PhysicsSystem::InitializeVulkanResources() { const vk::raii::Device& raiiDevice 
= renderer->GetRaiiDevice(); // Load physics shader once and reuse for all compute pipelines - std::vector physicsShaderCode = readFile("shaders/physics.spv"); + std::vector physicsShaderCode = renderer->readFile("shaders/physics.spv"); vulkanResources.integrateShaderModule = createShaderModule(raiiDevice, physicsShaderCode); vulkanResources.broadPhaseShaderModule = createShaderModule(raiiDevice, physicsShaderCode); vulkanResources.narrowPhaseShaderModule = createShaderModule(raiiDevice, physicsShaderCode); diff --git a/attachments/simple_engine/platform.cpp b/attachments/simple_engine/platform.cpp index 4f32c999c..a09852f16 100644 --- a/attachments/simple_engine/platform.cpp +++ b/attachments/simple_engine/platform.cpp @@ -28,6 +28,21 @@ AndroidPlatform::AndroidPlatform(android_app* androidApp) : app(androidApp) { // Set up the app's user data app->userData = this; + // Initialize sensors + // Use the deprecated but widely available getInstance() for compatibility with minSdk 24 + sensorManager = ASensorManager_getInstance(); + + if (sensorManager) { + accelerometerSensor = ASensorManager_getDefaultSensor(sensorManager, ASENSOR_TYPE_ACCELEROMETER); + if (accelerometerSensor) { + ALooper* looper = ALooper_forThread(); + if (!looper) { + looper = ALooper_prepare(ALOOPER_PREPARE_ALLOW_NON_CALLBACKS); + } + sensorEventQueue = ASensorManager_createEventQueue(sensorManager, looper, 3 /*IDENT_SENSOR*/, nullptr, nullptr); + } + } + // Set up the command callback app->onAppCmd = [](android_app* app, int32_t cmd) { auto* platform = static_cast(app->userData); @@ -35,22 +50,44 @@ AndroidPlatform::AndroidPlatform(android_app* androidApp) : app(androidApp) { switch (cmd) { case APP_CMD_INIT_WINDOW: case APP_CMD_WINDOW_RESIZED: + case APP_CMD_CONFIG_CHANGED: if (app->window != nullptr) { // Get the window dimensions ANativeWindow* window = app->window; - platform->width = ANativeWindow_getWidth(window); - platform->height = ANativeWindow_getHeight(window); - 
platform->windowResized = true; + int32_t newWidth = ANativeWindow_getWidth(window); + int32_t newHeight = ANativeWindow_getHeight(window); + + LOGI("AndroidPlatform: Window event %d. Dimensions: %dx%d", cmd, newWidth, newHeight); + + if (newWidth > 0 && newHeight > 0 && (newWidth != platform->width || newHeight != platform->height)) { + platform->width = newWidth; + platform->height = newHeight; + platform->windowResized = true; + + LOGI("AndroidPlatform: Resizing to %dx%d", platform->width, platform->height); - // Call the resize callback if set - if (platform->resizeCallback) { - platform->resizeCallback(platform->width, platform->height); + // Call the resize callback if set + if (platform->resizeCallback) { + platform->resizeCallback(platform->width, platform->height); + } } } break; case APP_CMD_TERM_WINDOW: - // Window is being hidden or closed + LOGI("AndroidPlatform: APP_CMD_TERM_WINDOW"); + // Window is being hidden or closed. Mark as resized with 0 size to stop rendering. + platform->width = 0; + platform->height = 0; + platform->windowResized = true; + break; + + case APP_CMD_GAINED_FOCUS: + LOGI("AndroidPlatform: APP_CMD_GAINED_FOCUS"); + break; + + case APP_CMD_LOST_FOCUS: + LOGI("AndroidPlatform: APP_CMD_LOST_FOCUS"); break; default: @@ -65,6 +102,13 @@ bool AndroidPlatform::Initialize(const std::string& appName, int requestedWidth, width = ANativeWindow_getWidth(app->window); height = ANativeWindow_getHeight(app->window); + // Enable accelerometer + if (sensorEventQueue && accelerometerSensor) { + ASensorEventQueue_enableSensor(sensorEventQueue, accelerometerSensor); + // Set sensor rate (e.g., 60Hz) + ASensorEventQueue_setEventRate(sensorEventQueue, accelerometerSensor, (1000L / 60) * 1000); + } + // Get device information for performance optimizations DetectDeviceCapabilities(); @@ -80,7 +124,13 @@ bool AndroidPlatform::Initialize(const std::string& appName, int requestedWidth, } void AndroidPlatform::Cleanup() { - // Nothing to clean up for 
Android + if (sensorEventQueue) { + if (accelerometerSensor) { + ASensorEventQueue_disableSensor(sensorEventQueue, accelerometerSensor); + } + ASensorManager_destroyEventQueue(sensorManager, sensorEventQueue); + sensorEventQueue = nullptr; + } } bool AndroidPlatform::ProcessEvents() { @@ -88,18 +138,87 @@ bool AndroidPlatform::ProcessEvents() { int events; android_poll_source* source; + int ident; // Poll for events with a timeout of 0 (non-blocking) - while (ALooper_pollOnce(0, nullptr, &events, (void **) &source) >= 0) { + // We check for both LOOPER_ID_MAIN (cmd/input) and IDENT_SENSOR (3) + while ((ident = ALooper_pollOnce(0, nullptr, &events, (void **) &source)) >= 0) { if (source != nullptr) { source->process(app, source); } + // Handle sensors if they triggered the looper + if (ident == 3 /*IDENT_SENSOR*/ && sensorEventQueue) { + ASensorEvent event; + while (ASensorEventQueue_getEvents(sensorEventQueue, &event, 1) > 0) { + if (event.type == ASENSOR_TYPE_ACCELEROMETER) { + accelX = event.acceleration.x; + accelY = event.acceleration.y; + accelZ = event.acceleration.z; + } + } + } + // Check if we are exiting if (app->destroyRequested != 0) { return false; } } + // Handle GameActivity input events + android_input_buffer* inputBuffer = android_app_swap_input_buffers(app); + if (inputBuffer) { + // Process motion events (touches) + for (uint64_t i = 0; i < inputBuffer->motionEventsCount; ++i) { + GameActivityMotionEvent& event = inputBuffer->motionEvents[i]; + + int32_t action = event.action & AMOTION_EVENT_ACTION_MASK; + + if (event.pointerCount > 0) { + // For mouse emulation, always follow the primary finger (index 0). + // This avoids position "jumps" when multiple fingers are used. 
+ float x = GameActivityPointerAxes_getX(&event.pointers[0]); + float y = GameActivityPointerAxes_getY(&event.pointers[0]); + + uint32_t buttons = 0; + if (action == AMOTION_EVENT_ACTION_DOWN || + action == AMOTION_EVENT_ACTION_MOVE || + action == AMOTION_EVENT_ACTION_POINTER_DOWN) { + buttons = 0x01; // Finger(s) down + } else if (action == AMOTION_EVENT_ACTION_UP || + action == AMOTION_EVENT_ACTION_CANCEL) { + buttons = 0x00; // All fingers up + } else if (action == AMOTION_EVENT_ACTION_POINTER_UP) { + // One finger up, but others might still be down. + // If the primary finger (0) was the one that left, the next finger + // will become index 0 in the NEXT event, so we'll get a release + // only when the LAST finger is lifted. + buttons = (event.pointerCount > 1) ? 0x01 : 0x00; + } + + // Diagnostic log for touch events (throttled) + static int moveLogThrottler = 0; + if (action != AMOTION_EVENT_ACTION_MOVE || ++moveLogThrottler % 30 == 0) { + LOGI("Touch: act=%d pos=(%.1f, %.1f) btn=%u count=%d", + action, x, y, buttons, event.pointerCount); + } + + if (mouseCallback) { + mouseCallback(x, y, buttons); + } + } + } + android_app_clear_motion_events(inputBuffer); + + // Process key events + for (uint64_t i = 0; i < inputBuffer->keyEventsCount; ++i) { + GameActivityKeyEvent& event = inputBuffer->keyEvents[i]; + if (keyboardCallback) { + keyboardCallback(event.keyCode, event.action == AKEY_EVENT_ACTION_DOWN); + } + } + android_app_clear_key_events(inputBuffer); + } + return true; } @@ -141,6 +260,33 @@ void AndroidPlatform::SetCharCallback(std::function callback) { charCallback = std::move(callback); } +int AndroidPlatform::GetDisplayRotation() const { + if (!app || !app->activity || !app->activity->javaGameActivity) return 0; + + JNIEnv* env = nullptr; + app->activity->vm->AttachCurrentThread(&env, nullptr); + int rotation = 0; + if (env) { + // 1. 
Get WindowManager from Activity via getWindowManager() + jclass activityClass = env->GetObjectClass(app->activity->javaGameActivity); + jmethodID getWindowManager = env->GetMethodID(activityClass, "getWindowManager", "()Landroid/view/WindowManager;"); + jobject windowManager = env->CallObjectMethod(app->activity->javaGameActivity, getWindowManager); + + // 2. Get Default Display from WindowManager + jclass windowManagerClass = env->FindClass("android/view/WindowManager"); + jmethodID getDefaultDisplay = env->GetMethodID(windowManagerClass, "getDefaultDisplay", "()Landroid/view/Display;"); + jobject display = env->CallObjectMethod(windowManager, getDefaultDisplay); + + // 3. Get Rotation from Display + jclass displayClass = env->FindClass("android/view/Display"); + jmethodID getRotation = env->GetMethodID(displayClass, "getRotation", "()I"); + rotation = env->CallIntMethod(display, getRotation); + + app->activity->vm->DetachCurrentThread(); + } + return rotation; +} + void AndroidPlatform::SetWindowTitle([[maybe_unused]] const std::string& title) { // No-op on Android - mobile apps don't have window titles } @@ -253,9 +399,10 @@ void AndroidPlatform::InitializeTouchInput() { if (!app) return; - // GameActivity specific input handling is handled via GameActivity_set*Callback in the glue, - // but the android_app structure in the new glue doesn't have onInputEvent anymore. - // Instead, we rely on the activity callbacks or the internal event processing. + // Configure GameActivity to pass all motion and key events to the native side + // without filtering. This ensures we see all touches, moves, and releases. 
+ android_app_set_motion_event_filter(app, nullptr); + android_app_set_key_event_filter(app, nullptr); } void AndroidPlatform::EnablePowerSavingMode(bool enable) { diff --git a/attachments/simple_engine/platform.h b/attachments/simple_engine/platform.h index a784f327c..e14b617cd 100644 --- a/attachments/simple_engine/platform.h +++ b/attachments/simple_engine/platform.h @@ -147,9 +147,30 @@ class Platform { * @param title The new window title. */ virtual void SetWindowTitle(const std::string& title) = 0; + + /** + * @brief Get the current accelerometer data (tilting). + * @param x Output for X axis tilting. + * @param y Output for Y axis tilting. + * @param z Output for Z axis tilting. + * @return True if sensor data was successfully retrieved. + */ + virtual bool GetAccelerometerData(float* x, float* y, float* z) const { + *x = 0.0f; *y = 0.0f; *z = 0.0f; + return false; + } + + /** + * @brief Get the current display rotation. + * @return 0 for 0 degrees, 1 for 90, 2 for 180, 3 for 270. + */ + virtual int GetDisplayRotation() const { + return 0; + } }; #if defined(PLATFORM_ANDROID) +#include /** * @brief Android implementation of the Platform interface. */ @@ -164,6 +185,14 @@ class AndroidPlatform : public Platform { std::function keyboardCallback; std::function charCallback; + // Sensor support + ASensorManager* sensorManager = nullptr; + const ASensor* accelerometerSensor = nullptr; + ASensorEventQueue* sensorEventQueue = nullptr; + float accelX = 0.0f; + float accelY = 0.0f; + float accelZ = 0.0f; + // Mobile-specific properties struct DeviceCapabilities { int apiLevel = 0; @@ -320,6 +349,21 @@ class AndroidPlatform : public Platform { */ void SetWindowTitle(const std::string& title) override; + /** + * @brief Get the current accelerometer data (tilting). + */ + bool GetAccelerometerData(float* x, float* y, float* z) const override { + *x = accelX; + *y = accelY; + *z = accelZ; + return true; + } + + /** + * @brief Get the current display rotation. 
+ */ + int GetDisplayRotation() const override; + /** * @brief Get the Android app. * @return The Android app. diff --git a/attachments/simple_engine/renderer.h b/attachments/simple_engine/renderer.h index 2c716e229..65cabadae 100644 --- a/attachments/simple_engine/renderer.h +++ b/attachments/simple_engine/renderer.h @@ -45,16 +45,7 @@ #include "platform.h" #include "thread_pool.h" -// Fallback defines for optional extension names (allow compiling against older headers) -#ifndef VK_EXT_ROBUSTNESS_2_EXTENSION_NAME -# define VK_EXT_ROBUSTNESS_2_EXTENSION_NAME "VK_EXT_robustness2" -#endif -#ifndef VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME -# define VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME "VK_KHR_dynamic_rendering_local_read" -#endif -#ifndef VK_EXT_SHADER_TILE_IMAGE_EXTENSION_NAME -# define VK_EXT_SHADER_TILE_IMAGE_EXTENSION_NAME "VK_EXT_shader_tile_image" -#endif +#include "vulkan_compatibility.h" // Forward declarations class ImGuiSystem; @@ -402,13 +393,27 @@ class Renderer { }; std::lock_guard lock(queueMutex); // Prefer compute queue when available; otherwise, fall back to graphics queue to avoid crashes - if (*computeQueue) { + if (*computeQueue != VK_NULL_HANDLE) { computeQueue.submit(submitInfo, fence); } else { graphicsQueue.submit(submitInfo, fence); } } + /** + * @brief Submit a command buffer to the graphics queue with proper synchronization. + * @param commandBuffer The command buffer to submit. + * @param fence The fence to signal when the operation completes. + */ + void SubmitToGraphicsQueue(vk::CommandBuffer commandBuffer, vk::Fence fence) const { + vk::SubmitInfo submitInfo{ + .commandBufferCount = 1, + .pCommandBuffers = &commandBuffer + }; + std::lock_guard lock(queueMutex); + graphicsQueue.submit(submitInfo, fence); + } + /** * @brief Create a shader module from SPIR-V code. * @param code The SPIR-V code. @@ -433,14 +438,14 @@ class Renderer { * @param texturePath The path to the texture file. 
* @return True if the texture was loaded successfully, false otherwise. */ - bool LoadTexture(const std::string& texturePath); + bool LoadTexture(const std::string& texturePath, bool cachePixels = false); // Asynchronous texture loading APIs (thread-pool backed). // The 'critical' flag is used to front-load important textures (e.g., // baseColor/albedo) so the scene looks mostly correct before the loading // screen disappears. Non-critical textures (normals, MR, AO, emissive) // can stream in after geometry is visible. - std::future LoadTextureAsync(const std::string& texturePath, bool critical = false); + std::future LoadTextureAsync(const std::string& texturePath, bool critical = false, bool cachePixels = false); /** * @brief Load a texture from raw image data in memory. @@ -455,7 +460,8 @@ class Renderer { const unsigned char* imageData, int width, int height, - int channels); + int channels, + bool cachePixels = false); // Asynchronous upload from memory (RGBA/RGB/other). Safe for concurrent calls. std::future LoadTextureFromMemoryAsync(const std::string& textureId, @@ -463,7 +469,8 @@ class Renderer { int width, int height, int channels, - bool critical = false); + bool critical = false, + bool cachePixels = false); // Progress query for UI uint32_t GetTextureTasksScheduled() const { @@ -520,6 +527,62 @@ class Renderer { // creation sees the final textureResources instead of fallbacks. void WaitForAllTextureTasks(); + // Block until meshResources has been populated and its count is stable. + // The render thread drains deferred mesh uploads asynchronously; this polls + // until the count has not changed for 4 consecutive 50 ms steps so that + // OMM can scan a complete mesh list. Returns false on timeout. 
+ bool WaitForMeshResourcesToSettle(float timeoutSeconds = 60.f) const { + const auto deadline = std::chrono::steady_clock::now() + + std::chrono::duration_cast( + std::chrono::duration(timeoutSeconds)); + size_t prevCount = 0; + int stableSteps = 0; + while (true) { + const size_t cur = GetRegisteredMeshes().size(); + if (cur > 0 && cur == prevCount) { + if (++stableSteps >= 4) return true; // stable for 4 × 50 ms = 200 ms + } else { + stableSteps = 0; + } + prevCount = cur; + if (std::chrono::steady_clock::now() >= deadline) return false; + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + } + + // WaitForAllTextureTasks() only confirms that jobs have been ENQUEUED to the + // upload workers — it does not wait for the workers to finish processing them. + // This method polls rawPixelCache.size() until it is stable, confirming that + // upload worker threads have finished their StoreRawTexturePixels calls. + // Returns false on timeout. +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + bool WaitForRawPixelCacheToSettle(float timeoutSeconds = 60.f) const { + const auto deadline = std::chrono::steady_clock::now() + + std::chrono::duration_cast( + std::chrono::duration(timeoutSeconds)); + size_t prevCount = 0; + int stableSteps = 0; + while (true) { + size_t cur; { + std::shared_lock lk(rawPixelCacheMutex); + cur = rawPixelCache.size(); + } + if (cur == prevCount) { + // Populated cache: 200 ms stability (4 × 50 ms). + // Empty cache: 2 s grace (40 × 50 ms) so we don't mistake + // "upload workers haven't started yet" for "no MASK textures". + const int threshold = (cur > 0) ? 4 : 40; + if (++stableSteps >= threshold) return cur > 0; + } else { + stableSteps = 0; + } + prevCount = cur; + if (std::chrono::steady_clock::now() >= deadline) return false; + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + } +#endif // ENABLE_COURSE_OPACITY_MICROMAPS + // Process pending texture GPU uploads on the calling thread. 
// This should be invoked from the main/render thread so that all
     // Vulkan work happens from a single thread while worker threads
@@ -579,7 +642,10 @@ class Renderer {
         loadingPhaseProgress.store(std::clamp(v, 0.0f, 1.0f), std::memory_order_relaxed);
     }
     void MarkInitialLoadComplete() {
+        LOGI("Renderer: MarkInitialLoadComplete");
         initialLoadComplete.store(true, std::memory_order_relaxed);
+        // Unsuppress watchdog after initial load is complete
+        watchdogSuppressed.store(false, std::memory_order_relaxed);
         SetLoadingPhase(LoadingPhase::Finalizing);
         loadingPhaseProgress.store(1.0f, std::memory_order_relaxed);
     }
@@ -599,11 +665,22 @@ class Renderer {
         return loadingFlag.load(std::memory_order_relaxed);
     }
     void SetLoading(bool v) {
+        LOGI("Renderer: SetLoading %s", v ? "true" : "false");
         loadingFlag.store(v, std::memory_order_relaxed);
         if (v) {
             // New load cycle starting
             initialLoadComplete.store(false, std::memory_order_relaxed);
             SetLoadingPhase(LoadingPhase::Scene);
+        } else {
+            // Load cycle ending (successfully or not), ensure we don't stay in white-fallback state forever.
+            // We don't call MarkInitialLoadComplete() here because that triggers "Finalizing" phase,
+            // which the render thread uses to clear other flags. We just want to ensure IsLoading() can return false.
+            // If scene construction failed, there are no more AS/textures pending, so this is safe.
+            if (!initialLoadComplete.load(std::memory_order_relaxed)) {
+                LOGI("Renderer: Ending load cycle without completion mark. Forcing completion to avoid deadlock.");
+                initialLoadComplete.store(true, std::memory_order_relaxed);
+                watchdogSuppressed.store(false, std::memory_order_relaxed);
+            }
         }
     }
 
@@ -783,6 +860,120 @@ class Renderer {
     bool GetAccelerationStructureEnabled() const {
         return accelerationStructureEnabled;
     }
+#ifdef ENABLE_COURSE_OPACITY_MICROMAPS
+    bool GetOpacityMicromapEnabled() const {
+        return opacityMicromapEnabled;
+    }
+#else
+    bool GetOpacityMicromapEnabled() const { return false; } // stub keeps the same accessor name as the enabled build so callers compile in both configurations
+#endif
+    // Get physical device (needed by Course modules for extension querying)
+    const vk::raii::PhysicalDevice& GetPhysicalDevice() const {
+        return physicalDevice;
+    }
+    // Get command pool (needed by Course modules for one-shot command buffers)
+    const vk::raii::CommandPool& GetCommandPool() const {
+        return commandPool;
+    }
+    // Get graphics queue (needed by Course modules for submissions). Returned by value; queueMutex is held only while the handle is read.
+    vk::Queue GetGraphicsQueue() const {
+        std::lock_guard lock(queueMutex);
+        return *graphicsQueue;
+    }
+    // Get graphics queue family index (needed by Course modules for command pool creation)
+    uint32_t GetGraphicsQueueFamilyIndex() const {
+        return queueFamilyIndices.graphicsFamily.value(); // NOTE(review): presumably fails if no graphics family was selected yet — confirm callers run after device selection
+    }
+
+#ifdef ENABLE_COURSE_OPACITY_MICROMAPS
+    // ── Course module helpers ─────────────────────────────────────────────────
+    // Store CPU-side pixel data for an alpha-masked texture so that Course
+    // modules (e.g. OpacityMicromapBuilder) can classify micro-triangles
+    // without re-loading from disk. No-op if the ID is already cached.
+    void StoreRawTexturePixels(const std::string& id,
+                               const uint8_t* pixels,
+                               uint32_t width,
+                               uint32_t height,
+                               uint32_t channels) {
+        const std::string resolved = ResolveTextureId(id); // the cache is keyed by the resolved ID, not the raw one
+        std::unique_lock lk(rawPixelCacheMutex);
+        if (rawPixelCache.count(resolved)) return; // already stored
+        auto& entry = rawPixelCache[resolved];
+        entry.width = width;
+        entry.height = height;
+        entry.channels = channels;
+        entry.pixels.assign(pixels, pixels + static_cast(width) * height * channels); // copies width * height * channels bytes; pixels must point to at least that many
+    }
+
+    // Retrieve previously-stored CPU pixel data. Returns nullptr when not cached. Each out-pointer may be null. The returned pointer refers to storage owned by the cache and is handed out after the shared lock is released, so it stays valid only while the entry exists (e.g. until ClearRawPixelCache()).
+    const uint8_t* GetRawTexturePixels(const std::string& id,
+                                       uint32_t* outWidth,
+                                       uint32_t* outHeight,
+                                       uint32_t* outChannels) const {
+        const std::string resolved = ResolveTextureId(id);
+        std::shared_lock lk(rawPixelCacheMutex);
+        auto it = rawPixelCache.find(resolved);
+        if (it == rawPixelCache.end()) return nullptr; // out-parameters are left untouched on a miss
+        if (outWidth) *outWidth = it->second.width;
+        if (outHeight) *outHeight = it->second.height;
+        if (outChannels) *outChannels = it->second.channels;
+        return it->second.pixels.data();
+    }
+
+    // Clear the raw pixel cache (call on scene unload to free memory).
+    void ClearRawPixelCache() {
+        std::unique_lock lk(rawPixelCacheMutex);
+        rawPixelCache.clear(); // destroys the stored pixel buffers
+    }
+#endif
+
+    // Return a snapshot of all MeshComponent pointers that have GPU resources.
+    // Used by Course modules to iterate meshes without coupling to internal maps.
+    std::vector GetRegisteredMeshes() const {
+        std::vector out;
+        out.reserve(meshResources.size());
+        for (const auto& kv : meshResources) {
+            if (kv.first) out.push_back(kv.first); // null keys are skipped
+        }
+        return out;
+    }
+    // Get memory pool (needed by Course modules for GPU allocations)
+    MemoryPool& GetMemoryPool() const {
+        return *memoryPool; // dereferenced without a null check
+    }
+
+    // -------------------------------------------------------------------------
+    // ImGui panel extension point (Course modules / plugins)
+    // Register a callback that will be invoked inside the "Renderer" ImGui window
+    // immediately after the built-in controls, once per frame.
+    // The callback receives a pointer to this Renderer for state queries.
+    // Call with nullptr to unregister.
+    // -------------------------------------------------------------------------
+    using ImGuiPanelCallback = std::function;
+    using MicromapProviderCallback = std::function;
+    void RegisterImGuiPanel(ImGuiPanelCallback cb) {
+        std::lock_guard lock(imguiPanelCallbackMutex);
+        imguiPanelCallback = std::move(cb);
+    }
+    void RegisterMicromapProvider(MicromapProviderCallback cb) {
+        std::lock_guard lock(micromapProviderMutex);
+        micromapProvider = std::move(cb);
+    }
+    void* GetMicromapPNext(const MeshComponent* mesh) const {
+        std::lock_guard lock(micromapProviderMutex);
+        return micromapProvider ? micromapProvider(mesh) : nullptr; // provider runs while micromapProviderMutex is held; nullptr when none is registered
+    }
+    void UnregisterImGuiPanel() {
+        std::lock_guard lock(imguiPanelCallbackMutex);
+        imguiPanelCallback = nullptr;
+    }
+
+    // --- Thread pool & Watchdog access for course modules ---
+    ThreadPool* GetThreadPool() const { return threadPool.get(); }
+    void KickWatchdog(const char* label = nullptr) {
+        lastFrameUpdateTime.store(std::chrono::steady_clock::now(), std::memory_order_relaxed);
+        if (label) watchdogProgressLabel.store(label); // NOTE(review): presumably stores the pointer, not a copy of the text — confirm label outlives the watchdog
+    }
 
     // Ray Query static-only mode (disable animation/physics updates and TLAS refits to render a static opaque scene)
     void SetRayQueryStaticOnly(bool v) {
@@ -792,6 +983,11 @@ class Renderer {
         return rayQueryStaticOnly;
     }
 
+    void GetSwapChainExtent(uint32_t* width, uint32_t* height) const { // writes the current swapchain extent; either out-pointer may be null
+        if (width) *width = swapChainExtent.width;
+        if (height) *height = swapChainExtent.height;
+    }
+
     /**
      * @brief Request acceleration structure build at next safe frame point.
      * Safe to call from any thread (e.g., background loading thread).
@@ -1389,6 +1585,7 @@ class Renderer {
         int width = 0;
         int height = 0;
         int channels = 0;
+        bool cachePixels = false;
     };
 
     std::mutex pendingTextureJobsMutex;
@@ -1562,7 +1759,7 @@ class Renderer {
     };
 
     // Optional device extensions
-    const std::vector optionalDeviceExtensions = {
+    std::vector optionalDeviceExtensions = {
         VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME,
         VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
         VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME,
@@ -1586,6 +1783,8 @@ class Renderer {
     bool initialized = false;
     // Whether VK_EXT_descriptor_indexing (update-after-bind) path is enabled
     bool descriptorIndexingEnabled = false;
+    bool descriptorBindingUniformBufferUpdateAfterBindEnabled = false;
+    bool descriptorBindingSampledImageUpdateAfterBindEnabled = false;
     bool storageAfterBindEnabled = false;
     // Feature toggles detected/enabled at device creation
     bool robustness2Enabled = false;
@@ -1593,6 +1792,9 @@ class Renderer {
     bool shaderTileImageEnabled = false;
     bool rayQueryEnabled = false;
     bool
accelerationStructureEnabled = false;
+#ifdef ENABLE_COURSE_OPACITY_MICROMAPS
+    bool opacityMicromapEnabled = false; // VK_KHR_opacity_micromap (Course: Opacity Micromaps)
+#endif
 
     // When true and current render mode is RayQuery, the engine renders a static opaque scene:
     // - Animation/physics updates are suppressed by the Engine (input/Update hook)
@@ -1688,6 +1890,26 @@ class Renderer {
     bool enableRayQueryReflections = true; // UI toggle to enable reflections in ray query mode
     bool enableRayQueryTransparency = true; // UI toggle to enable transparency/refraction in ray query mode
 
+    // Extension callbacks for Course modules / plugins: the ImGui panel hook and the micromap pNext provider, each guarded by its own mutex.
+    ImGuiPanelCallback imguiPanelCallback;
+    MicromapProviderCallback micromapProvider;
+    mutable std::mutex micromapProviderMutex;
+    mutable std::mutex imguiPanelCallbackMutex;
+
+#ifdef ENABLE_COURSE_OPACITY_MICROMAPS
+    // Raw CPU pixel cache for alpha-masked textures (Course: Opacity Micromaps).
+    // Populated by StoreRawTexturePixels(); consumed by GetRawTexturePixels().
+    // Keyed by resolved texture ID; emptied by ClearRawPixelCache() (intended to be called on scene unload).
+    struct RawPixelEntry {
+        std::vector pixels; // copy of the source image: width * height * channels bytes
+        uint32_t width = 0;
+        uint32_t height = 0;
+        uint32_t channels = 0;
+    };
+    std::unordered_map rawPixelCache;
+    mutable std::shared_mutex rawPixelCacheMutex; // shared for lookups and size polls, exclusive for store and clear
+#endif
+
     // === Watchdog system to detect application hangs ===
     // Atomic timestamp updated every frame - watchdog thread checks if stale
     std::atomic lastFrameUpdateTime;
@@ -1766,7 +1988,7 @@ class Renderer {
     // Shadow mapping methods
     bool createComputeCommandPool();
     bool createDepthResources();
-    bool createTextureImage(const std::string& texturePath, TextureResources& resources);
+    bool createTextureImage(const std::string& texturePath, TextureResources& resources, bool cachePixels = false);
     bool createTextureImageView(TextureResources& resources);
     bool createTextureSampler(TextureResources& resources);
     bool createDefaultTextureResources();
@@ -1874,8 +2096,7 @@ class Renderer {
 
     vk::Format findSupportedFormat(const std::vector& candidates, vk::ImageTiling tiling, vk::FormatFeatureFlags features);
     bool hasStencilComponent(vk::Format format);
-    std::vector readFile(const std::string& filename);
-
+  private:
     // Background uploader helpers
     void StartUploadsWorker(size_t workerCount = 0);
     void StopUploadsWorker();
@@ -1887,6 +2108,8 @@ class Renderer {
 
     // Upload perf getters
   public:
+    std::vector readFile(const std::string& filename); // declared under public: here; the earlier declaration above is removed by this change
+    bool fileExists(const std::string& filename);
     uint64_t GetBytesUploadedTotal() const {
         return bytesUploadedTotal.load(std::memory_order_relaxed);
     }
@@ -1909,4 +2132,4 @@ class Renderer {
         double mb = static_cast(bytesUploadedTotal.load(std::memory_order_relaxed)) / (1024.0 * 1024.0);
         return seconds > 0.0 ?
(mb / seconds) : 0.0;
     }
-};
\ No newline at end of file
+};
diff --git a/attachments/simple_engine/renderer_core.cpp b/attachments/simple_engine/renderer_core.cpp
index d2c2af1d8..6088067ad 100644
--- a/attachments/simple_engine/renderer_core.cpp
+++ b/attachments/simple_engine/renderer_core.cpp
@@ -15,6 +15,11 @@
  * limitations under the License.
  */
 #include "renderer.h"
+
+#ifdef PLATFORM_ANDROID
+#include
+#endif
+
 #include
 #include
 #include
@@ -120,6 +125,17 @@ Renderer::Renderer(Platform* platform) : platform(platform) {
     // Initialize deviceExtensions with required extensions only
     // Optional extensions will be added later after checking device support
     deviceExtensions = requiredDeviceExtensions;
+
+    // NOTE(review): the flag is stored as false here, so the watchdog is NOT suppressed at startup. The earlier wording
+    // ("Suppress watchdog by default during startup", for debugger attachment / slow mobile init) implied true — confirm which is intended.
+    watchdogSuppressed.store(false, std::memory_order_relaxed);
+
+#if defined(PLATFORM_ANDROID)
+    // Re-enable Ray Query and Forward+ for Android now that basic rendering is stabilized
+    currentRenderMode = RenderMode::RayQuery;
+    useForwardPlus = true;
+    forwardPlusPerFrame.resize(MAX_FRAMES_IN_FLIGHT);
+#endif
 }
 
 // Destructor
@@ -129,204 +145,275 @@ Renderer::~Renderer() {
 
 // Initialize the renderer
 bool Renderer::Initialize(const std::string& appName, bool enableValidationLayers) {
-    // Initialize the Vulkan-Hpp default dispatcher using the global symbol directly.
-    // This avoids differences across Vulkan-Hpp versions for DynamicLoader placement.
+    LOGI("Renderer::Initialize start");
+    // Initialize the Vulkan-Hpp default dispatcher.
+    // On Android, use a dynamic loader to ensure we get the correct entry point.
+#if defined(PLATFORM_ANDROID) + LOGI("Initializing dispatcher with DynamicLoader..."); + static vk::detail::DynamicLoader dl; + PFN_vkGetInstanceProcAddr pvkGetInstanceProcAddr = dl.getProcAddress("vkGetInstanceProcAddr"); + if (!pvkGetInstanceProcAddr) { + LOGE("Failed to load vkGetInstanceProcAddr!"); + return false; + } + VULKAN_HPP_DEFAULT_DISPATCHER.init(pvkGetInstanceProcAddr); + LOGI("Dispatcher initialized"); +#else VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); +#endif // Create a Vulkan instance + LOGI("Creating Vulkan instance..."); if (!createInstance(appName, enableValidationLayers)) { - std::cerr << "Failed to create Vulkan instance" << std::endl; + LOGE("Failed to create Vulkan instance"); return false; } + LOGI("Instance created successfully"); // Setup debug messenger + LOGI("Setting up debug messenger..."); if (!setupDebugMessenger(enableValidationLayers)) { - std::cerr << "Failed to setup debug messenger" << std::endl; + LOGE("Failed to setup debug messenger"); return false; } + LOGI("Debug messenger setup successfully"); // Create surface + LOGI("Creating surface..."); if (!createSurface()) { - std::cerr << "Failed to create surface" << std::endl; + LOGE("Failed to create surface"); return false; } + LOGI("Surface created successfully"); // Pick the physical device + LOGI("Picking physical device..."); if (!pickPhysicalDevice()) { - std::cerr << "Failed to pick physical device" << std::endl; + LOGE("Failed to pick physical device"); return false; } + LOGI("Physical device picked successfully"); // Create logical device + LOGI("Creating logical device..."); if (!createLogicalDevice(enableValidationLayers)) { - std::cerr << "Failed to create logical device" << std::endl; + LOGE("Failed to create logical device"); return false; } + LOGI("Logical device created successfully"); // Initialize memory pool for efficient memory management + LOGI("Initializing memory pool..."); try { memoryPool = std::make_unique(device, physicalDevice); if 
(!memoryPool->initialize()) { - std::cerr << "Failed to initialize memory pool" << std::endl; + LOGE("Failed to initialize memory pool"); return false; } - - // Optionally pre-allocate initial memory blocks for pools. - // For large scenes (e.g., Bistro) on mid-range GPUs this can cause early OOM. - // Skip pre-allocation to reduce peak memory pressure; blocks will be created on demand. - // if (!memoryPool->preAllocatePools()) { /* non-fatal */ } + LOGI("Memory pool initialized successfully"); } catch (const std::exception& e) { - std::cerr << "Failed to create memory pool: " << e.what() << std::endl; + LOGE("Failed to create memory pool: %s", e.what()); return false; } // Create swap chain + LOGI("Creating swap chain..."); if (!createSwapChain()) { - std::cerr << "Failed to create swap chain" << std::endl; + LOGE("Failed to create swap chain"); return false; } + LOGI("Swap chain created successfully"); // Create image views + LOGI("Creating image views..."); if (!createImageViews()) { - std::cerr << "Failed to create image views" << std::endl; + LOGE("Failed to create image views"); return false; } + LOGI("Image views created successfully"); // Setup dynamic rendering + LOGI("Setting up dynamic rendering..."); if (!setupDynamicRendering()) { - std::cerr << "Failed to setup dynamic rendering" << std::endl; + LOGE("Failed to setup dynamic rendering"); return false; } + LOGI("Dynamic rendering setup successfully"); // Create the descriptor set layout + LOGI("Creating descriptor set layout..."); if (!createDescriptorSetLayout()) { - std::cerr << "Failed to create descriptor set layout" << std::endl; + LOGE("Failed to create descriptor set layout"); return false; } + LOGI("Descriptor set layout created successfully"); // Create the graphics pipeline + LOGI("Creating graphics pipeline..."); if (!createGraphicsPipeline()) { - std::cerr << "Failed to create graphics pipeline" << std::endl; + LOGE("Failed to create graphics pipeline"); return false; } + LOGI("Graphics 
pipeline created successfully"); // Create PBR pipeline + LOGI("Creating PBR pipeline..."); if (!createPBRPipeline()) { - std::cerr << "Failed to create PBR pipeline" << std::endl; + LOGE("Failed to create PBR pipeline"); return false; } + LOGI("PBR pipeline created successfully"); // Create the lighting pipeline + LOGI("Creating lighting pipeline..."); if (!createLightingPipeline()) { - std::cerr << "Failed to create lighting pipeline" << std::endl; + LOGE("Failed to create lighting pipeline"); return false; } + LOGI("Lighting pipeline created successfully"); // Create composite pipeline (fullscreen pass for off-screen → swapchain) + LOGI("Creating composite pipeline..."); if (!createCompositePipeline()) { - std::cerr << "Failed to create composite pipeline" << std::endl; + LOGE("Failed to create composite pipeline"); return false; } + LOGI("Composite pipeline created successfully"); // Create compute pipeline + LOGI("Creating compute pipeline..."); if (!createComputePipeline()) { - std::cerr << "Failed to create compute pipeline" << std::endl; + LOGE("Failed to create compute pipeline"); return false; } + LOGI("Compute pipeline created successfully"); // Ensure light storage buffers exist before creating Forward+ resources // so that compute descriptor binding 0 (lights SSBO) can be populated safely. 
+ LOGI("Creating light storage buffers..."); if (!createOrResizeLightStorageBuffers(1)) { - std::cerr << "Failed to create initial light storage buffers" << std::endl; + LOGE("Failed to create initial light storage buffers"); return false; } + LOGI("Light storage buffers created successfully"); // Create Forward+ compute and depth pre-pass pipelines/resources if (useForwardPlus) { + LOGI("Creating Forward+ resources..."); if (!createForwardPlusPipelinesAndResources()) { - std::cerr << "Failed to create Forward+ resources" << std::endl; + LOGE("Failed to create Forward+ resources"); return false; } + LOGI("Forward+ resources created successfully"); } // Create ray query descriptor set layout and pipeline (but not resources yet - need descriptor pool first) + LOGI("Creating ray query descriptor set layout..."); if (!createRayQueryDescriptorSetLayout()) { - std::cerr << "Failed to create ray query descriptor set layout" << std::endl; + LOGE("Failed to create ray query descriptor set layout"); return false; } + LOGI("Ray query descriptor set layout created successfully"); + + LOGI("Creating ray query pipeline..."); if (!createRayQueryPipeline()) { - std::cerr << "Failed to create ray query pipeline" << std::endl; + LOGE("Failed to create ray query pipeline"); return false; } + LOGI("Ray query pipeline created successfully"); // Create the command pool + LOGI("Creating command pool..."); if (!createCommandPool()) { - std::cerr << "Failed to create command pool" << std::endl; + LOGE("Failed to create command pool"); return false; } + LOGI("Command pool created successfully"); // Create depth resources + LOGI("Creating depth resources..."); if (!createDepthResources()) { - std::cerr << "Failed to create depth resources" << std::endl; + LOGE("Failed to create depth resources"); return false; } + LOGI("Depth resources created successfully"); if (useForwardPlus) { + LOGI("Creating depth prepass pipeline..."); if (!createDepthPrepassPipeline()) { - std::cerr << "Failed to 
create depth prepass pipeline" << std::endl; + LOGE("Failed to create depth prepass pipeline"); return false; } + LOGI("Depth prepass pipeline created successfully"); } // Create the descriptor pool + LOGI("Creating descriptor pool..."); if (!createDescriptorPool()) { - std::cerr << "Failed to create descriptor pool" << std::endl; + LOGE("Failed to create descriptor pool"); return false; } + LOGI("Descriptor pool created successfully"); // Create ray query resources AFTER descriptor pool (needs pool for descriptor set allocation) + LOGI("Creating ray query resources..."); if (!createRayQueryResources()) { - std::cerr << "Failed to create ray query resources" << std::endl; + LOGE("Failed to create ray query resources"); return false; } + LOGI("Ray query resources created successfully"); // Note: Acceleration structure build is requested by scene_loading.cpp after entities load // No need to request it here during init // Light storage buffers were already created earlier to satisfy Forward+ binding requirements + LOGI("Creating opaque scene color resources..."); if (!createOpaqueSceneColorResources()) { - std::cerr << "Failed to create opaque scene color resources" << std::endl; + LOGE("Failed to create opaque scene color resources"); return false; } + LOGI("Opaque scene color resources created successfully"); + LOGI("Creating transparent descriptor sets..."); createTransparentDescriptorSets(); + LOGI("Transparent descriptor sets created"); // Create default texture resources + LOGI("Creating default texture resources..."); if (!createDefaultTextureResources()) { - std::cerr << "Failed to create default texture resources" << std::endl; + LOGE("Failed to create default texture resources"); return false; } + LOGI("Default texture resources created successfully"); // Create fallback transparent descriptor sets (must occur after default textures exist) + LOGI("Creating fallback transparent descriptor sets..."); createTransparentFallbackDescriptorSets(); + LOGI("Fallback 
transparent descriptor sets created"); // Create shared default PBR textures (to avoid creating hundreds of identical textures) + LOGI("Creating shared default PBR textures..."); if (!createSharedDefaultPBRTextures()) { - std::cerr << "Failed to create shared default PBR textures" << std::endl; + LOGE("Failed to create shared default PBR textures"); return false; } + LOGI("Shared default PBR textures created successfully"); // Create command buffers + LOGI("Creating command buffers..."); if (!createCommandBuffers()) { - std::cerr << "Failed to create command buffers" << std::endl; + LOGE("Failed to create command buffers"); return false; } + LOGI("Command buffers created successfully"); // Create sync objects + LOGI("Creating sync objects..."); if (!createSyncObjects()) { - std::cerr << "Failed to create sync objects" << std::endl; + LOGE("Failed to create sync objects"); return false; } + LOGI("Sync objects created successfully"); // Initialize background thread pool for async tasks (textures, etc.) AFTER all Vulkan resources are ready try { @@ -334,7 +421,7 @@ bool Renderer::Initialize(const std::string& appName, bool enableValidationLayer unsigned int hw = std::max(2u, std::min(8u, std::thread::hardware_concurrency() ? 
std::thread::hardware_concurrency() : 4u)); threadPool = std::make_unique(hw); } catch (const std::exception& e) { - std::cerr << "Failed to create thread pool: " << e.what() << std::endl; + LOGE("Failed to create thread pool: %s", e.what()); return false; } @@ -346,7 +433,7 @@ bool Renderer::Initialize(const std::string& appName, bool enableValidationLayer watchdogRunning.store(true, std::memory_order_relaxed); watchdogThread = std::thread(WatchdogThreadFunc, &lastFrameUpdateTime, &watchdogRunning, &watchdogSuppressed, &watchdogProgressLabel, &watchdogProgressIndex); - std::cout << "[Watchdog] Started - will abort if no frame updates for 10+ seconds\n"; + std::cout << "[Watchdog] Started - will abort if no frame updates for 10+ seconds (60s during loading)\n"; initialized = true; return true; @@ -357,19 +444,10 @@ void Renderer::ensureThreadLocalVulkanInit() const { static thread_local bool s_tlsInitialized = false; if (s_tlsInitialized) return; - try { - // Initialize the dispatcher for this thread using the global symbol. - VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); - if (*instance) { - VULKAN_HPP_DEFAULT_DISPATCHER.init(*instance); - } - if (*device) { - VULKAN_HPP_DEFAULT_DISPATCHER.init(*device); - } + // The dispatcher is global and initialized on the main thread during Renderer::Initialize. + // Background threads inherit this global state. No per-thread init is required + // for VULKAN_HPP_DEFAULT_DISPATCHER when using the default storage. s_tlsInitialized = true; - } catch (...) 
{ - // best-effort - } } // Clean up renderer resources @@ -575,6 +653,9 @@ bool Renderer::createInstance(const std::string& appName, bool enableValidationL uint32_t glfwExtensionCount = 0; const char** glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount); extensions.insert(extensions.end(), glfwExtensions, glfwExtensions + glfwExtensionCount); +#elif defined(PLATFORM_ANDROID) + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + extensions.push_back(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME); #endif // Add debug extension if validation layers are enabled @@ -593,12 +674,15 @@ bool Renderer::createInstance(const std::string& appName, bool enableValidationL vk::ValidationFeaturesEXT validationFeatures{}; std::vector enabledValidationFeatures; - if (enableValidationLayers) { + bool actualEnableValidationLayers = enableValidationLayers; + if (actualEnableValidationLayers) { if (!checkValidationLayerSupport()) { - std::cerr << "Validation layers requested, but not available" << std::endl; - return false; + LOGW("Validation layers requested, but not available. 
Continuing without validation."); + actualEnableValidationLayers = false; } + } + if (actualEnableValidationLayers) { createInfo.enabledLayerCount = static_cast(validationLayers.size()); createInfo.ppEnabledLayerNames = validationLayers.data(); @@ -613,9 +697,11 @@ bool Renderer::createInstance(const std::string& appName, bool enableValidationL // Create instance instance = vk::raii::Instance(context, createInfo); + // Initialize the dispatcher with the instance to load instance-level functions + VULKAN_HPP_DEFAULT_DISPATCHER.init(*instance); return true; } catch (const std::exception& e) { - std::cerr << "Failed to create instance: " << e.what() << std::endl; + LOGE("Failed to create instance: %s", e.what()); return false; } } @@ -637,13 +723,8 @@ bool Renderer::setupDebugMessenger(bool enableValidationLayers) { vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance; - // Select callback via simple platform macro: Android typically expects C PFN types in headers - // while desktop (newer Vulkan-Hpp) expects vk:: types. -#if defined(__ANDROID__) - createInfo.pfnUserCallback = &debugCallbackVkRaii; -#else + // Select callback: modern Vulkan-Hpp expects vk:: types. 
createInfo.pfnUserCallback = &debugCallbackVkHpp; -#endif // Create debug messenger debugMessenger = vk::raii::DebugUtilsMessengerEXT(instance, createInfo); @@ -690,13 +771,12 @@ bool Renderer::pickPhysicalDevice() { for (auto& _device : devices) { // Print device properties for debugging vk::PhysicalDeviceProperties deviceProperties = _device.getProperties(); - std::cout << "Checking device: " << deviceProperties.deviceName - << " (Type: " << vk::to_string(deviceProperties.deviceType) << ")" << std::endl; + LOGI("Checking device: %s (Type: %s)", deviceProperties.deviceName.data(), vk::to_string(deviceProperties.deviceType).c_str()); // Check if the device supports Vulkan 1.3 bool supportsVulkan1_3 = deviceProperties.apiVersion >= VK_API_VERSION_1_3; if (!supportsVulkan1_3) { - std::cout << " - Does not support Vulkan 1.3" << std::endl; + LOGI(" - Does not support Vulkan 1.3"); continue; } @@ -704,14 +784,14 @@ bool Renderer::pickPhysicalDevice() { QueueFamilyIndices indices = findQueueFamilies(_device); bool supportsGraphics = indices.isComplete(); if (!supportsGraphics) { - std::cout << " - Missing required queue families" << std::endl; + LOGI(" - Missing required queue families"); continue; } // Check device extensions bool supportsAllRequiredExtensions = checkDeviceExtensionSupport(_device); if (!supportsAllRequiredExtensions) { - std::cout << " - Missing required extensions" << std::endl; + LOGI(" - Missing required extensions"); continue; } @@ -719,7 +799,7 @@ bool Renderer::pickPhysicalDevice() { SwapChainSupportDetails swapChainSupport = querySwapChainSupport(_device); bool swapChainAdequate = !swapChainSupport.formats.empty() && !swapChainSupport.presentModes.empty(); if (!swapChainAdequate) { - std::cout << " - Inadequate swap chain support" << std::endl; + LOGI(" - Inadequate swap chain support"); continue; } @@ -727,7 +807,7 @@ bool Renderer::pickPhysicalDevice() { auto features = _device.getFeatures2(); bool supportsRequiredFeatures = 
features.get().dynamicRendering; if (!supportsRequiredFeatures) { - std::cout << " - Does not support required features (dynamicRendering)" << std::endl; + LOGI(" - Does not support required features (dynamicRendering)"); continue; } @@ -737,12 +817,12 @@ bool Renderer::pickPhysicalDevice() { // Discrete GPUs get the highest priority (NVIDIA RTX 2080, AMD, etc.) if (deviceProperties.deviceType == vk::PhysicalDeviceType::eDiscreteGpu) { score += 1000; - std::cout << " - Discrete GPU: +1000 points" << std::endl; + LOGI(" - Discrete GPU: +1000 points"); } // Integrated GPUs get lower priority (Intel UHD Graphics, etc.) else if (deviceProperties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu) { score += 100; - std::cout << " - Integrated GPU: +100 points" << std::endl; + LOGI(" - Integrated GPU: +100 points"); } // Add points for memory size (more VRAM is better) @@ -755,7 +835,7 @@ bool Renderer::pickPhysicalDevice() { } } - std::cout << " - Device is suitable with score: " << score << std::endl; + LOGI(" - Device is suitable with score: %d", score); suitableDevices.emplace(score, _device); } @@ -763,9 +843,10 @@ bool Renderer::pickPhysicalDevice() { // Select the device with the highest score (discrete GPU with most VRAM) physicalDevice = suitableDevices.rbegin()->second; vk::PhysicalDeviceProperties deviceProperties = physicalDevice.getProperties(); - std::cout << "Selected device: " << deviceProperties.deviceName - << " (Type: " << vk::to_string(deviceProperties.deviceType) - << ", Score: " << suitableDevices.rbegin()->first << ")" << std::endl; + LOGI("Selected device: %s (Type: %s, Score: %d)", + deviceProperties.deviceName.data(), + vk::to_string(deviceProperties.deviceType).c_str(), + suitableDevices.rbegin()->first); // Store queue family indices for the selected device queueFamilyIndices = findQueueFamilies(physicalDevice); @@ -775,7 +856,7 @@ bool Renderer::pickPhysicalDevice() { return true; } - std::cerr << "Failed to find a suitable GPU. 
Make sure your GPU supports Vulkan and has the required extensions." << std::endl;
+        LOGE("Failed to find a suitable GPU. Make sure your GPU supports Vulkan and has the required extensions.");
         return false;
     } catch (const std::exception& e) {
-        std::cerr << "Failed to pick physical device: " << e.what() << std::endl;
+        LOGE("Failed to pick physical device: %s", e.what()); // same logging path as the other error branches converted in this file
@@ -789,6 +870,15 @@ void Renderer::addSupportedOptionalExtensions() {
     // Get available extensions
     auto availableExtensions = physicalDevice.enumerateDeviceExtensionProperties();
 
+    // Add any extra extensions enabled by courses. NOTE(review): these are appended on every call — confirm this function runs only once per Renderer.
+#ifdef ENABLE_COURSE_OPACITY_MICROMAPS
+    // Opacity micromap for hardware-accelerated alpha-tested shadow rays (Course: Opacity Micromaps)
+    // vkCreateAccelerationStructure2KHR (KHR micromap build entry point) lives in
+    // VK_KHR_device_address_commands — both must be present for the build path to work.
+    optionalDeviceExtensions.push_back( VK_KHR_OPACITY_MICROMAP_EXTENSION_NAME );
+    optionalDeviceExtensions.push_back(VK_KHR_DEVICE_ADDRESS_COMMANDS_EXTENSION_NAME);
+#endif
+
     // Build a set of available extension names for quick lookup
     std::set avail;
     for (const auto& e : availableExtensions) {
@@ -808,6 +898,7 @@ void Renderer::addSupportedOptionalExtensions() {
 
 // Create logical device
 bool Renderer::createLogicalDevice(bool enableValidationLayers) {
+    LOGI("Entering createLogicalDevice");
     try {
         // Create queue create info for each unique queue family
         std::vector queueCreateInfos;
@@ -828,195 +919,166 @@ bool Renderer::createLogicalDevice(bool enableValidationLayers) {
             queueCreateInfos.push_back(queueCreateInfo);
         }
 
+        LOGI("Querying supported features...");
         // Query supported features before enabling them
-        auto supportedFeatures = physicalDevice.getFeatures2<
+        auto featureChainSupported = physicalDevice.getFeatures2<
             vk::PhysicalDeviceFeatures2,
             vk::PhysicalDeviceTimelineSemaphoreFeatures,
             vk::PhysicalDeviceVulkanMemoryModelFeatures,
             vk::PhysicalDeviceBufferDeviceAddressFeatures,
             vk::PhysicalDevice8BitStorageFeatures,
vk::PhysicalDeviceVulkan11Features, - vk::PhysicalDeviceVulkan13Features>(); + vk::PhysicalDeviceVulkan12Features, + vk::PhysicalDeviceVulkan13Features, + vk::PhysicalDeviceDescriptorIndexingFeatures, + vk::PhysicalDeviceRobustness2FeaturesEXT, + vk::PhysicalDeviceDynamicRenderingLocalReadFeaturesKHR, + vk::PhysicalDeviceShaderTileImageFeaturesEXT, + vk::PhysicalDeviceAccelerationStructureFeaturesKHR, + vk::PhysicalDeviceRayQueryFeaturesKHR>(); + LOGI("Features queried successfully"); + + // Extract supported feature structs + const auto& coreSupported = featureChainSupported.get().features; + const auto& timelineSupported = featureChainSupported.get(); + const auto& memoryModelSupported = featureChainSupported.get(); + const auto& bufferAddressSupported = featureChainSupported.get(); + const auto& storage8BitSupported = featureChainSupported.get(); + const auto& vulkan11Supported = featureChainSupported.get(); + const auto& vulkan13Supported = featureChainSupported.get(); + const auto& indexingFeaturesSupported = featureChainSupported.get(); + const auto& robust2Supported = featureChainSupported.get(); + const auto& localReadSupported = featureChainSupported.get(); + const auto& tileImageSupported = featureChainSupported.get(); + const auto& accelerationStructureSupported = featureChainSupported.get(); + const auto& rayQuerySupported = featureChainSupported.get(); // Verify critical features are supported - const auto& coreSupported = supportedFeatures.get().features; - const auto& timelineSupported = supportedFeatures.get(); - const auto& memoryModelSupported = supportedFeatures.get(); - const auto& bufferAddressSupported = supportedFeatures.get(); - const auto& storage8BitSupported = supportedFeatures.get(); - const auto& vulkan11Supported = supportedFeatures.get(); - const auto& vulkan13Supported = supportedFeatures.get(); - - // Check for required features + if (!coreSupported.samplerAnisotropy) + LOGW("Missing feature: samplerAnisotropy"); + if 
(!timelineSupported.timelineSemaphore) + LOGW("Missing feature: timelineSemaphore"); + if (!memoryModelSupported.vulkanMemoryModel) + LOGW("Missing feature: vulkanMemoryModel"); + if (!bufferAddressSupported.bufferDeviceAddress) + LOGW("Missing feature: bufferDeviceAddress"); + if (!vulkan13Supported.dynamicRendering) + LOGW("Missing feature: dynamicRendering"); + if (!vulkan13Supported.synchronization2) + LOGW("Missing feature: synchronization2"); + if (!coreSupported.samplerAnisotropy || !timelineSupported.timelineSemaphore || !memoryModelSupported.vulkanMemoryModel || !bufferAddressSupported.bufferDeviceAddress || - !vulkan11Supported.shaderDrawParameters || !vulkan13Supported.dynamicRendering || !vulkan13Supported.synchronization2) { throw std::runtime_error("Required Vulkan features not supported by physical device"); } - // Enable required features (now verified to be supported) - auto features = physicalDevice.getFeatures2(); - features.features.samplerAnisotropy = vk::True; - features.features.depthBiasClamp = coreSupported.depthBiasClamp ? vk::True : vk::False; + // Helper to check extension availability + auto hasExtension = [&](const char* name) { + return std::find_if(deviceExtensions.begin(), + deviceExtensions.end(), + [&](const char* ext) { + return std::strcmp(ext, name) == 0; + }) != deviceExtensions.end(); + }; - // Explicitly configure device features to prevent validation layer warnings - // These features are required by extensions or other features, so we enable them explicitly + // Feature structures for the logical device + vk::PhysicalDeviceFeatures2 features2{}; + features2.features.samplerAnisotropy = vk::True; + features2.features.depthBiasClamp = coreSupported.depthBiasClamp ? 
vk::True : vk::False; + if (coreSupported.shaderSampledImageArrayDynamicIndexing) { + features2.features.shaderSampledImageArrayDynamicIndexing = vk::True; + } - // Timeline semaphore features (required for synchronization2) - vk::PhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures; + vk::PhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures{}; timelineSemaphoreFeatures.timelineSemaphore = vk::True; - // Vulkan memory model features (required for some shader operations) - vk::PhysicalDeviceVulkanMemoryModelFeatures memoryModelFeatures; + vk::PhysicalDeviceVulkanMemoryModelFeatures memoryModelFeatures{}; memoryModelFeatures.vulkanMemoryModel = vk::True; memoryModelFeatures.vulkanMemoryModelDeviceScope = memoryModelSupported.vulkanMemoryModelDeviceScope ? vk::True : vk::False; - // Buffer device address features (required for some buffer operations) - vk::PhysicalDeviceBufferDeviceAddressFeatures bufferDeviceAddressFeatures; + vk::PhysicalDeviceBufferDeviceAddressFeatures bufferDeviceAddressFeatures{}; bufferDeviceAddressFeatures.bufferDeviceAddress = vk::True; - // 8-bit storage features (required for some shader storage operations) - vk::PhysicalDevice8BitStorageFeatures storage8BitFeatures; + vk::PhysicalDevice8BitStorageFeatures storage8BitFeatures{}; storage8BitFeatures.storageBuffer8BitAccess = storage8BitSupported.storageBuffer8BitAccess ? 
vk::True : vk::False; - // Enable Vulkan 1.3 features - vk::PhysicalDeviceVulkan13Features vulkan13Features; - vulkan13Features.dynamicRendering = vk::True; - vulkan13Features.synchronization2 = vk::True; - - // Vulkan 1.1 features: shaderDrawParameters to satisfy SPIR-V DrawParameters capability vk::PhysicalDeviceVulkan11Features vulkan11Features{}; - vulkan11Features.shaderDrawParameters = vk::True; - // Query extended feature support -#if !defined(PLATFORM_ANDROID) - auto featureChain = physicalDevice.getFeatures2< - vk::PhysicalDeviceFeatures2, - vk::PhysicalDeviceDescriptorIndexingFeatures, - vk::PhysicalDeviceRobustness2FeaturesEXT, - vk::PhysicalDeviceDynamicRenderingLocalReadFeaturesKHR, - vk::PhysicalDeviceShaderTileImageFeaturesEXT, - vk::PhysicalDeviceAccelerationStructureFeaturesKHR, - vk::PhysicalDeviceRayQueryFeaturesKHR>(); - const auto& localReadSupported = featureChain.get(); - const auto& tileImageSupported = featureChain.get(); -#else - auto featureChain = physicalDevice.getFeatures2< - vk::PhysicalDeviceFeatures2, - vk::PhysicalDeviceDescriptorIndexingFeatures, - vk::PhysicalDeviceRobustness2FeaturesEXT, - vk::PhysicalDeviceAccelerationStructureFeaturesKHR, - vk::PhysicalDeviceRayQueryFeaturesKHR>(); -#endif - const auto& coreFeaturesSupported = featureChain.get().features; - const auto& indexingFeaturesSupported = featureChain.get(); - const auto& robust2Supported = featureChain.get(); - const auto& accelerationStructureSupported = featureChain.get(); - const auto& rayQuerySupported = featureChain.get(); - - // Ray Query shader uses indexing into a (large) sampled-image array. - // Some drivers require this core feature to be explicitly enabled. 
- if (coreFeaturesSupported.shaderSampledImageArrayDynamicIndexing) { - features.features.shaderSampledImageArrayDynamicIndexing = vk::True; + if (vulkan11Supported.shaderDrawParameters) { + vulkan11Features.shaderDrawParameters = vk::True; } - // Prepare descriptor indexing features to enable if supported + vk::PhysicalDeviceVulkan13Features vulkan13Features{}; + vulkan13Features.dynamicRendering = vk::True; + vulkan13Features.synchronization2 = vk::True; + vk::PhysicalDeviceDescriptorIndexingFeatures indexingFeaturesEnable{}; descriptorIndexingEnabled = false; - // Enable non-uniform indexing of sampled image arrays when supported — required for - // `NonUniformResourceIndex()` in the ray-query shader to actually take effect. if (indexingFeaturesSupported.shaderSampledImageArrayNonUniformIndexing) { indexingFeaturesEnable.shaderSampledImageArrayNonUniformIndexing = vk::True; descriptorIndexingEnabled = true; } - - // These are not strictly required when writing a fully-populated descriptor array, - // but enabling them when available avoids edge-case driver behavior for large arrays. 
if (descriptorIndexingEnabled) { - if (indexingFeaturesSupported.descriptorBindingPartiallyBound) { - indexingFeaturesEnable.descriptorBindingPartiallyBound = vk::True; + if (indexingFeaturesSupported.descriptorBindingPartiallyBound) indexingFeaturesEnable.descriptorBindingPartiallyBound = vk::True; + if (indexingFeaturesSupported.descriptorBindingUpdateUnusedWhilePending) indexingFeaturesEnable.descriptorBindingUpdateUnusedWhilePending = vk::True; + if (indexingFeaturesSupported.descriptorBindingSampledImageUpdateAfterBind) { + indexingFeaturesEnable.descriptorBindingSampledImageUpdateAfterBind = vk::True; + descriptorBindingSampledImageUpdateAfterBindEnabled = true; } - if (indexingFeaturesSupported.descriptorBindingUpdateUnusedWhilePending) { - indexingFeaturesEnable.descriptorBindingUpdateUnusedWhilePending = vk::True; + if (indexingFeaturesSupported.descriptorBindingUniformBufferUpdateAfterBind) { + indexingFeaturesEnable.descriptorBindingUniformBufferUpdateAfterBind = vk::True; + descriptorBindingUniformBufferUpdateAfterBindEnabled = true; } } - // Optionally enable UpdateAfterBind flags when supported (not strictly required for RQ textures) - if (indexingFeaturesSupported.descriptorBindingSampledImageUpdateAfterBind) - indexingFeaturesEnable.descriptorBindingSampledImageUpdateAfterBind = vk::True; - if (indexingFeaturesSupported.descriptorBindingUniformBufferUpdateAfterBind) - indexingFeaturesEnable.descriptorBindingUniformBufferUpdateAfterBind = vk::True; - if (indexingFeaturesSupported.descriptorBindingUpdateUnusedWhilePending) - indexingFeaturesEnable.descriptorBindingUpdateUnusedWhilePending = vk::True; - - // Helper to check if an extension is enabled (using string comparison) - auto hasExtension = [&](const char* name) { - return std::find_if(deviceExtensions.begin(), - deviceExtensions.end(), - [&](const char* ext) { - return std::strcmp(ext, name) == 0; - }) != deviceExtensions.end(); - }; - // Prepare Robustness2 features if the extension is enabled 
and device supports - auto hasRobust2 = hasExtension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); vk::PhysicalDeviceRobustness2FeaturesEXT robust2Enable{}; + bool hasRobust2 = hasExtension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); if (hasRobust2) { - if (robust2Supported.robustBufferAccess2) - robust2Enable.robustBufferAccess2 = vk::True; - if (robust2Supported.robustImageAccess2) - robust2Enable.robustImageAccess2 = vk::True; - if (robust2Supported.nullDescriptor) - robust2Enable.nullDescriptor = vk::True; + if (robust2Supported.robustBufferAccess2) robust2Enable.robustBufferAccess2 = vk::True; + if (robust2Supported.robustImageAccess2) robust2Enable.robustImageAccess2 = vk::True; + if (robust2Supported.nullDescriptor) robust2Enable.nullDescriptor = vk::True; } + robustness2Enabled = hasRobust2 && (robust2Enable.robustBufferAccess2 || robust2Enable.robustImageAccess2 || robust2Enable.nullDescriptor); -#if !defined(PLATFORM_ANDROID) - // Prepare Dynamic Rendering Local Read features if extension is enabled and supported - auto hasLocalRead = hasExtension(VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME); vk::PhysicalDeviceDynamicRenderingLocalReadFeaturesKHR localReadEnable{}; + bool hasLocalRead = hasExtension(VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME); if (hasLocalRead && localReadSupported.dynamicRenderingLocalRead) { localReadEnable.dynamicRenderingLocalRead = vk::True; } + dynamicRenderingLocalReadEnabled = hasLocalRead && localReadEnable.dynamicRenderingLocalRead; - // Prepare Shader Tile Image features if extension is enabled and supported - auto hasTileImage = hasExtension(VK_EXT_SHADER_TILE_IMAGE_EXTENSION_NAME); vk::PhysicalDeviceShaderTileImageFeaturesEXT tileImageEnable{}; - if (hasTileImage) { - if (tileImageSupported.shaderTileImageColorReadAccess) - tileImageEnable.shaderTileImageColorReadAccess = vk::True; - if (tileImageSupported.shaderTileImageDepthReadAccess) - tileImageEnable.shaderTileImageDepthReadAccess = vk::True; - if 
(tileImageSupported.shaderTileImageStencilReadAccess) - tileImageEnable.shaderTileImageStencilReadAccess = vk::True; + bool hasTileImage = hasExtension(VK_EXT_SHADER_TILE_IMAGE_EXTENSION_NAME); + if (hasTileImage && tileImageSupported.shaderTileImageColorReadAccess) { + tileImageEnable.shaderTileImageColorReadAccess = vk::True; } -#endif + shaderTileImageEnabled = hasTileImage && tileImageEnable.shaderTileImageColorReadAccess; - // Prepare Acceleration Structure features if extension is enabled and supported - auto hasAccelerationStructure = hasExtension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); - vk::PhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureEnable{}; - if (hasAccelerationStructure && accelerationStructureSupported.accelerationStructure) { - accelerationStructureEnable.accelerationStructure = vk::True; + vk::PhysicalDeviceAccelerationStructureFeaturesKHR asFeaturesEnable{}; + bool hasAS = hasExtension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + if (hasAS && accelerationStructureSupported.accelerationStructure) { + asFeaturesEnable.accelerationStructure = vk::True; } + accelerationStructureEnabled = hasAS && asFeaturesEnable.accelerationStructure; - // Prepare Ray Query features if extension is enabled and supported - auto hasRayQuery = hasExtension(VK_KHR_RAY_QUERY_EXTENSION_NAME); - vk::PhysicalDeviceRayQueryFeaturesKHR rayQueryEnable{}; - if (hasRayQuery && rayQuerySupported.rayQuery) { - rayQueryEnable.rayQuery = vk::True; + vk::PhysicalDeviceRayQueryFeaturesKHR rayQueryFeaturesEnable{}; + bool hasRQ = hasExtension(VK_KHR_RAY_QUERY_EXTENSION_NAME); + if (hasRQ && rayQuerySupported.rayQuery) { + rayQueryFeaturesEnable.rayQuery = vk::True; } + rayQueryEnabled = hasRQ && rayQueryFeaturesEnable.rayQuery; - // Chain the feature structures together (build pNext chain explicitly) - // Base - features.pNext = &timelineSemaphoreFeatures; + // Build the pNext chain + features2.pNext = &timelineSemaphoreFeatures; 
timelineSemaphoreFeatures.pNext = &memoryModelFeatures; memoryModelFeatures.pNext = &bufferDeviceAddressFeatures; bufferDeviceAddressFeatures.pNext = &storage8BitFeatures; - storage8BitFeatures.pNext = &vulkan11Features; // link 1.1 first - vulkan11Features.pNext = &vulkan13Features; // then 1.3 features + storage8BitFeatures.pNext = &vulkan11Features; + vulkan11Features.pNext = &vulkan13Features; - // Build tail chain starting at Vulkan 1.3 features void** tailNext = reinterpret_cast(&vulkan13Features.pNext); if (descriptorIndexingEnabled) { *tailNext = &indexingFeaturesEnable; @@ -1036,13 +1098,34 @@ bool Renderer::createLogicalDevice(bool enableValidationLayers) { tailNext = reinterpret_cast(&tileImageEnable.pNext); } #endif - if (hasAccelerationStructure) { - *tailNext = &accelerationStructureEnable; - tailNext = reinterpret_cast(&accelerationStructureEnable.pNext); + if (hasAS) { + *tailNext = &asFeaturesEnable; + tailNext = reinterpret_cast(&asFeaturesEnable.pNext); } - if (hasRayQuery) { - *tailNext = &rayQueryEnable; - tailNext = reinterpret_cast(&rayQueryEnable.pNext); + if (hasRQ) { + *tailNext = &rayQueryFeaturesEnable; + tailNext = reinterpret_cast(&rayQueryFeaturesEnable.pNext); + } + + // Opacity micromap — VK_KHR_opacity_micromap (Course: Opacity Micromaps) + // Also requires VK_KHR_device_address_commands for vkCreateAccelerationStructure2KHR. 
+ auto hasOpacityMicromap = hasExtension(VK_KHR_OPACITY_MICROMAP_EXTENSION_NAME) + && hasExtension(VK_KHR_DEVICE_ADDRESS_COMMANDS_EXTENSION_NAME); + vk::PhysicalDeviceOpacityMicromapFeaturesKHR opacityMicromapSupported{}; + vk::PhysicalDeviceOpacityMicromapFeaturesKHR opacityMicromapEnable{}; + if (hasOpacityMicromap) { + auto featChain2 = physicalDevice.getFeatures2< + vk::PhysicalDeviceFeatures2, + vk::PhysicalDeviceOpacityMicromapFeaturesKHR>(); + opacityMicromapSupported = featChain2.template get(); + if (opacityMicromapSupported.micromap) { + opacityMicromapEnable.micromap = vk::True; +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + opacityMicromapEnabled = true; +#endif + *tailNext = &opacityMicromapEnable; + tailNext = reinterpret_cast(&opacityMicromapEnable.pNext); + } } // Record which features ended up enabled (for runtime decisions/tutorial diagnostics) @@ -1058,36 +1141,25 @@ bool Renderer::createLogicalDevice(bool enableValidationLayers) { dynamicRenderingLocalReadEnabled = false; shaderTileImageEnabled = false; #endif - accelerationStructureEnabled = hasAccelerationStructure && (accelerationStructureEnable.accelerationStructure == vk::True); - rayQueryEnabled = hasRayQuery && (rayQueryEnable.rayQuery == vk::True); - - // One-time startup diagnostics (Ray Query + texture array indexing) - static bool printedFeatureDiag = false; - if (!printedFeatureDiag) { - printedFeatureDiag = true; - std::cout << "[DeviceFeatures] shaderSampledImageArrayDynamicIndexing=" - << (features.features.shaderSampledImageArrayDynamicIndexing == vk::True ? "ON" : "OFF") - << ", shaderSampledImageArrayNonUniformIndexing=" - << (indexingFeaturesEnable.shaderSampledImageArrayNonUniformIndexing == vk::True ? "ON" : "OFF") - << ", descriptorIndexingEnabled=" - << (descriptorIndexingEnabled ? 
"true" : "false") - << "\n"; - } + accelerationStructureEnabled = hasAS && (asFeaturesEnable.accelerationStructure == vk::True); + rayQueryEnabled = hasRQ && (rayQueryFeaturesEnable.rayQuery == vk::True); - // Create a device. Device layers are deprecated and ignored, so we - // only configure extensions and features here; validation is enabled - // via instance layers. + // Create device info vk::DeviceCreateInfo createInfo{ - .pNext = &features, + .pNext = &features2, .queueCreateInfoCount = static_cast(queueCreateInfos.size()), .pQueueCreateInfos = queueCreateInfos.data(), .enabledExtensionCount = static_cast(deviceExtensions.size()), .ppEnabledExtensionNames = deviceExtensions.data(), - .pEnabledFeatures = nullptr // Using pNext for features + .pEnabledFeatures = nullptr }; - // Create the logical device + LOGI("Creating logical device..."); device = vk::raii::Device(physicalDevice, createInfo); + LOGI("Device created successfully"); + + // Initialize the dispatcher with the device to load device-level functions + VULKAN_HPP_DEFAULT_DISPATCHER.init(*device); // Get queue handles graphicsQueue = vk::raii::Queue(device, queueFamilyIndices.graphicsFamily.value(), 0); @@ -1104,7 +1176,7 @@ bool Renderer::createLogicalDevice(bool enableValidationLayers) { return true; } catch (const std::exception& e) { - std::cerr << "Failed to create logical device: " << e.what() << std::endl; + LOGE("Failed to create logical device: %s", e.what()); return false; } } @@ -1131,4 +1203,4 @@ bool Renderer::checkValidationLayerSupport() const { } return true; -} \ No newline at end of file +} diff --git a/attachments/simple_engine/renderer_pipelines.cpp b/attachments/simple_engine/renderer_pipelines.cpp index c76605098..a29c1798b 100644 --- a/attachments/simple_engine/renderer_pipelines.cpp +++ b/attachments/simple_engine/renderer_pipelines.cpp @@ -50,8 +50,12 @@ bool Renderer::createDescriptorSetLayout() { vk::DescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsInfo{}; std::array 
bindingFlags{}; if (descriptorIndexingEnabled) { - bindingFlags[0] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; - bindingFlags[1] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; + if (descriptorBindingUniformBufferUpdateAfterBindEnabled) { + bindingFlags[0] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; + } + if (descriptorBindingSampledImageUpdateAfterBindEnabled) { + bindingFlags[1] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; + } bindingFlagsInfo.bindingCount = static_cast(bindingFlags.size()); bindingFlagsInfo.pBindingFlags = bindingFlags.data(); } @@ -76,7 +80,7 @@ bool Renderer::createDescriptorSetLayout() { bool Renderer::createPBRDescriptorSetLayout() { try { // Create descriptor set layout bindings for PBR shader - std::array bindings = { + std::array baseBindings = { // Binding 0: Uniform buffer (UBO) vk::DescriptorSetLayoutBinding{ .binding = 0, @@ -124,81 +128,94 @@ bool Renderer::createPBRDescriptorSetLayout() { .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 6: Light storage buffer (shadows removed) - vk::DescriptorSetLayoutBinding{ - .binding = 6, + } + }; + + std::vector bindings(baseBindings.begin(), baseBindings.end()); + + // Structured buffers and Ray-query related bindings. + // Only add them if the features are actually supported on the device. 
+ // Binding 6: lights SSBO + bindings.push_back(vk::DescriptorSetLayoutBinding{ + .binding = 6, .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 7: Forward+ tile headers SSBO - vk::DescriptorSetLayoutBinding{ - .binding = 7, + }); + // Binding 7: tile headers + bindings.push_back(vk::DescriptorSetLayoutBinding{ + .binding = 7, .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 8: Forward+ tile light indices SSBO - vk::DescriptorSetLayoutBinding{ - .binding = 8, + }); + // Binding 8: tile indices + bindings.push_back(vk::DescriptorSetLayoutBinding{ + .binding = 8, .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 9: Fragment debug output buffer (optional) - vk::DescriptorSetLayoutBinding{ - .binding = 9, + }); + // Binding 9: fragment debug output buffer + bindings.push_back(vk::DescriptorSetLayoutBinding{ + .binding = 9, .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 10: Reflection texture (planar reflections) - vk::DescriptorSetLayoutBinding{ - .binding = 10, + }); + + // Binding 10 is always present (planar reflections) + bindings.push_back(vk::DescriptorSetLayoutBinding{ + .binding = 10, .descriptorType = vk::DescriptorType::eCombinedImageSampler, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 11: TLAS (ray-query shadows in raster fragment shader) - vk::DescriptorSetLayoutBinding{ + }); + + if (accelerationStructureEnabled) { + // Binding 11: TLAS + bindings.push_back(vk::DescriptorSetLayoutBinding{ 
.binding = 11, .descriptorType = vk::DescriptorType::eAccelerationStructureKHR, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 12: Ray-query geometry info buffer (per-instance addresses + material indices) - vk::DescriptorSetLayoutBinding{ + }); + // Binding 12: geometry info + bindings.push_back(vk::DescriptorSetLayoutBinding{ .binding = 12, .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - }, - // Binding 13: Ray-query material buffer (PBR material properties) - vk::DescriptorSetLayoutBinding{ + }); + // Binding 13: material data + bindings.push_back(vk::DescriptorSetLayoutBinding{ .binding = 13, .descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment, .pImmutableSamplers = nullptr - } - }; + }); + } // Create a descriptor set layout // Descriptor indexing: set per-binding flags for UPDATE_AFTER_BIND on UBO (0) and sampled images (1..5) vk::DescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsInfo{}; - std::array bindingFlags{}; + std::vector bindingFlags(bindings.size(), vk::DescriptorBindingFlags{}); if (descriptorIndexingEnabled) { - bindingFlags[0] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; - bindingFlags[1] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; - bindingFlags[10] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; + for (size_t i = 0; i < bindings.size(); ++i) { + if (bindings[i].descriptorType == vk::DescriptorType::eUniformBuffer && descriptorBindingUniformBufferUpdateAfterBindEnabled) { + bindingFlags[i] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; + } 
else if (bindings[i].descriptorType == vk::DescriptorType::eCombinedImageSampler && descriptorBindingSampledImageUpdateAfterBindEnabled) { + bindingFlags[i] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; + } + } bindingFlagsInfo.bindingCount = static_cast(bindingFlags.size()); bindingFlagsInfo.pBindingFlags = bindingFlags.data(); } @@ -219,7 +236,7 @@ bool Renderer::createPBRDescriptorSetLayout() { .binding = 0, .descriptorType = vk::DescriptorType::eCombinedImageSampler, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eFragment }; vk::DescriptorSetLayoutCreateInfo transparentLayoutInfo{.bindingCount = 1, .pBindings = &sceneColorBinding}; - if (descriptorIndexingEnabled) { + if (descriptorIndexingEnabled && descriptorBindingSampledImageUpdateAfterBindEnabled) { // Make this sampler binding update-after-bind safe as well (optional) vk::DescriptorSetLayoutBindingFlagsCreateInfo transBindingFlagsInfo{}; vk::DescriptorBindingFlags transFlags = vk::DescriptorBindingFlagBits::eUpdateAfterBind | vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending; @@ -245,31 +262,24 @@ bool Renderer::createPBRDescriptorSetLayout() { // Create a graphics pipeline bool Renderer::createGraphicsPipeline() { try { - // Read shader code + // Read shader code (Reverted to use the Slang-compiled texturedMesh shader) auto shaderCode = readFile("shaders/texturedMesh.spv"); - // Create shader modules + // Create shader module vk::raii::ShaderModule shaderModule = createShaderModule(shaderCode); // Create shader stage info - vk::PipelineShaderStageCreateInfo vertShaderStageInfo{ - .stage = vk::ShaderStageFlagBits::eVertex, - .module = *shaderModule, - .pName = "VSMain" - }; - - vk::PipelineShaderStageCreateInfo fragShaderStageInfo{ - .stage = vk::ShaderStageFlagBits::eFragment, - .module = *shaderModule, - .pName = "PSMain" - }; - - // Fragment entry point specialized for architectural glass - 
vk::PipelineShaderStageCreateInfo fragGlassStageInfo{ - .stage = vk::ShaderStageFlagBits::eFragment, - .module = *shaderModule, - .pName = "GlassPSMain" - }; + vk::PipelineShaderStageCreateInfo vertShaderStageInfo{}; + vertShaderStageInfo.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + vertShaderStageInfo.stage = vk::ShaderStageFlagBits::eVertex; + vertShaderStageInfo.module = *shaderModule; + vertShaderStageInfo.pName = "VSMain"; + + vk::PipelineShaderStageCreateInfo fragShaderStageInfo{}; + fragShaderStageInfo.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + fragShaderStageInfo.stage = vk::ShaderStageFlagBits::eFragment; + fragShaderStageInfo.module = *shaderModule; + fragShaderStageInfo.pName = "PSMain"; vk::PipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo, fragShaderStageInfo}; @@ -376,13 +386,12 @@ bool Renderer::createGraphicsPipeline() { vk::Format depthFormat = findDepthFormat(); std::cout << "Creating main graphics pipeline with depth format: " << static_cast(depthFormat) << std::endl; - // Initialize member variable for proper lifetime management - mainPipelineRenderingCreateInfo = vk::PipelineRenderingCreateInfo{ - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &swapChainImageFormat, - .depthAttachmentFormat = depthFormat, - .stencilAttachmentFormat = vk::Format::eUndefined - }; + vk::PipelineRenderingCreateInfo pipelineRenderingInfo{}; + pipelineRenderingInfo.sType = vk::StructureType::ePipelineRenderingCreateInfo; + pipelineRenderingInfo.colorAttachmentCount = 1; + pipelineRenderingInfo.pColorAttachmentFormats = &swapChainImageFormat; + pipelineRenderingInfo.depthAttachmentFormat = depthFormat; + pipelineRenderingInfo.stencilAttachmentFormat = vk::Format::eUndefined; // Create the graphics pipeline vk::PipelineRasterizationStateCreateInfo rasterizerBack = rasterizer; @@ -390,25 +399,20 @@ bool Renderer::createGraphicsPipeline() { // instance/model transforms flip winding (ensures PASS 1 actually 
shades pixels) rasterizerBack.cullMode = vk::CullModeFlagBits::eNone; - vk::GraphicsPipelineCreateInfo pipelineInfo{ - .pNext = &mainPipelineRenderingCreateInfo, - .flags = vk::PipelineCreateFlags{}, - .stageCount = 2, - .pStages = shaderStages, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizerBack, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencil, - .pColorBlendState = &colorBlending, - .pDynamicState = &dynamicState, - .layout = *pipelineLayout, - .renderPass = nullptr, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = -1 - }; + vk::GraphicsPipelineCreateInfo pipelineInfo{}; + pipelineInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + pipelineInfo.pNext = &pipelineRenderingInfo; + pipelineInfo.stageCount = 2; + pipelineInfo.pStages = shaderStages; + pipelineInfo.pVertexInputState = &vertexInputInfo; + pipelineInfo.pInputAssemblyState = &inputAssembly; + pipelineInfo.pViewportState = &viewportState; + pipelineInfo.pRasterizationState = &rasterizerBack; + pipelineInfo.pMultisampleState = &multisampling; + pipelineInfo.pDepthStencilState = &depthStencil; + pipelineInfo.pColorBlendState = &colorBlending; + pipelineInfo.pDynamicState = &dynamicState; + pipelineInfo.layout = *pipelineLayout; graphicsPipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); return true; @@ -420,41 +424,55 @@ bool Renderer::createGraphicsPipeline() { // Create PBR pipeline bool Renderer::createPBRPipeline() { + LOGI("Entering createPBRPipeline"); try { // Create PBR descriptor set layout + LOGI("Creating PBR descriptor set layout..."); if (!createPBRDescriptorSetLayout()) { return false; } + LOGI("PBR descriptor set layout created"); // Read shader code - auto shaderCode = readFile("shaders/pbr.spv"); + LOGI("Reading PBR shader..."); + std::string shaderPath = "shaders/pbr.spv"; + if (!rayQueryEnabled || 
!accelerationStructureEnabled) { + LOGI("Ray Query not supported/enabled. Using optimized Android PBR shader."); + shaderPath = "shaders/pbr_android.spv"; + } + auto shaderCode = readFile(shaderPath); + LOGI("PBR shader read successfully (%s), size: %zu", shaderPath.c_str(), shaderCode.size()); // Create shader modules vk::raii::ShaderModule shaderModule = createShaderModule(shaderCode); + LOGI("PBR shader module created"); // Create shader stage info - vk::PipelineShaderStageCreateInfo vertShaderStageInfo{ - .stage = vk::ShaderStageFlagBits::eVertex, - .module = *shaderModule, - .pName = "VSMain" - }; - - vk::PipelineShaderStageCreateInfo fragShaderStageInfo{ - .stage = vk::ShaderStageFlagBits::eFragment, - .module = *shaderModule, - .pName = "PSMain" - }; + LOGI("Creating shader stage info..."); + vk::PipelineShaderStageCreateInfo vertShaderStageInfo{}; + vertShaderStageInfo.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + vertShaderStageInfo.stage = vk::ShaderStageFlagBits::eVertex; + vertShaderStageInfo.module = *shaderModule; + vertShaderStageInfo.pName = "VSMain"; + + vk::PipelineShaderStageCreateInfo fragShaderStageInfo{}; + fragShaderStageInfo.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + fragShaderStageInfo.stage = vk::ShaderStageFlagBits::eFragment; + fragShaderStageInfo.module = *shaderModule; + fragShaderStageInfo.pName = "PSMain"; // Fragment entry point specialized for architectural glass - vk::PipelineShaderStageCreateInfo fragGlassStageInfo{ - .stage = vk::ShaderStageFlagBits::eFragment, - .module = *shaderModule, - .pName = "GlassPSMain" - }; + vk::PipelineShaderStageCreateInfo fragGlassStageInfo{}; + fragGlassStageInfo.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + fragGlassStageInfo.stage = vk::ShaderStageFlagBits::eFragment; + fragGlassStageInfo.module = *shaderModule; + fragGlassStageInfo.pName = "GlassPSMain"; + LOGI("Shader stage info created"); vk::PipelineShaderStageCreateInfo shaderStages[] = 
{vertShaderStageInfo, fragShaderStageInfo}; // Define vertex and instance binding descriptions + LOGI("Defining vertex input descriptions..."); auto vertexBindingDescription = Vertex::getBindingDescription(); auto instanceBindingDescription = InstanceData::getBindingDescription(); std::array bindingDescriptions = { @@ -473,64 +491,63 @@ bool Renderer::createPBRPipeline() { allAttributeDescriptions.insert(allAttributeDescriptions.end(), instanceModelMatrixAttributes.begin(), instanceModelMatrixAttributes.end()); allAttributeDescriptions.insert(allAttributeDescriptions.end(), instanceNormalMatrixAttributes.begin(), instanceNormalMatrixAttributes.end()); - vk::PipelineVertexInputStateCreateInfo vertexInputInfo{ - .vertexBindingDescriptionCount = static_cast(bindingDescriptions.size()), - .pVertexBindingDescriptions = bindingDescriptions.data(), - .vertexAttributeDescriptionCount = static_cast(allAttributeDescriptions.size()), - .pVertexAttributeDescriptions = allAttributeDescriptions.data() - }; + vk::PipelineVertexInputStateCreateInfo vertexInputInfo{}; + vertexInputInfo.sType = vk::StructureType::ePipelineVertexInputStateCreateInfo; + vertexInputInfo.vertexBindingDescriptionCount = static_cast(bindingDescriptions.size()); + vertexInputInfo.pVertexBindingDescriptions = bindingDescriptions.data(); + vertexInputInfo.vertexAttributeDescriptionCount = static_cast(allAttributeDescriptions.size()); + vertexInputInfo.pVertexAttributeDescriptions = allAttributeDescriptions.data(); // Create input assembly info - vk::PipelineInputAssemblyStateCreateInfo inputAssembly{ - .topology = vk::PrimitiveTopology::eTriangleList, - .primitiveRestartEnable = VK_FALSE - }; + vk::PipelineInputAssemblyStateCreateInfo inputAssembly{}; + inputAssembly.sType = vk::StructureType::ePipelineInputAssemblyStateCreateInfo; + inputAssembly.topology = vk::PrimitiveTopology::eTriangleList; + inputAssembly.primitiveRestartEnable = vk::False; // Create viewport state info - 
vk::PipelineViewportStateCreateInfo viewportState{ - .viewportCount = 1, - .scissorCount = 1 - }; + vk::PipelineViewportStateCreateInfo viewportState{}; + viewportState.sType = vk::StructureType::ePipelineViewportStateCreateInfo; + viewportState.viewportCount = 1; + viewportState.scissorCount = 1; // Create rasterization state info - vk::PipelineRasterizationStateCreateInfo rasterizer{ - .depthClampEnable = VK_FALSE, - .rasterizerDiscardEnable = VK_FALSE, - .polygonMode = vk::PolygonMode::eFill, - .cullMode = vk::CullModeFlagBits::eNone, - .frontFace = vk::FrontFace::eCounterClockwise, - .depthBiasEnable = VK_FALSE, - .lineWidth = 1.0f - }; + vk::PipelineRasterizationStateCreateInfo rasterizer{}; + rasterizer.sType = vk::StructureType::ePipelineRasterizationStateCreateInfo; + rasterizer.depthClampEnable = vk::False; + rasterizer.rasterizerDiscardEnable = vk::False; + rasterizer.polygonMode = vk::PolygonMode::eFill; + rasterizer.cullMode = vk::CullModeFlagBits::eNone; + rasterizer.frontFace = vk::FrontFace::eCounterClockwise; + rasterizer.depthBiasEnable = vk::False; + rasterizer.lineWidth = 1.0f; // Create multisample state info - vk::PipelineMultisampleStateCreateInfo multisampling{ - .rasterizationSamples = vk::SampleCountFlagBits::e1, - .sampleShadingEnable = VK_FALSE - }; + vk::PipelineMultisampleStateCreateInfo multisampling{}; + multisampling.sType = vk::StructureType::ePipelineMultisampleStateCreateInfo; + multisampling.rasterizationSamples = vk::SampleCountFlagBits::e1; + multisampling.sampleShadingEnable = vk::False; // Create depth stencil state info - vk::PipelineDepthStencilStateCreateInfo depthStencil{ - .depthTestEnable = VK_TRUE, - .depthWriteEnable = VK_TRUE, - .depthCompareOp = vk::CompareOp::eLess, - .depthBoundsTestEnable = VK_FALSE, - .stencilTestEnable = VK_FALSE - }; + vk::PipelineDepthStencilStateCreateInfo depthStencil{}; + depthStencil.sType = vk::StructureType::ePipelineDepthStencilStateCreateInfo; + depthStencil.depthTestEnable = 
vk::True; + depthStencil.depthWriteEnable = vk::True; + depthStencil.depthCompareOp = vk::CompareOp::eLess; + depthStencil.depthBoundsTestEnable = vk::False; + depthStencil.stencilTestEnable = vk::False; // Create a color blend attachment state - vk::PipelineColorBlendAttachmentState colorBlendAttachment{ - .blendEnable = VK_FALSE, - .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA - }; + vk::PipelineColorBlendAttachmentState colorBlendAttachment{}; + colorBlendAttachment.blendEnable = vk::False; + colorBlendAttachment.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA; // Create color blend state info - vk::PipelineColorBlendStateCreateInfo colorBlending{ - .logicOpEnable = VK_FALSE, - .logicOp = vk::LogicOp::eCopy, - .attachmentCount = 1, - .pAttachments = &colorBlendAttachment - }; + vk::PipelineColorBlendStateCreateInfo colorBlending{}; + colorBlending.sType = vk::StructureType::ePipelineColorBlendStateCreateInfo; + colorBlending.logicOpEnable = vk::False; + colorBlending.logicOp = vk::LogicOp::eCopy; + colorBlending.attachmentCount = 1; + colorBlending.pAttachments = &colorBlendAttachment; // Create dynamic state info std::vector dynamicStates = { @@ -538,55 +555,66 @@ bool Renderer::createPBRPipeline() { vk::DynamicState::eScissor }; - vk::PipelineDynamicStateCreateInfo dynamicState{ - .dynamicStateCount = static_cast(dynamicStates.size()), - .pDynamicStates = dynamicStates.data() - }; + vk::PipelineDynamicStateCreateInfo dynamicState{}; + dynamicState.sType = vk::StructureType::ePipelineDynamicStateCreateInfo; + dynamicState.dynamicStateCount = static_cast(dynamicStates.size()); + dynamicState.pDynamicStates = dynamicStates.data(); // Create push constant range for material properties - vk::PushConstantRange pushConstantRange{ - .stageFlags = 
vk::ShaderStageFlagBits::eFragment, - .offset = 0, - .size = sizeof(MaterialProperties) - }; + vk::PushConstantRange pushConstantRange{}; + pushConstantRange.stageFlags = vk::ShaderStageFlagBits::eFragment; + pushConstantRange.offset = 0; + pushConstantRange.size = sizeof(MaterialProperties); std::array transparentSetLayouts = {*pbrDescriptorSetLayout, *transparentDescriptorSetLayout}; // Create a pipeline layout for opaque PBR with only the PBR descriptor set (set 0) std::array pbrOnlySetLayouts = {*pbrDescriptorSetLayout}; // Create BOTH pipeline layouts with two descriptor sets (PBR set 0 + scene color set 1) - vk::PipelineLayoutCreateInfo pipelineLayoutInfo{ - .setLayoutCount = static_cast(transparentSetLayouts.size()), - .pSetLayouts = transparentSetLayouts.data(), - .pushConstantRangeCount = 1, - .pPushConstantRanges = &pushConstantRange - }; - + vk::PipelineLayoutCreateInfo pipelineLayoutInfo{}; + pipelineLayoutInfo.sType = vk::StructureType::ePipelineLayoutCreateInfo; + pipelineLayoutInfo.setLayoutCount = static_cast(transparentSetLayouts.size()); + pipelineLayoutInfo.pSetLayouts = transparentSetLayouts.data(); + pipelineLayoutInfo.pushConstantRangeCount = 1; + pipelineLayoutInfo.pPushConstantRanges = &pushConstantRange; + + LOGI("Creating pipeline layout..."); pbrPipelineLayout = vk::raii::PipelineLayout(device, pipelineLayoutInfo); + LOGI("Pipeline layout created"); // Transparent PBR layout uses the same two-set layout - vk::PipelineLayoutCreateInfo transparentPipelineLayoutInfo{.setLayoutCount = static_cast(transparentSetLayouts.size()), .pSetLayouts = transparentSetLayouts.data(), .pushConstantRangeCount = 1, .pPushConstantRanges = &pushConstantRange}; + vk::PipelineLayoutCreateInfo transparentPipelineLayoutInfo{}; + transparentPipelineLayoutInfo.sType = vk::StructureType::ePipelineLayoutCreateInfo; + transparentPipelineLayoutInfo.setLayoutCount = static_cast(transparentSetLayouts.size()); + transparentPipelineLayoutInfo.pSetLayouts = 
transparentSetLayouts.data(); + transparentPipelineLayoutInfo.pushConstantRangeCount = 1; + transparentPipelineLayoutInfo.pPushConstantRanges = &pushConstantRange; + LOGI("Creating transparent pipeline layout..."); pbrTransparentPipelineLayout = vk::raii::PipelineLayout(device, transparentPipelineLayoutInfo); + LOGI("Transparent pipeline layout created"); // Create pipeline rendering info vk::Format depthFormat = findDepthFormat(); + LOGI("Creating opaque PBR pipeline..."); + LOGI("Device: %p, Dispatcher: %p", (void *) (VkDevice) * device, (void *) device.getDispatcher()); - // Initialize member variable for proper lifetime management - pbrPipelineRenderingCreateInfo = vk::PipelineRenderingCreateInfo{ - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &swapChainImageFormat, - .depthAttachmentFormat = depthFormat, - .stencilAttachmentFormat = vk::Format::eUndefined - }; + vk::PipelineRenderingCreateInfo pipelineRenderingInfo{}; + pipelineRenderingInfo.sType = vk::StructureType::ePipelineRenderingCreateInfo; + pipelineRenderingInfo.colorAttachmentCount = 1; + pipelineRenderingInfo.pColorAttachmentFormats = &swapChainImageFormat; + pipelineRenderingInfo.depthAttachmentFormat = depthFormat; + pipelineRenderingInfo.stencilAttachmentFormat = vk::Format::eUndefined; // 1) Opaque PBR pipeline (no blending, depth writes enabled) vk::PipelineColorBlendAttachmentState opaqueBlendAttachment = colorBlendAttachment; opaqueBlendAttachment.blendEnable = VK_FALSE; - vk::PipelineColorBlendStateCreateInfo colorBlendingOpaque{ - .logicOpEnable = VK_FALSE, - .logicOp = vk::LogicOp::eCopy, - .attachmentCount = 1, - .pAttachments = &opaqueBlendAttachment - }; + + vk::PipelineColorBlendStateCreateInfo colorBlendingOpaque{}; + colorBlendingOpaque.sType = vk::StructureType::ePipelineColorBlendStateCreateInfo; + colorBlendingOpaque.logicOpEnable = VK_FALSE; + colorBlendingOpaque.logicOp = vk::LogicOp::eCopy; + colorBlendingOpaque.attachmentCount = 1; + colorBlendingOpaque.pAttachments 
= &opaqueBlendAttachment; + vk::PipelineDepthStencilStateCreateInfo depthStencilOpaque = depthStencil; depthStencilOpaque.depthWriteEnable = VK_TRUE; @@ -600,27 +628,24 @@ bool Renderer::createPBRPipeline() { vk::PipelineRasterizationStateCreateInfo rasterizerGlass = rasterizer; rasterizerGlass.cullMode = vk::CullModeFlagBits::eNone; - vk::GraphicsPipelineCreateInfo opaquePipelineInfo{ - - .pNext = &pbrPipelineRenderingCreateInfo, - .flags = vk::PipelineCreateFlags{}, - .stageCount = 2, - .pStages = shaderStages, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizerBack, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencilOpaque, - .pColorBlendState = &colorBlendingOpaque, - .pDynamicState = &dynamicState, - .layout = *pbrPipelineLayout, - .renderPass = nullptr, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = -1 - }; + vk::GraphicsPipelineCreateInfo opaquePipelineInfo{}; + opaquePipelineInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + opaquePipelineInfo.pNext = &pipelineRenderingInfo; + opaquePipelineInfo.stageCount = 2; + opaquePipelineInfo.pStages = shaderStages; + opaquePipelineInfo.pVertexInputState = &vertexInputInfo; + opaquePipelineInfo.pInputAssemblyState = &inputAssembly; + opaquePipelineInfo.pViewportState = &viewportState; + opaquePipelineInfo.pRasterizationState = &rasterizerBack; + opaquePipelineInfo.pMultisampleState = &multisampling; + opaquePipelineInfo.pDepthStencilState = &depthStencilOpaque; + opaquePipelineInfo.pColorBlendState = &colorBlendingOpaque; + opaquePipelineInfo.pDynamicState = &dynamicState; + opaquePipelineInfo.layout = *pbrPipelineLayout; + + LOGI("Calling vkCreateGraphicsPipelines for opaque PBR..."); pbrGraphicsPipeline = vk::raii::Pipeline(device, nullptr, opaquePipelineInfo); + LOGI("Opaque PBR pipeline created"); // 1b) Opaque PBR pipeline variant for color pass after 
a depth pre-pass. // Depth writes disabled (read-only) and compare against pre-pass depth. @@ -629,51 +654,41 @@ bool Renderer::createPBRPipeline() { depthStencilAfterPrepass.depthWriteEnable = VK_FALSE; depthStencilAfterPrepass.depthCompareOp = vk::CompareOp::eEqual; - vk::GraphicsPipelineCreateInfo opaqueAfterPrepassInfo{ - - .pNext = &pbrPipelineRenderingCreateInfo, - .flags = vk::PipelineCreateFlags{}, - .stageCount = 2, - .pStages = shaderStages, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizerBack, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencilAfterPrepass, - .pColorBlendState = &colorBlendingOpaque, - .pDynamicState = &dynamicState, - .layout = *pbrPipelineLayout, - .renderPass = nullptr, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = -1 - }; + vk::GraphicsPipelineCreateInfo opaqueAfterPrepassInfo{}; + opaqueAfterPrepassInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + opaqueAfterPrepassInfo.pNext = &pipelineRenderingInfo; + opaqueAfterPrepassInfo.stageCount = 2; + opaqueAfterPrepassInfo.pStages = shaderStages; + opaqueAfterPrepassInfo.pVertexInputState = &vertexInputInfo; + opaqueAfterPrepassInfo.pInputAssemblyState = &inputAssembly; + opaqueAfterPrepassInfo.pViewportState = &viewportState; + opaqueAfterPrepassInfo.pRasterizationState = &rasterizerBack; + opaqueAfterPrepassInfo.pMultisampleState = &multisampling; + opaqueAfterPrepassInfo.pDepthStencilState = &depthStencilAfterPrepass; + opaqueAfterPrepassInfo.pColorBlendState = &colorBlendingOpaque; + opaqueAfterPrepassInfo.pDynamicState = &dynamicState; + opaqueAfterPrepassInfo.layout = *pbrPipelineLayout; + pbrPrepassGraphicsPipeline = vk::raii::Pipeline(device, nullptr, opaqueAfterPrepassInfo); // 1c) Reflection PBR pipeline for mirrored off-screen pass (cull none to avoid winding issues) vk::PipelineRasterizationStateCreateInfo 
rasterizerReflection = rasterizer; rasterizerReflection.cullMode = vk::CullModeFlagBits::eNone; - vk::GraphicsPipelineCreateInfo reflectionPipelineInfo{ - - .pNext = &pbrPipelineRenderingCreateInfo, - .flags = vk::PipelineCreateFlags{}, - .stageCount = 2, - .pStages = shaderStages, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizerReflection, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencilOpaque, - .pColorBlendState = &colorBlendingOpaque, - .pDynamicState = &dynamicState, - .layout = *pbrPipelineLayout, - .renderPass = nullptr, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = -1 - }; + vk::GraphicsPipelineCreateInfo reflectionPipelineInfo{}; + reflectionPipelineInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + reflectionPipelineInfo.pNext = &pipelineRenderingInfo; + reflectionPipelineInfo.stageCount = 2; + reflectionPipelineInfo.pStages = shaderStages; + reflectionPipelineInfo.pVertexInputState = &vertexInputInfo; + reflectionPipelineInfo.pInputAssemblyState = &inputAssembly; + reflectionPipelineInfo.pViewportState = &viewportState; + reflectionPipelineInfo.pRasterizationState = &rasterizerReflection; + reflectionPipelineInfo.pMultisampleState = &multisampling; + reflectionPipelineInfo.pDepthStencilState = &depthStencilOpaque; + reflectionPipelineInfo.pColorBlendState = &colorBlendingOpaque; + reflectionPipelineInfo.pDynamicState = &dynamicState; + reflectionPipelineInfo.layout = *pbrPipelineLayout; + pbrReflectionGraphicsPipeline = vk::raii::Pipeline(device, nullptr, reflectionPipelineInfo); // 2) Blended PBR pipeline (straight alpha blending, depth writes disabled for translucency) @@ -690,30 +705,21 @@ bool Renderer::createPBRPipeline() { depthStencilBlended.depthWriteEnable = VK_FALSE; depthStencilBlended.depthCompareOp = vk::CompareOp::eLessOrEqual; - vk::GraphicsPipelineCreateInfo 
blendedPipelineInfo{ - - .pNext = &pbrPipelineRenderingCreateInfo, - .flags = vk::PipelineCreateFlags{}, - .stageCount = 2, - .pStages = shaderStages, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - // Use back-face culling for the blended (glass) pipeline to avoid - // rendering both front and back faces of thin glass geometry, which - // can cause flickering as the camera rotates due to overlapping - // transparent surfaces passing the depth test. - .pRasterizationState = &rasterizerBack, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencilBlended, - .pColorBlendState = &colorBlendingBlended, - .pDynamicState = &dynamicState, - .layout = *pbrTransparentPipelineLayout, - .renderPass = nullptr, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = -1 - }; + vk::GraphicsPipelineCreateInfo blendedPipelineInfo{}; + blendedPipelineInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + blendedPipelineInfo.pNext = &pipelineRenderingInfo; + blendedPipelineInfo.stageCount = 2; + blendedPipelineInfo.pStages = shaderStages; + blendedPipelineInfo.pVertexInputState = &vertexInputInfo; + blendedPipelineInfo.pInputAssemblyState = &inputAssembly; + blendedPipelineInfo.pViewportState = &viewportState; + blendedPipelineInfo.pRasterizationState = &rasterizerBack; + blendedPipelineInfo.pMultisampleState = &multisampling; + blendedPipelineInfo.pDepthStencilState = &depthStencilBlended; + blendedPipelineInfo.pColorBlendState = &colorBlendingBlended; + blendedPipelineInfo.pDynamicState = &dynamicState; + blendedPipelineInfo.layout = *pbrTransparentPipelineLayout; + pbrBlendGraphicsPipeline = vk::raii::Pipeline(device, nullptr, blendedPipelineInfo); // 3) Glass pipeline (architectural glass) - uses the same vertex input and @@ -721,26 +727,21 @@ bool Renderer::createPBRPipeline() { // (GlassPSMain) for more stable glass shading. 
vk::PipelineShaderStageCreateInfo glassStages[] = {vertShaderStageInfo, fragGlassStageInfo}; - vk::GraphicsPipelineCreateInfo glassPipelineInfo{ - - .pNext = &pbrPipelineRenderingCreateInfo, - .flags = vk::PipelineCreateFlags{}, - .stageCount = 2, - .pStages = glassStages, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizerGlass, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencilBlended, - .pColorBlendState = &colorBlendingBlended, - .pDynamicState = &dynamicState, - .layout = *pbrTransparentPipelineLayout, - .renderPass = nullptr, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = -1 - }; + vk::GraphicsPipelineCreateInfo glassPipelineInfo{}; + glassPipelineInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + glassPipelineInfo.pNext = &pipelineRenderingInfo; + glassPipelineInfo.stageCount = 2; + glassPipelineInfo.pStages = glassStages; + glassPipelineInfo.pVertexInputState = &vertexInputInfo; + glassPipelineInfo.pInputAssemblyState = &inputAssembly; + glassPipelineInfo.pViewportState = &viewportState; + glassPipelineInfo.pRasterizationState = &rasterizerGlass; + glassPipelineInfo.pMultisampleState = &multisampling; + glassPipelineInfo.pDepthStencilState = &depthStencilBlended; + glassPipelineInfo.pColorBlendState = &colorBlendingBlended; + glassPipelineInfo.pDynamicState = &dynamicState; + glassPipelineInfo.layout = *pbrTransparentPipelineLayout; + glassGraphicsPipeline = vk::raii::Pipeline(device, nullptr, glassPipelineInfo); return true; @@ -766,16 +767,18 @@ bool Renderer::createCompositePipeline() { vk::raii::ShaderModule shaderModule = createShaderModule(shaderCode); // Shader stages - vk::PipelineShaderStageCreateInfo vert{ - .stage = vk::ShaderStageFlagBits::eVertex, - .module = *shaderModule, - .pName = "VSMain" - }; - vk::PipelineShaderStageCreateInfo frag{ - .stage = 
vk::ShaderStageFlagBits::eFragment, - .module = *shaderModule, - .pName = "PSMain" - }; + vk::PipelineShaderStageCreateInfo vert{}; + vert.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + vert.stage = vk::ShaderStageFlagBits::eVertex; + vert.module = *shaderModule; + vert.pName = "VSMain"; + + vk::PipelineShaderStageCreateInfo frag{}; + frag.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + frag.stage = vk::ShaderStageFlagBits::eFragment; + frag.module = *shaderModule; + frag.pName = "PSMain"; + vk::PipelineShaderStageCreateInfo stages[] = {vert, frag}; // No vertex inputs (fullscreen triangle via SV_VertexID) @@ -803,31 +806,27 @@ bool Renderer::createCompositePipeline() { compositePipelineLayout = vk::raii::PipelineLayout(device, plInfo); // Dynamic rendering info - compositePipelineRenderingCreateInfo = vk::PipelineRenderingCreateInfo{ - - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &swapChainImageFormat, - .depthAttachmentFormat = vk::Format::eUndefined, - .stencilAttachmentFormat = vk::Format::eUndefined - }; - - vk::GraphicsPipelineCreateInfo pipeInfo{ - - .pNext = &compositePipelineRenderingCreateInfo, - .stageCount = 2, - .pStages = stages, - .pVertexInputState = &vertexInput, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizer, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencil, - .pColorBlendState = &colorBlending, - .pDynamicState = &dynamicState, - .layout = *compositePipelineLayout, - .renderPass = nullptr, - .subpass = 0 - }; + vk::PipelineRenderingCreateInfo pipelineRenderingInfo{}; + pipelineRenderingInfo.sType = vk::StructureType::ePipelineRenderingCreateInfo; + pipelineRenderingInfo.colorAttachmentCount = 1; + pipelineRenderingInfo.pColorAttachmentFormats = &swapChainImageFormat; + pipelineRenderingInfo.depthAttachmentFormat = vk::Format::eUndefined; + pipelineRenderingInfo.stencilAttachmentFormat = vk::Format::eUndefined; + + 
vk::GraphicsPipelineCreateInfo pipeInfo{}; + pipeInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + pipeInfo.pNext = &pipelineRenderingInfo; + pipeInfo.stageCount = 2; + pipeInfo.pStages = stages; + pipeInfo.pVertexInputState = &vertexInput; + pipeInfo.pInputAssemblyState = &inputAssembly; + pipeInfo.pViewportState = &viewportState; + pipeInfo.pRasterizationState = &rasterizer; + pipeInfo.pMultisampleState = &multisampling; + pipeInfo.pDepthStencilState = &depthStencil; + pipeInfo.pColorBlendState = &colorBlending; + pipeInfo.pDynamicState = &dynamicState; + pipeInfo.layout = *compositePipelineLayout; compositePipeline = vk::raii::Pipeline(device, nullptr, pipeInfo); return true; @@ -852,11 +851,11 @@ bool Renderer::createDepthPrepassPipeline() { vk::raii::ShaderModule shaderModule = createShaderModule(shaderCode); // Stages: Vertex only - vk::PipelineShaderStageCreateInfo vertStage{ - .stage = vk::ShaderStageFlagBits::eVertex, - .module = *shaderModule, - .pName = "VSMain" - }; + vk::PipelineShaderStageCreateInfo vertStage{}; + vertStage.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + vertStage.stage = vk::ShaderStageFlagBits::eVertex; + vertStage.module = *shaderModule; + vertStage.pName = "VSMain"; // Vertex/instance bindings & attributes same as PBR auto vertexBindingDescription = Vertex::getBindingDescription(); @@ -928,26 +927,26 @@ bool Renderer::createDepthPrepassPipeline() { }; vk::Format depthFormat = findDepthFormat(); - vk::PipelineRenderingCreateInfo renderingInfo{ - .colorAttachmentCount = 0, - .pColorAttachmentFormats = nullptr, - .depthAttachmentFormat = depthFormat - }; - - vk::GraphicsPipelineCreateInfo pipelineInfo{ - .pNext = &renderingInfo, - .stageCount = 1, - .pStages = &vertStage, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizer, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencil, - 
.pColorBlendState = &colorBlending, - .pDynamicState = &dynamicState, - .layout = *pbrPipelineLayout - }; + vk::PipelineRenderingCreateInfo pipelineRenderingInfo{}; + pipelineRenderingInfo.sType = vk::StructureType::ePipelineRenderingCreateInfo; + pipelineRenderingInfo.colorAttachmentCount = 0; + pipelineRenderingInfo.pColorAttachmentFormats = nullptr; + pipelineRenderingInfo.depthAttachmentFormat = depthFormat; + + vk::GraphicsPipelineCreateInfo pipelineInfo{}; + pipelineInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + pipelineInfo.pNext = &pipelineRenderingInfo; + pipelineInfo.stageCount = 1; + pipelineInfo.pStages = &vertStage; + pipelineInfo.pVertexInputState = &vertexInputInfo; + pipelineInfo.pInputAssemblyState = &inputAssembly; + pipelineInfo.pViewportState = &viewportState; + pipelineInfo.pRasterizationState = &rasterizer; + pipelineInfo.pMultisampleState = &multisampling; + pipelineInfo.pDepthStencilState = &depthStencil; + pipelineInfo.pColorBlendState = &colorBlending; + pipelineInfo.pDynamicState = &dynamicState; + pipelineInfo.layout = *pbrPipelineLayout; depthPrepassPipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); return true; @@ -967,17 +966,17 @@ bool Renderer::createLightingPipeline() { vk::raii::ShaderModule shaderModule = createShaderModule(shaderCode); // Create shader stage info - vk::PipelineShaderStageCreateInfo vertShaderStageInfo{ - .stage = vk::ShaderStageFlagBits::eVertex, - .module = *shaderModule, - .pName = "VSMain" - }; - - vk::PipelineShaderStageCreateInfo fragShaderStageInfo{ - .stage = vk::ShaderStageFlagBits::eFragment, - .module = *shaderModule, - .pName = "PSMain" - }; + vk::PipelineShaderStageCreateInfo vertShaderStageInfo{}; + vertShaderStageInfo.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + vertShaderStageInfo.stage = vk::ShaderStageFlagBits::eVertex; + vertShaderStageInfo.module = *shaderModule; + vertShaderStageInfo.pName = "VSMain"; + + vk::PipelineShaderStageCreateInfo 
fragShaderStageInfo{}; + fragShaderStageInfo.sType = vk::StructureType::ePipelineShaderStageCreateInfo; + fragShaderStageInfo.stage = vk::ShaderStageFlagBits::eFragment; + fragShaderStageInfo.module = *shaderModule; + fragShaderStageInfo.pName = "PSMain"; vk::PipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo, fragShaderStageInfo}; @@ -1081,39 +1080,31 @@ bool Renderer::createLightingPipeline() { // Create pipeline rendering info vk::Format depthFormat = findDepthFormat(); - // Initialize member variable for proper lifetime management - lightingPipelineRenderingCreateInfo = vk::PipelineRenderingCreateInfo{ - - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &swapChainImageFormat, - .depthAttachmentFormat = depthFormat, - .stencilAttachmentFormat = vk::Format::eUndefined - }; + vk::PipelineRenderingCreateInfo pipelineRenderingInfo{}; + pipelineRenderingInfo.sType = vk::StructureType::ePipelineRenderingCreateInfo; + pipelineRenderingInfo.colorAttachmentCount = 1; + pipelineRenderingInfo.pColorAttachmentFormats = &swapChainImageFormat; + pipelineRenderingInfo.depthAttachmentFormat = depthFormat; + pipelineRenderingInfo.stencilAttachmentFormat = vk::Format::eUndefined; // Create a graphics pipeline vk::PipelineRasterizationStateCreateInfo rasterizerBack = rasterizer; rasterizerBack.cullMode = vk::CullModeFlagBits::eBack; - vk::GraphicsPipelineCreateInfo pipelineInfo{ - - .pNext = &lightingPipelineRenderingCreateInfo, - .flags = vk::PipelineCreateFlags{}, - .stageCount = 2, - .pStages = shaderStages, - .pVertexInputState = &vertexInputInfo, - .pInputAssemblyState = &inputAssembly, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizerBack, - .pMultisampleState = &multisampling, - .pDepthStencilState = &depthStencil, - .pColorBlendState = &colorBlending, - .pDynamicState = &dynamicState, - .layout = *lightingPipelineLayout, - .renderPass = nullptr, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = -1 - }; + 
vk::GraphicsPipelineCreateInfo pipelineInfo{}; + pipelineInfo.sType = vk::StructureType::eGraphicsPipelineCreateInfo; + pipelineInfo.pNext = &pipelineRenderingInfo; + pipelineInfo.stageCount = 2; + pipelineInfo.pStages = shaderStages; + pipelineInfo.pVertexInputState = &vertexInputInfo; + pipelineInfo.pInputAssemblyState = &inputAssembly; + pipelineInfo.pViewportState = &viewportState; + pipelineInfo.pRasterizationState = &rasterizerBack; + pipelineInfo.pMultisampleState = &multisampling; + pipelineInfo.pDepthStencilState = &depthStencil; + pipelineInfo.pColorBlendState = &colorBlending; + pipelineInfo.pDynamicState = &dynamicState; + pipelineInfo.layout = *lightingPipelineLayout; lightingPipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); return true; @@ -1179,11 +1170,18 @@ bool Renderer::createRayQueryDescriptorSetLayout() { // On some drivers this requires descriptor indexing features + layout binding flags to avoid the // array collapsing to slot 0 (resulting in "no textures" even when `texIndex>0`). std::array bindingFlags{}; + bool useUpdateAfterBind = false; if (descriptorIndexingEnabled) { - // Binding 6 is the large sampled texture array. - bindingFlags[6] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | - vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending | - vk::DescriptorBindingFlagBits::ePartiallyBound; + if (descriptorBindingSampledImageUpdateAfterBindEnabled) { + // Binding 6 is the large sampled texture array. 
+ bindingFlags[6] = vk::DescriptorBindingFlagBits::eUpdateAfterBind | + vk::DescriptorBindingFlagBits::eUpdateUnusedWhilePending | + vk::DescriptorBindingFlagBits::ePartiallyBound; + useUpdateAfterBind = true; + } else { + // If update-after-bind is not supported, we can still use partially bound if supported + bindingFlags[6] = vk::DescriptorBindingFlagBits::ePartiallyBound; + } } vk::DescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsInfo{}; @@ -1195,7 +1193,9 @@ bool Renderer::createRayQueryDescriptorSetLayout() { vk::DescriptorSetLayoutCreateInfo layoutInfo{}; if (descriptorIndexingEnabled) { layoutInfo.pNext = &bindingFlagsInfo; - layoutInfo.flags = vk::DescriptorSetLayoutCreateFlagBits::eUpdateAfterBindPool; + if (useUpdateAfterBind) { + layoutInfo.flags = vk::DescriptorSetLayoutCreateFlagBits::eUpdateAfterBindPool; + } } layoutInfo.bindingCount = static_cast(bindings.size()); layoutInfo.pBindings = bindings.data(); diff --git a/attachments/simple_engine/renderer_ray_query.cpp b/attachments/simple_engine/renderer_ray_query.cpp index 5107352ce..d557e2d69 100644 --- a/attachments/simple_engine/renderer_ray_query.cpp +++ b/attachments/simple_engine/renderer_ray_query.cpp @@ -333,7 +333,16 @@ bool Renderer::buildAccelerationStructures(const std::vector& entities geometry.geometryType = vk::GeometryTypeKHR::eTriangles; // Mark geometry as OPAQUE to ensure closest hits are committed reliably for primary rays // (we can re-introduce transparency later with any-hit/candidate handling) +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // Course: Opacity Micromaps — attach micromap pNext if available. + // If a micromap is present, we MUST NOT set the OPAQUE flag, otherwise + // the "Unknown" regions will be treated as opaque and the any-hit shader + // won't fire for the alpha-mask edges. + void* ommPNext = GetMicromapPNext(meshComp); + geometry.flags = ommPNext ? 
vk::GeometryFlagBitsKHR{} : vk::GeometryFlagBitsKHR::eOpaque; +#else geometry.flags = vk::GeometryFlagBitsKHR::eOpaque; +#endif geometry.geometry.triangles.vertexFormat = vk::Format::eR32G32B32Sfloat; geometry.geometry.triangles.vertexData = vertexAddress; @@ -345,6 +354,11 @@ bool Renderer::buildAccelerationStructures(const std::vector& entities geometry.geometry.triangles.indexType = vk::IndexType::eUint32; geometry.geometry.triangles.indexData = indexAddress; +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // Course: Opacity Micromaps — attach micromap pNext if available + geometry.geometry.triangles.pNext = ommPNext; +#endif + // Build info vk::AccelerationStructureBuildGeometryInfoKHR buildInfo{}; buildInfo.type = vk::AccelerationStructureTypeKHR::eBottomLevel; diff --git a/attachments/simple_engine/renderer_rendering.cpp b/attachments/simple_engine/renderer_rendering.cpp index 32bf7dc7f..5dc5797c4 100644 --- a/attachments/simple_engine/renderer_rendering.cpp +++ b/attachments/simple_engine/renderer_rendering.cpp @@ -125,6 +125,14 @@ bool Renderer::createSwapChain() { imageCount = swapChainSupport.capabilities.maxImageCount; } + // Choose preTransform. On Android, eIdentity is preferred if supported to let the system handle rotation. 
+ vk::SurfaceTransformFlagBitsKHR preTransform; + if (swapChainSupport.capabilities.supportedTransforms & vk::SurfaceTransformFlagBitsKHR::eIdentity) { + preTransform = vk::SurfaceTransformFlagBitsKHR::eIdentity; + } else { + preTransform = swapChainSupport.capabilities.currentTransform; + } + // Create swap chain info vk::SwapchainCreateInfoKHR createInfo{ .surface = *surface, @@ -134,7 +142,7 @@ bool Renderer::createSwapChain() { .imageExtent = extent, .imageArrayLayers = 1, .imageUsage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferDst, - .preTransform = swapChainSupport.capabilities.currentTransform, + .preTransform = preTransform, .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eOpaque, .presentMode = presentMode, .clipped = VK_TRUE, @@ -527,7 +535,7 @@ bool Renderer::setupDynamicRendering() { .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, .loadOp = vk::AttachmentLoadOp::eClear, .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearColorValue(std::array < float, 4 >{0.0f, 0.0f, 0.0f, 1.0f}) + .clearValue = vk::ClearColorValue(std::array{0.0f, 0.0f, 0.0f, 1.0f}) // Black default } }; @@ -1362,12 +1370,16 @@ void Renderer::Render(const std::vector& entities, CameraComponent* ca } watchdogProgressLabel.store("Render: buildAccelerationStructures", std::memory_order_relaxed); - if (buildAccelerationStructures(entities)) { - watchdogProgressLabel.store("Render: after buildAccelerationStructures", std::memory_order_relaxed); - asBuildRequested.store(false, std::memory_order_release); - asBuildRequestStartNs.store(0, std::memory_order_relaxed); - // AS build request resolved; restore normal watchdog sensitivity. 
- watchdogSuppressed.store(false, std::memory_order_relaxed); + bool buildSuccess = buildAccelerationStructures(entities); + watchdogProgressLabel.store("Render: after buildAccelerationStructures", std::memory_order_relaxed); + + // Always resolve the request flag once we've attempted a build (success or fail) + // to avoid getting stuck in the loading state. + asBuildRequested.store(false, std::memory_order_release); + asBuildRequestStartNs.store(0, std::memory_order_relaxed); + watchdogSuppressed.store(false, std::memory_order_relaxed); + + if (buildSuccess) { // Transition the loading UI to a finalizing phase (descriptor cold-init, etc.). if (IsLoading()) { SetLoadingPhase(LoadingPhase::Finalizing); @@ -1583,7 +1595,9 @@ void Renderer::Render(const std::vector& entities, CameraComponent* ca const bool noPreallocPending = !pendingEntityPreallocQueued.load(std::memory_order_relaxed); const bool noDirtyEntities = descriptorDirtyEntities.empty(); const bool noDeferredDescOps = !descriptorRefreshPending.load(std::memory_order_relaxed); + if (loaderDone && criticalDone && noASPending && noPreallocPending && noDirtyEntities && noDeferredDescOps) { + LOGI("Renderer: Transitioning from Loading to Active scene"); MarkInitialLoadComplete(); } } @@ -1691,6 +1705,11 @@ void Renderer::Render(const std::vector& entities, CameraComponent* ca // imageIndex already populated above watchdogProgressLabel.store("Render: acquired swapchain image", std::memory_order_relaxed); + bool isLoading = IsLoading(); + bool flag = loadingFlag.load(); + uint32_t critical = criticalJobsOutstanding.load(); + bool initDone = initialLoadComplete.load(); + if (acquireResultCode == vk::Result::eSuboptimalKHR || framebufferResized.load(std::memory_order_relaxed)) { framebufferResized.store(false, std::memory_order_relaxed); if (imguiSystem) @@ -2215,6 +2234,14 @@ void Renderer::Render(const std::vector& entities, CameraComponent* ca ImGui::SliderFloat("Gamma", &gamma, 1.6f, 2.6f, "%.2f"); } 
ImGui::End(); + + // Invoke any registered Course module / plugin ImGui panel + { + std::lock_guard lock(imguiPanelCallbackMutex); + if (imguiPanelCallback) { + imguiPanelCallback(this); + } + } } // Rasterization rendering: only execute if ray query did not render this frame. @@ -2485,6 +2512,7 @@ void Renderer::Render(const std::vector& entities, CameraComponent* ca // Begin rendering to swapchain for composite colorAttachments[0].imageView = *swapChainImageViews[imageIndex]; colorAttachments[0].loadOp = vk::AttachmentLoadOp::eClear; // clear before composing base layer (full-screen composite overwrites all pixels) + colorAttachments[0].clearValue = vk::ClearColorValue(std::array{0.0f, 0.0f, 0.0f, 1.0f}); // Neutral black depthAttachment.loadOp = vk::AttachmentLoadOp::eDontCare; // no depth for composite renderingInfo.renderArea = vk::Rect2D({0, 0}, swapChainExtent); // IMPORTANT: Composite pass does not use a depth attachment. Avoid binding it to satisfy dynamic rendering VUIDs. @@ -2522,6 +2550,7 @@ void Renderer::Render(const std::vector& entities, CameraComponent* ca pc.exposure = std::clamp(this->exposure, 0.2f, 4.0f); pc.gamma = this->gamma; pc.outputIsSRGB = (swapChainImageFormat == vk::Format::eR8G8B8A8Srgb || swapChainImageFormat == vk::Format::eB8G8R8A8Srgb) ? 
1 : 0; + commandBuffers[currentFrame].pushConstants(*compositePipelineLayout, vk::ShaderStageFlagBits::eFragment, 0, pc); // Draw fullscreen triangle diff --git a/attachments/simple_engine/renderer_resources.cpp b/attachments/simple_engine/renderer_resources.cpp index 08b3bb524..04c9c0b79 100644 --- a/attachments/simple_engine/renderer_resources.cpp +++ b/attachments/simple_engine/renderer_resources.cpp @@ -113,7 +113,7 @@ static vk::Format CoerceFormatSRGB(vk::Format fmt, bool wantSRGB) { } // Create texture image -bool Renderer::createTextureImage(const std::string& texturePath_, TextureResources& resources) { +bool Renderer::createTextureImage(const std::string& texturePath_, TextureResources& resources, bool cachePixels) { try { ensureThreadLocalVulkanInit(); const std::string textureId = ResolveTextureId(texturePath_); @@ -169,12 +169,12 @@ bool Renderer::createTextureImage(const std::string& texturePath_, TextureResour // If it's a KTX2 texture but the path doesn't exist, try common fallback filename variants if (isKtx2) { std::filesystem::path origPath(resolvedPath); - if (!std::filesystem::exists(origPath)) { + if (!fileExists(origPath.string())) { std::string fname = origPath.filename().string(); std::string dir = origPath.parent_path().string(); auto tryCandidate = [&](const std::string& candidateName) -> bool { std::filesystem::path cand = std::filesystem::path(dir) / candidateName; - if (std::filesystem::exists(cand)) { + if (fileExists(cand.string())) { std::cout << "Resolved missing texture '" << resolvedPath << "' to existing file '" << cand.string() << "'" << std::endl; resolvedPath = cand.string(); return true; @@ -218,19 +218,58 @@ bool Renderer::createTextureImage(const std::string& texturePath_, TextureResour std::vector copyRegions; if (isKtx2) { - // Load KTX2 file + // Load KTX2 file from memory on Android or file on desktop +#if defined(PLATFORM_ANDROID) + std::vector fileBuffer; + try { + fileBuffer = readFile(resolvedPath); + } catch (...) 
{ + // Retry with fallback logic below if needed + } + + KTX_error_code result = KTX_SUCCESS; + if (!fileBuffer.empty()) { + result = ktxTexture2_CreateFromMemory(reinterpret_cast(fileBuffer.data()), + fileBuffer.size(), + KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, + &ktxTex); + } else { + result = KTX_FILE_OPEN_FAILED; + } +#else KTX_error_code result = ktxTexture2_CreateFromNamedFile(resolvedPath.c_str(), KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, &ktxTex); +#endif if (result != KTX_SUCCESS) { + // ... (rest of fallback logic) // Retry with sibling suffix variants if file exists but cannot be parsed/opened std::filesystem::path origPath(resolvedPath); std::string fname = origPath.filename().string(); std::string dir = origPath.parent_path().string(); auto tryLoad = [&](const std::string& candidateName) -> bool { std::filesystem::path cand = std::filesystem::path(dir) / candidateName; - if (std::filesystem::exists(cand)) { - std::string candStr = cand.string(); + std::string candStr = cand.string(); +#if defined(PLATFORM_ANDROID) + std::vector candBuffer; + try { + candBuffer = readFile(candStr); + } catch (...) 
{ + return false; + } + if (!candBuffer.empty()) { + result = ktxTexture2_CreateFromMemory(reinterpret_cast(candBuffer.data()), + candBuffer.size(), + KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, + &ktxTex); + if (result == KTX_SUCCESS) { + resolvedPath = candStr; + return true; + } + } + return false; +#else + if (fileExists(cand.string())) { std::cout << "Retrying KTX2 load with sibling candidate '" << candStr << "' for original '" << resolvedPath << "'" << std::endl; result = ktxTexture2_CreateFromNamedFile(candStr.c_str(), KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, &ktxTex); if (result == KTX_SUCCESS) { @@ -239,6 +278,7 @@ bool Renderer::createTextureImage(const std::string& texturePath_, TextureResour } } return false; +#endif }; // Known suffix variants near the end of filename before extension std::vector suffixes = {"_c", "_d", "_cm", "_diffuse", "_basecolor", "_albedo"}; @@ -273,6 +313,48 @@ bool Renderer::createTextureImage(const std::string& texturePath_, TextureResour // Check if the texture needs BasisU transcoding; prefer GPU-compressed targets to save VRAM wasTranscoded = ktxTexture2_NeedsTranscoding(ktxTex); + +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // --- Course: Opacity Micromaps --- + // If requested, cache the raw CPU pixels for the OMM builder. + if (cachePixels) { + if (wasTranscoded) { + // BasisU must be transcoded to a readable format (RGBA32) for the CPU cache. + // Since transcoding is destructive, we use a temporary texture object for the cache. + ktxTexture2* cacheKtx = nullptr; + KTX_error_code cacheRes = KTX_SUCCESS; +#if defined(PLATFORM_ANDROID) + std::vector cacheBuf; + try { cacheBuf = readFile(resolvedPath); } catch (...) 
{ + } + if (!cacheBuf.empty()) { + cacheRes = ktxTexture2_CreateFromMemory(reinterpret_cast(cacheBuf.data()), cacheBuf.size(), KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, &cacheKtx); + } else { cacheRes = KTX_FILE_OPEN_FAILED; } +#else + cacheRes = ktxTexture2_CreateFromNamedFile(resolvedPath.c_str(), KTX_TEXTURE_CREATE_LOAD_IMAGE_DATA_BIT, &cacheKtx); +#endif + if (cacheRes == KTX_SUCCESS) { + if (ktxTexture2_TranscodeBasis(cacheKtx, KTX_TTF_RGBA32, 0) == KTX_SUCCESS) { + ktx_size_t offset = 0; + ktxTexture_GetImageOffset(reinterpret_cast(cacheKtx), 0, 0, 0, &offset); + const uint8_t* pData = ktxTexture_GetData(reinterpret_cast(cacheKtx)) + offset; + StoreRawTexturePixels(textureId, pData, cacheKtx->baseWidth, cacheKtx->baseHeight, 4); + } + ktxTexture_Destroy(reinterpret_cast(cacheKtx)); + } + } else { + // Already transcoded or not BasisU; check if it's a raw format we can use directly (RGBA8) + if (ktxTex->vkFormat == static_cast(vk::Format::eR8G8B8A8Unorm) || + ktxTex->vkFormat == static_cast(vk::Format::eR8G8B8A8Srgb)) { + ktx_size_t offset = 0; + ktxTexture_GetImageOffset(reinterpret_cast(ktxTex), 0, 0, 0, &offset); + const uint8_t* pData = ktxTexture_GetData(reinterpret_cast(ktxTex)) + offset; + StoreRawTexturePixels(textureId, pData, ktxTex->baseWidth, ktxTex->baseHeight, 4); + } + } + } +#endif + if (wasTranscoded) { // Select a compressed target supported by the device (prefer BC7 RGBA, then BC3 RGBA, then BC1 RGB) auto supportsFormat = [&](vk::Format f) { @@ -674,7 +756,7 @@ bool Renderer::createTextureSampler(TextureResources& resources) { } // Load texture from file (public wrapper for createTextureImage) -bool Renderer::LoadTexture(const std::string& texturePath) { +bool Renderer::LoadTexture(const std::string& texturePath, bool cachePixels) { ensureThreadLocalVulkanInit(); if (texturePath.empty()) { std::cerr << "LoadTexture: Empty texture path provided" << std::endl; @@ -689,8 +771,17 @@ bool Renderer::LoadTexture(const std::string& texturePath) { 
std::shared_lock texLock(textureResourcesMutex); auto it = textureResources.find(resolvedId); if (it != textureResources.end()) { - // Texture already loaded + // Texture already loaded. +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // If we need to cache pixels but they aren't cached yet, don't return early. + if (cachePixels && GetRawTexturePixels(resolvedId, nullptr, nullptr, nullptr) == nullptr) { + // Continue to createTextureImage which will handle caching. + } else { + return true; + } +#else return true; +#endif } } @@ -700,7 +791,7 @@ bool Renderer::LoadTexture(const std::string& texturePath) { // Use existing createTextureImage method (it inserts into textureResources on success) if it's a KTX2 path; otherwise fall back to memory path below bool success = false; if (resolvedId.ends_with(".ktx2")) { - success = createTextureImage(resolvedId, tempResources); + success = createTextureImage(resolvedId, tempResources, cachePixels); if (success) return true; // Fall through to raw-memory path if KTX load failed @@ -751,10 +842,11 @@ bool Renderer::LoadTextureFromMemory(const std::string& textureId, const unsigned char* imageData, int width, int height, - int channels) { + int channels, + bool cachePixels) { ensureThreadLocalVulkanInit(); const std::string resolvedId = ResolveTextureId(textureId); - std::cout << "[LoadTextureFromMemory] start id=" << textureId << " -> resolved=" << resolvedId << " size=" << width << "x" << height << " ch=" << channels << std::endl; + std::cout << "[LoadTextureFromMemory] start id=" << textureId << " -> resolved=" << resolvedId << " size=" << width << "x" << height << " ch=" << channels << " cache=" << cachePixels << std::endl; if (resolvedId.empty() || !imageData || width <= 0 || height <= 0 || channels <= 0) { std::cerr << "LoadTextureFromMemory: Invalid parameters" << std::endl; return false; @@ -765,8 +857,17 @@ bool Renderer::LoadTextureFromMemory(const std::string& textureId, std::shared_lock texLock(textureResourcesMutex); 
auto it = textureResources.find(resolvedId); if (it != textureResources.end()) { - // Texture already loaded + // Texture already loaded. +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // If we need to cache pixels but they aren't cached yet, don't return early. + if (cachePixels && GetRawTexturePixels(resolvedId, nullptr, nullptr, nullptr) == nullptr) { + // Continue to load logic below to cache pixels. + } else { + return true; + } +#else return true; +#endif } } @@ -779,9 +880,19 @@ bool Renderer::LoadTextureFromMemory(const std::string& textureId, } // Double-check cache after the wait { - std::shared_lock texLock(textureResourcesMutex); - auto it2 = textureResources.find(resolvedId); - if (it2 != textureResources.end()) { + bool alreadyLoaded = false; { + std::shared_lock texLock(textureResourcesMutex); + alreadyLoaded = textureResources.contains(resolvedId); + } + if (alreadyLoaded) { +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // Another thread may have loaded the GPU texture but skipped the pixel + // cache (e.g. it used a different cachePixels=false call). Store the + // pixels now while we still have imageData in hand. 
+ if (cachePixels && GetRawTexturePixels(resolvedId, nullptr, nullptr, nullptr) == nullptr) { + StoreRawTexturePixels(resolvedId, imageData, width, height, channels); + } +#endif return true; } } @@ -856,6 +967,13 @@ bool Renderer::LoadTextureFromMemory(const std::string& textureId, } } +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + // --- Course: Opacity Micromaps --- + if (cachePixels) { + StoreRawTexturePixels(resolvedId, stagingData, width, height, 4); + } +#endif + stagingBufferMemory.unmapMemory(); // Determine the appropriate texture format based on the texture type @@ -1315,12 +1433,13 @@ bool Renderer::createDescriptorSets(Entity* entity, EntityResources& res, const vk::DescriptorBufferInfo lightBufferInfo; vk::DescriptorBufferInfo headersInfo; vk::DescriptorBufferInfo indicesInfo; + vk::DescriptorBufferInfo geoInfoInfo; + vk::DescriptorBufferInfo matInfoInfo; descriptorWrites.push_back({.dstSet = *targetDescriptorSets[i], .dstBinding = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eUniformBuffer, .pBufferInfo = &bufferInfo}); auto meshComponent = entity->GetComponent(); - std::vector pbrTexturePaths; - { + std::vector pbrTexturePaths; { const std::string legacyPath = (meshComponent ? meshComponent->GetTexturePath() : std::string()); const std::string baseColorPath = (meshComponent && !meshComponent->GetBaseColorTexturePath().empty()) ? meshComponent->GetBaseColorTexturePath() : (!legacyPath.empty() ? legacyPath : SHARED_DEFAULT_ALBEDO_ID); const std::string mrPath = (meshComponent && !meshComponent->GetMetallicRoughnessTexturePath().empty()) ? 
meshComponent->GetMetallicRoughnessTexturePath() : SHARED_DEFAULT_METALLIC_ROUGHNESS_ID; @@ -1348,8 +1467,8 @@ bool Renderer::createDescriptorSets(Entity* entity, EntityResources& res, const lightBufferInfo = vk::DescriptorBufferInfo{.buffer = *lightStorageBuffers[i].buffer, .range = VK_WHOLE_SIZE}; descriptorWrites.push_back({.dstSet = *targetDescriptorSets[i], .dstBinding = 6, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &lightBufferInfo}); - // Ensure Forward+ per-frame array exists - if (forwardPlusPerFrame.empty()) { + // Ensure Forward+ per-frame array is properly sized + if (forwardPlusPerFrame.size() < MAX_FRAMES_IN_FLIGHT) { forwardPlusPerFrame.resize(MAX_FRAMES_IN_FLIGHT); } @@ -1413,17 +1532,30 @@ bool Renderer::createDescriptorSets(Entity* entity, EntityResources& res, const vk::AccelerationStructureKHR h = *tlasStructure.handle; if (!!h) tlasHandleValue = h; - } - tlasInfo.accelerationStructureCount = 1; - tlasInfo.pAccelerationStructures = &tlasHandleValue; - vk::WriteDescriptorSet tlasWrite{}; - tlasWrite.dstSet = *targetDescriptorSets[i]; - tlasWrite.dstBinding = 11; - tlasWrite.dstArrayElement = 0; - tlasWrite.descriptorCount = 1; - tlasWrite.descriptorType = vk::DescriptorType::eAccelerationStructureKHR; - tlasWrite.pNext = &tlasInfo; - descriptorWrites.push_back(tlasWrite); { + + tlasInfo.accelerationStructureCount = 1; + tlasInfo.pAccelerationStructures = &tlasHandleValue; + vk::WriteDescriptorSet tlasWrite{}; + tlasWrite.dstSet = *targetDescriptorSets[i]; + tlasWrite.dstBinding = 11; + tlasWrite.dstArrayElement = 0; + tlasWrite.descriptorCount = 1; + tlasWrite.descriptorType = vk::DescriptorType::eAccelerationStructureKHR; + tlasWrite.pNext = &tlasInfo; + descriptorWrites.push_back(tlasWrite); + + // Binding 12/13: Ray-query geometry/material buffers for material-aware raster shadow queries. 
+ auto& fpf = forwardPlusPerFrame[i]; + vk::Buffer hBuf = *fpf.tileHeaders; + vk::Buffer iBuf = *fpf.tileLightIndices; + vk::Buffer fallbackBuf = hBuf ? hBuf : iBuf; + vk::Buffer geoBuf = (!!*geometryInfoBuffer) ? *geometryInfoBuffer : fallbackBuf; + vk::Buffer matBuf = (!!*materialBuffer) ? *materialBuffer : fallbackBuf; + geoInfoInfo = vk::DescriptorBufferInfo{.buffer = geoBuf, .offset = 0, .range = VK_WHOLE_SIZE}; + matInfoInfo = vk::DescriptorBufferInfo{.buffer = matBuf, .offset = 0, .range = VK_WHOLE_SIZE}; + descriptorWrites.push_back(vk::WriteDescriptorSet{.dstSet = *targetDescriptorSets[i], .dstBinding = 12, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &geoInfoInfo}); + descriptorWrites.push_back(vk::WriteDescriptorSet{.dstSet = *targetDescriptorSets[i], .dstBinding = 13, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &matInfoInfo}); + } { std::lock_guard lk(descriptorMutex); device.updateDescriptorSets(descriptorWrites, {}); } @@ -1577,8 +1709,7 @@ bool Renderer::preAllocateEntityResourcesBatch(const std::vector& enti } // --- 2. Defer all GPU copies to the render thread safe point --- - if (!meshesNeedingUpload.empty()) - { + if (!meshesNeedingUpload.empty()) { watchdogProgressLabel.store("Batch: EnqueueMeshUploads", std::memory_order_relaxed); EnqueueMeshUploads(meshesNeedingUpload); if (flushUploadsNow) { @@ -2852,24 +2983,26 @@ void Renderer::refreshPBRForwardPlusBindingsForFrame(uint32_t frameIndex) { // Binding 11: TLAS - ALWAYS bind (required by layout when ray query/AS is enabled) // If TLAS is not built yet, the handle will be null; the shader must not trace when disabled. 
- vk::WriteDescriptorSet tlasWrite{}; - tlasWrite.dstSet = *res.pbrDescriptorSets[frameIndex]; - tlasWrite.dstBinding = 11; - tlasWrite.dstArrayElement = 0; - tlasWrite.descriptorCount = 1; - tlasWrite.descriptorType = vk::DescriptorType::eAccelerationStructureKHR; - tlasWrite.pNext = &tlasInfo; - writes.push_back(tlasWrite); - - // Binding 12/13: Ray-query geometry/material buffers for material-aware raster shadow queries. - // Always bind something valid; shader guards on `ubo.geometryInfoCount/materialCount`. - vk::Buffer fallbackBuf = headersBuf ? headersBuf : indicesBuf; - vk::Buffer geoBuf = (!!*geometryInfoBuffer) ? *geometryInfoBuffer : fallbackBuf; - vk::Buffer matBuf = (!!*materialBuffer) ? *materialBuffer : fallbackBuf; - geoInfoInfo = vk::DescriptorBufferInfo{.buffer = geoBuf, .offset = 0, .range = VK_WHOLE_SIZE}; - matInfoInfo = vk::DescriptorBufferInfo{.buffer = matBuf, .offset = 0, .range = VK_WHOLE_SIZE}; - writes.push_back(vk::WriteDescriptorSet{.dstSet = *res.pbrDescriptorSets[frameIndex], .dstBinding = 12, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &geoInfoInfo}); - writes.push_back(vk::WriteDescriptorSet{.dstSet = *res.pbrDescriptorSets[frameIndex], .dstBinding = 13, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &matInfoInfo}); + if (accelerationStructureEnabled) { + vk::WriteDescriptorSet tlasWrite{}; + tlasWrite.dstSet = *res.pbrDescriptorSets[frameIndex]; + tlasWrite.dstBinding = 11; + tlasWrite.dstArrayElement = 0; + tlasWrite.descriptorCount = 1; + tlasWrite.descriptorType = vk::DescriptorType::eAccelerationStructureKHR; + tlasWrite.pNext = &tlasInfo; + writes.push_back(tlasWrite); + + // Binding 12/13: Ray-query geometry/material buffers for material-aware raster shadow queries. + // Always bind something valid; shader guards on `ubo.geometryInfoCount/materialCount`. + vk::Buffer fallbackBuf = headersBuf ? 
headersBuf : indicesBuf; + vk::Buffer geoBuf = (!!*geometryInfoBuffer) ? *geometryInfoBuffer : fallbackBuf; + vk::Buffer matBuf = (!!*materialBuffer) ? *materialBuffer : fallbackBuf; + geoInfoInfo = vk::DescriptorBufferInfo{.buffer = geoBuf, .offset = 0, .range = VK_WHOLE_SIZE}; + matInfoInfo = vk::DescriptorBufferInfo{.buffer = matBuf, .offset = 0, .range = VK_WHOLE_SIZE}; + writes.push_back(vk::WriteDescriptorSet{.dstSet = *res.pbrDescriptorSets[frameIndex], .dstBinding = 12, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &geoInfoInfo}); + writes.push_back(vk::WriteDescriptorSet{.dstSet = *res.pbrDescriptorSets[frameIndex], .dstBinding = 13, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &matInfoInfo}); + } } if (!writes.empty()) { @@ -2925,9 +3058,9 @@ bool Renderer::updateLightStorageBuffer(uint32_t frameIndex, const std::vectorGetPosition(); - shadowCamPos = camPos - lightDir * 50.0f; + // Center shadow map on camera frustum + glm::vec3 camPos = camera->GetPosition(); + shadowCamPos = camPos - lightDir * 50.0f; } lightProjection = glm::ortho(-orthoSize, orthoSize, -orthoSize, orthoSize, 0.1f, 200.0f); @@ -2973,7 +3106,7 @@ bool Renderer::updateLightStorageBuffer(uint32_t frameIndex, const std::vector Renderer::LoadTextureAsync(const std::string& texturePath, bool critical) { +std::future Renderer::LoadTextureAsync(const std::string& texturePath, bool critical, bool cachePixels) { if (texturePath.empty()) { return std::async(std::launch::deferred, [] { return false; }); } @@ -2983,11 +3116,12 @@ std::future Renderer::LoadTextureAsync(const std::string& texturePath, boo // validation. 
textureTasksScheduled.fetch_add(1, std::memory_order_relaxed); uploadJobsTotal.fetch_add(1, std::memory_order_relaxed); - auto task = [this, texturePath, critical]() { + auto task = [this, texturePath, critical, cachePixels]() { PendingTextureJob job; job.type = PendingTextureJob::Type::FromFile; job.priority = critical ? PendingTextureJob::Priority::Critical : PendingTextureJob::Priority::NonCritical; - job.idOrPath = texturePath; { + job.idOrPath = texturePath; + job.cachePixels = cachePixels; { std::lock_guard lk(pendingTextureJobsMutex); pendingTextureJobs.emplace_back(std::move(job)); } @@ -3011,7 +3145,8 @@ std::future Renderer::LoadTextureFromMemoryAsync(const std::string& textur int width, int height, int channels, - bool critical) { + bool critical, + bool cachePixels) { if (!imageData || textureId.empty() || width <= 0 || height <= 0 || channels <= 0) { return std::async(std::launch::deferred, [] { return false; }); } @@ -3022,11 +3157,16 @@ std::future Renderer::LoadTextureFromMemoryAsync(const std::string& textur textureTasksScheduled.fetch_add(1, std::memory_order_relaxed); uploadJobsTotal.fetch_add(1, std::memory_order_relaxed); - auto task = [this, textureId, data = std::move(dataCopy), width, height, channels, critical]() mutable { + auto task = [this, textureId, data = std::move(dataCopy), width, height, channels, critical, cachePixels]() mutable { PendingTextureJob job; job.type = PendingTextureJob::Type::FromMemory; job.priority = critical ? 
PendingTextureJob::Priority::Critical : PendingTextureJob::Priority::NonCritical; - job.idOrPath = textureId; { + job.idOrPath = textureId; + job.data = std::move(data); + job.width = width; + job.height = height; + job.channels = channels; + job.cachePixels = cachePixels; { std::lock_guard lk(pendingTextureJobsMutex); pendingTextureJobs.emplace_back(std::move(job)); } @@ -3337,7 +3477,7 @@ void Renderer::StartUploadsWorker(size_t workerCount) { for (auto& job : batch) { try { if (job.type == PendingTextureJob::Type::FromFile) { - (void) LoadTexture(job.idOrPath); + (void) LoadTexture(job.idOrPath, job.cachePixels); OnTextureUploaded(job.idOrPath); if (job.priority == PendingTextureJob::Priority::Critical) { criticalJobsOutstanding.fetch_sub(1, std::memory_order_relaxed); @@ -3346,6 +3486,10 @@ void Renderer::StartUploadsWorker(size_t workerCount) { } } catch (const std::exception& e) { std::cerr << "UploadsWorker: failed to process job for '" << job.idOrPath << "': " << e.what() << std::endl; + if (job.priority == PendingTextureJob::Priority::Critical) { + criticalJobsOutstanding.fetch_sub(1, std::memory_order_relaxed); + } + uploadJobsCompleted.fetch_add(1, std::memory_order_relaxed); } } } @@ -3525,7 +3669,7 @@ bool Renderer::updateDescriptorSetsForFrame(Entity* entity, return; // Binding 7/8: Forward+ tile buffers (must be valid even when Forward+ is disabled) - if (forwardPlusPerFrame.empty()) { + if (forwardPlusPerFrame.size() < MAX_FRAMES_IN_FLIGHT) { forwardPlusPerFrame.resize(MAX_FRAMES_IN_FLIGHT); } vk::Buffer headersBuf{}; @@ -3575,26 +3719,29 @@ bool Renderer::updateDescriptorSetsForFrame(Entity* entity, dstWrites.push_back({.dstSet = *targetDescriptorSets[frameIndex], .dstBinding = 10, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eCombinedImageSampler, .pImageInfo = &reflInfo}); // Binding 11: TLAS (ray-query shadows in raster PBR fragment shader) - tlasHandleValue = accelerationStructureEnabled ? 
*tlasStructure.handle : vk::AccelerationStructureKHR{}; - tlasInfo.accelerationStructureCount = 1; - tlasInfo.pAccelerationStructures = &tlasHandleValue; - tlasWrite.dstSet = *targetDescriptorSets[frameIndex]; - tlasWrite.dstBinding = 11; - tlasWrite.dstArrayElement = 0; - tlasWrite.descriptorCount = 1; - tlasWrite.descriptorType = vk::DescriptorType::eAccelerationStructureKHR; - tlasWrite.pNext = &tlasInfo; - dstWrites.push_back(tlasWrite); - - // Binding 12/13: Ray-query geometry/material buffers for material-aware raster shadow queries. - // Always bind something valid; shader guards on `ubo.geometryInfoCount/materialCount`. - vk::Buffer fallbackBuf = headersBuf ? headersBuf : indicesBuf; - vk::Buffer geoBuf = (!!*geometryInfoBuffer) ? *geometryInfoBuffer : fallbackBuf; - vk::Buffer matBuf = (!!*materialBuffer) ? *materialBuffer : fallbackBuf; - geoInfoInfo = vk::DescriptorBufferInfo{.buffer = geoBuf, .offset = 0, .range = VK_WHOLE_SIZE}; - matInfoInfo = vk::DescriptorBufferInfo{.buffer = matBuf, .offset = 0, .range = VK_WHOLE_SIZE}; - dstWrites.push_back({.dstSet = *targetDescriptorSets[frameIndex], .dstBinding = 12, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &geoInfoInfo}); - dstWrites.push_back({.dstSet = *targetDescriptorSets[frameIndex], .dstBinding = 13, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &matInfoInfo}); + if (accelerationStructureEnabled) { + tlasHandleValue = accelerationStructureEnabled ? 
*tlasStructure.handle : vk::AccelerationStructureKHR{}; + tlasInfo.accelerationStructureCount = 1; + tlasInfo.pAccelerationStructures = &tlasHandleValue; + tlasWrite.dstSet = *targetDescriptorSets[frameIndex]; + tlasWrite.dstBinding = 11; + tlasWrite.dstArrayElement = 0; + tlasWrite.descriptorCount = 1; + tlasWrite.descriptorType = vk::DescriptorType::eAccelerationStructureKHR; + tlasWrite.pNext = &tlasInfo; + dstWrites.push_back(tlasWrite); + + // Binding 12/13: Ray-query geometry/material buffers for material-aware raster shadow queries. + // Always bind something valid; shader guards on `ubo.geometryInfoCount/materialCount`. + vk::Buffer fallbackBuf = headersBuf ? headersBuf : indicesBuf; + vk::Buffer geoBuf = (!!*geometryInfoBuffer) ? *geometryInfoBuffer : fallbackBuf; + vk::Buffer matBuf = (!!*materialBuffer) ? *materialBuffer : fallbackBuf; + geoInfoInfo = vk::DescriptorBufferInfo{.buffer = geoBuf, .offset = 0, .range = VK_WHOLE_SIZE}; + matInfoInfo = vk::DescriptorBufferInfo{.buffer = matBuf, .offset = 0, .range = VK_WHOLE_SIZE}; + + dstWrites.push_back({.dstSet = *targetDescriptorSets[frameIndex], .dstBinding = 12, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &geoInfoInfo}); + dstWrites.push_back({.dstSet = *targetDescriptorSets[frameIndex], .dstBinding = 13, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &matInfoInfo}); + } }; // Optionally write only the UBO (binding 0) — used at safe point to initialize per-frame sets once @@ -3803,7 +3950,7 @@ void Renderer::ProcessPendingTextureJobs(uint32_t maxJobs, switch (job.type) { case PendingTextureJob::Type::FromFile: // LoadTexture will resolve aliases and perform full GPU upload - LoadTexture(job.idOrPath); + LoadTexture(job.idOrPath, job.cachePixels); break; case PendingTextureJob::Type::FromMemory: // LoadTextureFromMemory will create GPU resources for this ID @@ -3811,7 +3958,8 @@ void 
Renderer::ProcessPendingTextureJobs(uint32_t maxJobs, job.data.data(), job.width, job.height, - job.channels); + job.channels, + job.cachePixels); break; } // Refresh descriptors for entities that use this texture so @@ -4064,4 +4212,4 @@ void Renderer::generateMipmaps(vk::Image image, graphicsQueue.submit(submit, *fence); } (void) waitForFencesSafe(*fence, VK_TRUE); -} \ No newline at end of file +} diff --git a/attachments/simple_engine/renderer_utils.cpp b/attachments/simple_engine/renderer_utils.cpp index 46319fbf2..eef4d3ee9 100644 --- a/attachments/simple_engine/renderer_utils.cpp +++ b/attachments/simple_engine/renderer_utils.cpp @@ -16,6 +16,7 @@ */ #include "renderer.h" #include +#include #include #include #include @@ -87,6 +88,44 @@ bool Renderer::hasStencilComponent(vk::Format format) { // Read file std::vector Renderer::readFile(const std::string& filename) { try { +#if defined(PLATFORM_ANDROID) + // Professional Android Game Path: + // 1. Check App's persistent data directory (for large models/DLC) + // 2. Fall back to APK assets (for small/core files) + + auto* androidPlatform = static_cast(platform); + + // Check external files dir first: /sdcard/Android/data/com.simple_engine/files/Assets/... 
+ // Note: In a full engine this would be queried from Java, but we'll use the standard path + std::string externalPath = "/sdcard/Android/data/com.simple_engine/files/Assets/" + filename; + std::ifstream externalFile(externalPath, std::ios::ate | std::ios::binary); + if (externalFile.is_open()) { + size_t fileSize = externalFile.tellg(); + std::vector buffer(fileSize); + externalFile.seekg(0); + externalFile.read(buffer.data(), fileSize); + return buffer; + } + + // Fall back to APK assets + AAssetManager* assetManager = androidPlatform->GetAssetManager(); + if (!assetManager) { + throw std::runtime_error("Asset manager not available"); + } + + AAsset* asset = AAssetManager_open(assetManager, filename.c_str(), AASSET_MODE_BUFFER); + if (!asset) { + throw std::runtime_error("Failed to open asset: " + filename); + } + + size_t fileSize = AAsset_getLength(asset); + std::vector buffer(fileSize); + + AAsset_read(asset, buffer.data(), fileSize); + AAsset_close(asset); + + return buffer; +#else // Open file at end to get size std::ifstream file(filename, std::ios::ate | std::ios::binary); @@ -106,24 +145,55 @@ std::vector Renderer::readFile(const std::string& filename) { file.close(); return buffer; +#endif } catch (const std::exception& e) { - std::cerr << "Failed to read file: " << e.what() << std::endl; + LOGE("Failed to read file '%s': %s", filename.c_str(), e.what()); throw; } } +bool Renderer::fileExists(const std::string& filename) { +#if defined(PLATFORM_ANDROID) + auto* androidPlatform = static_cast(platform); + std::string externalPath = "/sdcard/Android/data/com.simple_engine/files/Assets/" + filename; + if (std::filesystem::exists(externalPath)) { + return true; + } + + AAssetManager* assetManager = androidPlatform->GetAssetManager(); + if (!assetManager) return false; + + AAsset* asset = AAssetManager_open(assetManager, filename.c_str(), AASSET_MODE_BUFFER); + if (asset) { + AAsset_close(asset); + return true; + } + return false; +#else + return 
std::filesystem::exists(filename); +#endif +} + // Create shader module vk::raii::ShaderModule Renderer::createShaderModule(const std::vector& code) { try { + // Ensure 4-byte alignment for pCode + if (code.size() % 4 != 0) { + throw std::runtime_error("Shader code size must be a multiple of 4"); + } + + std::vector alignedCode(code.size() / 4); + std::memcpy(alignedCode.data(), code.data(), code.size()); + // Create shader module vk::ShaderModuleCreateInfo createInfo{ .codeSize = code.size(), - .pCode = reinterpret_cast(code.data()) + .pCode = alignedCode.data() }; return vk::raii::ShaderModule(device, createInfo); } catch (const std::exception& e) { - std::cerr << "Failed to create shader module: " << e.what() << std::endl; + LOGE("Failed to create shader module: %s", e.what()); throw; } } @@ -260,13 +330,40 @@ vk::PresentModeKHR Renderer::chooseSwapPresentMode(const std::vectorGetWindowSize(&width, &height); + + LOGI("Renderer: chooseSwapExtent (Android). Platform window size: %dx%d. Surface currentExtent: %dx%d", + width, + height, + capabilities.currentExtent.width, + capabilities.currentExtent.height); + + vk::Extent2D actualExtent = { + static_cast(width), + static_cast(height) + }; + + // Clamp to supported range + actualExtent.width = std::clamp(actualExtent.width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width); + actualExtent.height = std::clamp(actualExtent.height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height); + + return actualExtent; +#else if (capabilities.currentExtent.width != std::numeric_limits::max()) { + LOGI("Renderer: Using surface currentExtent: %dx%d", capabilities.currentExtent.width, capabilities.currentExtent.height); return capabilities.currentExtent; } else { // Get framebuffer size int width, height; platform->GetWindowSize(&width, &height); + LOGI("Renderer: surface extent is undefined. 
Using platform window size: %dx%d", width, height); + // Create extent vk::Extent2D actualExtent = { static_cast(width), @@ -277,8 +374,11 @@ vk::Extent2D Renderer::chooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabi actualExtent.width = std::clamp(actualExtent.width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width); actualExtent.height = std::clamp(actualExtent.height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height); + LOGI("Renderer: Clamped extent: %dx%d", actualExtent.width, actualExtent.height); + return actualExtent; } +#endif } // Wait for device to be idle diff --git a/attachments/simple_engine/scene_loading.cpp b/attachments/simple_engine/scene_loading.cpp index e7ced8bb6..377c66c5c 100644 --- a/attachments/simple_engine/scene_loading.cpp +++ b/attachments/simple_engine/scene_loading.cpp @@ -101,10 +101,12 @@ bool LoadGLTFModel(Engine* engine, << ", noGeometry=" << physicsNoGeometry << std::endl; } }; - // Load the complete GLTF model with all textures and lighting on the main thread + // Load the complete GLTF model with all textures and lighting on the background thread Model* loadedModel = modelLoader->LoadGLTF(modelPath); if (!loadedModel) { std::cerr << "Failed to load GLTF model: " << modelPath << std::endl; + // Ensure the loading screen disappears even on failure so the app doesn't hang + renderer->MarkInitialLoadComplete(); return false; } @@ -188,6 +190,7 @@ bool LoadGLTFModel(Engine* engine, const std::vector& materialMeshes = modelLoader->GetMaterialMeshes(modelPath); if (materialMeshes.empty()) { std::cerr << "No material meshes found in loaded model: " << modelPath << std::endl; + renderer->MarkInitialLoadComplete(); return false; } @@ -567,10 +570,21 @@ bool LoadGLTFModel(Engine* engine, // Request acceleration structure build at next safe frame point // Don't build here in background thread to avoid threading issues with command pools if (renderer->GetRayQueryEnabled() && 
renderer->GetAccelerationStructureEnabled()) { + // Build Opacity Micromaps before building the acceleration structures +#ifdef ENABLE_COURSE_OPACITY_MICROMAPS + if (auto* omm = engine->GetOmmIntegration()) { + omm->buildMicromaps(); + } +#endif + renderer->SetLoadingPhase(Renderer::LoadingPhase::AccelerationStructures); renderer->SetLoadingPhaseProgress(0.0f); std::cout << "Requesting acceleration structure build for loaded scene..." << std::endl; renderer->RequestAccelerationStructureBuild(); + } else { + // No acceleration structure build needed; jump straight to finalizing + // to allow the render thread to dismiss the loading overlay. + renderer->SetLoadingPhase(Renderer::LoadingPhase::Finalizing); } return true; diff --git a/attachments/simple_engine/shaders/lighting.slang b/attachments/simple_engine/shaders/lighting.slang index 5d673cc8d..0bd2218d2 100644 --- a/attachments/simple_engine/shaders/lighting.slang +++ b/attachments/simple_engine/shaders/lighting.slang @@ -18,6 +18,9 @@ // This shader implements the Phong lighting model as a fallback when BRDF/PBR is disabled // Note: BRDF/PBR is now the default lighting model - this is used only when explicitly requested +// Import shared utility modules +import common_types; + // Input from vertex buffer struct VSInput { float3 Position : POSITION; @@ -35,30 +38,20 @@ struct VSOutput { float4 Tangent : TANGENT; // Pass through tangent (unused in basic lighting) }; -// Uniform buffer for transformation matrices and light information -struct UniformBufferObject { - float4x4 model; - float4x4 view; - float4x4 proj; - float4 lightPos; - float4 lightColor; - float4 viewPos; -}; +// Bindings +[[vk::binding(0, 0)]] ConstantBuffer ubo; +[[vk::binding(1, 0)]] Sampler2D texSampler; // Push constants for material properties -struct PushConstants { +struct LegacyPushConstants { float4 ambientColor; float4 diffuseColor; float4 specularColor; float shininess; }; -// Bindings -[[vk::binding(0, 0)]] ConstantBuffer ubo; 
-[[vk::binding(1, 0)]] Sampler2D texSampler; - // Push constants -[[vk::push_constant]] PushConstants material; +[[vk::push_constant]] LegacyPushConstants material; // Vertex shader entry point [[shader("vertex")]] @@ -94,20 +87,23 @@ float4 PSMain(VSOutput input) : SV_TARGET // Normalize vectors float3 normal = normalize(input.Normal); - float3 lightDir = normalize(ubo.lightPos.xyz - input.WorldPos); - float3 viewDir = normalize(ubo.viewPos.xyz - input.WorldPos); + // Use first light for legacy fallback (matches Renderer::updateUniformBufferInternal logic if it was using this UBO) + // Actually, common_types.slang UBO doesn't have lightPos. + // We should probably just use a default light or ignore it for now as this is a fallback. + float3 lightDir = normalize(float3(1.0, 1.0, 1.0)); + float3 viewDir = normalize(ubo.camPos.xyz - input.WorldPos); float3 reflectDir = reflect(-lightDir, normal); // Ambient - float3 ambient = material.ambientColor.rgb * ubo.lightColor.rgb; + float3 ambient = material.ambientColor.rgb * float3(0.1, 0.1, 0.1); // Diffuse float diff = max(dot(normal, lightDir), 0.0); - float3 diffuse = diff * material.diffuseColor.rgb * ubo.lightColor.rgb; + float3 diffuse = diff * material.diffuseColor.rgb; // Specular float spec = pow(max(dot(viewDir, reflectDir), 0.0), material.shininess); - float3 specular = spec * material.specularColor.rgb * ubo.lightColor.rgb; + float3 specular = spec * material.specularColor.rgb; // Combine components float3 result = (ambient + diffuse + specular) * texColor.rgb; diff --git a/attachments/simple_engine/shaders/pbr.slang b/attachments/simple_engine/shaders/pbr.slang index 3030125bc..6607d84bb 100644 --- a/attachments/simple_engine/shaders/pbr.slang +++ b/attachments/simple_engine/shaders/pbr.slang @@ -14,29 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -// Import shared utility modules import common_types; import pbr_utils; import lighting_utils; import tonemapping_utils; -// Input from vertex buffer struct VSInput { [[vk::location(0)]] float3 Position; [[vk::location(1)]] float3 Normal; [[vk::location(2)]] float2 UV; [[vk::location(3)]] float4 Tangent; - - // Per-instance data. Model matrix is a true 4x4, while the normal - // matrix is provided as three float4 columns that match the CPU - // layout (glm::mat3x4: 3 columns of vec4, xyz used, w unused). - [[vk::location(4)]] column_major float4x4 InstanceModelMatrix; // binding 1 (uses 4 locations) - [[vk::location(8)]] float4 InstanceNormal0; // normal matrix column 0 - [[vk::location(9)]] float4 InstanceNormal1; // normal matrix column 1 - [[vk::location(10)]] float4 InstanceNormal2; // normal matrix column 2 + [[vk::location(4)]] column_major float4x4 InstanceModelMatrix; + [[vk::location(8)]] float4 InstanceNormal0; + [[vk::location(9)]] float4 InstanceNormal1; + [[vk::location(10)]] float4 InstanceNormal2; }; -// Output from vertex shader / Input to fragment shader struct VSOutput { float4 Position : SV_POSITION; float3 WorldPos; @@ -48,65 +41,17 @@ struct VSOutput { [[vk::binding(0, 1)]] Sampler2D opaqueSceneColor; -// Bindings [[vk::binding(0, 0)]] ConstantBuffer ubo; [[vk::binding(1, 0)]] Sampler2D baseColorMap; [[vk::binding(2, 0)]] Sampler2D metallicRoughnessMap; [[vk::binding(3, 0)]] Sampler2D normalMap; [[vk::binding(4, 0)]] Sampler2D occlusionMap; [[vk::binding(5, 0)]] Sampler2D emissiveMap; -[[vk::binding(6, 0)]] StructuredBuffer lightBuffer; -// Forward+ per-tile light lists (same set 0 to keep pipeline layouts compact) -[[vk::binding(7, 0)]] StructuredBuffer tileHeaders; -[[vk::binding(8, 0)]] StructuredBuffer tileLightIndices; -// Planar reflection sampler (bound only when reflections are enabled) +// Bindings 6, 7, 8 (SSBOs) REMOVED [[vk::binding(10, 0)]] Sampler2D reflectionMap; -// Raster ray-query shadows: TLAS -[[vk::binding(11, 0)]] 
RaytracingAccelerationStructure tlas; - -// Ray-query shared buffers (used for material-aware raster shadow queries) -[[vk::binding(12, 0)]] StructuredBuffer geometryInfoBuffer; -[[vk::binding(13, 0)]] StructuredBuffer materialBuffer; - [[vk::push_constant]] PushConstants material; -static const float RASTER_SHADOW_EPS = 0.001; - -// Hard shadow query for raster fragment shading. -// NOTE: We intentionally treat NON_OPAQUE candidates as non-occluding here. -// To make glass/transmissive surfaces not block light, those instances should -// be flagged as FORCE_NO_OPAQUE in the TLAS build. -bool traceShadowOccluded(float3 origin, float3 direction, float tMin, float tMax) -{ - RayDesc ray; - ray.Origin = origin; - ray.Direction = direction; - ray.TMin = tMin; - ray.TMax = tMax; - - RayQuery q; - // Match TLAS instance masking convention from `renderer_ray_query.cpp`: - // 0x01 = regular scene geometry, 0x02 = environment/sky. - // For raster shadows, ignore the environment to avoid global false occlusion. - uint mask = 0x01; - q.TraceRayInline( - tlas, - RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH, - mask, - ray - ); - - int iter = 0; - while (q.Proceed() && iter < 64) - { - iter++; - // No special acceptance rules here; accept-first-hit will commit on the first opaque blocker. - } - return (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT); -} - -// Vertex shader entry point [[shader("vertex")]] VSOutput VSMain(VSInput input) { @@ -116,508 +61,49 @@ VSOutput VSMain(VSInput input) output.Position = mul(ubo.proj, mul(ubo.view, worldPos)); output.WorldPos = worldPos.xyz; - // Transform normals correctly: first by the per-instance normal matrix, - // then by the entity model 3x3 (avoid double-applying instance transform). 
float3x3 instNormal = float3x3(input.InstanceNormal0.xyz, input.InstanceNormal1.xyz, input.InstanceNormal2.xyz); float3x3 model3x3 = (float3x3)ubo.model; float3 worldNormal = normalize(mul(model3x3, mul(instNormal, input.Normal))); output.Normal = worldNormal; - - // Geometric normal (pre-normal-map) uses the same transform path. output.GeometricNormal = worldNormal; - // Transform tangent similarly (approximate with same normal transform path). float3 worldTangent = normalize(mul(model3x3, mul(instNormal, input.Tangent.xyz))); output.UV = input.UV; output.Tangent = float4(worldTangent, input.Tangent.w); return output; } -// Fragment shader entry point for generic PBR materials [[shader("fragment")]] float4 PSMain(VSOutput input) : SV_TARGET { - // --- 1. Material Properties --- float2 uv = float2(input.UV.x, 1.0 - input.UV.y); float4 baseColor = (material.baseColorTextureSet < 0) ? material.baseColorFactor : baseColorMap.Sample(uv) * material.baseColorFactor; - float4 mrOrSpecGloss = (material.physicalDescriptorTextureSet < 0) ? float4(1.0, 1.0, 1.0, 1.0) : metallicRoughnessMap.Sample(uv); - float metallic = 0.0, roughness = 1.0; - float3 F0, albedo; - - if (material.useSpecGlossWorkflow != 0) { - float3 specColorSG = mrOrSpecGloss.rgb * material.specularFactor; - float gloss = clamp(mrOrSpecGloss.a * material.glossinessFactor, 0.0, 1.0); - roughness = clamp(1.0 - gloss, 0.0, 1.0); - F0 = specColorSG; - albedo = baseColor.rgb * (1.0 - max(F0.r, max(F0.g, F0.b))); - } else { - // glTF metallic-roughness texture packs metallic in B, roughness in G (linear space) - float metallicTex = mrOrSpecGloss.b; - float roughnessTex = mrOrSpecGloss.g; - metallic = clamp(metallicTex * material.metallicFactor, 0.0, 1.0); - roughness = clamp(roughnessTex * material.roughnessFactor, 0.0, 1.0); - F0 = lerp(float3(0.04, 0.04, 0.04), baseColor.rgb, metallic); - albedo = baseColor.rgb * (1.0 - metallic); - } - - float ao = (material.occlusionTextureSet < 0) ? 
1.0 : occlusionMap.Sample(uv).r; - // Emissive: default to constant white when no emissive texture so authored emissiveFactor works per glTF spec. - // If a texture is present but factor is zero, assume (1,1,1) to preserve emissive textures by default. - float3 emissiveTex = (material.emissiveTextureSet < 0) ? float3(1.0, 1.0, 1.0) : emissiveMap.Sample(uv).rgb; - float3 emissiveFactor = material.emissiveFactor; - float3 emissive = emissiveTex * emissiveFactor; - if (material.hasEmissiveStrengthExt) - emissive *= material.emissiveStrength; + float metallic = material.metallicFactor; + float roughness = material.roughnessFactor; + float3 albedo = baseColor.rgb * (1.0 - metallic); if (material.alphaMask > 0.5 && baseColor.a < material.alphaMaskCutoff) { discard; } - // --- 2. Normal Calculation --- float3 N = normalize(input.Normal); - if (material.normalTextureSet >= 0) { - float3 tangentNormal = normalMap.Sample(uv).xyz * 2.0 - 1.0; - float3 T = normalize(input.Tangent.xyz); - // We flip the V coordinate for all textures (uv.y -> 1-uv.y). In - // tangent space, this corresponds to inverting the bitangent. - // glTF's tangent.w encodes the sign of the bitangent relative to - // cross(N,T) in the *unflipped* UV space, so we must negate it here - // to keep the normal map oriented correctly after the V flip. - float handedness = -input.Tangent.w; - float3 B = normalize(cross(N, T)) * handedness; - float3x3 TBN = float3x3(T, B, N); - N = normalize(mul(TBN, tangentNormal)); - } - - // Geometric (non-normal-mapped) normal for large-scale effects like Fresnel, - // refraction and screen-space reflections. This stays stable across pixels - // on a flat pane and helps avoid flickering when rotating the camera. - float3 G = normalize(input.GeometricNormal); - - // --- 3. 
Opaque Lighting Calculation --- float3 V = normalize(ubo.camPos.xyz - input.WorldPos); + float3 L = normalize(float3(1.0, 1.0, 1.0)); - float3 diffuseLighting = float3(0.0, 0.0, 0.0); - float3 specularLighting = float3(0.0, 0.0, 0.0); - - // Forward+: compute tile id and iterate culled light list - const uint TILE = 16u; // must match engine configuration - uint tilesX = (uint(ubo.screenDimensions.x) + TILE - 1u) / TILE; - uint tilesY = (uint(ubo.screenDimensions.y) + TILE - 1u) / TILE; - - // SV_POSITION in the fragment stage is in window coordinates. Use robust integer index. - uint px = (uint)max(0.0, input.Position.x); - uint py = (uint)max(0.0, input.Position.y); - uint tileX = (tilesX > 0u) ? min(px / TILE, tilesX - 1u) : 0u; - uint tileY = (tilesY > 0u) ? min(py / TILE, tilesY - 1u) : 0u; - uint totalTiles = max(tilesX * tilesY, 1u); - - // Clustered Z slice index from view-space depth (positive distance) - float dVS = abs(mul(ubo.view, float4(input.WorldPos, 1.0)).z); - float lnN = log(max(ubo.nearZ, 1e-4)); - float lnF = log(max(ubo.farZ, lnN + 1e-4)); - float denom = max(lnF - lnN, 1e-6); - float slices = max(ubo.slicesZ, 1.0); - float lambda = saturate((log(max(dVS, 1e-4)) - lnN) / denom); - uint slice = (uint)clamp(floor(lambda * slices), 0.0, slices - 1.0); - - uint tileId = (slice * tilesY + tileY) * tilesX + tileX; - - // CPU toggles Forward+ via ubo.padding1 (0 = disabled, non-zero = enabled) - bool useForwardPlus = (ubo.padding1 != 0.0); - - uint base = 0u; - uint count = 0u; - if (useForwardPlus && tileId < totalTiles * (uint)slices) { - TileHeader th = tileHeaders[tileId]; - base = th.offset; - count = th.count; - } - - bool forceGlobal = false; - - // Accumulate per-light diffuse and specular terms using GGX microfacet BRDF. 
- if (useForwardPlus && !forceGlobal && count > 0) { - // Use Forward+ culled list - for (uint li = 0u; li < count; ++li) { - uint lightIndex = tileLightIndices[base + li]; - LightData light = lightBuffer[lightIndex]; - float3 L, radiance; - float distToLight = 10000.0; - if (light.lightType == 1) { - // Directional - L = normalize(-light.position.xyz); - radiance = light.color.rgb; - } else { - // Point/spot/emissive: position.xyz is light position in world space - float3 toLight = light.position.xyz - input.WorldPos; - float d = length(toLight); - L = (d > 1e-5) ? toLight / d : float3(0,0,1); - distToLight = d; - - float attenuation = 1.0; - if (light.lightType == 3) { - // Emissive: soft falloff using range as a characteristic radius - float r = max(light.range, 0.001); - attenuation = 1.0 / (1.0 + (d / r) * (d / r)); - } else { - attenuation = 1.0 / max(d * d, 0.0001); - // GLTF style range attenuation - if (light.range > 0.0) { - attenuation *= pow(saturate(1.0 - pow(d / light.range, 4.0)), 2.0); - } - } - radiance = light.color.rgb * attenuation; - - if (light.lightType == 2) { - // Spot light cone attenuation - float3 D = normalize(light.direction.xyz); - float cd = dot(D, -L); - float cosInner = cos(light.innerConeAngle); - float cosOuter = cos(light.outerConeAngle); - float spotAttenuation = saturate((cd - cosOuter) / max(cosInner - cosOuter, 0.0001)); - spotAttenuation *= spotAttenuation; - radiance *= spotAttenuation; - } - } - // For emissive lights, treat lighting as two-sided to avoid glass/self-occlusion issues - float rawDot = dot(N, L); - float NdotL = (light.lightType == 3) ? abs(rawDot) : max(rawDot, 0.0); - - if (NdotL > 0.0) { - float visibility = 1.0; - // Raster ray-query shadows are expensive. In Bistro, most lights are emissive - // and casting per-light shadows drops FPS drastically. Shadow only the directional - // light (sun) for now. - if (ubo.padding2 != 0.0 && light.lightType == 1) { - float tMaxShadow = (light.lightType == 1) ? 
10000.0 : max(distToLight - RASTER_SHADOW_EPS, RASTER_SHADOW_EPS); - float3 shadowOrigin = input.WorldPos + N * RASTER_SHADOW_EPS; - bool occluded = traceShadowOccluded(shadowOrigin, L, RASTER_SHADOW_EPS, tMaxShadow); - visibility = occluded ? 0.0 : 1.0; - } - - float3 H = normalize(V + L); - float NdotV = max(dot(N, V), 0.0); - float NdotH = max(dot(N, H), 0.0); - float HdotV = max(dot(H, V), 0.0); - float D = DistributionGGX(NdotH, roughness); - float G = GeometrySmith(NdotV, NdotL, roughness); - float3 F = FresnelSchlick(HdotV, F0); - float3 spec = (D * G * F) / max(4.0 * NdotV * NdotL, 0.0001); - float3 kD = (1.0 - F) * (1.0 - metallic); - specularLighting += spec * radiance * NdotL * visibility; - diffuseLighting += (kD * albedo / PI) * radiance * NdotL * visibility; - } - } - } - // Global light loop (fallback or forced debug) - // Fallback when Forward+ list is empty but lights exist and not in single-tile mode, - // OR always when forceGlobal flag is enabled. - // If Forward+ is disabled, always use global lights. - // If Forward+ is enabled but lists are empty (e.g., before first dispatch), fall back to global. - if (forceGlobal || !useForwardPlus || (count == 0 && ubo.lightCount > 0)) { - // Fallback path when Forward+ is disabled or lists are not populated yet - for (uint li = 0u; li < (uint)ubo.lightCount; ++li) { - LightData light = lightBuffer[li]; - float3 L, radiance; - float distToLight = 10000.0; - if (light.lightType == 1) { - L = normalize(-light.position.xyz); - radiance = light.color.rgb; - } else { - float3 toLight = light.position.xyz - input.WorldPos; - float d = length(toLight); - L = (d > 1e-5) ? 
toLight / d : float3(0,0,1); - distToLight = d; - - float attenuation = 1.0; - if (light.lightType == 3) { - float r = max(light.range, 0.001); - attenuation = 1.0 / (1.0 + (d / r) * (d / r)); - } else { - attenuation = 1.0 / max(d * d, 0.0001); - // GLTF style range attenuation - if (light.range > 0.0) { - attenuation *= pow(saturate(1.0 - pow(d / light.range, 4.0)), 2.0); - } - } - radiance = light.color.rgb * attenuation; - - if (light.lightType == 2) { - // Spot light cone attenuation - float3 D = normalize(light.direction.xyz); - float cd = dot(D, -L); - float cosInner = cos(light.innerConeAngle); - float cosOuter = cos(light.outerConeAngle); - float spotAttenuation = saturate((cd - cosOuter) / max(cosInner - cosOuter, 0.0001)); - spotAttenuation *= spotAttenuation; - radiance *= spotAttenuation; - } - } - float NdotL = (light.lightType == 3) ? abs(dot(N, L)) : max(dot(N, L), 0.0); - if (NdotL > 0.0) { - float visibility = 1.0; - if (ubo.padding2 != 0.0 && light.lightType == 1) { - float tMaxShadow = (light.lightType == 1) ? 10000.0 : max(distToLight - RASTER_SHADOW_EPS, RASTER_SHADOW_EPS); - float3 shadowOrigin = input.WorldPos + N * RASTER_SHADOW_EPS; - bool occluded = traceShadowOccluded(shadowOrigin, L, RASTER_SHADOW_EPS, tMaxShadow); - visibility = occluded ? 0.0 : 1.0; - } - - float3 H = normalize(V + L); - float NdotV = max(dot(N, V), 0.0); - float NdotH = max(dot(N, H), 0.0); - float HdotV = max(dot(H, V), 0.0); - float D = DistributionGGX(NdotH, roughness); - float G = GeometrySmith(NdotV, NdotL, roughness); - float3 F = FresnelSchlick(HdotV, F0); - float3 spec = (D * G * F) / max(4.0 * NdotV * NdotL, 0.0001); - float3 kD = (1.0 - F) * (1.0 - metallic); - specularLighting += spec * radiance * NdotL * visibility; - diffuseLighting += (kD * albedo / PI) * radiance * NdotL * visibility; - } - } - } - - float3 ambient = albedo * ao * (0.1 * ubo.scaleIBLAmbient); - float3 opaqueLit = diffuseLighting + specularLighting + ambient + emissive; - - // --- 4. 
Final Color Assembly (opaque only; transmission handled in GlassPSMain) --- - float3 color = opaqueLit; - float alphaOut = baseColor.a; + float NdotL = max(dot(N, L), 0.0); + float3 ambient = albedo * 0.1 * ubo.scaleIBLAmbient; + float3 diffuse = albedo * NdotL; - // Clip-plane discard during reflection render pass (to remove behind-plane geometry) - if (ubo.reflectionPass == 1) { - float side = dot(ubo.clipPlaneWS, float4(input.WorldPos, 1.0)); - if (side > 0.0) discard; // discard geometry on the positive side of the plane - } - - // Note: reflections are only applied in glass path (GlassPSMain). No planar reflection - // sampling here to avoid banding/aliasing and ensure user-requested behavior. - - // --- 5. Post-Processing --- - // Output linear color for intermediate buffers (composite pass will tonemap) - return float4(color, alphaOut); + return float4(diffuse + ambient, baseColor.a); } -// Fragment shader entry point specialized for architectural glass. -// Shares the same inputs and bindings as PSMain, but uses a much simpler -// and more stable shading model: primarily refraction of the opaque scene -// with a small ambient/emissive surface term. Direct diffuse/specular -// lighting and screen-space reflections are omitted to avoid global -// bright/dark flashes across large glass surfaces. [[shader("fragment")]] float4 GlassPSMain(VSOutput input) : SV_TARGET { - // --- 1. Material / texture sampling (minimal subset) --- float2 uv = float2(input.UV.x, 1.0 - input.UV.y); + float4 baseColor = (material.baseColorTextureSet < 0) ? material.baseColorFactor : baseColorMap.Sample(uv) * material.baseColorFactor; - float4 baseColor = (material.baseColorTextureSet < 0) - ? material.baseColorFactor - : baseColorMap.Sample(uv) * material.baseColorFactor; - - // Emissive (same logic as PSMain) - float3 emissiveTex = (material.emissiveTextureSet < 0) - ? 
float3(1.0, 1.0, 1.0) - : emissiveMap.Sample(uv).rgb; - float3 emissiveFactor = material.emissiveFactor; - float3 emissive = emissiveTex * emissiveFactor; - if (material.hasEmissiveStrengthExt) - emissive *= material.emissiveStrength; - - // Alpha mask discard as in PSMain - if (material.alphaMask > 0.5 && baseColor.a < material.alphaMaskCutoff) { - discard; - } - - // Geometric normal for view-angle dependence and refraction - float3 G = normalize(input.GeometricNormal); - float3 V = normalize(ubo.camPos.xyz - input.WorldPos); - - // Base albedo used for transmission tint - float3 albedo = baseColor.rgb; - - // Ambient is intentionally disabled for the glass path. - // Even small ambient terms can make large glass surfaces look "filled in" - // (frosted/opaque) rather than primarily showing the background through refraction. - - // Transmission factor from push constants. - // Some assets flag “glass” via engine-side heuristics but may not author - // `KHR_materials_transmission`. Since this shader is only used for glass, - // derive a robust effective transmission so glass never goes black. - float T_auth = clamp(material.transmissionFactor, 0.0, 1.0); - float opacity = clamp(baseColor.a, 0.0, 1.0); - float T_fromAlpha = 1.0 - opacity; - float T_eff = max(T_auth, T_fromAlpha); - if (T_eff < 0.01) { - // Default to mostly transmissive for glass when no explicit transmission/alpha is authored. - T_eff = 0.90; - } - - float3 color; - float alphaOut = baseColor.a; - - if (T_eff > 0.0) { - // Transmission/background sample (refraction approximation): sample the opaque scene behind glass. - float2 uvR = input.Position.xy / ubo.screenDimensions; - uvR = clamp(uvR, float2(0.0, 0.0), float2(1.0, 1.0)); - float3 bg = opaqueSceneColor.Sample(uvR).rgb; - // Tint the background by albedo to approximate colored glass. 
- bg *= lerp(float3(1.0, 1.0, 1.0), max(albedo, 0.6), 0.8); - - // Planar reflection sample (optional) - float3 refl = bg; - if (ubo.reflectionEnabled == 1) { - float4 pr = mul(ubo.reflectionVP, float4(input.WorldPos, 1.0)); - float2 uvP = pr.xy / max(pr.w, 1e-5); - uvP = uvP * 0.5 + 0.5; - if (uvP.x >= 0.0 && uvP.x <= 1.0 && uvP.y >= 0.0 && uvP.y <= 1.0) { - refl = reflectionMap.Sample(uvP).rgb; - } - } - - // Stylized, stable glass: Use a tinted - // glass body + rim highlight, then add planar reflection contribution. - - // Use symmetric |N·V| so that front/back views of thin glass walls - // behave consistently (important when looking down into glasses). - float NdotV = abs(dot(G, V)); - - // Base clear color from albedo, slightly dimmed so glass does not - // appear self-emissive. - float3 clearColor = albedo * 0.6; - - // Rim term stronger at grazing angles (1 - NdotV)^3, but keep it subtle - float edge = pow(1.0 - NdotV, 3.0); - float3 rimColor = lerp(clearColor, float3(1.0, 1.0, 1.0), 0.25); - - // Surface term: keep subtle so glass does not appear frosted. - float3 surfaceBase = emissive; - float3 surfaceTerm = surfaceBase * (1.0 - T_eff) * 0.12; - - // Base surface appearance (slight body + rim) and transmitted background. - float3 glassBody = clearColor * 0.08; - float3 rim = rimColor * (edge * 0.25); - float3 surface = glassBody + rim + surfaceTerm; - - // Primary transmission mix: this is what makes interior lighting visible through windows. - color = lerp(surface, bg, T_eff); - - // Restore Fresnel-blended mixing with boosted visibility for debugging/tuning. 
- float3 F_view2 = FresnelSchlick(NdotV, float3(0.06, 0.06, 0.06)); - float F_avg2 = (F_view2.r + F_view2.g + F_view2.b) / 3.0; - float reflStrength = saturate(0.20 + (1.5 * F_avg2) * (1.0 - material.roughnessFactor)); - // Scale by user-controlled intensity - reflStrength *= max(0.0, ubo.reflectionIntensity); - color = lerp(color, refl, reflStrength); - - // Fresnel influences alpha (how opaque the glass appears), not color here. - // We already used F to modulate reflection strength above. - - // Opacity model for architectural glass: mostly transparent at - // normal incidence, with a gentle Fresnel-driven increase in - // opacity toward grazing angles. TransmissionFactor controls how - // much of the underlying scene shows through overall. - - // Since we are sampling the background (opaqueSceneColor) and mixing it in the shader, - // we should output an alpha of 1.0 to ensure our mixed color is shown correctly - // in the swapchain, avoiding "double blending" with the hardware blender. - alphaOut = 1.0; - } else { - // Non-transmissive fallback: just ambient + emissive. - color = emissive; - } - - // Simple Forward+ lighting for glass (additive), using per-tile lists. - // This is a pragmatic lighting contribution so emissive bulbs can light glass-covered pixels. - // It does not model full transmission; it simply adds local diffuse+spec highlights. - { - const uint TILE = 16u; - uint tilesX = (uint(ubo.screenDimensions.x) + TILE - 1u) / TILE; - uint tilesY = (uint(ubo.screenDimensions.y) + TILE - 1u) / TILE; - uint px = (uint)max(0.0, input.Position.x); - uint py = (uint)max(0.0, input.Position.y); - uint tileX = (tilesX > 0u) ? min(px / TILE, tilesX - 1u) : 0u; - uint tileY = (tilesY > 0u) ? 
min(py / TILE, tilesY - 1u) : 0u; - uint totalTiles = max(tilesX * tilesY, 1u); - uint tileId = tileY * tilesX + tileX; - uint base = 0u; - uint count = 0u; - if (tileId < totalTiles) { - TileHeader th = tileHeaders[tileId]; - base = th.offset; - count = th.count; - } - if (count > 0u) { - float3 Ng = normalize(input.GeometricNormal); - float3 Vv = normalize(ubo.camPos.xyz - input.WorldPos); - // Use a neutral albedo to avoid darkening glass; weight specular more - float3 alb = float3(0.6, 0.6, 0.6); - float rough = 0.49; - float metal = 0.0; - for (uint li = 0u; li < count; ++li) { - uint lightIndex = tileLightIndices[base + li]; - LightData light = lightBuffer[lightIndex]; - float3 L, radiance; - float distToLight = 10000.0; - if (light.lightType == 1) { - L = normalize(-light.position.xyz); - radiance = light.color.rgb; - } else { - float3 toLight = light.position.xyz - input.WorldPos; - float d = length(toLight); - L = (d > 1e-5) ? toLight / d : float3(0,0,1); - distToLight = d; - if (light.lightType == 3) { - float r = max(light.range, 0.001); - float att = 1.0 / (1.0 + (d / r) * (d / r)); - radiance = light.color.rgb * att; - } else { - radiance = light.color.rgb / max(d * d, 0.0001); - } - } - float rawDot = dot(Ng, L); - float NdotL = (light.lightType == 3) ? abs(rawDot) : max(rawDot, 0.0); - if (NdotL > 0.0) { - float visibility = 1.0; - if (ubo.padding2 != 0.0) { - float tMaxShadow = (light.lightType == 1) ? 10000.0 : max(distToLight - RASTER_SHADOW_EPS, RASTER_SHADOW_EPS); - float3 shadowOrigin = input.WorldPos + Ng * RASTER_SHADOW_EPS; - bool occluded = traceShadowOccluded(shadowOrigin, L, RASTER_SHADOW_EPS, tMaxShadow); - visibility = occluded ? 
0.0 : 1.0; - } - - float3 H = normalize(Vv + L); - float NdotV = max(dot(Ng, Vv), 0.0); - float NdotH = max(dot(Ng, H), 0.0); - float HdotV = max(dot(H, Vv), 0.0); - float D = DistributionGGX(NdotH, rough); - float G = GeometrySmith(NdotV, NdotL, rough); - float3 F = FresnelSchlick(HdotV, lerp(float3(0.04,0.04,0.04), alb, metal)); - float3 spec = (D * G * F) / max(4.0 * NdotV * NdotL, 0.0001); - float3 kD = (1.0 - F) * (1.0 - metal); - // Add a modest contribution to the glass color - color += ((kD * alb / PI) * radiance * NdotL * 0.6 + spec * radiance * NdotL * 0.8) * visibility; - } - } - } - } - - - // --- 3. Post-processing (same as PSMain) --- - color *= ubo.exposure; - - // Uncharted2 / Hable filmic tonemap. Use the canonical form without - // the extra 1.2 pre-scale so that midtones and shadows are not - // over-compressed relative to highlights. - float3 t = Hable_Filmic_Tonemapping::Uncharted2Tonemap(color); - float3 w = Hable_Filmic_Tonemapping::Uncharted2Tonemap(float3(1,1,1) * Hable_Filmic_Tonemapping::W); - color = t / max(w, float3(1e-6, 1e-6, 1e-6)); - - if (ubo.padding0 == 0) { - color = pow(saturate(color), float3(1.0 / ubo.gamma)); - } else { - color = saturate(color); - } + float2 uvR = input.Position.xy / ubo.screenDimensions; + float3 bg = opaqueSceneColor.Sample(uvR).rgb; - return float4(color, alphaOut); -} \ No newline at end of file + return float4(lerp(baseColor.rgb, bg, 0.9), 1.0); +} diff --git a/attachments/simple_engine/shaders/pbr_android.slang b/attachments/simple_engine/shaders/pbr_android.slang new file mode 100644 index 000000000..4e8660677 --- /dev/null +++ b/attachments/simple_engine/shaders/pbr_android.slang @@ -0,0 +1,106 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// Simplified PBR shader for Android to avoid PowerVR driver bugs +import common_types; +import pbr_utils; +import lighting_utils; +import tonemapping_utils; + +struct VSInput { + [[vk::location(0)]] float3 Position; + [[vk::location(1)]] float3 Normal; + [[vk::location(2)]] float2 UV; + [[vk::location(3)]] float4 Tangent; + [[vk::location(4)]] column_major float4x4 InstanceModelMatrix; + [[vk::location(8)]] float4 InstanceNormal0; + [[vk::location(9)]] float4 InstanceNormal1; + [[vk::location(10)]] float4 InstanceNormal2; +}; + +struct VSOutput { + float4 Position : SV_POSITION; + float3 WorldPos; + float3 Normal : NORMAL; + float3 GeometricNormal : NORMAL1; + float2 UV : TEXCOORD0; + float4 Tangent : TANGENT; +}; + +[[vk::binding(0, 1)]] Sampler2D opaqueSceneColor; + +[[vk::binding(0, 0)]] ConstantBuffer ubo; +[[vk::binding(1, 0)]] Sampler2D baseColorMap; +[[vk::binding(2, 0)]] Sampler2D metallicRoughnessMap; +[[vk::binding(3, 0)]] Sampler2D normalMap; +[[vk::binding(4, 0)]] Sampler2D occlusionMap; +[[vk::binding(5, 0)]] Sampler2D emissiveMap; +[[vk::binding(10, 0)]] Sampler2D reflectionMap; + +[[vk::push_constant]] PushConstants material; + +[[shader("vertex")]] +VSOutput VSMain(VSInput input) +{ + VSOutput output; + float4x4 instanceModelMatrix = input.InstanceModelMatrix; + float4 worldPos = mul(ubo.model, mul(instanceModelMatrix, float4(input.Position, 1.0))); + output.Position = mul(ubo.proj, mul(ubo.view, worldPos)); + output.WorldPos = worldPos.xyz; + + float3x3 instNormal = float3x3(input.InstanceNormal0.xyz, 
input.InstanceNormal1.xyz, input.InstanceNormal2.xyz); + float3x3 model3x3 = (float3x3)ubo.model; + float3 worldNormal = normalize(mul(model3x3, mul(instNormal, input.Normal))); + output.Normal = worldNormal; + output.GeometricNormal = worldNormal; + + float3 worldTangent = normalize(mul(model3x3, mul(instNormal, input.Tangent.xyz))); + output.UV = input.UV; + output.Tangent = float4(worldTangent, input.Tangent.w); + return output; +} + +[[shader("fragment")]] +float4 PSMain(VSOutput input) : SV_TARGET +{ + float2 uv = float2(input.UV.x, 1.0 - input.UV.y); + float4 baseColor = (material.baseColorTextureSet < 0) ? material.baseColorFactor : baseColorMap.Sample(uv) * material.baseColorFactor; + + if (material.alphaMask > 0.5 && baseColor.a < material.alphaMaskCutoff) { discard; } + + float3 N = normalize(input.Normal); + float3 V = normalize(ubo.camPos.xyz - input.WorldPos); + float3 L = normalize(float3(1.0, 1.0, 1.0)); // Single directional light + + float NdotL = max(dot(N, L), 0.0); + float3 ambient = baseColor.rgb * 0.1 * ubo.scaleIBLAmbient; + float3 diffuse = baseColor.rgb * NdotL; + + return float4(diffuse + ambient, baseColor.a); +} + +[[shader("fragment")]] +float4 GlassPSMain(VSOutput input) : SV_TARGET +{ + // Basic glass fallback for Android + float2 uv = float2(input.UV.x, 1.0 - input.UV.y); + float4 baseColor = (material.baseColorTextureSet < 0) ? 
material.baseColorFactor : baseColorMap.Sample(uv) * material.baseColorFactor; + + float2 uvR = input.Position.xy / ubo.screenDimensions; + float3 bg = opaqueSceneColor.Sample(uvR).rgb; + + return float4(lerp(baseColor.rgb, bg, 0.9), 1.0); +} diff --git a/attachments/simple_engine/shaders/pbr_full.slang b/attachments/simple_engine/shaders/pbr_full.slang new file mode 100644 index 000000000..a2a0aa7e8 --- /dev/null +++ b/attachments/simple_engine/shaders/pbr_full.slang @@ -0,0 +1,635 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// Import shared utility modules +import common_types; +import pbr_utils; +import lighting_utils; +import tonemapping_utils; + +// Input from vertex buffer +struct VSInput { + [[vk::location(0)]] float3 Position; + [[vk::location(1)]] float3 Normal; + [[vk::location(2)]] float2 UV; + [[vk::location(3)]] float4 Tangent; + + // Per-instance data. Model matrix is a true 4x4, while the normal + // matrix is provided as three float4 columns that match the CPU + // layout (glm::mat3x4: 3 columns of vec4, xyz used, w unused). 
+ [[vk::location(4)]] column_major float4x4 InstanceModelMatrix; // binding 1 (uses 4 locations) + [[vk::location(8)]] float4 InstanceNormal0; // normal matrix column 0 + [[vk::location(9)]] float4 InstanceNormal1; // normal matrix column 1 + [[vk::location(10)]] float4 InstanceNormal2; // normal matrix column 2 +}; + +// Output from vertex shader / Input to fragment shader +struct VSOutput { + float4 Position : SV_POSITION; + float3 WorldPos; + float3 Normal : NORMAL; + float3 GeometricNormal : NORMAL1; + float2 UV : TEXCOORD0; + float4 Tangent : TANGENT; +}; + +[[vk::binding(0, 1)]] Sampler2D opaqueSceneColor; + +// Bindings +[[vk::binding(0, 0)]] ConstantBuffer ubo; +[[vk::binding(1, 0)]] Sampler2D baseColorMap; +[[vk::binding(2, 0)]] Sampler2D metallicRoughnessMap; +[[vk::binding(3, 0)]] Sampler2D normalMap; +[[vk::binding(4, 0)]] Sampler2D occlusionMap; +[[vk::binding(5, 0)]] Sampler2D emissiveMap; +#if !defined(PLATFORM_ANDROID) +[[vk::binding(6, 0)]] StructuredBuffer lightBuffer; +// Forward+ per-tile light lists (same set 0 to keep pipeline layouts compact) +[[vk::binding(7, 0)]] StructuredBuffer tileHeaders; +[[vk::binding(8, 0)]] StructuredBuffer tileLightIndices; +#endif +// Planar reflection sampler (bound only when reflections are enabled) +[[vk::binding(10, 0)]] Sampler2D reflectionMap; + +// Raster ray-query shadows: TLAS +#if !defined(PLATFORM_ANDROID) +[[vk::binding(11, 0)]] RaytracingAccelerationStructure tlas; + +// Ray-query shared buffers (used for material-aware raster shadow queries) +[[vk::binding(12, 0)]] StructuredBuffer geometryInfoBuffer; +[[vk::binding(13, 0)]] StructuredBuffer materialBuffer; +#endif + +[[vk::push_constant]] PushConstants material; + +static const float RASTER_SHADOW_EPS = 0.001; + +// Hard shadow query for raster fragment shading. +// NOTE: We intentionally treat NON_OPAQUE candidates as non-occluding here. 
+// To make glass/transmissive surfaces not block light, those instances should +// be flagged as FORCE_NO_OPAQUE in the TLAS build. +bool traceShadowOccluded(float3 origin, float3 direction, float tMin, float tMax) +{ +#if !defined(PLATFORM_ANDROID) + RayDesc ray; + ray.Origin = origin; + ray.Direction = direction; + ray.TMin = tMin; + ray.TMax = tMax; + + RayQuery q; + // Match TLAS instance masking convention from `renderer_ray_query.cpp`: + // 0x01 = regular scene geometry, 0x02 = environment/sky. + // For raster shadows, ignore the environment to avoid global false occlusion. + uint mask = 0x01; + q.TraceRayInline( + tlas, + RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH, + mask, + ray + ); + + int iter = 0; + while (q.Proceed() && iter < 64) + { + iter++; + // No special acceptance rules here; accept-first-hit will commit on the first opaque blocker. + } + return (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT); +#else + return false; +#endif +} + +// Vertex shader entry point +[[shader("vertex")]] +VSOutput VSMain(VSInput input) +{ + VSOutput output; + float4x4 instanceModelMatrix = input.InstanceModelMatrix; + float4 worldPos = mul(ubo.model, mul(instanceModelMatrix, float4(input.Position, 1.0))); + output.Position = mul(ubo.proj, mul(ubo.view, worldPos)); + output.WorldPos = worldPos.xyz; + + // Transform normals correctly: first by the per-instance normal matrix, + // then by the entity model 3x3 (avoid double-applying instance transform). + float3x3 instNormal = float3x3(input.InstanceNormal0.xyz, input.InstanceNormal1.xyz, input.InstanceNormal2.xyz); + float3x3 model3x3 = (float3x3)ubo.model; + float3 worldNormal = normalize(mul(model3x3, mul(instNormal, input.Normal))); + output.Normal = worldNormal; + + // Geometric normal (pre-normal-map) uses the same transform path. + output.GeometricNormal = worldNormal; + + // Transform tangent similarly (approximate with same normal transform path). 
+ float3 worldTangent = normalize(mul(model3x3, mul(instNormal, input.Tangent.xyz))); + output.UV = input.UV; + output.Tangent = float4(worldTangent, input.Tangent.w); + return output; +} + +// Fragment shader entry point for generic PBR materials +[[shader("fragment")]] +float4 PSMain(VSOutput input) : SV_TARGET +{ + // --- 1. Material Properties --- + float2 uv = float2(input.UV.x, 1.0 - input.UV.y); + float4 baseColor = (material.baseColorTextureSet < 0) ? material.baseColorFactor : baseColorMap.Sample(uv) * material.baseColorFactor; + float4 mrOrSpecGloss = (material.physicalDescriptorTextureSet < 0) ? float4(1.0, 1.0, 1.0, 1.0) : metallicRoughnessMap.Sample(uv); + float metallic = 0.0, roughness = 1.0; + float3 F0, albedo; + + if (material.useSpecGlossWorkflow != 0) { + float3 specColorSG = mrOrSpecGloss.rgb * material.specularFactor; + float gloss = clamp(mrOrSpecGloss.a * material.glossinessFactor, 0.0, 1.0); + roughness = clamp(1.0 - gloss, 0.0, 1.0); + F0 = specColorSG; + albedo = baseColor.rgb * (1.0 - max(F0.r, max(F0.g, F0.b))); + } else { + // glTF metallic-roughness texture packs metallic in B, roughness in G (linear space) + float metallicTex = mrOrSpecGloss.b; + float roughnessTex = mrOrSpecGloss.g; + metallic = clamp(metallicTex * material.metallicFactor, 0.0, 1.0); + roughness = clamp(roughnessTex * material.roughnessFactor, 0.0, 1.0); + F0 = lerp(float3(0.04, 0.04, 0.04), baseColor.rgb, metallic); + albedo = baseColor.rgb * (1.0 - metallic); + } + + float ao = (material.occlusionTextureSet < 0) ? 1.0 : occlusionMap.Sample(uv).r; + + // Emissive: default to constant white when no emissive texture so authored emissiveFactor works per glTF spec. + // NOTE(review): this shader applies no zero-factor override; emissiveFactor is multiplied in as-is below, so any (1,1,1) defaulting must happen before the push constants are filled — confirm. + float3 emissiveTex = (material.emissiveTextureSet < 0) ?
float3(1.0, 1.0, 1.0) : emissiveMap.Sample(uv).rgb; + float3 emissiveFactor = material.emissiveFactor; + float3 emissive = emissiveTex * emissiveFactor; + if (material.hasEmissiveStrengthExt) + emissive *= material.emissiveStrength; + + if (material.alphaMask > 0.5 && baseColor.a < material.alphaMaskCutoff) { discard; } + + // --- 2. Normal Calculation --- + float3 N = normalize(input.Normal); + if (material.normalTextureSet >= 0) { + float3 tangentNormal = normalMap.Sample(uv).xyz * 2.0 - 1.0; + float3 T = normalize(input.Tangent.xyz); + // We flip the V coordinate for all textures (uv.y -> 1-uv.y). In + // tangent space, this corresponds to inverting the bitangent. + // glTF's tangent.w encodes the sign of the bitangent relative to + // cross(N,T) in the *unflipped* UV space, so we must negate it here + // to keep the normal map oriented correctly after the V flip. + float handedness = -input.Tangent.w; + float3 B = normalize(cross(N, T)) * handedness; + float3x3 TBN = float3x3(T, B, N); + N = normalize(mul(TBN, tangentNormal)); + } + + // Geometric (non-normal-mapped) normal for large-scale effects like Fresnel, + // refraction and screen-space reflections. This stays stable across pixels + // on a flat pane and helps avoid flickering when rotating the camera. + float3 G = normalize(input.GeometricNormal); + + // --- 3. Opaque Lighting Calculation --- + float3 V = normalize(ubo.camPos.xyz - input.WorldPos); + + float3 diffuseLighting = float3(0.0, 0.0, 0.0); + float3 specularLighting = float3(0.0, 0.0, 0.0); + +#if !defined(PLATFORM_ANDROID) + // Forward+: compute tile id and iterate culled light list + const uint TILE = 16u; // must match engine configuration + uint tilesX = (uint(ubo.screenDimensions.x) + TILE - 1u) / TILE; + uint tilesY = (uint(ubo.screenDimensions.y) + TILE - 1u) / TILE; + + // SV_POSITION in the fragment stage is in window coordinates. Use robust integer index. 
+ uint px = (uint)max(0.0, input.Position.x); + uint py = (uint)max(0.0, input.Position.y); + uint tileX = (tilesX > 0u) ? min(px / TILE, tilesX - 1u) : 0u; + uint tileY = (tilesY > 0u) ? min(py / TILE, tilesY - 1u) : 0u; + uint totalTiles = max(tilesX * tilesY, 1u); + + // Clustered Z slice index from view-space depth (positive distance), using logarithmic slicing between nearZ and farZ + float dVS = abs(mul(ubo.view, float4(input.WorldPos, 1.0)).z); + float lnN = log(max(ubo.nearZ, 1e-4)); + float lnF = log(max(ubo.farZ, max(ubo.nearZ, 1e-4) + 1e-4)); // keep far > near in linear depth before taking the log (the guard used to compare against ln(near)) + float denom = max(lnF - lnN, 1e-6); + float slices = max(ubo.slicesZ, 1.0); + float lambda = saturate((log(max(dVS, 1e-4)) - lnN) / denom); + uint slice = (uint)clamp(floor(lambda * slices), 0.0, slices - 1.0); + + uint tileId = (slice * tilesY + tileY) * tilesX + tileX; + + // CPU toggles Forward+ via ubo.padding1 (0 = disabled, non-zero = enabled) + bool useForwardPlus = (ubo.padding1 != 0.0); + + uint base = 0u; + uint count = 0u; + if (useForwardPlus && tileId < totalTiles * (uint)slices) { + TileHeader th = tileHeaders[tileId]; + base = th.offset; + count = th.count; + } + + bool forceGlobal = false; + + // Accumulate per-light diffuse and specular terms using GGX microfacet BRDF. + if (useForwardPlus && !forceGlobal && count > 0) { + // Use Forward+ culled list + for (uint li = 0u; li < count; ++li) { + uint lightIndex = tileLightIndices[base + li]; + LightData light = lightBuffer[lightIndex]; + float3 L, radiance; + float distToLight = 10000.0; + if (light.lightType == 1) { + // Directional + L = normalize(-light.position.xyz); + radiance = light.color.rgb; + } else { + // Point/spot/emissive: position.xyz is light position in world space + float3 toLight = light.position.xyz - input.WorldPos; + float d = length(toLight); + L = (d > 1e-5) ?
toLight / d : float3(0,0,1); + distToLight = d; + + float attenuation = 1.0; + if (light.lightType == 3) { + // Emissive: soft falloff using range as a characteristic radius + float r = max(light.range, 0.001); + attenuation = 1.0 / (1.0 + (d / r) * (d / r)); + } else { + attenuation = 1.0 / max(d * d, 0.0001); + // GLTF style range attenuation + if (light.range > 0.0) { + attenuation *= pow(saturate(1.0 - pow(d / light.range, 4.0)), 2.0); + } + } + radiance = light.color.rgb * attenuation; + + if (light.lightType == 2) { + // Spot light cone attenuation + float3 D = normalize(light.direction.xyz); + float cd = dot(D, -L); + float cosInner = cos(light.innerConeAngle); + float cosOuter = cos(light.outerConeAngle); + float spotAttenuation = saturate((cd - cosOuter) / max(cosInner - cosOuter, 0.0001)); + spotAttenuation *= spotAttenuation; + radiance *= spotAttenuation; + } + } + // For emissive lights, treat lighting as two-sided to avoid glass/self-occlusion issues + float rawDot = dot(N, L); + float NdotL = (light.lightType == 3) ? abs(rawDot) : max(rawDot, 0.0); + + if (NdotL > 0.0) { + float visibility = 1.0; + // Raster ray-query shadows are expensive. In Bistro, most lights are emissive + // and casting per-light shadows drops FPS drastically. Shadow only the directional + // light (sun) for now. + if (ubo.padding2 != 0.0 && light.lightType == 1) { + float tMaxShadow = (light.lightType == 1) ? 10000.0 : max(distToLight - RASTER_SHADOW_EPS, RASTER_SHADOW_EPS); + float3 shadowOrigin = input.WorldPos + N * RASTER_SHADOW_EPS; + bool occluded = traceShadowOccluded(shadowOrigin, L, RASTER_SHADOW_EPS, tMaxShadow); + visibility = occluded ? 
0.0 : 1.0; + } + + float3 H = normalize(V + L); + float NdotV = max(dot(N, V), 0.0); + float NdotH = max(dot(N, H), 0.0); + float HdotV = max(dot(H, V), 0.0); + float D = DistributionGGX(NdotH, roughness); + float G = GeometrySmith(NdotV, NdotL, roughness); + float3 F = FresnelSchlick(HdotV, F0); + float3 spec = (D * G * F) / max(4.0 * NdotV * NdotL, 0.0001); + float3 kD = (1.0 - F) * (1.0 - metallic); + specularLighting += spec * radiance * NdotL * visibility; + diffuseLighting += (kD * albedo / PI) * radiance * NdotL * visibility; + } + } + } + // Global light loop (fallback or forced debug) + // Fallback when Forward+ list is empty but lights exist and not in single-tile mode, + // OR always when forceGlobal flag is enabled. + // If Forward+ is disabled, always use global lights. + // If Forward+ is enabled but lists are empty (e.g., before first dispatch), fall back to global. + if (forceGlobal || !useForwardPlus || (count == 0 && ubo.lightCount > 0)) { + // Fallback path when Forward+ is disabled or lists are not populated yet + for (uint li = 0u; li < (uint)ubo.lightCount; ++li) { + LightData light = lightBuffer[li]; + float3 L, radiance; + float distToLight = 10000.0; + if (light.lightType == 1) { + L = normalize(-light.position.xyz); + radiance = light.color.rgb; + } else { + float3 toLight = light.position.xyz - input.WorldPos; + float d = length(toLight); + L = (d > 1e-5) ? 
toLight / d : float3(0,0,1); + distToLight = d; + + float attenuation = 1.0; + if (light.lightType == 3) { + float r = max(light.range, 0.001); + attenuation = 1.0 / (1.0 + (d / r) * (d / r)); + } else { + attenuation = 1.0 / max(d * d, 0.0001); + // GLTF style range attenuation + if (light.range > 0.0) { + attenuation *= pow(saturate(1.0 - pow(d / light.range, 4.0)), 2.0); + } + } + radiance = light.color.rgb * attenuation; + + if (light.lightType == 2) { + // Spot light cone attenuation + float3 D = normalize(light.direction.xyz); + float cd = dot(D, -L); + float cosInner = cos(light.innerConeAngle); + float cosOuter = cos(light.outerConeAngle); + float spotAttenuation = saturate((cd - cosOuter) / max(cosInner - cosOuter, 0.0001)); + spotAttenuation *= spotAttenuation; + radiance *= spotAttenuation; + } + } + float NdotL = (light.lightType == 3) ? abs(dot(N, L)) : max(dot(N, L), 0.0); + if (NdotL > 0.0) { + float visibility = 1.0; + if (ubo.padding2 != 0.0 && light.lightType == 1) { + float tMaxShadow = (light.lightType == 1) ? 10000.0 : max(distToLight - RASTER_SHADOW_EPS, RASTER_SHADOW_EPS); + float3 shadowOrigin = input.WorldPos + N * RASTER_SHADOW_EPS; + bool occluded = traceShadowOccluded(shadowOrigin, L, RASTER_SHADOW_EPS, tMaxShadow); + visibility = occluded ? 
0.0 : 1.0; + } + + float3 H = normalize(V + L); + float NdotV = max(dot(N, V), 0.0); + float NdotH = max(dot(N, H), 0.0); + float HdotV = max(dot(H, V), 0.0); + float D = DistributionGGX(NdotH, roughness); + float G = GeometrySmith(NdotV, NdotL, roughness); + float3 F = FresnelSchlick(HdotV, F0); + float3 spec = (D * G * F) / max(4.0 * NdotV * NdotL, 0.0001); + float3 kD = (1.0 - F) * (1.0 - metallic); + specularLighting += spec * radiance * NdotL * visibility; + diffuseLighting += (kD * albedo / PI) * radiance * NdotL * visibility; + } + } + } +#endif + + float3 ambient = albedo * ao * (0.1 * ubo.scaleIBLAmbient); + float3 opaqueLit = diffuseLighting + specularLighting + ambient + emissive; + + // --- 4. Final Color Assembly (opaque only; transmission handled in GlassPSMain) --- + float3 color = opaqueLit; + float alphaOut = baseColor.a; + + // Clip-plane discard during reflection render pass (to remove behind-plane geometry) + if (ubo.reflectionPass == 1) { + float side = dot(ubo.clipPlaneWS, float4(input.WorldPos, 1.0)); + if (side > 0.0) discard; // discard geometry on the positive side of the plane + } + + // Note: reflections are only applied in glass path (GlassPSMain). No planar reflection + // sampling here to avoid banding/aliasing and ensure user-requested behavior. + + // --- 5. Post-Processing --- + // Output linear color for intermediate buffers (composite pass will tonemap) + return float4(color, alphaOut); +} + +// Fragment shader entry point specialized for architectural glass. +// Shares the same inputs and bindings as PSMain, but uses a much simpler +// and more stable shading model: primarily refraction of the opaque scene +// with a small ambient/emissive surface term. Direct diffuse/specular +// lighting and screen-space reflections are omitted to avoid global +// bright/dark flashes across large glass surfaces. +[[shader("fragment")]] +float4 GlassPSMain(VSOutput input) : SV_TARGET +{ + // --- 1. 
Material / texture sampling (minimal subset) --- + float2 uv = float2(input.UV.x, 1.0 - input.UV.y); + + float4 baseColor = (material.baseColorTextureSet < 0) + ? material.baseColorFactor + : baseColorMap.Sample(uv) * material.baseColorFactor; + + // Emissive (same logic as PSMain) + float3 emissiveTex = (material.emissiveTextureSet < 0) + ? float3(1.0, 1.0, 1.0) + : emissiveMap.Sample(uv).rgb; + float3 emissiveFactor = material.emissiveFactor; + float3 emissive = emissiveTex * emissiveFactor; + if (material.hasEmissiveStrengthExt) + emissive *= material.emissiveStrength; + + // Alpha mask discard as in PSMain + if (material.alphaMask > 0.5 && baseColor.a < material.alphaMaskCutoff) { + discard; + } + + // Geometric normal for view-angle dependence and refraction + float3 G = normalize(input.GeometricNormal); + float3 V = normalize(ubo.camPos.xyz - input.WorldPos); + + // Base albedo used for transmission tint + float3 albedo = baseColor.rgb; + + // Ambient is intentionally disabled for the glass path. + // Even small ambient terms can make large glass surfaces look "filled in" + // (frosted/opaque) rather than primarily showing the background through refraction. + + // Transmission factor from push constants. + // Some assets flag “glass” via engine-side heuristics but may not author + // `KHR_materials_transmission`. Since this shader is only used for glass, + // derive a robust effective transmission so glass never goes black. + float T_auth = clamp(material.transmissionFactor, 0.0, 1.0); + float opacity = clamp(baseColor.a, 0.0, 1.0); + float T_fromAlpha = 1.0 - opacity; + float T_eff = max(T_auth, T_fromAlpha); + if (T_eff < 0.01) { + // Default to mostly transmissive for glass when no explicit transmission/alpha is authored. + T_eff = 0.90; + } + + float3 color; + float alphaOut = baseColor.a; + + if (T_eff > 0.0) { + // Transmission/background sample (refraction approximation): sample the opaque scene behind glass. 
+ float2 uvR = input.Position.xy / ubo.screenDimensions; + uvR = clamp(uvR, float2(0.0, 0.0), float2(1.0, 1.0)); + float3 bg = opaqueSceneColor.Sample(uvR).rgb; + // Tint the background by albedo to approximate colored glass. + bg *= lerp(float3(1.0, 1.0, 1.0), max(albedo, 0.6), 0.8); + + // Planar reflection sample (optional) + float3 refl = bg; + if (ubo.reflectionEnabled == 1) { + float4 pr = mul(ubo.reflectionVP, float4(input.WorldPos, 1.0)); + float2 uvP = pr.xy / max(pr.w, 1e-5); + uvP = uvP * 0.5 + 0.5; + if (uvP.x >= 0.0 && uvP.x <= 1.0 && uvP.y >= 0.0 && uvP.y <= 1.0) { + refl = reflectionMap.Sample(uvP).rgb; + } + } + + // Stylized, stable glass: Use a tinted + // glass body + rim highlight, then add planar reflection contribution. + + // Use symmetric |N·V| so that front/back views of thin glass walls + // behave consistently (important when looking down into glasses). + float NdotV = abs(dot(G, V)); + + // Base clear color from albedo, slightly dimmed so glass does not + // appear self-emissive. + float3 clearColor = albedo * 0.6; + + // Rim term stronger at grazing angles (1 - NdotV)^3, but keep it subtle + float edge = pow(1.0 - NdotV, 3.0); + float3 rimColor = lerp(clearColor, float3(1.0, 1.0, 1.0), 0.25); + + // Surface term: keep subtle so glass does not appear frosted. + float3 surfaceBase = emissive; + float3 surfaceTerm = surfaceBase * (1.0 - T_eff) * 0.12; + + // Base surface appearance (slight body + rim) and transmitted background. + float3 glassBody = clearColor * 0.08; + float3 rim = rimColor * (edge * 0.25); + float3 surface = glassBody + rim + surfaceTerm; + + // Primary transmission mix: this is what makes interior lighting visible through windows. + color = lerp(surface, bg, T_eff); + + // Restore Fresnel-blended mixing with boosted visibility for debugging/tuning. 
+ float3 F_view2 = FresnelSchlick(NdotV, float3(0.06, 0.06, 0.06)); + float F_avg2 = (F_view2.r + F_view2.g + F_view2.b) / 3.0; + float reflStrength = saturate(0.20 + (1.5 * F_avg2) * (1.0 - material.roughnessFactor)); + // Scale by user-controlled intensity + reflStrength *= max(0.0, ubo.reflectionIntensity); + color = lerp(color, refl, reflStrength); + + // Fresnel only drives the reflection mix above (through reflStrength); it does + // not feed alpha, which is forced to 1.0 at the end of this branch. + + // Appearance model for architectural glass: mostly the transmitted + // background, with a Fresnel-weighted blend toward the planar + // reflection sample. T_eff (derived from transmissionFactor and alpha) controls how + // much of the underlying scene shows through overall. + + // Since we are sampling the background (opaqueSceneColor) and mixing it in the shader, + // we should output an alpha of 1.0 to ensure our mixed color is shown correctly + // in the swapchain, avoiding "double blending" with the hardware blender. + alphaOut = 1.0; + } else { + // Non-transmissive fallback: emissive only (ambient is disabled for glass). T_eff is raised to 0.90 whenever it is below 0.01, so this branch is not expected to run. + color = emissive; + } + + // Simple Forward+ lighting for glass (additive), using per-tile lists. + // This is a pragmatic lighting contribution so emissive bulbs can light glass-covered pixels. + // It does not model full transmission; it simply adds local diffuse+spec highlights. +#if !defined(PLATFORM_ANDROID) + { + const uint TILE = 16u; + uint tilesX = (uint(ubo.screenDimensions.x) + TILE - 1u) / TILE; + uint tilesY = (uint(ubo.screenDimensions.y) + TILE - 1u) / TILE; + uint px = (uint)max(0.0, input.Position.x); + uint py = (uint)max(0.0, input.Position.y); + uint tileX = (tilesX > 0u) ? min(px / TILE, tilesX - 1u) : 0u; + uint tileY = (tilesY > 0u) ?
min(py / TILE, tilesY - 1u) : 0u; + uint totalTiles = max(tilesX * tilesY, 1u); + // Pick the clustered Z slice the same way PSMain does: PSMain indexes tileHeaders as + // (slice * tilesY + tileY) * tilesX + tileX, so a 2D-only index always reads the slice-0 light list. + // With slicesZ <= 1 this reduces to the previous tileY * tilesX + tileX lookup. + float dVS = abs(mul(ubo.view, float4(input.WorldPos, 1.0)).z); + float lnN = log(max(ubo.nearZ, 1e-4)); + float lnF = log(max(ubo.farZ, max(ubo.nearZ, 1e-4) + 1e-4)); + float denom = max(lnF - lnN, 1e-6); + float slices = max(ubo.slicesZ, 1.0); + float lambda = saturate((log(max(dVS, 1e-4)) - lnN) / denom); + uint slice = (uint)clamp(floor(lambda * slices), 0.0, slices - 1.0); + uint tileId = (slice * tilesY + tileY) * tilesX + tileX; + uint base = 0u; + uint count = 0u; + if (tileId < totalTiles * (uint)slices) { + TileHeader th = tileHeaders[tileId]; + base = th.offset; + count = th.count; + } + if (count > 0u) { + float3 Ng = normalize(input.GeometricNormal); + float3 Vv = normalize(ubo.camPos.xyz - input.WorldPos); + // Use a neutral albedo to avoid darkening glass; weight specular more + float3 alb = float3(0.6, 0.6, 0.6); + float rough = 0.49; + float metal = 0.0; + for (uint li = 0u; li < count; ++li) { + uint lightIndex = tileLightIndices[base + li]; + LightData light = lightBuffer[lightIndex]; + float3 L, radiance; + float distToLight = 10000.0; + if (light.lightType == 1) { + L = normalize(-light.position.xyz); + radiance = light.color.rgb; + } else { + float3 toLight = light.position.xyz - input.WorldPos; + float d = length(toLight); + L = (d > 1e-5) ? toLight / d : float3(0,0,1); + distToLight = d; + if (light.lightType == 3) { + float r = max(light.range, 0.001); + float att = 1.0 / (1.0 + (d / r) * (d / r)); + radiance = light.color.rgb * att; + } else { + radiance = light.color.rgb / max(d * d, 0.0001); + } + } + float rawDot = dot(Ng, L); + float NdotL = (light.lightType == 3) ? abs(rawDot) : max(rawDot, 0.0); + if (NdotL > 0.0) { + float visibility = 1.0; + if (ubo.padding2 != 0.0) { + float tMaxShadow = (light.lightType == 1) ? 10000.0 : max(distToLight - RASTER_SHADOW_EPS, RASTER_SHADOW_EPS); + float3 shadowOrigin = input.WorldPos + Ng * RASTER_SHADOW_EPS; + bool occluded = traceShadowOccluded(shadowOrigin, L, RASTER_SHADOW_EPS, tMaxShadow); + visibility = occluded ?
0.0 : 1.0; + } + + float3 H = normalize(Vv + L); + float NdotV = max(dot(Ng, Vv), 0.0); + float NdotH = max(dot(Ng, H), 0.0); + float HdotV = max(dot(H, Vv), 0.0); + float D = DistributionGGX(NdotH, rough); + float G = GeometrySmith(NdotV, NdotL, rough); + float3 F = FresnelSchlick(HdotV, lerp(float3(0.04,0.04,0.04), alb, metal)); + float3 spec = (D * G * F) / max(4.0 * NdotV * NdotL, 0.0001); + float3 kD = (1.0 - F) * (1.0 - metal); + // Add a modest contribution to the glass color + color += ((kD * alb / PI) * radiance * NdotL * 0.6 + spec * radiance * NdotL * 0.8) * visibility; + } + } + } + } +#endif + + + // --- 3. Post-processing (glass path only: PSMain in this file returns linear color and leaves tonemapping to the composite pass) --- + color *= ubo.exposure; + + // Uncharted2 / Hable filmic tonemap. Use the canonical form without + // the extra 1.2 pre-scale so that midtones and shadows are not + // over-compressed relative to highlights. + float3 t = Hable_Filmic_Tonemapping::Uncharted2Tonemap(color); + float3 w = Hable_Filmic_Tonemapping::Uncharted2Tonemap(float3(1,1,1) * Hable_Filmic_Tonemapping::W); + color = t / max(w, float3(1e-6, 1e-6, 1e-6)); + + if (ubo.padding0 == 0) { // padding0 acts as a flag here: 0 = apply gamma encoding in this shader, non-zero = clamp only + color = pow(saturate(color), float3(1.0 / ubo.gamma)); + } else { + color = saturate(color); + } + + return float4(color, alphaOut); +} \ No newline at end of file diff --git a/attachments/simple_engine/shaders/ray_query.slang b/attachments/simple_engine/shaders/ray_query.slang index ad09434cc..a3ea80eb5 100644 --- a/attachments/simple_engine/shaders/ray_query.slang +++ b/attachments/simple_engine/shaders/ray_query.slang @@ -14,6 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ +#if defined(PLATFORM_ANDROID) +[numthreads(1, 1, 1)] +void main() {} +#else // Ray query compute shader for ray-traced rendering // Uses Slang's ray query extension as an alternative to rasterization @@ -1065,3 +1069,4 @@ void main(uint3 dispatchThreadID : SV_DispatchThreadID) outputImage[pixelCoord] = float4(skyColor(rayDir), 1.0); } } +#endif diff --git a/attachments/simple_engine/shaders/texturedMesh.slang b/attachments/simple_engine/shaders/texturedMesh.slang index 128ee1c89..775fd2079 100644 --- a/attachments/simple_engine/shaders/texturedMesh.slang +++ b/attachments/simple_engine/shaders/texturedMesh.slang @@ -17,20 +17,23 @@ // Combined vertex and fragment shader for textured mesh rendering // This shader provides basic textured rendering with simple lighting +// Import shared utility modules +import common_types; +import pbr_utils; +import lighting_utils; + // Input from vertex buffer struct VSInput { [[vk::location(0)]] float3 Position; [[vk::location(1)]] float3 Normal; - [[vk::location(2)]] float2 TexCoord; + [[vk::location(2)]] float2 UV; [[vk::location(3)]] float4 Tangent; - // Per-instance data. Model matrix is a true 4x4, while the normal - // matrix is provided as three float4 columns that match the CPU - // layout (glm::mat3x4: 3 columns of vec4, xyz used, w unused). 
- [[vk::location(4)]] column_major float4x4 InstanceModelMatrix; // binding 1 (consumes 4 locations) - [[vk::location(8)]] float4 InstanceNormal0; // normal matrix column 0 - [[vk::location(9)]] float4 InstanceNormal1; // normal matrix column 1 - [[vk::location(10)]] float4 InstanceNormal2; // normal matrix column 2 + // Per-instance data + [[vk::location(4)]] column_major float4x4 InstanceModelMatrix; + [[vk::location(8)]] float4 InstanceNormal0; + [[vk::location(9)]] float4 InstanceNormal1; + [[vk::location(10)]] float4 InstanceNormal2; }; // Output from vertex shader / Input to fragment shader @@ -38,20 +41,13 @@ struct VSOutput { float4 Position : SV_POSITION; float3 WorldPos; float3 Normal : NORMAL; - float2 TexCoord : TEXCOORD0; - float4 Tangent : TANGENT; // Pass through tangent to satisfy validation layer -}; - -// Uniform buffer -struct UniformBufferObject { - float4x4 model; - float4x4 view; - float4x4 proj; + float2 UV : TEXCOORD0; + float4 Tangent : TANGENT; }; // Bindings [[vk::binding(0, 0)]] ConstantBuffer ubo; -[[vk::binding(1, 0)]] Sampler2D texSampler; +[[vk::binding(1, 0)]] Sampler2D baseColorMap; // Vertex shader entry point [[shader("vertex")]] @@ -59,32 +55,17 @@ VSOutput VSMain(VSInput input) { VSOutput output; - // Use instance matrices directly (CPU uploads column-major model - // matrix and three float4 normal-matrix columns in attributes - // 4..10) float4x4 instanceModelMatrix = input.InstanceModelMatrix; - - // Transform position to world space: entity model * instance model float4 worldPos = mul(ubo.model, mul(instanceModelMatrix, float4(input.Position, 1.0))); - - // Final clip space position output.Position = mul(ubo.proj, mul(ubo.view, worldPos)); - - // Pass world position and transformed normal to fragment shader - // (apply entity model to normals too). Reconstruct the 3x3 normal - // matrix from the three uploaded columns and apply it in column - // form to avoid any row/column layout ambiguity. 
- float3x3 model3x3 = (float3x3)ubo.model; output.WorldPos = worldPos.xyz; - float3 instNormal = - input.InstanceNormal0.xyz * input.Normal.x + - input.InstanceNormal1.xyz * input.Normal.y + - input.InstanceNormal2.xyz * input.Normal.z; + float3x3 instNormal = float3x3(input.InstanceNormal0.xyz, input.InstanceNormal1.xyz, input.InstanceNormal2.xyz); + float3x3 model3x3 = (float3x3)ubo.model; + output.Normal = normalize(mul(model3x3, mul(input.Normal, instNormal))); // InstanceNormal0..2 are matrix columns but float3x3() takes rows, so multiply as a row vector (n.x*c0 + n.y*c1 + n.z*c2) - output.Normal = normalize(mul(model3x3, instNormal)); - output.TexCoord = input.TexCoord; - output.Tangent = input.Tangent; // Pass through tangent (unused in basic rendering) + output.UV = input.UV; + output.Tangent = input.Tangent; return output; } @@ -93,20 +74,18 @@ VSOutput VSMain(VSInput input) [[shader("fragment")]] float4 PSMain(VSOutput input) : SV_TARGET { - // Sample the texture with flipped V coordinate (glTF UV origin vs Vulkan) - float2 uv = float2(input.TexCoord.x, 1.0 - input.TexCoord.y); - float4 texColor = texSampler.Sample(uv); + float2 uv = float2(input.UV.x, 1.0 - input.UV.y); + float4 baseColor = baseColorMap.Sample(uv); - // Simple directional lighting - float3 lightDir = normalize(float3(0.5, 1.0, 0.3)); // Fixed light direction - float3 normal = normalize(input.Normal); - float lightIntensity = max(dot(normal, lightDir), 0.2); // Minimum ambient of 0.2 + // Simple PBR-lite lighting + float3 N = normalize(input.Normal); + float3 V = normalize(ubo.camPos.xyz - input.WorldPos); + float3 L = normalize(float3(1.0, 1.0, 1.0)); + float3 H = normalize(V + L); - // If texture is nearly white, use a default color to avoid washed-out look - float whiteness = (texColor.r + texColor.g + texColor.b) / 3.0; - float4 finalColor = (whiteness > 0.95) - ? 
float4(float3(0.8, 0.8, 0.8) * lightIntensity, 1.0) - : float4(texColor.rgb * lightIntensity, texColor.a); + float NdotL = max(dot(N, L), 0.0); + float3 ambient = baseColor.rgb * 0.1; + float3 diffuse = baseColor.rgb * NdotL; - return finalColor; + return float4(diffuse + ambient, baseColor.a); } diff --git a/attachments/simple_engine/vulkan_compatibility.h b/attachments/simple_engine/vulkan_compatibility.h new file mode 100644 index 000000000..92adb3f41 --- /dev/null +++ b/attachments/simple_engine/vulkan_compatibility.h @@ -0,0 +1,172 @@ +/* Copyright (c) 2025 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +// Fallback defines for optional extension names (allow compiling against older headers) +#ifndef VK_EXT_ROBUSTNESS_2_EXTENSION_NAME +# define VK_EXT_ROBUSTNESS_2_EXTENSION_NAME "VK_EXT_robustness2" +#endif +#ifndef VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME +# define VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME "VK_KHR_dynamic_rendering_local_read" +#endif +#ifndef VK_EXT_SHADER_TILE_IMAGE_EXTENSION_NAME +# define VK_EXT_SHADER_TILE_IMAGE_EXTENSION_NAME "VK_EXT_shader_tile_image" +#endif + +// Opacity Micromap fallback (KHR vs EXT) +#ifndef VK_KHR_OPACITY_MICROMAP_EXTENSION_NAME +# define VK_KHR_OPACITY_MICROMAP_EXTENSION_NAME "VK_KHR_opacity_micromap" +#endif + +#if defined(PLATFORM_ANDROID) || defined(__ANDROID__) + +// Only provide fallback mappings if the KHR types are not already defined by headers +#ifndef VK_KHR_opacity_micromap + +// Map missing KHR types/enums to EXT equivalents or dummies for Android compilation +#ifndef VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_TRIANGLES_OPACITY_MICROMAP_KHR +#define VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_TRIANGLES_OPACITY_MICROMAP_KHR VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_TRIANGLES_OPACITY_MICROMAP_EXT +#endif + +#ifndef VkAccelerationStructureTrianglesOpacityMicromapKHR +#define VkAccelerationStructureTrianglesOpacityMicromapKHR VkAccelerationStructureTrianglesOpacityMicromapEXT +#endif + +#ifndef VkMicromapUsageKHR +#define VkMicromapUsageKHR VkMicromapUsageEXT +#endif + +#ifndef VkMicromapTriangleKHR +#define VkMicromapTriangleKHR VkMicromapTriangleEXT +#endif + +#ifndef VK_OPACITY_MICROMAP_FORMAT_4_STATE_KHR +#define VK_OPACITY_MICROMAP_FORMAT_4_STATE_KHR VK_OPACITY_MICROMAP_FORMAT_4_STATE_EXT +#endif + +#ifndef VK_GEOMETRY_TYPE_MICROMAP_KHR +#define VK_GEOMETRY_TYPE_MICROMAP_KHR (VkGeometryTypeKHR)1000396001 +#endif + +#ifndef VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR +#define VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR 
(VkAccelerationStructureTypeKHR)1000396000 +#endif + +#ifndef VK_BUILD_ACCELERATION_STRUCTURE_MICROMAP_LOSSY_BIT_KHR +#define VK_BUILD_ACCELERATION_STRUCTURE_MICROMAP_LOSSY_BIT_KHR 0 +#endif + +#ifndef VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR +#define VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR (VkStructureType)1000150002 +#endif + +#ifndef VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR +#define VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR (VkAccelerationStructureBuildTypeKHR)0 +#endif + +// Provide dummy structure for missing types in Android EXT version of OMM +#ifndef VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_2_KHR +# define VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_2_KHR (VkStructureType)1000396003 +typedef struct VkAccelerationStructureCreateInfo2KHR { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureCreateFlagsKHR createFlags; + VkDeviceAddress addressRange; + VkDeviceSize size; + VkAccelerationStructureTypeKHR type; +} VkAccelerationStructureCreateInfo2KHR; +#endif + +#ifndef VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_MICROMAP_DATA_KHR +# define VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_MICROMAP_DATA_KHR (VkStructureType)1000396002 +typedef struct VkAccelerationStructureGeometryMicromapDataKHR { + VkStructureType sType; + const void* pNext; + uint32_t usageCountsCount; + const VkMicromapUsageKHR* pUsageCounts; + const VkMicromapUsageKHR* const* ppUsageCounts; + VkDeviceOrHostAddressConstKHR data; + VkDeviceOrHostAddressConstKHR triangleArray; + VkDeviceSize triangleArrayStride; +} VkAccelerationStructureGeometryMicromapDataKHR; +#endif + +#ifndef VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR +# define VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR (VkStructureType)1000150000 +#endif + +#ifndef VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR +#define VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR 
(VkBuildAccelerationStructureFlagBitsKHR)0x00000002 +#endif + +#ifndef VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR +#define VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR (VkBuildAccelerationStructureModeKHR)0 +#endif + +// Map other missing symbols +#ifndef VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_FEATURES_KHR +# define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_FEATURES_KHR VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_FEATURES_EXT +#endif + +#ifndef VkPhysicalDeviceOpacityMicromapFeaturesKHR +#define VkPhysicalDeviceOpacityMicromapFeaturesKHR VkPhysicalDeviceOpacityMicromapFeaturesEXT +#endif + +#ifndef VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_PROPERTIES_KHR +# define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_PROPERTIES_KHR VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_PROPERTIES_EXT +#endif + +#ifndef VkPhysicalDeviceOpacityMicromapPropertiesKHR +#define VkPhysicalDeviceOpacityMicromapPropertiesKHR VkPhysicalDeviceOpacityMicromapPropertiesEXT +#endif + +#ifndef VK_STRUCTURE_TYPE_MICROMAP_CREATE_INFO_KHR +# define VK_STRUCTURE_TYPE_MICROMAP_CREATE_INFO_KHR VK_STRUCTURE_TYPE_MICROMAP_CREATE_INFO_EXT +#endif + +#ifndef VkMicromapCreateInfoKHR +#define VkMicromapCreateInfoKHR VkMicromapCreateInfoEXT +#endif + +// Vulkan-Hpp compatibility aliases for the vk:: namespace +namespace vk { + +#ifndef VULKAN_HPP_DISABLE_OMM_KHR_ALIASES +using AccelerationStructureTrianglesOpacityMicromapKHR = ::VkAccelerationStructureTrianglesOpacityMicromapKHR; +using MicromapUsageKHR = ::VkMicromapUsageKHR; +using MicromapTriangleKHR = ::VkMicromapTriangleKHR; +using AccelerationStructureGeometryMicromapDataKHR = ::VkAccelerationStructureGeometryMicromapDataKHR; +using PhysicalDeviceOpacityMicromapFeaturesKHR = ::VkPhysicalDeviceOpacityMicromapFeaturesKHR; +using PhysicalDeviceOpacityMicromapPropertiesKHR = ::VkPhysicalDeviceOpacityMicromapPropertiesKHR; +using MicromapCreateInfoKHR = ::VkMicromapCreateInfoKHR; + +#ifndef 
VkMicromapBuildInfoKHR +# define VkMicromapBuildInfoKHR VkMicromapBuildInfoEXT +#endif +#ifndef VkMicromapBuildSizesInfoKHR +# define VkMicromapBuildSizesInfoKHR VkMicromapBuildSizesInfoEXT +#endif +using MicromapBuildInfoKHR = ::VkMicromapBuildInfoKHR; +using MicromapBuildSizesInfoKHR = ::VkMicromapBuildSizesInfoKHR; +#endif +} + +#endif // VK_KHR_opacity_micromap + +#endif // PLATFORM_ANDROID diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/00_introduction.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/00_introduction.adoc new file mode 100644 index 000000000..52711eb64 --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/00_introduction.adoc @@ -0,0 +1,28 @@ += Opacity Micromaps: Smarter Shadows for the Real World +:imagesdir: ../../../images + +By this point you have completed the Simple Game Engine tutorial and built something genuinely beautiful. Your ray-traced shadows fall across floors and walls with physical precision. Solid objects cast solid silhouettes, and soft shadowing makes those shadows look natural. The light knows where the geometry is, and darkness follows accordingly. For a simple architectural scene — a box room, a table, a few chairs — the result is nearly perfect, and it arrives without drama. + +But the real world is not made of boxes and chairs. Step outside and you immediately encounter a world that refuses to cooperate with simple ray-triangle intersection. Trees do not cast crisp rectangular shadows. A wrought-iron fence does not block light uniformly. Curtains billow with gaps of light. Hair is not a surface; it is a million semi-transparent strands. The visual richness of nature — and of the human world that imitates it — comes almost entirely from objects that are geometrically simple but optically complex. + +In real-time rendering, we handle this complexity with a technique called **alpha testing** (sometimes called alpha masking). 
Instead of modeling every leaf on a tree with real geometry, we take a broad, flat quadrilateral and paint a leaf texture onto it. The texture carries an **alpha channel** — a value from zero to one that says how opaque each texel is. At the edges of the leaf, the alpha is zero: transparent, ignore this pixel. In the body of the leaf, the alpha is one: opaque, this pixel exists. With this trick, a single flat triangle can represent a convincingly complex silhouette. + +During rasterization, alpha testing is inexpensive. The GPU evaluates the alpha at each screen pixel and simply discards the transparent ones. For shadow maps, a similar shortcut applies: a depth-only alpha-tested shadow pass is well-understood and well-optimized. + +Ray tracing changes the equation. In a ray-traced shadow system, a shadow ray does not project a texture onto a screen — it intersects with geometry in three-dimensional space. When that ray hits a leaf-triangle, it cannot simply ask "did I hit something?" It must ask "did I hit something that is actually solid at this precise point?" To answer that question, it must look up the alpha texture at the exact point of intersection. And it must do this for every triangle the ray might pass through. For a single tree with thousands of leaf triangles, and a scene with many trees, rendered at high resolution with multiple shadow rays per pixel — the arithmetic quickly becomes staggering. + +This is not a hypothetical problem. Alpha-tested foliage is one of the most common sources of performance bottlenecks in real-time ray-traced scenes. Profiling tools consistently reveal that any-hit shader invocations — the shader stage responsible for this alpha lookup — dominate the GPU's time whenever nature appears on screen. + +**Opacity Micromaps (OMMs)** are Vulkan's answer to this problem, delivered through the `VK_KHR_opacity_micromap` extension. 
The core idea is elegant and, once understood, almost obvious: rather than forcing the GPU to discover the opacity of each part of each triangle at runtime, we pre-bake that information into the GPU's acceleration structure before the first frame is ever drawn. We subdivide each triangle into a grid of tiny **micro-triangles** and classify each one as permanently opaque, permanently transparent, or edge-case unknown. The hardware traversal unit reads this classification directly, without running any shader code, and makes instant decisions about whether a ray is blocked. + +The result is that the expensive any-hit shader fires dramatically less often — only for that thin ring of edge pixels where certainty is genuinely ambiguous. For the vast majority of a leaf's surface, the answer is known in advance. The hardware acts on that knowledge in a single cycle. + +A brief note on lineage is worth making before we go further. `VK_KHR_opacity_micromap` is the Khronos-ratified evolution of the original `VK_EXT_opacity_micromap` extension. The key architectural change is that micromaps are no longer a separate `VkMicromapEXT` object — they fold directly into `VkAccelerationStructureKHR`, using a dedicated type at creation time. Host-build commands are removed in favour of a pure device-side API: micromap construction is driven exclusively through `vkCmdBuildAccelerationStructuresKHR` on a command buffer. Ray query shaders now require an explicit `OpacityMicromapKHR` execution mode declaration to benefit from the optimisation; without it the hardware ignores the micromap entirely. These changes unify the API and reflect what hardware actually supports. If you have read older documentation or sample code that refers to `vkBuildMicromapsEXT` or `VkMicromapEXT`, be aware that those concepts have been superseded; this course covers the KHR API exclusively. 
+ +This course will give you a complete conceptual understanding of why this problem exists, how micromaps solve it, and what the implementation looks like in the simple engine's source code. We will begin with the visual language of shadows themselves — why they look the way they do, and why foliage breaks the assumptions that fast shadow algorithms rely on. We will then descend into the GPU's ray traversal hardware to understand exactly where the performance cost originates. From there we will build up the micromap concept from scratch, walking through the subdivision model, the three-state classification, and the way micromap data attaches to acceleration structures. Finally, we will tour the engine's `OpacityMicromapBuilder` implementation and discuss when this optimization earns its keep — and when it doesn't. + +No prior knowledge of GPU hardware internals is required, but you should be comfortable with the basics of ray tracing: what a BVH is, how shadow rays work, and what an acceleration structure does. If those concepts feel solid, you are ready to begin. + +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/01_the_shadow_problem.adoc[Next: Shadows, Light, and the Trouble with Leaves] diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/01_the_shadow_problem.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/01_the_shadow_problem.adoc new file mode 100644 index 000000000..e1de2ae7f --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/01_the_shadow_problem.adoc @@ -0,0 +1,57 @@ += Shadows, Light, and the Trouble with Leaves + +== The Geometry of Darkness + +A shadow is absence. It is what remains when something stands between a surface and a source of light. Yet that absence carries enormous perceptual weight — shadows tell us about shape, about time of day, about depth, about the relationship between objects in a scene. 
Long before any algorithm was involved, painters spent centuries learning to render shadows convincingly, because the human visual system is exquisitely sensitive to them. + +The simplest shadow to reason about is the **hard shadow** — the kind you see indoors on a bright, sunny day when a window casts a perfectly sharp rectangle of light onto the floor. This sharpness arises from geometry. An ideal point source of light illuminates every surface it can see and leaves everything else in total darkness. There is no ambiguity at the boundary. A surface point either has a line of sight to the light source or it does not. The shadow's edge is a mathematical curve — the projection of the occluder's silhouette as seen from the light. + +image::images/omm_hard_shadow.svg[Diagram of a hard shadow cast by a point light source with sharp boundary, 600, 380] + +In a ray-traced shadow system, computing this is conceptually clean. We cast a ray from the surface point toward the light. If that ray intersects any piece of geometry before reaching the light, the point is in shadow. If it reaches the light unobstructed, the point is illuminated. The shadow is crisp because we fire exactly one ray per pixel and the answer is binary. + +== The Softness of Real Light + +Nature does not provide point light sources. The sun, for all its distance, subtends a small but non-zero angle in the sky — roughly half a degree of arc, which makes it effectively a disk. Interior lighting uses area lights: tubes, panels, spheres, and diffusers. Even a candle flame has physical extent. + +When the light source has area, the geometry of shadow becomes more interesting. Consider a surface point looking toward the sun. If the occluder (say, the edge of a roof overhang) blocks the entire solar disk from view, the point is in the **umbra** — fully in shadow, receiving no direct sunlight at all. 
If the occluder blocks only part of the solar disk, the point is in the **penumbra** — it receives some direct sunlight, from the unobstructed fraction of the disk, but not all. The ratio of blocked to unblocked disk area determines how bright the point appears. + +image::images/omm_soft_shadow.svg[Diagram of a soft shadow from an area light, showing umbra and penumbra regions with gradient boundary, 600, 420] + +The **penumbra** is that beautiful gradient at a shadow's edge — the blurry transition from full shadow to full light. It is so familiar from everyday experience that its absence in rendered images immediately reads as artificial. Our eyes have learned to expect soft edges. When we see a perfectly hard shadow edge from a large, bright light source, something feels wrong even if we cannot articulate why. + +In our engine, soft shadows are approximated by casting multiple shadow rays per pixel, sampling different points on the light source's surface, and averaging the results. Each ray gives a binary answer (blocked or not), and the average produces a smooth gradient. The quality of the soft shadow improves with the number of samples, and the performance cost scales accordingly. + +== Enter the Tree + +Now consider a scene with a tree. Not a stylized, cartoon tree — a realistic tree with thousands of leaves. The artist who created this tree did not model each leaf as a three-dimensional solid with thickness and geometry on every face. That would require an unmanageable number of polygons. Instead, they used a technique that has been standard in games and visualization for decades: every cluster of leaves is represented by a small number of large, flat triangles, each carrying a texture that shows the detailed leaf shape. 
+ +This texture has two components: the color of the leaves (green, with vein patterns and light-scattering subtlety) and the **alpha channel** — a mask that defines exactly which pixels of the texture rectangle are "leaf" and which are "sky." The alpha value is 1.0 where the leaf exists and 0.0 where it does not, with a narrow gradient at the edge to avoid harsh pixel-level aliasing. + +From a distance, this works beautifully. The triangles are nearly invisible; you see only the leaf shapes defined by the alpha. The silhouette looks organic and complex even though the underlying geometry is a sparse collection of flat quadrilaterals. This is **alpha-masked** geometry, and it is ubiquitous in real-time content: trees, bushes, grass, chain-link fences, wrought iron railings, curtains, particle sprites, and more. + +== When a Shadow Ray Meets a Leaf + +Here is where the trouble begins. When a shadow ray travels through the scene and encounters the tree, the BVH traversal engine identifies candidate triangles whose bounding boxes the ray intersects. For a solid wall, this is the end of the story: the ray hits the triangle, the hit is committed, the surface point is in shadow. Done. + +For a leaf triangle, the story is not over. The triangle is there, geometrically speaking — the ray has intersected the flat rectangle. But is the intersection point actually on a leaf, or is it in the transparent gap between leaves? The GPU cannot know by looking at the triangle alone. It must look up the alpha texture at the specific UV coordinates corresponding to the ray's intersection point. Only after that lookup can it decide whether to commit the hit (the point is on a solid leaf) or ignore it (the point is in transparent space, and the ray keeps traveling). + +This lookup is performed by the **any-hit shader** — a small programmable shader that fires for every candidate hit when the geometry is marked as alpha-tested. And crucially, the ray does not stop at the first candidate. 
A ray traveling through the canopy of a tree may pass through dozens of leaf triangles. Each one fires the any-hit shader. Each one requires a texture sample. Each one delays the traversal. + +== The Scale of the Problem + +To make this visceral: imagine you are trying to determine whether your hand is in the shade of a tree. But instead of the tree being a solid, opaque object, every single leaf is made of Swiss cheese — and the holes in the cheese are not filled with anything, just empty. To know whether a particular leaf blocks the light from reaching your hand, you have to physically pick up that leaf, hold it up, and check whether one of its holes lines up with the light source. Then pick up the next leaf. Then the next. For a full tree, rendered at 4K resolution, with 16 soft-shadow sample rays per pixel, refreshed sixty times a second — the number of "cheese inspections" runs into the billions per second. + +image::images/omm_foliage_problem.svg[Diagram showing a leaf mesh with alpha texture, with multiple shadow rays each required to test every leaf triangle individually, 600, 400] + +Every one of those inspections requires shader execution, texture sampling, and a decision. Most of the time, the answer is "transparent, keep going." But the cost of asking the question is paid regardless. + +== A Better Question + +This is the moment to ask a better question. We know, at scene load time, exactly what the leaf textures look like. We know which parts of each triangle are opaque and which are transparent. That information does not change between frames — the leaves are not animated at the texel level. So why are we discovering it again on every shadow ray, on every frame? + +What if we could tell the hardware, once, at load time, which parts of each triangle are definitely solid and which are definitely empty? What if the GPU could consult that pre-baked knowledge during traversal and only run the expensive shader for the genuinely ambiguous edge regions? 
That is exactly the promise of Opacity Micromaps. + +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/00_introduction.adoc[Previous: Introduction] | xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/02_why_alpha_testing_is_expensive.adoc[Next: What Your GPU Is Actually Doing] diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/02_why_alpha_testing_is_expensive.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/02_why_alpha_testing_is_expensive.adoc new file mode 100644 index 000000000..6c988e083 --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/02_why_alpha_testing_is_expensive.adoc @@ -0,0 +1,53 @@ += What Your GPU Is Actually Doing + +== A Filing Cabinet That Goes All The Way Down + +To understand why alpha testing is expensive, you need a mental model of how the GPU finds the triangles that a ray might hit. Checking every triangle in the scene for every ray would be catastrophically slow — a modern scene might have millions of triangles, and a 4K frame has millions of pixels, each casting shadow rays. The brute-force cost would be in the trillions of operations per frame. + +The GPU avoids this through a spatial data structure called a **Bounding Volume Hierarchy**, or **BVH**. Imagine a filing cabinet. The top-level drawer contains a single large box that encloses the entire scene. Inside that box are several smaller boxes, each enclosing a major region of the scene — perhaps the left half and the right half. Inside each of those are still smaller boxes, subdividing further. At the bottom of the hierarchy, each leaf node contains a small number of actual triangles. + +When a ray enters the BVH, it first tests the top-level bounding box. If it misses, done — the ray hits nothing in the entire scene. If it hits, it opens that box and tests the children. It follows the ones it hits and ignores the ones it misses. 
The traversal narrows geometrically: rather than testing a million triangles, the ray tests perhaps a few dozen bounding boxes and reaches perhaps a handful of candidate triangles. This is why ray traversal is feasible in real time. + +For fully **opaque** geometry, the story ends at the triangle level. When the traversal finds that a ray intersects a triangle, the hardware issues what is called a **committed hit** — it records the hit distance and surface data, and traversal can terminate (or continue looking for closer hits, depending on the mode). No shader code is involved. The fixed-function hardware handles it entirely. + +== The Any-Hit Shader: A Necessary Interruption + +Alpha-tested geometry breaks this clean story. When the BVH traversal finds a candidate triangle on an alpha-tested mesh, it cannot commit the hit without first checking whether the intersection point is actually solid. To do this, it must invoke a programmable shader — the **any-hit shader**. + +The any-hit shader receives the intersection data: specifically, the **barycentric coordinates** of the hit point within the triangle. Barycentric coordinates describe where exactly within the triangle the ray landed, expressed as a weighted combination of the three vertices. From the barycentric coordinates, the shader can interpolate the UV texture coordinates for the hit point. Then it samples the alpha texture at those UVs and examines the result. If the alpha value is above the material's threshold, the hit is solid — the shader allows the default commit to proceed. If the alpha is below threshold, the shader calls the ignore function and traversal continues, hunting for the next candidate. + +This is several sequential operations — barycentric interpolation, UV interpolation, a texture sample, a comparison, and a decision — that must complete before traversal can proceed. And it must complete for every candidate hit. 
+ +== The Parallelism Problem: Divergence + +The GPU is not a sequential machine. It executes shader code on hundreds or thousands of parallel execution units simultaneously. These units work in groups called **warps** (NVIDIA terminology) or **wavefronts** (AMD terminology) — typically 32 or 64 shader threads executing in lockstep, running the same instruction at the same time on different data. This massively parallel execution model is why the GPU can shade millions of pixels per frame. + +The lockstep model has a critical weakness: **divergence**. When different threads in a warp need to take different code paths — when some threads find an opaque hit and others find a transparent hit — the warp cannot stay in lockstep. It must execute both code paths, masking off the threads that aren't active on each path. Half the warp waits while the other half runs its branch, then vice versa. The effective parallelism halves, or worse. + +Alpha testing is a divergence machine. Out of 32 shadow rays in a warp, some might hit opaque regions of leaf triangles, some might hit transparent regions, and some might not hit any leaf at all. The any-hit shader fires on different triangles for different threads. Some threads finish their alpha check quickly, others take longer due to cache behavior. The warp serializes. Execution units sit idle waiting for stragglers. The GPU's parallel throughput advantage evaporates precisely where it hurts most. + +== The Texture Cache Catastrophe + +There is a second, related cost that compounds the first: **texture cache misses**. The GPU has dedicated caches for texture data, designed to serve texture samples efficiently. During rasterization, texture caching works beautifully because nearby screen pixels tend to sample nearby points in texture space — spatial coherence means neighboring threads read from the same cache lines, and the cache stays warm. 
+ +During BVH traversal, rays come from arbitrary screen positions and can hit triangles at arbitrary UV coordinates. Seventeen shadow rays in the same warp might be testing seventeen completely different leaf triangles, each with different UV coordinates, scattered across the texture atlas. Each sample might land in a different region of texture memory. The cache serves up cold miss after cold miss. The texture unit — built for coherent rasterization access — stalls repeatedly while data is fetched from video memory. + +Texture memory bandwidth is finite and shared with other operations happening simultaneously on the GPU. Cache misses in the any-hit shader compete for that bandwidth. In a forest scene, the texture cache thrashing from any-hit shader invocations can account for a substantial fraction of the total frame time. + +== Counting the Cost + +Let's put concrete numbers on this to build intuition. Suppose a forest scene has 50,000 leaf triangles visible in the shadow view frustum. A shadow map approach would process these once per frame at low resolution. In our ray-traced system, we cast 16 shadow rays per pixel at 1080p, giving us roughly 2 million pixels times 16 rays — about 33 million shadow rays per frame. Each shadow ray might pass through 3 to 5 leaf triangles on average before either finding an opaque hit or exiting the canopy. That is somewhere between 100 and 165 million any-hit shader invocations per frame, each requiring a texture sample, all competing for cache, all introducing divergence. + +A modern GPU can execute perhaps 50 to 100 billion simple operations per second. But the any-hit shader is not a simple operation — it is a branching shader with a texture sample and a memory-dependent decision. Realistically, each invocation costs tens of nanoseconds. The math is uncomfortable. 
+ +== The Fundamental Insight + +The waste here is not computational in the algorithmic sense — the any-hit shader is doing exactly what it must, given what the hardware knows. The waste is informational. The GPU is discovering, at runtime, information that was available at scene load time. The alpha texture does not change between frames. The relationship between each triangle and its alpha mask does not change. Every single any-hit shader invocation is re-discovering something that was knowable in advance. + +This is the root of the problem, and it points directly to the solution. If the GPU already knew, encoded directly in the acceleration structure, which sub-regions of each triangle are opaque and which are transparent, it could make traversal decisions without running any shader at all — except for the genuinely uncertain edge regions. The any-hit shader would become rare rather than universal. + +That is precisely what Opacity Micromaps provide. + +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/01_the_shadow_problem.adoc[Previous: Shadows, Light, and the Trouble with Leaves] | xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/03_what_are_micromaps.adoc[Next: Micromaps: Teaching the Hardware What It Already Should Know] diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/03_what_are_micromaps.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/03_what_are_micromaps.adoc new file mode 100644 index 000000000..097e325a6 --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/03_what_are_micromaps.adoc @@ -0,0 +1,73 @@ += Micromaps: Teaching the Hardware What It Already Should Know + +== The Central Idea + +Everything we have discussed so far converges on a single insight: the GPU is doing expensive runtime work to discover information that is fully knowable ahead of time. The alpha texture of a leaf does not animate. The opacity distribution of a fence mesh does not change from frame to frame. 
If we could bake this information into the acceleration structure itself — the same data structure the hardware uses for BVH traversal — the hardware could make opacity decisions without invoking any shader at all. + +This is the complete description of an **Opacity Micromap**: a data structure, attached to a triangle mesh in the GPU's acceleration structure, that subdivides each triangle into a regular grid of smaller triangles and assigns each one a pre-computed opacity state. The word "micro" is used precisely: these are not additional geometric triangles that the ray tracer has to traverse. They are sub-triangle classifications that the traversal hardware reads as metadata, using them to accelerate the decision of whether a given ray-triangle intersection represents an actual hit. + +== Three States, Not Two + +You might expect the micromap to store a simple binary value — opaque or transparent. But Opacity Micromaps use a three-state system, and the third state is the key to making the scheme both correct and practical. + +The first state is **Opaque**. A micro-triangle marked Opaque always blocks rays. When traversal finds that a shadow ray intersects a triangle in a region marked Opaque, it commits the hit immediately, without running any shader. This is identical behavior to fully opaque geometry — fixed-function, zero shader cost. + +The second state is **Transparent**. A micro-triangle marked Transparent never blocks rays. When traversal finds that the intersection falls in a Transparent region, it ignores the hit and continues traversal without running any shader. This too is zero shader cost. + +The third state is **Unknown**. A micro-triangle marked Unknown triggers normal any-hit shader behavior — the traversal falls back to the same shader-driven alpha test that the non-micromap path uses. The any-hit shader fires, samples the texture, makes the decision. + +The power of this system lies in what the Unknown state makes possible. 
Instead of trying to perfectly classify every pixel of every triangle in advance — which would require infinite subdivision — we can be conservative. Where the alpha texture is clearly solid across the entire micro-triangle, we say Opaque. Where it is clearly empty, we say Transparent. Only in the boundary regions, where a single micro-triangle straddles the alpha cutoff edge, do we admit uncertainty and fall back to the shader. + +For a well-designed leaf texture, the opaque and transparent regions dominate. The boundary between them is a thin perimeter. If the micro-triangle grid is fine enough, that perimeter region corresponds to perhaps five or ten percent of all micro-triangles. The other ninety to ninety-five percent are classified definitively, and the any-hit shader never fires for them. The cost reduction is proportional. + +== Visualizing the Subdivision + +image::images/omm_triangle_subdivision.svg["Diagram showing a leaf-shaped triangle subdivided into a regular grid of micro-triangles; center micro-triangles colored green for opaque, outer micro-triangles colored red for transparent, and a thin ring at the leaf edge colored yellow for unknown", 600, 500] + +Consider a single piece of leaf geometry — a flat quadrilateral, built from two triangles, whose alpha texture shows a maple leaf. The micromap subdivision overlays a regular grid on each of those triangles. In the center of the leaf, the alpha is solidly 1.0. Every micro-triangle in that region is sampled and found to be fully opaque. They are colored green in our mental model — hardware will commit any hit that falls here, instantly. + +At the edges of the quad far from the leaf shape, the alpha is solidly 0.0. Every micro-triangle in that region is sampled and found to be fully transparent. They are colored red — hardware will skip any hit that falls here, instantly. + +At the precise boundary of the leaf silhouette, where the texture designer blended the alpha from 1.0 to 0.0 to avoid harsh edges, some micro-triangles straddle both values. 
These are colored yellow — unknown. They are the only ones that will trigger the any-hit shader during traversal. This yellow ring is thin, perhaps one or two micro-triangles wide, and it represents a small fraction of the triangle's area. + +== Subdivision Levels: Precision at a Cost + +The **subdivision level** controls how fine the micro-triangle grid is. At level 0, the entire triangle receives a single classification — one state for the whole thing. This is only useful for geometry that is genuinely uniform (entirely opaque or entirely transparent), which is unusual for alpha-tested content. + +At level 1, each original triangle is subdivided into 4 micro-triangles. At level 2, it becomes 16. At level 3, it becomes 64. At level 4, it becomes 256. Each increase in level roughly quadruples the number of micro-triangles, improving the accuracy of the classification at the cost of more storage and more upfront computation. + +For typical foliage textures, level 2 or level 3 provides a good balance. A level-3 micromap for a single leaf triangle contains 64 micro-triangles. The leaf shape is approximated reasonably well at this resolution — most of the leaf body is classified Opaque, most of the empty space is classified Transparent, and the unknown ring is narrow. Going to level 4 would improve accuracy slightly but quadruple the memory footprint for likely marginal runtime benefit. + +The right choice depends on the texture content and the performance targets of the application. Content with sharp, high-contrast alpha masks (like a chain-link fence with near-binary alpha) benefits greatly from even low subdivision levels because the boundaries are crisp. Content with soft, gradual alpha transitions (like hair cards or feathers) may require higher subdivision levels to keep the unknown fraction small. + +== Building the Micromap: A One-Time Investment + +The micromap is built during scene loading, or as an offline pre-process before the application ships. 
The procedure is conceptually simple. For each triangle in an alpha-tested mesh, and for each micro-triangle at the chosen subdivision level, the builder samples the alpha texture at several points within the micro-triangle — typically at the centroid or at multiple jittered positions. It averages those samples and compares the result against a pair of thresholds. + +If the average alpha is above the upper threshold (close to 1.0), the micro-triangle is classified Opaque. If it is below the lower threshold (close to 0.0), it is classified Transparent. If it falls between the thresholds, it is classified Unknown. This is a one-time operation per mesh, per texture, per subdivision level. It can run on the CPU or in a GPU compute shader; either way, its cost is paid once at load time, not every frame. + +The output of this process is a compact array of 2-bit values: one for each micro-triangle. Opaque is encoded as 1, Transparent as 0, Unknown as 2 (and there is also an Unknown-Opaque variant, encoded as 3, that interacts with certain pipeline flags — but for our purposes, three conceptual states are all we need). For a mesh with 10,000 leaf triangles at subdivision level 3, the micromap data is 10,000 times 64 micro-triangles times 2 bits, which is about 160 kilobytes. This is negligible compared to the mesh data itself. + +== Where the Data Lives + +The classified micro-triangle data is uploaded to the GPU and used to construct a **`VkAccelerationStructureKHR`** whose type is set to `VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR`. This is a deliberate unification in the KHR API: rather than introducing a separate object type for micromap data, `VK_KHR_opacity_micromap` folds the micromap directly into the existing acceleration structure abstraction. The same `VkAccelerationStructureKHR` handle you use for BLASes and TLASes is also the handle for a micromap — the `type` field at creation time is what distinguishes them. 
The micromap acceleration structure is allocated via `vkCreateAccelerationStructure2KHR` (from `VK_KHR_device_address_commands`), passing `VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR` as the type. Building the micromap uses `vkCmdBuildAccelerationStructuresKHR` with `geometryType` set to `VK_GEOMETRY_TYPE_MICROMAP_KHR` — the same device-side command used for all acceleration structure builds. There is no separate host-side build path for micromaps in the KHR API; all micromap construction is GPU-driven. + +Once the micromap acceleration structure is built, it is attached to the corresponding BLAS during the BLAS build or rebuild. The attachment is specified through the `VkAccelerationStructureTrianglesOpacityMicromapKHR` structure, which is chained into the geometry description passed to the acceleration structure build. The `micromap` field in this structure is the `VkAccelerationStructureKHR` handle of the micromap you just built. The `indexBuffer` field, which maps original triangles to their micromap entries, is a plain `VkDeviceAddress` — there is no host address variant in the KHR API, reflecting the device-only design philosophy. At that point, the micromap data becomes part of the acceleration structure itself — stored in GPU memory in the hardware's native format for traversal. + +The traversal hardware reads the micromap data during BVH traversal as part of its normal operation. There is no additional API call, no shader modification, no synchronization primitive to manage on a per-frame basis. Once the BLAS is built with the micromap attached, the hardware simply uses it, every frame, for every ray that touches the mesh — automatically, transparently, at hardware speed. 
Ray query shaders do require the `OpacityMicromapKHR` SPIR-V execution mode, declared via the `SPV_KHR_opacity_micromap` extension, to inform the compiler that opacity micromap data may influence traversal results; this is a one-line declaration in your GLSL or HLSL source and carries no runtime cost. + +== Lossy Builds: Trading Perfection for Higher Precision + +The `VK_KHR_opacity_micromap` API offers one additional degree of freedom: the **lossy build flag**, `VK_BUILD_ACCELERATION_STRUCTURE_MICROMAP_LOSSY_BIT_KHR`. When this flag is set during the micromap build, the driver is permitted to apply lossy compression to the micromap data. In exchange for that freedom, the driver may be able to support higher effective subdivision levels — pushing the micro-triangle grid finer than would otherwise fit in the available memory budget. Subdivision levels that exceed the standard `maxOpacity4StateSubdivisionLevel` cap may become available, up to the value reported in `maxOpacityLossy4StateSubdivisionLevel`. + +The practical implication is nuanced. A lossy build may occasionally promote a micro-triangle from Unknown to either Opaque or Transparent based on compressed state, which means the any-hit shader will not fire for that micro-triangle even though the uncompressed classification was ambiguous. For most foliage and most viewing distances, this is visually imperceptible. For content where precision at the alpha boundary is critical — fine hair, feathers, or close-up leaf geometry — you may prefer to avoid the lossy flag and accept slightly coarser subdivision. The choice belongs to the application. + +== The Profound Simplicity + +Step back and appreciate what has happened. We have moved the most expensive part of alpha-tested ray traversal — the per-hit, per-frame shader invocation for opacity discovery — out of the hot path entirely. We replaced it with a pre-baked data structure that the hardware reads at fixed-function speed. 
We did not change the visual result: the any-hit shader still fires for Unknown micro-triangles, ensuring correctness at the boundaries. We did not change the application's API footprint significantly. We added one build step and one attachment structure. + +For most foliage, this means that perhaps 95% of all shadow ray interactions with leaf geometry now cost nothing in shader terms — they are handled entirely by hardware, in the same cycle budget as opaque geometry traversal. Only that thin ring of edge pixels — the 5% that genuinely straddles the alpha boundary — still pays the any-hit shader cost. The improvement is not a rounding error. It is a qualitative change in the nature of the workload. + +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/02_why_alpha_testing_is_expensive.adoc[Previous: What Your GPU Is Actually Doing] | xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/04_hardware_traversal_with_omm.adoc[Next: Inside the Traversal: How the GPU Uses Your Micromap] diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/04_hardware_traversal_with_omm.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/04_hardware_traversal_with_omm.adoc new file mode 100644 index 000000000..23499e146 --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/04_hardware_traversal_with_omm.adoc @@ -0,0 +1,84 @@ += Inside the Traversal: How the GPU Uses Your Micromap + +== Two Journeys for One Shadow Ray + +The best way to understand what Opacity Micromaps do at runtime is to follow a single shadow ray through both worlds — first the world without OMMs, then the world with them — and observe exactly where the work happens and where it disappears. 
+ +image::images/omm_traversal_comparison.svg["Side-by-side diagram contrasting shadow ray traversal without OMM (many any-hit shader invocations) versus with OMM (most hits resolved by hardware, few shader invocations)", 700, 420] + +=== The Journey Without OMMs + +Our shadow ray is cast from a surface point on the ground directly below a tree canopy. Its target is a sample position on the area light above. The ray enters the BVH. + +First, it tests the top-level bounding box of the entire scene and finds it intersects. It descends into the scene's sub-nodes, testing bounding boxes at each level. The non-leaf geometry — the trunk, the ground, a nearby wall — is quickly excluded by the BVH. The ray's path passes through the bounding box region containing the leaf canopy. Several hundred leaf triangles live in this region. + +At the leaf level of the BVH, the traversal begins testing individual triangles. The ray intersects the bounding box of the first candidate leaf cluster. Several leaf triangles are candidates. For each candidate, the traversal hardware determines that the geometry is flagged as alpha-tested. It cannot commit the hit automatically. It invokes the any-hit shader. + +The any-hit shader runs. It receives the barycentric coordinates of the intersection. It interpolates UV coordinates. It issues a texture sample request. The texture unit processes the request — likely a cache miss, since this is the first frame and the cache is cold. The sample returns. The shader compares the alpha value to the material's cutoff threshold. The alpha is 0.02 — this is empty space between leaves. Traversal continues. + +The next candidate: another leaf triangle. The any-hit shader fires again. UV interpolation, texture sample (another miss), comparison. Alpha is 0.87 — solid leaf. The hit is committed. The ray is blocked. The shadow query returns "in shadow." 
+ +But that was two any-hit shader invocations just for this one ray, and most shadow rays in a forest scene will encounter more. A ray traveling at a shallow angle through the canopy might test eight, twelve, or more leaf triangles before finding a solid hit or exiting the tree. Each test: a shader invocation, a texture sample, a decision. + +Multiply by 16 shadow rays per pixel. Multiply by millions of pixels. The any-hit shader is the most frequently invoked shader in the entire frame. + +=== The Journey With OMMs + +The same shadow ray is cast. The BVH traversal proceeds identically — micromaps do not change the BVH structure or the bounding box hierarchy. The ray reaches the leaf geometry region and begins testing candidate triangles. + +The first candidate leaf triangle is an alpha-tested mesh, but this time it has an attached micromap. The traversal hardware computes the intersection point's barycentric coordinates as before. Then — still in fixed-function hardware, without any shader involvement — it looks up the micro-triangle that contains this intersection point. The micro-triangle is classified Transparent. The hardware discards the hit immediately. No shader fires. No texture sample. Traversal continues in a fraction of the time. + +The second candidate: the hardware checks the micro-triangle. Opaque. The hit is committed immediately. No shader fires. No texture sample. The ray is blocked. Shadow query returns "in shadow." + +Total any-hit shader invocations for this ray: zero. The hardware resolved everything. + +For a different ray that happens to land precisely on the leaf edge region — in an Unknown micro-triangle — the any-hit shader does fire. But this is the exception, not the rule. For a well-subdivided leaf texture, this happens for perhaps one in twenty ray-leaf intersections, not nineteen in twenty. 
+ +== How the Hardware Reads the Micromap + +The micromap data is stored in GPU memory as a densely packed array of 2-bit values, in a format the traversal hardware can address directly given a base pointer, a stride, and a subdivision level. When the BLAS is built with a micromap attached, the acceleration structure stores a reference to this data alongside the geometry. + +During traversal, when the hardware identifies a candidate triangle intersection, it performs a micro-triangle lookup that is essentially an index calculation: given the barycentric coordinates of the hit point and the subdivision level, compute which micro-triangle the point falls within, then read the 2-bit state from the array. This computation is simple integer arithmetic — no general-purpose shader, no texture fetch from a separately managed resource. It is part of the fixed-function traversal unit, the same circuit that tests bounding boxes and computes intersection distances. + +This is what it means for the hardware to provide **hardware-accelerated opacity evaluation**. The micromap is not a compute buffer that a shader reads. It is metadata embedded in the acceleration structure, readable by the traversal hardware itself. + +Under `VK_KHR_opacity_micromap`, the micromap is represented not as a separate object type but as a standard `VkAccelerationStructureKHR` created with type `VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR`. This unification with the standard acceleration structure object is a deliberate design choice in the KHR extension: it means the micromap participates in the same memory model, barrier rules, and lifetime management as all other acceleration structures in the scene, rather than introducing a parallel hierarchy of objects. + +== The OpacityMicromapKHR Execution Mode + +In `VK_KHR_opacity_micromap`, the hardware micromap fast-path for ray queries is **not** automatic. 
The shadow shader must explicitly declare the `OpacityMicromapKHR` SPIR-V execution mode, defined by `SPV_KHR_opacity_micromap`, to signal to the traversal hardware that it should consult the micromap data. In GLSL this declaration looks like: + +[source,glsl] +---- +layout(constant_id = N) gl_EnableOpacityMicromapExt; +---- + +where `N` is a specialisation constant ID. This is a one-line addition to the shader — not a logical change — but without it, the hardware silently ignores all micromap data during ray query traversal and falls back to the full any-hit shader path on every intersection, as if no micromaps were attached at all. This was a deliberate design change from `VK_EXT_opacity_micromap`, where the optimisation was implicit for ray queries. The explicit declaration makes the hardware intent unambiguous and allows compilers to reason about it correctly. + +== The Cascade Effect on Warp Efficiency + +Recall from the earlier chapter on why alpha testing is expensive the problem of divergence: when different threads in a warp take different code paths, the warp serializes. Alpha-tested geometry was a chronic source of divergence because the any-hit shader fired with different outcomes for different rays. + +With OMMs, this divergence is dramatically reduced. Most ray-leaf intersections never reach a shader. The threads that would have diverged on "did we hit solid or transparent?" now never reach that decision in a shader context — the hardware makes the decision silently, in a unified fixed-function path, before the shader even knows a candidate intersection existed. Threads whose rays pass through transparent micro-triangles stay synchronized with threads whose rays hit nothing, because both paths are zero-cost hardware operations. + +The any-hit shader still fires for Unknown micro-triangles, and divergence can still occur there. But the fraction of invocations that trigger this path is small enough that the warp efficiency improves substantially. 
In a typical foliage-heavy scene, moving from no OMMs to well-classified OMMs can reduce any-hit shader invocations by 80 to 95 percent. The warp efficiency gains are proportionally significant. + +== The Multiplier Effect for Soft Shadows + +The performance benefit of OMMs scales with the number of shadow rays per pixel. For hard shadows — one ray per pixel — the saving is real but bounded. For soft shadows with 16 samples per pixel, the saving multiplies by 16. This is one of the most important practical implications of the technique. + +Soft shadows are among the most visually compelling features of ray-traced rendering. They are also among the most expensive, because cost scales directly with sample count. Anything that reduces the per-ray cost of shadow queries has a multiplied impact on soft shadow performance. OMMs are therefore most impactful precisely in the rendering modes that matter most for visual quality: high-sample-count soft shadows under foliage. + +There is also a subtler cache benefit at play. When 16 shadow rays from nearby pixels all test the same leaf triangles (which they tend to do, since nearby pixels see nearby geometry), the micromap data for those triangles is already warm in the GPU's L1 and L2 caches from the first few rays. The micromap lookup for subsequent rays is nearly free in memory terms. The alpha texture, by contrast, would need to be fetched every time and is less likely to stay warm given the potentially varying UV coordinates of different rays. + +== The Shadow Ray Lifecycle, Complete + +image::images/omm_shadow_ray_lifecycle.svg["Flowchart showing the lifecycle of a shadow ray with OMM: cast ray, BVH traversal, candidate hit found, micromap lookup, branch to Opaque or Transparent or Unknown, with Opaque and Transparent resolved in hardware and Unknown falling back to any-hit shader", 600, 520] + +The complete lifecycle of a shadow ray in an OMM-enabled scene can be summarized as a decision tree. 
The ray enters BVH traversal. For each candidate triangle intersection, the hardware checks for an attached micromap. If no micromap is present (non-alpha-tested geometry), it uses the standard opaque-geometry path: commit the hit immediately. If a micromap is present, it looks up the micro-triangle state. Opaque: commit, no shader. Transparent: discard, no shader. Unknown: invoke the any-hit shader and let the shader make the final call. + +This decision tree lives entirely in fixed-function hardware for the Opaque and Transparent branches. Only the Unknown branch enters programmable execution. The result is that the GPU's shader execution units are freed from the overwhelming majority of alpha-testing work and can focus on the cases that genuinely require a shader. In `VK_KHR_opacity_micromap`, reaching this optimized path also depends on the shader having declared the `OpacityMicromapKHR` execution mode — the hardware and the shader must both be in agreement for the optimization to take effect. + +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/03_what_are_micromaps.adoc[Previous: Micromaps: Teaching the Hardware What It Already Should Know] | xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc[Next: Building Opacity Micromaps in the Simple Engine] diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc new file mode 100644 index 000000000..81824a3e4 --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc @@ -0,0 +1,71 @@ += Building Opacity Micromaps in the Simple Engine + +== The Conceptual Architecture + +Understanding the hardware is one thing; seeing how that understanding translates into code is another. This chapter walks through the engine's Opacity Micromap implementation at a conceptual level. 
The full source is available in the `Courses/` folder alongside this curriculum, and we will point you to specific files throughout. The goal here is to understand the shape of the implementation: what happens when, why each step is necessary, and how the pieces connect. + +The implementation is organized around a single class, `OpacityMicromapBuilder`, defined in `opacity_micromap_builder.h` and implemented in `opacity_micromap_builder.cpp`. This class is responsible for the entire micromap lifecycle: checking hardware support, building the micromaps from mesh and texture data, managing their GPU memory, and providing them to the BLAS build process. + +== Phase Zero: Checking for Support + +Before any micromap work can begin, the engine must verify that the current GPU and driver support `VK_KHR_opacity_micromap`. This extension is relatively recent and is not universally available. The `OpacityMicromapBuilder` checks for the extension during device initialization and sets an internal flag. If the extension is absent, every subsequent call to the builder is a no-op — the engine proceeds with normal alpha testing, exactly as it did before micromaps were introduced. + +This graceful fallback is not optional; it is a hard requirement for any shipping application. Content must look correct regardless of whether OMMs are active. Because OMMs do not change the visual result (the any-hit shader remains the fallback for Unknown micro-triangles, and the shader's behavior is unchanged), disabling OMMs leaves the rendering visually identical while simply reverting to the higher per-frame cost. The fallback path requires no special code path in the shaders. + +The extension also requires that `VK_KHR_acceleration_structure` is enabled (which it already must be for any ray-traced rendering) and `VK_KHR_synchronization2` for the build commands. These dependencies are checked alongside the main extension. 
The feature structure used during device creation is `VkPhysicalDeviceOpacityMicromapFeaturesKHR`, which exposes the availability of the core micromap functionality and the optional lossy compression feature; the builder queries both and records which capabilities are active. + +== Phase One: Analysis + +When a mesh is submitted to the engine for loading, the `OpacityMicromapBuilder` inspects the mesh's material list. For each submesh, it examines the material properties and asks: is this an alpha-tested material? Specifically, does it have an alpha-mask texture and is it flagged to use alpha cutoff rather than full transparency blending? + +If the answer is yes, the submesh is a candidate for micromap generation. The builder records which submeshes need micromaps and determines the appropriate subdivision level for each one. The subdivision level choice is currently based on a simple heuristic: the builder examines the texture dimensions and the ratio of the mesh's triangle count to its surface area. Finely detailed textures on larger triangles benefit from higher subdivision, because each triangle covers more of the alpha mask, while small triangles with simple alpha patterns can use lower levels. The engine also respects the device's reported `maxOpacityLossy4StateSubdivisionLevel` and `maxMicromapTriangles` limits when selecting subdivision levels, ensuring that the chosen parameters remain within what the hardware can represent. A configuration file in the `Courses/` folder documents the heuristic and how to override it per-material. + +The analysis phase is fast and happens on the CPU as part of normal mesh loading. Its output is a per-submesh descriptor that the next phase consumes. + +== Phase Two: Classification + +Classification is where the substantive work happens. For each alpha-tested submesh, and for each triangle in that submesh, the builder iterates over the micro-triangles at the chosen subdivision level. 
For each micro-triangle, it computes the barycentric coordinates of the micro-triangle's centroid within the original triangle. From those coordinates, it interpolates the triangle's UV coordinates to find the centroid's position in texture space. It then samples the CPU-side alpha texture at that UV position. + +The sampling here is deliberately simple: a bilinear sample at the centroid, optionally averaged with several nearby jittered samples to reduce aliasing artifacts at the boundary. If the sampled alpha is above an upper threshold (typically 0.9), the micro-triangle is classified Opaque. Below a lower threshold (typically 0.1), it is Transparent. Between the thresholds, it is Unknown. + +The classification results are packed into a compact array — 2 bits per micro-triangle — which forms the raw input for the GPU micromap build. For large meshes, this classification step can take noticeable time, which is why it is typically performed as part of an offline pre-process for shipping content rather than at application startup. The engine supports both modes: online classification during load (for development) and loading pre-classified micromap data from a binary file (for production). + +It is worth pausing to appreciate the asymmetry here. The classification step runs once. The benefit accumulates over every frame the application renders. For a shipping game or visualization application, the payoff ratio is enormous. + +== Phase Three: GPU Construction + +Once the classification data is ready on the CPU, it is uploaded to a staging buffer and transferred to GPU-local memory. The builder then fills in a `VkAccelerationStructureBuildGeometryInfoKHR` structure, specifying `type = VK_ACCELERATION_STRUCTURE_TYPE_OPACITY_MICROMAP_KHR` along with the geometry type `VK_GEOMETRY_TYPE_MICROMAP_KHR`, the usage flags, per-triangle format descriptors, the input data buffer address, and a scratch buffer for the build operation. 
The destination acceleration structure handle is obtained by first calling `vkCreateAccelerationStructure2KHR` — the KHR extension reuses the standard acceleration structure creation path rather than introducing a dedicated micromap creation command. + +The builder then records a call to `vkCmdBuildAccelerationStructuresKHR` into a command buffer, which performs the actual build of the micromap on the device. Unlike the earlier EXT extension, `VK_KHR_opacity_micromap` does not provide a host-side build path — there is no CPU equivalent to this command, and all micromap construction must be submitted to a queue. This is consistent with the KHR philosophy of keeping acceleration structure work firmly on the device side, where the hardware's internal data layout can be determined and applied by the driver without round-tripping through host memory. + +The resulting `VkAccelerationStructureKHR` handle is stored alongside the submesh's existing BLAS geometry descriptor. In `VK_KHR_opacity_micromap`, micromaps are not a separate object type: the extension deliberately reuses the standard acceleration structure handle for micromaps, distinguishing them only by their creation type. This simplifies lifecycle management considerably — the same creation, barrier, and destruction patterns that apply to BVH acceleration structures apply equally to micromap acceleration structures, and the programmer does not need to learn a parallel set of object management rules. The handle must be created before the BLAS that references it is built, and it must not be destroyed while the BLAS is alive. + +A detail worth noting: the `vkCmdBuildAccelerationStructuresKHR` call for the micromap is recorded into a command buffer and submitted to the GPU queue, just like the BLAS build itself. 
The micromap data is not computed on the CPU and pushed to the GPU as a plain buffer — the build step performs its own internal compaction and layout transformation to pack the data into the traversal hardware's native format. The engine uses a single transfer and compute queue submission to build all micromaps for a newly loaded mesh, then signals a semaphore before proceeding to the BLAS build. + +== Connecting the Micromap to the BLAS + +The micromap `VkAccelerationStructureKHR` object by itself does nothing. It becomes effective only when it is attached to a BLAS geometry description before the BLAS is built or updated. This attachment happens through the `VkAccelerationStructureTrianglesOpacityMicromapKHR` structure, which is added to the `pNext` chain of the `VkAccelerationStructureGeometryTrianglesDataKHR` for the relevant submesh. The `micromap` field in this structure holds the `VkAccelerationStructureKHR` handle created above, and the `indexBuffer` field — which identifies which micromap entry corresponds to each triangle — is a plain `VkDeviceAddress`. The KHR extension removes the host-address variant present in the EXT version; only a device-side address is accepted here, which enforces the device-only construction model consistently through the entire pipeline. + +This chaining is the literal link between the human-readable concept ("this leaf mesh has a micromap") and the hardware-visible result ("this BLAS contains micromap data in its internal representation"). Once the BLAS is built with this chain in place, all subsequent traversal queries against that BLAS automatically consult the micromap — provided the querying shader has declared the necessary execution mode, as discussed in the previous chapter. + +The BLAS must be rebuilt after micromaps are attached. If the mesh is static — as foliage usually is — this is a one-time cost. 
If the mesh deforms (unusual for micromap use cases but possible), the micromap must be rebuilt and the BLAS must be updated accordingly. The `OpacityMicromapBuilder` tracks which BLASes have attached micromaps and flags them for rebuild if their micromap data changes. + +== The Shadow Shader Needs One Small Addition + +Here is the elegant part. After all of this infrastructure work — the analysis, the classification, the GPU build, the BLAS attachment — the shadow shader requires only a single addition, not a logical rewrite. + +Under `VK_KHR_opacity_micromap`, a ray query shader must declare the `OpacityMicromapKHR` SPIR-V execution mode in order for the traversal hardware to apply the micromap optimization. In GLSL this is expressed as enabling `gl_EnableOpacityMicromapExt` via the `GLSL_EXT_opacity_micromap_ray_query_mode` extension — one line at the top of the shader file. Without it, the GPU silently ignores the attached micromap and falls back to invoking the any-hit shader for every alpha-tested intersection, as though no micromap were present. The rest of the shader is untouched: the calls to `rayQueryInitializeEXT`, `rayQueryProceedEXT`, and the standard query functions remain identical. The micromap is still transparent to the shader's logic — the traversal hardware handles Opaque and Transparent micro-triangles before the shader layer is involved, and Unknown micro-triangles still invoke the any-hit shader through the same mechanism as before. The shader never learns whether a given intersection was resolved by the micromap or by the any-hit path. The single declaration is not a logic change; it is an opt-in that the KHR spec requires to ensure the shader's author has considered the subtle traversal-order implications of micromap participation. 
+ +Students should open `opacity_micromap_builder.cpp` in the `Courses/` folder to see the full implementation of the three phases, and look at the engine's main initialization sequence in `simple_engine.cpp` to see where `OpacityMicromapBuilder::buildForScene()` is called. The integration is deliberately minimal — the builder is a self-contained utility that plugs into the existing scene loading pipeline without restructuring it. + +== Memory and Object Lifetime + +Every micromap `VkAccelerationStructureKHR` created by the builder is stored in a vector of owned handles alongside the backing device memory allocation. The builder provides a `releaseAll()` method that destroys all micromap acceleration structures and frees their device memory. This is called when the scene is unloaded or when the engine shuts down. + +The ordering requirement is strict: BLASes must be destroyed before the micromap acceleration structures they reference. Because micromaps in `VK_KHR_opacity_micromap` are standard `VkAccelerationStructureKHR` objects, there is no separate destroy command to learn — the same `vkDestroyAccelerationStructureKHR` call that tears down a BLAS also tears down a micromap. The BLAS always holds a live reference to its micromap; there is no concept of a discardable or optional micromap attachment in the KHR model. The engine's teardown sequence respects this order, destroying all BLAS acceleration structures before calling `releaseAll()` on the micromaps. If you modify the engine's shutdown sequence, preserve this ordering. + +For very large scenes with many distinct mesh types, micromap memory can add up. The `OpacityMicromapBuilder` includes a method to query the total micromap memory footprint, which can be used to implement a budget-based policy: if micromap memory exceeds a threshold, lower-priority submeshes (those with fewer shadow-casting triangles in view) can be built at lower subdivision levels or skipped entirely. 
The device's reported `maxMicromapTriangles` property also informs the upper bound on what can be built in a single micromap acceleration structure, and the builder respects this limit when deciding whether to split large meshes across multiple micromap objects. + +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/04_hardware_traversal_with_omm.adoc[Previous: Inside the Traversal: How the GPU Uses Your Micromap] | xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/06_results_guidance_and_tradeoffs.adoc[Next: Seeing the Difference and Knowing When to Use It] diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/06_results_guidance_and_tradeoffs.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/06_results_guidance_and_tradeoffs.adoc new file mode 100644 index 000000000..6675f741f --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/06_results_guidance_and_tradeoffs.adoc @@ -0,0 +1,55 @@ += Seeing the Difference and Knowing When to Use It + +== What the Shadows Look Like + +Let's start with what you actually see. When you enable Opacity Micromaps in the simple engine and render a scene with dense foliage, the visual result should be indistinguishable from the non-micromap version. The leaf shadows should fall with the same detailed silhouettes. The penumbra gradients under the canopy should have the same softness. The individual leaf shapes should be identifiable in the shadow pattern on the ground below. + +This is a feature, not a limitation. Opacity Micromaps are a performance optimization, not an approximation. They are designed to produce exactly the same visual output as the fully shader-driven path, with the exception that the classification boundary (that thin ring of Unknown micro-triangles) relies on the same any-hit shader as before. The visual identity of the output is not a coincidence — it is a design requirement of the extension. + +What changes is what your profiler shows. 
In a scene dominated by foliage — a forest clearing, a garden terrace with broad-leafed plants, a park with dense overhead canopy — the time spent in the any-hit shader should drop dramatically. The reduction depends on the content, the subdivision level, and the fraction of shadow rays that intersect alpha-tested geometry, but reductions of 60 to 90 percent in any-hit invocation counts are common in well-suited scenes. For soft shadows with high sample counts, this translates directly into frame time savings that a GPU profiler will show as reduced time in the ray traversal stage. + +For a bistro-style scene — a terrace with wicker chairs, leafy overhead vines, translucent café umbrellas — the frame time improvement from OMMs can be substantial enough to unlock an additional soft-shadow sample tier without regressing the frame budget. That kind of quality-for-free improvement is exactly what well-targeted GPU optimizations feel like. + +== When OMMs Earn Their Keep + +Opacity Micromaps are most impactful in a specific combination of conditions. The first is **alpha-tested geometry in shadow paths**: the alpha-masked objects must actually be casting shadows that reach the pixels being rendered. A beautiful tree on the far side of the scene, outside the shadow camera frustum or facing away from the light, contributes nothing to shadow ray traffic and gains nothing from OMMs. + +The second condition is **high shadow ray counts**. The savings from OMMs are per-ray, so they scale with the number of shadow rays per pixel. Soft shadow configurations with 8, 16, or 32 samples per pixel amplify the benefit proportionally. If you are rendering hard shadows with a single ray per pixel, OMMs still help, but the absolute frame time saving is smaller. + +The third condition is **geometric complexity in the shadow path**. A sparse arrangement of a few leaf triangles does not create enough any-hit shader pressure to make the OMM overhead worthwhile. 
A dense canopy with thousands of overlapping leaf layers, through which many rays must pass before finding a solid hit or exiting, is exactly the case OMMs are designed to address. + +Scenes with indoor vegetation, dense forest corridors, parks, jungle environments, and realistic urban greenery are all strong candidates. Scenes with chain-link fencing, industrial gratings, perforated panels, and particle-based effects (where particle quads use alpha masks) also benefit significantly. + +== When OMMs Don't Help + +For fully opaque geometry — solid walls, floors, vehicles, characters without alpha-tested hair or clothing — the hardware already uses the fast committed-hit path. There are no any-hit shader invocations to eliminate. OMMs have no effect on opaque geometry performance, and you would not attach micromaps to opaque meshes. + +Simple scenes with small triangle counts are not good candidates. The overhead of building the micromaps, storing them in GPU memory, and attaching them to the BLAS is real, even if small. For a scene with a dozen leaf triangles, this overhead costs more than it saves. + +Scenes where the alpha-tested geometry is rarely in a shadow-casting configuration also see limited benefit. If the sun is behind the camera and the foliage is fully backlit, most of the foliage's alpha-tested triangles are not between any surface point and the light — the shadow rays never reach them. The any-hit shader invocation count for that geometry is already low. + +Finally, be aware that OMMs provide no benefit for **partial transparency** in the traditional sense — geometry with intermediate alpha values (like smoke, glass, or translucent foliage shaders) cannot use OMMs because those materials require the any-hit shader to compute physically accurate partial occlusion. OMMs are specifically for binary alpha masking where the correct answer is always fully opaque or fully transparent, never an intermediate value. 
+ +== Choosing a Subdivision Level + +The subdivision level governs the balance between accuracy and memory consumption. For most foliage textures, starting at level 2 (16 micro-triangles per original triangle) is a good first choice. At this level, the leaf edge is captured with sufficient resolution to keep the Unknown fraction small, and the memory overhead is modest. + +If profiling reveals that the any-hit shader is still consuming significant time — indicating a high Unknown fraction — increasing to level 3 (64 micro-triangles) typically improves the classification accuracy enough to make a meaningful difference. Going to level 4 (256 micro-triangles) is rarely necessary and can produce micromap data that exceeds the memory budget for large scenes. + +The engine's `OpacityMicromapBuilder` exposes a method to query the Unknown fraction after classification: what percentage of micro-triangles ended up in the Unknown state. If this fraction is above 15 or 20 percent, increasing the subdivision level is likely worthwhile. If it is already below 5 percent, the current level is probably optimal. + +`VK_KHR_opacity_micromap` introduces a **lossy build mode**, enabled by setting the `VK_BUILD_ACCELERATION_STRUCTURE_MICROMAP_LOSSY_BIT_KHR` flag when building the micromap. When this flag is set, the driver is permitted to apply lossy compression internally, and in exchange it may support subdivision levels beyond the standard `maxOpacity4StateSubdivisionLevel` cap — up to the value reported in `maxOpacityLossy4StateSubdivisionLevel`. For content where occasional reclassification of borderline micro-triangles is acceptable, this is a useful lever for pushing classification accuracy further than the standard hardware limits allow, at the cost of the driver taking greater liberties with the stored states. + +The hardware also imposes an absolute limit — `maxMicromapTriangles` — on the total number of micro-triangles that may be contained within a single micromap object. 
For very high-density meshes, such as large terrain patches or extremely detailed foliage clusters with many thousands of base triangles at a high subdivision level, the expanded micro-triangle count may exceed this limit. When that happens, the mesh must be split into multiple submeshes, each with its own micromap, ensuring that no individual micromap exceeds the reported maximum. Checking against this limit during the build-time planning stage avoids surprises at runtime. + +== Hardware Requirements and Graceful Fallback + +`VK_KHR_opacity_micromap` is the Khronos-ratified promotion of opacity micromap functionality into the core Vulkan extension ecosystem, and as such it is expected to see broad driver support as hardware and drivers mature. Feature availability is tested at device initialization by querying `VkPhysicalDeviceOpacityMicromapFeaturesKHR` — the extension is considered present and usable when the `micromap` field of that structure is `VK_TRUE`. The `OpacityMicromapBuilder` falls back gracefully when the feature is absent. + +The fallback behavior is not merely a safety net — it is a first-class configuration. Many users will run the application on hardware without OMM support, and the experience must be correct and acceptable. Because OMMs do not alter the rendered image, the fallback path is visually identical. The only difference is performance, which gracefully degrades to the pre-OMM baseline. + +Always design for the fallback. Never assume OMM availability. Use the profiler to measure both paths and document the performance difference for your content. The graceful fallback posture is a hallmark of well-engineered GPU features. 
+ +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/05_implementation_overview.adoc[Previous: Building Opacity Micromaps in the Simple Engine] | xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/07_conclusion.adoc[Next: Conclusion: Bridging Art and Hardware] diff --git a/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/07_conclusion.adoc b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/07_conclusion.adoc new file mode 100644 index 000000000..4d5e062cb --- /dev/null +++ b/en/Building_a_Simple_Engine/Courses/Opacity_Micromaps/07_conclusion.adoc @@ -0,0 +1,39 @@ += Conclusion: Bridging Art and Hardware + +== The Journey in Reverse + +We began this course with beautiful shadows and a hidden problem. The shadows looked right. The frames were rendering. But underneath the visual correctness, the GPU was engaged in an enormous amount of redundant discovery — re-learning, sixty times per second, which parts of every leaf were solid and which were air. We traced that redundancy to its root in the GPU's traversal hardware, understanding precisely why the any-hit shader is necessary, why it is expensive, why divergence and cache misses compound the cost, and why the problem grows with exactly the features that make rendering most visually compelling. + +Then we learned the solution in full. Opacity Micromaps pre-bake the answer to the GPU's repeated question. They subdivide each triangle into a grid of micro-triangles and classify each one with a permanent state. That classification, stored in the acceleration structure itself, allows the fixed-function traversal hardware to resolve the vast majority of alpha-tested intersections without ever entering the programmable shader domain. The any-hit shader is demoted from ubiquitous workhorse to rare specialist, called only for the genuinely uncertain edge regions. 
+ +The result is that shadows in foliage-heavy scenes run dramatically faster, look identical, and require only a single opt-in declaration in the shadow shader rather than any change to its logic. The savings multiply with soft-shadow sample counts, becoming most valuable precisely in the rendering configurations that matter most for visual quality. + +== A Recurring Theme in Graphics + +The journey from problem to solution here follows a pattern that appears throughout the history of real-time graphics optimization: move work from runtime to pre-process, from general-purpose compute to dedicated hardware, from discovery to knowledge. + +Lightmaps moved dynamic light evaluation to an offline bake. Precomputed radiance transfer moved complex lighting integrals to pre-computation. Mesh shaders moved geometry amplification to a more hardware-aware stage. In every case, the underlying insight is the same: if the answer is knowable in advance, pay the cost once and cache the result in the most hardware-accessible form possible. Opacity Micromaps are an exceptionally clean instance of this principle, because the pre-baked data is small, the benefit is large, and the integration into existing rendering pipelines is nearly seamless. + +Understanding this pattern gives you a transferable skill. The next time you encounter a performance bottleneck in a ray-traced or rasterized pipeline, one of the first questions to ask is: "Is this information that could have been known ahead of time?" If the answer is yes, there is likely an optimization to be found. + +== What You Now Understand + +You understand a genuinely modern, hardware-level GPU feature. `VK_KHR_opacity_micromap` is the Khronos-ratified evolution of the micromap concept, and understanding it in this form is significant: the KHR extension deliberately unifies the micromap object into `VkAccelerationStructureKHR` and removes host-side build commands in favor of a pure device-side API. 
That simplification is not an accident — it reflects what real hardware implementations actually support and removes a class of synchronization ambiguity that existed in the earlier extension. Many experienced graphics developers are not yet familiar with micromaps in this ratified form. + +You understand not just the API surface, but the reason the API exists. You know what problem it solves, at which level of the hardware it operates, and what the trade-offs are in practice. That depth of understanding is what separates someone who can use a feature from someone who can evaluate it, extend it, and explain it to others. + +== Next Steps + +Open the `OpacityMicromapBuilder` source files in the `Courses/` attachment folder. Read through the implementation with the conceptual framework from this course fresh in mind. Notice how the three phases — analysis, classification, construction — map to the code structure. Look for the subdivision level heuristic and think about whether it fits the content you are rendering. + +When inspecting the ray query shader, look for the `OpacityMicromapKHR` SPIR-V execution mode declaration. With `VK_KHR_opacity_micromap`, shaders that participate in opacity micromap evaluation are required to declare this execution mode explicitly; without it, the driver cannot enable the micromap fast path for that shader. It is a small detail, but an important one to recognise when reading or writing ray query shaders that interact with micromap-enabled geometry. + +Enable and disable OMMs in the engine using the configuration flag, and use a GPU profiler — NVIDIA Nsight Graphics or AMD Radeon GPU Profiler — to capture both configurations on a foliage-heavy scene. Look at the any-hit shader invocation count, the time in the BVH traversal stage, and the warp occupancy metrics. The numbers will give you intuition that no amount of explanation can fully replace. 
+ +When you are ready to go further, explore `VK_NV_displacement_micromap` — the sibling extension (currently an NVIDIA vendor extension) that uses a similar micro-triangle subdivision model not for opacity, but for geometric displacement. Where Opacity Micromaps let you bake alpha into the acceleration structure, Displacement Micromaps let you bake fine surface detail — the kind normally stored in displacement maps — directly into the BVH without the polygon overhead of tessellating that detail into the mesh. The conceptual foundation you have built in this course transfers directly. + +The hardware is clever. Learning to think alongside it is how you become a more effective graphics engineer. + +''' + +xref:Building_a_Simple_Engine/Courses/Opacity_Micromaps/06_results_guidance_and_tradeoffs.adoc[Previous: Seeing the Difference and Knowing When to Use It] diff --git a/images/omm_foliage_problem.svg b/images/omm_foliage_problem.svg new file mode 100644 index 000000000..b29129434 --- /dev/null +++ b/images/omm_foliage_problem.svg @@ -0,0 +1,99 @@ + + + + + + + + + + + + + + + Alpha-Tested Foliage: The Performance Problem + + + + + + + + + + + + + + + + + + + + + Transparent + Transparent + Transparent + Transparent + Opaque Leaf + + + One GPU triangle = many transparent pixels + one leaf shape + + + + + + + + + Ray 1 + + + ? + + Transparent → + ray continues + + + + + + Ray 2 + + ? + Opaque → + ray blocked + + + + + Ray 3 + + ? + → ray continues + + + + + + Runtime Cost (Per Frame) + For EVERY shadow ray: + 1. Find UV at hit point (barycentrics) + 2. Sample alpha texture + 3. Decide: pass or block? 
+ + + + Every shadow ray × every leaf triangle = one texture sample each frame + Thousands of triangles × millions of rays × 60 fps = enormous shader cost + + + + + + + + diff --git a/images/omm_hard_shadow.svg b/images/omm_hard_shadow.svg new file mode 100644 index 000000000..bb052916f --- /dev/null +++ b/images/omm_hard_shadow.svg @@ -0,0 +1,79 @@ + + + + + + Hard Shadows — Point Light Source + + + + Ground Plane + + + + + + + + + + + + Hard Shadow + Sharp, well-defined edge + + + + Opaque + Object + + + + + + + + + + + + + + + + Point Light + Source + + + + + + + + + + + + + + + + + + + + Sharp Edge + + + + Sharp Edge + + + Fully + Lit + + + + A point source produces + a perfectly crisp boundary + diff --git a/images/omm_shadow_ray_lifecycle.svg b/images/omm_shadow_ray_lifecycle.svg new file mode 100644 index 000000000..3c9f70e58 --- /dev/null +++ b/images/omm_shadow_ray_lifecycle.svg @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + + + + + + + + + + Shadow Ray Lifecycle with Opacity Micromaps + + + + + + Shadow Ray Cast + + + + + + + Enter BVH — Test Boxes + + + + + + + Triangle Candidate Found + + + + + + + Consult Opacity Micromap + Hardware fixed-function — no shader cost + + + + + + State? + + + + + + + Opaque + + Ray Blocked + → In Shadow! + + + ✓ No shader needed + + + + + + Transparent + + Ray Passes + Continue BVH + + + + next + tri? 
+ + + ✓ No shader needed + + + + + + Unknown + + + + Any-Hit Shader Runs + rare — only for edge micro-triangles + + + + + + Check Alpha Texture + (same as before, but rare) + + + + + + + + α < 0.5 + Continue + + + + + α ≥ 0.5 + Blocked + + + + + + No Hit Found + → Point is Lit ☀ + + + + diff --git a/images/omm_soft_shadow.svg b/images/omm_soft_shadow.svg new file mode 100644 index 000000000..f99609924 --- /dev/null +++ b/images/omm_soft_shadow.svg @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + Soft Shadows — Area Light Source + + + + + + + + + + + + + Fully Lit + Penumbra + (Partial Shadow) + Umbra + (Full Shadow) + Penumbra + (Partial Shadow) + Fully Lit + + + + Opaque + Object + + + + + + + + Area Light Source + (finite size = soft edges) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Multiple light points mean different parts + of the object block different parts of the light + diff --git a/images/omm_traversal_comparison.svg b/images/omm_traversal_comparison.svg new file mode 100644 index 000000000..b847059cc --- /dev/null +++ b/images/omm_traversal_comparison.svg @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + BVH Traversal: Before vs After Opacity Micromaps + + + + + + + + Without OMM + Standard Alpha Testing + + + + + Shadow Ray Cast + + + + + + + BVH Box Test + + + + + + + Triangle Candidate + + + + + + + Any-Hit Shader + runs every hit + + + + + + + Sample Alpha Texture + cache miss likely + + + + + + + Alpha ≥ 0.5 → block + + + + Loop: + next + triangle + + + + N shader invocations per ray + divergence + cache misses = slow + + + + + With Opacity Micromaps + Micromap-Assisted Traversal + + + + Shadow Ray Cast + + + + + + BVH Box Test + + + + + + Micromap Check + Fixed-function hardware + + + + + + + State? 
+ + + + + Opaque + Blocked — No shader + + + + + Transparent + Passed — No shader + + + + + Unknown + → any-hit shader (rare) + + + ✓ Hardware only + ✓ Hardware only + + + + ~5% shader invocations per ray + mostly hardware-handled = fast + diff --git a/images/omm_triangle_subdivision.svg b/images/omm_triangle_subdivision.svg new file mode 100644 index 000000000..565250094 --- /dev/null +++ b/images/omm_triangle_subdivision.svg @@ -0,0 +1,102 @@ + + + + + + Opacity Micromap Triangle Subdivision + + + Each source triangle is subdivided into a grid of micro-triangles, each assigned an opacity state + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Legend + + Opaque — hardware blocks ray + + Transparent — hardware passes ray + + + Unknown — any-hit shader runs + + + + Subdivision Level 2 + = 4² = 16 micro-triangles + per source triangle + + + + Edge + boundary + (unknown) +