diff --git a/cmake/common.cmake b/cmake/common.cmake index e70994dcdb..6c10a0a450 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1259,10 +1259,10 @@ struct DeviceConfigCaps if(NOT NBL_EMBED_BUILTIN_RESOURCES) list(APPEND REQUIRED_OPTIONS -no-nbl-builtins - -I "${NBL_ROOT_PATH}/include" - -I "${NBL_ROOT_PATH}/3rdparty/dxc/dxc/external/SPIRV-Headers/include" - -I "${NBL_ROOT_PATH}/3rdparty/boost/superproject/libs/preprocessor/include" - -I "${NBL_ROOT_PATH_BINARY}/src/nbl/device/include" + -isystem "${NBL_ROOT_PATH}/include" + -isystem "${NBL_ROOT_PATH}/3rdparty/dxc/dxc/external/SPIRV-Headers/include" + -isystem "${NBL_ROOT_PATH}/3rdparty/boost/superproject/libs/preprocessor/include" + -isystem "${NBL_ROOT_PATH_BINARY}/src/nbl/device/include" ) endif() @@ -1306,12 +1306,6 @@ struct DeviceConfigCaps TARGET ${IMPL_TARGET} ) - target_sources(${IMPL_TARGET} PUBLIC ${INCLUDE_FILE}) - set_source_files_properties(${INCLUDE_FILE} PROPERTIES - HEADER_FILE_ONLY ON - VS_TOOL_OVERRIDE None - ) - target_compile_definitions(${IMPL_TARGET} INTERFACE $) target_include_directories(${IMPL_TARGET} INTERFACE ${INCLUDE_DIR}) set_target_properties(${IMPL_TARGET} PROPERTIES NBL_HEADER_GENERATED_RULE ON) diff --git a/examples_tests b/examples_tests index 46826a50d9..887100fd44 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 46826a50d9f7cd081cb422c2457618405794b62a +Subproject commit 887100fd445775f21ba10faae0c91366de7e913b diff --git a/include/nbl/asset/utils/ISPIRVEntryPointTrimmer.h b/include/nbl/asset/utils/ISPIRVEntryPointTrimmer.h index a2e24dabab..fceea0aac3 100644 --- a/include/nbl/asset/utils/ISPIRVEntryPointTrimmer.h +++ b/include/nbl/asset/utils/ISPIRVEntryPointTrimmer.h @@ -7,10 +7,12 @@ #include "nbl/system/ILogger.h" +#include + namespace nbl::asset { -class ISPIRVEntryPointTrimmer final : public core::IReferenceCounted +class NBL_API2 ISPIRVEntryPointTrimmer final : public core::IReferenceCounted { public: ISPIRVEntryPointTrimmer(); @@ -46,6 +48,8 @@ class ISPIRVEntryPointTrimmer final : public core::IReferenceCounted }; Result trim(const ICPUBuffer* spirvBuffer, const core::set& entryPoints, system::logger_opt_ptr logger = nullptr) const; + bool ensureValidated(const ICPUBuffer* spirvBuffer, system::logger_opt_ptr logger = nullptr) const; + void markValidated(const ICPUBuffer* spirvBuffer) const; inline core::smart_refctd_ptr trim(const IShader* shader, const core::set& entryPoints, system::logger_opt_ptr logger = nullptr) const { @@ -72,6 +76,8 @@ class ISPIRVEntryPointTrimmer final : public core::IReferenceCounted private: core::smart_refctd_ptr m_optimizer; + mutable std::mutex m_validationCacheMutex; + mutable core::unordered_set m_validatedSpirvHashes; }; } diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index f3cfe07132..05116b8d52 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -17,6 +17,8 @@ #include "nbl/builtin/hlsl/enums.hlsl" #include +#include +#include namespace nbl::asset { @@ -26,6 +28,25 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted public: IShaderCompiler(core::smart_refctd_ptr&& system); + enum class IncludeRootOrigin : uint8_t + { + User, + Builtin, + Generated + }; + + enum class HeaderClass : uint8_t + { + User, + System + }; + + struct IncludeClassification + { + IncludeRootOrigin origin = IncludeRootOrigin::User; + HeaderClass headerClass = HeaderClass::User; + }; + class NBL_API2 IIncludeLoader : public core::IReferenceCounted { public: @@ -34,12 +55,13 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted system::path absolutePath = {}; std::string contents = {}; core::blake3_hash_t hash = {}; // TODO: we're not yet using IFile::getPrecomputedHash(), so for builtins we can maybe use that in the future + IncludeClassification classification = {}; // Could be used in the future for early rejection of cache hit //nbl::system::IFileBase::time_point_t lastWriteTime = {}; explicit inline operator bool() const {return !absolutePath.empty();} }; - virtual found_t getInclude(const system::path& searchPath, const std::string& includeName) const = 0; + virtual found_t getInclude(const system::path& searchPath, const std::string& includeName, bool needHash = true) const = 0; }; class NBL_API2 IIncludeGenerator : public core::IReferenceCounted @@ -65,7 +87,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted public: CFileSystemIncludeLoader(core::smart_refctd_ptr&& system); - IIncludeLoader::found_t getInclude(const system::path& searchPath, const std::string& includeName) const override; + IIncludeLoader::found_t getInclude(const system::path& searchPath, const std::string& includeName, bool needHash = true) const override; protected: core::smart_refctd_ptr m_system; @@ -74,37 +96,88 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted class NBL_API2 CIncludeFinder : public core::IReferenceCounted { public: + struct SSessionCache + { + struct Stats + { + uint64_t lookupFound = 0ull; + uint64_t lookupMissing = 0ull; + uint64_t lookupMiss = 0ull; + uint64_t storeFound = 0ull; + uint64_t storeMissing = 0ull; + }; + + enum class LookupResult : uint8_t + { + Miss, + Missing, + Found + }; + + explicit SSessionCache(const bool threadSafe = false) : threadSafe(threadSafe) {} + + void clear(); + LookupResult lookup(const std::string& key, IIncludeLoader::found_t& result) const; + void store(const std::string& key, IIncludeLoader::found_t result); + Stats snapshotStats() const; + + bool threadSafe = false; + + mutable std::mutex mutex; + mutable Stats stats; + core::unordered_map found; + core::unordered_set missing; + }; + CIncludeFinder(core::smart_refctd_ptr&& system); // ! includes within <> // @param requestingSourceDir: the directory where the incude was requested // @param includeName: the string within <> of the include preprocessing directive - IIncludeLoader::found_t getIncludeStandard(const system::path& requestingSourceDir, const std::string& includeName) const; + IIncludeLoader::found_t getIncludeStandard(const system::path& requestingSourceDir, const std::string& includeName, bool needHash = true, SSessionCache* readSessionCache = nullptr, SSessionCache* writeSessionCache = nullptr) const; // ! includes within "" // @param requestingSourceDir: the directory where the incude was requested // @param includeName: the string within "" of the include preprocessing directive - IIncludeLoader::found_t getIncludeRelative(const system::path& requestingSourceDir, const std::string& includeName) const; + IIncludeLoader::found_t getIncludeRelative(const system::path& requestingSourceDir, const std::string& includeName, bool needHash = true, SSessionCache* readSessionCache = nullptr, SSessionCache* writeSessionCache = nullptr) const; inline core::smart_refctd_ptr getDefaultFileSystemLoader() const { return m_defaultFileSystemLoader; } - void addSearchPath(const std::string& searchPath, const core::smart_refctd_ptr& loader); + void addSearchPath(const std::string& searchPath, const core::smart_refctd_ptr& loader, IncludeClassification classification = {}); - void addGenerator(const core::smart_refctd_ptr& generator); + void addGenerator(const core::smart_refctd_ptr& generator, IncludeClassification classification = {IncludeRootOrigin::Generated,HeaderClass::System}); + + bool isKnownGlobalInclude(std::string_view includeName) const; + IIncludeLoader::found_t classifyFound(IIncludeLoader::found_t found) const; protected: - IIncludeLoader::found_t trySearchPaths(const std::string& includeName) const; + IIncludeLoader::found_t trySearchPaths(const std::string& includeName, bool needHash) const; IIncludeLoader::found_t tryIncludeGenerators(const std::string& includeName) const; + void registerHeaderRoot(std::string rootPath, IncludeClassification classification); struct LoaderSearchPath { core::smart_refctd_ptr loader = nullptr; std::string searchPath = {}; + IncludeClassification classification = {}; + }; + + struct GeneratorEntry + { + core::smart_refctd_ptr generator = nullptr; + IncludeClassification classification = {IncludeRootOrigin::Generated,HeaderClass::System}; + }; + + struct HeaderRoot + { + std::string path = {}; + IncludeClassification classification = {}; }; std::vector m_loaders; - std::vector> m_generators; + std::vector m_generators; + std::vector m_headerRoots; core::smart_refctd_ptr m_defaultFileSystemLoader; }; @@ -134,9 +207,12 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted std::string_view sourceIdentifier = ""; system::logger_opt_ptr logger = nullptr; const CIncludeFinder* includeFinder = nullptr; + CIncludeFinder::SSessionCache* readIncludeSessionCache = nullptr; + CIncludeFinder::SSessionCache* writeIncludeSessionCache = nullptr; std::span extraDefines = {}; E_SPIRV_VERSION targetSpirvVersion = E_SPIRV_VERSION::ESV_1_6; bool depfile = false; + bool preserveComments = false; system::path depfilePath = {}; std::function onPartialOutputOnFailure = {}; }; diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl index cb7743e02d..65db32f336 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl @@ -4,9 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_BXDF_REFLECTION_BECKMANN_INCLUDED_ #define _NBL_BUILTIN_HLSL_BXDF_REFLECTION_BECKMANN_INCLUDED_ -#include "nbl/builtin/hlsl/bxdf/common.hlsl" #include "nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl" -#include "nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl" #include "nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl" #include "nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl" diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl index 0f49d0be43..a984a14b3f 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl @@ -4,9 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_BXDF_REFLECTION_GGX_INCLUDED_ #define _NBL_BUILTIN_HLSL_BXDF_REFLECTION_GGX_INCLUDED_ -#include "nbl/builtin/hlsl/bxdf/common.hlsl" #include "nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl" -#include "nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl" #include "nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl" #include "nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl" diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl index 8c61692c5c..b911968d16 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl @@ -4,10 +4,8 @@ #ifndef _NBL_BUILTIN_HLSL_BXDF_TRANSMISSION_BECKMANN_INCLUDED_ #define _NBL_BUILTIN_HLSL_BXDF_TRANSMISSION_BECKMANN_INCLUDED_ -#include "nbl/builtin/hlsl/bxdf/common.hlsl" #include "nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl" -#include "nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl" -#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" +#include "nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl" #include "nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl" namespace nbl diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl index cdd4483c7f..a095d5fdba 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl @@ -4,10 +4,7 @@ #ifndef _NBL_BUILTIN_HLSL_BXDF_TRANSMISSION_GGX_INCLUDED_ #define _NBL_BUILTIN_HLSL_BXDF_TRANSMISSION_GGX_INCLUDED_ -#include "nbl/builtin/hlsl/bxdf/common.hlsl" #include "nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl" -#include "nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl" -#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" #include "nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl" namespace nbl diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index da32fab7b0..54c07ddb27 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -2,7 +2,6 @@ #define _NBL_BUILTIN_HLSL_EMULATED_FLOAT64_T_HLSL_INCLUDED_ #include -#include namespace nbl { diff --git a/include/nbl/builtin/hlsl/member_test_macros.hlsl b/include/nbl/builtin/hlsl/member_test_macros.hlsl index 7579fb0fa2..556c6a463e 100644 --- a/include/nbl/builtin/hlsl/member_test_macros.hlsl +++ b/include/nbl/builtin/hlsl/member_test_macros.hlsl @@ -5,7 +5,10 @@ #define _NBL_BUILTIN_HLSL_MEMBER_TEST_MACROS_INCLUDED_ #include -#include +#include +#include +#include +#include #ifdef __HLSL_VERSION @@ -123,4 +126,4 @@ GENERATE_METHOD_TESTER(set) #endif -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/path_tracing/concepts.hlsl b/include/nbl/builtin/hlsl/path_tracing/concepts.hlsl index 25ca98772c..140a800c81 100644 --- a/include/nbl/builtin/hlsl/path_tracing/concepts.hlsl +++ b/include/nbl/builtin/hlsl/path_tracing/concepts.hlsl @@ -5,7 +5,6 @@ #define _NBL_BUILTIN_HLSL_PATH_TRACING_CONCEPTS_INCLUDED_ #include -#include namespace nbl { @@ -15,6 +14,17 @@ namespace path_tracing { namespace concepts { +namespace impl +{ +template +struct DummyRayInteraction +{ + using vector3_type = Vector3; + + vector3_type getN() NBL_CONST_MEMBER_FUNC; + bool isMaterialBSDF() NBL_CONST_MEMBER_FUNC; +}; +} #define NBL_CONCEPT_NAME RandGenerator #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) @@ -38,7 +48,7 @@ NBL_CONCEPT_END( #define NBL_CONCEPT_TPLT_PRM_NAMES (T) #define NBL_CONCEPT_PARAM_0 (ray, T) #define NBL_CONCEPT_PARAM_1 (v, typename T::vector3_type) -#define NBL_CONCEPT_PARAM_2 (interaction, bxdf::surface_interactions::SIsotropic, typename T::spectral_type>) +#define NBL_CONCEPT_PARAM_2 (interaction, impl::DummyRayInteraction) #define NBL_CONCEPT_PARAM_3 (scalar, typename T::scalar_type) #define NBL_CONCEPT_PARAM_4 (color, typename T::spectral_type) NBL_CONCEPT_BEGIN(5) @@ -52,7 +62,7 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) ((NBL_CONCEPT_REQ_TYPE)(T::spectral_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.init(v/*origin*/, v/*direction*/)), ::nbl::hlsl::is_same_v, void)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.template setInteraction, typename T::spectral_type> >(interaction)), ::nbl::hlsl::is_same_v, void)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.setInteraction(interaction)), ::nbl::hlsl::is_same_v, void)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.initPayload()), ::nbl::hlsl::is_same_v, void)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.shouldDoMIS()), ::nbl::hlsl::is_same_v, bool)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.foundEmissiveMIS(scalar)), ::nbl::hlsl::is_same_v, typename T::scalar_type)) @@ -124,6 +134,7 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(T::scene_type)) ((NBL_CONCEPT_REQ_TYPE)(T::ray_type)) ((NBL_CONCEPT_REQ_TYPE)(T::object_handle_type)) + ((NBL_CONCEPT_REQ_TYPE)(T::anisotropic_interaction_type)) ((NBL_CONCEPT_REQ_TYPE)(T::closest_hit_type)) ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(IntersectorClosestHit, typename T::closest_hit_type)) ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Ray, typename T::ray_type)) @@ -136,6 +147,26 @@ NBL_CONCEPT_END( #undef intersect #include +#define NBL_CONCEPT_NAME UnidirectionalInteractionContract +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (RayT)(IntersectorT)(MaterialSystemT) +#define NBL_CONCEPT_PARAM_0 (ray, RayT) +#define NBL_CONCEPT_PARAM_1 (hit, typename IntersectorT::closest_hit_type) +#define NBL_CONCEPT_PARAM_2 (interaction, typename MaterialSystemT::anisotropic_interaction_type) +NBL_CONCEPT_BEGIN(3) +#define ray NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define hit NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define interaction NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((hit.getInteraction()), ::nbl::hlsl::is_same_v, typename IntersectorT::anisotropic_interaction_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((hit.getInteraction()), ::nbl::hlsl::is_same_v, typename MaterialSystemT::anisotropic_interaction_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((ray.setInteraction(interaction)), ::nbl::hlsl::is_same_v, void)) +); +#undef interaction +#undef hit +#undef ray +#include + #define NBL_CONCEPT_NAME BxdfNode #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (T) diff --git a/include/nbl/builtin/hlsl/path_tracing/unidirectional.hlsl b/include/nbl/builtin/hlsl/path_tracing/unidirectional.hlsl index 3fff1bc929..0c47c48085 100644 --- a/include/nbl/builtin/hlsl/path_tracing/unidirectional.hlsl +++ b/include/nbl/builtin/hlsl/path_tracing/unidirectional.hlsl @@ -5,11 +5,8 @@ #define _NBL_BUILTIN_HLSL_PATH_TRACING_UNIDIRECTIONAL_INCLUDED_ #include -#include #include #include -#include -#include #include namespace nbl @@ -22,6 +19,7 @@ namespace path_tracing template && concepts::Ray && concepts::Intersector && concepts::MaterialSystem && + concepts::UnidirectionalInteractionContract && concepts::NextEventEstimator && concepts::Accumulator && concepts::Scene) struct Unidirectional diff --git a/include/nbl/builtin/hlsl/tgmath.hlsl b/include/nbl/builtin/hlsl/tgmath.hlsl index c569d34f85..40ad48c13c 100644 --- a/include/nbl/builtin/hlsl/tgmath.hlsl +++ b/include/nbl/builtin/hlsl/tgmath.hlsl @@ -7,12 +7,8 @@ #include #include #include -#include #include #include -#include -#include -#include #include // C++ headers diff --git a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h index 597ebdbd4e..39013417dc 100644 --- a/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h +++ b/include/nbl/ext/FullScreenTriangle/FullScreenTriangle.h @@ -24,7 +24,8 @@ struct ProtoPipeline final const video::IGPURenderpass* renderpass, const uint32_t subpassIx=0, asset::SBlendParams blendParams = {}, - const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform=hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT + const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform=hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT, + video::IGPUPipelineCache* pipelineCache = nullptr ); core::smart_refctd_ptr m_vxShader; diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index 4af5b150ea..36c19f1961 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -144,6 +144,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted void unmountBuiltins(); bool areBuiltinsMounted() const; size_t getMountedBuiltinArchiveCount() const; + core::vector getBuiltinMountAliases() const; inline size_t getMountedArchiveCount() const { return m_cachedArchiveFiles.getSize(); } // diff --git a/include/nbl/video/CJITIncludeLoader.h b/include/nbl/video/CJITIncludeLoader.h index 3b341631f4..04be1ea60a 100644 --- a/include/nbl/video/CJITIncludeLoader.h +++ b/include/nbl/video/CJITIncludeLoader.h @@ -20,7 +20,7 @@ class NBL_API2 CJITIncludeLoader : public asset::IShaderCompiler::IIncludeLoader m_includes["nbl/builtin/hlsl/jit/device_capabilities.hlsl"] = collectDeviceCaps(limits,features); } - found_t getInclude(const system::path& searchPath, const std::string& includeName) const override; + found_t getInclude(const system::path& searchPath, const std::string& includeName, bool needHash = true) const override; protected: template diff --git a/src/nbl/asset/utils/CGLSLCompiler.cpp b/src/nbl/asset/utils/CGLSLCompiler.cpp index a593a11597..9b8bde5d0c 100644 --- a/src/nbl/asset/utils/CGLSLCompiler.cpp +++ b/src/nbl/asset/utils/CGLSLCompiler.cpp @@ -44,11 +44,13 @@ namespace nbl::asset::impl class Includer : public shaderc::CompileOptions::IncluderInterface { const IShaderCompiler::CIncludeFinder* m_defaultIncludeFinder; + IShaderCompiler::CIncludeFinder::SSessionCache* m_readIncludeSessionCache; + IShaderCompiler::CIncludeFinder::SSessionCache* m_writeIncludeSessionCache; const system::ISystem* m_system; const uint32_t m_maxInclCnt; public: - Includer(const IShaderCompiler::CIncludeFinder* _inclFinder, const system::ISystem* _fs, uint32_t _maxInclCnt) : m_defaultIncludeFinder(_inclFinder), m_system(_fs), m_maxInclCnt{ _maxInclCnt } {} + Includer(const IShaderCompiler::CIncludeFinder* _inclFinder, IShaderCompiler::CIncludeFinder::SSessionCache* _readIncludeSessionCache, IShaderCompiler::CIncludeFinder::SSessionCache* _writeIncludeSessionCache, const system::ISystem* _fs, uint32_t _maxInclCnt) : m_defaultIncludeFinder(_inclFinder), m_readIncludeSessionCache(_readIncludeSessionCache), m_writeIncludeSessionCache(_writeIncludeSessionCache), m_system(_fs), m_maxInclCnt{ _maxInclCnt } {} //_requesting_source in top level #include's is what shaderc::Compiler's compiling functions get as `input_file_name` parameter //so in order for properly working relative #include's (""-type) `input_file_name` has to be path to file from which the GLSL source really come from @@ -81,11 +83,11 @@ namespace nbl::asset::impl IShaderCompiler::IIncludeLoader::found_t result; if (_type == shaderc_include_type_relative) { - result = m_defaultIncludeFinder->getIncludeRelative(relDir, _requested_source); + result = m_defaultIncludeFinder->getIncludeRelative(relDir, _requested_source, true, m_readIncludeSessionCache, m_writeIncludeSessionCache); } else //shaderc_include_type_standard { - result = m_defaultIncludeFinder->getIncludeStandard(relDir, _requested_source); + result = m_defaultIncludeFinder->getIncludeStandard(relDir, _requested_source, true, m_readIncludeSessionCache, m_writeIncludeSessionCache); } if (!result) @@ -136,10 +138,23 @@ CGLSLCompiler::CGLSLCompiler(core::smart_refctd_ptr&& system) std::string CGLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADER_STAGE& stage, const SPreprocessorOptions& preprocessOptions, std::vector* dependencies) const { + auto effectiveOptions = preprocessOptions; + IShaderCompiler::CIncludeFinder::SSessionCache localIncludeSessionCache; + if (effectiveOptions.includeFinder) + { + if (!effectiveOptions.readIncludeSessionCache && !effectiveOptions.writeIncludeSessionCache) + { + effectiveOptions.readIncludeSessionCache = &localIncludeSessionCache; + effectiveOptions.writeIncludeSessionCache = &localIncludeSessionCache; + } + else if (!effectiveOptions.readIncludeSessionCache && effectiveOptions.writeIncludeSessionCache) + effectiveOptions.readIncludeSessionCache = effectiveOptions.writeIncludeSessionCache; + } + if (!preprocessOptions.extraDefines.empty()) { std::ostringstream insertion; - for (const auto& define : preprocessOptions.extraDefines) + for (const auto& define : effectiveOptions.extraDefines) insertion << "#define " << define.identifier << " " << define.definition << "\n"; insertIntoStart(code,std::move(insertion)); } @@ -149,15 +164,15 @@ std::string CGLSLCompiler::preprocessShader(std::string&& code, IShader::E_SHADE shaderc::CompileOptions options; options.SetTargetSpirv(shaderc_spirv_version_1_6); - if (preprocessOptions.includeFinder != nullptr) + if (effectiveOptions.includeFinder != nullptr) { - options.SetIncluder(std::make_unique(preprocessOptions.includeFinder, m_system.get(), /*maxSelfInclusionCount*/5));//custom #include handler + options.SetIncluder(std::make_unique(effectiveOptions.includeFinder, effectiveOptions.readIncludeSessionCache, effectiveOptions.writeIncludeSessionCache, m_system.get(), /*maxSelfInclusionCount*/5));//custom #include handler } const shaderc_shader_kind scstage = stage == IShader::E_SHADER_STAGE::ESS_UNKNOWN ? shaderc_glsl_infer_from_source : ESStoShadercEnum(stage); - auto res = comp.PreprocessGlsl(code, scstage, preprocessOptions.sourceIdentifier.data(), options); + auto res = comp.PreprocessGlsl(code, scstage, effectiveOptions.sourceIdentifier.data(), options); if (res.GetCompilationStatus() != shaderc_compilation_status_success) { - preprocessOptions.logger.log("%s\n", system::ILogger::ELL_ERROR, res.GetErrorMessage().c_str()); + effectiveOptions.logger.log("%s\n", system::ILogger::ELL_ERROR, res.GetErrorMessage().c_str()); return nullptr; } diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index a3d1b3acf9..6fec81c8cc 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -454,12 +454,25 @@ static std::string preprocessShaderImpl( std::vector* dependencies, system::ISystem* system) { - const bool depfileEnabled = preprocessOptions.depfile; + auto effectiveOptions = preprocessOptions; + IShaderCompiler::CIncludeFinder::SSessionCache localIncludeSessionCache; + if (effectiveOptions.includeFinder) + { + if (!effectiveOptions.readIncludeSessionCache && !effectiveOptions.writeIncludeSessionCache) + { + effectiveOptions.readIncludeSessionCache = &localIncludeSessionCache; + effectiveOptions.writeIncludeSessionCache = &localIncludeSessionCache; + } + else if (!effectiveOptions.readIncludeSessionCache && effectiveOptions.writeIncludeSessionCache) + effectiveOptions.readIncludeSessionCache = effectiveOptions.writeIncludeSessionCache; + } + + const bool depfileEnabled = effectiveOptions.depfile; if (depfileEnabled) { - if (preprocessOptions.depfilePath.empty()) + if (effectiveOptions.depfilePath.empty()) { - preprocessOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); + effectiveOptions.logger.log("Depfile path is empty.", system::ILogger::ELL_ERROR); return {}; } } @@ -473,7 +486,7 @@ static std::string preprocessShaderImpl( ensureTrailingNewline(code); // preprocess - core::string resolvedString = nbl::wave::preprocess(code, preprocessOptions, bool(dependenciesOut), + core::string resolvedString = nbl::wave::preprocess(code, effectiveOptions, bool(dependenciesOut), [&dxc_compile_flags_override, &stage, &dependenciesOut](nbl::wave::context& context) -> void { if (context.get_hooks().m_dxc_compile_flags_override.size() != 0) @@ -494,13 +507,13 @@ static std::string preprocessShaderImpl( if (depfileEnabled) { IShaderCompiler::DepfileWriteParams params = {}; - const std::string depfilePathString = preprocessOptions.depfilePath.generic_string(); + const std::string depfilePathString = effectiveOptions.depfilePath.generic_string(); params.depfilePath = depfilePathString; - params.sourceIdentifier = preprocessOptions.sourceIdentifier; + params.sourceIdentifier = effectiveOptions.sourceIdentifier; if (!params.sourceIdentifier.empty()) params.workingDirectory = std::filesystem::path(std::string(params.sourceIdentifier)).parent_path(); params.system = system; - if (!IShaderCompiler::writeDepfile(params, *dependenciesOut, preprocessOptions.includeFinder, preprocessOptions.logger)) + if (!IShaderCompiler::writeDepfile(params, *dependenciesOut, effectiveOptions.includeFinder, effectiveOptions.logger)) return {}; } diff --git a/src/nbl/asset/utils/CWaveStringResolver.cpp b/src/nbl/asset/utils/CWaveStringResolver.cpp index 8da0e828ec..f8f3b8aa5c 100644 --- a/src/nbl/asset/utils/CWaveStringResolver.cpp +++ b/src/nbl/asset/utils/CWaveStringResolver.cpp @@ -8,35 +8,6 @@ options remain and there is no mismatch, we force agressive inlining and optimizations mostly regardless build configuration by default */ -/* - Arek leaving thoughts, TODO: - - in NBL_WAVE_STRING_RESOLVER_TU_DEBUG_OPTIMISATION mode enabled -> here in this TU do - - #define _ITERATOR_DEBUG_LEVEL 0 - #define _HAS_ITERATOR_DEBUGGING 0 - - and allow Nabla to mismatch debug iterator *on purpose* by - - #define _ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH - - in Debug/RWDI - - then make preprocess full C API with raw in/out pointers and bytes out pointer, - with mismtach we must be very careful about memory ownership as STL stuff will have - different struct layouts and its easy to make a crash, we will have extra memcpy and - deallocation but as a trade each config will have almost the same preprocessing perf - which matters for our NSC integration - - then we can think to make use of existing shader cache and maybe consider HLSL PCH - which NSC would inject into each input - - NOTE: this approach allows to do all in single Nabla module, no extra proxy/fake shared DLL needed! - NOTE: yep I know I have currently a callback for which context size will differ accross TUs afterwards but will think about it - - or ignore it and take care of NSC special target creating global HLSL PCH injected into each registered input -*/ - #include "nabla.h" #include #include @@ -54,16 +25,27 @@ constexpr size_t kWaveFailureLogOutputTailMaxChars = 4096ull; constexpr size_t kWaveFailureLogOutputTailMaxLines = 16ull; constexpr size_t kWaveFailureLogTokenPreviewMaxChars = 160ull; +auto subtractSessionCacheStats( + const IShaderCompiler::CIncludeFinder::SSessionCache::Stats& end, + const IShaderCompiler::CIncludeFinder::SSessionCache::Stats& begin) -> IShaderCompiler::CIncludeFinder::SSessionCache::Stats +{ + IShaderCompiler::CIncludeFinder::SSessionCache::Stats result; + result.lookupFound = end.lookupFound - begin.lookupFound; + result.lookupMissing = end.lookupMissing - begin.lookupMissing; + result.lookupMiss = end.lookupMiss - begin.lookupMiss; + result.storeFound = end.storeFound - begin.storeFound; + result.storeMissing = end.storeMissing - begin.storeMissing; + return result; +} + struct WaveRenderProgress { core::string output; - std::optional previousPosition = std::nullopt; + std::string previousFile; + int previousLine = 0; + bool hasPreviousToken = false; bool previousWasExplicitWhitespace = false; size_t emittedTokenCount = 0ull; - std::string lastTokenFile; - int lastTokenLine = 0; - int lastTokenColumn = 0; - std::string lastTokenValue; }; std::string getLineSnippet(std::string_view text, const int lineNo) @@ -218,11 +200,6 @@ std::string makeWaveFailureContext( stream << "\n emitted_output_bytes: " << renderProgress.output.size(); stream << "\n emitted_output_lines: " << countLogicalLines(renderProgress.output); stream << "\n emitted_token_count: " << renderProgress.emittedTokenCount; - if (!renderProgress.lastTokenFile.empty()) - stream << "\n last_emitted_token_location: " << nbl::wave::detail::escape_control_chars(renderProgress.lastTokenFile) << ':' << renderProgress.lastTokenLine << ':' << renderProgress.lastTokenColumn; - if (!renderProgress.lastTokenValue.empty()) - stream << "\n last_emitted_token_value: " << truncateEscapedPreview(nbl::wave::detail::escape_control_chars(renderProgress.lastTokenValue), kWaveFailureLogTokenPreviewMaxChars); - const auto snippet = getLineSnippet(code, lineNo); if (!snippet.empty() && fileName && preprocessOptions.sourceIdentifier == fileName) { @@ -248,64 +225,90 @@ bool isWhitespaceLikeToken(const TokenT& token) return id == T_NEWLINE || id == T_GENERATEDNEWLINE || id == T_CONTLINE || IS_CATEGORY(token, WhiteSpaceTokenType); } -template -std::string tokenValueToString(const TokenT& token) -{ - const auto& value = token.get_value(); - return std::string(value.data(), value.size()); -} - void renderPreprocessedOutput(nbl::wave::context& context, WaveRenderProgress& renderProgress) { using namespace boost::wave; util::insert_whitespace_detection whitespace(true); - - for (auto it = context.begin(); it != context.end(); ++it) + auto& perfStats = nbl::wave::detail::perf_stats(); + auto it = context.begin(); + const auto end = context.end(); + while (it != end) { + std::optional loopBodyTimer; + if (perfStats.enabled) + loopBodyTimer.emplace(perfStats.loopBodyTime); + const auto& token = *it; const auto id = token_id(token); - if (id == T_EOF || id == T_EOI) - continue; + if (id != T_EOF && id != T_EOI) + { + std::optional tokenTimer; + if (perfStats.enabled) + tokenTimer.emplace(perfStats.tokenHandlingTime); - const auto explicitWhitespace = isWhitespaceLikeToken(token); - const auto& position = token.get_position(); - const auto value = tokenValueToString(token); + const auto explicitWhitespace = isWhitespaceLikeToken(token); + const auto& position = token.get_position(); + const auto& value = token.get_value(); - if (renderProgress.previousPosition.has_value() && !explicitWhitespace) - { - const auto movedToNewLogicalLine = - position.get_file() != renderProgress.previousPosition->get_file() || - position.get_line() > renderProgress.previousPosition->get_line(); + const auto currentLine = position.get_line(); + const auto& currentFile = position.get_file(); - if (movedToNewLogicalLine) + if (renderProgress.hasPreviousToken && !explicitWhitespace) { - if (renderProgress.output.empty() || renderProgress.output.back() != '\n') + bool movedToNewLogicalLine = currentLine > renderProgress.previousLine; + if (!movedToNewLogicalLine) { - renderProgress.output.push_back('\n'); - whitespace.shift_tokens(T_NEWLINE); + movedToNewLogicalLine = + renderProgress.previousFile.size() != currentFile.size() || + !std::equal(currentFile.begin(), currentFile.end(), renderProgress.previousFile.begin()); } - } - else if (!renderProgress.previousWasExplicitWhitespace && whitespace.must_insert(id, value)) - { - if (renderProgress.output.empty() || (renderProgress.output.back() != ' ' && renderProgress.output.back() != '\n' && renderProgress.output.back() != '\r' && renderProgress.output.back() != '\t')) + + if (movedToNewLogicalLine) { - renderProgress.output.push_back(' '); - whitespace.shift_tokens(T_SPACE); + if (renderProgress.output.empty() || renderProgress.output.back() != '\n') + { + renderProgress.output.push_back('\n'); + whitespace.shift_tokens(T_NEWLINE); + } + } + else if (!renderProgress.previousWasExplicitWhitespace && whitespace.must_insert(id, value)) + { + if (renderProgress.output.empty() || (renderProgress.output.back() != ' ' && renderProgress.output.back() != '\n' && renderProgress.output.back() != '\r' && renderProgress.output.back() != '\t')) + { + renderProgress.output.push_back(' '); + whitespace.shift_tokens(T_SPACE); + } } } + + renderProgress.output.append(value.data(), value.size()); + whitespace.shift_tokens(id); + if (!renderProgress.hasPreviousToken || + renderProgress.previousFile.size() != currentFile.size() || + !std::equal(currentFile.begin(), currentFile.end(), renderProgress.previousFile.begin())) + { + renderProgress.previousFile.assign(currentFile.c_str(), currentFile.size()); + } + renderProgress.previousLine = currentLine; + renderProgress.hasPreviousToken = true; + renderProgress.previousWasExplicitWhitespace = explicitWhitespace; + ++renderProgress.emittedTokenCount; + + if (tokenTimer.has_value()) + tokenTimer.reset(); } - renderProgress.output += value; - whitespace.shift_tokens(id); - renderProgress.previousPosition = position; - renderProgress.previousWasExplicitWhitespace = explicitWhitespace; - const auto& file = position.get_file(); - renderProgress.lastTokenFile.assign(file.c_str(), file.size()); - renderProgress.lastTokenLine = position.get_line(); - renderProgress.lastTokenColumn = position.get_column(); - renderProgress.lastTokenValue = value; - ++renderProgress.emittedTokenCount; + if (loopBodyTimer.has_value()) + loopBodyTimer.reset(); + + if (perfStats.enabled) + { + nbl::wave::detail::ScopedPerfTimer iteratorAdvanceTimer(perfStats.iteratorAdvanceTime); + ++it; + } + else + ++it; } } @@ -315,6 +318,10 @@ std::string preprocessImpl( const bool withCaching, std::function post) { + const auto emptySessionCacheStats = IShaderCompiler::CIncludeFinder::SSessionCache::Stats{}; + const auto readSessionCacheStatsBegin = preprocessOptions.readIncludeSessionCache ? preprocessOptions.readIncludeSessionCache->snapshotStats() : emptySessionCacheStats; + const auto writeSessionCacheStatsBegin = preprocessOptions.writeIncludeSessionCache ? preprocessOptions.writeIncludeSessionCache->snapshotStats() : emptySessionCacheStats; + nbl::wave::context context(code.begin(), code.end(), preprocessOptions.sourceIdentifier.data(), { preprocessOptions }); WaveRenderProgress renderProgress; @@ -331,6 +338,8 @@ std::string preprocessImpl( }; try { + const auto totalBegin = std::chrono::steady_clock::now(); + nbl::wave::detail::reset_perf_stats(); context.set_caching(withCaching); context.add_macro_definition("__HLSL_VERSION"); context.add_macro_definition("__SPIRV_MAJOR_VERSION__=" + std::to_string(IShaderCompiler::getSpirvMajor(preprocessOptions.targetSpirvVersion))); @@ -347,7 +356,14 @@ std::string preprocessImpl( activeMacroDefinition.clear(); phase = "expanding translation unit"; - renderPreprocessedOutput(context, renderProgress); + { + nbl::wave::detail::ScopedPerfTimer renderTimer(nbl::wave::detail::perf_stats().renderTime); + renderPreprocessedOutput(context, renderProgress); + } + auto& perfStats = nbl::wave::detail::perf_stats(); + perfStats.outputBytes = renderProgress.output.size(); + perfStats.emittedTokenCount = renderProgress.emittedTokenCount; + perfStats.totalPreprocessTime = std::chrono::steady_clock::now() - totalBegin; } catch (boost::wave::preprocess_exception& e) { @@ -384,6 +400,12 @@ std::string preprocessImpl( } post(context); + const auto readSessionCacheStatsEnd = preprocessOptions.readIncludeSessionCache ? preprocessOptions.readIncludeSessionCache->snapshotStats() : emptySessionCacheStats; + const auto writeSessionCacheStatsEnd = preprocessOptions.writeIncludeSessionCache ? preprocessOptions.writeIncludeSessionCache->snapshotStats() : emptySessionCacheStats; + nbl::wave::detail::set_session_cache_perf_stats( + subtractSessionCacheStats(readSessionCacheStatsEnd, readSessionCacheStatsBegin), + subtractSessionCacheStats(writeSessionCacheStatsEnd, writeSessionCacheStatsBegin)); + nbl::wave::detail::dump_perf_stats(); return std::move(renderProgress.output); } diff --git a/src/nbl/asset/utils/ISPIRVEntryPointTrimmer.cpp b/src/nbl/asset/utils/ISPIRVEntryPointTrimmer.cpp index 6695c78e96..8c3b72e08b 100644 --- a/src/nbl/asset/utils/ISPIRVEntryPointTrimmer.cpp +++ b/src/nbl/asset/utils/ISPIRVEntryPointTrimmer.cpp @@ -8,7 +8,9 @@ using namespace nbl::asset; -static constexpr spv_target_env SPIRV_VERSION = spv_target_env::SPV_ENV_UNIVERSAL_1_6; +// Why are we validating Universal instead of a Vulkan environment? +// Trimming works on generic SPIR-V before the Vulkan backend chooses its environment. +static constexpr spv_target_env SPIRV_VALIDATION_ENV = spv_target_env::SPV_ENV_UNIVERSAL_1_6; ISPIRVEntryPointTrimmer::ISPIRVEntryPointTrimmer() { @@ -31,27 +33,6 @@ ISPIRVEntryPointTrimmer::ISPIRVEntryPointTrimmer() m_optimizer = core::make_smart_refctd_ptr(std::span(optimizationPasses)); } -// This is for debugging temporarily. will be reworked after finish testing -static void printCapabilities(const uint32_t* spirv, uint32_t spirvDwordCount,nbl::system::logger_opt_ptr logger) -{ - spvtools::SpirvTools core(SPIRV_VERSION); - std::string disassembly; - core.Disassemble(spirv, spirvDwordCount, &disassembly, SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); - std::stringstream ss(disassembly); - std::string to; - const auto stringsToFind = std::array{ "OpCapability", "= OpFunction","OpFunctionEnd", "OpSpecConstant", "=OpType"}; - while(std::getline(ss, to, '\n')){ - if (to.size() > 1 && to.back() == ',') continue; - for (const auto& stringToFind: stringsToFind) - { - if (to.find(stringToFind) != std::string::npos) - { - logger.log("%s", nbl::system::ILogger::ELL_DEBUG, to.c_str()); - } - } - } -} - static bool validate(const uint32_t* binary, uint32_t binarySize, nbl::system::logger_opt_ptr logger) { auto msgConsumer = [&logger](spv_message_level_t level, const char* src, const spv_position_t& pos, const char* msg) @@ -76,7 +57,7 @@ static bool validate(const uint32_t* binary, uint32_t binarySize, nbl::system::l logger.log(location, lvl, msg); }; - spvtools::SpirvTools core(SPIRV_VERSION); + spvtools::SpirvTools core(SPIRV_VALIDATION_ENV); core.SetMessageConsumer(msgConsumer); spvtools::ValidatorOptions validatorOptions; // Nabla use Scalar block layout, we skip this validation to work around this and to save time @@ -84,10 +65,47 @@ static bool validate(const uint32_t* binary, uint32_t binarySize, nbl::system::l return core.Validate(binary, binarySize, validatorOptions); } +static nbl::core::blake3_hash_t getContentHash(const ICPUBuffer* spirvBuffer) +{ + auto contentHash = spirvBuffer->getContentHash(); + if (contentHash == ICPUBuffer::INVALID_HASH) + contentHash = spirvBuffer->computeContentHash(); + return contentHash; +} + +bool ISPIRVEntryPointTrimmer::ensureValidated(const ICPUBuffer* spirvBuffer, system::logger_opt_ptr logger) const +{ + const auto contentHash = getContentHash(spirvBuffer); + + { + std::lock_guard lock(m_validationCacheMutex); + if (m_validatedSpirvHashes.contains(contentHash)) + return true; + } + + const auto* spirv = static_cast(spirvBuffer->getPointer()); + const auto spirvDwordCount = spirvBuffer->getSize() / sizeof(uint32_t); + if (!validate(spirv, spirvDwordCount, logger)) + return false; + + { + std::lock_guard lock(m_validationCacheMutex); + m_validatedSpirvHashes.emplace(contentHash); + } + + return true; +} + +void ISPIRVEntryPointTrimmer::markValidated(const ICPUBuffer* spirvBuffer) const +{ + std::lock_guard lock(m_validationCacheMutex); + m_validatedSpirvHashes.emplace(getContentHash(spirvBuffer)); +} + ISPIRVEntryPointTrimmer::Result ISPIRVEntryPointTrimmer::trim(const ICPUBuffer* spirvBuffer, const core::set& entryPoints, system::logger_opt_ptr logger) const { const auto* spirv = static_cast(spirvBuffer->getPointer()); - const auto spirvDwordCount = spirvBuffer->getSize() / 4; + const auto spirvDwordCount = spirvBuffer->getSize() / sizeof(uint32_t); if (entryPoints.empty()) { @@ -98,18 +116,6 @@ ISPIRVEntryPointTrimmer::Result ISPIRVEntryPointTrimmer::trim(const ICPUBuffer* }; } - auto foundEntryPoint = 0; - - const bool isInputSpirvValid = validate(spirv, spirvDwordCount, logger); - if (!isInputSpirvValid) - { - logger.log("SPIR-V is not valid", system::ILogger::ELL_ERROR); - return Result{ - nullptr, - false - }; - } - auto getHlslShaderStage = [](spv::ExecutionModel executionModel) -> hlsl::ShaderStage { switch (executionModel) @@ -149,6 +155,76 @@ ISPIRVEntryPointTrimmer::Result ISPIRVEntryPointTrimmer::trim(const ICPUBuffer* return { length, opcode }; }; + if (!ensureValidated(spirvBuffer, logger)) + { + logger.log("SPIR-V is not valid", system::ILogger::ELL_ERROR); + return Result{ + .spirv = nullptr, + .isSuccess = false, + }; + } + + { + auto probeOffset = HEADER_SIZE; + auto totalEntryPoints = 0u; + auto matchingEntryPoints = 0u; + auto validFastPath = (spirvDwordCount >= HEADER_SIZE); + + while (validFastPath && probeOffset < spirvDwordCount) + { + const auto instruction = spirv[probeOffset]; + const auto [length, opcode] = parse_instruction(instruction); + if (length == 0u || (probeOffset + length) > spirvDwordCount) + { + validFastPath = false; + break; + } + if (opcode == spv::OpEntryPoint) + break; + probeOffset += length; + } + + while (validFastPath && probeOffset < spirvDwordCount) + { + const auto curOffset = probeOffset; + const auto instruction = spirv[curOffset]; + const auto [length, opcode] = parse_instruction(instruction); + if (length == 0u || (probeOffset + length) > spirvDwordCount) + { + validFastPath = false; + break; + } + if (opcode != spv::OpEntryPoint) + break; + probeOffset += length; + ++totalEntryPoints; + + const auto curExecutionModel = static_cast(spirv[curOffset + 1]); + const auto curEntryPointName = std::string_view(reinterpret_cast(spirv + curOffset + 3)); + const auto entryPoint = EntryPoint{ + .name = curEntryPointName, + .stage = getHlslShaderStage(curExecutionModel), + }; + if (entryPoint.stage == hlsl::ESS_UNKNOWN) + { + validFastPath = false; + break; + } + if (entryPoints.contains(entryPoint)) + ++matchingEntryPoints; + } + + if (validFastPath && totalEntryPoints == entryPoints.size() && matchingEntryPoints == entryPoints.size()) + { + return { + .spirv = nullptr, + .isSuccess = true, + }; + } + } + + auto foundEntryPoint = 0; + // Keep in mind about this layout while reading all the code below: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#LogicalLayout // skip until entry point @@ -240,21 +316,25 @@ ISPIRVEntryPointTrimmer::Result ISPIRVEntryPointTrimmer::trim(const ICPUBuffer* minimizedSpirv.insert(minimizedSpirv.end(), spirv + offset, spirv + spirvDwordCount); - assert(validate(minimizedSpirv.data(), minimizedSpirv.size(), logger)); - auto trimmedSpirv = m_optimizer->optimize(minimizedSpirv.data(), minimizedSpirv.size(), logger); + if (!trimmedSpirv) + { + logger.log("Failed to optimize trimmed SPIR-V", system::ILogger::ELL_ERROR); + return { + .spirv = nullptr, + .isSuccess = false, + }; + } -#ifdef _NBL_DEBUG - logger.log("Before stripping capabilities:", nbl::system::ILogger::ELL_DEBUG); - printCapabilities(spirv, spirvDwordCount, logger); - logger.log("\n", nbl::system::ILogger::ELL_DEBUG); - - const auto* trimmedSpirvBuffer = static_cast(trimmedSpirv->getPointer()); - const auto trimmedSpirvDwordCount = trimmedSpirv->getSize() / 4; - logger.log("After stripping capabilities:", nbl::system::ILogger::ELL_DEBUG); - printCapabilities(trimmedSpirvBuffer, trimmedSpirvDwordCount, logger); - logger.log("\n", nbl::system::ILogger::ELL_DEBUG); -#endif + trimmedSpirv->setContentHash(trimmedSpirv->computeContentHash()); + if (!ensureValidated(trimmedSpirv.get(), logger)) + { + logger.log("Trimmed SPIR-V is not valid", system::ILogger::ELL_ERROR); + return { + .spirv = nullptr, + .isSuccess = false, + }; + } return { .spirv = std::move(trimmedSpirv), diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 372e877e21..d180d83a5c 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -600,11 +600,9 @@ core::vector IShaderCompiler::IIncludeGenerator::parseArgumentsFrom IShaderCompiler::CFileSystemIncludeLoader::CFileSystemIncludeLoader(core::smart_refctd_ptr&& system) : m_system(std::move(system)) {} -auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& searchPath, const std::string& includeName) const -> found_t +auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& searchPath, const std::string& includeName, bool needHash) const -> found_t { - system::path path = searchPath / includeName; - if (std::filesystem::exists(path)) - path = std::filesystem::canonical(path); + system::path path = (searchPath / includeName).lexically_normal(); core::smart_refctd_ptr f; { @@ -624,7 +622,15 @@ auto IShaderCompiler::CFileSystemIncludeLoader::getInclude(const system::path& s const bool success = bool(succ); assert(success); - return { f->getFileName(),std::move(contents) }; + found_t retVal = { f->getFileName(),std::move(contents) }; + if (needHash) + { + core::blake3_hasher hasher; + hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + retVal.hash = static_cast(hasher); + } + + return retVal; } namespace @@ -644,79 +650,246 @@ std::string normalizeIncludeLookupName(const std::string& includeName) return includeName.substr(1ull); } + +std::string normalizeClassifiedRootPath(std::string value) +{ + std::replace(value.begin(), value.end(), '\\', '/'); + while (value.size() > 1ull && value.back() == '/') + value.pop_back(); + if (value.size() > 2ull && value.front() == '/' && value[1ull] != '/') + value.erase(value.begin()); + return value; +} + +bool pathHasRegisteredRoot(std::string_view path, std::string_view root) +{ + if (root.empty() || path.size() < root.size() || path.substr(0ull, root.size()) != root) + return false; + return path.size() == root.size() || path[root.size()] == '/'; +} + +template +auto withSessionCacheLock(IShaderCompiler::CIncludeFinder::SSessionCache* cache, Func&& func) -> decltype(func()) +{ + if (cache && cache->threadSafe) + { + std::lock_guard lock(cache->mutex); + return func(); + } + return func(); +} + +std::string makeIncludeSessionCacheKey(const system::path& requestingSourceDir, std::string_view includeName, const bool needHash, const char mode) +{ + const auto requestingDir = requestingSourceDir.generic_string(); + std::string key; + key.reserve(requestingDir.size() + includeName.size() + 4ull); + key.push_back(mode); + key.push_back('\n'); + key.push_back(needHash ? 'H' : 'N'); + key.push_back('\n'); + key.append(requestingDir); + key.push_back('\n'); + key.append(includeName.data(), includeName.size()); + return key; +} + +auto lookupIncludeSessionCache(IShaderCompiler::CIncludeFinder::SSessionCache* readCache, IShaderCompiler::CIncludeFinder::SSessionCache* writeCache, const std::string& key, IShaderCompiler::IIncludeLoader::found_t& result) -> IShaderCompiler::CIncludeFinder::SSessionCache::LookupResult +{ + using LookupResult = IShaderCompiler::CIncludeFinder::SSessionCache::LookupResult; + const auto lookupResult = readCache ? readCache->lookup(key, result) : LookupResult::Miss; + if (readCache && writeCache && readCache != writeCache) + { + switch (lookupResult) + { + case LookupResult::Found: + writeCache->store(key, result); + break; + case LookupResult::Missing: + writeCache->store(key, {}); + break; + case LookupResult::Miss: + break; + } + } + return lookupResult; +} + +void writeIncludeSessionCache(IShaderCompiler::CIncludeFinder::SSessionCache* writeCache, const std::string& key, const IShaderCompiler::IIncludeLoader::found_t& result) +{ + if (writeCache) + writeCache->store(key, result); +} + +} + +void IShaderCompiler::CIncludeFinder::SSessionCache::clear() +{ + withSessionCacheLock(this, [&]() + { + found.clear(); + missing.clear(); + }); +} + +auto IShaderCompiler::CIncludeFinder::SSessionCache::lookup(const std::string& key, IIncludeLoader::found_t& result) const -> LookupResult +{ + return withSessionCacheLock(const_cast(this), [&]() -> LookupResult + { + if (const auto foundIt = found.find(key); foundIt != found.end()) + { + ++stats.lookupFound; + result = foundIt->second; + return LookupResult::Found; + } + if (missing.contains(key)) + { + ++stats.lookupMissing; + return LookupResult::Missing; + } + ++stats.lookupMiss; + return LookupResult::Miss; + }); +} + +void IShaderCompiler::CIncludeFinder::SSessionCache::store(const std::string& key, IIncludeLoader::found_t result) +{ + withSessionCacheLock(this, [&]() + { + missing.erase(key); + if (result) + { + ++stats.storeFound; + found.insert_or_assign(key, std::move(result)); + } + else + { + ++stats.storeMissing; + missing.insert(key); + } + }); +} + +auto IShaderCompiler::CIncludeFinder::SSessionCache::snapshotStats() const -> Stats +{ + return withSessionCacheLock(const_cast(this), [&]() -> Stats + { + return stats; + }); } IShaderCompiler::CIncludeFinder::CIncludeFinder(core::smart_refctd_ptr&& system) - : m_defaultFileSystemLoader(core::make_smart_refctd_ptr(std::move(system))) + : m_defaultFileSystemLoader(core::make_smart_refctd_ptr(core::smart_refctd_ptr(system))) { addSearchPath("", m_defaultFileSystemLoader); + for (const auto& builtinRoot : system->getBuiltinMountAliases()) + registerHeaderRoot(builtinRoot.generic_string(), {IncludeRootOrigin::Builtin,HeaderClass::System}); } // ! includes within <> // @param requestingSourceDir: the directory where the incude was requested // @param includeName: the string within <> of the include preprocessing directive // @param -auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& requestingSourceDir, const std::string& includeName) const -> IIncludeLoader::found_t +auto IShaderCompiler::CIncludeFinder::getIncludeStandard(const system::path& requestingSourceDir, const std::string& includeName, bool needHash, SSessionCache* readSessionCache, SSessionCache* writeSessionCache) const -> IIncludeLoader::found_t { const auto lookupName = normalizeIncludeLookupName(includeName); + const auto cacheKey = makeIncludeSessionCacheKey(requestingSourceDir, lookupName, needHash, 'S'); IShaderCompiler::IIncludeLoader::found_t retVal; + switch (lookupIncludeSessionCache(readSessionCache, writeSessionCache, cacheKey, retVal)) + { + case SSessionCache::LookupResult::Found: + return retVal; + case SSessionCache::LookupResult::Missing: + return {}; + case SSessionCache::LookupResult::Miss: + break; + } + if (auto contents = tryIncludeGenerators(lookupName)) retVal = std::move(contents); - else if (auto contents = trySearchPaths(lookupName)) + else if (auto contents = trySearchPaths(lookupName, needHash)) retVal = std::move(contents); - else retVal = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), lookupName); + else retVal = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), lookupName, needHash); + + retVal = classifyFound(std::move(retVal)); - core::blake3_hasher hasher; - hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + if (needHash && retVal && retVal.hash == core::blake3_hash_t{}) + { + core::blake3_hasher hasher; + hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + retVal.hash = static_cast(hasher); + } + writeIncludeSessionCache(writeSessionCache, cacheKey, retVal); return retVal; } // ! includes within "" // @param requestingSourceDir: the directory where the incude was requested // @param includeName: the string within "" of the include preprocessing directive -auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& requestingSourceDir, const std::string& includeName) const -> IIncludeLoader::found_t +auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& requestingSourceDir, const std::string& includeName, bool needHash, SSessionCache* readSessionCache, SSessionCache* writeSessionCache) const -> IIncludeLoader::found_t { const auto lookupName = normalizeIncludeLookupName(includeName); + const auto cacheKey = makeIncludeSessionCacheKey(requestingSourceDir, lookupName, needHash, 'R'); IShaderCompiler::IIncludeLoader::found_t retVal; - if (auto contents = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), lookupName)) + switch (lookupIncludeSessionCache(readSessionCache, writeSessionCache, cacheKey, retVal)) + { + case SSessionCache::LookupResult::Found: + return retVal; + case SSessionCache::LookupResult::Missing: + return {}; + case SSessionCache::LookupResult::Miss: + break; + } + + if (auto contents = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), lookupName, needHash)) retVal = std::move(contents); - else retVal = std::move(trySearchPaths(lookupName)); + else retVal = std::move(trySearchPaths(lookupName, needHash)); + + retVal = classifyFound(std::move(retVal)); - core::blake3_hasher hasher; - hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); - retVal.hash = static_cast(hasher); + if (needHash && retVal && retVal.hash == core::blake3_hash_t{}) + { + core::blake3_hasher hasher; + hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); + retVal.hash = static_cast(hasher); + } + writeIncludeSessionCache(writeSessionCache, cacheKey, retVal); return retVal; } -void IShaderCompiler::CIncludeFinder::addSearchPath(const std::string& searchPath, const core::smart_refctd_ptr& loader) +void IShaderCompiler::CIncludeFinder::addSearchPath(const std::string& searchPath, const core::smart_refctd_ptr& loader, IncludeClassification classification) { if (!loader) return; - m_loaders.emplace_back(LoaderSearchPath{ loader, searchPath }); + auto normalizedSearchPath = normalizeClassifiedRootPath(searchPath); + if (!normalizedSearchPath.empty()) + registerHeaderRoot(normalizedSearchPath, classification); + m_loaders.emplace_back(LoaderSearchPath{ loader, std::move(normalizedSearchPath), classification }); } -void IShaderCompiler::CIncludeFinder::addGenerator(const core::smart_refctd_ptr& generatorToAdd) +void IShaderCompiler::CIncludeFinder::addGenerator(const core::smart_refctd_ptr& generatorToAdd, IncludeClassification classification) { if (!generatorToAdd) return; + registerHeaderRoot(std::string(generatorToAdd->getPrefix()), classification); + // this will find the place of first generator with prefix <= generatorToAdd or end auto found = std::lower_bound(m_generators.begin(), m_generators.end(), generatorToAdd->getPrefix(), - [](const core::smart_refctd_ptr& generator, const std::string_view& value) + [](const GeneratorEntry& generator, const std::string_view& value) { - auto element = generator->getPrefix(); + auto element = generator.generator->getPrefix(); return element.compare(value) > 0; // first to return false is lower_bound -> first element that is <= value }); - m_generators.insert(found, generatorToAdd); + m_generators.insert(found, GeneratorEntry{ generatorToAdd, classification }); } -auto IShaderCompiler::CIncludeFinder::trySearchPaths(const std::string& includeName) const -> IIncludeLoader::found_t +auto IShaderCompiler::CIncludeFinder::trySearchPaths(const std::string& includeName, bool needHash) const -> IIncludeLoader::found_t { for (const auto& itr : m_loaders) - if (auto contents = itr.loader->getInclude(itr.searchPath, includeName)) + if (auto contents = itr.loader->getInclude(itr.searchPath, includeName, needHash)) return contents; return {}; } @@ -747,23 +920,23 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in while (!path.empty() && path.root_name().empty() && end != m_generators.end()) { auto begin = std::lower_bound(end, m_generators.end(), path.string(), - [&standardizePrefix](const core::smart_refctd_ptr& generator, const std::string& value) + [&standardizePrefix](const GeneratorEntry& generator, const std::string& value) { - const auto element = standardizePrefix(generator->getPrefix()); + const auto element = standardizePrefix(generator.generator->getPrefix()); return element.compare(value) > 0; // first to return false is lower_bound -> first element that is <= value }); // search from new beginning to real end end = std::upper_bound(begin, m_generators.end(), path.string(), - [&standardizePrefix](const std::string& value, const core::smart_refctd_ptr& generator) + [&standardizePrefix](const std::string& value, const GeneratorEntry& generator) { - const auto element = standardizePrefix(generator->getPrefix()); + const auto element = standardizePrefix(generator.generator->getPrefix()); return value.compare(element) > 0; // first to return true is upper_bound -> first element that is < value }); for (auto generatorIt = begin; generatorIt != end; generatorIt++) { - if (auto contents = (*generatorIt)->getInclude(includeName)) + if (auto contents = generatorIt->generator->getInclude(includeName)) return contents; } @@ -773,6 +946,53 @@ auto IShaderCompiler::CIncludeFinder::tryIncludeGenerators(const std::string& in return {}; } +bool IShaderCompiler::CIncludeFinder::isKnownGlobalInclude(std::string_view includeName) const +{ + const auto normalizedIncludeName = normalizeClassifiedRootPath(std::string(includeName)); + return std::any_of(m_headerRoots.begin(), m_headerRoots.end(), [&](const HeaderRoot& root) + { + return root.classification.headerClass == HeaderClass::System && pathHasRegisteredRoot(normalizedIncludeName, root.path); + }); +} + +auto IShaderCompiler::CIncludeFinder::classifyFound(IIncludeLoader::found_t found) const -> IIncludeLoader::found_t +{ + if (!found) + return found; + + const auto normalizedPath = normalizeClassifiedRootPath(found.absolutePath.generic_string()); + size_t bestMatchLength = 0ull; + for (const auto& root : m_headerRoots) + { + if (root.path.size() <= bestMatchLength) + continue; + if (!pathHasRegisteredRoot(normalizedPath, root.path)) + continue; + found.classification = root.classification; + bestMatchLength = root.path.size(); + } + return found; +} + +void IShaderCompiler::CIncludeFinder::registerHeaderRoot(std::string rootPath, IncludeClassification classification) +{ + rootPath = normalizeClassifiedRootPath(std::move(rootPath)); + if (rootPath.empty()) + return; + + const auto found = std::find_if(m_headerRoots.begin(), m_headerRoots.end(), [&](const HeaderRoot& entry) + { + return entry.path == rootPath; + }); + if (found != m_headerRoots.end()) + { + found->classification = classification; + return; + } + + m_headerRoots.push_back({ std::move(rootPath),classification }); +} + core::smart_refctd_ptr IShaderCompiler::CCache::find(const SEntry& mainFile, const IShaderCompiler::CIncludeFinder* finder) const { const auto found = find_impl(mainFile, finder); diff --git a/src/nbl/asset/utils/waveContext.h b/src/nbl/asset/utils/waveContext.h index 36f9d4ea99..4904846463 100644 --- a/src/nbl/asset/utils/waveContext.h +++ b/src/nbl/asset/utils/waveContext.h @@ -8,7 +8,12 @@ #include #include #include +#include +#include +#include +#include #include +#include #include #include "nbl/asset/utils/IShaderCompiler.h" @@ -21,6 +26,175 @@ using namespace boost::wave::util; namespace detail { +struct PerfStats +{ + bool enabled = false; + bool includeDetailsEnabled = false; + uint64_t includeRequests = 0ull; + uint64_t includeLookupCount = 0ull; + uint64_t includeResolutionCacheSkips = 0ull; + uint64_t postLoadPragmaSkips = 0ull; + uint64_t sessionLookupFound = 0ull; + uint64_t sessionLookupMissing = 0ull; + uint64_t sessionLookupMiss = 0ull; + uint64_t sessionStoreFound = 0ull; + uint64_t sessionStoreMissing = 0ull; + std::chrono::nanoseconds includeLookupTime = std::chrono::nanoseconds::zero(); + std::chrono::nanoseconds tokenHandlingTime = std::chrono::nanoseconds::zero(); + std::chrono::nanoseconds iteratorAdvanceTime = std::chrono::nanoseconds::zero(); + std::chrono::nanoseconds loopBodyTime = std::chrono::nanoseconds::zero(); + std::chrono::nanoseconds renderTime = std::chrono::nanoseconds::zero(); + std::chrono::nanoseconds totalPreprocessTime = std::chrono::nanoseconds::zero(); + size_t outputBytes = 0ull; + uint64_t emittedTokenCount = 0ull; + std::unordered_map requestedIncludeSpellingCounts; + std::unordered_map resolvedIncludePathCounts; +}; + +inline PerfStats& perf_stats() +{ + static PerfStats stats = []() + { + PerfStats value; + value.enabled = std::getenv("NBL_WAVE_PROFILE") != nullptr; + value.includeDetailsEnabled = std::getenv("NBL_WAVE_PROFILE_INCLUDES") != nullptr; + return value; + }(); + return stats; +} + +inline void reset_perf_stats() +{ + auto& stats = perf_stats(); + const bool enabled = stats.enabled; + const bool includeDetailsEnabled = stats.includeDetailsEnabled; + stats = {}; + stats.enabled = enabled; + stats.includeDetailsEnabled = includeDetailsEnabled; +} + +class ScopedPerfTimer +{ + public: + explicit ScopedPerfTimer(std::chrono::nanoseconds& target) : m_target(target), m_begin(std::chrono::steady_clock::now()) {} + ~ScopedPerfTimer() + { + m_target += std::chrono::steady_clock::now() - m_begin; + } + + private: + std::chrono::nanoseconds& m_target; + std::chrono::steady_clock::time_point m_begin; +}; + +inline void dump_perf_stats() +{ + const auto& stats = perf_stats(); + if (!stats.enabled) + return; + + const auto to_ms = [](const std::chrono::nanoseconds value) -> double + { + return std::chrono::duration(value).count(); + }; + + std::fprintf( + stderr, + "[wave-profile] total_ms=%.3f include_lookup_ms=%.3f token_handling_ms=%.3f iterator_advance_ms=%.3f loop_body_ms=%.3f render_ms=%.3f include_requests=%llu include_lookups=%llu resolution_cache_skips=%llu postload_pragma_skips=%llu session_lookup_found=%llu session_lookup_missing=%llu session_lookup_miss=%llu session_store_found=%llu session_store_missing=%llu emitted_tokens=%llu output_bytes=%zu\n", + to_ms(stats.totalPreprocessTime), + to_ms(stats.includeLookupTime), + to_ms(stats.tokenHandlingTime), + to_ms(stats.iteratorAdvanceTime), + to_ms(stats.loopBodyTime), + to_ms(stats.renderTime), + static_cast(stats.includeRequests), + static_cast(stats.includeLookupCount), + static_cast(stats.includeResolutionCacheSkips), + static_cast(stats.postLoadPragmaSkips), + static_cast(stats.sessionLookupFound), + static_cast(stats.sessionLookupMissing), + static_cast(stats.sessionLookupMiss), + static_cast(stats.sessionStoreFound), + static_cast(stats.sessionStoreMissing), + static_cast(stats.emittedTokenCount), + stats.outputBytes + ); + + if (!stats.includeDetailsEnabled) + return; + + auto dumpTopCounts = [](const char* const label, const std::unordered_map& counts) + { + if (counts.empty()) + return; + + std::vector> entries; + entries.reserve(counts.size()); + for (const auto& [name, count] : counts) + entries.emplace_back(name, count); + + std::sort(entries.begin(), entries.end(), [](const auto& lhs, const auto& rhs) + { + if (lhs.second != rhs.second) + return lhs.second > rhs.second; + return lhs.first < rhs.first; + }); + + constexpr size_t kMaxEntries = 24ull; + const auto limit = std::min(entries.size(), kMaxEntries); + for (size_t i = 0ull; i < limit; ++i) + { + const auto& entry = entries[i]; + std::fprintf(stderr, "[wave-profile] %s[%zu]=%llu %s\n", label, i, static_cast(entry.second), entry.first.c_str()); + } + }; + + dumpTopCounts("requested_include", stats.requestedIncludeSpellingCounts); + dumpTopCounts("resolved_include", stats.resolvedIncludePathCounts); +} + +struct LanguageFlagConfig +{ + bool preserveComments = false; + bool enableCpp20 = true; + bool preferPpNumbers = true; + bool emitLineDirectives = true; + bool includeGuardDetection = true; + bool emitPragmaDirectives = true; +}; + +inline boost::wave::language_support make_language_flags(const LanguageFlagConfig& config) +{ + auto flags = boost::wave::language_support(); + if (config.enableCpp20) + flags = boost::wave::language_support(flags | support_cpp20); // C++20 lexer mode. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L56-L59 + if (config.preferPpNumbers) + flags = boost::wave::language_support(flags | support_option_prefer_pp_numbers); // Prefer pp-number lexing before retokenization. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L71 + if (config.preserveComments) + flags = boost::wave::language_support(flags | support_option_preserve_comments); // Keep comments in the token stream. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L67 + if (config.emitLineDirectives) + flags = boost::wave::language_support(flags | support_option_emit_line_directives); // Emit #line directives in the output. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L72 + if (config.includeGuardDetection) + flags = boost::wave::language_support(flags | support_option_include_guard_detection); // Let Wave short-circuit classic include guards. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/libs/wave/include/boost/wave/language_support.hpp#L239 + if (config.emitPragmaDirectives) + flags = boost::wave::language_support(flags | support_option_emit_pragma_directives); // Keep pragma directives in the output. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L74 + // support_option_emit_contnewlines // Emit escaped line continuations. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L65 + // support_option_insert_whitespace // Let Wave inject separator whitespace. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L66 + return flags; +} + +inline void set_session_cache_perf_stats( + const asset::IShaderCompiler::CIncludeFinder::SSessionCache::Stats& readDelta, + const asset::IShaderCompiler::CIncludeFinder::SSessionCache::Stats& writeDelta) +{ + auto& stats = perf_stats(); + stats.sessionLookupFound = readDelta.lookupFound; + stats.sessionLookupMissing = readDelta.lookupMissing; + stats.sessionLookupMiss = readDelta.lookupMiss; + stats.sessionStoreFound = writeDelta.storeFound; + stats.sessionStoreMissing = writeDelta.storeMissing; +} + inline std::string escape_control_chars(std::string_view text) { static constexpr char hex[] = "0123456789ABCDEF"; @@ -81,7 +255,7 @@ struct load_to_string final template static void init_iterators(IterContextT& iter_ctx, PositionT const& act_pos, boost::wave::language_support language) { - iter_ctx.instring = iter_ctx.ctx.get_located_include_content(); + iter_ctx.instring = iter_ctx.ctx.take_located_include_content(); if (!iter_ctx.instring.empty() && iter_ctx.instring.back() != '\n' && iter_ctx.instring.back() != '\r') iter_ctx.instring.push_back('\n'); @@ -99,7 +273,7 @@ struct load_to_string final struct preprocessing_hooks final : public boost::wave::context_policies::default_preprocessing_hooks { preprocessing_hooks(const nbl::asset::IShaderCompiler::SPreprocessorOptions& _preprocessOptions) - : m_includeFinder(_preprocessOptions.includeFinder), m_logger(_preprocessOptions.logger), m_pragmaStage(nbl::asset::IShader::E_SHADER_STAGE::ESS_UNKNOWN), m_dxc_compile_flags_override() + : m_includeFinder(_preprocessOptions.includeFinder), m_readIncludeSessionCache(_preprocessOptions.readIncludeSessionCache), m_writeIncludeSessionCache(_preprocessOptions.writeIncludeSessionCache), m_logger(_preprocessOptions.logger), m_preserveComments(_preprocessOptions.preserveComments), m_pragmaStage(nbl::asset::IShader::E_SHADER_STAGE::ESS_UNKNOWN), m_dxc_compile_flags_override() { hash_token_occurences = 0; } @@ -205,9 +379,11 @@ struct preprocessing_hooks final : public boost::wave::context_policies::default return false; } - const asset::IShaderCompiler::CIncludeFinder* m_includeFinder; + asset::IShaderCompiler::CIncludeFinder::SSessionCache* m_readIncludeSessionCache; + asset::IShaderCompiler::CIncludeFinder::SSessionCache* m_writeIncludeSessionCache; system::logger_opt_ptr m_logger; + bool m_preserveComments; asset::IShader::E_SHADER_STAGE m_pragmaStage; int hash_token_occurences; std::vector m_dxc_compile_flags_override; @@ -250,15 +426,12 @@ class context : private boost::noncopyable , current_filename(fname) , current_relative_filename(fname) , macros(*this_()) - , language(language_support( - support_cpp20 // C++20 lexer mode. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L56-L59 - | support_option_prefer_pp_numbers // Prefer pp-number lexing before retokenization. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L71 - | support_option_preserve_comments // Keep comments in the token stream. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L67 - | support_option_emit_line_directives // Emit #line directives in the output. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L72 - | support_option_emit_pragma_directives // Keep pragma directives in the output. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L74 -// | support_option_emit_contnewlines // Emit escaped line continuations. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L65 -// | support_option_insert_whitespace // Let Wave inject separator whitespace. https://github.com/Devsh-Graphics-Programming/wave/blob/e02cda69e4d070fd9b16a39282d6b5c717cb3da4/include/boost/wave/language_support.hpp#L66 - )) + , language([&hooks_] + { + auto config = detail::LanguageFlagConfig{}; + config.preserveComments = hooks_.m_preserveComments; + return detail::make_language_flags(config); + }()) , hooks(hooks_) { macros.init_predefined_macros(fname); @@ -414,6 +587,10 @@ class context : private boost::noncopyable { return located_include_content; } + core::string take_located_include_content() + { + return std::move(located_include_content); + } // Nabla Additions End #if !defined(BOOST_NO_MEMBER_TEMPLATE_FRIENDS) @@ -512,6 +689,19 @@ class context : private boost::noncopyable { return pragma_once_headers.contains(filename_); } + bool has_cached_include_resolution(std::string_view includeName, bool is_system, std::string& absolutePath) const + { + const auto found = include_resolution_cache.find(make_include_resolution_key(includeName, is_system)); + if (found == include_resolution_cache.end()) + return false; + + absolutePath = found->second; + return true; + } + void cache_include_resolution(std::string_view includeName, bool is_system, const std::string& absolutePath) + { + include_resolution_cache.insert_or_assign(make_include_resolution_key(includeName, is_system), absolutePath); + } bool add_pragma_once_header(std::string const& filename_, std::string const& guard_name) { get_hooks().detected_include_guard(derived(), filename_, guard_name); @@ -558,6 +748,7 @@ class context : private boost::noncopyable #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 std::unordered_set pragma_once_headers; #endif + std::unordered_map include_resolution_cache; // Cache Additions bool cachingRequested = false; std::vector dependencies = {}; @@ -568,6 +759,25 @@ class context : private boost::noncopyable macromap_type macros; // map of defined macros const boost::wave::language_support language; // supported language/extensions preprocessing_hooks hooks; // hook policy instance + + std::string make_include_resolution_key(std::string_view includeName, bool is_system) const + { + std::string key; + const bool globallyResolved = is_system || (hooks.m_includeFinder && hooks.m_includeFinder->isKnownGlobalInclude(includeName)); + if (!globallyResolved) + { + const auto currentDirString = current_dir.generic_string(); + key.reserve(currentDirString.size() + includeName.size() + 3ull); + key.append(currentDirString); + key.push_back('\n'); + } + else + key.reserve(includeName.size() + 2ull); + key.push_back(globallyResolved ? 'G' : 'R'); + key.push_back('\n'); + key.append(includeName.data(), includeName.size()); + return key; + } }; } @@ -588,19 +798,49 @@ template<> inline bool boost::wave::impl::pp_iterator_functorclassifyFound(includeFinder->getDefaultFileSystemLoader()->getInclude(nbl::system::path{}, cachedAbsolutePath, needHash)); + standardInclude = is_system; + } + } - if (includeFinder) + if (!result && includeFinder) { + nbl::wave::detail::ScopedPerfTimer lookupTimer(perfStats.includeLookupTime); + if (perfStats.enabled) + ++perfStats.includeLookupCount; if (is_system) { - result = includeFinder->getIncludeStandard(ctx.get_current_directory(), file_path); + result = includeFinder->getIncludeStandard(ctx.get_current_directory(), file_path, needHash, ctx.get_hooks().m_readIncludeSessionCache, ctx.get_hooks().m_writeIncludeSessionCache); standardInclude = true; } else { - result = includeFinder->getIncludeRelative(ctx.get_current_directory(), file_path); + result = includeFinder->getIncludeRelative(ctx.get_current_directory(), file_path, needHash, ctx.get_hooks().m_readIncludeSessionCache, ctx.get_hooks().m_writeIncludeSessionCache); standardInclude = false; } } - else { + else if (!result) { const auto escapedPath = nbl::wave::detail::escape_control_chars(file_path); ctx.get_hooks().m_logger.log("Pre-processor error: Include finder not assigned, preprocessor will not include file %s", nbl::system::ILogger::ELL_ERROR, escapedPath.c_str()); return false; @@ -618,9 +858,20 @@ template<> inline bool boost::wave::impl::pp_iterator_functor inline bool boost::wave::impl::pp_iterator_functor new_iter_ctx( new iteration_context_type(ctx,result.absolutePath.string().c_str(),act_pos, boost::wave::enable_prefer_pp_numbers(ctx.get_language()), - is_system ? base_iteration_context_type::system_header : + systemHeader ? base_iteration_context_type::system_header : base_iteration_context_type::user_header)); // call the include policy trace function diff --git a/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp index fd5411c2ab..58b1f2ea84 100644 --- a/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp +++ b/src/nbl/ext/FullScreenTriangle/CFullScreenTriangle.cpp @@ -84,7 +84,8 @@ smart_refctd_ptr ProtoPipeline::createPipeline( const IGPURenderpass* renderpass, const uint32_t subpassIx, SBlendParams blendParams, - const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform) + const hlsl::SurfaceTransform::FLAG_BITS swapchainTransform, + IGPUPipelineCache* pipelineCache) { if (!renderpass || !bool(*this) || hlsl::bitCount(swapchainTransform) != 1) return nullptr; @@ -116,7 +117,7 @@ smart_refctd_ptr ProtoPipeline::createPipeline( }; params[0].renderpass = renderpass; - if (!device->createGraphicsPipelines(nullptr, params, &m_retval)) + if (!device->createGraphicsPipelines(pipelineCache, params, &m_retval)) return nullptr; } return m_retval; diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index bd07c7a81a..079a861bfd 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -341,6 +341,15 @@ size_t ISystem::getMountedBuiltinArchiveCount() const return retval; } +core::vector ISystem::getBuiltinMountAliases() const +{ + core::vector retval; + retval.reserve(m_builtinMounts.size()); + for (const auto& mount : m_builtinMounts) + retval.push_back(mount.pathAlias.empty() ? mount.archive->getDefaultAbsolutePath() : mount.pathAlias); + return retval; +} + void ISystem::mountBuiltin(core::smart_refctd_ptr&& archive, const system::path& pathAlias) { auto tracked = archive; diff --git a/src/nbl/video/CJITIncludeLoader.cpp b/src/nbl/video/CJITIncludeLoader.cpp index 59c894358a..c013f043f3 100644 --- a/src/nbl/video/CJITIncludeLoader.cpp +++ b/src/nbl/video/CJITIncludeLoader.cpp @@ -4,7 +4,7 @@ namespace nbl::video { -auto CJITIncludeLoader::getInclude(const system::path& searchPath, const std::string& includeName) const -> found_t +auto CJITIncludeLoader::getInclude(const system::path& searchPath, const std::string& includeName, bool) const -> found_t { assert(searchPath=="nbl/builtin/hlsl/jit"); diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 7958efa5c0..8379d0bf0e 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -159,7 +159,7 @@ ILogicalDevice::ILogicalDevice(core::smart_refctd_ptr&& ap } if (auto hlslCompiler = m_compilerSet ? m_compilerSet->getShaderCompiler(asset::IShader::E_CONTENT_TYPE::ECT_HLSL) : nullptr) - hlslCompiler->getDefaultIncludeFinder()->addSearchPath("nbl/builtin/hlsl/jit", core::make_smart_refctd_ptr(m_physicalDevice->getLimits(), m_enabledFeatures)); + hlslCompiler->getDefaultIncludeFinder()->addSearchPath("nbl/builtin/hlsl/jit", core::make_smart_refctd_ptr(m_physicalDevice->getLimits(), m_enabledFeatures), {asset::IShaderCompiler::IncludeRootOrigin::Generated,asset::IShaderCompiler::HeaderClass::System}); } E_API_TYPE ILogicalDevice::getAPIType() const { return m_physicalDevice->getAPIType(); } @@ -1155,4 +1155,4 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } return retval; } -#include "nbl/undef_logging_macros.h" \ No newline at end of file +#include "nbl/undef_logging_macros.h" diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 94180ba71c..ac2586f858 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -378,11 +378,25 @@ class ShaderCompiler final : public IApplicationFramework m_arguments.push_back("main"); } - for (size_t i = 0; i + 1 < m_arguments.size(); ++i) + std::vector normalizedArguments; + normalizedArguments.reserve(m_arguments.size()); + for (size_t i = 0; i < m_arguments.size(); ++i) { - if (m_arguments[i] == "-I") - m_include_search_paths.emplace_back(m_arguments[i + 1]); + const auto& argument = m_arguments[i]; + if ((argument == "-I" || argument == "-isystem") && i + 1 < m_arguments.size()) + { + const auto classification = IShaderCompiler::IncludeClassification{ + IShaderCompiler::IncludeRootOrigin::User, + argument == "-isystem" ? IShaderCompiler::HeaderClass::System : IShaderCompiler::HeaderClass::User + }; + m_include_search_paths.push_back({ m_arguments[i + 1],classification }); + ++i; + continue; + } + + normalizedArguments.emplace_back(argument); } + m_arguments = std::move(normalizedArguments); const char* const action = preprocessOnly ? "Preprocessing" : "Compiling"; const char* const outType = preprocessOnly ? "Preprocessed" : "Compiled"; @@ -470,6 +484,8 @@ class ShaderCompiler final : public IApplicationFramework for (const auto& a : args) { + if (split(a, "-isystem")) continue; + if (split(a, "-I")) continue; if (split(a, "-MF")) continue; if (split(a, "-Fo")) continue; if (split(a, "-Fc")) continue; @@ -544,8 +560,8 @@ class ShaderCompiler final : public IApplicationFramework auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); auto includeLoader = includeFinder->getDefaultFileSystemLoader(); - for (const auto& p : m_include_search_paths) - includeFinder->addSearchPath(p, includeLoader); + for (const auto& searchPath : m_include_search_paths) + includeFinder->addSearchPath(searchPath.path, includeLoader, searchPath.classification); // need this struct becuase fields of IShaderCompiler::SMacroDefinition are string views struct SMacroDefinitionBuffer @@ -678,7 +694,14 @@ class ShaderCompiler final : public IApplicationFramework smart_refctd_ptr m_system; smart_refctd_ptr m_logger; - std::vector m_arguments, m_include_search_paths; + struct SearchPathArgument + { + std::string path; + IShaderCompiler::IncludeClassification classification = {}; + }; + + std::vector m_arguments; + std::vector m_include_search_paths; smart_refctd_ptr m_assetMgr; }; diff --git a/tools/nsc/manifests/nsc-windows-x64-release.tag b/tools/nsc/manifests/nsc-windows-x64-release.tag index cd8d651439..94e66c264a 100644 --- a/tools/nsc/manifests/nsc-windows-x64-release.tag +++ b/tools/nsc/manifests/nsc-windows-x64-release.tag @@ -1 +1 @@ -nsc-windows-x64-release-66da590b3f06b586f69bdb522bad2f2eebf11b6f +nsc-windows-x64-release-27a4d6f68b9e1757fbeb86181d179c27251f81c9 diff --git a/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/build-info.json.dvc b/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/build-info.json.dvc index 2e26e723f6..bfd2a26c39 100644 --- a/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/build-info.json.dvc +++ b/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/build-info.json.dvc @@ -1,5 +1,5 @@ outs: -- md5: 3422c063e9f0078b4efae5aa374e12c6 - size: 1286 +- md5: 3d428b7ea712df7f29476037f3a4cc79 + size: 1269 hash: md5 path: build-info.json diff --git a/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/nsc.exe.dvc b/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/nsc.exe.dvc index 976c2e55fb..16abc634c4 100644 --- a/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/nsc.exe.dvc +++ b/tools/nsc/manifests/nsc-windows-x64-release/exe/tools/nsc/bin/nsc.exe.dvc @@ -1,5 +1,5 @@ outs: -- md5: e8859f963019b7c7dd0fd815e625e4ee - size: 256512 +- md5: 610fe2e3519a71dd4c55116ceb08bec7 + size: 258048 hash: md5 path: nsc.exe diff --git a/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/3rdparty/dxc/dxcompiler.dll.dvc b/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/3rdparty/dxc/dxcompiler.dll.dvc index 3b22d29bd8..eddd5001c1 100644 --- a/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/3rdparty/dxc/dxcompiler.dll.dvc +++ b/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/3rdparty/dxc/dxcompiler.dll.dvc @@ -1,5 +1,5 @@ outs: -- md5: bcdd137482f6fd4a3b55da0884978d58 +- md5: 33669fee00214d4b52d983a99c0a001d size: 21367296 hash: md5 path: dxcompiler.dll diff --git a/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/Nabla.dll.dvc b/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/Nabla.dll.dvc index 5ad31c5b16..dbd68f38f3 100644 --- a/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/Nabla.dll.dvc +++ b/tools/nsc/manifests/nsc-windows-x64-release/runtime/nbl/Nabla.dll.dvc @@ -1,5 +1,5 @@ outs: -- md5: 5707af6c5ca1d82db41e877d075af6b2 - size: 29018624 +- md5: 10ade1f6844aa6e984d499f386700fb4 + size: 29079040 hash: md5 path: Nabla.dll