diff --git a/include/nbl/asset/ICPUGeometryCollection.h b/include/nbl/asset/ICPUGeometryCollection.h index df135de117..e506420ea2 100644 --- a/include/nbl/asset/ICPUGeometryCollection.h +++ b/include/nbl/asset/ICPUGeometryCollection.h @@ -81,7 +81,7 @@ class NBL_API2 ICPUGeometryCollection : public IAsset, public IGeometryCollectio template// requires std::is_same_v()),decltype(ICPUBottomLevelAccelerationStructure::Triangles&)> inline Iterator exportForBLAS(Iterator out, uint32_t* pWrittenOrdinals=nullptr) const { - return exportForBLAS(std::forward(out),[](const hlsl::float32_t3x4& lhs, const hlsl::float32_t3x4& rhs)->void + return exportForBLAS(std::forward(out),[this, &pWrittenOrdinals](hlsl::float32_t3x4& lhs, const hlsl::float32_t3x4& rhs)->void { lhs = rhs; if (pWrittenOrdinals) diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h index 87d7a0ef5e..7981564760 100644 --- a/include/nbl/asset/utils/CGeometryCreator.h +++ b/include/nbl/asset/utils/CGeometryCreator.h @@ -25,7 +25,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted core::smart_refctd_ptr normalCache = nullptr; core::smart_refctd_ptr quaternionCache = nullptr; }; - inline CGeometryCreator(SCreationParams&& params={}) : m_params(std::move(params)) + inline CGeometryCreator(SCreationParams&& params={ nullptr, nullptr }) : m_params(std::move(params)) { if (!m_params.normalCache) m_params.normalCache = core::make_smart_refctd_ptr(); diff --git a/include/nbl/core/alloc/AddressAllocatorBase.h b/include/nbl/core/alloc/AddressAllocatorBase.h index d9021cf7fb..daf4b70965 100644 --- a/include/nbl/core/alloc/AddressAllocatorBase.h +++ b/include/nbl/core/alloc/AddressAllocatorBase.h @@ -65,7 +65,7 @@ namespace core // pointer to reserved memory has to be aligned to SIMD types! 
assert((reinterpret_cast(reservedSpace)&(_NBL_SIMD_ALIGNMENT-1u))==0ull); assert(maxAllocatableAlignment); - assert(core::isPoT(maxRequestableAlignment)); // this is not a proper alignment value + assert(hlsl::isPoT(maxRequestableAlignment)); // this is not a proper alignment value #endif // _NBL_DEBUG } AddressAllocatorBase(CRTP&& other, void* newReservedSpc) diff --git a/include/nbl/core/hash/blake.h b/include/nbl/core/hash/blake.h index 801b867766..23e8c470ca 100644 --- a/include/nbl/core/hash/blake.h +++ b/include/nbl/core/hash/blake.h @@ -8,6 +8,9 @@ #include +#define __NBL_CORE_BLAKE3_FUNCTION_STR(x) #x +#define __NBL_CORE_BLAKE3_FUNCTION_STRINGIFY(x) __NBL_CORE_BLAKE3_FUNCTION_STR(x) + namespace nbl::core { struct blake3_hash_t final @@ -29,7 +32,8 @@ class NBL_API2 blake3_hasher final // unfortunately there's no concept like StandardLayout or Aggregate for "just structs/classes of non-pointer types" so need to play it safe constexpr bool ForbiddenType = std::is_compound_v || std::is_enum_v || std::is_class_v; // use __FUNCTION__ to print something with `T` to the error log - static_assert(!ForbiddenType, __FUNCTION__ "Hashing Specialization for this Type is not implemented!"); + + static_assert(!ForbiddenType, __NBL_CORE_BLAKE3_FUNCTION_STRINGIFY(__FUNCTION__) "Hashing Specialization for this Type is not implemented!"); hasher.update(&input,sizeof(input)); } }; @@ -110,7 +114,7 @@ struct hash { auto* as_p_uint64_t = reinterpret_cast(blake3.data); size_t retval = as_p_uint64_t[0]; - for (auto i=1; i> 2); return retval; } diff --git a/include/nbl/core/memory/memory.h b/include/nbl/core/memory/memory.h index 28f150b4b4..518d3745a3 100644 --- a/include/nbl/core/memory/memory.h +++ b/include/nbl/core/memory/memory.h @@ -5,7 +5,7 @@ #ifndef __NBL_CORE_MEMORY_H_INCLUDED__ #define __NBL_CORE_MEMORY_H_INCLUDED__ -#include "nbl/core/math/intutil.h" +#include "nbl/builtin/hlsl/math/intutil.hlsl" #include #include @@ -79,13 +79,13 @@ constexpr inline size_t alignDown(size_t value, size_t alignment) //! Valid alignments are power of two constexpr inline bool is_alignment(size_t value) { - return core::isPoT(value); + return hlsl::isPoT(value); } //! 
constexpr inline bool is_aligned_to(size_t value, size_t alignment) { - return core::isPoT(alignment)&&((value&(alignment-1ull))==0ull); + return hlsl::isPoT(alignment)&&((value&(alignment-1ull))==0ull); } // clang complains about constexpr so make normal for now (also complains abour reinterpret_cast) inline bool is_aligned_to(const void* value, size_t alignment) diff --git a/include/nbl/core/util/bitflag.h b/include/nbl/core/util/bitflag.h index 1731c0cac3..02e815b615 100644 --- a/include/nbl/core/util/bitflag.h +++ b/include/nbl/core/util/bitflag.h @@ -36,6 +36,7 @@ struct bitflag final explicit constexpr operator bool() const {return bool(value);} constexpr bool operator!=(const bitflag rhs) const {return value!=rhs.value;} constexpr bool operator==(const bitflag rhs) const {return value==rhs.value;} + auto operator<=>(const bitflag& rhs) const = default; constexpr bool hasFlags(const bitflag val) const {return (static_cast(value) & static_cast(val.value)) == static_cast(val.value);} constexpr bool hasAnyFlag(const bitflag val) const {return (static_cast(value) & static_cast(val.value)) != static_cast(0);} }; diff --git a/include/nbl/nblpack.h b/include/nbl/nblpack.h index 78e558e64c..734fbf550e 100644 --- a/include/nbl/nblpack.h +++ b/include/nbl/nblpack.h @@ -12,9 +12,10 @@ #if defined(_MSC_VER) || defined(__GNUC__) || defined(__clang__) # ifdef _MSC_VER # pragma warning(disable: 4103) -# elif defined(__clang__) -# pragma clang diagnostic ignored "-Wpragma-pack" # endif +# ifdef __clang__ +# pragma clang diagnostic ignored "-Wpragma-pack" +# endif # pragma pack( push, packing ) # pragma pack( 1 ) // TODO: Remove PACK_STRUCT from the engine diff --git a/include/nbl/system/IAsyncQueueDispatcher.h b/include/nbl/system/IAsyncQueueDispatcher.h index d5b0cb8a1a..0989308fd6 100644 --- a/include/nbl/system/IAsyncQueueDispatcher.h +++ b/include/nbl/system/IAsyncQueueDispatcher.h @@ -3,6 +3,8 @@ #include "nbl/core/declarations.h" +#include "nbl/builtin/hlsl/math/intutil.hlsl" + #include "nbl/system/IThreadHandler.h" #include "nbl/system/atomic_state.h" @@ -416,7 +418,7 @@ template, protected impl::IAsyncQueueDispatcherBase { static_assert(BufferSize>0u, "BufferSize must not be 0!"); - static_assert(core::isPoT(BufferSize), "BufferSize must be power of two!"); + static_assert(hlsl::isPoT(BufferSize), "BufferSize must be power of two!"); protected: using base_t = IThreadHandler; diff --git a/include/nbl/system/ILogger.h b/include/nbl/system/ILogger.h index db013ebeb4..f79275d3a1 100644 --- a/include/nbl/system/ILogger.h +++ b/include/nbl/system/ILogger.h @@ -32,13 +32,13 @@ class ILogger : public core::IReferenceCounted ELL_ALL = 31 }; - inline void log(const std::string_view& fmtString, E_LOG_LEVEL logLevel = ELL_DEBUG, ...) + inline void log(const std::string_view fmtString, unsigned int logLevel = ELL_DEBUG, ...) 
{ if (logLevel & m_logLevelMask.value) { va_list args; va_start(args, logLevel); - log_impl(fmtString, logLevel, args); + log_impl(fmtString, static_cast(logLevel), args); va_end(args); } } @@ -60,7 +60,6 @@ class ILogger : public core::IReferenceCounted using namespace std::literals; using namespace std::chrono; auto currentTime = std::chrono::system_clock::now(); - const std::time_t t = std::chrono::system_clock::to_time_t(currentTime); // Since there is no real way in c++ to get current time with microseconds, this is my weird approach auto time_since_epoch = duration_cast(system_clock::now().time_since_epoch()); @@ -70,11 +69,14 @@ class ILogger : public core::IReferenceCounted // This while is for the microseconds which are less that 6 digits long to be aligned with the others while (time_since_epoch.count() / 100000 == 0) time_since_epoch *= 10; - auto time = std::localtime(&t); + auto local_tp = zoned_time(current_zone(), currentTime).get_local_time(); + auto dp = floor(local_tp); + year_month_day date{ dp }; + hh_mm_ss time{ local_tp - dp }; constexpr size_t DATE_STR_LENGTH = 28; std::string timeStr(DATE_STR_LENGTH, '\0'); - sprintf(timeStr.data(), "[%02d.%02d.%d %02d:%02d:%02d:%d]", time->tm_mday, time->tm_mon + 1, 1900 + time->tm_year, time->tm_hour, time->tm_min, time->tm_sec, (int)time_since_epoch.count()); + sprintf(timeStr.data(), "[%02d.%02d.%d %02d:%02d:%02d:%d]", unsigned(date.day()), unsigned(date.month()), int(date.year()), time.hours().count(), time.minutes().count(), (int)time.seconds().count(), (int)time_since_epoch.count()); std::string messageTypeStr; switch (logLevel) @@ -96,6 +98,8 @@ class ILogger : public core::IReferenceCounted break; case ELL_NONE: return ""; + default: + break; } va_list testArgs; // copy of va_list since it is not safe to use it twice diff --git a/include/nbl/system/SBuiltinFile.h b/include/nbl/system/SBuiltinFile.h index 78734fbdfe..010efd3e38 100644 --- a/include/nbl/system/SBuiltinFile.h +++ b/include/nbl/system/SBuiltinFile.h @@ -19,7 +19,9 @@ namespace nbl::system .tm_mday = 9, .tm_mon = 6, .tm_year = 9, - .tm_isdst = 0 + .tm_wday = 0, + .tm_yday = 0, + .tm_isdst = 0, }; }; } diff --git a/include/nbl/video/IGPUAccelerationStructure.h b/include/nbl/video/IGPUAccelerationStructure.h index 1bb4fb0c66..0b1ef08ff4 100644 --- a/include/nbl/video/IGPUAccelerationStructure.h +++ b/include/nbl/video/IGPUAccelerationStructure.h @@ -11,6 +11,7 @@ #include "nbl/video/IGPUBuffer.h" #include "nbl/builtin/hlsl/acceleration_structures.hlsl" +#include "nbl/builtin/hlsl/math/intutil.hlsl" namespace nbl::video @@ -221,7 +222,7 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat const size_t vertexSize = asset::getTexelOrBlockBytesize(geometry.vertexFormat); // TODO: improve in line with the spec https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresKHR-pInfos-03711 - const size_t vertexAlignment = core::max(core::roundDownToPoT(vertexSize/asset::getFormatChannelCount(geometry.vertexFormat)),1ull); + const size_t vertexAlignment = core::max(hlsl::roundDownToPoT(vertexSize/asset::getFormatChannelCount(geometry.vertexFormat)),1ull); // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureGeometryTrianglesDataKHR-vertexStride-03735 if (!core::is_aligned_to(geometry.vertexStride,vertexAlignment)) return false; diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 6298afeb27..7815e812aa 
100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -424,78 +424,13 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe asset::IBottomLevelAccelerationStructure::AABBs > inline AccelerationStructureBuildSizes getAccelerationStructureBuildSizes( - const bool hostBuild, - const core::bitflag flags, - const bool motionBlur, - const std::span geometries, - const uint32_t* const pMaxPrimitiveCounts - ) const - { - if (invalidFeaturesForASBuild(hostBuild,motionBlur)) - { - NBL_LOG_ERROR("Required features are not enabled"); - return {}; - } - - const auto& limits = getPhysicalDeviceLimits(); - if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags,limits,m_enabledFeatures)) - { - NBL_LOG_ERROR("Invalid build flags"); - return {}; - } - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkGetAccelerationStructureBuildSizesKHR-pBuildInfo-03619 - if (geometries.empty() && !pMaxPrimitiveCounts) - { - NBL_LOG_ERROR("Invalid parameters, no geometry or primitives were specified"); - return {}; - } - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureBuildGeometryInfoKHR-type-03793 - if (geometries.size() > limits.maxAccelerationStructureGeometryCount) - { - NBL_LOG_ERROR("Geometry count exceeds device limit"); - return {}; - } - - // not sure of VUID - uint32_t primsFree = limits.maxAccelerationStructurePrimitiveCount; - for (auto i=0u; igetBufferFormatUsages()[geom.vertexFormat].accelerationStructureVertex) - { - NBL_LOG_ERROR("Vertex Format %d not supported as Acceleration Structure Vertex Position Input on this Device",geom.vertexFormat); - return {}; - } - // TODO: do we check `maxVertex`, `vertexStride` and `indexType` for validity - } - if constexpr (Geometry::Type==asset::IBottomLevelAccelerationStructure::GeometryType::AABBs) - { - if (!flags.hasFlags(asset::IBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT)) - { - NBL_LOG_ERROR("Primitive type is AABB but build flag says BLAS build is not AABBs"); - return {}; - } - // TODO: check stride and geometry flags for validity - } - if (pMaxPrimitiveCounts[i] > primsFree) - { - NBL_LOG_ERROR("Primitive count exceeds device limit"); - return {}; - } - primsFree -= pMaxPrimitiveCounts[i]; - } + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span geometries, + const uint32_t* const pMaxPrimitiveCounts + ) const; - return getAccelerationStructureBuildSizes_impl(hostBuild,flags,motionBlur,geometries,pMaxPrimitiveCounts); - } inline AccelerationStructureBuildSizes getAccelerationStructureBuildSizes( const bool hostBuild, const core::bitflag flags, @@ -807,7 +742,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe // upon completion set the BLASes tracked inline void operator()(IDeferredOperation*) const { - const auto buildVer = dst->pushTrackedBLASes({src->begin()},{src->end()}); + const auto buildVer = dst->pushTrackedBLASes({src.begin()},{src.end()}); dst->clearTrackedBLASes(buildVer); } @@ -1602,6 +1537,40 @@ inline bool ILogicalDevice::validateMemoryBarrier(const uint32_t queueFamilyInde return validateMemoryBarrier(queueFamilyIndex,barrier.barrier); } +#ifndef _NBL_VIDEO_I_LOGICAL_DEVICE_CPP_ +extern template ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes>( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, 
+ const std::span> geometries, + const uint32_t* const pMaxPrimitiveCounts +) const; + +extern template ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes>( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span> geometries, + const uint32_t* const pMaxPrimitiveCounts +) const; + +extern template ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes>( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span> geometries, + const uint32_t* const pMaxPrimitiveCounts +) const; + +extern template ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes>( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span> geometries, + const uint32_t* const pMaxPrimitiveCounts +) const; +#endif + } // namespace nbl::video #include "nbl/undef_logging_macros.h" diff --git a/include/vectorSIMD.h b/include/vectorSIMD.h index 4c9c90d236..f971b3d66a 100644 --- a/include/vectorSIMD.h +++ b/include/vectorSIMD.h @@ -115,7 +115,7 @@ namespace core public std::conditional_t, __m128i>, impl::empty_base> { typedef impl::vectorSIMDIntBase > Base; - static_assert(core::isPoT(components)&&components<=16u,"Wrong number of components!\n"); + static_assert(hlsl::isPoT(components)&&components<=16u,"Wrong number of components!\n"); public: using Base::Base; diff --git a/src/nbl/asset/IAssetManager.cpp b/src/nbl/asset/IAssetManager.cpp index 10dfeb33be..9b2534f2e0 100644 --- a/src/nbl/asset/IAssetManager.cpp +++ b/src/nbl/asset/IAssetManager.cpp @@ -95,7 +95,7 @@ std::function nbl::asset::makeAssetGreetFunc(const IAssetMa { return [_mgr](SAssetBundle& _asset) { _mgr->setAssetCached(_asset, true); - auto rng = _asset.getContents(); + // auto rng = _asset.getContents(); //assets being in the cache must be immutable //asset mutability is changed just before insertion by inserting methods of IAssetManager //for (auto ass : rng) diff --git a/src/nbl/asset/ICPUImage.cpp b/src/nbl/asset/ICPUImage.cpp index cd3f884890..503e8f8851 100644 --- a/src/nbl/asset/ICPUImage.cpp +++ b/src/nbl/asset/ICPUImage.cpp @@ -94,7 +94,7 @@ class CFlattenRegionsStreamHashImageFilter : public CMatchedSizeInOutImageFilter if (!state->scratch.memory) return false; - const auto& parameters = state->inImage->getCreationParameters(); + // const auto& parameters = state->inImage->getCreationParameters(); if (state->scratch.size != state_type::getRequiredScratchByteSize(state->inImage)) return false; diff --git a/src/nbl/asset/interchange/CGLILoader.cpp b/src/nbl/asset/interchange/CGLILoader.cpp index 1599b765b0..cdc3bb0be3 100644 --- a/src/nbl/asset/interchange/CGLILoader.cpp +++ b/src/nbl/asset/interchange/CGLILoader.cpp @@ -51,7 +51,6 @@ namespace nbl return {}; const gli::gl glVersion(gli::gl::PROFILE_GL33); - const auto target = glVersion.translate(texture.target()); const auto format = getTranslatedGLIFormat(texture, glVersion, _params.logger); IImage::E_TYPE imageType; IImageView::E_TYPE imageViewType; @@ -105,6 +104,7 @@ namespace nbl } default: { + imageType = IImage::ET_1D; // suppress -Wsometimes-uninitialized by setting whatever value imageViewType = ICPUImageView::ET_COUNT; assert(0); break; @@ -115,8 +115,7 @@ namespace nbl const bool layersFlag = doesItHaveLayers(imageViewType); const auto texelBlockDimension = asset::getBlockDimensions(format.first); - const auto texelBlockByteSize = 
asset::getTexelOrBlockBytesize(format.first); - auto texelBuffer = ICPUBuffer::create({ texture.size() }); + auto texelBuffer = ICPUBuffer::create({ {texture.size()} }); auto data = reinterpret_cast(texelBuffer->getPointer()); ICPUImage::SCreationParams imageInfo = {}; diff --git a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp index d4b9a3e394..7ac93247ef 100644 --- a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp +++ b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp @@ -13,6 +13,7 @@ #include "nbl/system/CFileView.h" #include "nbl/builtin/MTLdefaults.h" +#include "nbl/builtin/hlsl/math/intutil.hlsl" using namespace nbl; @@ -100,7 +101,9 @@ void CGraphicsPipelineLoaderMTL::initialize() .tm_mday = 9, .tm_mon = 6, .tm_year = 69, - .tm_isdst = 0 + .tm_wday = 27, + .tm_yday = 27, + .tm_isdst = 0, }; const auto tp = std::chrono::system_clock::from_time_t(std::mktime(&tm)); @@ -618,7 +621,7 @@ CGraphicsPipelineLoaderMTL::image_views_set_t CGraphicsPipelineLoaderMTL::loadIm assert(images[i]->getRegions().size()==1ull); regions_.push_back(images[i]->getRegions().begin()[0]); - regions_.back().bufferOffset = core::roundUp(regions_.back().bufferOffset, alignment); + regions_.back().bufferOffset = hlsl::roundUp(regions_.back().bufferOffset, alignment); regions_.back().imageSubresource.baseArrayLayer = (i - CMTLMetadata::CRenderpassIndependentPipeline::EMP_REFL_POSX); bufSz += images[i]->getImageDataSizeInBytes(); @@ -626,7 +629,7 @@ CGraphicsPipelineLoaderMTL::image_views_set_t CGraphicsPipelineLoaderMTL::loadIm } #endif } - auto imgDataBuf = ICPUBuffer::create({ bufSz }); + auto imgDataBuf = ICPUBuffer::create({ {bufSz} }); for (uint32_t i = CMTLMetadata::CRenderpassIndependentPipeline::EMP_REFL_POSX, j = 0u; i < CMTLMetadata::CRenderpassIndependentPipeline::EMP_REFL_POSX + 6u; ++i) { #ifndef _NBL_DEBUG diff --git a/src/nbl/asset/interchange/CImageLoaderJPG.cpp b/src/nbl/asset/interchange/CImageLoaderJPG.cpp index 45677ff5cf..8aae9ca7b1 100644 --- a/src/nbl/asset/interchange/CImageLoaderJPG.cpp +++ b/src/nbl/asset/interchange/CImageLoaderJPG.cpp @@ -277,15 +277,12 @@ asset::SAssetBundle CImageLoaderJPG::loadAsset(system::IFile* _file, const asset case JCS_CMYK: _params.logger.log("CMYK color space is unsupported:", system::ILogger::ELL_ERROR, _file->getFileName().string()); return {}; - break; case JCS_YCCK: // this I have no resources on _params.logger.log("YCCK color space is unsupported: %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); return {}; - break; default: _params.logger.log("Can't load as color space is unknown: %s", system::ILogger::ELL_ERROR, _file->getFileName().string().c_str()); return {}; - break; } cinfo.do_fancy_upsampling = TRUE; diff --git a/src/nbl/asset/interchange/CImageLoaderOpenEXR.cpp b/src/nbl/asset/interchange/CImageLoaderOpenEXR.cpp index da4e00070f..63a620beda 100644 --- a/src/nbl/asset/interchange/CImageLoaderOpenEXR.cpp +++ b/src/nbl/asset/interchange/CImageLoaderOpenEXR.cpp @@ -328,8 +328,8 @@ SAssetBundle CImageLoaderOpenEXR::loadAsset(system::IFile* _file, const asset::I const auto mapOfChannels = data.second; PerImageData perImageData; - int width; - int height; + int width = 0; + int height = 0; auto params = perImageData.params; params.format = specifyIrrlichtEndFormat(mapOfChannels, suffixOfChannels, file.fileName(), _params.logger); @@ -562,7 +562,7 @@ bool readHeader(IMF::IStream* nblIStream, SContext& ctx) return false; auto& attribs = 
ctx.attributes; - auto& versionField = ctx.versionField; + // auto& versionField = ctx.versionField; /* diff --git a/src/nbl/asset/interchange/CPLYMeshWriter.cpp b/src/nbl/asset/interchange/CPLYMeshWriter.cpp index fd6fa3ea9e..ddade348fb 100644 --- a/src/nbl/asset/interchange/CPLYMeshWriter.cpp +++ b/src/nbl/asset/interchange/CPLYMeshWriter.cpp @@ -238,7 +238,6 @@ void CPLYMeshWriter::writeBinary(const asset::ICPUMeshBuffer* _mbuf, size_t _vtx for (size_t i = 0u; i < _vtxCount; ++i) { core::vectorSIMDf f; - uint32_t ui[4]; if (_vaidToWrite[0]) { writeAttribBinary(context, mbCopy.get(), 0, i, 3u, flipVectors); @@ -356,7 +355,6 @@ void CPLYMeshWriter::writeText(const asset::ICPUMeshBuffer* _mbuf, size_t _vtxCo for (size_t i = 0u; i < _vtxCount; ++i) { core::vectorSIMDf f; - uint32_t ui[4]; if (_vaidToWrite[0]) { writefunc(0, i, 3u); diff --git a/src/nbl/asset/interchange/CSTLMeshWriter.cpp b/src/nbl/asset/interchange/CSTLMeshWriter.cpp index 45c7c1f939..9ccb89c337 100644 --- a/src/nbl/asset/interchange/CSTLMeshWriter.cpp +++ b/src/nbl/asset/interchange/CSTLMeshWriter.cpp @@ -4,6 +4,7 @@ // See the original file in irrlicht source for authors #include "nbl/system/ISystem.h" #include "nbl/system/IFile.h" +#include #include "CSTLMeshWriter.h" #include "SColor.h" @@ -12,10 +13,10 @@ using namespace nbl; using namespace nbl::asset; #ifdef _NBL_COMPILE_WITH_STL_WRITER_ -constexpr auto POSITION_ATTRIBUTE = 0; +// constexpr auto POSITION_ATTRIBUTE = 0; constexpr auto COLOR_ATTRIBUTE = 1; -constexpr auto UV_ATTRIBUTE = 2; -constexpr auto NORMAL_ATTRIBUTE = 3; +// constexpr auto UV_ATTRIBUTE = 2; +// constexpr auto NORMAL_ATTRIBUTE = 3; CSTLMeshWriter::CSTLMeshWriter() { @@ -67,7 +68,7 @@ template inline void writeFacesBinary(const asset::ICPUMeshBuffer* buffer, const bool& noIndices, system::IFile* file, uint32_t _colorVaid, IAssetWriter::SAssetWriteContext* context, size_t* fileOffset) { auto& inputParams = buffer->getPipeline()->getCachedCreationParams().vertexInput; - bool hasColor = inputParams.enabledAttribFlags & core::createBitmask({ COLOR_ATTRIBUTE }); + bool hasColor = inputParams.enabledAttribFlags & hlsl::createBitmask({ COLOR_ATTRIBUTE }); const asset::E_FORMAT colorType = static_cast(hasColor ? 
inputParams.attributes[COLOR_ATTRIBUTE].format : asset::EF_UNKNOWN); const uint32_t indexCount = buffer->getIndexCount(); diff --git a/src/nbl/asset/material_compiler/CMaterialCompilerGLSLBackendCommon.cpp b/src/nbl/asset/material_compiler/CMaterialCompilerGLSLBackendCommon.cpp index 7e67141de1..fd70263a9e 100644 --- a/src/nbl/asset/material_compiler/CMaterialCompilerGLSLBackendCommon.cpp +++ b/src/nbl/asset/material_compiler/CMaterialCompilerGLSLBackendCommon.cpp @@ -5,7 +5,8 @@ #include #include -#include + +#include namespace nbl { @@ -303,7 +304,7 @@ class ITraversalGenerator subres.layerCount = 1u; subres.baseMipLevel = 0u; const uint32_t mx = std::max(extent.width, extent.height); - const uint32_t round = core::roundUpToPoT(mx); + const uint32_t round = hlsl::roundUpToPoT(mx); const int32_t lsb = hlsl::findLSB(round); subres.levelCount = static_cast(lsb + 1); @@ -1232,16 +1233,12 @@ auto CMaterialCompilerGLSLBackendCommon::compile(SContext* _ctx, IR* _ir, E_GENE { case EGST_PRESENT: return 4u; - break; // When desiring Albedo and Normal Extraction, one needs to use extra registers for albedo, normal and throughput scale case EGST_PRESENT_WITH_AOV_EXTRACTION: // TODO: investigate whether using 10-16bit storage (fixed point or half float) makes execution faster, because // albedo could fit in 1.5 DWORDs as 16bit (or 1 DWORDs as 10 bit), normal+throughput scale in 2 DWORDs as half floats or 16 bit snorm // and value/pdf is a low dynamic range so half float could be feasible! Giving us a total register count of 5 DWORDs. return 11u; - break; - default: - break; } // only colour contribution return 3u; @@ -1483,7 +1480,7 @@ void material_compiler::CMaterialCompilerGLSLBackendCommon::debugPrint(std::ostr using namespace tex_prefetch; const instr_stream::tex_prefetch::prefetch_instr_t& instr = _res.prefetch_stream[tex_prefetch.first + i]; - const auto& vtid = instr.s.tex_data.vtid; + // const auto& vtid = instr.s.tex_data.vtid; _out << "### instr " << i << "\n"; const uint32_t reg_cnt = instr.getRegCnt(); @@ -1511,9 +1508,6 @@ void material_compiler::CMaterialCompilerGLSLBackendCommon::debugPrint(std::ostr void material_compiler::CMaterialCompilerGLSLBackendCommon::debugPrintInstr(std::ostream& _out, instr_t instr, const result_t& _res, const SContext* _ctx) const { - auto texDataStr = [](const instr_stream::STextureData& td) { - return "{ " + std::to_string(reinterpret_cast(td.vtid)) + ", " + std::to_string(reinterpret_cast(td.scale)) + " }"; - }; auto paramVal3OrRegStr = [](const instr_stream::STextureOrConstant& tc, bool tex) -> std::string { if (tex) return std::to_string(tc.prefetch); diff --git a/src/nbl/asset/utils/CCompilerSet.cpp b/src/nbl/asset/utils/CCompilerSet.cpp index e1b161f4e3..5e108aab9d 100644 --- a/src/nbl/asset/utils/CCompilerSet.cpp +++ b/src/nbl/asset/utils/CCompilerSet.cpp @@ -32,6 +32,8 @@ core::smart_refctd_ptr CCompilerSet::compileToSPIRV(const IShader* shad outSpirvShader = core::smart_refctd_ptr(const_cast(shader)); } break; + default: + break; } } return outSpirvShader; diff --git a/src/nbl/asset/utils/CDerivativeMapCreator.cpp b/src/nbl/asset/utils/CDerivativeMapCreator.cpp index 9d06af7a5a..f37ca655d3 100644 --- a/src/nbl/asset/utils/CDerivativeMapCreator.cpp +++ b/src/nbl/asset/utils/CDerivativeMapCreator.cpp @@ -80,7 +80,7 @@ core::smart_refctd_ptr CDerivativeMapCreator::createDerivativeMapFrom auto outParams = inParams; outParams.format = getRGformat(outParams.format); const uint32_t pitch = IImageAssetHandlerBase::calcPitchInBlocks(outParams.extent.width, 
getTexelOrBlockBytesize(outParams.format)); - auto buffer = ICPUBuffer::create({ getTexelOrBlockBytesize(outParams.format) * pitch * outParams.extent.height }); + auto buffer = ICPUBuffer::create({ { getTexelOrBlockBytesize(outParams.format) * pitch * outParams.extent.height } }); ICPUImage::SBufferCopy region; region.imageOffset = { 0,0,0 }; region.imageExtent = outParams.extent; @@ -194,7 +194,7 @@ core::smart_refctd_ptr CDerivativeMapCreator::createDerivativeMapFrom core::smart_refctd_ptr newDerivativeNormalMapImage; { const uint32_t pitch = IImageAssetHandlerBase::calcPitchInBlocks(newImageParams.extent.width,getTexelOrBlockBytesize(newImageParams.format)); - core::smart_refctd_ptr newCpuBuffer = ICPUBuffer::create({ getTexelOrBlockBytesize(newImageParams.format) * pitch * newImageParams.extent.height }); + core::smart_refctd_ptr newCpuBuffer = ICPUBuffer::create({ { getTexelOrBlockBytesize(newImageParams.format) * pitch * newImageParams.extent.height } }); ICPUImage::SBufferCopy region; region.imageOffset = { 0,0,0 }; diff --git a/src/nbl/asset/utils/CForsythVertexCacheOptimizer.cpp b/src/nbl/asset/utils/CForsythVertexCacheOptimizer.cpp index 5f5ab2c505..fa23756ac1 100644 --- a/src/nbl/asset/utils/CForsythVertexCacheOptimizer.cpp +++ b/src/nbl/asset/utils/CForsythVertexCacheOptimizer.cpp @@ -54,7 +54,7 @@ namespace asset core::vector triangleData(NumPrimitives); uint32_t curIdx = 0; - for (int32_t tri = 0; tri < NumPrimitives; tri++) + for (int32_t tri = 0; std::cmp_less(tri, NumPrimitives); tri++) { TriData &curTri = triangleData[tri]; @@ -77,7 +77,7 @@ namespace asset // Allocate per-vertex triangle lists, and calculate the starting score of // each of the verts - for (int32_t v = 0; v < _numVerts; v++) + for (int32_t v = 0; std::cmp_less(v, _numVerts); v++) { VertData &curVert = vertexData[v]; curVert.triIndex = new int32_t[curVert.numUnaddedReferences]; @@ -88,14 +88,14 @@ namespace asset int32_t nextNextBestTriIdx = -1, nextBestTriIdx = -1; float nextNextBestTriScore = -1.0f, nextBestTriScore = -1.0f; -#define _VALIDATE_TRI_IDX(idx) if(idx > -1) { _NBL_DEBUG_BREAK_IF(idx >= NumPrimitives); /*Out of range triangle index.*/ _NBL_DEBUG_BREAK_IF(triangleData[idx].isInList); /*Triangle already in list, bad.*/ } +#define _VALIDATE_TRI_IDX(idx) if(idx > -1) { _NBL_DEBUG_BREAK_IF(std::cmp_greater_equal(idx, NumPrimitives)); /*Out of range triangle index.*/ _NBL_DEBUG_BREAK_IF(triangleData[idx].isInList); /*Triangle already in list, bad.*/ } #define _CHECK_NEXT_NEXT_BEST(scr, idx) { if(scr > nextNextBestTriScore) { nextNextBestTriIdx = idx; nextNextBestTriScore = scr; } } #define _CHECK_NEXT_BEST(scr, idx) { if(scr > nextBestTriScore) { _CHECK_NEXT_NEXT_BEST(nextBestTriScore, nextBestTriIdx); nextBestTriIdx = idx; nextBestTriScore = scr; } _VALIDATE_TRI_IDX(nextBestTriIdx); } // Fill-in per-vertex triangle lists, and sum the scores of each vertex used // per-triangle, to get the starting triangle score curIdx = 0; - for (int32_t tri = 0; tri < NumPrimitives; tri++) + for (int32_t tri = 0; std::cmp_less(tri, NumPrimitives); tri++) { TriData &curTri = triangleData[tri]; @@ -123,7 +123,7 @@ namespace asset // Step 2: Start emitting triangles...this is the emit loop // LRUCacheModel lruCache; - for (int32_t outIdx = 0; outIdx < _numIndices; /* this space intentionally left blank */) + for (int32_t outIdx = 0; std::cmp_less(outIdx, _numIndices); /* this space intentionally left blank */) { // If there is no next best triangle, than search for the next highest // scored triangle that isn't in the
list already @@ -133,7 +133,7 @@ namespace asset nextBestTriScore = nextNextBestTriScore = -1.0f; nextBestTriIdx = nextNextBestTriIdx = -1; - for (int32_t tri = 0; tri < NumPrimitives; tri++) + for (int32_t tri = 0; std::cmp_less(tri, NumPrimitives); tri++) { TriData &curTri = triangleData[tri]; @@ -298,16 +298,16 @@ namespace asset // Update cache position on verts still in cache vData.cachePosition = length++; - for (int32_t i = 0; i < vData.numReferences; i++) + for (int32_t i = 0; std::cmp_less(i, vData.numReferences); i++) { const int32_t &triIdx = vData.triIndex[i]; if (triIdx > -1) { int32_t j = 0; - for (; j < outTrisToUpdate.size(); j++) - if (outTrisToUpdate[j] == triIdx) + for (; std::cmp_less(j, outTrisToUpdate.size()); j++) + if (std::cmp_equal(outTrisToUpdate[j], triIdx)) break; - if (j == outTrisToUpdate.size()) + if (std::cmp_equal(j, outTrisToUpdate.size())) outTrisToUpdate.push_back(triIdx); } } diff --git a/src/nbl/asset/utils/CGLSLCompiler.cpp b/src/nbl/asset/utils/CGLSLCompiler.cpp index 723bad2a7b..e61a01c34e 100644 --- a/src/nbl/asset/utils/CGLSLCompiler.cpp +++ b/src/nbl/asset/utils/CGLSLCompiler.cpp @@ -14,29 +14,29 @@ using namespace nbl; using namespace nbl::asset; -static constexpr const char* PREPROC_GL__DISABLER = "_this_is_a_GL__prefix_"; -static constexpr const char* PREPROC_GL__ENABLER = PREPROC_GL__DISABLER; -static constexpr const char* PREPROC_LINE_CONTINUATION_DISABLER = "_this_is_a_line_continuation_\n"; -static constexpr const char* PREPROC_LINE_CONTINUATION_ENABLER = "_this_is_a_line_continuation_"; +// static constexpr const char* PREPROC_GL__DISABLER = "_this_is_a_GL__prefix_"; +// static constexpr const char* PREPROC_GL__ENABLER = PREPROC_GL__DISABLER; +// static constexpr const char* PREPROC_LINE_CONTINUATION_DISABLER = "_this_is_a_line_continuation_\n"; +// static constexpr const char* PREPROC_LINE_CONTINUATION_ENABLER = "_this_is_a_line_continuation_"; //string to be replaced with all "#" except those in "#include" static constexpr const char* PREPROC_DIRECTIVE_DISABLER = "_this_is_a_hash_"; static constexpr const char* PREPROC_DIRECTIVE_ENABLER = PREPROC_DIRECTIVE_DISABLER; -static void disableGlDirectives(std::string& _code) -{ - std::regex glMacro("[ \t\r\n\v\f]GL_"); - auto result = std::regex_replace(_code, glMacro, PREPROC_GL__DISABLER); - std::regex lineContinuation("\\\\[ \t\r\n\v\f]*\n"); - _code = std::regex_replace(result, lineContinuation, PREPROC_LINE_CONTINUATION_DISABLER); -} - -static void reenableGlDirectives(std::string& _code) -{ - std::regex lineContinuation(PREPROC_LINE_CONTINUATION_ENABLER); - auto result = std::regex_replace(_code, lineContinuation, " \\"); - std::regex glMacro(PREPROC_GL__ENABLER); - _code = std::regex_replace(result, glMacro, " GL_"); -} +//static void disableGlDirectives(std::string& _code) +//{ +// std::regex glMacro("[ \t\r\n\v\f]GL_"); +// auto result = std::regex_replace(_code, glMacro, PREPROC_GL__DISABLER); +// std::regex lineContinuation("\\\\[ \t\r\n\v\f]*\n"); +// _code = std::regex_replace(result, lineContinuation, PREPROC_LINE_CONTINUATION_DISABLER); +//} +// +//static void reenableGlDirectives(std::string& _code) +//{ +// std::regex lineContinuation(PREPROC_LINE_CONTINUATION_ENABLER); +// auto result = std::regex_replace(_code, lineContinuation, " \\"); +// std::regex glMacro(PREPROC_GL__ENABLER); +// _code = std::regex_replace(result, glMacro, " GL_"); +//} namespace nbl::asset::impl @@ -44,8 +44,8 @@ namespace nbl::asset::impl class Includer : public 
shaderc::CompileOptions::IncluderInterface { const IShaderCompiler::CIncludeFinder* m_defaultIncludeFinder; - const system::ISystem* m_system; - const uint32_t m_maxInclCnt; + [[maybe_unused]] const system::ISystem* m_system; + [[maybe_unused]] const uint32_t m_maxInclCnt; public: Includer(const IShaderCompiler::CIncludeFinder* _inclFinder, const system::ISystem* _fs, uint32_t _maxInclCnt) : m_defaultIncludeFinder(_inclFinder), m_system(_fs), m_maxInclCnt{ _maxInclCnt } {} @@ -204,7 +204,7 @@ std::string CGLSLCompiler::encloseWithinExtraInclGuards(std::string&& _code, uin std::string defBase_ = "_GENERATED_INCLUDE_GUARD_"s + _identifier + "_"; std::replace_if(defBase_.begin(), defBase_.end(), [](char c) ->bool { return !::isalpha(c) && !::isdigit(c); }, '_'); - auto genDefs = [&defBase_, _maxInclusions, _identifier] { + auto genDefs = [&defBase_, _maxInclusions] { auto defBase = [&defBase_](uint32_t n) { return defBase_ + std::to_string(n); }; std::string defs = "#ifndef " + defBase(0) + "\n\t#define " + defBase(0) + "\n"; for (uint32_t i = 1u; i <= _maxInclusions; ++i) { @@ -214,7 +214,7 @@ std::string CGLSLCompiler::encloseWithinExtraInclGuards(std::string&& _code, uin defs += "#endif\n"; return defs; }; - auto genUndefs = [&defBase_, _maxInclusions, _identifier] { + auto genUndefs = [&defBase_, _maxInclusions] { auto defBase = [&defBase_](int32_t n) { return defBase_ + std::to_string(n); }; std::string undefs = "#ifdef " + defBase(_maxInclusions) + "\n\t#undef " + defBase(_maxInclusions) + "\n"; for (int32_t i = _maxInclusions - 1; i >= 0; --i) { @@ -275,7 +275,7 @@ core::smart_refctd_ptr CGLSLCompiler::compileToSPIRV_impl(const std::st if (bin_res.GetCompilationStatus() == shaderc_compilation_status_success) { - auto outSpirv = ICPUBuffer::create({ std::distance(bin_res.cbegin(), bin_res.cend()) * sizeof(uint32_t) }); + auto outSpirv = ICPUBuffer::create({ {std::distance(bin_res.cbegin(), bin_res.cend()) * sizeof(uint32_t)} }); memcpy(outSpirv->getPointer(), bin_res.cbegin(), outSpirv->getSize()); if (glslOptions.spirvOptimizer) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index c5c6ac6765..e435182b7a 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -231,9 +231,9 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( coneVertices[i].pos[c] = newPos[c]; } - auto newArrowVertexBuffer = asset::ICPUBuffer::create({ newArrowVertexCount * sizeof(ArrowVertex) }); + auto newArrowVertexBuffer = asset::ICPUBuffer::create({ {newArrowVertexCount * sizeof(ArrowVertex)} }); newArrowVertexBuffer->setUsageFlags(newArrowVertexBuffer->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - auto newArrowIndexBuffer = asset::ICPUBuffer::create({ newArrowIndexCount * sizeof(uint16_t) }); + auto newArrowIndexBuffer = asset::ICPUBuffer::create({ {newArrowIndexCount * sizeof(uint16_t)} }); newArrowIndexBuffer->setUsageFlags(newArrowIndexBuffer->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); for (auto z = 0ull; z < newArrowVertexCount; ++z) @@ -277,7 +277,7 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( {0u,EF_R32G32_SFLOAT,offsetof(ArrowVertex,uv)}, {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ArrowVertex,normal)} }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} + {{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX}} }; arrow.bindings[0] = { 0, std::move(newArrowVertexBuffer) }; @@ -300,7 +300,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float 
{0u,EF_R8G8B8A8_UNORM,offsetof(SphereVertex,color)}, {0u,EF_R32G32_SFLOAT,offsetof(SphereVertex,uv)}, {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(SphereVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; + },{{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX}} }; if (polyCountX < 2) polyCountX = 2; @@ -310,7 +310,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level retval.indexCount = (polyCountX * polyCountY) * 6; - auto indices = asset::ICPUBuffer::create({ sizeof(uint32_t) * retval.indexCount }); + auto indices = asset::ICPUBuffer::create({ {sizeof(uint32_t) * retval.indexCount} }); // Create indices { @@ -380,7 +380,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float { size_t vertexSize = 3 * 4 + 4 + 2 * 4 + 4; size_t vertexCount = (polyCountXPitch * polyCountY) + 2; - auto vtxBuf = asset::ICPUBuffer::create({ vertexCount * vertexSize }); + auto vtxBuf = asset::ICPUBuffer::create({ {vertexCount * vertexSize} }); auto* tmpMem = reinterpret_cast(vtxBuf->getPointer()); for (size_t i = 0; i < vertexCount; i++) { @@ -503,10 +503,10 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( {0u,EF_R8G8B8A8_UNORM,offsetof(CylinderVertex,color)}, {0u,EF_R32G32_SFLOAT,offsetof(CylinderVertex,uv)}, {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(CylinderVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; + },{{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX}} }; const size_t vtxCnt = 2u*tesselation; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt*sizeof(CylinderVertex) }); + auto vtxBuf = asset::ICPUBuffer::create({ {vtxCnt * sizeof(CylinderVertex)} }); CylinderVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); for (auto i=0ull; i CGeometryCreator::createCylinder( constexpr uint32_t rows = 2u; retval.indexCount = rows * 3u * tesselation; - auto idxBuf = asset::ICPUBuffer::create({ retval.indexCount *sizeof(uint16_t) }); + auto idxBuf = asset::ICPUBuffer::create({ {retval.indexCount * sizeof(uint16_t)} }); uint16_t* indices = (uint16_t*)idxBuf->getPointer(); for (uint32_t i = 0u, j = 0u; i < halfIx; ++i) @@ -571,7 +571,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( ) const { const size_t vtxCnt = tesselation * 2; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt * sizeof(ConeVertex) }); + auto vtxBuf = asset::ICPUBuffer::create({ {vtxCnt * sizeof(ConeVertex)} }); ConeVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); ConeVertex* baseVertices = vertices; @@ -615,7 +615,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( apexVertices[i].normal = quantNormalCache->quantize(core::normalize(u1)); } - auto idxBuf = asset::ICPUBuffer::create({ 3u * tesselation * sizeof(uint16_t) }); + auto idxBuf = asset::ICPUBuffer::create({ {3u * tesselation * sizeof(uint16_t)} }); uint16_t* indices = (uint16_t*)idxBuf->getPointer(); const uint32_t firstIndexOfBaseVertices = 0; @@ -637,7 +637,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( {0u,EF_R8G8B8A8_UNORM,offsetof(ConeVertex,color)}, {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ConeVertex,normal)} }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} + {{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX}} }; vtxBuf->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT); @@ -1219,14 +1219,15 @@ class Icosphere addTexCoords(t3, t11, t4); addIndices(index + 9, index + 10, index + 11); - // add 6 edge lines per iteration - // i - // / / / / / : (i, 
i+1) - // /__ /__ /__ /__ /__ - // \ /\ /\ /\ /\ / : (i+3, i+4), (i+3, i+5), (i+4, i+5) - // \/__\/__\/__\/__\/__ - // \ \ \ \ \ : (i+9,i+10), (i+9, i+11) - // \ \ \ \ \ + /* add 6 edge lines per iteration + i + / / / / / : (i, i+1) + /__ /__ /__ /__ /__ + \ /\ /\ /\ /\ / : (i+3, i+4), (i+3, i+5), (i+4, i+5) + \/__\/__\/__\/__\/__ + \ \ \ \ \ : (i+9,i+10), (i+9, i+11) + \ \ \ \ \ + */ lineIndices.push_back(index); // (i, i+1) lineIndices.push_back(index + 1); // (i, i+1) lineIndices.push_back(index + 3); // (i+3, i+4) @@ -1282,19 +1283,20 @@ class Icosphere float n[3]; // normal float scale; // scale factor for normalization - // smooth icosahedron has 14 non-shared (0 to 13) and - // 8 shared vertices (14 to 21) (total 22 vertices) - // 00 01 02 03 04 - // /\ /\ /\ /\ /\ - // / \/ \/ \/ \/ \ - //10--14--15--16--17--11 - // \ /\ /\ /\ /\ /\ - // \/ \/ \/ \/ \/ \ - // 12--18--19--20--21--13 - // \ /\ /\ /\ /\ / - // \/ \/ \/ \/ \/ - // 05 06 07 08 09 - // add 14 non-shared vertices first (index from 0 to 13) + /* smooth icosahedron has 14 non-shared (0 to 13) and + 8 shared vertices (14 to 21) (total 22 vertices) + 00 01 02 03 04 + /\ /\ /\ /\ /\ + / \/ \/ \/ \/ \ + 10--14--15--16--17--11 + \ /\ /\ /\ /\ /\ + \/ \/ \/ \/ \/ \ + 12--18--19--20--21--13 + \ /\ /\ /\ /\ / + \/ \/ \/ \/ \/ + 05 06 07 08 09 + add 14 non-shared vertices first (index from 0 to 13) + */ addVertex(tmpVertices[0], tmpVertices[1], tmpVertices[2]); // v0 (top) addNormal(0, 0, 1); @@ -1494,7 +1496,7 @@ class Icosphere int32_t i, j; // iteration - for (i = 1; i <= subdivision; ++i) + for (i = 1; std::cmp_less_equal(i, subdivision); ++i) { // copy prev arrays tmpVertices = vertices; @@ -1589,7 +1591,7 @@ class Icosphere int32_t i, j; // iteration for subdivision - for (i = 1; i <= subdivision; ++i) + for (i = 1; std::cmp_less_equal(i, subdivision); ++i) { // copy prev indices tmpIndices = indices; @@ -1804,7 +1806,7 @@ class Icosphere float radius; // circumscribed radius uint32_t subdivision; - bool smooth; + [[maybe_unused]] bool smooth; core::vector vertices; core::vector normals; core::vector texCoords; @@ -1833,11 +1835,11 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl {0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,normals)}, {0u, EF_R32G32_SFLOAT, offsetof(IcosphereVertex,uv)} }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} + {{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX}} }; - auto vertexBuffer = asset::ICPUBuffer::create({ IcosphereData.getInterleavedVertexSize() }); - auto indexBuffer = asset::ICPUBuffer::create({ IcosphereData.getIndexSize() }); + auto vertexBuffer = asset::ICPUBuffer::create({ {IcosphereData.getInterleavedVertexSize()} }); + auto indexBuffer = asset::ICPUBuffer::create({ {IcosphereData.getIndexSize()} }); memcpy(vertexBuffer->getPointer(), IcosphereData.getInterleavedVertices(), vertexBuffer->getSize()); memcpy(indexBuffer->getPointer(), IcosphereData.getIndices(), indexBuffer->getSize()); diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index f99d81d01f..569eed1df0 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -12,7 +12,7 @@ #include #include -#include +// #include deprecated in C++17 and newer #include #include #include @@ -282,11 +282,29 @@ static DxcCompilationResult dxcCompile(const CHLSLCompiler* compiler, nbl::asset std::ostringstream insertion; insertion << "#pragma wave dxc_compile_flags( "; - std::wstring_convert, wchar_t> conv; - for (uint32_t
arg = 0; arg < argCount; arg ++) + // due to deprecation in C++17 and newer + auto convertToMultibyte = [](const std::wstring_view wstr) -> std::string + { + if (wstr.empty()) + return ""; + + // Casting size_t to int can be dangerous (integer overflow). However this code is unlikely to ever run + if (wstr.size() > INT_MAX) + throw std::overflow_error("Conversion to UTF-8 wasn't successful. Unicode string size is bigger than max. int value"); + + // now it's safe + const auto requiredSize = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), static_cast<int>(wstr.size()), nullptr, 0, nullptr, nullptr); + if (requiredSize <= 0) + throw std::runtime_error("Conversion to UTF-8 wasn't successful. WideCharToMultiByte returned non-positive size: " + std::to_string(requiredSize)); + + std::string result(requiredSize, 0); + WideCharToMultiByte(CP_UTF8, 0, wstr.data(), static_cast<int>(wstr.size()), result.data(), requiredSize, nullptr, nullptr); + return result; + }; + + for (uint32_t arg = 0; arg < argCount; arg++) { - auto str = conv.to_bytes(args[arg]); - insertion << str.c_str() << " "; + auto str = convertToMultibyte(args[arg]); + insertion << str.c_str() << " "; } insertion << ")\n"; @@ -470,7 +488,6 @@ core::smart_refctd_ptr CHLSLCompiler::compileToSPIRV_impl(const std::st default: logger.log("Invalid `IShaderCompiler::SCompilerOptions::targetSpirvVersion`", system::ILogger::ELL_ERROR); return nullptr; - break; } if (stage != asset::IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY) { diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index 1e08c172ba..356d7288de 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -1131,7 +1131,6 @@ E_FORMAT CMeshManipulator::getBestTypeI(E_FORMAT _originalType, size_t* _outSize if (_cmpntNum < 3u) return -512; else return -2; - break; default: { const uint32_t bitsPerCh = getTexelOrBlockBytesize(_fmt)*8u/getFormatChannelCount(_fmt); @@ -1149,7 +1148,6 @@ E_FORMAT CMeshManipulator::getBestTypeI(E_FORMAT _originalType, size_t* _outSize if (_cmpntNum < 3u) return 1023u; else return 3u; - break; case EF_A2R10G10B10_SSCALED_PACK32: case EF_A2R10G10B10_SINT_PACK32: case EF_A2B10G10R10_SSCALED_PACK32: @@ -1157,7 +1155,6 @@ E_FORMAT CMeshManipulator::getBestTypeI(E_FORMAT _originalType, size_t* _outSize if (_cmpntNum < 3u) return 511u; else return 1u; - break; default: { const uint32_t bitsPerCh = getTexelOrBlockBytesize(_fmt)*8u/getFormatChannelCount(_fmt); diff --git a/src/nbl/asset/utils/CSPIRVIntrospector.cpp b/src/nbl/asset/utils/CSPIRVIntrospector.cpp index 4ac78066a7..6c47b852e4 100644 --- a/src/nbl/asset/utils/CSPIRVIntrospector.cpp +++ b/src/nbl/asset/utils/CSPIRVIntrospector.cpp @@ -297,7 +297,7 @@ NBL_API2 bool CSPIRVIntrospector::CPipelineIntrospectionData::merge(const CSPIRV // NBL_API2 core::smart_refctd_dynamic_array CSPIRVIntrospector::CPipelineIntrospectionData::createPushConstantRangesFromIntrospection(core::smart_refctd_ptr& introspection) { - auto& pc = introspection->getPushConstants(); + //auto& pc = introspection->getPushConstants(); core::vector tmp; tmp.reserve(MaxPushConstantsSize); diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 72c2a3acc8..2b63d7ce66 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -4,6 +4,8 @@ #include "nbl/core/declarations.h" +#include + #include "CSmoothNormalGenerator.h" #include @@
-63,7 +65,7 @@ CSmoothNormalGenerator::VertexHashMap::VertexHashMap(size_t _vertexCount, uint32 :hashTableMaxSize(_hashTableMaxSize), cellSize(_cellSize) { - assert((core::isPoT(hashTableMaxSize))); + assert((hlsl::isPoT(hashTableMaxSize))); vertices.reserve(_vertexCount); buckets.reserve(_hashTableMaxSize + 1); @@ -163,7 +165,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const size_t idxCount = buffer->getIndexCount(); _NBL_DEBUG_BREAK_IF((idxCount % 3)); - VertexHashMap vertices(idxCount, std::min(16u * 1024u, core::roundUpToPoT(idxCount * 1.0f / 32.0f)), epsilon == 0.0f ? 0.00001f : epsilon * 1.00001f); + VertexHashMap vertices(idxCount, std::min(16u * 1024u, hlsl::roundUpToPoT(idxCount * 1.0f / 32.0f)), epsilon == 0.0f ? 0.00001f : epsilon * 1.00001f); core::vector3df_SIMD faceNormal; diff --git a/src/nbl/asset/utils/ISPIRVOptimizer.cpp b/src/nbl/asset/utils/ISPIRVOptimizer.cpp index 41dd17dae9..a9b9da6e23 100644 --- a/src/nbl/asset/utils/ISPIRVOptimizer.cpp +++ b/src/nbl/asset/utils/ISPIRVOptimizer.cpp @@ -103,7 +103,7 @@ nbl::core::smart_refctd_ptr ISPIRVOptimizer::optimize(const uint32_t if (!resultBytesize) return nullptr; - auto result = ICPUBuffer::create({ resultBytesize }); + auto result = ICPUBuffer::create({ { resultBytesize } }); memcpy(result->getPointer(), optimized.data(), resultBytesize); return result; diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 3164fb4f74..218fe28296 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -168,7 +168,7 @@ auto IShaderCompiler::CIncludeFinder::getIncludeRelative(const system::path& req IShaderCompiler::IIncludeLoader::found_t retVal; if (auto contents = m_defaultFileSystemLoader->getInclude(requestingSourceDir.string(), includeName)) retVal = std::move(contents); - else retVal = std::move(trySearchPaths(includeName)); + else retVal = trySearchPaths(includeName); core::blake3_hasher hasher; hasher.update(reinterpret_cast(retVal.contents.data()), retVal.contents.size() * (sizeof(char) / sizeof(uint8_t))); @@ -323,7 +323,7 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const { "entries", std::move(entries) }, { "shaderCreationParams", std::move(shaderCreationParams) }, }; - std::string dumpedContainerJson = std::move(containerJson.dump()); + std::string dumpedContainerJson = containerJson.dump(); uint64_t dumpedContainerJsonLength = dumpedContainerJson.size(); // Create a buffer able to hold all shaders + the containerJson @@ -377,7 +377,7 @@ core::smart_refctd_ptr IShaderCompiler::CCache::deseria // We must now recreate the shaders, add them to each entry, then move the entry into the multiset for (auto i = 0u; i < entries.size(); i++) { // Create buffer to hold the code - auto code = ICPUBuffer::create({ shaderCreationParams[i].codeByteSize }); + auto code = ICPUBuffer::create({ { shaderCreationParams[i].codeByteSize } }); // Copy the shader bytecode into the buffer memcpy(code->getPointer(), serializedCache.data() + SHADER_BUFFER_SIZE_BYTES + shaderCreationParams[i].offset, shaderCreationParams[i].codeByteSize); @@ -390,8 +390,8 @@ core::smart_refctd_ptr IShaderCompiler::CCache::deseria return retVal; } -static void* SzAlloc(ISzAllocPtr p, size_t size) { p = p; return _NBL_ALIGNED_MALLOC(size, _NBL_SIMD_ALIGNMENT); } -static void SzFree(ISzAllocPtr p, void* address) { p = p; _NBL_ALIGNED_FREE(address); } +static void* SzAlloc(ISzAllocPtr p, size_t size) { return 
_NBL_ALIGNED_MALLOC(size, _NBL_SIMD_ALIGNMENT); } +static void SzFree(ISzAllocPtr p, void* address) { _NBL_ALIGNED_FREE(address); } bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBuffer* uncompressedSpirvBuffer) { @@ -425,7 +425,7 @@ bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu core::smart_refctd_ptr nbl::asset::IShaderCompiler::CCache::SEntry::decompressShader() const { - auto uncompressedBuf = ICPUBuffer::create({ uncompressedSize }); + auto uncompressedBuf = ICPUBuffer::create({ { uncompressedSize } }); uncompressedBuf->setContentHash(uncompressedContentHash); size_t dstSize = uncompressedBuf->getSize(); diff --git a/src/nbl/asset/utils/shadercUtils.h b/src/nbl/asset/utils/shadercUtils.h index 49ea248f0b..90fca58f56 100644 --- a/src/nbl/asset/utils/shadercUtils.h +++ b/src/nbl/asset/utils/shadercUtils.h @@ -16,8 +16,6 @@ namespace asset inline shaderc_shader_kind ESStoShadercEnum(IShader::E_SHADER_STAGE _ss) { - using T = core::bitflag; - shaderc_shader_kind convert[6]; convert[hlsl::findLSB(IShader::E_SHADER_STAGE::ESS_VERTEX)] = shaderc_vertex_shader; convert[hlsl::findLSB(IShader::E_SHADER_STAGE::ESS_TESSELLATION_CONTROL)] = shaderc_tess_control_shader; diff --git a/src/nbl/system/CArchiveLoaderTar.cpp b/src/nbl/system/CArchiveLoaderTar.cpp index 25a3bfd6df..99d6965a96 100644 --- a/src/nbl/system/CArchiveLoaderTar.cpp +++ b/src/nbl/system/CArchiveLoaderTar.cpp @@ -1,5 +1,6 @@ #include "nbl/system/CArchiveLoaderTar.h" +#include "nbl/builtin/hlsl/math/intutil.hlsl" enum E_TAR_LINK_INDICATOR { @@ -148,7 +149,7 @@ core::smart_refctd_ptr CArchiveLoaderTar::createArchive_impl(core: } // TODO: this is horrible, replace - const size_t size = strtoul(sSize.c_str(), NULL, 8); + const size_t size = strtoul(sSize.c_str(), nullptr, 8); if (errno == ERANGE) m_logger.log("File %s is too large", ILogger::ELL_WARNING, fullPath.c_str()); @@ -156,7 +157,7 @@ core::smart_refctd_ptr CArchiveLoaderTar::createArchive_impl(core: const uint32_t offset = pos + BlockSize; // move to next file header block - pos = offset + core::roundUp(size,BlockSize); + pos = offset + hlsl::roundUp(size,BlockSize); // add file to list auto& item = items->emplace_back(); diff --git a/src/nbl/system/CArchiveLoaderZip.cpp b/src/nbl/system/CArchiveLoaderZip.cpp index 9f22e60790..44e1c170f8 100644 --- a/src/nbl/system/CArchiveLoaderZip.cpp +++ b/src/nbl/system/CArchiveLoaderZip.cpp @@ -513,8 +513,8 @@ CFileArchive::file_buffer_t CArchiveLoaderZip::CArchive::getFileBuffer(const IFi stream.avail_in = (uInt)decryptedSize; stream.next_out = (Bytef*)decompressed; stream.avail_out = item->size; - stream.zalloc = (alloc_func)0; - stream.zfree = (free_func)0; + stream.zalloc = nullptr; + stream.zfree = nullptr; // Perform inflation. wbits < 0 indicates no zlib header inside the data. 
int32_t err = inflateInit2(&stream, -MAX_WBITS); @@ -538,7 +538,12 @@ CFileArchive::file_buffer_t CArchiveLoaderZip::CArchive::getFileBuffer(const IFi case 12: { #ifdef _NBL_COMPILE_WITH_BZIP2_ - bz_stream bz_ctx = { 0 }; + bz_stream bz_ctx = { + nullptr, 0, 0, 0, + nullptr, 0, 0, 0, + nullptr, + nullptr, nullptr, nullptr + }; // use BZIP2's default memory allocation //bz_ctx->bzalloc = NULL; //bz_ctx->bzfree = NULL; diff --git a/src/nbl/system/CColoredStdoutLoggerWin32.cpp b/src/nbl/system/CColoredStdoutLoggerWin32.cpp index e664ae84bc..ef1a3bed7d 100644 --- a/src/nbl/system/CColoredStdoutLoggerWin32.cpp +++ b/src/nbl/system/CColoredStdoutLoggerWin32.cpp @@ -3,6 +3,11 @@ using namespace nbl; using namespace nbl::system; +#ifdef __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wformat-security" +#endif + #ifdef _NBL_PLATFORM_WINDOWS_ #define WIN32_LEAN_AND_MEAN #include @@ -19,4 +24,8 @@ void CColoredStdoutLoggerWin32::threadsafeLog_impl(const std::string_view& fmt, fflush(stdout); SetConsoleTextAttribute(m_native_console, 15); // restore to white } +#endif + +#ifdef __clang__ + #pragma clang diagnostic pop #endif \ No newline at end of file diff --git a/src/nbl/system/ISystem.cpp b/src/nbl/system/ISystem.cpp index 6b25471f8d..1bdfd8d663 100644 --- a/src/nbl/system/ISystem.cpp +++ b/src/nbl/system/ISystem.cpp @@ -267,7 +267,7 @@ ISystem::FoundArchiveFile ISystem::findFileInArchive(const system::path& absolut const auto relative = std::filesystem::relative(absolutePath,path); const auto items = static_cast(archive.second->listAssets()); - const IFileArchive::SFileList::SEntry itemToFind = { relative }; + const IFileArchive::SFileList::SEntry itemToFind = { relative, 0, 0, 0, IFileArchive::E_ALLOCATOR_TYPE::EAT_NULL }; auto found = std::lower_bound(items.begin(), items.end(), itemToFind); if (found!=items.end() && found->pathRelativeToArchive==relative) return {archive.second.get(),relative}; @@ -280,7 +280,7 @@ ISystem::FoundArchiveFile ISystem::findFileInArchive(const system::path& absolut void ISystem::CAsyncQueue::process_request(base_t::future_base_t* _future_base, SRequestType& req) { - std::visit([=](auto& visitor) { + std::visit([=, this](auto& visitor) { using retval_t = std::remove_reference_t::retval_t; visitor(base_t::future_storage_cast(_future_base),m_caller.get()); }, req.params); diff --git a/src/nbl/ui/CWindowManagerWin32.cpp b/src/nbl/ui/CWindowManagerWin32.cpp index 018613f670..a223e6cdc0 100644 --- a/src/nbl/ui/CWindowManagerWin32.cpp +++ b/src/nbl/ui/CWindowManagerWin32.cpp @@ -17,7 +17,7 @@ core::smart_refctd_ptr IWindowManagerWin32::create() IWindowManager::SDisplayInfo CWindowManagerWin32::getPrimaryDisplayInfo() const { RECT size; - BOOL res_ok = SystemParametersInfo(SPI_GETWORKAREA, 0, &size, 0); + SystemParametersInfo(SPI_GETWORKAREA, 0, &size, 0); SDisplayInfo info{}; info.resX = size.right - size.left; info.resY = size.bottom - size.top; diff --git a/src/nbl/ui/CWindowWin32.cpp b/src/nbl/ui/CWindowWin32.cpp index dd87ca9dee..4a95ace9d8 100644 --- a/src/nbl/ui/CWindowWin32.cpp +++ b/src/nbl/ui/CWindowWin32.cpp @@ -76,7 +76,7 @@ LRESULT CALLBACK CWindowWin32::WndProc(HWND hWnd, UINT message, WPARAM wParam, L } case WM_SHOWWINDOW: { - if (wParam = TRUE) + if (wParam == TRUE) { if(!eventCallback->onWindowShown(window)) shouldCallDefProc = false; } @@ -148,7 +148,7 @@ LRESULT CALLBACK CWindowWin32::WndProc(HWND hWnd, UINT message, WPARAM wParam, L RID_DEVICE_INFO deviceInfo; deviceInfo.cbSize = sizeof(RID_DEVICE_INFO); UINT size = 
sizeof(RID_DEVICE_INFO); - bool success = GetRawInputDeviceInfoA((HANDLE)lParam, RIDI_DEVICEINFO, &deviceInfo, &size); + GetRawInputDeviceInfoA((HANDLE)lParam, RIDI_DEVICEINFO, &deviceInfo, &size); HANDLE deviceHandle = HANDLE(lParam); diff --git a/src/nbl/video/CVulkanAccelerationStructure.h b/src/nbl/video/CVulkanAccelerationStructure.h index 4c0d67eee1..f0e7242f88 100644 --- a/src/nbl/video/CVulkanAccelerationStructure.h +++ b/src/nbl/video/CVulkanAccelerationStructure.h @@ -19,7 +19,7 @@ template //requires std::is_base_of_v> outCmdBufs, core::smart_refctd_ptr&& logger); + bool createCommandBuffers_impl(const BUFFER_LEVEL level, const std::span> outCmdBufs, core::smart_refctd_ptr&& logger) override; bool reset_impl() override; diff --git a/src/nbl/video/CVulkanConnection.cpp b/src/nbl/video/CVulkanConnection.cpp index dc95357e5c..c98bee120f 100644 --- a/src/nbl/video/CVulkanConnection.cpp +++ b/src/nbl/video/CVulkanConnection.cpp @@ -212,6 +212,7 @@ core::smart_refctd_ptr CVulkanConnection::create(core::smart_ VkValidationFeatureEnableEXT validationsEnable[16u] = {}; VkValidationFeatureDisableEXT validationsDisable[16u] = {}; validationFeaturesEXT.pEnabledValidationFeatures = validationsEnable; + validationFeaturesEXT.pDisabledValidationFeatures = validationsDisable; // TODO: Do the same for other validation features as well(?) if (enabledFeatures.synchronizationValidation) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 9494efc2f2..740c457d20 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -201,7 +201,7 @@ IDeviceMemoryAllocator::SAllocation CVulkanLogicalDevice::allocate(const SAlloca bindImageInfo.image = static_cast(info.dedication); bindImageInfo.binding.memory = ret.memory.get(); bindImageInfo.binding.offset = ret.offset; - dedicationSuccess = bindImageMemory(1u,&bindImageInfo); + dedicationSuccess = bindImageMemory(std::span(&bindImageInfo, 1u)); } break; } @@ -651,7 +651,7 @@ core::smart_refctd_ptr CVulkanLogicalDevice::createDescriptorPo } // a lot of empirical research went into defining this constant -constexpr uint32_t MaxDescriptorSetAsWrites = 69u; +// constexpr uint32_t MaxDescriptorSetAsWrites = 69u; void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) { diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 4cc633ec55..e9e804ea0d 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -78,7 +78,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice const CVulkanDeviceFunctionTable* getFunctionTable() const { return &m_devf; } - inline const void* getNativeHandle() const {return &m_vkdev;} + inline const void* getNativeHandle() const override {return &m_vkdev;} VkDevice getInternalObject() const {return m_vkdev;} private: diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 3b1a3b96cc..dfdeb8d856 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -1,6 +1,8 @@ #include "nbl/video/CVulkanPhysicalDevice.h" #include "nbl/video/CVulkanLogicalDevice.h" +#include "nbl/builtin/hlsl/math/intutil.hlsl" + namespace nbl::video { @@ -631,7 +633,7 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart properties.limits.dispatchBase = true; properties.limits.allowCommandBufferQueryCopies = true; // TODO: REDO WE NOW SUPPORT PERF QUERIES always true in 
vk for all query types instead of PerformanceQuery which we don't support at the moment (have VkPhysicalDevicePerformanceQueryPropertiesKHR::allowCommandBufferQueryCopies in mind) - properties.limits.maxOptimallyResidentWorkgroupInvocations = core::min(core::roundDownToPoT(properties.limits.maxComputeWorkGroupInvocations),512u); + properties.limits.maxOptimallyResidentWorkgroupInvocations = core::min(hlsl::roundDownToPoT(properties.limits.maxComputeWorkGroupInvocations),512u); auto invocationsPerComputeUnit = getMaxInvocationsPerComputeUnitsFromDriverID(properties.driverID); if(isExtensionSupported(VK_NV_SHADER_SM_BUILTINS_EXTENSION_NAME)) diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 1f619666ab..65483aef5c 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -6,6 +6,7 @@ #include "nbl/logging_macros.h" #include "nbl/builtin/hlsl/indirect_commands.hlsl" +#include "nbl/builtin/hlsl/math/intutil.hlsl" namespace nbl::video { @@ -1064,7 +1065,7 @@ bool IGPUCommandBuffer::bindDescriptorSets( NBL_LOG_ERROR("pDescriptorSets[%d] was not created by the same ILogicalDevice as the commandbuffer!", i); return false; } - if (!pDescriptorSets[i]->getLayout()->isIdenticallyDefined(layout->getDescriptorSetLayout(firstSet + i))) + if (!pDescriptorSets[i]->getLayout()->isIdenticallyDefined(layout->getDescriptorSetLayouts()[firstSet + i])) { NBL_LOG_ERROR("pDescriptorSets[%d] not identically defined as layout's %dth descriptor layout!", i, firstSet+i); return false; @@ -1336,7 +1337,7 @@ bool IGPUCommandBuffer::writeTimestamp(const stage_flags_t pipelineStage, IQuery return false; } - assert(core::isPoT(static_cast(pipelineStage))); // should only be 1 stage (1 bit set) + assert(hlsl::isPoT(static_cast(pipelineStage))); // should only be 1 stage (1 bit set) if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(queryPool))) { diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 983daed190..6d9aa4a060 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -1,3 +1,4 @@ +#define _NBL_VIDEO_I_LOGICAL_DEVICE_CPP_ #include "nbl/video/IPhysicalDevice.h" #include "git_info.h" @@ -222,78 +223,78 @@ bool ILogicalDevice::validateMemoryBarrier(const uint32_t queueFamilyIndex, asse return false; } - using stage_flags_t = asset::PIPELINE_STAGE_FLAGS; - const core::bitflag supportedStageMask = getSupportedStageMask(queueFamilyIndex); - using access_flags_t = asset::ACCESS_FLAGS; - const core::bitflag supportedAccessMask = getSupportedAccessMask(queueFamilyIndex); - auto validAccess = [supportedStageMask, supportedAccessMask](core::bitflag& stageMask, core::bitflag& accessMask) -> bool - { - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03916 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03917 - if (bool(accessMask & (access_flags_t::HOST_READ_BIT | access_flags_t::HOST_WRITE_BIT)) && !stageMask.hasFlags(stage_flags_t::HOST_BIT)) - return false; - // this takes care of all stuff below - if (stageMask.hasFlags(stage_flags_t::ALL_COMMANDS_BITS)) - return true; - // first strip unsupported bits - stageMask &= supportedStageMask; - accessMask &= supportedAccessMask; - // TODO: finish this stuff - if (stageMask.hasFlags(stage_flags_t::ALL_GRAPHICS_BITS)) - { - if (stageMask.hasFlags(stage_flags_t::ALL_TRANSFER_BITS)) - 
{ - } - else - { - } - } - else - { - if (stageMask.hasFlags(stage_flags_t::ALL_TRANSFER_BITS)) - { - } - else - { - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03914 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03915 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03927 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03928 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-06256 - } - // this is basic valid usage stuff -#ifdef _NBL_DEBUG -// TODO: -// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03900 - if (accessMask.hasFlags(access_flags_t::INDIRECT_COMMAND_READ_BIT) && !bool(stageMask & (stage_flags_t::DISPATCH_INDIRECT_COMMAND_BIT | stage_flags_t::ACCELERATION_STRUCTURE_BUILD_BIT))) - return false; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03901 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03902 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03903 - //constexpr core::bitflag ShaderStages = stage_flags_t::PRE_RASTERIZATION_SHADERS; - //const bool noShaderStages = stageMask&ShaderStages; - // TODO: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03904 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03905 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03906 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03907 - // IMPLICIT: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-07454 - // IMPLICIT: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03909 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-07272 - // TODO: - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03910 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03911 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03912 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03913 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03918 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03919 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03924 - // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03925 -#endif - } - // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-07457 - return true; - }; + // using stage_flags_t = asset::PIPELINE_STAGE_FLAGS; + // const core::bitflag supportedStageMask = getSupportedStageMask(queueFamilyIndex); + // using access_flags_t = asset::ACCESS_FLAGS; + // const core::bitflag supportedAccessMask = getSupportedAccessMask(queueFamilyIndex); +// auto validAccess = [supportedStageMask, supportedAccessMask](core::bitflag& stageMask, core::bitflag& accessMask) -> bool +// { +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03916 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03917 +// if (bool(accessMask & (access_flags_t::HOST_READ_BIT | access_flags_t::HOST_WRITE_BIT)) && !stageMask.hasFlags(stage_flags_t::HOST_BIT)) +// return false; +// // this takes care of all stuff below +// if (stageMask.hasFlags(stage_flags_t::ALL_COMMANDS_BITS)) +// return true; +// // first strip unsupported bits +// stageMask &= supportedStageMask; +// accessMask &= supportedAccessMask; +// // TODO: finish this stuff +// if (stageMask.hasFlags(stage_flags_t::ALL_GRAPHICS_BITS)) +// { +// if (stageMask.hasFlags(stage_flags_t::ALL_TRANSFER_BITS)) +// { +// } +// else +// { +// } +// } +// else +// { +// if (stageMask.hasFlags(stage_flags_t::ALL_TRANSFER_BITS)) +// { +// } +// else +// { +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03914 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03915 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03927 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03928 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-06256 +// } +// // this is basic valid usage stuff +//#ifdef _NBL_DEBUG +//// TODO: +//// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03900 +// if (accessMask.hasFlags(access_flags_t::INDIRECT_COMMAND_READ_BIT) && !bool(stageMask & (stage_flags_t::DISPATCH_INDIRECT_COMMAND_BIT | stage_flags_t::ACCELERATION_STRUCTURE_BUILD_BIT))) +// return false; +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03901 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03902 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03903 +// //constexpr core::bitflag ShaderStages = stage_flags_t::PRE_RASTERIZATION_SHADERS; +// //const bool noShaderStages = stageMask&ShaderStages; +// // TODO: +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03904 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03905 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03906 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03907 +// // IMPLICIT: 
https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-07454 +// // IMPLICIT: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03909 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-07272 +// // TODO: +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03910 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03911 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03912 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03913 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03918 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03919 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03924 +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-03925 +//#endif +// } +// // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkMemoryBarrier2-srcAccessMask-07457 +// return true; +// }; return true; } @@ -491,8 +492,8 @@ core::smart_refctd_ptr ILogicalDevice::createDescriptor bool ILogicalDevice::updateDescriptorSets(const std::span descriptorWrites, const std::span descriptorCopies) { - using redirect_t = IGPUDescriptorSetLayout::CBindingRedirect; - SUpdateDescriptorSetsParams params = { .writes = descriptorWrites,.copies = descriptorCopies }; + // using redirect_t = IGPUDescriptorSetLayout::CBindingRedirect; + SUpdateDescriptorSetsParams params = { .writes = descriptorWrites,.copies = descriptorCopies, .pWriteTypes = nullptr }; core::vector writeTypes(descriptorWrites.size()); auto outCategory = writeTypes.data(); params.pWriteTypes = outCategory; @@ -557,12 +558,12 @@ bool ILogicalDevice::updateDescriptorSets(const std::spanprocessWrite(write, writeValidationResults[i]); } - for (auto i = 0; i < descriptorCopies.size(); i++) + for (size_t i = 0; i < descriptorCopies.size(); i++) { const auto& copy = descriptorCopies[i]; copy.dstSet->processCopy(copy, copyValidationResults[i]); @@ -638,7 +639,7 @@ core::smart_refctd_ptr ILogicalDevice::createRenderpass(const IG } const auto& optimalTilingUsages = getPhysicalDevice()->getImageFormatUsagesOptimalTiling(); - auto invalidAttachment = [this, &optimalTilingUsages] class op_t>(const IGPURenderpass::SCreationParams::SAttachmentDescription&desc) -> bool + auto invalidAttachment = [&optimalTilingUsages] class op_t>(const IGPURenderpass::SCreationParams::SAttachmentDescription&desc) -> bool { // We won't support linear attachments, so implicitly satisfy: // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkSubpassDescription2-linearColorAttachment-06499 @@ -943,7 +944,7 @@ bool ILogicalDevice::createGraphicsPipelines( return false; } } - for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) + for (uint32_t i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) { const auto& render = 
subpass.colorAttachments[i].render; if (render.used()) @@ -1149,4 +1150,117 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline } return retval; } + +template requires nbl::is_any_of_v, + asset::IBottomLevelAccelerationStructure::Triangles, + asset::IBottomLevelAccelerationStructure::AABBs, + asset::IBottomLevelAccelerationStructure::AABBs +> +inline ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span geometries, + const uint32_t* const pMaxPrimitiveCounts +) const +{ + if (invalidFeaturesForASBuild(hostBuild, motionBlur)) + { + NBL_LOG_ERROR("Required features are not enabled"); + return {}; + } + + const auto& limits = getPhysicalDeviceLimits(); + if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags, limits, m_enabledFeatures)) + { + NBL_LOG_ERROR("Invalid build flags"); + return {}; + } + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkGetAccelerationStructureBuildSizesKHR-pBuildInfo-03619 + if (geometries.empty() || !pMaxPrimitiveCounts) + { + NBL_LOG_ERROR("Invalid parameters, no geometry or primitives were specified"); + return {}; + } + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureBuildGeometryInfoKHR-type-03793 + if (geometries.size() > limits.maxAccelerationStructureGeometryCount) + { + NBL_LOG_ERROR("Geometry count exceeds device limit"); + return {}; + } + + // not sure of VUID + uint32_t primsFree = limits.maxAccelerationStructurePrimitiveCount; + for (auto i = 0u; i < geometries.size(); i++) + { + const auto& geom = geometries[i]; + if constexpr (Geometry::Type == asset::IBottomLevelAccelerationStructure::GeometryType::Triangles) + { + if (flags.hasFlags(asset::IBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT)) + { + NBL_LOG_ERROR("Primitive type is Triangles but build flag says BLAS build is AABBs"); + return {}; + } + if (!getPhysicalDevice()->getBufferFormatUsages()[geom.vertexFormat].accelerationStructureVertex) + { + NBL_LOG_ERROR("Vertex Format %d not supported as Acceleration Structure Vertex Position Input on this Device", geom.vertexFormat); + return {}; + } + // TODO: do we check `maxVertex`, `vertexStride` and `indexType` for validity + } + if constexpr (Geometry::Type == asset::IBottomLevelAccelerationStructure::GeometryType::AABBs) + { + if (!flags.hasFlags(asset::IBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT)) + { + NBL_LOG_ERROR("Primitive type is AABB but build flag says BLAS build is not AABBs"); + return {}; + } + // TODO: check stride and geometry flags for validity + } + if (pMaxPrimitiveCounts[i] > primsFree) + { + NBL_LOG_ERROR("Primitive count exceeds device limit"); + return {}; + } + primsFree -= pMaxPrimitiveCounts[i]; + } + + return getAccelerationStructureBuildSizes_impl(hostBuild, flags, motionBlur, geometries, pMaxPrimitiveCounts); +} + +template ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes>( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span>
geometries, + const uint32_t* const pMaxPrimitiveCounts +) const; + +template ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes>( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span> geometries, + const uint32_t* const pMaxPrimitiveCounts +) const; + +template ILogicalDevice::AccelerationStructureBuildSizes ILogicalDevice::getAccelerationStructureBuildSizes>( + const bool hostBuild, + const core::bitflag flags, + const bool motionBlur, + const std::span> geometries, + const uint32_t* const pMaxPrimitiveCounts +) const; + #include "nbl/undef_logging_macros.h" \ No newline at end of file diff --git a/src/nbl/video/IPhysicalDevice.cpp b/src/nbl/video/IPhysicalDevice.cpp index dca2a47289..84c00b088b 100644 --- a/src/nbl/video/IPhysicalDevice.cpp +++ b/src/nbl/video/IPhysicalDevice.cpp @@ -182,6 +182,8 @@ float getBcFormatMaxPrecision(asset::E_FORMAT format, uint32_t channel) case asset::EF_PVRTC2_4BPP_SRGB_BLOCK_IMG: // TODO: Use proper metrics here instead of assuming full 8 bit return 1.0 / 255.0; + default: + break; } if (isSRGBFormat(format)) diff --git a/src/nbl/video/ISwapchain.cpp b/src/nbl/video/ISwapchain.cpp index 5ed16495ee..d10049a7ab 100644 --- a/src/nbl/video/ISwapchain.cpp +++ b/src/nbl/video/ISwapchain.cpp @@ -8,7 +8,7 @@ namespace nbl::video { ISwapchain::ISwapchain(core::smart_refctd_ptr&& dev, SCreationParams&& params, const uint8_t imageCount, core::smart_refctd_ptr&& oldSwapchain) : - IBackendObject(std::move(dev)), m_params(std::move(params)), m_imgCreationParams({ + IBackendObject(std::move(dev)), m_oldSwapchain(std::move(oldSwapchain)), m_params(std::move(params)), m_imgCreationParams({ .type = IGPUImage::ET_2D, .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, .format = m_params.surfaceFormat.format, @@ -19,7 +19,7 @@ ISwapchain::ISwapchain(core::smart_refctd_ptr&& dev, SCrea .usage = m_params.sharedParams.imageUsage, // stencil usage remains none because swapchains don't have stencil formats! .viewFormats = m_params.sharedParams.viewFormats - }), m_oldSwapchain(std::move(oldSwapchain)), m_imageCount(imageCount) + }), m_imageCount(imageCount) { assert(params.queueFamilyIndices.size()<=ILogicalDevice::MaxQueueFamilies); assert(imageCount<=ISwapchain::MaxImages); diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 3ce684e0b7..2bc5f43c59 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -2,6 +2,8 @@ // This file is part of the "Nabla Engine". 
#include "nbl/video/utilities/CAssetConverter.h" +#include "nbl/builtin/hlsl/math/intutil.hlsl" + #include @@ -519,7 +521,7 @@ class AssetVisitor : public CRTP inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) { // individual DS layouts are optional - for (auto i=0; igetDescriptorSetLayout(i); layout) { @@ -584,7 +586,7 @@ class AssetVisitor : public CRTP const auto& redirect = layout->getDescriptorRedirect(type); const auto bindingCount = redirect.getBindingCount(); // go over every binding - for (auto j=0; jgetInstances(); hasher << instances.size(); AssetVisitor> visitor = { - *this, + { *this }, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1254,7 +1256,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t { const auto* asset = lookup.asset; AssetVisitor> visitor = { - *this, + {*this}, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1271,7 +1273,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t { const auto* asset = lookup.asset; AssetVisitor> visitor = { - *this, + {*this}, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1293,7 +1295,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t> visitor = { - *this, + {*this}, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1332,7 +1334,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t> visitor = { - *this, + {*this}, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1360,7 +1362,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t> visitor = { - *this, + {*this}, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1389,7 +1391,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t hasher << layout.stencil; }; - for (auto i=0; igetDepthStencilAttachmentCount(); i++) + for (uint32_t i=0; igetDepthStencilAttachmentCount(); i++) { auto entry = params.depthStencilAttachments[i]; if (!entry.valid()) @@ -1409,7 +1411,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t hashLayout(entry.format,entry.initialLayout); hashLayout(entry.format,entry.finalLayout); } - for (auto i=0; igetColorAttachmentCount(); i++) + for (uint32_t i=0; igetColorAttachmentCount(); i++) { const auto& entry = params.colorAttachments[i]; if (!entry.valid()) @@ -1423,7 +1425,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t hasher << ref.attachmentIndex; hashLayout(params.depthStencilAttachments[ref.attachmentIndex].format,ref.layout); }; - for (auto i=0; igetSubpassCount(); i++) + for (uint32_t i=0; igetSubpassCount(); i++) { const auto& entry = params.subpasses[i]; const auto depthStencilRenderAtt = entry.depthStencilAttachment.render; @@ -1482,7 +1484,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t> visitor = { - *this, + {*this}, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1491,7 +1493,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_tgetCachedCreationParams(); { - for (auto i=0; i> visitor = { - *this, + {*this}, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -1579,7 +1581,6 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t> visitor = { - *this, + { *this }, {asset,static_cast(patchOverride)->uniqueCopyGroupID}, *lookup.patch }; @@ -2306,7 +2307,7 @@ class MetaDeviceMemoryAllocator final const auto* memBacked = 
getAsBase(binItems[i]); const auto& memReqs = memBacked->getMemoryReqs(); // round up the offset to get the correct alignment - offsetsTmp[i] = core::roundUp(offsetsTmp[i],0x1ull< SReserveResult .device = device, .dfsCaches = dfsCaches, .stack = stack - }.descend_impl_impl({},{asset,uniqueGroupID},std::move(patch)); + }.template descend_impl_impl({},{asset,uniqueGroupID},std::move(patch)); } }; core::for_each_in_tuple(inputs.assets,initialize); @@ -2734,19 +2735,19 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult auto visit = [&](const patched_instance_t& user)->void { // we don't use the result yet - const bool success = AssetVisitor>{ - { - .inputs = inputs, - .device = device, - .dfsCaches = dfsCaches, - .stack = stack - }, - // construct a casted instance type - {static_cast(user.instance.asset),user.instance.uniqueCopyGroupID}, - // This is fairly risky, because its a reference to a vector element while we're pushing new elements to a vector during DFS - // however we have a DAG and AssetType cannot depend on the same AssetType and we don't recurse inside `visit` so we never grow our own vector. - std::get>(dfsCaches).nodes[user.patchIx.value].patch - }(); + //const bool success = AssetVisitor>{ + // { + // .inputs = inputs, + // .device = device, + // .dfsCaches = dfsCaches, + // .stack = stack + // }, + // // construct a casted instance type + // {static_cast(user.instance.asset),user.instance.uniqueCopyGroupID}, + // // This is fairly risky, because its a reference to a vector element while we're pushing new elements to a vector during DFS + // // however we have a DAG and AssetType cannot depend on the same AssetType and we don't recurse inside `visit` so we never grow our own vector. + // std::get>(dfsCaches).nodes[user.patchIx.value].patch + //}(); }; // Perform Depth First Search of the Asset Graph while (!stack.empty()) @@ -2872,7 +2873,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult } ); // special pass to propagate Motion Acceleration Structure flag upwards from BLAS to referencing TLAS - std::get>(dfsCaches).for_each([device,&inputs,&dfsCaches](const instance_t& assetInstance, dfs_cache::created_t& created)->void + std::get>(dfsCaches).for_each([&inputs,&dfsCaches](const instance_t& assetInstance, dfs_cache::created_t& created)->void { auto& patch = created.patch; // we already have motion, can stop searching @@ -2915,7 +2916,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult { // This map contains the assets by-hash, identical asset+patch hash the same. // It only has entries for GPU objects that need to be created - conversions_t conversionRequests = {this,&inputs,&deferredAllocator}; + conversions_t conversionRequests = { this,&inputs,&deferredAllocator, {}, {}, {} }; // const CCache* readCache = inputs.readCache ? 
(&std::get>(inputs.readCache->m_caches)):nullptr; @@ -3062,7 +3063,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult { IGPUBuffer::SCreationParams params = {}; constexpr size_t MinASBufferAlignment = 256u; - params.size = core::roundUp(sizes.accelerationStructureSize,MinASBufferAlignment); + params.size = hlsl::roundUp(sizes.accelerationStructureSize,MinASBufferAlignment); params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; // concurrent ownership if any const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies(uniqueCopyGroupID,as,patch); @@ -3214,7 +3215,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult params.image = std::move(visitor.image); params.viewType = cpuParams.viewType; // does the format get promoted - params.format = patch.formatFollowsImage() ? baseFormat:cpuParams.format; + params.format = patch.formatFollowsImage() ? baseFormat:cpuParams.format; memcpy(¶ms.components,&cpuParams.components,sizeof(params.components)); params.subresourceRange = cpuParams.subresourceRange; // if underlying image had mip-chain extended then we extend our own @@ -3227,9 +3228,10 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult if constexpr (std::is_same_v) { ILogicalDevice::SShaderCreationParameters createParams = { + .source = nullptr, .optimizer = m_params.optimizer.get(), .readCache = inputs.readShaderCache, - .writeCache = inputs.writeShaderCache + .writeCache = inputs.writeShaderCache, }; for (auto& entry : conversionRequests.contentHashToCanonical) @@ -3274,7 +3276,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult core::vector> immutableSamplers(asset->getImmutableSamplers().size()); { const auto& immutableSamplerRedirects = asset->getImmutableSamplerRedirect(); - auto outImmutableSamplers = immutableSamplers.data(); + // auto outImmutableSamplers = immutableSamplers.data(); for (auto j=0u; j SReserveResult SShaderEntryMap tesselationEvaluationEntryMap; SShaderEntryMap geometryEntryMap; SShaderEntryMap fragmentEntryMap; - bool depNotFound = false; { params.layout = visitor.layout; params.renderpass = visitor.renderpass; // while there are patches possible for shaders, the only patch which can happen here is changing a stage from UNKNOWN to match the slot here - using stage_t = hlsl::ShaderStage; - using GPUShaderSpecInfo = IGPUPipelineBase::SShaderSpecInfo; + using GPUShaderSpecInfo = IGPUPipelineBase::SShaderSpecInfo; params.vertexShader = GPUShaderSpecInfo::create(visitor.getSpecInfo(hlsl::ESS_VERTEX), &vertexEntryMap); params.tesselationControlShader = GPUShaderSpecInfo::create(visitor.getSpecInfo(hlsl::ESS_TESSELLATION_CONTROL), &tesselationControlEntryMap); params.tesselationEvaluationShader = GPUShaderSpecInfo::create(visitor.getSpecInfo(hlsl::ESS_TESSELLATION_EVALUATION), &tesselationEvaluationEntryMap); @@ -3605,7 +3605,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult { // check if the BLASes we want to use for the instances were successfully allocated and created AssetVisitor> visitor = { - {inputs,dfsCaches,&blasInstanceMap}, + { { inputs,dfsCaches }, &blasInstanceMap }, {canonical,requests.gpuObjUniqueCopyGroupIDs[reqIx]}, patch }; @@ -3738,6 +3738,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult if constexpr (std::is_same_v) retval.m_imageConversions.erase(gpuObj); // TODO: erase from `retval.m_gpuObjects` as well + (void)retval; 
// silence unused capture return true; } // still referenced, keep it around @@ -3933,7 +3934,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul { const auto& stagingCache = std::get>(reservations.m_stagingCaches); const auto found = stagingCache.find(dep); - SMissingDependent retval = {.wasInStaging=found!=stagingCache.end()}; + SMissingDependent retval = {.wasInStaging=found!=stagingCache.end(), .gotWiped=false /*initialize with dummy value*/ }; retval.gotWiped = retval.wasInStaging && !found->second.gpuRef; return retval; }; @@ -4295,7 +4296,6 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul const auto repeatSampler = device->createSampler({ // default everything }); - using binding_create_flags_t = IGPUDescriptorSetLayout::SBindingBase::E_CREATE_FLAGS; constexpr auto BindingFlags = SubAllocatedDescriptorSet::RequiredBindingFlags; // need at least as many elements in descriptor array as scratch buffers, and no more than total images const uint32_t imageCount = imagesToUpload.size(); @@ -4307,14 +4307,16 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul .type = IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = BindingFlags, .stageFlags = hlsl::ShaderStage::ESS_COMPUTE, - .count = std::min(std::max(computeMultiBufferingCount,params.sampledImageBindingCount),imageCount) + .count = std::min(std::max(computeMultiBufferingCount,params.sampledImageBindingCount),imageCount), + .immutableSamplers = nullptr, }, { .binding = DstMipBinding, .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = BindingFlags, .stageFlags = hlsl::ShaderStage::ESS_COMPUTE, - .count = std::min(std::max(MaxMipLevelsPastBase*computeMultiBufferingCount,params.storageImageBindingCount),MaxMipLevelsPastBase*imageCount) + .count = std::min(std::max(MaxMipLevelsPastBase*computeMultiBufferingCount,params.storageImageBindingCount),MaxMipLevelsPastBase*imageCount), + .immutableSamplers = nullptr, } }; auto layout = device->createDescriptorSetLayout(bindings); @@ -4378,7 +4380,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul auto srcIx = SubAllocatedDescriptorSet::invalid_value; // clean up the allocation if we fail to make it to the end of loop for whatever reason // cannot do `multi_deallocate` with future semaphore value right away, because we don't know the last submit to use this descriptor, yet. 
- auto deallocSrc = core::makeRAIIExiter([SrcMipBinding,&dsAlloc,&srcIx]()->void{ + auto deallocSrc = core::makeRAIIExiter([&dsAlloc,&srcIx]()->void{ if (srcIx!=SubAllocatedDescriptorSet::invalid_value) dsAlloc->multi_deallocate(SrcMipBinding,1,&srcIx,{}); }); @@ -4584,6 +4586,10 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul assert(false); break; } + + // suppress the -Wunused-but-set-variable (storeFormat) + (void)storeFormat; + // no point caching this view, has to be created individually for each mip level with modified format auto dstView = device->createImageView({ .flags = IGPUImageView::ECF_NONE, @@ -4804,7 +4810,11 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul // Right now we build all BLAS first, then all TLAS // (didn't fancy horrible concurrency managment taking compactions into account) - auto queryPool = device->createQueryPool({.queryCount=hlsl::max(blasCount,tlasCount),.queryType=IQueryPool::ACCELERATION_STRUCTURE_COMPACTED_SIZE}); + auto queryPool = device->createQueryPool(IQueryPool::SCreationParams{ + .queryCount=hlsl::max(blasCount,tlasCount), + .queryType=IQueryPool::ACCELERATION_STRUCTURE_COMPACTED_SIZE, + .pipelineStatisticsFlags = {}, + }); // leftover for TLAS builds using compacted_blas_map_t = unordered_map>; @@ -5077,7 +5087,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul } allocSizes.push_back(size); alignments.push_back(alignment); - const auto tmp = asToBuild.second.scratchSize; + //const auto tmp = asToBuild.second.scratchSize; //logger.log("%p Triangle Data Size %d Align %d Scratch Size %d",system::ILogger::ELL_DEBUG,canonical.get(),size,alignment,tmp); } } @@ -5130,7 +5140,6 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul { uint32_t operator()(void* dst, const size_t offsetInRange, const uint32_t blockSize) override { - using blas_ref_t = IGPUBottomLevelAccelerationStructure::device_op_ref_t; assert(offsetInRange%16==0); uint32_t bytesWritten = 0; @@ -5400,7 +5409,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul // This is a Spec limit/rpomise we don't even expose it constexpr size_t MinASBufferAlignment = 256u; using usage_f = IGPUBuffer::E_USAGE_FLAGS; - IGPUBuffer::SCreationParams creationParams = { {.size=core::roundUp(sizes[i],MinASBufferAlignment),.usage=usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT},{}}; + IGPUBuffer::SCreationParams creationParams = { {.size=hlsl::roundUp(sizes[i],MinASBufferAlignment),.usage=usage_f::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT},{}}; // same sharing setup as the previous AS buffer creationParams.queueFamilyIndexCount = oldBuffer->getCachedCreationParams().queueFamilyIndexCount; creationParams.queueFamilyIndices = oldBuffer->getCachedCreationParams().queueFamilyIndices; @@ -5410,7 +5419,6 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul logFail("create Buffer backing the Compacted Acceleration Structure",as); continue; } - auto bufReqs = buf->getMemoryReqs(); backingBuffers[i].value = std::move(buf); // allocate new memory - definitely don't want to be raytracing from across the PCIE slot if (!deferredAllocator.request(backingBuffers.data()+i,physDev->getDeviceLocalMemoryTypeBits())) diff --git a/src/nbl/video/utilities/CComputeBlit.cpp b/src/nbl/video/utilities/CComputeBlit.cpp index 924c337cbe..e4666bc3e9 100644 --- a/src/nbl/video/utilities/CComputeBlit.cpp +++ b/src/nbl/video/utilities/CComputeBlit.cpp @@ -1,6 +1,7 
@@ #include "nbl/video/utilities/CComputeBlit.h" #include "nbl/builtin/hlsl/binding_info.hlsl" #include "nbl/builtin/hlsl/tgmath.hlsl" +#include "nbl/builtin/hlsl/math/intutil.hlsl" using namespace nbl::core; using namespace nbl::hlsl; @@ -31,8 +32,8 @@ auto CComputeBlit::createAndCachePipelines(const SPipelinesCreateInfo& info) -> const auto& limits = m_device->getPhysicalDevice()->getLimits(); retval.workgroupSize = 0x1u<getCreationParameters().format) @@ -22,7 +25,6 @@ ImageRegionIterator::ImageRegionIterator( , currentSliceInLayer(0u) , currentLayerInRegion(0u) , currentRegion(0u) - , optimalRowPitchAlignment(optimalRowPitchAlignment) { if(srcImageFormat == asset::EF_UNKNOWN) srcImageFormat = dstImageFormat; @@ -122,7 +124,7 @@ size_t ImageRegionIterator::getMemoryNeededForRemainingRegions() const // We want to first roundUp to bufferOffsetAlignment everytime we increment, because the incrementation here correspond a single copy command that needs it's bufferOffset to be aligned correctly (assuming enough memory). auto incrementMemoryNeeded = [&](const uint32_t size) { - memoryNeededForRemainingRegions = core::roundUp(memoryNeededForRemainingRegions, bufferOffsetAlignment); + memoryNeededForRemainingRegions = hlsl::roundUp(memoryNeededForRemainingRegions, bufferOffsetAlignment); memoryNeededForRemainingRegions += size; }; @@ -273,7 +275,7 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo auto addToCurrentUploadBufferOffset = [&](uint32_t size) -> bool { const auto initialOffset = stagingBufferOffset; - stagingBufferOffset = core::roundUp(stagingBufferOffset, bufferOffsetAlignment); + stagingBufferOffset = hlsl::roundUp(stagingBufferOffset, bufferOffsetAlignment); stagingBufferOffset += size; const auto consumedMemory = stagingBufferOffset - initialOffset; if(consumedMemory <= availableMemory)