From aa1317b6f25213016b6f6dbaa208086b9b53145a Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:06:30 +0000 Subject: [PATCH 01/17] Create pointer-to-matrices memory (m_d_ptrMatrices) Allocating and destroying memory No memory leaks evident atm --- include/tensor.cuh | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 0a8d1d1..00e5068 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -181,16 +181,22 @@ class DTensor { private: T *m_d_data = nullptr; ///< Pointer to device data + T **m_d_ptrMatrices = nullptr; ///< Pointer to matrices in tensor size_t m_numRows = 0; ///< Number of rows size_t m_numCols = 0; ///< Number of columns size_t m_numMats = 0; ///< Number of matrices - bool m_doDestroy = false; ///< Whether to destroy memory + bool m_doDestroyData = false; ///< Whether to destroy memory + bool m_doDestroyPtrMatrices = false; ///< Whether to destroy memory - bool destroy() { - if (!m_doDestroy) return false; - if (m_d_data) cudaFree(m_d_data); - m_d_data = nullptr; - return true; + void destroy() { + if (m_doDestroyData) { + if (m_d_data) gpuErrChk(cudaFree(m_d_data)); + m_d_data = nullptr; + } + if (m_doDestroyPtrMatrices) { + if (m_d_ptrMatrices) gpuErrChk(cudaFree(m_d_ptrMatrices)); + m_d_ptrMatrices = nullptr; + } } /** @@ -586,7 +592,7 @@ DTensor::DTensor(const DTensor &other, size_t axis, size_t from, size_t to m_numMats = 1; } m_d_data = other.m_d_data + offset; - m_doDestroy = false; + m_doDestroyData = false; } template @@ -595,8 +601,8 @@ DTensor::DTensor(DTensor &&other) { m_numRows = other.m_numRows; m_numMats = other.m_numMats; m_d_data = other.m_d_data; - m_doDestroy = true; - other.m_doDestroy = false; + m_doDestroyData = true; + other.m_doDestroyData = false; other.m_d_data = nullptr; other.m_numCols = 0; other.m_numRows = 0; @@ -757,12 +763,16 @@ template inline bool DTensor::allocateOnDevice(size_t size, bool 
zero) { if (size <= 0) return false; destroy(); - m_doDestroy = true; + m_doDestroyData = true; size_t buffer_size = size * sizeof(T); bool cudaStatus = cudaMalloc(&m_d_data, buffer_size); if (cudaStatus != cudaSuccess) return false; if (zero) gpuErrChk(cudaMemset(m_d_data, 0, buffer_size)); // set to zero all elements - return true; + + m_doDestroyPtrMatrices = true; + cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T*)); + + return (cudaStatus != cudaSuccess); } template @@ -854,7 +864,7 @@ DTensor &DTensor::operator=(const DTensor &other) { m_numMats = other.m_numMats; m_numRows = other.m_numRows; m_numCols = other.m_numCols; - m_doDestroy = false; + m_doDestroyData = false; m_d_data = other.m_d_data; return *this; } From 2ac61cf073fdcecd4229aac49c774c0e3a01dddf Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:18:25 +0000 Subject: [PATCH 02/17] Initialise m_d_ptrMatrices Introduce initialisePointersToMatricesData to initialise m_d_ptrMatrices; appropriate checks for safety. 
--- include/tensor.cuh | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 00e5068..1be39a9 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -42,8 +42,7 @@ static std::random_device RND_DEVICE; * @param hi * @return */ -TEMPLATE_WITH_TYPE_T -TEMPLATE_CONSTRAINT_REQUIRES_FPX +TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX std::vector generateRealRandomVector(size_t n, T low, T hi) { std::mt19937_64 mersenne_engine(RND_DEVICE()); std::uniform_real_distribution dist(low, hi); @@ -231,6 +230,23 @@ private: */ std::ostream &print(std::ostream &out) const; + void initialisePointersToMatricesData() { + /* Make sure m_d_ptrMatrices has been allocated */ + if (!m_d_ptrMatrices || !m_doDestroyPtrMatrices) { + throw std::runtime_error("Unallocated memory (m_d_ptrMatrices)"); + } + /* Host-based vector of pointers */ + std::vector h_pointers(m_numMats); + size_t numelMat = m_numRows * m_numCols; + h_pointers[0] = m_d_data; + for (size_t i = 1; i < m_numMats; i++) { + h_pointers[i] = m_d_data + i * numelMat; + } + /* Upload data to m_d_ptrMatrices */ + size_t buffer_size = m_numMats * sizeof(T *); + gpuErrChk(cudaMemcpy(m_d_ptrMatrices, h_pointers.data(), buffer_size, cudaMemcpyHostToDevice)); + } + public: /** @@ -770,7 +786,10 @@ inline bool DTensor::allocateOnDevice(size_t size, bool zero) { if (zero) gpuErrChk(cudaMemset(m_d_data, 0, buffer_size)); // set to zero all elements m_doDestroyPtrMatrices = true; - cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T*)); + cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T *)); + + /* Initialise m_d_ptrMatrices */ + initialisePointersToMatricesData(); return (cudaStatus != cudaSuccess); } From 3338852def77ee30e7923f7a0a42f0a4394e3c94 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:24:28 +0000 Subject: [PATCH 03/17] Use initialisePointersToMatricesData to initialise 
m_d_ptrMatrices Also in the slice constructor, keep m_d_ptrMatrices=nullptr when we don't slice along axis=2 --- include/tensor.cuh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 1be39a9..8ef0d90 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -564,6 +564,8 @@ DTensor::DTensor(size_t m, size_t n, size_t k, bool zero) { m_numMats = k; size_t size = m * n * k; allocateOnDevice(size, zero); + /* Initialise m_d_ptrMatrices */ + initialisePointersToMatricesData(); } template @@ -574,6 +576,8 @@ DTensor::DTensor(const std::vector &data, size_t m, size_t n, size_t k, St size_t size = m * n * k; allocateOnDevice(size); upload(data, mode); + /* Initialise m_d_ptrMatrices */ + initialisePointersToMatricesData(); } template @@ -585,6 +589,8 @@ DTensor::DTensor(const DTensor &other) { allocateOnDevice(m_numRows * m_numCols * m_numMats); gpuErrChk(cudaMemcpy(m_d_data, other.raw(), m_numRows * m_numCols * m_numMats * sizeof(T), cudaMemcpyDeviceToDevice)); + /* Initialise m_d_ptrMatrices */ + initialisePointersToMatricesData(); } template @@ -609,6 +615,11 @@ DTensor::DTensor(const DTensor &other, size_t axis, size_t from, size_t to } m_d_data = other.m_d_data + offset; m_doDestroyData = false; + m_doDestroyPtrMatrices = false; + if (axis != 2) { + // m_d_ptrMatrices is not needed for vectors and matrices + m_d_ptrMatrices = nullptr; + } } template @@ -788,9 +799,6 @@ inline bool DTensor::allocateOnDevice(size_t size, bool zero) { m_doDestroyPtrMatrices = true; cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T *)); - /* Initialise m_d_ptrMatrices */ - initialisePointersToMatricesData(); - return (cudaStatus != cudaSuccess); } From 36bf6ed6c60b9157b915f8fc651824cbb547df1e Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:35:41 +0000 Subject: [PATCH 04/17] update addAB to use preallocated memory use cublasDgemm if nMats=1 --- include/tensor.cuh | 37 
+++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 8ef0d90..65ebdd5 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -624,12 +624,18 @@ DTensor::DTensor(const DTensor &other, size_t axis, size_t from, size_t to template DTensor::DTensor(DTensor &&other) { + /* Steal everything from other */ m_numCols = other.m_numCols; m_numRows = other.m_numRows; m_numMats = other.m_numMats; m_d_data = other.m_d_data; - m_doDestroyData = true; + m_doDestroyData = other.m_doDestroyData; + m_doDestroyPtrMatrices = other.m_doDestroyPtrMatrices; + m_d_ptrMatrices = other.m_d_ptrMatrices; + /* Invalidate other */ + other.m_doDestroyPtrMatrices = false; other.m_doDestroyData = false; + other.m_d_ptrMatrices = nullptr; other.m_d_data = nullptr; other.m_numCols = 0; other.m_numRows = 0; @@ -965,18 +971,25 @@ inline void DTensor::addAB(const DTensor &A, const DTensor ptrA = A.pointersToMatrices(); - DTensor ptrB = B.pointersToMatrices(); - DTensor ptr = pointersToMatrices(); double _alpha = alpha, _beta = beta; - gpuErrChk(cublasDgemmBatched(Session::getInstance().cuBlasHandle(), - CUBLAS_OP_N, CUBLAS_OP_N, - nRA, nCB, nCA, &_alpha, - ptrA.raw(), nRA, - ptrB.raw(), nCA, - &_beta, - ptr.raw(), nRA, - nMat)); + if (nMat > 1) { + gpuErrChk(cublasDgemmBatched(Session::getInstance().cuBlasHandle(), + CUBLAS_OP_N, CUBLAS_OP_N, + nRA, nCB, nCA, &_alpha, + A.m_d_ptrMatrices, nRA, + B.m_d_ptrMatrices, nCA, + &_beta, + m_d_ptrMatrices, nRA, + nMat)); + } else { + gpuErrChk(cublasDgemm(Session::getInstance().cuBlasHandle(), + CUBLAS_OP_N, CUBLAS_OP_N, + nRA, nCB, nCA, &_alpha, + A.raw(), nRA, + B.raw(), nCA, + &_beta, + raw(), nRA)); + } } template<> From 4e5da4dd952be5a881ddb094e35c8796f6f2e770 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:39:25 +0000 Subject: [PATCH 05/17] mirror implementation for DTensor::addAB --- include/tensor.cuh | 29 
++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 65ebdd5..3d650d4 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -998,18 +998,25 @@ inline void DTensor::addAB(const DTensor &A, const DTensor size_t nRA = A.numRows(); size_t nCA = A.numCols(); size_t nCB = B.numCols(); - DTensor ptrA = A.pointersToMatrices(); - DTensor ptrB = B.pointersToMatrices(); - DTensor ptr = pointersToMatrices(); float _alpha = alpha, _beta = beta; - gpuErrChk(cublasSgemmBatched(Session::getInstance().cuBlasHandle(), - CUBLAS_OP_N, CUBLAS_OP_N, - nRA, nCB, nCA, &_alpha, - ptrA.raw(), nRA, - ptrB.raw(), nCA, - &_beta, - ptr.raw(), nRA, - nMat)); + if (nMat > 1) { + gpuErrChk(cublasSgemmBatched(Session::getInstance().cuBlasHandle(), + CUBLAS_OP_N, CUBLAS_OP_N, + nRA, nCB, nCA, &_alpha, + A.m_d_ptrMatrices, nRA, + B.m_d_ptrMatrices, nCA, + &_beta, + m_d_ptrMatrices, nRA, + nMat)); + } else { + gpuErrChk(cublasSgemm(Session::getInstance().cuBlasHandle(), + CUBLAS_OP_N, CUBLAS_OP_N, + nRA, nCB, nCA, &_alpha, + A.raw(), nRA, + B.raw(), nCA, + &_beta, + raw(), nRA)); + } } template<> From 296efd4b18b6cc68339462d6d953fdfbd59fbbea Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:46:59 +0000 Subject: [PATCH 06/17] Get rid of pointersToMatrices() in LS --- include/tensor.cuh | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 3d650d4..465eacd 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -359,6 +359,7 @@ public: * Creates a vector of pointers to the matrices of this tensor. * The vector is an (n,1,1)-tensor, where n is the number of matrices in this tensor. 
* @return vector of pointers to the first element of each matrix + * @deprecated */ DTensor pointersToMatrices() const; @@ -1032,17 +1033,15 @@ inline void DTensor::leastSquaresBatched(DTensor &B) { if (m_numCols > m_numRows) throw std::invalid_argument("[Least squares batched] supports square or tall matrices only"); int info = 0; - DTensor infoArray(batchSize); - DTensor As = pointersToMatrices(); - DTensor Bs = B.pointersToMatrices(); + DTensor infoArray(batchSize); // TODO consider preallocating? gpuErrChk(cublasDgelsBatched(Session::getInstance().cuBlasHandle(), CUBLAS_OP_N, m_numRows, m_numCols, nColsB, - As.raw(), + m_d_ptrMatrices, m_numRows, - Bs.raw(), + B.m_d_ptrMatrices, m_numRows, &info, infoArray.raw(), @@ -1062,17 +1061,15 @@ inline void DTensor::leastSquaresBatched(DTensor &B) { if (m_numCols > m_numRows) throw std::invalid_argument("[Least squares batched] supports square or tall matrices only"); int info = 0; - DTensor infoArray(batchSize); - DTensor As = pointersToMatrices(); - DTensor Bs = B.pointersToMatrices(); + DTensor infoArray(batchSize); // TODO consider preallocating? 
gpuErrChk(cublasSgelsBatched(Session::getInstance().cuBlasHandle(), CUBLAS_OP_N, m_numRows, m_numCols, nColsB, - As.raw(), + m_d_ptrMatrices, m_numRows, - Bs.raw(), + B.m_d_ptrMatrices, m_numRows, &info, infoArray.raw(), From 02564d57cb8de04850df8a4f961ecad7e3338ff5 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:53:38 +0000 Subject: [PATCH 07/17] New method DTensor::ptrMatrices() exposes m_d_ptrMatrices --- include/tensor.cuh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/tensor.cuh b/include/tensor.cuh index 465eacd..6f15916 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -316,6 +316,12 @@ public: */ T *raw() const; + /** + * Pointers to matrices (on device) + * @return + */ + T **ptrMatrices(); + /** * @return number of rows */ @@ -842,6 +848,12 @@ inline T *DTensor::raw() const { return m_d_data; } +template +inline T **DTensor::ptrMatrices() { + return m_d_ptrMatrices; +} + + template<> inline DTensor DTensor::tr() const { DTensor transposes(m_numCols, m_numRows, m_numMats); From f2e86c79e274a3f022965e07008be071152154ed Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 01:57:44 +0000 Subject: [PATCH 08/17] Completely got rid of pointersToMatrices() --- include/tensor.cuh | 38 ++++++-------------------------------- test/testTensor.cu | 24 ------------------------ 2 files changed, 6 insertions(+), 56 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 6f15916..9fc4703 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -361,14 +361,6 @@ public: */ void deviceCopyTo(DTensor &other) const; - /** - * Creates a vector of pointers to the matrices of this tensor. - * The vector is an (n,1,1)-tensor, where n is the number of matrices in this tensor. - * @return vector of pointers to the first element of each matrix - * @deprecated - */ - DTensor pointersToMatrices() const; - /** * Slices rows from specified matrix. 
* @param rowsFrom index to slice rows from (zero-indexed) @@ -966,18 +958,6 @@ inline T DTensor::operator()(size_t i, size_t j, size_t k) const { return hostDst; } -template -inline DTensor DTensor::pointersToMatrices() const { - std::vector h_pointers(m_numMats); - size_t numelMat = m_numRows * m_numCols; - h_pointers[0] = m_d_data; - for (size_t i = 1; i < m_numMats; i++) { - h_pointers[i] = m_d_data + i * numelMat; - } - DTensor t(h_pointers, m_numMats, 1, 1); - return t; -} - template<> inline void DTensor::addAB(const DTensor &A, const DTensor &B, double alpha, double beta) { size_t nMat = A.numMats(); @@ -1841,11 +1821,10 @@ public: template<> inline void CholeskyBatchFactoriser::factorise() { if (m_factorisationDone) return; - DTensor ptrA = m_matrix->pointersToMatrices(); gpuErrChk(cusolverDnDpotrfBatched(Session::getInstance().cuSolverHandle(), CUBLAS_FILL_MODE_LOWER, m_numRows, - ptrA.raw(), + m_matrix->ptrMatrices(), m_numRows, m_deviceInfo->raw(), m_numMats)); @@ -1855,11 +1834,10 @@ inline void CholeskyBatchFactoriser::factorise() { template<> inline void CholeskyBatchFactoriser::factorise() { if (m_factorisationDone) return; - DTensor ptrA = m_matrix->pointersToMatrices(); gpuErrChk(cusolverDnSpotrfBatched(Session::getInstance().cuSolverHandle(), CUBLAS_FILL_MODE_LOWER, m_numRows, - ptrA.raw(), + m_matrix->ptrMatrices(), m_numRows, m_deviceInfo->raw(), m_numMats)); @@ -1873,15 +1851,13 @@ inline void CholeskyBatchFactoriser::solve(DTensor &b) { throw std::invalid_argument("[CholeskyBatchSolve] A and b incompatible"); } if (b.numCols() != 1) throw std::invalid_argument("[CholeskyBatchSolve] only supports `b` with one column"); - DTensor ptrA = m_matrix->pointersToMatrices(); - DTensor ptrB = b.pointersToMatrices(); gpuErrChk(cusolverDnDpotrsBatched(Session::getInstance().cuSolverHandle(), CUBLAS_FILL_MODE_LOWER, m_numRows, 1, ///< only supports rhs = 1 - ptrA.raw(), + m_matrix->ptrMatrices(), m_numRows, - ptrB.raw(), + b.ptrMatrices(), m_numRows, 
m_deviceInfo->raw(), m_numMats)); @@ -1894,15 +1870,13 @@ inline void CholeskyBatchFactoriser::solve(DTensor &b) { throw std::invalid_argument("[CholeskyBatchSolve] A and b incompatible"); } if (b.numCols() != 1) throw std::invalid_argument("[CholeskyBatchSolve] only supports `b` with one column"); - DTensor ptrA = m_matrix->pointersToMatrices(); - DTensor ptrB = b.pointersToMatrices(); gpuErrChk(cusolverDnSpotrsBatched(Session::getInstance().cuSolverHandle(), CUBLAS_FILL_MODE_LOWER, m_numRows, 1, ///< only supports rhs = 1 - ptrA.raw(), + m_matrix->ptrMatrices(), m_numRows, - ptrB.raw(), + b.ptrMatrices(), m_numRows, m_deviceInfo->raw(), m_numMats)); diff --git a/test/testTensor.cu b/test/testTensor.cu index 0a33ffc..902a27c 100644 --- a/test/testTensor.cu +++ b/test/testTensor.cu @@ -635,30 +635,6 @@ TEST_F(TensorTest, tensorMinusTensor) { tensorMinusTensor(); } -/* --------------------------------------- - * Tensor: pointers to matrices (on device) - * --------------------------------------- */ - -TEMPLATE_WITH_TYPE_T -void tensorPointersToMatrices() { - std::vector dataA = TENSOR_DATA_234A; - DTensor A(dataA, 2, 3, 4); - DTensor pointers = A.pointersToMatrices(); - EXPECT_EQ(4, pointers.numRows()); - EXPECT_EQ(1, pointers.numCols()); - EXPECT_EQ(1, pointers.numMats()); - T *p1 = pointers(1, 0, 0); // pointer to matrix #1 - T hostDst; // let's see what's there... 
- cudaMemcpy(&hostDst, p1, sizeof(T), cudaMemcpyDeviceToHost); - EXPECT_EQ(dataA[6], hostDst); -} - -TEST_F(TensorTest, tensorPointersToMatrices) { - tensorPointersToMatrices(); - tensorPointersToMatrices(); - tensorPointersToMatrices(); -} - /* --------------------------------------- * Tensor: C = AB * --------------------------------------- */ From 424a251a1d4008932e8791d1a0843b6575e6770e Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 02:36:51 +0000 Subject: [PATCH 09/17] No memory allocation for m_d_ptrMatrices unless nMats > 1 --- CHANGELOG.md | 9 +++++++++ include/tensor.cuh | 14 ++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e409628..4d19eb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v1.3.1 - 8-11-2024 + +### Fixed + +- Memory management improvements: we got rid of `pointerToMatrices`, which would unnecessarily allocate memory and `addAB` does not allocate any new memory internally. 
+ diff --git a/include/tensor.cuh b/include/tensor.cuh index 9fc4703..6cd64e1 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -232,8 +232,8 @@ private: void initialisePointersToMatricesData() { /* Make sure m_d_ptrMatrices has been allocated */ - if (!m_d_ptrMatrices || !m_doDestroyPtrMatrices) { - throw std::runtime_error("Unallocated memory (m_d_ptrMatrices)"); + if (m_numMats <= 1 | !m_d_ptrMatrices || !m_doDestroyPtrMatrices) { + return; } /* Host-based vector of pointers */ std::vector h_pointers(m_numMats); @@ -287,6 +287,7 @@ public: * @param n number of columns * @param k number of matrices */ + DTensor(const std::vector &data, size_t m, size_t n = 1, size_t k = 1, StorageMode mode = StorageMode::defaultMajor); @@ -553,6 +554,7 @@ void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats } m_numRows = newNumRows; m_numCols = newNumCols; + // TODO allocate or reallocate new memory m_numMats = newNumMats; } @@ -801,8 +803,12 @@ inline bool DTensor::allocateOnDevice(size_t size, bool zero) { if (cudaStatus != cudaSuccess) return false; if (zero) gpuErrChk(cudaMemset(m_d_data, 0, buffer_size)); // set to zero all elements - m_doDestroyPtrMatrices = true; - cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T *)); + if (numMats() > 1) { + m_doDestroyPtrMatrices = true; + cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T *)); + } else { + m_doDestroyPtrMatrices = false; + } return (cudaStatus != cudaSuccess); } From b2857cd6c135c241ce655dc71422688b94c217b5 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 02:47:51 +0000 Subject: [PATCH 10/17] ptrMatrices now declared as a const method --- include/tensor.cuh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 6cd64e1..3c8c83c 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -287,7 +287,6 @@ public: * @param n number of columns * @param k number of matrices */ - 
DTensor(const std::vector &data, size_t m, size_t n = 1, size_t k = 1, StorageMode mode = StorageMode::defaultMajor); @@ -321,7 +320,7 @@ public: * Pointers to matrices (on device) * @return */ - T **ptrMatrices(); + T **ptrMatrices() const; /** * @return number of rows @@ -847,7 +846,7 @@ inline T *DTensor::raw() const { } template -inline T **DTensor::ptrMatrices() { +inline T **DTensor::ptrMatrices() const { return m_d_ptrMatrices; } From 7c32dae4ca0a2e6fd91281225cdee35d66521ae4 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 13:55:36 +0000 Subject: [PATCH 11/17] Update DTensor::reshape When necessary, memory is reallocated for m_d_ptrMatrices within reshape --- include/tensor.cuh | 64 ++++++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 3c8c83c..ff35317 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -230,32 +230,24 @@ private: */ std::ostream &print(std::ostream &out) const; - void initialisePointersToMatricesData() { - /* Make sure m_d_ptrMatrices has been allocated */ - if (m_numMats <= 1 | !m_d_ptrMatrices || !m_doDestroyPtrMatrices) { - return; - } - /* Host-based vector of pointers */ - std::vector h_pointers(m_numMats); - size_t numelMat = m_numRows * m_numCols; - h_pointers[0] = m_d_data; - for (size_t i = 1; i < m_numMats; i++) { - h_pointers[i] = m_d_data + i * numelMat; - } - /* Upload data to m_d_ptrMatrices */ - size_t buffer_size = m_numMats * sizeof(T *); - gpuErrChk(cudaMemcpy(m_d_ptrMatrices, h_pointers.data(), buffer_size, cudaMemcpyHostToDevice)); - } + /** + * Initialises an array of pointers to the sub-matrices of the + * tensor (on the device). No allocation takes place if the tensor + * has only one matrix. 
+ */ + void initialisePointersToMatricesData(); public: /** * Create a tensor with random elements - * @param numRows - * @param numCols - * @param numMats - * @param low - * @param hi + * @param numRows number of rows + * @param numCols number of columns + * @param numMats number of matrices + * @param low minimum value of random elements + * @param hi maximum value of random elements + * + * @throws std::invalid_argument if T is other than double, float, or int */ static DTensor createRandomTensor(size_t numRows, size_t numCols, size_t numMats, T low, T hi); @@ -526,6 +518,24 @@ public: }; /* END OF DTENSOR */ +template +void DTensor::initialisePointersToMatricesData() { + /* Make sure m_d_ptrMatrices has been allocated */ + if (m_numMats <= 1 || !m_d_ptrMatrices || !m_doDestroyPtrMatrices) { + return; + } + /* Host-based vector of pointers */ + std::vector h_pointers(m_numMats); + size_t numelMat = m_numRows * m_numCols; + h_pointers[0] = m_d_data; + for (size_t i = 1; i < m_numMats; i++) { + h_pointers[i] = m_d_data + i * numelMat; + } + /* Upload data to m_d_ptrMatrices */ + size_t buffer_size = m_numMats * sizeof(T *); + gpuErrChk(cudaMemcpy(m_d_ptrMatrices, h_pointers.data(), buffer_size, cudaMemcpyHostToDevice)); +} + template DTensor DTensor::createRandomTensor(size_t numRows, size_t numCols, size_t numMats, T low, T hi) { if constexpr (std::is_floating_point::value) { @@ -543,6 +553,7 @@ DTensor DTensor::createRandomTensor(size_t numRows, size_t numCols, size_t template void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats) { + if (m_numRows == newNumRows && m_numCols == newNumCols && m_numMats == newNumMats) return; size_t newNumElements = newNumRows * newNumCols * newNumMats; if (numEl() != newNumElements) { char errMessage[256]; @@ -553,8 +564,17 @@ void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats } m_numRows = newNumRows; m_numCols = newNumCols; - // TODO allocate or reallocate new memory m_numMats 
= newNumMats; + /* Free the memory for m_d_ptrMatrices */ + if (m_d_ptrMatrices && m_doDestroyPtrMatrices) { + gpuErrChk(cudaFree(m_d_ptrMatrices)); + m_d_ptrMatrices = nullptr; + } + /* Reallocate memory for m_d_ptrMatrices, if necessary */ + if (m_numMats > 1) { + gpuErrChk(cudaMalloc(&m_d_ptrMatrices, m_numMats * sizeof(T *))); + } + initialisePointersToMatricesData(); } template From 225ba0da797f37cc4911efc9a33b99d721401d3d Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 14:03:26 +0000 Subject: [PATCH 12/17] Better memory management when reshaping No need to free/reallocate when we can reuse the already allocated memory space (when the number of matrices decreases); no new memory allocation when numMats=1 --- include/tensor.cuh | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index ff35317..44a448a 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -555,6 +555,7 @@ template void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats) { if (m_numRows == newNumRows && m_numCols == newNumCols && m_numMats == newNumMats) return; size_t newNumElements = newNumRows * newNumCols * newNumMats; + /* Check whether dimensions are compatible */ if (numEl() != newNumElements) { char errMessage[256]; sprintf(errMessage, @@ -562,18 +563,22 @@ void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats numRows(), numRows(), numMats(), numEl(), newNumRows, newNumCols, newNumMats, newNumElements); throw std::invalid_argument(errMessage); } + + /* Only free/reallocate if newNumMats > m_numMats + * otherwise, reuse the already allocated memory space */ + if (newNumMats > m_numMats) { + /* Free the memory for m_d_ptrMatrices */ + if (m_d_ptrMatrices && m_doDestroyPtrMatrices) { + gpuErrChk(cudaFree(m_d_ptrMatrices)); + m_d_ptrMatrices = nullptr; + } + /* Reallocate memory for m_d_ptrMatrices, if necessary */ + if (newNumMats > 1) 
gpuErrChk(cudaMalloc(&m_d_ptrMatrices, newNumMats * sizeof(T *))); + } + m_numRows = newNumRows; m_numCols = newNumCols; m_numMats = newNumMats; - /* Free the memory for m_d_ptrMatrices */ - if (m_d_ptrMatrices && m_doDestroyPtrMatrices) { - gpuErrChk(cudaFree(m_d_ptrMatrices)); - m_d_ptrMatrices = nullptr; - } - /* Reallocate memory for m_d_ptrMatrices, if necessary */ - if (m_numMats > 1) { - gpuErrChk(cudaMalloc(&m_d_ptrMatrices, m_numMats * sizeof(T *))); - } initialisePointersToMatricesData(); } From d4b5472bd3d555815e9e69e7a51ed01330a69152 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 14:32:49 +0000 Subject: [PATCH 13/17] Safe memory allocation in reshape and unit test --- include/tensor.cuh | 6 +- test/testTensor.cu | 752 +++++++++++++++++++++++++++------------------ 2 files changed, 451 insertions(+), 307 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 44a448a..55b661b 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -571,9 +571,13 @@ void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats if (m_d_ptrMatrices && m_doDestroyPtrMatrices) { gpuErrChk(cudaFree(m_d_ptrMatrices)); m_d_ptrMatrices = nullptr; + m_doDestroyPtrMatrices = false; } /* Reallocate memory for m_d_ptrMatrices, if necessary */ - if (newNumMats > 1) gpuErrChk(cudaMalloc(&m_d_ptrMatrices, newNumMats * sizeof(T *))); + if (newNumMats > 1) { + gpuErrChk(cudaMalloc(&m_d_ptrMatrices, newNumMats * sizeof(T *))); + m_doDestroyPtrMatrices = true; + } } m_numRows = newNumRows; diff --git a/test/testTensor.cu b/test/testTensor.cu index 902a27c..982934c 100644 --- a/test/testTensor.cu +++ b/test/testTensor.cu @@ -26,7 +26,7 @@ protected: TEMPLATE_WITH_TYPE_T void tensorConstructionZero() { - DTensor zero(2, 3, 4, true); + DTensor zero(2, 3, 4, true); EXPECT_EQ(2, zero.numRows()); EXPECT_EQ(3, zero.numCols()); EXPECT_EQ(4, zero.numMats()); @@ -36,10 +36,14 @@ void tensorConstructionZero() { EXPECT_EQ(expectedResult, 
zeroDown); } -TEST_F(TensorTest, tensorConstructionZero) { - tensorConstructionZero(); - tensorConstructionZero(); - tensorConstructionZero(); +TEST_F(TensorTest, tensorConstructionZero +) { +tensorConstructionZero(); + +tensorConstructionZero(); + +tensorConstructionZero(); + } /* --------------------------------------- @@ -65,21 +69,21 @@ void tensorConstructionStorageMode() { std::vector Rm = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; std::vector hostData(rows * cols * mats); // test constructor - DTensor testCm(Cm, rows, cols, mats, columnMajor); - DTensor testRm(Rm, rows, cols, mats, rowMajor); + DTensor testCm(Cm, rows, cols, mats, columnMajor); + DTensor testRm(Rm, rows, cols, mats, rowMajor); testCm.download(hostData); EXPECT_EQ(Cm, hostData); testRm.download(hostData); EXPECT_EQ(Cm, hostData); // test .upload() - DTensor testSplitCm(rows, cols, mats); - DTensor ACm(testSplitCm, 2, 0, 0); - DTensor BCm(testSplitCm, 2, 1, 1); + DTensor testSplitCm(rows, cols, mats); + DTensor ACm(testSplitCm, 2, 0, 0); + DTensor BCm(testSplitCm, 2, 1, 1); ACm.upload(aCm, columnMajor); BCm.upload(bCm, columnMajor); - DTensor testSplitRm(rows, cols, mats); - DTensor ARm(testSplitRm, 2, 0, 0); - DTensor BRm(testSplitRm, 2, 1, 1); + DTensor testSplitRm(rows, cols, mats); + DTensor ARm(testSplitRm, 2, 0, 0); + DTensor BRm(testSplitRm, 2, 1, 1); ARm.upload(aRm, rowMajor); BRm.upload(bRm, rowMajor); testSplitCm.download(hostData); @@ -88,10 +92,14 @@ void tensorConstructionStorageMode() { EXPECT_EQ(Cm, hostData); } -TEST_F(TensorTest, tensorConstructionStorageMode) { - tensorConstructionStorageMode(); - tensorConstructionStorageMode(); - tensorConstructionStorageMode(); +TEST_F(TensorTest, tensorConstructionStorageMode +) { +tensorConstructionStorageMode(); + +tensorConstructionStorageMode(); + +tensorConstructionStorageMode(); + } /* --------------------------------------- @@ -109,10 +117,14 @@ void randomTensorCreation() { EXPECT_TRUE(rEle >= -1 && rEle <= 1); } -TEST_F(TensorTest, 
randomTensorCreation) { - randomTensorCreation(); - randomTensorCreation(); - randomTensorCreation(); +TEST_F(TensorTest, randomTensorCreation +) { +randomTensorCreation(); + +randomTensorCreation(); + +randomTensorCreation(); + } /* --------------------------------------- @@ -121,17 +133,23 @@ TEST_F(TensorTest, randomTensorCreation) { TEMPLATE_WITH_TYPE_T void tensorMoveConstructor() { - DTensor zero(2, 3, 4, true); - DTensor x(std::move(zero)); - DTensor y(DTensor{100, 10, 1000}); + DTensor zero(2, 3, 4, true); + DTensor x(std::move(zero)); + DTensor y(DTensor < T > {100, 10, 1000}); } -TEST_F(TensorTest, tensorMoveConstructor) { - tensorMoveConstructor(); - tensorMoveConstructor(); - tensorMoveConstructor(); - tensorMoveConstructor(); - tensorMoveConstructor(); +TEST_F(TensorTest, tensorMoveConstructor +) { +tensorMoveConstructor(); + +tensorMoveConstructor(); + +tensorMoveConstructor(); + +tensorMoveConstructor(); + +tensorMoveConstructor(); + } /* --------------------------------------- @@ -142,17 +160,21 @@ TEST_F(TensorTest, tensorMoveConstructor) { TEMPLATE_WITH_TYPE_T void tensorConstructionFromVector() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_EQ(2, tenz.numRows()); EXPECT_EQ(3, tenz.numCols()); EXPECT_EQ(4, tenz.numMats()); EXPECT_EQ(2 * 3 * 4, tenz.numEl()); } -TEST_F(TensorTest, tensorConstructionFromVector) { - tensorConstructionFromVector(); - tensorConstructionFromVector(); - tensorConstructionFromVector(); +TEST_F(TensorTest, tensorConstructionFromVector +) { +tensorConstructionFromVector(); + +tensorConstructionFromVector(); + +tensorConstructionFromVector(); + } /* --------------------------------------- @@ -162,8 +184,8 @@ TEST_F(TensorTest, tensorConstructionFromVector) { TEMPLATE_WITH_TYPE_T void tensorCopyConstructor() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor tenzCp(tenz); + DTensor tenz(data, 2, 3, 4); + DTensor tenzCp(tenz); 
EXPECT_EQ(2, tenzCp.numRows()); EXPECT_EQ(3, tenzCp.numCols()); EXPECT_EQ(4, tenzCp.numMats()); @@ -174,10 +196,14 @@ void tensorCopyConstructor() { EXPECT_NE(tenz.raw(), tenzCp.raw()); } -TEST_F(TensorTest, tensorCopyConstructor) { - tensorCopyConstructor(); - tensorCopyConstructor(); - tensorCopyConstructor(); +TEST_F(TensorTest, tensorCopyConstructor +) { +tensorCopyConstructor(); + +tensorCopyConstructor(); + +tensorCopyConstructor(); + } /* --------------------------------------- @@ -188,18 +214,22 @@ TEST_F(TensorTest, tensorCopyConstructor) { TEMPLATE_WITH_TYPE_T void tensorSlicingConstructorAxis2() { std::vector data = TENSOR_DATA_234A; - DTensor tens(data, 2, 3, 4); - DTensor tensSlice(tens, 2, 0, 1); // matrices #0 and #1 + DTensor tens(data, 2, 3, 4); + DTensor tensSlice(tens, 2, 0, 1); // matrices #0 and #1 EXPECT_EQ(2, tensSlice.numRows()); EXPECT_EQ(3, tensSlice.numCols()); EXPECT_EQ(2, tensSlice.numMats()); EXPECT_EQ(tens.raw(), tensSlice.raw()); // it is indeed a slice } -TEST_F(TensorTest, tensorSlicingConstructorAxis2) { - tensorSlicingConstructorAxis2(); - tensorSlicingConstructorAxis2(); - tensorSlicingConstructorAxis2(); +TEST_F(TensorTest, tensorSlicingConstructorAxis2 +) { +tensorSlicingConstructorAxis2(); + +tensorSlicingConstructorAxis2(); + +tensorSlicingConstructorAxis2(); + } /* --------------------------------------- @@ -210,8 +240,8 @@ TEST_F(TensorTest, tensorSlicingConstructorAxis2) { TEMPLATE_WITH_TYPE_T void tensorSlicingConstructorAxis1() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor tenzSlice(tenz, 1, 1, 2); // columns from 1 to 2 + DTensor tenz(data, 2, 3, 4); + DTensor tenzSlice(tenz, 1, 1, 2); // columns from 1 to 2 EXPECT_EQ(2, tenzSlice.numRows()); EXPECT_EQ(2, tenzSlice.numCols()); EXPECT_EQ(1, tenzSlice.numMats()); @@ -221,10 +251,14 @@ void tensorSlicingConstructorAxis1() { EXPECT_EQ(expected, tenzSliceDown); } -TEST_F(TensorTest, tensorSlicingConstructorAxis1) { - 
tensorSlicingConstructorAxis1(); - tensorSlicingConstructorAxis1(); - tensorSlicingConstructorAxis1(); +TEST_F(TensorTest, tensorSlicingConstructorAxis1 +) { +tensorSlicingConstructorAxis1(); + +tensorSlicingConstructorAxis1(); + +tensorSlicingConstructorAxis1(); + } /* --------------------------------------- @@ -235,8 +269,8 @@ TEST_F(TensorTest, tensorSlicingConstructorAxis1) { TEMPLATE_WITH_TYPE_T void tensorSlicingConstructorAxis0() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor tenzSlice(tenz, 0, 2, 3); // elements 2..3 + DTensor tenz(data, 2, 3, 4); + DTensor tenzSlice(tenz, 0, 2, 3); // elements 2..3 EXPECT_EQ(2, tenzSlice.numRows()); EXPECT_EQ(1, tenzSlice.numCols()); EXPECT_EQ(1, tenzSlice.numMats()); @@ -246,10 +280,14 @@ void tensorSlicingConstructorAxis0() { EXPECT_EQ(expected, tenzSliceDown); } -TEST_F(TensorTest, tensorSlicingConstructorAxis0) { - tensorSlicingConstructorAxis0(); - tensorSlicingConstructorAxis0(); - tensorSlicingConstructorAxis0(); +TEST_F(TensorTest, tensorSlicingConstructorAxis0 +) { +tensorSlicingConstructorAxis0(); + +tensorSlicingConstructorAxis0(); + +tensorSlicingConstructorAxis0(); + } /* --------------------------------------- @@ -259,7 +297,7 @@ TEST_F(TensorTest, tensorSlicingConstructorAxis0) { TEMPLATE_WITH_TYPE_T void tensorUpload() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(2, 3, 4); + DTensor tenz(2, 3, 4); tenz.upload(data); EXPECT_EQ(2, tenz.numRows()); EXPECT_EQ(3, tenz.numCols()); @@ -269,10 +307,14 @@ void tensorUpload() { EXPECT_EQ(8, tenz(1, 2, 3)); } -TEST_F(TensorTest, tensorUpload) { - tensorUpload(); - tensorUpload(); - tensorUpload(); +TEST_F(TensorTest, tensorUpload +) { +tensorUpload(); + +tensorUpload(); + +tensorUpload(); + } /* --------------------------------------- @@ -282,9 +324,9 @@ TEST_F(TensorTest, tensorUpload) { TEMPLATE_WITH_TYPE_T void tensorDeviceCopyTo() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor other(2, 3, 
5, true); - DTensor z(other, 2, 1, 4); + DTensor tenz(data, 2, 3, 4); + DTensor other(2, 3, 5, true); + DTensor z(other, 2, 1, 4); tenz.deviceCopyTo(z); std::vector expected = {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 10, 5, 4, 3, 2, 1, -1, 4, 3, 4, 3, 4, 8}; @@ -293,10 +335,42 @@ void tensorDeviceCopyTo() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorDeviceCopyTo) { - tensorDeviceCopyTo(); - tensorDeviceCopyTo(); - tensorDeviceCopyTo(); +TEST_F(TensorTest, tensorDeviceCopyTo +) { +tensorDeviceCopyTo(); + +tensorDeviceCopyTo(); + +tensorDeviceCopyTo(); + +} + + +/* --------------------------------------- + * Tensor: Frobenius dot product + * --------------------------------------- */ + +TEMPLATE_WITH_TYPE_T +void tensorReshape() { + size_t m = 5, n = 10, k = 3; + DTensor a = DTensor::createRandomTensor(m, n, k, -1, 1); // dim = (m, n, k) + ASSERT_EQ(m, a.numRows()); + ASSERT_EQ(n, a.numCols()); + ASSERT_EQ(k, a.numMats()); + a.reshape(m, k, n); // dim = (m, k, n) + ASSERT_EQ(m, a.numRows()); + ASSERT_EQ(k, a.numCols()); + ASSERT_EQ(n, a.numMats()); + a.reshape(k, n, m); // dim = (k, n, m) + ASSERT_EQ(k, a.numRows()); + ASSERT_EQ(n, a.numCols()); + ASSERT_EQ(m, a.numMats()); +} + +TEST_F(TensorTest, tensorReshape) { + tensorReshape(); + tensorReshape(); + tensorReshape(); } /* --------------------------------------- @@ -308,20 +382,21 @@ void tensorDotF(T epsilon) { // as vectors std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor vecA(dataA, dataA.size()); - DTensor vecB(dataB, dataB.size()); + DTensor vecA(dataA, dataA.size()); + DTensor vecB(dataB, dataB.size()); T dotVector = vecA.dotF(vecB); EXPECT_EQ(604, dotVector); // from MATLAB // as matrices - DTensor tenA(dataA, 2, 3, 4); - DTensor tenB(dataB, 2, 3, 4); + DTensor tenA(dataA, 2, 3, 4); + DTensor tenB(dataB, 2, 3, 4); T dotTensor = tenA.dotF(tenB); EXPECT_EQ(604, dotTensor); // from MATLAB } -TEST_F(TensorTest, tensorDotF) { - 
tensorDotF(PRECISION_LOW); - tensorDotF(PRECISION_HIGH); +TEST_F(TensorTest, tensorDotF +) { +tensorDotF(PRECISION_LOW); +tensorDotF(PRECISION_HIGH); } /* --------------------------------------- @@ -331,13 +406,14 @@ TEST_F(TensorTest, tensorDotF) { TEMPLATE_WITH_TYPE_T void tensorNormF(T epsilon) { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_NEAR(26.153393661244042, tenz.normF(), epsilon); // from MATLAB } -TEST_F(TensorTest, tensorNormF) { - tensorNormF(PRECISION_LOW); - tensorNormF(PRECISION_HIGH); +TEST_F(TensorTest, tensorNormF +) { +tensorNormF(PRECISION_LOW); +tensorNormF(PRECISION_HIGH); } /* --------------------------------------- @@ -348,13 +424,16 @@ TEST_F(TensorTest, tensorNormF) { TEMPLATE_WITH_TYPE_T void tensorSumAbs() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB } -TEST_F(TensorTest, tensorSumAbs) { - tensorSumAbs(); - tensorSumAbs(); +TEST_F(TensorTest, tensorSumAbs +) { +tensorSumAbs(); + +tensorSumAbs(); + } /* --------------------------------------- @@ -364,14 +443,17 @@ TEST_F(TensorTest, tensorSumAbs) { TEMPLATE_WITH_TYPE_T void tensorMax() { std::vector data = TENSOR_DATA_234AMB; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); T m = tenz.maxAbs(); EXPECT_EQ(27, m); } -TEST_F(TensorTest, tensorMax) { - tensorMax(); - tensorMax(); +TEST_F(TensorTest, tensorMax +) { +tensorMax(); + +tensorMax(); + } /* --------------------------------------- @@ -381,14 +463,17 @@ TEST_F(TensorTest, tensorMax) { TEMPLATE_WITH_TYPE_T void tensorMin() { std::vector data = TENSOR_DATA_234AMB; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); T m = tenz.minAbs(); EXPECT_EQ(0, m); } -TEST_F(TensorTest, tensorMin) { - tensorMin(); - tensorMin(); +TEST_F(TensorTest, tensorMin +) { +tensorMin(); + +tensorMin(); + } /* --------------------------------------- @@ 
-421,9 +506,10 @@ void tensorRightGivens(T epsilon) { } } -TEST_F(TensorTest, tensorRightGivens) { - tensorRightGivens(PRECISION_LOW); - tensorRightGivens(PRECISION_HIGH); +TEST_F(TensorTest, tensorRightGivens +) { +tensorRightGivens(PRECISION_LOW); +tensorRightGivens(PRECISION_HIGH); } /* --------------------------------------- @@ -458,9 +544,10 @@ void tensorLeftGivens(T epsilon) { } } -TEST_F(TensorTest, tensorLeftGivens) { - tensorLeftGivens(1e-10); - tensorLeftGivens(1e-14); +TEST_F(TensorTest, tensorLeftGivens +) { +tensorLeftGivens(1e-10); +tensorLeftGivens(1e-14); } /* --------------------------------------- @@ -471,16 +558,20 @@ TEST_F(TensorTest, tensorLeftGivens) { TEMPLATE_WITH_TYPE_T void tensorBracketOperator() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_EQ(1, tenz(0, 0, 0)); EXPECT_EQ(3, tenz(0, 1, 2)); EXPECT_EQ(8, tenz(1, 2, 3)); } -TEST_F(TensorTest, tensorBracketOperator) { - tensorBracketOperator(); - tensorBracketOperator(); - tensorBracketOperator(); +TEST_F(TensorTest, tensorBracketOperator +) { +tensorBracketOperator(); + +tensorBracketOperator(); + +tensorBracketOperator(); + } /* --------------------------------------- @@ -490,8 +581,8 @@ TEST_F(TensorTest, tensorBracketOperator) { TEMPLATE_WITH_TYPE_T void tensorAssignmentOperator() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor other; + DTensor tenz(data, 2, 3, 4); + DTensor other; other = tenz; EXPECT_EQ(tenz.raw(), other.raw()); EXPECT_EQ(2, other.numRows()); @@ -499,10 +590,14 @@ void tensorAssignmentOperator() { EXPECT_EQ(4, other.numMats()); } -TEST_F(TensorTest, tensorAssignmentOperator) { - tensorAssignmentOperator(); - tensorAssignmentOperator(); - tensorAssignmentOperator(); +TEST_F(TensorTest, tensorAssignmentOperator +) { +tensorAssignmentOperator(); + +tensorAssignmentOperator(); + +tensorAssignmentOperator(); + } /* --------------------------------------- @@ -514,16 +609,19 @@ 
void tensorTimesEqualsScalar() { std::vector data = TENSOR_DATA_234A; std::vector dataTimes3 = {3, 6, 9, 12, 15, 18, 21, 24, 27, 24, 21, 30, 15, 12, 9, 6, 3, -3, 12, 9, 12, 9, 12, 24}; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); tenz *= 3.0; std::vector actual; tenz.download(actual); EXPECT_EQ(dataTimes3, actual); } -TEST_F(TensorTest, tensorTimesEqualsScalar) { - tensorTimesEqualsScalar(); - tensorTimesEqualsScalar(); +TEST_F(TensorTest, tensorTimesEqualsScalar +) { +tensorTimesEqualsScalar(); + +tensorTimesEqualsScalar(); + } /* --------------------------------------- @@ -535,16 +633,19 @@ void tensorTimesScalar() { std::vector data = TENSOR_DATA_234A; std::vector dataTimes3 = {3, 6, 9, 12, 15, 18, 21, 24, 27, 24, 21, 30, 15, 12, 9, 6, 3, -3, 12, 9, 12, 9, 12, 24}; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); auto tripleTensor = 3.0 * tenz; std::vector actual; tripleTensor.download(actual); EXPECT_EQ(dataTimes3, actual); } -TEST_F(TensorTest, tensorTimesScalar) { - tensorTimesScalar(); - tensorTimesScalar(); +TEST_F(TensorTest, tensorTimesScalar +) { +tensorTimesScalar(); + +tensorTimesScalar(); + } /* --------------------------------------- @@ -555,8 +656,8 @@ TEMPLATE_WITH_TYPE_T void tensorPlusEqualsTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 3, 4); A += B; std::vector expected = TENSOR_DATA_234APB; std::vector actual; @@ -564,9 +665,12 @@ void tensorPlusEqualsTensor() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorPlusEqualsTensor) { - tensorPlusEqualsTensor(); - tensorPlusEqualsTensor(); +TEST_F(TensorTest, tensorPlusEqualsTensor +) { +tensorPlusEqualsTensor(); + +tensorPlusEqualsTensor(); + } /* --------------------------------------- @@ -577,8 +681,8 @@ TEMPLATE_WITH_TYPE_T void tensorMinusEqualsTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB 
= TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 3, 4); A -= B; std::vector expected = TENSOR_DATA_234AMB; std::vector actual; @@ -586,9 +690,12 @@ void tensorMinusEqualsTensor() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorMinusEqualsTensor) { - tensorMinusEqualsTensor(); - tensorMinusEqualsTensor(); +TEST_F(TensorTest, tensorMinusEqualsTensor +) { +tensorMinusEqualsTensor(); + +tensorMinusEqualsTensor(); + } /* --------------------------------------- @@ -599,18 +706,21 @@ TEMPLATE_WITH_TYPE_T void tensorPlusTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); - DTensor C = A + B; + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 3, 4); + DTensor C = A + B; std::vector expected = TENSOR_DATA_234APB; std::vector actual; C.download(actual); EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorPlusTensor) { - tensorPlusTensor(); - tensorPlusTensor(); +TEST_F(TensorTest, tensorPlusTensor +) { +tensorPlusTensor(); + +tensorPlusTensor(); + } /* --------------------------------------- @@ -621,18 +731,21 @@ TEMPLATE_WITH_TYPE_T void tensorMinusTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); - DTensor C = A - B; + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 3, 4); + DTensor C = A - B; std::vector expected = TENSOR_DATA_234AMB; std::vector actual; C.download(actual); EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorMinusTensor) { - tensorMinusTensor(); - tensorMinusTensor(); +TEST_F(TensorTest, tensorMinusTensor +) { +tensorMinusTensor(); + +tensorMinusTensor(); + } /* --------------------------------------- @@ -647,9 +760,9 @@ void tensorAddAB() { std::vector bData = {6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 2, 1, 5, -6, 8}; - DTensor A(aData, 2, 3, 3); - DTensor B(bData, 3, 2, 
3); - DTensor C(2, 2, 3, true); + DTensor A(aData, 2, 3, 3); + DTensor B(bData, 3, 2, 3); + DTensor C(2, 2, 3, true); C.addAB(A, B); std::vector expected = {41, 56, 14, 20, 158, 176, 77, 86, 60, 64, 111, 118}; std::vector actual; @@ -657,9 +770,12 @@ void tensorAddAB() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorAddAB) { - tensorAddAB(); - tensorAddAB(); +TEST_F(TensorTest, tensorAddAB +) { +tensorAddAB(); + +tensorAddAB(); + } /* --------------------------------------- @@ -674,23 +790,26 @@ void tensorGetRows() { 5., 6., 7., 8., 9., 10., 11., 12., 13}; - DTensor A(aData, 3, 3, 2); - DTensor Ar0 = A.getRows(1, 1, 0); + DTensor A(aData, 3, 3, 2); + DTensor Ar0 = A.getRows(1, 1, 0); std::vector expected0 = {25., 720., -1.}; std::vector actual0(3); Ar0.download(actual0); EXPECT_EQ(expected0, actual0); - DTensor Ar1 = A.getRows(1, 2, 1); + DTensor Ar1 = A.getRows(1, 2, 1); std::vector expected1 = {6., 7., 9., 10., 12., 13.}; std::vector actual1(6); Ar1.download(actual1); EXPECT_EQ(expected1, actual1); } -TEST_F(TensorTest, tensorGetRows) { - tensorGetRows(); - tensorGetRows(); +TEST_F(TensorTest, tensorGetRows +) { +tensorGetRows(); + +tensorGetRows(); + } @@ -701,8 +820,8 @@ TEST_F(TensorTest, tensorGetRows) { TEMPLATE_WITH_TYPE_T void tensorTranspose() { std::vector aData = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - DTensor A(aData, 3, 2, 2); - DTensor Atranspose = A.tr(); + DTensor A(aData, 3, 2, 2); + DTensor Atranspose = A.tr(); EXPECT_EQ(2, Atranspose.numRows()); EXPECT_EQ(3, Atranspose.numCols()); EXPECT_EQ(2, Atranspose.numMats()); @@ -713,9 +832,12 @@ void tensorTranspose() { } -TEST_F(TensorTest, tensorTranspose) { - tensorTranspose(); - tensorTranspose(); +TEST_F(TensorTest, tensorTranspose +) { +tensorTranspose(); + +tensorTranspose(); + } /* ================================================================================================ @@ -742,21 +864,22 @@ void tensorLeastSquares1(T epsilon) { 6, 8, -9, 20}; std::vector bData = {1, 1, -1, 
2, 30, -80}; - DTensor A0(aData, 2, 2, 3); - DTensor A(A0); - DTensor B(bData, 2, 1, 3); - DTensor sol(B); + DTensor A0(aData, 2, 2, 3); + DTensor A(A0); + DTensor B(bData, 2, 1, 3); + DTensor sol(B); A0.leastSquaresBatched(sol); - DTensor C(2, 1, 3); + DTensor C(2, 1, 3); C.addAB(A, sol); C -= B; T nrmErr = C.normF(); EXPECT_LT(nrmErr, epsilon); } -TEST_F(LeastSquaresTest, tensorLS1) { - tensorLeastSquares1(PRECISION_LOW); - tensorLeastSquares1(PRECISION_HIGH); +TEST_F(LeastSquaresTest, tensorLS1 +) { +tensorLeastSquares1(PRECISION_LOW); +tensorLeastSquares1(PRECISION_HIGH); } @@ -780,8 +903,8 @@ void singularValuesComputation(float epsilon) { std::vector bData = {1, 6, 6, 6, 6, 6, 6, 6, 2, 7, 7, 7, 7, 7, 7, 7, 3, 8, 8, 8, 8, 8, 8, 8,}; - DTensor B(bData, 8, 3); - Svd svd(B, true, false); + DTensor B(bData, 8, 3); + Svd svd(B, true, false); EXPECT_EQ(true, svd.factorise()); auto S = svd.singularValues(); EXPECT_NEAR(32.496241123753592, S(0), epsilon); // value from MATLAB @@ -791,9 +914,10 @@ void singularValuesComputation(float epsilon) { EXPECT_TRUE(U.has_value()); } -TEST_F(SvdTest, singularValuesComputation) { - singularValuesComputation(PRECISION_LOW); - singularValuesComputation(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesComputation +) { +singularValuesComputation(PRECISION_LOW); +singularValuesComputation(PRECISION_HIGH); } @@ -806,15 +930,15 @@ void singularValuesMemory(float epsilon) { std::vector bData = {1, 6, 6, 6, 6, 6, 6, 6, 2, 7, 7, 7, 7, 7, 7, 7, 3, 8, 8, 8, 8, 8, 8, 8,}; - DTensor B(bData, 8, 3); - Svd svd(B, true, false); + DTensor B(bData, 8, 3); + Svd svd(B, true, false); EXPECT_EQ(true, svd.factorise()); - DTensor const &v1 = svd.rightSingularVectors(); - DTensor const &v2 = svd.rightSingularVectors(); + DTensor const &v1 = svd.rightSingularVectors(); + DTensor const &v2 = svd.rightSingularVectors(); EXPECT_EQ(&v1, &v2); EXPECT_EQ(v1.raw(), v2.raw()); - DTensor const &s1 = svd.singularValues(); - DTensor const &s2 = svd.singularValues(); 
+ DTensor const &s1 = svd.singularValues(); + DTensor const &s2 = svd.singularValues(); EXPECT_EQ(&s1, &s2); EXPECT_EQ(s1.raw(), s2.raw()); auto u1 = svd.leftSingularVectors().value(); @@ -823,9 +947,10 @@ void singularValuesMemory(float epsilon) { EXPECT_EQ(u1->raw(), u2->raw()); } -TEST_F(SvdTest, singularValuesMemory) { - singularValuesMemory(PRECISION_LOW); - singularValuesMemory(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesMemory +) { +singularValuesMemory(PRECISION_LOW); +singularValuesMemory(PRECISION_HIGH); } @@ -835,11 +960,11 @@ TEST_F(SvdTest, singularValuesMemory) { TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void singularValuesMultipleMatrices(float epsilon) { std::vector aData = {1, 2, 3, 4, 5, 6, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1}; - DTensor A(aData, 3, 2, 3); - Svd svd(A, true); // do compute U (A will be destroyed) + DTensor A(aData, 3, 2, 3); + Svd svd(A, true); // do compute U (A will be destroyed) svd.factorise(); - DTensor const &S = svd.singularValues(); - DTensor const &V = svd.rightSingularVectors(); + DTensor const &S = svd.singularValues(); + DTensor const &V = svd.rightSingularVectors(); auto Uopt = svd.leftSingularVectors(); auto U = Uopt.value(); std::vector expected_v = {-0.386317703118612, -0.922365780077058, -0.922365780077058, 0.386317703118612, @@ -869,9 +994,10 @@ void singularValuesMultipleMatrices(float epsilon) { } -TEST_F(SvdTest, singularValuesMultipleMatrices) { - singularValuesMultipleMatrices(10 * PRECISION_LOW); // SVD with float performs quite poorly - singularValuesMultipleMatrices(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesMultipleMatrices +) { +singularValuesMultipleMatrices(10 * PRECISION_LOW); // SVD with float performs quite poorly +singularValuesMultipleMatrices(PRECISION_HIGH); } @@ -884,9 +1010,9 @@ void singularValuesRankMultipleMatrices(float epsilon) { std::vector aData = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 0, 1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12, 1, 2, 3, 4, 2, 4, 6, 8, 3, 6, 9, 12}; - 
DTensor A(aData, 4, 3, 3); + DTensor A(aData, 4, 3, 3); - Svd svd(A); + Svd svd(A); svd.factorise(); auto rank = svd.rank(epsilon); EXPECT_EQ(3, rank(0, 0, 0)); @@ -894,9 +1020,10 @@ void singularValuesRankMultipleMatrices(float epsilon) { EXPECT_EQ(1, rank(0, 0, 2)); } -TEST_F(SvdTest, singularValuesRankMultipleMatrices) { - singularValuesRankMultipleMatrices(PRECISION_LOW); // SVD with float performs quite poorly - singularValuesRankMultipleMatrices(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesRankMultipleMatrices +) { +singularValuesRankMultipleMatrices(PRECISION_LOW); // SVD with float performs quite poorly +singularValuesRankMultipleMatrices(PRECISION_HIGH); } /* ================================================================================================ @@ -919,17 +1046,18 @@ void choleskyFactorisation(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(aData, 3, 3, 1); - CholeskyFactoriser chol(A); + DTensor A(aData, 3, 3, 1); + CholeskyFactoriser chol(A); chol.factorise(); EXPECT_NEAR(3.162277660168380, A(0, 0), epsilon); EXPECT_NEAR(-0.361403161162101, A(2, 1), epsilon); EXPECT_NEAR(5.382321781081287, A(2, 2), epsilon); } -TEST_F(CholeskyTest, choleskyFactorisation) { - choleskyFactorisation(PRECISION_LOW); - choleskyFactorisation(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyFactorisation +) { +choleskyFactorisation(PRECISION_LOW); +choleskyFactorisation(PRECISION_HIGH); } /* --------------------------------------- @@ -941,14 +1069,14 @@ void choleskyFactorisationSolution(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(aData, 3, 3, 1); - DTensor L(A); // L = A - CholeskyFactoriser chol(L); + DTensor A(aData, 3, 3, 1); + DTensor L(A); // L = A + CholeskyFactoriser chol(L); chol.factorise(); std::vector bData = {-1., -3., 5.}; - DTensor rhs(bData, 3, 1, 1); - DTensor sol(rhs); + DTensor rhs(bData, 3, 1, 1); + DTensor sol(rhs); chol.solve(sol); std::vector 
expected = {-0.126805213103205, -0.128566396618528, 0.175061641423036}; @@ -956,15 +1084,16 @@ void choleskyFactorisationSolution(T epsilon) { sol.download(actual); for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i], epsilon); - DTensor error = A * sol; + DTensor error = A * sol; error -= rhs; EXPECT_TRUE(error.normF() < epsilon); } -TEST_F(CholeskyTest, choleskyFactorisationSolution) { - choleskyFactorisationSolution(PRECISION_LOW); - choleskyFactorisationSolution(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyFactorisationSolution +) { +choleskyFactorisationSolution(PRECISION_LOW); +choleskyFactorisationSolution(PRECISION_HIGH); } /* --------------------------------------- @@ -976,12 +1105,12 @@ void choleskyBatchFactorisation(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(3, 3, 2); - DTensor A0(A, 2, 0, 0); - DTensor A1(A, 2, 1, 1); + DTensor A(3, 3, 2); + DTensor A0(A, 2, 0, 0); + DTensor A1(A, 2, 1, 1); A0.upload(aData); A1.upload(aData); - CholeskyBatchFactoriser chol(A); + CholeskyBatchFactoriser chol(A); chol.factorise(); // 0 EXPECT_NEAR(3.162277660168380, A(0, 0, 0), epsilon); @@ -993,9 +1122,10 @@ void choleskyBatchFactorisation(T epsilon) { EXPECT_NEAR(5.382321781081287, A(2, 2, 1), epsilon); } -TEST_F(CholeskyTest, choleskyBatchFactorisation) { - choleskyBatchFactorisation(PRECISION_LOW); - choleskyBatchFactorisation(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyBatchFactorisation +) { +choleskyBatchFactorisation(PRECISION_LOW); +choleskyBatchFactorisation(PRECISION_HIGH); } /* --------------------------------------- @@ -1007,35 +1137,36 @@ void choleskyBatchFactorSolve(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(3, 3, 2); - DTensor A0(A, 2, 0, 0); - DTensor A1(A, 2, 1, 1); + DTensor A(3, 3, 2); + DTensor A0(A, 2, 0, 0); + DTensor A1(A, 2, 1, 1); A0.upload(aData); A1.upload(aData); - DTensor L(A); // L = A - CholeskyBatchFactoriser 
chol(L); + DTensor L(A); // L = A + CholeskyBatchFactoriser chol(L); chol.factorise(); std::vector bData = {-1., -3., 5.}; - DTensor rhs(3, 1, 2); - DTensor rhs0(rhs, 2, 0, 0); - DTensor rhs1(rhs, 2, 1, 1); + DTensor rhs(3, 1, 2); + DTensor rhs0(rhs, 2, 0, 0); + DTensor rhs1(rhs, 2, 1, 1); rhs0.upload(bData); rhs1.upload(bData); - DTensor sol(rhs); + DTensor sol(rhs); chol.solve(sol); std::vector expected = {-0.126805213103205, -0.128566396618528, 0.175061641423036}; std::vector actual(6); sol.download(actual); for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i], epsilon); // 0 for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i + 3], epsilon); // 1 - DTensor error = A * sol; + DTensor error = A * sol; error -= rhs; EXPECT_TRUE(error.normF() < epsilon); } -TEST_F(CholeskyTest, choleskyBatchFactorSolve) { - choleskyBatchFactorSolve(PRECISION_LOW); - choleskyBatchFactorSolve(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyBatchFactorSolve +) { +choleskyBatchFactorSolve(PRECISION_LOW); +choleskyBatchFactorSolve(PRECISION_HIGH); } /* --------------------------------------- @@ -1047,42 +1178,43 @@ void choleskyBatchSolve(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(3, 3, 2); - DTensor A0(A, 2, 0, 0); - DTensor A1(A, 2, 1, 1); + DTensor A(3, 3, 2); + DTensor A0(A, 2, 0, 0); + DTensor A1(A, 2, 1, 1); A0.upload(aData); A1.upload(aData); std::vector lowData = {3.162277660168380, 0, 0, 0.632455532033676, 4.427188724235731, 0, 0.948683298050514, -0.361403161162101, 5.382321781081287}; // from matlab - DTensor low(3, 3, 2); - DTensor low0(low, 2, 0, 0); - DTensor low1(low, 2, 1, 1); + DTensor low(3, 3, 2); + DTensor low0(low, 2, 0, 0); + DTensor low1(low, 2, 1, 1); low0.upload(lowData, rowMajor); low1.upload(lowData, rowMajor); - DTensor L(low); - CholeskyBatchFactoriser chol(L, true); + DTensor L(low); + CholeskyBatchFactoriser chol(L, true); std::vector bData = {-1., -3., 5.}; - DTensor rhs(3, 1, 
2); - DTensor rhs0(rhs, 2, 0, 0); - DTensor rhs1(rhs, 2, 1, 1); + DTensor rhs(3, 1, 2); + DTensor rhs0(rhs, 2, 0, 0); + DTensor rhs1(rhs, 2, 1, 1); rhs0.upload(bData); rhs1.upload(bData); - DTensor sol(rhs); + DTensor sol(rhs); chol.solve(sol); std::vector expected = {-0.126805213103205, -0.128566396618528, 0.175061641423036}; std::vector actual(6); sol.download(actual); for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i], epsilon); // 0 for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i + 3], epsilon); // 1 - DTensor error = A * sol; + DTensor error = A * sol; error -= rhs; EXPECT_TRUE(error.normF() < epsilon); } -TEST_F(CholeskyTest, choleskyBatchSolve) { - choleskyBatchSolve(PRECISION_LOW); - choleskyBatchSolve(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyBatchSolve +) { +choleskyBatchSolve(PRECISION_LOW); +choleskyBatchSolve(PRECISION_HIGH); } @@ -1105,15 +1237,15 @@ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void qrFactorisation(T epsilon) { size_t nR = 4; size_t nC = 3; - DTensor temp(nR, nC); - DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); - QRFactoriser qr(temp); + DTensor temp(nR, nC); + DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); + QRFactoriser qr(temp); A.deviceCopyTo(temp); int status = qr.factorise(); EXPECT_EQ(status, 0); - DTensor Q(nR, nC); - DTensor R(nC, nC, 1, true); - DTensor QR(nR, nC); + DTensor Q(nR, nC); + DTensor R(nC, nC, 1, true); + DTensor QR(nR, nC); status = qr.getQR(Q, R); EXPECT_EQ(status, 0); QR.addAB(Q, R); @@ -1122,9 +1254,10 @@ void qrFactorisation(T epsilon) { EXPECT_NEAR(nrm, 0., epsilon); } -TEST_F(QRTest, qrFactorisation) { - qrFactorisation(PRECISION_LOW); - qrFactorisation(PRECISION_HIGH); +TEST_F(QRTest, qrFactorisation +) { +qrFactorisation(PRECISION_LOW); +qrFactorisation(PRECISION_HIGH); } /* --------------------------------------- @@ -1136,15 +1269,15 @@ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void qrFactorisationTall(T epsilon) 
{ size_t nR = 20; size_t nC = 3; - DTensor temp(nR, nC); - DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); - QRFactoriser qr(temp); + DTensor temp(nR, nC); + DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); + QRFactoriser qr(temp); A.deviceCopyTo(temp); int status = qr.factorise(); EXPECT_EQ(status, 0); - DTensor Q(nR, nC); - DTensor R(nC, nC, 1, true); - DTensor QR(nR, nC); + DTensor Q(nR, nC); + DTensor R(nC, nC, 1, true); + DTensor QR(nR, nC); status = qr.getQR(Q, R); EXPECT_EQ(status, 0); QR.addAB(Q, R); @@ -1153,9 +1286,10 @@ void qrFactorisationTall(T epsilon) { EXPECT_NEAR(nrm, 0., epsilon); } -TEST_F(QRTest, qrFactorisationTall) { - qrFactorisationTall(PRECISION_LOW); - qrFactorisationTall(PRECISION_HIGH); +TEST_F(QRTest, qrFactorisationTall +) { +qrFactorisationTall(PRECISION_LOW); +qrFactorisationTall(PRECISION_HIGH); } /* --------------------------------------- @@ -1166,7 +1300,7 @@ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void qrLeastSquares(T epsilon) { size_t nR = 4; size_t nC = 3; - DTensor temp(nR, nC); + DTensor temp(nR, nC); std::vector vecA = {85.5638, -59.4001, -80.1992, 99.9464, 5.51393, 5.17935, 6.87488, -26.7536, 36.0914, @@ -1175,12 +1309,12 @@ void qrLeastSquares(T epsilon) { -48.5744, 43.4229, -56.5081}; // Random vector - DTensor A(vecA, nR, nC, 1, rowMajor); - DTensor b(vecB, nR); - DTensor xFull(nR); - DTensor x(xFull, 0, 0, nC - 1); - DTensor Ax(nR); - QRFactoriser qr(temp); + DTensor A(vecA, nR, nC, 1, rowMajor); + DTensor b(vecB, nR); + DTensor xFull(nR); + DTensor x(xFull, 0, 0, nC - 1); + DTensor Ax(nR); + QRFactoriser qr(temp); A.deviceCopyTo(temp); int status = qr.factorise(); EXPECT_EQ(status, 0); @@ -1193,9 +1327,10 @@ void qrLeastSquares(T epsilon) { EXPECT_NEAR(nrm, 80.003169364198072, epsilon); // From MatLab } -TEST_F(QRTest, qrLeastSquares) { - qrLeastSquares(PRECISION_LOW); - qrLeastSquares(PRECISION_HIGH); +TEST_F(QRTest, qrLeastSquares +) { +qrLeastSquares(PRECISION_LOW); 
+qrLeastSquares(PRECISION_HIGH); } @@ -1221,19 +1356,19 @@ void computeNullspaceTensor(T epsilon) { 1, 2, 3, 4, 2, 4, 6, 8, 3, 6, 9, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - DTensor A(aData, 3, 4, 5); - Nullspace ns(A); - DTensor nA = ns.nullspace(); + DTensor A(aData, 3, 4, 5); + Nullspace ns(A); + DTensor nA = ns.nullspace(); size_t nMats = nA.numMats(); EXPECT_EQ(nMats, 5); for (size_t i = 0; i < nMats; i++) { - DTensor nAi(nA, 2, i, i); - DTensor Ai(A, 2, i, i); - DTensor mustBeZero = Ai * nAi; + DTensor nAi(nA, 2, i, i); + DTensor Ai(A, 2, i, i); + DTensor mustBeZero = Ai * nAi; EXPECT_LT(mustBeZero.normF(), epsilon); - DTensor nAiTr = nAi.tr(); - DTensor mustBeEye = nAiTr * nAi; + DTensor nAiTr = nAi.tr(); + DTensor mustBeEye = nAiTr * nAi; EXPECT_NEAR(1, mustBeEye(0, 0, 0), epsilon); for (size_t ir = 0; ir < mustBeEye.numRows(); ir++) { for (size_t ic = 0; ic < mustBeEye.numCols(); ic++) { @@ -1245,9 +1380,10 @@ void computeNullspaceTensor(T epsilon) { } } -TEST_F(NullspaceTest, computeNullspaceTensor) { - computeNullspaceTensor(PRECISION_LOW); - computeNullspaceTensor(PRECISION_HIGH); +TEST_F(NullspaceTest, computeNullspaceTensor +) { +computeNullspaceTensor(PRECISION_LOW); +computeNullspaceTensor(PRECISION_HIGH); } /* --------------------------------------- @@ -1262,15 +1398,16 @@ void computeNullspaceTrivial(T epsilon) { 1, 1, 1, 5, 6, 7, 9, 0, 3}; - DTensor A(data, 3, 3, 2, rowMajor); - Nullspace nullA(A); - DTensor N = nullA.nullspace(); + DTensor A(data, 3, 3, 2, rowMajor); + Nullspace nullA(A); + DTensor N = nullA.nullspace(); EXPECT_EQ(N.normF(), 0); } -TEST_F(NullspaceTest, computeNullspaceTrivial) { - computeNullspaceTrivial(PRECISION_LOW); - computeNullspaceTrivial(PRECISION_HIGH); +TEST_F(NullspaceTest, computeNullspaceTrivial +) { +computeNullspaceTrivial(PRECISION_LOW); +computeNullspaceTrivial(PRECISION_HIGH); } /* --------------------------------------- @@ -1285,34 +1422,35 @@ void 
projectOnNullspaceTensor(T epsilon) { std::vector mat{1, -2, 3, 4, -1, -1, -1, 1, 2, -3, 4, -1, -1, -1, -1, 3, 5, -7, -1, -1, -1}; - DTensor A(m, n, 1); + DTensor A(m, n, 1); A.upload(mat, rowMajor); - Nullspace ns = Nullspace(A); - DTensor N = ns.nullspace(); + Nullspace ns = Nullspace(A); + DTensor N = ns.nullspace(); // online std::vector vec{1, 2, 3, 4, 5, 6, 7}; - DTensor x(vec, n); - DTensor proj(x); + DTensor x(vec, n); + DTensor proj(x); ns.project(proj); // Testing that proj is indeed in ker A - DTensor error(m, 1, 1, true); + DTensor error(m, 1, 1, true); error.addAB(A, proj); EXPECT_TRUE(error.normF() < epsilon); // Orthogonality test (other - p) † (p - x) std::vector h_other{1, -2, 5, 4, 0, 0, 0}; - DTensor other(h_other, n); - DTensor y = N * other; - DTensor delta1 = y - proj; - DTensor delta2 = proj - x; + DTensor other(h_other, n); + DTensor y = N * other; + DTensor delta1 = y - proj; + DTensor delta2 = proj - x; EXPECT_LT(delta1.dotF(delta2), epsilon); } -TEST_F(NullspaceTest, projectOnNullspaceTensor) { - projectOnNullspaceTensor(PRECISION_LOW); - projectOnNullspaceTensor(PRECISION_HIGH); +TEST_F(NullspaceTest, projectOnNullspaceTensor +) { +projectOnNullspaceTensor(PRECISION_LOW); +projectOnNullspaceTensor(PRECISION_HIGH); } @@ -1350,9 +1488,10 @@ void givensAnnihilateElement(T epsilon) { } } -TEST_F(GivensAnnihilatorTest, givensAnnihilateElement) { - givensAnnihilateElement(PRECISION_LOW); - givensAnnihilateElement(PRECISION_HIGH); +TEST_F(GivensAnnihilatorTest, givensAnnihilateElement +) { +givensAnnihilateElement(PRECISION_LOW); +givensAnnihilateElement(PRECISION_HIGH); } @@ -1379,9 +1518,10 @@ void givensAnnihilateCorrectness(T epsilon) { } -TEST_F(GivensAnnihilatorTest, givensAnnihilateCorrectness) { - givensAnnihilateCorrectness(1e-14); - givensAnnihilateCorrectness(1e-12); +TEST_F(GivensAnnihilatorTest, givensAnnihilateCorrectness +) { +givensAnnihilateCorrectness(1e-14); +givensAnnihilateCorrectness(1e-12); } From 
5b3a67798f6f8e8e0642cbe038dfdc50c6a1b3a8 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 14:47:22 +0000 Subject: [PATCH 14/17] Test reshape more thoroughly and format code --- test/testTensor.cu | 440 ++++++++++++++++++--------------------------- 1 file changed, 172 insertions(+), 268 deletions(-) diff --git a/test/testTensor.cu b/test/testTensor.cu index 982934c..3b28a34 100644 --- a/test/testTensor.cu +++ b/test/testTensor.cu @@ -36,14 +36,10 @@ void tensorConstructionZero() { EXPECT_EQ(expectedResult, zeroDown); } -TEST_F(TensorTest, tensorConstructionZero -) { -tensorConstructionZero(); - -tensorConstructionZero(); - -tensorConstructionZero(); - +TEST_F(TensorTest, tensorConstructionZero) { + tensorConstructionZero(); + tensorConstructionZero(); + tensorConstructionZero(); } /* --------------------------------------- @@ -92,14 +88,10 @@ void tensorConstructionStorageMode() { EXPECT_EQ(Cm, hostData); } -TEST_F(TensorTest, tensorConstructionStorageMode -) { -tensorConstructionStorageMode(); - -tensorConstructionStorageMode(); - -tensorConstructionStorageMode(); - +TEST_F(TensorTest, tensorConstructionStorageMode) { + tensorConstructionStorageMode(); + tensorConstructionStorageMode(); + tensorConstructionStorageMode(); } /* --------------------------------------- @@ -117,14 +109,10 @@ void randomTensorCreation() { EXPECT_TRUE(rEle >= -1 && rEle <= 1); } -TEST_F(TensorTest, randomTensorCreation -) { -randomTensorCreation(); - -randomTensorCreation(); - -randomTensorCreation(); - +TEST_F(TensorTest, randomTensorCreation) { + randomTensorCreation(); + randomTensorCreation(); + randomTensorCreation(); } /* --------------------------------------- @@ -138,18 +126,12 @@ void tensorMoveConstructor() { DTensor y(DTensor < T > {100, 10, 1000}); } -TEST_F(TensorTest, tensorMoveConstructor -) { -tensorMoveConstructor(); - -tensorMoveConstructor(); - -tensorMoveConstructor(); - -tensorMoveConstructor(); - -tensorMoveConstructor(); - +TEST_F(TensorTest, 
tensorMoveConstructor) { + tensorMoveConstructor(); + tensorMoveConstructor(); + tensorMoveConstructor(); + tensorMoveConstructor(); + tensorMoveConstructor(); } /* --------------------------------------- @@ -167,14 +149,10 @@ void tensorConstructionFromVector() { EXPECT_EQ(2 * 3 * 4, tenz.numEl()); } -TEST_F(TensorTest, tensorConstructionFromVector -) { -tensorConstructionFromVector(); - -tensorConstructionFromVector(); - -tensorConstructionFromVector(); - +TEST_F(TensorTest, tensorConstructionFromVector) { + tensorConstructionFromVector(); + tensorConstructionFromVector(); + tensorConstructionFromVector(); } /* --------------------------------------- @@ -196,14 +174,10 @@ void tensorCopyConstructor() { EXPECT_NE(tenz.raw(), tenzCp.raw()); } -TEST_F(TensorTest, tensorCopyConstructor -) { -tensorCopyConstructor(); - -tensorCopyConstructor(); - -tensorCopyConstructor(); - +TEST_F(TensorTest, tensorCopyConstructor) { + tensorCopyConstructor(); + tensorCopyConstructor(); + tensorCopyConstructor(); } /* --------------------------------------- @@ -222,14 +196,10 @@ void tensorSlicingConstructorAxis2() { EXPECT_EQ(tens.raw(), tensSlice.raw()); // it is indeed a slice } -TEST_F(TensorTest, tensorSlicingConstructorAxis2 -) { -tensorSlicingConstructorAxis2(); - -tensorSlicingConstructorAxis2(); - -tensorSlicingConstructorAxis2(); - +TEST_F(TensorTest, tensorSlicingConstructorAxis2) { + tensorSlicingConstructorAxis2(); + tensorSlicingConstructorAxis2(); + tensorSlicingConstructorAxis2(); } /* --------------------------------------- @@ -251,14 +221,10 @@ void tensorSlicingConstructorAxis1() { EXPECT_EQ(expected, tenzSliceDown); } -TEST_F(TensorTest, tensorSlicingConstructorAxis1 -) { -tensorSlicingConstructorAxis1(); - -tensorSlicingConstructorAxis1(); - -tensorSlicingConstructorAxis1(); - +TEST_F(TensorTest, tensorSlicingConstructorAxis1) { + tensorSlicingConstructorAxis1(); + tensorSlicingConstructorAxis1(); + tensorSlicingConstructorAxis1(); } /* 
--------------------------------------- @@ -280,14 +246,10 @@ void tensorSlicingConstructorAxis0() { EXPECT_EQ(expected, tenzSliceDown); } -TEST_F(TensorTest, tensorSlicingConstructorAxis0 -) { -tensorSlicingConstructorAxis0(); - -tensorSlicingConstructorAxis0(); - -tensorSlicingConstructorAxis0(); - +TEST_F(TensorTest, tensorSlicingConstructorAxis0) { + tensorSlicingConstructorAxis0(); + tensorSlicingConstructorAxis0(); + tensorSlicingConstructorAxis0(); } /* --------------------------------------- @@ -307,14 +269,10 @@ void tensorUpload() { EXPECT_EQ(8, tenz(1, 2, 3)); } -TEST_F(TensorTest, tensorUpload -) { -tensorUpload(); - -tensorUpload(); - -tensorUpload(); - +TEST_F(TensorTest, tensorUpload) { + tensorUpload(); + tensorUpload(); + tensorUpload(); } /* --------------------------------------- @@ -335,14 +293,10 @@ void tensorDeviceCopyTo() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorDeviceCopyTo -) { -tensorDeviceCopyTo(); - -tensorDeviceCopyTo(); - -tensorDeviceCopyTo(); - +TEST_F(TensorTest, tensorDeviceCopyTo) { + tensorDeviceCopyTo(); + tensorDeviceCopyTo(); + tensorDeviceCopyTo(); } @@ -354,6 +308,8 @@ TEMPLATE_WITH_TYPE_T void tensorReshape() { size_t m = 5, n = 10, k = 3; DTensor a = DTensor::createRandomTensor(m, n, k, -1, 1); // dim = (m, n, k) + T lastElement = a(m - 1, n - 1, k - 1); // last element + T firstElement = a(0, 0, 0); ASSERT_EQ(m, a.numRows()); ASSERT_EQ(n, a.numCols()); ASSERT_EQ(k, a.numMats()); @@ -365,6 +321,20 @@ void tensorReshape() { ASSERT_EQ(k, a.numRows()); ASSERT_EQ(n, a.numCols()); ASSERT_EQ(m, a.numMats()); + a.reshape(k * n, m, 1); // dim = (k*n, m, 1) + ASSERT_EQ(k * n, a.numRows()); + ASSERT_EQ(m, a.numCols()); + ASSERT_EQ(1, a.numMats()); + a.reshape(m, k * n, 1); // dim = (m, k*n, 1) + ASSERT_EQ(m, a.numRows()); + ASSERT_EQ(k * n, a.numCols()); + ASSERT_EQ(1, a.numMats()); + a.reshape(m * k * n, 1, 1); // dim = (m*k*n, 1, 1) + ASSERT_EQ(m * k * n, a.numRows()); + ASSERT_EQ(1, a.numCols()); + ASSERT_EQ(1, 
a.numMats()); + ASSERT_EQ(lastElement, a(m * n * k - 1, 0, 0)); + ASSERT_EQ(firstElement, a(0, 0, 0)); } TEST_F(TensorTest, tensorReshape) { @@ -393,10 +363,9 @@ void tensorDotF(T epsilon) { EXPECT_EQ(604, dotTensor); // from MATLAB } -TEST_F(TensorTest, tensorDotF -) { -tensorDotF(PRECISION_LOW); -tensorDotF(PRECISION_HIGH); +TEST_F(TensorTest, tensorDotF) { + tensorDotF(PRECISION_LOW); + tensorDotF(PRECISION_HIGH); } /* --------------------------------------- @@ -410,10 +379,9 @@ void tensorNormF(T epsilon) { EXPECT_NEAR(26.153393661244042, tenz.normF(), epsilon); // from MATLAB } -TEST_F(TensorTest, tensorNormF -) { -tensorNormF(PRECISION_LOW); -tensorNormF(PRECISION_HIGH); +TEST_F(TensorTest, tensorNormF) { + tensorNormF(PRECISION_LOW); + tensorNormF(PRECISION_HIGH); } /* --------------------------------------- @@ -428,12 +396,9 @@ void tensorSumAbs() { EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB } -TEST_F(TensorTest, tensorSumAbs -) { -tensorSumAbs(); - -tensorSumAbs(); - +TEST_F(TensorTest, tensorSumAbs) { + tensorSumAbs(); + tensorSumAbs(); } /* --------------------------------------- @@ -448,12 +413,9 @@ void tensorMax() { EXPECT_EQ(27, m); } -TEST_F(TensorTest, tensorMax -) { -tensorMax(); - -tensorMax(); - +TEST_F(TensorTest, tensorMax) { + tensorMax(); + tensorMax(); } /* --------------------------------------- @@ -468,12 +430,9 @@ void tensorMin() { EXPECT_EQ(0, m); } -TEST_F(TensorTest, tensorMin -) { -tensorMin(); - -tensorMin(); - +TEST_F(TensorTest, tensorMin) { + tensorMin(); + tensorMin(); } /* --------------------------------------- @@ -506,10 +465,9 @@ void tensorRightGivens(T epsilon) { } } -TEST_F(TensorTest, tensorRightGivens -) { -tensorRightGivens(PRECISION_LOW); -tensorRightGivens(PRECISION_HIGH); +TEST_F(TensorTest, tensorRightGivens ) { + tensorRightGivens(PRECISION_LOW); + tensorRightGivens(PRECISION_HIGH); } /* --------------------------------------- @@ -544,10 +502,9 @@ void tensorLeftGivens(T epsilon) { } } 
-TEST_F(TensorTest, tensorLeftGivens -) { -tensorLeftGivens(1e-10); -tensorLeftGivens(1e-14); +TEST_F(TensorTest, tensorLeftGivens) { + tensorLeftGivens(1e-10); + tensorLeftGivens(1e-14); } /* --------------------------------------- @@ -564,14 +521,10 @@ void tensorBracketOperator() { EXPECT_EQ(8, tenz(1, 2, 3)); } -TEST_F(TensorTest, tensorBracketOperator -) { -tensorBracketOperator(); - -tensorBracketOperator(); - -tensorBracketOperator(); - +TEST_F(TensorTest, tensorBracketOperator) { + tensorBracketOperator(); + tensorBracketOperator(); + tensorBracketOperator(); } /* --------------------------------------- @@ -590,14 +543,10 @@ void tensorAssignmentOperator() { EXPECT_EQ(4, other.numMats()); } -TEST_F(TensorTest, tensorAssignmentOperator -) { -tensorAssignmentOperator(); - -tensorAssignmentOperator(); - -tensorAssignmentOperator(); - +TEST_F(TensorTest, tensorAssignmentOperator) { + tensorAssignmentOperator(); + tensorAssignmentOperator(); + tensorAssignmentOperator(); } /* --------------------------------------- @@ -616,12 +565,9 @@ void tensorTimesEqualsScalar() { EXPECT_EQ(dataTimes3, actual); } -TEST_F(TensorTest, tensorTimesEqualsScalar -) { -tensorTimesEqualsScalar(); - -tensorTimesEqualsScalar(); - +TEST_F(TensorTest, tensorTimesEqualsScalar) { + tensorTimesEqualsScalar(); + tensorTimesEqualsScalar(); } /* --------------------------------------- @@ -640,12 +586,9 @@ void tensorTimesScalar() { EXPECT_EQ(dataTimes3, actual); } -TEST_F(TensorTest, tensorTimesScalar -) { -tensorTimesScalar(); - -tensorTimesScalar(); - +TEST_F(TensorTest, tensorTimesScalar) { + tensorTimesScalar(); + tensorTimesScalar(); } /* --------------------------------------- @@ -665,12 +608,9 @@ void tensorPlusEqualsTensor() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorPlusEqualsTensor -) { -tensorPlusEqualsTensor(); - -tensorPlusEqualsTensor(); - +TEST_F(TensorTest, tensorPlusEqualsTensor) { + tensorPlusEqualsTensor(); + tensorPlusEqualsTensor(); } /* 
--------------------------------------- @@ -690,12 +630,9 @@ void tensorMinusEqualsTensor() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorMinusEqualsTensor -) { -tensorMinusEqualsTensor(); - -tensorMinusEqualsTensor(); - +TEST_F(TensorTest, tensorMinusEqualsTensor) { + tensorMinusEqualsTensor(); + tensorMinusEqualsTensor(); } /* --------------------------------------- @@ -715,12 +652,9 @@ void tensorPlusTensor() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorPlusTensor -) { -tensorPlusTensor(); - -tensorPlusTensor(); - +TEST_F(TensorTest, tensorPlusTensor) { + tensorPlusTensor(); + tensorPlusTensor(); } /* --------------------------------------- @@ -740,12 +674,9 @@ void tensorMinusTensor() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorMinusTensor -) { -tensorMinusTensor(); - -tensorMinusTensor(); - +TEST_F(TensorTest, tensorMinusTensor) { + tensorMinusTensor(); + tensorMinusTensor(); } /* --------------------------------------- @@ -770,12 +701,9 @@ void tensorAddAB() { EXPECT_EQ(expected, actual); } -TEST_F(TensorTest, tensorAddAB -) { -tensorAddAB(); - -tensorAddAB(); - +TEST_F(TensorTest, tensorAddAB) { + tensorAddAB(); + tensorAddAB(); } /* --------------------------------------- @@ -804,12 +732,9 @@ void tensorGetRows() { EXPECT_EQ(expected1, actual1); } -TEST_F(TensorTest, tensorGetRows -) { -tensorGetRows(); - -tensorGetRows(); - +TEST_F(TensorTest, tensorGetRows) { + tensorGetRows(); + tensorGetRows(); } @@ -832,12 +757,9 @@ void tensorTranspose() { } -TEST_F(TensorTest, tensorTranspose -) { -tensorTranspose(); - -tensorTranspose(); - +TEST_F(TensorTest, tensorTranspose) { + tensorTranspose(); + tensorTranspose(); } /* ================================================================================================ @@ -876,10 +798,9 @@ void tensorLeastSquares1(T epsilon) { EXPECT_LT(nrmErr, epsilon); } -TEST_F(LeastSquaresTest, tensorLS1 -) { -tensorLeastSquares1(PRECISION_LOW); -tensorLeastSquares1(PRECISION_HIGH); 
+TEST_F(LeastSquaresTest, tensorLS1) { + tensorLeastSquares1(PRECISION_LOW); + tensorLeastSquares1(PRECISION_HIGH); } @@ -914,10 +835,9 @@ void singularValuesComputation(float epsilon) { EXPECT_TRUE(U.has_value()); } -TEST_F(SvdTest, singularValuesComputation -) { -singularValuesComputation(PRECISION_LOW); -singularValuesComputation(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesComputation) { + singularValuesComputation(PRECISION_LOW); + singularValuesComputation(PRECISION_HIGH); } @@ -947,10 +867,9 @@ void singularValuesMemory(float epsilon) { EXPECT_EQ(u1->raw(), u2->raw()); } -TEST_F(SvdTest, singularValuesMemory -) { -singularValuesMemory(PRECISION_LOW); -singularValuesMemory(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesMemory) { + singularValuesMemory(PRECISION_LOW); + singularValuesMemory(PRECISION_HIGH); } @@ -994,10 +913,9 @@ void singularValuesMultipleMatrices(float epsilon) { } -TEST_F(SvdTest, singularValuesMultipleMatrices -) { -singularValuesMultipleMatrices(10 * PRECISION_LOW); // SVD with float performs quite poorly -singularValuesMultipleMatrices(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesMultipleMatrices) { + singularValuesMultipleMatrices(10 * PRECISION_LOW); // SVD with float performs quite poorly + singularValuesMultipleMatrices(PRECISION_HIGH); } @@ -1020,10 +938,9 @@ void singularValuesRankMultipleMatrices(float epsilon) { EXPECT_EQ(1, rank(0, 0, 2)); } -TEST_F(SvdTest, singularValuesRankMultipleMatrices -) { -singularValuesRankMultipleMatrices(PRECISION_LOW); // SVD with float performs quite poorly -singularValuesRankMultipleMatrices(PRECISION_HIGH); +TEST_F(SvdTest, singularValuesRankMultipleMatrices) { + singularValuesRankMultipleMatrices(PRECISION_LOW); // SVD with float performs quite poorly + singularValuesRankMultipleMatrices(PRECISION_HIGH); } /* ================================================================================================ @@ -1054,10 +971,9 @@ void choleskyFactorisation(T epsilon) { 
EXPECT_NEAR(5.382321781081287, A(2, 2), epsilon); } -TEST_F(CholeskyTest, choleskyFactorisation -) { -choleskyFactorisation(PRECISION_LOW); -choleskyFactorisation(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyFactorisation) { + choleskyFactorisation(PRECISION_LOW); + choleskyFactorisation(PRECISION_HIGH); } /* --------------------------------------- @@ -1090,10 +1006,9 @@ void choleskyFactorisationSolution(T epsilon) { } -TEST_F(CholeskyTest, choleskyFactorisationSolution -) { -choleskyFactorisationSolution(PRECISION_LOW); -choleskyFactorisationSolution(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyFactorisationSolution) { + choleskyFactorisationSolution(PRECISION_LOW); + choleskyFactorisationSolution(PRECISION_HIGH); } /* --------------------------------------- @@ -1122,10 +1037,9 @@ void choleskyBatchFactorisation(T epsilon) { EXPECT_NEAR(5.382321781081287, A(2, 2, 1), epsilon); } -TEST_F(CholeskyTest, choleskyBatchFactorisation -) { -choleskyBatchFactorisation(PRECISION_LOW); -choleskyBatchFactorisation(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyBatchFactorisation) { + choleskyBatchFactorisation(PRECISION_LOW); + choleskyBatchFactorisation(PRECISION_HIGH); } /* --------------------------------------- @@ -1163,10 +1077,9 @@ void choleskyBatchFactorSolve(T epsilon) { EXPECT_TRUE(error.normF() < epsilon); } -TEST_F(CholeskyTest, choleskyBatchFactorSolve -) { -choleskyBatchFactorSolve(PRECISION_LOW); -choleskyBatchFactorSolve(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyBatchFactorSolve) { + choleskyBatchFactorSolve(PRECISION_LOW); + choleskyBatchFactorSolve(PRECISION_HIGH); } /* --------------------------------------- @@ -1211,10 +1124,9 @@ void choleskyBatchSolve(T epsilon) { EXPECT_TRUE(error.normF() < epsilon); } -TEST_F(CholeskyTest, choleskyBatchSolve -) { -choleskyBatchSolve(PRECISION_LOW); -choleskyBatchSolve(PRECISION_HIGH); +TEST_F(CholeskyTest, choleskyBatchSolve) { + choleskyBatchSolve(PRECISION_LOW); + choleskyBatchSolve(PRECISION_HIGH); } @@ 
-1254,10 +1166,9 @@ void qrFactorisation(T epsilon) { EXPECT_NEAR(nrm, 0., epsilon); } -TEST_F(QRTest, qrFactorisation -) { -qrFactorisation(PRECISION_LOW); -qrFactorisation(PRECISION_HIGH); +TEST_F(QRTest, qrFactorisation) { + qrFactorisation(PRECISION_LOW); + qrFactorisation(PRECISION_HIGH); } /* --------------------------------------- @@ -1286,10 +1197,9 @@ void qrFactorisationTall(T epsilon) { EXPECT_NEAR(nrm, 0., epsilon); } -TEST_F(QRTest, qrFactorisationTall -) { -qrFactorisationTall(PRECISION_LOW); -qrFactorisationTall(PRECISION_HIGH); +TEST_F(QRTest, qrFactorisationTall) { + qrFactorisationTall(PRECISION_LOW); + qrFactorisationTall(PRECISION_HIGH); } /* --------------------------------------- @@ -1327,10 +1237,9 @@ void qrLeastSquares(T epsilon) { EXPECT_NEAR(nrm, 80.003169364198072, epsilon); // From MatLab } -TEST_F(QRTest, qrLeastSquares -) { -qrLeastSquares(PRECISION_LOW); -qrLeastSquares(PRECISION_HIGH); +TEST_F(QRTest, qrLeastSquares) { + qrLeastSquares(PRECISION_LOW); + qrLeastSquares(PRECISION_HIGH); } @@ -1380,10 +1289,9 @@ void computeNullspaceTensor(T epsilon) { } } -TEST_F(NullspaceTest, computeNullspaceTensor -) { -computeNullspaceTensor(PRECISION_LOW); -computeNullspaceTensor(PRECISION_HIGH); +TEST_F(NullspaceTest, computeNullspaceTensor) { + computeNullspaceTensor(PRECISION_LOW); + computeNullspaceTensor(PRECISION_HIGH); } /* --------------------------------------- @@ -1404,10 +1312,9 @@ void computeNullspaceTrivial(T epsilon) { EXPECT_EQ(N.normF(), 0); } -TEST_F(NullspaceTest, computeNullspaceTrivial -) { -computeNullspaceTrivial(PRECISION_LOW); -computeNullspaceTrivial(PRECISION_HIGH); +TEST_F(NullspaceTest, computeNullspaceTrivial) { + computeNullspaceTrivial(PRECISION_LOW); + computeNullspaceTrivial(PRECISION_HIGH); } /* --------------------------------------- @@ -1447,10 +1354,9 @@ void projectOnNullspaceTensor(T epsilon) { EXPECT_LT(delta1.dotF(delta2), epsilon); } -TEST_F(NullspaceTest, projectOnNullspaceTensor -) { 
-projectOnNullspaceTensor(PRECISION_LOW); -projectOnNullspaceTensor(PRECISION_HIGH); +TEST_F(NullspaceTest, projectOnNullspaceTensor) { + projectOnNullspaceTensor(PRECISION_LOW); + projectOnNullspaceTensor(PRECISION_HIGH); } @@ -1488,10 +1394,9 @@ void givensAnnihilateElement(T epsilon) { } } -TEST_F(GivensAnnihilatorTest, givensAnnihilateElement -) { -givensAnnihilateElement(PRECISION_LOW); -givensAnnihilateElement(PRECISION_HIGH); +TEST_F(GivensAnnihilatorTest, givensAnnihilateElement) { + givensAnnihilateElement(PRECISION_LOW); + givensAnnihilateElement(PRECISION_HIGH); } @@ -1518,10 +1423,9 @@ void givensAnnihilateCorrectness(T epsilon) { } -TEST_F(GivensAnnihilatorTest, givensAnnihilateCorrectness -) { -givensAnnihilateCorrectness(1e-14); -givensAnnihilateCorrectness(1e-12); +TEST_F(GivensAnnihilatorTest, givensAnnihilateCorrectness) { + givensAnnihilateCorrectness(1e-14); + givensAnnihilateCorrectness(1e-12); } From 75576ac96237608c2ce384aaf3ba3d3072c11a9c Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 16:49:35 +0000 Subject: [PATCH 15/17] Further unit tests for reshape and docs Unit test: Slice along axis=2 and reshape Write documentation for DTensor::reshape Fix formatting issues in testTensor --- include/tensor.cuh | 15 ++ test/testTensor.cu | 338 +++++++++++++++++++++++++-------------------- 2 files changed, 200 insertions(+), 153 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 55b661b..d62803c 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -471,6 +471,21 @@ public: */ void addAB(const DTensor &A, const DTensor &B, T alpha = 1, T beta = 0); + /** + * Reshapes the tensor + * + * If the new number of tensors is larger than the current one, + * this method will allocate a device array of type T* and length + * equal to the new number of matrices. 
+ * + * No new memory is allocated if newNumMats = 1 + * + * @param newNumRows new number of rows + * @param newNumCols new number of columns + * @param newNumMats new number of matrices + * + * @throws std::invalid_argument if the provided dimensions are incompatible + */ void reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats = 1); /* ------------- OPERATORS ------------- */ diff --git a/test/testTensor.cu b/test/testTensor.cu index 3b28a34..4c1c147 100644 --- a/test/testTensor.cu +++ b/test/testTensor.cu @@ -26,7 +26,7 @@ protected: TEMPLATE_WITH_TYPE_T void tensorConstructionZero() { - DTensor zero(2, 3, 4, true); + DTensor zero(2, 3, 4, true); EXPECT_EQ(2, zero.numRows()); EXPECT_EQ(3, zero.numCols()); EXPECT_EQ(4, zero.numMats()); @@ -65,21 +65,21 @@ void tensorConstructionStorageMode() { std::vector Rm = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; std::vector hostData(rows * cols * mats); // test constructor - DTensor testCm(Cm, rows, cols, mats, columnMajor); - DTensor testRm(Rm, rows, cols, mats, rowMajor); + DTensor testCm(Cm, rows, cols, mats, columnMajor); + DTensor testRm(Rm, rows, cols, mats, rowMajor); testCm.download(hostData); EXPECT_EQ(Cm, hostData); testRm.download(hostData); EXPECT_EQ(Cm, hostData); // test .upload() - DTensor testSplitCm(rows, cols, mats); - DTensor ACm(testSplitCm, 2, 0, 0); - DTensor BCm(testSplitCm, 2, 1, 1); + DTensor testSplitCm(rows, cols, mats); + DTensor ACm(testSplitCm, 2, 0, 0); + DTensor BCm(testSplitCm, 2, 1, 1); ACm.upload(aCm, columnMajor); BCm.upload(bCm, columnMajor); - DTensor testSplitRm(rows, cols, mats); - DTensor ARm(testSplitRm, 2, 0, 0); - DTensor BRm(testSplitRm, 2, 1, 1); + DTensor testSplitRm(rows, cols, mats); + DTensor ARm(testSplitRm, 2, 0, 0); + DTensor BRm(testSplitRm, 2, 1, 1); ARm.upload(aRm, rowMajor); BRm.upload(bRm, rowMajor); testSplitCm.download(hostData); @@ -121,9 +121,9 @@ TEST_F(TensorTest, randomTensorCreation) { TEMPLATE_WITH_TYPE_T void tensorMoveConstructor() { - DTensor 
zero(2, 3, 4, true); - DTensor x(std::move(zero)); - DTensor y(DTensor < T > {100, 10, 1000}); + DTensor zero(2, 3, 4, true); + DTensor x(std::move(zero)); + DTensor y(DTensor < T > {100, 10, 1000}); } TEST_F(TensorTest, tensorMoveConstructor) { @@ -142,7 +142,7 @@ TEST_F(TensorTest, tensorMoveConstructor) { TEMPLATE_WITH_TYPE_T void tensorConstructionFromVector() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_EQ(2, tenz.numRows()); EXPECT_EQ(3, tenz.numCols()); EXPECT_EQ(4, tenz.numMats()); @@ -162,8 +162,8 @@ TEST_F(TensorTest, tensorConstructionFromVector) { TEMPLATE_WITH_TYPE_T void tensorCopyConstructor() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor tenzCp(tenz); + DTensor tenz(data, 2, 3, 4); + DTensor tenzCp(tenz); EXPECT_EQ(2, tenzCp.numRows()); EXPECT_EQ(3, tenzCp.numCols()); EXPECT_EQ(4, tenzCp.numMats()); @@ -188,8 +188,8 @@ TEST_F(TensorTest, tensorCopyConstructor) { TEMPLATE_WITH_TYPE_T void tensorSlicingConstructorAxis2() { std::vector data = TENSOR_DATA_234A; - DTensor tens(data, 2, 3, 4); - DTensor tensSlice(tens, 2, 0, 1); // matrices #0 and #1 + DTensor tens(data, 2, 3, 4); + DTensor tensSlice(tens, 2, 0, 1); // matrices #0 and #1 EXPECT_EQ(2, tensSlice.numRows()); EXPECT_EQ(3, tensSlice.numCols()); EXPECT_EQ(2, tensSlice.numMats()); @@ -210,8 +210,8 @@ TEST_F(TensorTest, tensorSlicingConstructorAxis2) { TEMPLATE_WITH_TYPE_T void tensorSlicingConstructorAxis1() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor tenzSlice(tenz, 1, 1, 2); // columns from 1 to 2 + DTensor tenz(data, 2, 3, 4); + DTensor tenzSlice(tenz, 1, 1, 2); // columns from 1 to 2 EXPECT_EQ(2, tenzSlice.numRows()); EXPECT_EQ(2, tenzSlice.numCols()); EXPECT_EQ(1, tenzSlice.numMats()); @@ -235,8 +235,8 @@ TEST_F(TensorTest, tensorSlicingConstructorAxis1) { TEMPLATE_WITH_TYPE_T void tensorSlicingConstructorAxis0() { std::vector data = TENSOR_DATA_234A; - 
DTensor tenz(data, 2, 3, 4); - DTensor tenzSlice(tenz, 0, 2, 3); // elements 2..3 + DTensor tenz(data, 2, 3, 4); + DTensor tenzSlice(tenz, 0, 2, 3); // elements 2..3 EXPECT_EQ(2, tenzSlice.numRows()); EXPECT_EQ(1, tenzSlice.numCols()); EXPECT_EQ(1, tenzSlice.numMats()); @@ -259,7 +259,7 @@ TEST_F(TensorTest, tensorSlicingConstructorAxis0) { TEMPLATE_WITH_TYPE_T void tensorUpload() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(2, 3, 4); + DTensor tenz(2, 3, 4); tenz.upload(data); EXPECT_EQ(2, tenz.numRows()); EXPECT_EQ(3, tenz.numCols()); @@ -282,9 +282,9 @@ TEST_F(TensorTest, tensorUpload) { TEMPLATE_WITH_TYPE_T void tensorDeviceCopyTo() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor other(2, 3, 5, true); - DTensor z(other, 2, 1, 4); + DTensor tenz(data, 2, 3, 4); + DTensor other(2, 3, 5, true); + DTensor z(other, 2, 1, 4); tenz.deviceCopyTo(z); std::vector expected = {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 10, 5, 4, 3, 2, 1, -1, 4, 3, 4, 3, 4, 8}; @@ -301,13 +301,13 @@ TEST_F(TensorTest, tensorDeviceCopyTo) { /* --------------------------------------- - * Tensor: Frobenius dot product + * Tensor: Reshape * --------------------------------------- */ TEMPLATE_WITH_TYPE_T void tensorReshape() { size_t m = 5, n = 10, k = 3; - DTensor a = DTensor::createRandomTensor(m, n, k, -1, 1); // dim = (m, n, k) + DTensor a = DTensor::createRandomTensor(m, n, k, -1, 1); // dim = (m, n, k) T lastElement = a(m - 1, n - 1, k - 1); // last element T firstElement = a(0, 0, 0); ASSERT_EQ(m, a.numRows()); @@ -343,6 +343,38 @@ TEST_F(TensorTest, tensorReshape) { tensorReshape(); } +/* --------------------------------------- + * Tensor: Slice, reshape and add/multiply + * --------------------------------------- */ + +TEMPLATE_WITH_TYPE_T +void tensorSliceAndReshape(T epsilon) { + std::vector dataA = TENSOR_DATA_234A; + std::vector dataB = TENSOR_DATA_234B; + DTensor a(dataA, 2, 3, 4); + DTensor b(dataB, 2, 3, 4); + + /* ---- Slicing 
axis = 2 ---- */ + DTensor aSlice(a, 2, 1, 3); + DTensor bSlice(b, 2, 1, 3); + aSlice.reshape(2, 9, 1); + bSlice.reshape(2, 9, 1); + aSlice += bSlice; + + std::vector dataAExpected = {1, 2, 3, 4, 5, 6, 41, 7, 5, 5, + 19, 17, 14, 13, 5, 11, -8, -4, + 6, 8, 8, -2, 8, 13}; + DTensor aExpected(dataAExpected, 2, 3, 4); + + DTensor err = aExpected - a; + ASSERT_LT(err.normF(), epsilon); +} + +TEST_F(TensorTest, tensorSliceAndReshape) { + tensorSliceAndReshape(PRECISION_LOW); + tensorSliceAndReshape(PRECISION_HIGH); +} + /* --------------------------------------- * Tensor: Frobenius dot product * --------------------------------------- */ @@ -352,13 +384,13 @@ void tensorDotF(T epsilon) { // as vectors std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor vecA(dataA, dataA.size()); - DTensor vecB(dataB, dataB.size()); + DTensor vecA(dataA, dataA.size()); + DTensor vecB(dataB, dataB.size()); T dotVector = vecA.dotF(vecB); EXPECT_EQ(604, dotVector); // from MATLAB // as matrices - DTensor tenA(dataA, 2, 3, 4); - DTensor tenB(dataB, 2, 3, 4); + DTensor tenA(dataA, 2, 3, 4); + DTensor tenB(dataB, 2, 3, 4); T dotTensor = tenA.dotF(tenB); EXPECT_EQ(604, dotTensor); // from MATLAB } @@ -375,7 +407,7 @@ TEST_F(TensorTest, tensorDotF) { TEMPLATE_WITH_TYPE_T void tensorNormF(T epsilon) { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_NEAR(26.153393661244042, tenz.normF(), epsilon); // from MATLAB } @@ -392,7 +424,7 @@ TEST_F(TensorTest, tensorNormF) { TEMPLATE_WITH_TYPE_T void tensorSumAbs() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB } @@ -408,7 +440,7 @@ TEST_F(TensorTest, tensorSumAbs) { TEMPLATE_WITH_TYPE_T void tensorMax() { std::vector data = TENSOR_DATA_234AMB; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); T m = tenz.maxAbs(); EXPECT_EQ(27, m); } @@ 
-425,7 +457,7 @@ TEST_F(TensorTest, tensorMax) { TEMPLATE_WITH_TYPE_T void tensorMin() { std::vector data = TENSOR_DATA_234AMB; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); T m = tenz.minAbs(); EXPECT_EQ(0, m); } @@ -465,7 +497,7 @@ void tensorRightGivens(T epsilon) { } } -TEST_F(TensorTest, tensorRightGivens ) { +TEST_F(TensorTest, tensorRightGivens) { tensorRightGivens(PRECISION_LOW); tensorRightGivens(PRECISION_HIGH); } @@ -515,7 +547,7 @@ TEST_F(TensorTest, tensorLeftGivens) { TEMPLATE_WITH_TYPE_T void tensorBracketOperator() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); EXPECT_EQ(1, tenz(0, 0, 0)); EXPECT_EQ(3, tenz(0, 1, 2)); EXPECT_EQ(8, tenz(1, 2, 3)); @@ -534,8 +566,8 @@ TEST_F(TensorTest, tensorBracketOperator) { TEMPLATE_WITH_TYPE_T void tensorAssignmentOperator() { std::vector data = TENSOR_DATA_234A; - DTensor tenz(data, 2, 3, 4); - DTensor other; + DTensor tenz(data, 2, 3, 4); + DTensor other; other = tenz; EXPECT_EQ(tenz.raw(), other.raw()); EXPECT_EQ(2, other.numRows()); @@ -558,7 +590,7 @@ void tensorTimesEqualsScalar() { std::vector data = TENSOR_DATA_234A; std::vector dataTimes3 = {3, 6, 9, 12, 15, 18, 21, 24, 27, 24, 21, 30, 15, 12, 9, 6, 3, -3, 12, 9, 12, 9, 12, 24}; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); tenz *= 3.0; std::vector actual; tenz.download(actual); @@ -579,7 +611,7 @@ void tensorTimesScalar() { std::vector data = TENSOR_DATA_234A; std::vector dataTimes3 = {3, 6, 9, 12, 15, 18, 21, 24, 27, 24, 21, 30, 15, 12, 9, 6, 3, -3, 12, 9, 12, 9, 12, 24}; - DTensor tenz(data, 2, 3, 4); + DTensor tenz(data, 2, 3, 4); auto tripleTensor = 3.0 * tenz; std::vector actual; tripleTensor.download(actual); @@ -599,8 +631,8 @@ TEMPLATE_WITH_TYPE_T void tensorPlusEqualsTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 
3, 4); A += B; std::vector expected = TENSOR_DATA_234APB; std::vector actual; @@ -621,8 +653,8 @@ TEMPLATE_WITH_TYPE_T void tensorMinusEqualsTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 3, 4); A -= B; std::vector expected = TENSOR_DATA_234AMB; std::vector actual; @@ -643,9 +675,9 @@ TEMPLATE_WITH_TYPE_T void tensorPlusTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); - DTensor C = A + B; + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 3, 4); + DTensor C = A + B; std::vector expected = TENSOR_DATA_234APB; std::vector actual; C.download(actual); @@ -665,9 +697,9 @@ TEMPLATE_WITH_TYPE_T void tensorMinusTensor() { std::vector dataA = TENSOR_DATA_234A; std::vector dataB = TENSOR_DATA_234B; - DTensor A(dataA, 2, 3, 4); - DTensor B(dataB, 2, 3, 4); - DTensor C = A - B; + DTensor A(dataA, 2, 3, 4); + DTensor B(dataB, 2, 3, 4); + DTensor C = A - B; std::vector expected = TENSOR_DATA_234AMB; std::vector actual; C.download(actual); @@ -691,9 +723,9 @@ void tensorAddAB() { std::vector bData = {6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 2, 1, 5, -6, 8}; - DTensor A(aData, 2, 3, 3); - DTensor B(bData, 3, 2, 3); - DTensor C(2, 2, 3, true); + DTensor A(aData, 2, 3, 3); + DTensor B(bData, 3, 2, 3); + DTensor C(2, 2, 3, true); C.addAB(A, B); std::vector expected = {41, 56, 14, 20, 158, 176, 77, 86, 60, 64, 111, 118}; std::vector actual; @@ -718,14 +750,14 @@ void tensorGetRows() { 5., 6., 7., 8., 9., 10., 11., 12., 13}; - DTensor A(aData, 3, 3, 2); - DTensor Ar0 = A.getRows(1, 1, 0); + DTensor A(aData, 3, 3, 2); + DTensor Ar0 = A.getRows(1, 1, 0); std::vector expected0 = {25., 720., -1.}; std::vector actual0(3); Ar0.download(actual0); EXPECT_EQ(expected0, actual0); - DTensor Ar1 = A.getRows(1, 2, 1); + DTensor Ar1 = A.getRows(1, 2, 1); 
std::vector expected1 = {6., 7., 9., 10., 12., 13.}; std::vector actual1(6); Ar1.download(actual1); @@ -745,8 +777,8 @@ TEST_F(TensorTest, tensorGetRows) { TEMPLATE_WITH_TYPE_T void tensorTranspose() { std::vector aData = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - DTensor A(aData, 3, 2, 2); - DTensor Atranspose = A.tr(); + DTensor A(aData, 3, 2, 2); + DTensor Atranspose = A.tr(); EXPECT_EQ(2, Atranspose.numRows()); EXPECT_EQ(3, Atranspose.numCols()); EXPECT_EQ(2, Atranspose.numMats()); @@ -786,12 +818,12 @@ void tensorLeastSquares1(T epsilon) { 6, 8, -9, 20}; std::vector bData = {1, 1, -1, 2, 30, -80}; - DTensor A0(aData, 2, 2, 3); - DTensor A(A0); - DTensor B(bData, 2, 1, 3); - DTensor sol(B); + DTensor A0(aData, 2, 2, 3); + DTensor A(A0); + DTensor B(bData, 2, 1, 3); + DTensor sol(B); A0.leastSquaresBatched(sol); - DTensor C(2, 1, 3); + DTensor C(2, 1, 3); C.addAB(A, sol); C -= B; T nrmErr = C.normF(); @@ -824,8 +856,8 @@ void singularValuesComputation(float epsilon) { std::vector bData = {1, 6, 6, 6, 6, 6, 6, 6, 2, 7, 7, 7, 7, 7, 7, 7, 3, 8, 8, 8, 8, 8, 8, 8,}; - DTensor B(bData, 8, 3); - Svd svd(B, true, false); + DTensor B(bData, 8, 3); + Svd svd(B, true, false); EXPECT_EQ(true, svd.factorise()); auto S = svd.singularValues(); EXPECT_NEAR(32.496241123753592, S(0), epsilon); // value from MATLAB @@ -850,15 +882,15 @@ void singularValuesMemory(float epsilon) { std::vector bData = {1, 6, 6, 6, 6, 6, 6, 6, 2, 7, 7, 7, 7, 7, 7, 7, 3, 8, 8, 8, 8, 8, 8, 8,}; - DTensor B(bData, 8, 3); - Svd svd(B, true, false); + DTensor B(bData, 8, 3); + Svd svd(B, true, false); EXPECT_EQ(true, svd.factorise()); - DTensor const &v1 = svd.rightSingularVectors(); - DTensor const &v2 = svd.rightSingularVectors(); + DTensor const &v1 = svd.rightSingularVectors(); + DTensor const &v2 = svd.rightSingularVectors(); EXPECT_EQ(&v1, &v2); EXPECT_EQ(v1.raw(), v2.raw()); - DTensor const &s1 = svd.singularValues(); - DTensor const &s2 = svd.singularValues(); + DTensor const &s1 = 
svd.singularValues(); + DTensor const &s2 = svd.singularValues(); EXPECT_EQ(&s1, &s2); EXPECT_EQ(s1.raw(), s2.raw()); auto u1 = svd.leftSingularVectors().value(); @@ -879,11 +911,11 @@ TEST_F(SvdTest, singularValuesMemory) { TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void singularValuesMultipleMatrices(float epsilon) { std::vector aData = {1, 2, 3, 4, 5, 6, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1}; - DTensor A(aData, 3, 2, 3); - Svd svd(A, true); // do compute U (A will be destroyed) + DTensor A(aData, 3, 2, 3); + Svd svd(A, true); // do compute U (A will be destroyed) svd.factorise(); - DTensor const &S = svd.singularValues(); - DTensor const &V = svd.rightSingularVectors(); + DTensor const &S = svd.singularValues(); + DTensor const &V = svd.rightSingularVectors(); auto Uopt = svd.leftSingularVectors(); auto U = Uopt.value(); std::vector expected_v = {-0.386317703118612, -0.922365780077058, -0.922365780077058, 0.386317703118612, @@ -928,9 +960,9 @@ void singularValuesRankMultipleMatrices(float epsilon) { std::vector aData = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 0, 1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12, 1, 2, 3, 4, 2, 4, 6, 8, 3, 6, 9, 12}; - DTensor A(aData, 4, 3, 3); + DTensor A(aData, 4, 3, 3); - Svd svd(A); + Svd svd(A); svd.factorise(); auto rank = svd.rank(epsilon); EXPECT_EQ(3, rank(0, 0, 0)); @@ -963,8 +995,8 @@ void choleskyFactorisation(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(aData, 3, 3, 1); - CholeskyFactoriser chol(A); + DTensor A(aData, 3, 3, 1); + CholeskyFactoriser chol(A); chol.factorise(); EXPECT_NEAR(3.162277660168380, A(0, 0), epsilon); EXPECT_NEAR(-0.361403161162101, A(2, 1), epsilon); @@ -985,14 +1017,14 @@ void choleskyFactorisationSolution(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(aData, 3, 3, 1); - DTensor L(A); // L = A - CholeskyFactoriser chol(L); + DTensor A(aData, 3, 3, 1); + DTensor L(A); // L = A + CholeskyFactoriser 
chol(L); chol.factorise(); std::vector bData = {-1., -3., 5.}; - DTensor rhs(bData, 3, 1, 1); - DTensor sol(rhs); + DTensor rhs(bData, 3, 1, 1); + DTensor sol(rhs); chol.solve(sol); std::vector expected = {-0.126805213103205, -0.128566396618528, 0.175061641423036}; @@ -1000,7 +1032,7 @@ void choleskyFactorisationSolution(T epsilon) { sol.download(actual); for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i], epsilon); - DTensor error = A * sol; + DTensor error = A * sol; error -= rhs; EXPECT_TRUE(error.normF() < epsilon); @@ -1020,12 +1052,12 @@ void choleskyBatchFactorisation(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(3, 3, 2); - DTensor A0(A, 2, 0, 0); - DTensor A1(A, 2, 1, 1); + DTensor A(3, 3, 2); + DTensor A0(A, 2, 0, 0); + DTensor A1(A, 2, 1, 1); A0.upload(aData); A1.upload(aData); - CholeskyBatchFactoriser chol(A); + CholeskyBatchFactoriser chol(A); chol.factorise(); // 0 EXPECT_NEAR(3.162277660168380, A(0, 0, 0), epsilon); @@ -1051,28 +1083,28 @@ void choleskyBatchFactorSolve(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(3, 3, 2); - DTensor A0(A, 2, 0, 0); - DTensor A1(A, 2, 1, 1); + DTensor A(3, 3, 2); + DTensor A0(A, 2, 0, 0); + DTensor A1(A, 2, 1, 1); A0.upload(aData); A1.upload(aData); - DTensor L(A); // L = A - CholeskyBatchFactoriser chol(L); + DTensor L(A); // L = A + CholeskyBatchFactoriser chol(L); chol.factorise(); std::vector bData = {-1., -3., 5.}; - DTensor rhs(3, 1, 2); - DTensor rhs0(rhs, 2, 0, 0); - DTensor rhs1(rhs, 2, 1, 1); + DTensor rhs(3, 1, 2); + DTensor rhs0(rhs, 2, 0, 0); + DTensor rhs1(rhs, 2, 1, 1); rhs0.upload(bData); rhs1.upload(bData); - DTensor sol(rhs); + DTensor sol(rhs); chol.solve(sol); std::vector expected = {-0.126805213103205, -0.128566396618528, 0.175061641423036}; std::vector actual(6); sol.download(actual); for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i], epsilon); // 0 for (size_t i 
= 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i + 3], epsilon); // 1 - DTensor error = A * sol; + DTensor error = A * sol; error -= rhs; EXPECT_TRUE(error.normF() < epsilon); } @@ -1091,35 +1123,35 @@ void choleskyBatchSolve(T epsilon) { std::vector aData = {10.0, 2.0, 3.0, 2.0, 20.0, -1.0, 3.0, -1.0, 30.0}; - DTensor A(3, 3, 2); - DTensor A0(A, 2, 0, 0); - DTensor A1(A, 2, 1, 1); + DTensor A(3, 3, 2); + DTensor A0(A, 2, 0, 0); + DTensor A1(A, 2, 1, 1); A0.upload(aData); A1.upload(aData); std::vector lowData = {3.162277660168380, 0, 0, 0.632455532033676, 4.427188724235731, 0, 0.948683298050514, -0.361403161162101, 5.382321781081287}; // from matlab - DTensor low(3, 3, 2); - DTensor low0(low, 2, 0, 0); - DTensor low1(low, 2, 1, 1); + DTensor low(3, 3, 2); + DTensor low0(low, 2, 0, 0); + DTensor low1(low, 2, 1, 1); low0.upload(lowData, rowMajor); low1.upload(lowData, rowMajor); - DTensor L(low); - CholeskyBatchFactoriser chol(L, true); + DTensor L(low); + CholeskyBatchFactoriser chol(L, true); std::vector bData = {-1., -3., 5.}; - DTensor rhs(3, 1, 2); - DTensor rhs0(rhs, 2, 0, 0); - DTensor rhs1(rhs, 2, 1, 1); + DTensor rhs(3, 1, 2); + DTensor rhs0(rhs, 2, 0, 0); + DTensor rhs1(rhs, 2, 1, 1); rhs0.upload(bData); rhs1.upload(bData); - DTensor sol(rhs); + DTensor sol(rhs); chol.solve(sol); std::vector expected = {-0.126805213103205, -0.128566396618528, 0.175061641423036}; std::vector actual(6); sol.download(actual); for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i], epsilon); // 0 for (size_t i = 0; i < 3; i++) EXPECT_NEAR(expected[i], actual[i + 3], epsilon); // 1 - DTensor error = A * sol; + DTensor error = A * sol; error -= rhs; EXPECT_TRUE(error.normF() < epsilon); } @@ -1149,15 +1181,15 @@ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void qrFactorisation(T epsilon) { size_t nR = 4; size_t nC = 3; - DTensor temp(nR, nC); - DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); - QRFactoriser qr(temp); + DTensor temp(nR, nC); + 
DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); + QRFactoriser qr(temp); A.deviceCopyTo(temp); int status = qr.factorise(); EXPECT_EQ(status, 0); - DTensor Q(nR, nC); - DTensor R(nC, nC, 1, true); - DTensor QR(nR, nC); + DTensor Q(nR, nC); + DTensor R(nC, nC, 1, true); + DTensor QR(nR, nC); status = qr.getQR(Q, R); EXPECT_EQ(status, 0); QR.addAB(Q, R); @@ -1180,15 +1212,15 @@ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void qrFactorisationTall(T epsilon) { size_t nR = 20; size_t nC = 3; - DTensor temp(nR, nC); - DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); - QRFactoriser qr(temp); + DTensor temp(nR, nC); + DTensor A = DTensor::createRandomTensor(nR, nC, 1, -100, 100); + QRFactoriser qr(temp); A.deviceCopyTo(temp); int status = qr.factorise(); EXPECT_EQ(status, 0); - DTensor Q(nR, nC); - DTensor R(nC, nC, 1, true); - DTensor QR(nR, nC); + DTensor Q(nR, nC); + DTensor R(nC, nC, 1, true); + DTensor QR(nR, nC); status = qr.getQR(Q, R); EXPECT_EQ(status, 0); QR.addAB(Q, R); @@ -1210,7 +1242,7 @@ TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX void qrLeastSquares(T epsilon) { size_t nR = 4; size_t nC = 3; - DTensor temp(nR, nC); + DTensor temp(nR, nC); std::vector vecA = {85.5638, -59.4001, -80.1992, 99.9464, 5.51393, 5.17935, 6.87488, -26.7536, 36.0914, @@ -1219,12 +1251,12 @@ void qrLeastSquares(T epsilon) { -48.5744, 43.4229, -56.5081}; // Random vector - DTensor A(vecA, nR, nC, 1, rowMajor); - DTensor b(vecB, nR); - DTensor xFull(nR); - DTensor x(xFull, 0, 0, nC - 1); - DTensor Ax(nR); - QRFactoriser qr(temp); + DTensor A(vecA, nR, nC, 1, rowMajor); + DTensor b(vecB, nR); + DTensor xFull(nR); + DTensor x(xFull, 0, 0, nC - 1); + DTensor Ax(nR); + QRFactoriser qr(temp); A.deviceCopyTo(temp); int status = qr.factorise(); EXPECT_EQ(status, 0); @@ -1265,19 +1297,19 @@ void computeNullspaceTensor(T epsilon) { 1, 2, 3, 4, 2, 4, 6, 8, 3, 6, 9, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - 
DTensor A(aData, 3, 4, 5); - Nullspace ns(A); - DTensor nA = ns.nullspace(); + DTensor A(aData, 3, 4, 5); + Nullspace ns(A); + DTensor nA = ns.nullspace(); size_t nMats = nA.numMats(); EXPECT_EQ(nMats, 5); for (size_t i = 0; i < nMats; i++) { - DTensor nAi(nA, 2, i, i); - DTensor Ai(A, 2, i, i); - DTensor mustBeZero = Ai * nAi; + DTensor nAi(nA, 2, i, i); + DTensor Ai(A, 2, i, i); + DTensor mustBeZero = Ai * nAi; EXPECT_LT(mustBeZero.normF(), epsilon); - DTensor nAiTr = nAi.tr(); - DTensor mustBeEye = nAiTr * nAi; + DTensor nAiTr = nAi.tr(); + DTensor mustBeEye = nAiTr * nAi; EXPECT_NEAR(1, mustBeEye(0, 0, 0), epsilon); for (size_t ir = 0; ir < mustBeEye.numRows(); ir++) { for (size_t ic = 0; ic < mustBeEye.numCols(); ic++) { @@ -1306,9 +1338,9 @@ void computeNullspaceTrivial(T epsilon) { 1, 1, 1, 5, 6, 7, 9, 0, 3}; - DTensor A(data, 3, 3, 2, rowMajor); - Nullspace nullA(A); - DTensor N = nullA.nullspace(); + DTensor A(data, 3, 3, 2, rowMajor); + Nullspace nullA(A); + DTensor N = nullA.nullspace(); EXPECT_EQ(N.normF(), 0); } @@ -1329,28 +1361,28 @@ void projectOnNullspaceTensor(T epsilon) { std::vector mat{1, -2, 3, 4, -1, -1, -1, 1, 2, -3, 4, -1, -1, -1, -1, 3, 5, -7, -1, -1, -1}; - DTensor A(m, n, 1); + DTensor A(m, n, 1); A.upload(mat, rowMajor); - Nullspace ns = Nullspace(A); - DTensor N = ns.nullspace(); + Nullspace ns = Nullspace(A); + DTensor N = ns.nullspace(); // online std::vector vec{1, 2, 3, 4, 5, 6, 7}; - DTensor x(vec, n); - DTensor proj(x); + DTensor x(vec, n); + DTensor proj(x); ns.project(proj); // Testing that proj is indeed in ker A - DTensor error(m, 1, 1, true); + DTensor error(m, 1, 1, true); error.addAB(A, proj); EXPECT_TRUE(error.normF() < epsilon); // Orthogonality test (other - p) † (p - x) std::vector h_other{1, -2, 5, 4, 0, 0, 0}; - DTensor other(h_other, n); - DTensor y = N * other; - DTensor delta1 = y - proj; - DTensor delta2 = proj - x; + DTensor other(h_other, n); + DTensor y = N * other; + DTensor delta1 = y - proj; + DTensor 
delta2 = proj - x; EXPECT_LT(delta1.dotF(delta2), epsilon); } From 4198ecf5589996055a07a2f7dec037b4c4e0349d Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 23:16:23 +0000 Subject: [PATCH 16/17] Better memory management in DTensor::allocateOnDevice Free all allocated memory if allocation fails Fix code formatting in testTensor allocateOnDevice made void --- include/tensor.cuh | 16 ++++++++-------- test/testTensor.cu | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index d62803c..0f8b5ee 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -202,9 +202,8 @@ private: * Allocate `size` number of `T` data on the device. * @param size number of data elements to allocate * @param zero sets allocated data to `0` - * @return */ - bool allocateOnDevice(size_t size, bool zero = false); + void allocateOnDevice(size_t size, bool zero = false); /** * Create column-major `std::vector` from a row-major one. @@ -837,23 +836,24 @@ void DTensor::applyLeftGivensRotation(size_t i, size_t j, const T *c, const T } template -inline bool DTensor::allocateOnDevice(size_t size, bool zero) { - if (size <= 0) return false; +inline void DTensor::allocateOnDevice(size_t size, bool zero) { + cudaError_t cudaStatus; + if (size <= 0) return; destroy(); m_doDestroyData = true; size_t buffer_size = size * sizeof(T); - bool cudaStatus = cudaMalloc(&m_d_data, buffer_size); - if (cudaStatus != cudaSuccess) return false; + gpuErrChk(cudaMalloc(&m_d_data, buffer_size)); if (zero) gpuErrChk(cudaMemset(m_d_data, 0, buffer_size)); // set to zero all elements if (numMats() > 1) { m_doDestroyPtrMatrices = true; cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T *)); + if (cudaStatus != cudaSuccess) { + gpuErrChk(cudaFree(m_d_data)); + } } else { m_doDestroyPtrMatrices = false; } - - return (cudaStatus != cudaSuccess); } template diff --git a/test/testTensor.cu b/test/testTensor.cu index 4c1c147..d68ca02 100644 
--- a/test/testTensor.cu +++ b/test/testTensor.cu @@ -123,7 +123,7 @@ TEMPLATE_WITH_TYPE_T void tensorMoveConstructor() { DTensor zero(2, 3, 4, true); DTensor x(std::move(zero)); - DTensor y(DTensor < T > {100, 10, 1000}); + DTensor y(DTensor {100, 10, 1000}); } TEST_F(TensorTest, tensorMoveConstructor) { From 2c326fc3926af7f2a2b44e5ebdcd2b93d00e250e Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Thu, 7 Nov 2024 23:19:18 +0000 Subject: [PATCH 17/17] DTensor::allocateOnDevice: memento mori --- include/tensor.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 0f8b5ee..40e05f0 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -849,7 +849,8 @@ inline void DTensor::allocateOnDevice(size_t size, bool zero) { m_doDestroyPtrMatrices = true; cudaStatus = cudaMalloc(&m_d_ptrMatrices, numMats() * sizeof(T *)); if (cudaStatus != cudaSuccess) { - gpuErrChk(cudaFree(m_d_data)); + gpuErrChk(cudaFree(m_d_data)); // ... free previously allocated memory + gpuErrChk(cudaStatus); // ... and memento mori } } else { m_doDestroyPtrMatrices = false;