Skip to content

Commit b690452

Browse files
committed
fixed issue with cudaFree
1 parent 29042ba commit b690452

File tree

3 files changed

+43
-9
lines changed

3 files changed

+43
-9
lines changed

include/tensor.cuh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ public:
177177
* @return Total allocated bytes
178178
*/
179179
size_t totalAllocatedBytes() const { return bytesAllocated; }
180+
181+
/**
 * Adds s to bytesAllocated.
 * NOTE(review): the callers visible in this diff pass a negated byte count to
 * decrement the counter, which presumably relies on unsigned wrap-around -
 * confirm the operand types at the call sites.
 * @param s Amount added to the counter
 */
void incrementAllocatedBytes(size_t s) { bytesAllocated += s; }
180182
};
181183

182184

@@ -216,10 +218,12 @@ private:
216218
if (m_doDestroyData) {
217219
if (m_d_data) gpuErrChk(cudaFree(m_d_data));
218220
m_d_data = nullptr;
221+
Session::getInstance().incrementAllocatedBytes(-m_numRows * m_numCols * m_numMats * sizeof(T));
219222
}
220223
if (m_doDestroyPtrMatrices) {
221224
if (m_d_ptrMatrices) gpuErrChk(cudaFree(m_d_ptrMatrices));
222225
m_d_ptrMatrices = nullptr;
226+
Session::getInstance().incrementAllocatedBytes(-m_numMats * sizeof(T *));
223227
}
224228
}
225229

main.cu

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,26 @@
44
#include <string>
55
#include <vector>
66

7-
8-
int main() {
7+
/*
 * Creates two random tensors, parses a tensor from "tensor.bt", then prints
 * the max absolute difference against r and the session's allocated-bytes
 * total in MB. All tensors are locals of this function.
 */
void xyz() {
98
/* Write to binary file */
109
auto r = DTensor<double>::createRandomTensor(3, 6, 4, -1, 1);
1110
// r2 is not used below; presumably it only adds to the allocated-bytes total - confirm
auto r2 = DTensor<double>::createRandomTensor(300, 600, 4, -1, 1);
1211
std::string fName = "tensor.bt"; // binary tensor file extension: .bt
13-
r.saveToFile(fName);
1412

1513
/* Parse binary file */
1614
// NOTE(review): the r.saveToFile(fName) line is marked as removed in this diff,
// so nothing visible here writes fName before it is parsed - confirm that the
// file exists and still corresponds to r
auto recov = DTensor<double>::parseFromFile(fName);
1715
auto err = r - recov;
1816
std::cout << "max error : " << err.maxAbs() << std::endl;
1917
// byte count is divided by 1e6, i.e. reported in units of 10^6 bytes
std::cout << "Memory: " << std::setprecision(3)
2018
<< (float) Session::getInstance().totalAllocatedBytes() / 1e6
21-
<< " MB" << std::endl;
19+
<< " MB" << std::endl;
20+
}
21+
22+
23+
/*
 * Runs xyz(), then prints the session's allocated-bytes total (in MB) once
 * more after xyz() has returned, i.e. after its local tensors are gone.
 */
int main() {
24+
xyz();
25+
std::cout << "Memory (outside): " << std::setprecision(3)
26+
<< (float) Session::getInstance().totalAllocatedBytes() / 1e6
27+
<< " MB" << std::endl;
2228
return 0;
2329
}

test/testTensor.cu

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -950,16 +950,16 @@ TEST_F(TensorTest, tensorTranspose) {
950950
}
951951

952952
/* ---------------------------------------
953-
* Tensor: transpose
953+
* Tensor: total bytes allocated
954954
* --------------------------------------- */
955955
TEMPLATE_WITH_TYPE_T
956956
void tensorBytesAllocated() {
957-
size_t previouslyAllocatedBytes = Session::getInstance().totalAllocatedBytes();
957+
// snapshot of the counter before A is constructed, so the check below is relative
const size_t previouslyAllocatedBytes = Session::getInstance().totalAllocatedBytes();
958958
size_t m = 10, n = 10, k = 20;
959959
DTensor<T> A(m, n, k);
960-
size_t allocatedBytes = Session::getInstance().totalAllocatedBytes();
961-
size_t currentAllocatedBytes = m * n * k * sizeof(T) + k * sizeof(T *);
962-
size_t expectedBytes = currentAllocatedBytes + previouslyAllocatedBytes;
960+
const size_t allocatedBytes = Session::getInstance().totalAllocatedBytes();
961+
// expected growth: m*n*k elements of T plus k pointers of type T*
const size_t currentAllocatedBytes = m * n * k * sizeof(T) + k * sizeof(T *);
962+
const size_t expectedBytes = currentAllocatedBytes + previouslyAllocatedBytes;
963963
EXPECT_EQ(expectedBytes, allocatedBytes);
964964
}
965965

@@ -969,6 +969,30 @@ TEST_F(TensorTest, tensorBytesAllocated) {
969969
tensorBytesAllocated<int>();
970970
}
971971

972+
973+
/* ---------------------------------------
974+
* Tensor: bytes allocated, then deallocated
975+
* --------------------------------------- */
976+
TEMPLATE_WITH_TYPE_T
977+
void tensorBytesAllocatedDeallocated() {
978+
// snapshot of the counter before the tensor is created
const size_t previouslyAllocatedBytes = Session::getInstance().totalAllocatedBytes();
979+
size_t m = 10, n = 10, k = 20;
980+
auto *A = new DTensor<T>(m, n, k); // new allocation (increments session)
981+
const size_t allocatedBytes = Session::getInstance().totalAllocatedBytes();
982+
// expected growth: m*n*k elements of T plus k pointers of type T*
const size_t expectedBytes = previouslyAllocatedBytes + m * n * k * sizeof(T) + k * sizeof(T *);
983+
EXPECT_EQ(expectedBytes, allocatedBytes);
984+
// after the tensor is deleted the counter is expected to return to its starting value
delete A;
985+
EXPECT_EQ(previouslyAllocatedBytes, Session::getInstance().totalAllocatedBytes());
986+
}
987+
988+
// Runs the allocate/deallocate byte-count check for float, double and int
TEST_F(TensorTest, tensorBytesAllocatedDeallocated) {
989+
tensorBytesAllocatedDeallocated<float>();
990+
tensorBytesAllocatedDeallocated<double>();
991+
tensorBytesAllocatedDeallocated<int>();
992+
}
993+
994+
995+
972996
/* ================================================================================================
973997
* LEAST SQUARES TESTS
974998
* ================================================================================================ */

0 commit comments

Comments
 (0)