From 7f6c166ce7a98be35a4537a5aface44eaf40a482 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Fri, 8 Aug 2025 13:36:52 +0200 Subject: [PATCH 1/2] initialize MSpM kernel --- common/cuda_hip/matrix/dense_kernels.cpp | 28 ++++++++++++++ core/device_hooks/common_kernels.inc.cpp | 2 + core/matrix/dense.cpp | 49 ++++++++++++++++++------ core/matrix/dense_kernels.hpp | 19 ++++++++- dpcpp/matrix/dense_kernels.dp.cpp | 28 ++++++++++++++ omp/matrix/dense_kernels.cpp | 28 ++++++++++++++ reference/matrix/dense_kernels.cpp | 28 ++++++++++++++ 7 files changed, 169 insertions(+), 13 deletions(-) diff --git a/common/cuda_hip/matrix/dense_kernels.cpp b/common/cuda_hip/matrix/dense_kernels.cpp index 5e31215faaa..9d9b1780897 100644 --- a/common/cuda_hip/matrix/dense_kernels.cpp +++ b/common/cuda_hip/matrix/dense_kernels.cpp @@ -792,6 +792,34 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void simple_mspm(std::shared_ptr exec, + const matrix::Dense* a, + const matrix::Csr* b, + matrix::Dense* c) +{ + // TODO: implement c = a * b with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); + + +template +void mspm(std::shared_ptr exec, + const matrix::Dense* alpha, + const matrix::Dense* a, + const matrix::Csr* b, + const matrix::Dense* beta, matrix::Dense* c) +{ + // TODO: implement c = alpha * a * b + beta * c with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); + + template void transpose(std::shared_ptr exec, const matrix::Dense* orig, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index d5a0fec4fbd..e7f89786afe 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -440,6 +440,8 @@ namespace dense { GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); 
GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); GKO_STUB_VALUE_CONVERSION_OR_COPY(GKO_DECLARE_DENSE_COPY_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); GKO_STUB_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index d3868e7741b..d897271b480 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -42,6 +42,8 @@ namespace { GKO_REGISTER_OPERATION(simple_apply, dense::simple_apply); GKO_REGISTER_OPERATION(apply, dense::apply); +GKO_REGISTER_OPERATION(simple_mspm, dense::simple_mspm); +GKO_REGISTER_OPERATION(mspm, dense::mspm); GKO_REGISTER_OPERATION(copy, dense::copy); GKO_REGISTER_OPERATION(fill, dense::fill); GKO_REGISTER_OPERATION(scale, dense::scale); @@ -110,12 +112,23 @@ GKO_REGISTER_OPERATION(add_scaled_identity, dense::add_scaled_identity); template void Dense::apply_impl(const LinOp* b, LinOp* x) const { - precision_dispatch_real_complex( - [this](auto dense_b, auto dense_x) { - this->get_executor()->run( - dense::make_simple_apply(this, dense_b, dense_x)); - }, - b, x); + // TODO: it does not consider mixed precision for MSpM + if (auto b_csr = + dynamic_cast*>(b)) { + this->get_executor()->run( + dense::make_simple_mspm(this, b_csr, as(x))); + } else if (auto b_csr = + dynamic_cast*>(b)) { + this->get_executor()->run( + dense::make_simple_mspm(this, b_csr, as(x))); + } else { + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + this->get_executor()->run( + dense::make_simple_apply(this, dense_b, dense_x)); + }, + b, x); + } } @@ -123,12 +136,24 @@ template void Dense::apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const { - precision_dispatch_real_complex( - [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { - this->get_executor()->run(dense::make_apply( - 
dense_alpha, this, dense_b, dense_beta, dense_x)); - }, - alpha, b, beta, x); + // TODO: it does not consider mixed precision for MSpM + if (auto b_csr = + dynamic_cast*>(b)) { + this->get_executor()->run(dense::make_mspm( + as(alpha), this, b_csr, as(beta), as(x))); + } else if (auto b_csr = + dynamic_cast*>(b)) { + this->get_executor()->run(dense::make_mspm( + as(alpha), this, b_csr, as(beta), as(x))); + } else { + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, + auto dense_x) { + this->get_executor()->run(dense::make_apply( + dense_alpha, this, dense_b, dense_beta, dense_x)); + }, + alpha, b, beta, x); + } } diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 7422b431aa0..a18ca982f9d 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -31,6 +31,19 @@ namespace kernels { const matrix::Dense<_type>* a, const matrix::Dense<_type>* b, \ const matrix::Dense<_type>* beta, matrix::Dense<_type>* c) +#define GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL(_vtype, _itype) \ + void simple_mspm(std::shared_ptr exec, \ + const matrix::Dense<_vtype>* a, \ + const matrix::Csr<_vtype, _itype>* b, \ + matrix::Dense<_vtype>* c) + +#define GKO_DECLARE_DENSE_MSPM_KERNEL(_vtype, _itype) \ + void mspm(std::shared_ptr exec, \ + const matrix::Dense<_vtype>* alpha, \ + const matrix::Dense<_vtype>* a, \ + const matrix::Csr<_vtype, _itype>* b, \ + const matrix::Dense<_vtype>* beta, matrix::Dense<_vtype>* c) + #define GKO_DECLARE_DENSE_COPY_KERNEL(_intype, _outtype) \ void copy(std::shared_ptr exec, \ const matrix::Dense<_intype>* input, \ @@ -354,6 +367,10 @@ namespace kernels { GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType); \ + template \ + 
GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_MSPM_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType); \ template \ diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 99aac7064e5..fa847b86f62 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -277,6 +277,34 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void simple_mspm(std::shared_ptr exec, + const matrix::Dense* a, + const matrix::Csr* b, + matrix::Dense* c) +{ + // TODO: implement c = a * b with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); + + +template +void mspm(std::shared_ptr exec, + const matrix::Dense* alpha, + const matrix::Dense* a, + const matrix::Csr* b, + const matrix::Dense* beta, matrix::Dense* c) +{ + // TODO: implement c = alpha * a * b + beta * c with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); + + template void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index fe1f58ef93d..9e0a4f05031 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -139,6 +139,34 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void simple_mspm(std::shared_ptr exec, + const matrix::Dense* a, + const matrix::Csr* b, + matrix::Dense* c) +{ + // TODO: implement c = a * b with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); + + +template +void mspm(std::shared_ptr exec, + const matrix::Dense* alpha, + const matrix::Dense* a, + const matrix::Csr* b, + const 
matrix::Dense* beta, matrix::Dense* c) +{ + // TODO: implement c = alpha * a * b + beta * c with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); + + template void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 7c36d9101d5..7d6b0ccdbdf 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -92,6 +92,34 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void simple_mspm(std::shared_ptr exec, + const matrix::Dense* a, + const matrix::Csr* b, + matrix::Dense* c) +{ + // TODO: implement c = a * b with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); + + +template +void mspm(std::shared_ptr exec, + const matrix::Dense* alpha, + const matrix::Dense* a, + const matrix::Csr* b, + const matrix::Dense* beta, matrix::Dense* c) +{ + // TODO: implement c = alpha * a * b + beta * c with single thread + GKO_NOT_IMPLEMENTED; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); + + template void copy(std::shared_ptr exec, const matrix::Dense* input, From 4000577528a3866b5c64de2760669a89a743a601 Mon Sep 17 00:00:00 2001 From: Coxy Date: Wed, 13 Aug 2025 15:40:52 +0200 Subject: [PATCH 2/2] implementation and tests for simple and advanced MSpM on reference and omp executors --- omp/matrix/dense_kernels.cpp | 66 ++++++++++- reference/matrix/dense_kernels.cpp | 59 +++++++++- reference/test/matrix/dense_kernels.cpp | 149 +++++++++++++++++++++++- test/matrix/dense_kernels.cpp | 36 +++++- 4 files changed, 299 insertions(+), 11 deletions(-) diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 9e0a4f05031..9020d69d5cd 100644 --- a/omp/matrix/dense_kernels.cpp +++ 
b/omp/matrix/dense_kernels.cpp @@ -23,6 +23,7 @@ #include "accessor/block_col_major.hpp" #include "accessor/range.hpp" #include "core/components/prefix_sum_kernels.hpp" +#include "core/matrix/csr_accessor_helper.hpp" namespace gko { @@ -138,6 +139,50 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void mspm_auxiliary(std::shared_ptr exec, + const matrix::Dense* a, + const matrix::Csr* b, + matrix::Dense* c, + InitAcc initialize_accumulator, + DefMultOperand define_multiplication_operand) +{ + //initialization: CSR row pointers and column indices of b, accessors for a and b, raw output pointer of c + const auto b_rowptrs = b->get_const_row_ptrs(); + const auto b_cols = b->get_const_col_idxs(); + const auto a_vals = acc::helper::build_const_rrm_accessor(a); + const auto b_vals = acc::helper::build_const_rrm_accessor(b); + const auto c_vals_ptr = c->get_values(); + //scratch storage that accumulates the partial results of one output row per thread + const auto sub_acc_size = b->get_size()[1]; //each accumulator holds one full result row (as many entries as b has columns) + const size_t nb_th = omp_get_max_threads(); //thread count used to size the scratch array + array acc_array(exec, sub_acc_size*nb_th); //one accumulator per thread (sub_acc_size entries each), not one per row + auto acc_ptr = acc_array.get_data(); + //compute the multiplication; the omp for below distributes the rows of a over the threads + #pragma omp parallel + { + const auto th_id = omp_get_thread_num(); + const auto th_acc_begin_ptr = acc_ptr + th_id*sub_acc_size; + const auto th_acc_end_ptr = acc_ptr + (th_id+1)*sub_acc_size; + #pragma omp for + for(IndexType row=zero(); rowget_size()[0]; row++){ + //(re)initialize this thread's accumulator through the caller-supplied callback + initialize_accumulator(th_acc_begin_ptr, sub_acc_size, row); + //iterate over all rows k of b + for(IndexType k=zero(); kget_size()[0]; k++){ + const auto val_A = define_multiplication_operand(row, k); + //iterate over the stored entries of row k of b + for(IndexType idx_B=b_rowptrs[k]; idx_Bget_stride(); + std::copy(th_acc_begin_ptr, th_acc_end_ptr, out_ptr); + } + } +} template void simple_mspm(std::shared_ptr exec, @@ -145,8 +190,13 @@ void simple_mspm(std::shared_ptr exec, const 
matrix::Csr* b, matrix::Dense* c) { - // TODO: implement c = a * b with single thread - GKO_NOT_IMPLEMENTED; + auto simple_init_acc = [](ValueType* acc_begin_ptr, IndexType acc_size, IndexType row){ + std::fill(acc_begin_ptr, acc_begin_ptr + acc_size, zero()); //plain product c = a * b: every output row starts from zeroes, so row is not needed here + }; + auto simple_def_mult_operand = [a](IndexType row, IndexType k){ + return a->at(row, k); //no multiplication by alpha, just get value in a + }; + mspm_auxiliary(exec, a, b, c, simple_init_acc, simple_def_mult_operand); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -160,8 +210,16 @@ void mspm(std::shared_ptr exec, const matrix::Csr* b, const matrix::Dense* beta, matrix::Dense* c) { - // TODO: implement c = alpha * a * b + beta * c with single thread - GKO_NOT_IMPLEMENTED; + auto advanced_init_acc = [c, beta](ValueType* acc_begin_ptr, IndexType acc_size, IndexType row){ + const auto begin_row_c_vals_ptr = c->get_const_values() + c->get_stride()*row; + std::transform( //initialize the accumulator with beta * c(row, :) + begin_row_c_vals_ptr, begin_row_c_vals_ptr + acc_size, + acc_begin_ptr, [beta_val = beta->at(0, 0)](const ValueType& c_val){ return beta_val * c_val; }); //lambda instead of std::bind1st, which was removed in C++17 + }; + auto advanced_def_mult_operand = [a, alpha](IndexType row, IndexType k){ + return alpha->at(0, 0) * a->at(row, k); //multiply a(row,k) by alpha + }; + mspm_auxiliary(exec, a, b, c, advanced_init_acc, advanced_def_mult_operand); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 7d6b0ccdbdf..5b476a90ef4 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -22,6 +22,7 @@ #include "accessor/range.hpp" #include "core/base/mixed_precision_types.hpp" #include "core/components/prefix_sum_kernels.hpp" +#include "core/matrix/csr_accessor_helper.hpp" namespace gko { @@ -91,6 +92,43 @@ void apply(std::shared_ptr exec, 
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void mspm_auxiliary(std::shared_ptr exec, + const matrix::Dense* a, + const matrix::Csr* b, + matrix::Dense* c, + InitAcc initialize_accumulator, + DefMultOperand define_multiplication_operand) +{ + //initialization + const auto b_rowptrs = b->get_const_row_ptrs(); + const auto b_cols = b->get_const_col_idxs(); + const auto a_vals = acc::helper::build_const_rrm_accessor(a); + const auto b_vals = acc::helper::build_const_rrm_accessor(b); + const auto c_vals_ptr = c->get_values(); + //accumulate partial results of a row + const auto acc_size = b->get_size()[1]; //the accumulator stores a whole row + array acc_array(exec, acc_size); + auto acc_begin_ptr = acc_array.get_data(); + auto acc_end_ptr = acc_begin_ptr + acc_size; + //compute the multiplication + for(IndexType row=zero(); rowget_size()[0]; row++){ //iterate over a's row + //reinitialize accumulator to 0 + initialize_accumulator(acc_begin_ptr, acc_size, row); + //iterate over the whole matrix b + for(IndexType k=zero(); kget_size()[0]; k++){ + const auto val_A = define_multiplication_operand(row, k); + //iterate over the non-zero values of a row + for(IndexType idx_B=b_rowptrs[k]; idx_Bget_stride(); + std::copy(acc_begin_ptr, acc_end_ptr, out_ptr); + } +} template void simple_mspm(std::shared_ptr exec, @@ -98,8 +136,13 @@ void simple_mspm(std::shared_ptr exec, const matrix::Csr* b, matrix::Dense* c) { - // TODO: implement c = a * b with single thread - GKO_NOT_IMPLEMENTED; + auto simple_init_acc = [b](ValueType* acc_begin_ptr, IndexType acc_size, IndexType row){ + std::fill(acc_begin_ptr, acc_begin_ptr + acc_size, zero()); //reinitialize accumulator with zeroes + }; + auto simple_def_mult_operand = [a](IndexType row, IndexType k){ + return a->at(row, k); //no multiplication by alpha, just get value in a + }; + mspm_auxiliary(exec, a, b, c, simple_init_acc, simple_def_mult_operand); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( 
@@ -113,8 +156,16 @@ void mspm(std::shared_ptr exec, const matrix::Csr* b, const matrix::Dense* beta, matrix::Dense* c) { - // TODO: implement c = alpha * a * b + beta * c with single thread - GKO_NOT_IMPLEMENTED; + auto advanced_init_acc = [c, beta](ValueType* acc_begin_ptr, IndexType acc_size, IndexType row){ + const auto begin_row_c_vals_ptr = c->get_const_values() + c->get_stride()*row; + std::transform( //initialize the accumulator with beta * c(row, :) + begin_row_c_vals_ptr, begin_row_c_vals_ptr + acc_size, + acc_begin_ptr, [beta_val = beta->at(0, 0)](const ValueType& c_val){ return beta_val * c_val; }); //lambda instead of std::bind1st, which was removed in C++17 + }; + auto advanced_def_mult_operand = [a, alpha](IndexType row, IndexType k){ + return alpha->at(0, 0) * a->at(row, k); //multiply a(row,k) by alpha + }; + mspm_auxiliary(exec, a, b, c, advanced_init_acc, advanced_def_mult_operand); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 2ce7b023a1c..90304e9e15b 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -55,7 +55,8 @@ class Dense : public ::testing::Test { mtx6(gko::initialize({{1.0, 2.0, 0.0}, {0.0, 1.5, 0.0}}, exec)), mtx7(gko::initialize({{1.0, 2.0, 3.0}, {0.0, 1.5, 0.0}}, exec)), mtx8(gko::initialize( - {I({1.0, -1.0}), I({-2.0, 2.0}), I({-3.0, 3.0})}, exec)) + {I({1.0, -1.0}), I({-2.0, 2.0}), I({-3.0, 3.0})}, exec)), + mtx9(gko::initialize({I({1.0}), I({2.0}), I({3.0}), I({4.0})}, exec)) {} std::shared_ptr exec; @@ -67,6 +68,7 @@ class Dense : public ::testing::Test { std::unique_ptr mtx6; std::unique_ptr mtx7; std::unique_ptr mtx8; + std::unique_ptr mtx9; std::default_random_engine rand_engine; template @@ -1425,6 +1427,151 @@ class DenseWithIndexType TYPED_TEST_SUITE(DenseWithIndexType, gko::test::ValueIndexTypes, PairTypenameNameGenerator); +TYPED_TEST(DenseWithIndexType, SimpleMspmVectors) +{ //dense vector (mtx9) x horizontal sparse vector + using T = 
typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SMtx = gko::matrix::Csr; + using Mtx = gko::matrix::Dense; + //create sparse vector { 0 1 10 100 0 } + gko::array arr_val(this->exec, {1.0, 10.0, 100.0}); + gko::array arr_col(this->exec, {1, 2, 3}); + gko::array arr_row(this->exec, {0, 3}); + std::unique_ptr smtx( SMtx::create(this->exec, gko::dim<2>(1, 5), arr_val, arr_col, arr_row) ); + //declare result + std::unique_ptr res(gko::initialize({ + {-1.0, -1.0, -1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0, -1.0, -1.0} + }, this->exec)); + + this->mtx9->apply(smtx, res); + + for(index_type row=0; rowget_size()[0]; row++){ + EXPECT_EQ(res->at(row, 0), T{0.0}); + EXPECT_EQ(res->at(row, 4), T{0.0}); + } + EXPECT_EQ(res->at(0, 1), T{1.0}); + EXPECT_EQ(res->at(0, 2), T{10.0}); + EXPECT_EQ(res->at(0, 3), T{100.0}); + EXPECT_EQ(res->at(1, 1), T{2.0}); + EXPECT_EQ(res->at(1, 2), T{20.0}); + EXPECT_EQ(res->at(1, 3), T{200.0}); + EXPECT_EQ(res->at(2, 1), T{3.0}); + EXPECT_EQ(res->at(2, 2), T{30.0}); + EXPECT_EQ(res->at(2, 3), T{300.0}); + EXPECT_EQ(res->at(3, 1), T{4.0}); + EXPECT_EQ(res->at(3, 2), T{40.0}); + EXPECT_EQ(res->at(3, 3), T{400.0}); +} + +TYPED_TEST(DenseWithIndexType, SimpleMspmMatrices) +{ //dense matrix (mtx5) x sparse matrix (mtx3 of sparse test file) + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SMtx = gko::matrix::Csr; + using Mtx = gko::matrix::Dense; + //create sparse matrix + // 0 2 1 + // 3 1 8 + // 0 0 0 + gko::array arr_val(this->exec, {2.0, 1.0, 3.0, 1.0, 8.0}); + gko::array arr_col(this->exec, {1, 2, 0, 1, 2}); + gko::array arr_row(this->exec, {0, 2, 5, 5}); + std::unique_ptr smtx( SMtx::create(this->exec, gko::dim<2>(3, 3), arr_val, arr_col, arr_row) ); + //declare result + std::unique_ptr res(gko::initialize({ + {-1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0} + }, 
this->exec)); + + this->mtx5->apply(smtx, res); + + std::unique_ptr expected(gko::initialize({ + {-3.0, 1.0, -7.0}, + {6.0, -2.0, 14.0}, + {10.2, 7.6, 29.3} + }, this->exec)); + GKO_ASSERT_MTX_NEAR(res, expected, r::value); +} + +TYPED_TEST(DenseWithIndexType, AdvancedMspmVectors) +{ //dense vector (mtx9) x horizontal sparse vector + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SMtx = gko::matrix::Csr; + using Mtx = gko::matrix::Dense; + //create sparse vector { 0 1 10 100 0 } + gko::array arr_val(this->exec, {1.0, 10.0, 100.0}); + gko::array arr_col(this->exec, {1, 2, 3}); + gko::array arr_row(this->exec, {0, 3}); + std::unique_ptr smtx( SMtx::create(this->exec, gko::dim<2>(1, 5), arr_val, arr_col, arr_row) ); + //scalars + const auto alpha = gko::initialize({2.0}, this->exec); + const auto beta = gko::initialize({-1.0}, this->exec); + //declare result + std::unique_ptr res(gko::initialize({ + {-1.0, -1.0, -1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0, -1.0, -1.0} + }, this->exec)); + + this->mtx9->apply(alpha, smtx, beta, res); + + for(index_type row=0; rowget_size()[0]; row++){ + EXPECT_EQ(res->at(row, 0), T{1.0}); + EXPECT_EQ(res->at(row, 4), T{1.0}); + } + EXPECT_EQ(res->at(0, 1), T{3.0}); + EXPECT_EQ(res->at(0, 2), T{21.0}); + EXPECT_EQ(res->at(0, 3), T{201.0}); + EXPECT_EQ(res->at(1, 1), T{5.0}); + EXPECT_EQ(res->at(1, 2), T{41.0}); + EXPECT_EQ(res->at(1, 3), T{401.0}); + EXPECT_EQ(res->at(2, 1), T{7.0}); + EXPECT_EQ(res->at(2, 2), T{61.0}); + EXPECT_EQ(res->at(2, 3), T{601.0}); + EXPECT_EQ(res->at(3, 1), T{9.0}); + EXPECT_EQ(res->at(3, 2), T{81.0}); + EXPECT_EQ(res->at(3, 3), T{801.0}); +} + +TYPED_TEST(DenseWithIndexType, AdvancedMspmMatrices) +{ //dense matrix (mtx5) x sparse matrix (mtx3 of sparse test file) + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SMtx = 
gko::matrix::Csr; + using Mtx = gko::matrix::Dense; + //create sparse matrix + // 0 2 1 + // 3 1 8 + // 0 0 0 + gko::array arr_val(this->exec, {2.0, 1.0, 3.0, 1.0, 8.0}); + gko::array arr_col(this->exec, {1, 2, 0, 1, 2}); + gko::array arr_row(this->exec, {0, 2, 5, 5}); + std::unique_ptr smtx( SMtx::create(this->exec, gko::dim<2>(3, 3), arr_val, arr_col, arr_row) ); + //scalars + const auto alpha = gko::initialize({2.0}, this->exec); + const auto beta = gko::initialize({-1.0}, this->exec); + //declare result + std::unique_ptr res(gko::initialize({ + {-1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0}, + {-1.0, -1.0, -1.0} + }, this->exec)); + + this->mtx5->apply(alpha, smtx, beta, res); + + std::unique_ptr expected(gko::initialize({ + {-5.0, 3.0, -13.0}, + {13.0, -3.0, 29.0}, + {21.4, 16.2, 59.6} + }, this->exec)); + GKO_ASSERT_MTX_NEAR(res, expected, r::value); +} template void assert_coo_eq_mtx4(const gko::matrix::Coo* coo_mtx) diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index 727d6ee7d2f..f2813e7f969 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -34,6 +34,7 @@ class Dense : public CommonTestFixture { // in single mode, mixed_type will be the same as value_type using mixed_type = float; using Mtx = gko::matrix::Dense; + using CsrMtx = gko::matrix::Csr; using MixedMtx = gko::matrix::Dense; using NormVector = gko::matrix::Dense>; using Arr = gko::array; @@ -98,6 +99,7 @@ class Dense : public CommonTestFixture { x = gen_mtx(65, 25); y = gen_mtx(25, 35); c_x = gen_mtx(65, 25); + csr_y = gen_mtx(25, 35); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); result = gen_mtx(65, 35); @@ -105,6 +107,7 @@ class Dense : public CommonTestFixture { dx = gko::clone(exec, x); dy = gko::clone(exec, y); dc_x = gko::clone(exec, c_x); + dcsr_y = gko::clone(exec, csr_y); dresult = gko::clone(exec, result); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); @@ -166,6 +169,7 @@ class Dense : 
public CommonTestFixture { std::unique_ptr c_y; std::unique_ptr c_alpha; std::unique_ptr y; + std::unique_ptr csr_y; std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr result; @@ -175,6 +179,7 @@ class Dense : public CommonTestFixture { std::unique_ptr dc_y; std::unique_ptr dc_alpha; std::unique_ptr dy; + std::unique_ptr dcsr_y; std::unique_ptr dalpha; std::unique_ptr dbeta; std::unique_ptr dresult; @@ -325,9 +330,9 @@ TEST_F(Dense, ApplyToComplexIsEquivalentToRef) TEST_F(Dense, ApplyToMixedComplexIsEquivalentToRef) { set_up_apply_data(); - auto complex_b = gen_mtx(x->get_size()[1], 1); + auto complex_b = gen_mtx(x->get_size()[1], 1); auto dcomplex_b = gko::clone(exec, complex_b); - auto complex_x = gen_mtx(x->get_size()[0], 1); + auto complex_x = gen_mtx(x->get_size()[0], 1); auto dcomplex_x = gko::clone(exec, complex_x); x->apply(complex_b, complex_x); @@ -368,6 +373,33 @@ TEST_F(Dense, AdvancedApplyToMixedComplexIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(dcomplex_x, complex_x, 2e-7); } +TEST_F(Dense, SimpleMspmIsEquivalentToRef) +{ + set_up_apply_data(); + + #ifdef GKO_COMPILING_OMP + x->apply(csr_y, result); + dx->apply(dcsr_y, dresult); + + GKO_ASSERT_MTX_NEAR(dresult, result, r::value); + #else + ASSERT_THROW(dx->apply(dcsr_y, dresult), gko::NotImplemented); + #endif +} + +TEST_F(Dense, AdvancedMspmIsEquivalentToRef) +{ + set_up_apply_data(); + + #ifdef GKO_COMPILING_OMP + x->apply(alpha, csr_y, beta, result); + dx->apply(dalpha, dcsr_y, dbeta, dresult); + + GKO_ASSERT_MTX_NEAR(dresult, result, r::value); + #else + ASSERT_THROW(dx->apply(dalpha, dcsr_y, dbeta, dresult), gko::NotImplemented); + #endif +} TEST_F(Dense, ComputeDotComplexIsEquivalentToRef) {