-
Notifications
You must be signed in to change notification settings - Fork 99
Implementation of MSpM on CPU #1911
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -792,6 +792,34 @@ void apply(std::shared_ptr<const DefaultExecutor> exec, | |||
| GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); | ||||
|
|
||||
|
|
||||
| template <typename ValueType, typename IndexType> | ||||
| void simple_mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||
| const matrix::Dense<ValueType>* a, | ||||
| const matrix::Csr<ValueType, IndexType>* b, | ||||
| matrix::Dense<ValueType>* c) | ||||
| { | ||||
| // Placeholder for c = a * b (dense a, CSR b, dense c): no kernel is written here yet, the body only expands GKO_NOT_IMPLEMENTED. | ||||
| GKO_NOT_IMPLEMENTED; | ||||
| } | ||||
|
|
||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( | ||||
| GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); | ||||
|
|
||||
|
|
||||
| template <typename ValueType, typename IndexType> | ||||
| void mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||
| const matrix::Dense<ValueType>* alpha, | ||||
| const matrix::Dense<ValueType>* a, | ||||
| const matrix::Csr<ValueType, IndexType>* b, | ||||
| const matrix::Dense<ValueType>* beta, matrix::Dense<ValueType>* c) | ||||
| { | ||||
| // Placeholder for c = alpha * a * b + beta * c (dense a, CSR b, dense c): no kernel is written here yet, the body only expands GKO_NOT_IMPLEMENTED. | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
you can delete this, too |
||||
| GKO_NOT_IMPLEMENTED; | ||||
| } | ||||
|
|
||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); | ||||
|
|
||||
|
|
||||
| template <typename ValueType> | ||||
| void transpose(std::shared_ptr<const DefaultExecutor> exec, | ||||
| const matrix::Dense<ValueType>* orig, | ||||
|
|
||||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -277,6 +277,34 @@ void apply(std::shared_ptr<const DefaultExecutor> exec, | |||
| GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); | ||||
|
|
||||
|
|
||||
| template <typename ValueType, typename IndexType> | ||||
| void simple_mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||
| const matrix::Dense<ValueType>* a, | ||||
| const matrix::Csr<ValueType, IndexType>* b, | ||||
| matrix::Dense<ValueType>* c) | ||||
| { | ||||
| // Placeholder for c = a * b (dense a, CSR b, dense c): no kernel is written here yet, the body only expands GKO_NOT_IMPLEMENTED. | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| GKO_NOT_IMPLEMENTED; | ||||
| } | ||||
|
|
||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( | ||||
| GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); | ||||
|
|
||||
|
|
||||
| template <typename ValueType, typename IndexType> | ||||
| void mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||
| const matrix::Dense<ValueType>* alpha, | ||||
| const matrix::Dense<ValueType>* a, | ||||
| const matrix::Csr<ValueType, IndexType>* b, | ||||
| const matrix::Dense<ValueType>* beta, matrix::Dense<ValueType>* c) | ||||
| { | ||||
| // Placeholder for c = alpha * a * b + beta * c (dense a, CSR b, dense c): no kernel is written here yet, the body only expands GKO_NOT_IMPLEMENTED. | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| GKO_NOT_IMPLEMENTED; | ||||
| } | ||||
|
|
||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); | ||||
|
|
||||
|
|
||||
| template <typename ValueType, typename IndexType> | ||||
| void convert_to_coo(std::shared_ptr<const DefaultExecutor> exec, | ||||
| const matrix::Dense<ValueType>* source, | ||||
|
|
||||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |||||||||||
| #include "accessor/block_col_major.hpp" | ||||||||||||
| #include "accessor/range.hpp" | ||||||||||||
| #include "core/components/prefix_sum_kernels.hpp" | ||||||||||||
| #include "core/matrix/csr_accessor_helper.hpp" | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| namespace gko { | ||||||||||||
|
|
@@ -138,6 +139,91 @@ void apply(std::shared_ptr<const DefaultExecutor> exec, | |||||||||||
|
|
||||||||||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); | ||||||||||||
|
|
||||||||||||
| // Shared OpenMP driver for the dense-times-CSR product. For every row of c it | ||||||||||||
| // lets `initialize_accumulator` seed a per-thread scratch row, adds | ||||||||||||
| // define_multiplication_operand(row, k) * b(k, col) for every stored entry of | ||||||||||||
| // b, and finally copies the scratch row into row `row` of c. | ||||||||||||
| // | ||||||||||||
| // exec: executor used to allocate the scratch array | ||||||||||||
| // a: dense left operand; this function only reaches it through the operand callback | ||||||||||||
| // b: CSR right operand | ||||||||||||
| // c: dense result, overwritten row by row | ||||||||||||
| // initialize_accumulator(ptr, size, row): fills the scratch row before row `row` is accumulated | ||||||||||||
| // define_multiplication_operand(row, k): returns the factor applied to row k of b | ||||||||||||
| template <typename ValueType, typename IndexType, typename InitAcc, typename DefMultOperand> | ||||||||||||
| void mspm_auxiliary(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||
|                     const matrix::Dense<ValueType>* a, | ||||||||||||
|                     const matrix::Csr<ValueType, IndexType>* b, | ||||||||||||
|                     matrix::Dense<ValueType>* c, | ||||||||||||
|                     InitAcc initialize_accumulator, | ||||||||||||
|                     DefMultOperand define_multiplication_operand) | ||||||||||||
|
Comment on lines
+147
to
+148
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I might shorten these names. |
||||||||||||
| { | ||||||||||||
|     // raw views of b's CSR arrays; values are read directly because a, b and c share ValueType | ||||||||||||
|     const auto b_rowptrs = b->get_const_row_ptrs(); | ||||||||||||
|     const auto b_cols = b->get_const_col_idxs(); | ||||||||||||
|     const auto b_vals = b->get_const_values(); | ||||||||||||
|
Comment on lines
+153
to
+154
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is unnecessary because the kernel is working for uniform precision now. |
||||||||||||
|     const auto c_vals_ptr = c->get_values(); | ||||||||||||
|     // loop bounds are converted once so the loops compare values of the same type | ||||||||||||
|     const auto num_rows = static_cast<IndexType>(c->get_size()[0]); | ||||||||||||
|     const auto num_b_rows = static_cast<IndexType>(b->get_size()[0]); | ||||||||||||
|     // each thread owns one scratch row that is as wide as b | ||||||||||||
|     const auto sub_acc_size = b->get_size()[1]; | ||||||||||||
|     const auto nb_th = static_cast<size_t>(omp_get_max_threads()); | ||||||||||||
|     array<ValueType> acc_array(exec, sub_acc_size * nb_th); | ||||||||||||
|     auto acc_ptr = acc_array.get_data(); | ||||||||||||
|     // the rows of c are shared out among the threads of the parallel region | ||||||||||||
| #pragma omp parallel | ||||||||||||
|     { | ||||||||||||
|         const auto th_id = omp_get_thread_num(); | ||||||||||||
|         const auto th_acc_begin_ptr = acc_ptr + th_id * sub_acc_size; | ||||||||||||
|         const auto th_acc_end_ptr = th_acc_begin_ptr + sub_acc_size; | ||||||||||||
| #pragma omp for | ||||||||||||
|         for (IndexType row = zero<IndexType>(); row < num_rows; row++) { | ||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
nit |
||||||||||||
|             // seed this thread's scratch row; the callback decides which values it starts from | ||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wrong comment. |
||||||||||||
|             initialize_accumulator(th_acc_begin_ptr, sub_acc_size, row); | ||||||||||||
|             // visit every row k of b | ||||||||||||
|             for (IndexType k = zero<IndexType>(); k < num_b_rows; k++) { | ||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
nit |
||||||||||||
|                 const auto a_val = define_multiplication_operand(row, k); | ||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. our variable name should be |
||||||||||||
|                 // scatter a_val * b(k, col) over the stored entries of row k of b | ||||||||||||
|                 for (IndexType b_idx = b_rowptrs[k]; b_idx < b_rowptrs[k + 1]; b_idx++) { | ||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
also for the name |
||||||||||||
|                     const auto col = b_cols[b_idx]; | ||||||||||||
|                     th_acc_begin_ptr[col] += a_val * b_vals[b_idx]; | ||||||||||||
|                 } | ||||||||||||
|             } | ||||||||||||
|             // write the finished scratch row into row `row` of c | ||||||||||||
|             auto out_ptr = c_vals_ptr + row * c->get_stride(); | ||||||||||||
|             std::copy(th_acc_begin_ptr, th_acc_end_ptr, out_ptr); | ||||||||||||
|
Comment on lines
+181
to
+182
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question: Maybe I missing that. Could you remind me why you need the accumulator array? |
||||||||||||
|         } | ||||||||||||
|     } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| // Computes c = a * b for a dense a and a CSR b by handing two callbacks to | ||||||||||||
| // mspm_auxiliary: one that zeroes the row accumulator and one that returns the | ||||||||||||
| // unscaled entry a(row, k). | ||||||||||||
| template <typename ValueType, typename IndexType> | ||||||||||||
| void simple_mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||
|                  const matrix::Dense<ValueType>* a, | ||||||||||||
|                  const matrix::Csr<ValueType, IndexType>* b, | ||||||||||||
|                  matrix::Dense<ValueType>* c) | ||||||||||||
| { | ||||||||||||
|     // every row starts from an all-zero accumulator; the row index is not needed | ||||||||||||
|     auto zero_fill = [](ValueType* acc_begin_ptr, IndexType acc_size, IndexType) { | ||||||||||||
|         const auto acc_end_ptr = acc_begin_ptr + acc_size; | ||||||||||||
|         std::fill(acc_begin_ptr, acc_end_ptr, zero<ValueType>()); | ||||||||||||
|     }; | ||||||||||||
|     // plain product: the operand is the entry of a itself, without any scaling | ||||||||||||
|     auto entry_of_a = [a](IndexType row, IndexType k) { | ||||||||||||
|         return a->at(row, k); | ||||||||||||
|     }; | ||||||||||||
|     mspm_auxiliary(exec, a, b, c, zero_fill, entry_of_a); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( | ||||||||||||
| GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| // Computes c = alpha * a * b + beta * c for a dense a and a CSR b. The scalars | ||||||||||||
| // are read from entry (0, 0) of `alpha` and `beta`; the row loop itself lives | ||||||||||||
| // in mspm_auxiliary, which is driven by the two callbacks defined here. | ||||||||||||
| template <typename ValueType, typename IndexType> | ||||||||||||
| void mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||
|           const matrix::Dense<ValueType>* alpha, | ||||||||||||
|           const matrix::Dense<ValueType>* a, | ||||||||||||
|           const matrix::Csr<ValueType, IndexType>* b, | ||||||||||||
|           const matrix::Dense<ValueType>* beta, matrix::Dense<ValueType>* c) | ||||||||||||
| { | ||||||||||||
|     // read the scalars once instead of on every callback invocation | ||||||||||||
|     const auto alpha_val = alpha->at(0, 0); | ||||||||||||
|     const auto beta_val = beta->at(0, 0); | ||||||||||||
|     // seed the accumulator with beta * c(row, :) | ||||||||||||
|     auto advanced_init_acc = [c, beta_val](ValueType* acc_begin_ptr, IndexType acc_size, IndexType row) { | ||||||||||||
|         const auto c_row_begin = c->get_const_values() + c->get_stride() * row; | ||||||||||||
|         // a lambda replaces std::bind1st, which was removed from the standard library in C++17 | ||||||||||||
|         std::transform(c_row_begin, c_row_begin + acc_size, acc_begin_ptr, | ||||||||||||
|                        [beta_val](const ValueType& c_val) { return beta_val * c_val; }); | ||||||||||||
|
Comment on lines
+215
to
+216
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I would move comment out of function call to let clang-format do format unless there is a good reason. |
||||||||||||
|     }; | ||||||||||||
|     // the factor applied to row k of b is alpha * a(row, k) | ||||||||||||
|     auto advanced_def_mult_operand = [a, alpha_val](IndexType row, IndexType k) { | ||||||||||||
|         return alpha_val * a->at(row, k); | ||||||||||||
|     }; | ||||||||||||
|     mspm_auxiliary(exec, a, b, c, advanced_init_acc, advanced_def_mult_operand); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| template <typename ValueType, typename IndexType> | ||||||||||||
| void convert_to_coo(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||
|
|
||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -22,6 +22,7 @@ | |||||||||||||
| #include "accessor/range.hpp" | ||||||||||||||
| #include "core/base/mixed_precision_types.hpp" | ||||||||||||||
| #include "core/components/prefix_sum_kernels.hpp" | ||||||||||||||
| #include "core/matrix/csr_accessor_helper.hpp" | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| namespace gko { | ||||||||||||||
|
|
@@ -91,6 +92,84 @@ void apply(std::shared_ptr<const ReferenceExecutor> exec, | |||||||||||||
|
|
||||||||||||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); | ||||||||||||||
|
|
||||||||||||||
| // Shared sequential driver for the dense-times-CSR product. For every row of c | ||||||||||||||
| // it lets `initialize_accumulator` seed a scratch row, adds | ||||||||||||||
| // define_multiplication_operand(row, k) * b(k, col) for every stored entry of | ||||||||||||||
| // b, and finally copies the scratch row into row `row` of c. | ||||||||||||||
| // | ||||||||||||||
| // exec: executor used to allocate the scratch array | ||||||||||||||
| // a: dense left operand; this function only reaches it through the operand callback | ||||||||||||||
| // b: CSR right operand | ||||||||||||||
| // c: dense result, overwritten row by row | ||||||||||||||
| // initialize_accumulator(ptr, size, row): fills the scratch row before row `row` is accumulated | ||||||||||||||
| // define_multiplication_operand(row, k): returns the factor applied to row k of b | ||||||||||||||
| template <typename ValueType, typename IndexType, typename InitAcc, typename DefMultOperand> | ||||||||||||||
| void mspm_auxiliary(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||||
|                     const matrix::Dense<ValueType>* a, | ||||||||||||||
|                     const matrix::Csr<ValueType, IndexType>* b, | ||||||||||||||
|                     matrix::Dense<ValueType>* c, | ||||||||||||||
|                     InitAcc initialize_accumulator, | ||||||||||||||
|                     DefMultOperand define_multiplication_operand) | ||||||||||||||
| { | ||||||||||||||
|     // raw views of b's CSR arrays; values are read directly because a, b and c share ValueType | ||||||||||||||
|     const auto b_rowptrs = b->get_const_row_ptrs(); | ||||||||||||||
|     const auto b_cols = b->get_const_col_idxs(); | ||||||||||||||
|     const auto b_vals = b->get_const_values(); | ||||||||||||||
|     const auto c_vals_ptr = c->get_values(); | ||||||||||||||
|     // loop bounds are converted once so the loops compare values of the same type | ||||||||||||||
|     const auto num_rows = static_cast<IndexType>(c->get_size()[0]); | ||||||||||||||
|     const auto num_b_rows = static_cast<IndexType>(b->get_size()[0]); | ||||||||||||||
|     // a single scratch row, as wide as b, is reused for every row of c | ||||||||||||||
|     const auto acc_size = b->get_size()[1]; | ||||||||||||||
|     array<ValueType> acc_array(exec, acc_size); | ||||||||||||||
|     auto acc_begin_ptr = acc_array.get_data(); | ||||||||||||||
|     auto acc_end_ptr = acc_begin_ptr + acc_size; | ||||||||||||||
|     for (IndexType row = zero<IndexType>(); row < num_rows; row++) { | ||||||||||||||
|         // seed the scratch row; the callback decides which values it starts from | ||||||||||||||
|         initialize_accumulator(acc_begin_ptr, acc_size, row); | ||||||||||||||
|         // visit every row k of b | ||||||||||||||
|         for (IndexType k = zero<IndexType>(); k < num_b_rows; k++) { | ||||||||||||||
|             const auto a_val = define_multiplication_operand(row, k); | ||||||||||||||
|             // scatter a_val * b(k, col) over the stored entries of row k of b | ||||||||||||||
|             for (IndexType b_idx = b_rowptrs[k]; b_idx < b_rowptrs[k + 1]; b_idx++) { | ||||||||||||||
|                 const auto col = b_cols[b_idx]; | ||||||||||||||
|                 acc_begin_ptr[col] += a_val * b_vals[b_idx]; | ||||||||||||||
|             } | ||||||||||||||
|         } | ||||||||||||||
|         // write the finished scratch row into row `row` of c | ||||||||||||||
|         auto out_ptr = c_vals_ptr + row * c->get_stride(); | ||||||||||||||
|         std::copy(acc_begin_ptr, acc_end_ptr, out_ptr); | ||||||||||||||
|     } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Computes c = a * b for a dense a and a CSR b by handing two callbacks to | ||||||||||||||
| // mspm_auxiliary: one that zeroes the row accumulator and one that returns the | ||||||||||||||
| // unscaled entry a(row, k). | ||||||||||||||
| template <typename ValueType, typename IndexType> | ||||||||||||||
| void simple_mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||||
|                  const matrix::Dense<ValueType>* a, | ||||||||||||||
|                  const matrix::Csr<ValueType, IndexType>* b, | ||||||||||||||
|                  matrix::Dense<ValueType>* c) | ||||||||||||||
| { | ||||||||||||||
|     // every row starts from an all-zero accumulator; the row index is not needed | ||||||||||||||
|     auto zero_fill = [](ValueType* acc_begin_ptr, IndexType acc_size, IndexType) { | ||||||||||||||
|         const auto acc_end_ptr = acc_begin_ptr + acc_size; | ||||||||||||||
|         std::fill(acc_begin_ptr, acc_end_ptr, zero<ValueType>()); | ||||||||||||||
|     }; | ||||||||||||||
|     // plain product: the operand is the entry of a itself, without any scaling | ||||||||||||||
|     auto entry_of_a = [a](IndexType row, IndexType k) { | ||||||||||||||
|         return a->at(row, k); | ||||||||||||||
|     }; | ||||||||||||||
|     mspm_auxiliary(exec, a, b, c, zero_fill, entry_of_a); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( | ||||||||||||||
| GKO_DECLARE_DENSE_SIMPLE_MSPM_KERNEL); | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| // Computes c = alpha * a * b + beta * c for a dense a and a CSR b. The scalars | ||||||||||||||
| // are read from entry (0, 0) of `alpha` and `beta`; the row loop itself lives | ||||||||||||||
| // in mspm_auxiliary, which is driven by the two callbacks defined here. | ||||||||||||||
| template <typename ValueType, typename IndexType> | ||||||||||||||
| void mspm(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||||
|           const matrix::Dense<ValueType>* alpha, | ||||||||||||||
|           const matrix::Dense<ValueType>* a, | ||||||||||||||
|           const matrix::Csr<ValueType, IndexType>* b, | ||||||||||||||
|           const matrix::Dense<ValueType>* beta, matrix::Dense<ValueType>* c) | ||||||||||||||
| { | ||||||||||||||
|     // read the scalars once instead of on every callback invocation | ||||||||||||||
|     const auto alpha_val = alpha->at(0, 0); | ||||||||||||||
|     const auto beta_val = beta->at(0, 0); | ||||||||||||||
|     // seed the accumulator with beta * c(row, :) | ||||||||||||||
|     auto advanced_init_acc = [c, beta_val](ValueType* acc_begin_ptr, IndexType acc_size, IndexType row) { | ||||||||||||||
|         const auto c_row_begin = c->get_const_values() + c->get_stride() * row; | ||||||||||||||
|         // a lambda replaces std::bind1st, which was removed from the standard library in C++17 | ||||||||||||||
|         std::transform(c_row_begin, c_row_begin + acc_size, acc_begin_ptr, | ||||||||||||||
|                        [beta_val](const ValueType& c_val) { return beta_val * c_val; }); | ||||||||||||||
|     }; | ||||||||||||||
|     // the factor applied to row k of b is alpha * a(row, k) | ||||||||||||||
|     auto advanced_def_mult_operand = [a, alpha_val](IndexType row, IndexType k) { | ||||||||||||||
|         return alpha_val * a->at(row, k); | ||||||||||||||
|     }; | ||||||||||||||
|
Comment on lines
+165
to
+167
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
from the name, I was expecting this form. also this will reduce the unclear data access from the function call, but I do not have strong opinion on this yet. |
||||||||||||||
|     mspm_auxiliary(exec, a, b, c, advanced_init_acc, advanced_def_mult_operand); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_MSPM_KERNEL); | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
| template <typename InValueType, typename OutValueType> | ||||||||||||||
| void copy(std::shared_ptr<const DefaultExecutor> exec, | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I pasted the wrong text by mistake; you can delete this comment.