Enable mixed-precision apply for the distributed matrix

yhmtsai · yhmtsai · commit 83a7beb0cf8d · 2025-05-19T17:04:42.000+02:00
diff --git a/core/distributed/matrix.cpp b/core/distributed/matrix.cpp
@@ -438,12 +438,14 @@ template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
 void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
     const LinOp* b, LinOp* x) const
 {
-    distributed::precision_dispatch_real_complex<ValueType>(
+    distributed::mixed_precision_dispatch_real_complex<ValueType>(
         [this](const auto dense_b, auto dense_x) {
-            auto x_exec = dense_x->get_executor();
             using x_value_type =
                 typename std::decay_t<decltype(*dense_x)>::value_type;
-            auto local_x = gko::matrix::Dense<ValueType>::create(
+            using b_value_type =
+                typename std::decay_t<decltype(*dense_b)>::value_type;
+            auto x_exec = dense_x->get_executor();
+            auto local_x = gko::matrix::Dense<x_value_type>::create(
                 x_exec, dense_x->get_local_vector()->get_size(),
                 gko::make_array_view(
                     x_exec,
@@ -456,8 +458,8 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
             init_recv_buffers(exec, row_gatherer_.get(), dense_b->get_size()[1],
                               recv_buffer_, host_recv_buffer_);
             auto host_recv_vector =
-                host_recv_buffer_.template get<ValueType>(comm);
-            auto recv_vector = recv_buffer_.template get<ValueType>(comm);
+                host_recv_buffer_.template get<b_value_type>(comm);
+            auto recv_vector = recv_buffer_.template get<b_value_type>(comm);
             auto recv_ptr = mpi::requires_host_buffer(exec, comm)
                                 ? host_recv_vector.get()
                                 : recv_vector.get();
@@ -481,13 +483,17 @@ template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
 void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
     const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const
 {
-    distributed::precision_dispatch_real_complex<ValueType>(
-        [this](const auto local_alpha, const auto dense_b,
-               const auto local_beta, auto dense_x) {
-            const auto x_exec = dense_x->get_executor();
+    distributed::mixed_precision_dispatch_real_complex<ValueType>(
+        [this, alpha, beta](const auto dense_b, auto dense_x) {
             using x_value_type =
                 typename std::decay_t<decltype(*dense_x)>::value_type;
-            auto local_x = gko::matrix::Dense<ValueType>::create(
+            using b_value_type =
+                typename std::decay_t<decltype(*dense_b)>::value_type;
+            const auto x_exec = dense_x->get_executor();
+            auto local_alpha = gko::make_temporary_conversion<ValueType>(alpha);
+            auto local_beta =
+                gko::make_temporary_conversion<x_value_type>(beta);
+            auto local_x = gko::matrix::Dense<x_value_type>::create(
                 x_exec, dense_x->get_local_vector()->get_size(),
                 gko::make_array_view(
                     x_exec,
@@ -500,24 +506,24 @@ void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
             init_recv_buffers(exec, row_gatherer_.get(), dense_b->get_size()[1],
                               recv_buffer_, host_recv_buffer_);
             auto host_recv_vector =
-                host_recv_buffer_.template get<ValueType>(comm);
-            auto recv_vector = recv_buffer_.template get<ValueType>(comm);
+                host_recv_buffer_.template get<b_value_type>(comm);
+            auto recv_vector = recv_buffer_.template get<b_value_type>(comm);
             auto recv_ptr = mpi::requires_host_buffer(exec, comm)
                                 ? host_recv_vector.get()
                                 : recv_vector.get();
             auto req = this->row_gatherer_->apply_async(dense_b, recv_ptr);
-            local_mtx_->apply(local_alpha, dense_b->get_local_vector(),
-                              local_beta, local_x);
+            local_mtx_->apply(local_alpha.get(), dense_b->get_local_vector(),
+                              local_beta.get(), local_x);
             req.wait();
 
             if (recv_ptr != recv_vector.get()) {
                 recv_vector->copy_from(host_recv_vector);
             }
             non_local_mtx_->apply(
-                local_alpha, recv_vector->get_local_vector(),
+                local_alpha.get(), recv_vector->get_local_vector(),
                 one_scalar_.template get<x_value_type>().get(), local_x);
         },
-        alpha, b, beta, x);
+        b, x);
 }
 
 
diff --git a/include/ginkgo/core/base/precision_dispatch.hpp b/include/ginkgo/core/base/precision_dispatch.hpp
@@ -49,13 +49,12 @@ make_temporary_conversion(Ptr&& matrix)
     using Pointee = detail::pointee<Ptr>;
     using Dense = matrix::Dense<ValueType>;
     using NextDense = matrix::Dense<next_precision<ValueType>>;
-    using Next2Dense = matrix::Dense<next_precision<ValueType, 2>>;
-    using Next3Dense = matrix::Dense<next_precision<ValueType, 3>>;
+    using NextNextDense =
+        matrix::Dense<next_precision<next_precision<ValueType>>>;
     using MaybeConstDense =
         std::conditional_t<std::is_const<Pointee>::value, const Dense, Dense>;
-    auto result =
-        detail::temporary_conversion<MaybeConstDense>::template create<
-            NextDense, Next2Dense, Next3Dense>(matrix);
+    auto result = detail::temporary_conversion<
+        MaybeConstDense>::template create<NextDense, NextNextDense>(matrix);
     if (!result) {
         GKO_NOT_SUPPORTED(matrix);
     }
@@ -230,17 +229,14 @@ void mixed_precision_dispatch(Function fn, const LinOp* in, LinOp* out)
 #ifdef GINKGO_MIXED_PRECISION
     using fst_type = matrix::Dense<ValueType>;
     using snd_type = matrix::Dense<next_precision<ValueType>>;
-    using trd_type = matrix::Dense<next_precision<ValueType, 2>>;
-    using fth_type = matrix::Dense<next_precision<ValueType, 3>>;
+    using trd_type = matrix::Dense<next_precision<next_precision<ValueType>>>;
     auto dispatch_out_vector = [&](auto dense_in) {
         if (auto dense_out = dynamic_cast<fst_type*>(out)) {
             fn(dense_in, dense_out);
         } else if (auto dense_out = dynamic_cast<snd_type*>(out)) {
             fn(dense_in, dense_out);
         } else if (auto dense_out = dynamic_cast<trd_type*>(out)) {
             fn(dense_in, dense_out);
-        } else if (auto dense_out = dynamic_cast<fth_type*>(out)) {
-            fn(dense_in, dense_out);
         } else {
             GKO_NOT_SUPPORTED(out);
         }
@@ -251,8 +247,6 @@ void mixed_precision_dispatch(Function fn, const LinOp* in, LinOp* out)
         dispatch_out_vector(dense_in);
     } else if (auto dense_in = dynamic_cast<const trd_type*>(in)) {
         dispatch_out_vector(dense_in);
-    } else if (auto dense_in = dynamic_cast<const fth_type*>(in)) {
-        dispatch_out_vector(dense_in);
     } else {
         GKO_NOT_SUPPORTED(in);
     }
@@ -347,8 +341,7 @@ gko::detail::temporary_conversion<Vector<ValueType>> make_temporary_conversion(
     auto result =
         gko::detail::temporary_conversion<Vector<ValueType>>::template create<
             Vector<next_precision<ValueType>>,
-            Vector<next_precision<ValueType, 2>>,
-            Vector<next_precision<ValueType, 3>>>(matrix);
+            Vector<next_precision<next_precision<ValueType>>>>(matrix);
     if (!result) {
         GKO_NOT_SUPPORTED(matrix);
     }
@@ -365,8 +358,8 @@ make_temporary_conversion(const LinOp* matrix)
 {
     auto result = gko::detail::temporary_conversion<const Vector<ValueType>>::
         template create<Vector<next_precision<ValueType>>,
-                        Vector<next_precision<ValueType, 2>>,
-                        Vector<next_precision<ValueType, 3>>>(matrix);
+                        Vector<next_precision<next_precision<ValueType>>>>(
+            matrix);
     if (!result) {
         GKO_NOT_SUPPORTED(matrix);
     }
@@ -395,6 +388,39 @@ void precision_dispatch(Function fn, Args*... linops)
 }
 
 
+template <typename ValueType, typename Function>  // Calls fn(in, out) with both LinOps cast to their concrete Vector types.
+void mixed_precision_dispatch(Function fn, const LinOp* in, LinOp* out)  // in and out are resolved independently of each other
+{
+#ifdef GINKGO_MIXED_PRECISION
+    using fst_type = Vector<ValueType>;  // candidate 1: the requested precision itself
+    using snd_type = Vector<next_precision<ValueType>>;  // candidate 2: next_precision applied once
+    using trd_type = Vector<next_precision<next_precision<ValueType>>>;  // candidate 3: next_precision applied twice
+    auto dispatch_out_vector = [&](auto vector_in) {  // picks the concrete type of out for an already-resolved vector_in
+        if (auto vector_out = dynamic_cast<fst_type*>(out)) {
+            fn(vector_in, vector_out);
+        } else if (auto vector_out = dynamic_cast<snd_type*>(out)) {
+            fn(vector_in, vector_out);
+        } else if (auto vector_out = dynamic_cast<trd_type*>(out)) {
+            fn(vector_in, vector_out);
+        } else {
+            GKO_NOT_SUPPORTED(out);  // out is none of the three candidate Vector types
+        }
+    };
+    if (auto vector_in = dynamic_cast<const fst_type*>(in)) {  // candidates are tried in order fst, snd, trd
+        dispatch_out_vector(vector_in);
+    } else if (auto vector_in = dynamic_cast<const snd_type*>(in)) {
+        dispatch_out_vector(vector_in);
+    } else if (auto vector_in = dynamic_cast<const trd_type*>(in)) {
+        dispatch_out_vector(vector_in);
+    } else {
+        GKO_NOT_SUPPORTED(in);  // in is none of the three candidate Vector types
+    }
+#else
+    precision_dispatch<ValueType>(fn, in, out);  // without GINKGO_MIXED_PRECISION, defer to precision_dispatch<ValueType>
+#endif
+}
+
+
 /**
  * Calls the given function with the given LinOps temporarily converted to
  * experimental::distributed::Vector<ValueType>* as parameters.
@@ -428,6 +454,27 @@ void precision_dispatch_real_complex(Function fn, const LinOp* in, LinOp* out)
 }
 
 
+template <typename ValueType, typename Function>  // Mixed-precision dispatch of fn(in, out) that can also route through real views.
+void mixed_precision_dispatch_real_complex(Function fn, const LinOp* in,
+                                           LinOp* out)
+{
+    auto complex_to_real = !(  // true only when both conditions below are false
+        is_complex<ValueType>() ||
+        dynamic_cast<const ConvertibleTo<experimental::distributed::Vector<>>*>(
+            in));  // non-null cast: in is handled by the direct branch below
+    if (complex_to_real) {
+        distributed::mixed_precision_dispatch<to_complex<ValueType>>(  // dispatch on to_complex<ValueType> instead of ValueType
+            [&fn](auto vector_in, auto vector_out) {
+                fn(vector_in->create_real_view().get(),  // NOTE(review): relies on the create_real_view() results living until fn returns
+                   vector_out->create_real_view().get());  // fn receives the real views, not the vectors matched by the dispatch
+            },
+            in, out);
+    } else {
+        distributed::mixed_precision_dispatch<ValueType>(fn, in, out);  // direct dispatch, fn is passed through unchanged
+    }
+}
+
+
 /**
  * @copydoc precision_dispatch_real_complex(Function, const LinOp*, LinOp*)
  */