diff --git a/CHANGELOG.md b/CHANGELOG.md index b61dd6e0b4..e153c2a73c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,10 +11,17 @@ The shared library version numbers for the oneMKL dense linear solver and matrix as well as the PETSc SNES nonlinear solver have been corrected. +Fixed a CMake bug where the MRI H-Tol controller was not included in the ARKODE +Fortran module. + Fixed a bug in the CUDA and HIP implementations of `SUNMemoryHelper_CopyAsync` where the execution stream is not extracted correctly from the helper when a stream is not provided to `SUNMemoryHelper_CopyAsync`. +Fixed a bug in MRIStep where a segfault would occur when an MRI coupling table +is not explicitly set and an MRI integrator is nested inside another MRI +integrator. + ### Deprecation Notices ## Changes to SUNDIALS in release 7.4.0 diff --git a/doc/shared/RecentChanges.rst b/doc/shared/RecentChanges.rst index 258958d3f3..b00bc2f284 100644 --- a/doc/shared/RecentChanges.rst +++ b/doc/shared/RecentChanges.rst @@ -10,8 +10,15 @@ The shared library version numbers for the oneMKL dense linear solver and matrix as well as the PETSc SNES nonlinear solver have been corrected. +Fixed a CMake bug where the MRI H-Tol controller was not included in the ARKODE +Fortran module. + Fixed a bug in the CUDA and HIP implementations of ``SUNMemoryHelper_CopyAsync`` where the execution stream is not extracted correctly from the helper when a stream is not provided to ``SUNMemoryHelper_CopyAsync``. +Fixed a bug in MRIStep where a segfault would occur when an MRI coupling table +is not explicitly set and an MRI integrator is nested inside another MRI +integrator. + **Deprecation Notices** diff --git a/doc/shared/sundials/Fortran.rst b/doc/shared/sundials/Fortran.rst index e640479484..994062bd46 100644 --- a/doc/shared/sundials/Fortran.rst +++ b/doc/shared/sundials/Fortran.rst @@ -504,11 +504,11 @@ a C file pointer, SUNDIALS provides two utility functions for creating a * ``r+`` to open a text file for reading/writing * ``w`` to truncate a text file to zero length or create it for writing * ``w+`` to open a text file for reading/writing or create it if it does - not exist - * ``a`` to open a text file for appending, see documentation of ``fopen`` for - your system/compiler + not exist + * ``a`` to open a text file for appending, see documentation of ``fopen`` + for your system/compiler * ``a+`` to open a text file for reading/appending, see documentation for - ``fopen`` for your system/compiler + ``fopen`` for your system/compiler :param fp: The ``FILE*`` that will be open when the function returns. This should be a `type(c_ptr)` in the Fortran. diff --git a/examples/arkode/F2003_serial/CMakeLists.txt b/examples/arkode/F2003_serial/CMakeLists.txt index 8ce2b906ab..2714848b03 100644 --- a/examples/arkode/F2003_serial/CMakeLists.txt +++ b/examples/arkode/F2003_serial/CMakeLists.txt @@ -18,7 +18,8 @@ # Example lists are tuples "name\;type" where the type is 'develop' for examples # excluded from 'make test' in releases -set(FARKODE_examples "ark_analytic_f2003\;\;develop") +set(FARKODE_examples "ark_analytic_f2003\;\;develop" + "ark_kpr_nestedmri_f2003\;\;develop") # Regression tests set(FARKODE_tests "test_ark_butcher_f2003\;develop") diff --git a/examples/arkode/F2003_serial/ark_kpr_nestedmri_f2003.f90 b/examples/arkode/F2003_serial/ark_kpr_nestedmri_f2003.f90 new file mode 100644 index 0000000000..feb317d55c --- /dev/null +++ b/examples/arkode/F2003_serial/ark_kpr_nestedmri_f2003.f90 @@ -0,0 +1,600 @@ +! ------------------------------------------------------------------------------ +! Programmer(s): David J. Gardner @ LLNL +! +! Based on the ark_kpr_nestedmri.cpp in examples/arkode/CXX_serial +! ------------------------------------------------------------------------------ +! SUNDIALS Copyright Start +! Copyright (c) 2002-2025, Lawrence Livermore National Security +! and Southern Methodist University. +! All rights reserved. +! +! See the top-level LICENSE and NOTICE files for details. +! +! SPDX-License-Identifier: BSD-3-Clause +! SUNDIALS Copyright End +! ------------------------------------------------------------------------------ +! Nested multirate nonlinear Kvaerno-Prothero-Robinson ODE test problem: +! +! [u]' = [ G e e ] [ (u^2 - p(t) - 2) / (2u) ] + [ p'(t)/(2u) ] +! [v] [ e al be ] [ (v^2 - q(t) - 2) / (2v) ] [ q'(t)/(2v) ] +! [w] [ e -be al ] [ (w^2 - r(t) - 2) / (2w) ] [ r'(t)/(2w) ] +! +! where +! +! p(t) = 0.5 * cos(t), +! q(t) = cos(om * t * (1 + exp(-(t - 2)^2))), and +! r(t) = cos(om * om * t * (1 + exp(-(t - 3)^2))). +! +! The first row corresponds to the slowest time scale, the second row to the +! intermediate time scale, and third row the fast time scale. +! +! This problem has the analytical solution: +! +! u(t) = sqrt(2 + p(t)), +! v(t) = sqrt(2 + q(t)), and +! w(t) = sqrt(2 + r(t)). +! +! The problem parameters are +! +! G = -10.0 : stiffness at slow time scale +! e = 0.5 : fast/slow coupling strength +! al = -1.0 : oscillatory coupling between v and w +! be = 1.0 : oscillatory coupling between v and w +! om = 50.0 : time-scale separation factor +! +! The stiffness of the slow time scale is determined by G and for |G| > 50 it is +! 'stiff' and ideally suited to a multirate method that is implicit at the slow +! time scale. +! +! The coupling strength between the slow and faster components is proportional +! to |e|. The coupling between the intermediate and fast components is +! determined by al and be. +! +! The "intermediate" variable, v, oscillates at a frequency "om" times faster +! than u, and the "fast" variable, w, oscillates at a frequency "om" times +! faster than v. +! ------------------------------------------------------------------------------ + +! ------------------------------------------------------------------------------ +! Module containing ODE parameters and right hand side functions +! ------------------------------------------------------------------------------ + +module kpr_mod + + use, intrinsic :: iso_c_binding + use fsundials_core_mod + + implicit none + + ! Problem parameters + real(c_double), parameter :: G = -10.0d0 + real(c_double), parameter :: e = 0.5d0 + real(c_double), parameter :: al = -1.0d0 + real(c_double), parameter :: be = 1.0d0 + real(c_double), parameter :: om = 50.0d0 + +contains + + ! ff routine to compute the fast portion of the ODE RHS + integer(c_int) function ff(t, y, ydot, user_data) result(ierr) bind(C) + + use, intrinsic :: iso_c_binding + implicit none + + real(c_double), value :: t + type(N_Vector) :: y + type(N_Vector) :: ydot + type(c_ptr), value :: user_data + + real(c_double), pointer, dimension(3) :: ydata(:) + real(c_double), pointer, dimension(3) :: ydotdata(:) + real(c_double) :: u, v, w, tmp1, tmp2, tmp3 + + ! Get vector data arrays + ydata => FN_VGetArrayPointer(y) + ydotdata => FN_VGetArrayPointer(ydot) + + ! Extract variables + u = ydata(1) + v = ydata(2) + w = ydata(3) + + ! fill in the RHS function: + ! [ 0 0 0 ] [ (u^2 - p - 2) / (2u) ] + [ 0 ] + ! [ 0 0 0 ] [ (v^2 - q - 2) / (2v) ] [ 0 ] + ! [ e -be al ] [ (w^2 - r - 2) / (2w) ] [ rdot(t) / (2w) ] + tmp1 = (-2.0d0 + u*u - p(t))/(2.0d0*u) + tmp2 = (-2.0d0 + v*v - q(t))/(2.0d0*v) + tmp3 = (-2.0d0 + w*w - r(t))/(2.0d0*w) + + ydotdata(1) = 0.0d0 + ydotdata(2) = 0.0d0 + ydotdata(3) = e*tmp1 - be*tmp2 + al*tmp3 + rdot(t)/(2.0d0*w) + + ! Return success + ierr = 0 + return + + end function ff + + ! fm routine to compute the intermediate portion of the ODE RHS + integer(c_int) function fm(t, y, ydot, user_data) result(ierr) bind(C) + + use, intrinsic :: iso_c_binding + implicit none + + real(c_double), value :: t + type(N_Vector) :: y + type(N_Vector) :: ydot + type(c_ptr), value :: user_data + + real(c_double), pointer, dimension(3) :: ydata(:) + real(c_double), pointer, dimension(3) :: ydotdata(:) + real(c_double) :: u, v, w, tmp1, tmp2, tmp3 + + ! Get vector data arrays + ydata => FN_VGetArrayPointer(y) + ydotdata => FN_VGetArrayPointer(ydot) + + ! Extract variables + u = ydata(1) + v = ydata(2) + w = ydata(3) + + ! fill in the RHS function: + ! [ 0 0 0 ] [ (u^2 - p - 2) / (2u) ] + [ 0 ] + ! [ e al be ] [ (v^2 - q - 2) / (2v) ] [ qdot(t) / (2v) ] + ! [ 0 0 0 ] [ (w^2 - r - 2) / (2w) ] [ 0 ] + tmp1 = (-2.0d0 + u*u - p(t))/(2.0d0*u) + tmp2 = (-2.0d0 + v*v - q(t))/(2.0d0*v) + tmp3 = (-2.0d0 + w*w - r(t))/(2.0d0*w) + + ydotdata(1) = 0.0d0 + ydotdata(2) = e*tmp1 + al*tmp2 + be*tmp3 + qdot(t)/(2.0d0*v) + ydotdata(3) = 0.0d0 + + ierr = 0 + return + + end function fm + + ! fs routine to compute the slow portion of the ODE RHS + integer(c_int) function fs(t, y, ydot, user_data) result(ierr) bind(C) + + use, intrinsic :: iso_c_binding + implicit none + + real(c_double), value :: t + type(N_Vector) :: y + type(N_Vector) :: ydot + type(c_ptr), value :: user_data + + real(c_double), pointer, dimension(3) :: ydata(:) + real(c_double), pointer, dimension(3) :: ydotdata(:) + real(c_double) :: u, v, w, tmp1, tmp2, tmp3 + + ! Get vector data arrays + ydata => FN_VGetArrayPointer(y) + ydotdata => FN_VGetArrayPointer(ydot) + + ! Extract variables + u = ydata(1) + v = ydata(2) + w = ydata(3) + + ! fill in the RHS function: + ! [ G e e ] [ (u^2 - p - 2) / (2u) ] + [ pdot(t) / (2u) ] + ! [ 0 0 0 ] [ (v^2 - q - 2) / (2v) ] [ 0 ] + ! [ 0 0 0 ] [ (w^2 - r - 2) / (2w) ] [ 0 ] + tmp1 = (-2.0d0 + u*u - p(t))/(2.0d0*u) + tmp2 = (-2.0d0 + v*v - q(t))/(2.0d0*v) + tmp3 = (-2.0d0 + w*w - r(t))/(2.0d0*w) + + ydotdata(1) = G*tmp1 + e*tmp2 + e*tmp3 + pdot(t)/(2.0d0*u) + ydotdata(2) = 0.0d0 + ydotdata(3) = 0.0d0 + + ierr = 0 + return + + end function fs + + ! ---------------- + ! Helper functions + ! ---------------- + + real(c_double) function p(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + result = 0.5d0*cos(t) + return + end function p + + real(c_double) function q(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + result = cos(om*t*(1.0d0 + exp(-(t - 2.0d0)*(t - 2.0d0)))) + return + end function q + + real(c_double) function r(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + result = cos(om*om*t*(1.0d0 + exp(-(t - 3.0d0)*(t - 3.0d0)))) + return + end function r + + real(c_double) function pdot(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + result = -0.5d0*sin(t) + return + end function pdot + + real(c_double) function qdot(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + real(c_double) :: tm2 + real(c_double) :: eterm + tm2 = t - 2.0d0 + eterm = exp(-tm2*tm2) + result = -sin(om*t*(1.0d0 + eterm))*om*(1.0d0 + eterm*(1.0d0 - 2.0d0*t*tm2)) + return + end function qdot + + real(c_double) function rdot(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + real(c_double) :: tm3 + real(c_double) :: eterm + tm3 = t - 3.0d0 + eterm = exp(-tm3*tm3) + result = -sin(om*om*t*(1.0d0 + eterm))*om*om*(1.0d0 + eterm*(1.0d0 - 2.0d0*t*tm3)) + end function rdot + + real(c_double) function utrue(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + result = sqrt(2.0d0 + p(t)) + return + end function utrue + + real(c_double) function vtrue(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + result = sqrt(2.0d0 + q(t)) + return + end function vtrue + + real(c_double) function wtrue(t) result(result) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + result = sqrt(2.0d0 + r(t)) + return + end function wtrue + + integer(c_int) function Ytrue(t, y) result(ierr) + use, intrinsic :: iso_c_binding + implicit none + real(c_double) :: t + type(N_Vector) :: y + real(c_double), pointer, dimension(3) :: ydata(:) + ydata => FN_VGetArrayPointer(y) + ydata(1) = utrue(t) + ydata(2) = vtrue(t) + ydata(3) = wtrue(t) + ierr = 0 + return + end function Ytrue + +end module kpr_mod + +! ------------------------------------------------------------------------------ +! Main driver program +! ------------------------------------------------------------------------------ + +program main + + use, intrinsic :: iso_fortran_env + use farkode_mod + use farkode_arkstep_mod + use farkode_erkstep_mod + use farkode_mristep_mod + use fnvector_serial_mod + use fsunadaptcontroller_soderlind_mod + use fsunadaptcontroller_mrihtol_mod + use kpr_mod + implicit none + + type(c_ptr) sunctx ! SUNDIALS simulation context + + type(N_Vector), pointer :: yvec ! solution vector + real(c_double), pointer, dimension(3) :: ydata(:) ! solution vector array + + ! ARKODE integrators + type(c_ptr) :: f_arkode_mem = c_null_ptr ! fast ARKODE integrator + type(c_ptr) :: m_arkode_mem = c_null_ptr ! intermediate ARKODE integrator + type(c_ptr) :: s_arkode_mem = c_null_ptr ! slow ARKODE integrator + + ! ARKODE inner steppers + type(c_ptr) :: f_stepper = c_null_ptr ! fast stepper + type(c_ptr) :: m_stepper = c_null_ptr ! intermediate stepper + + ! Slow and intermediate error controllers + type(SUNAdaptController), pointer :: m_controller_H ! step controller + type(SUNAdaptController), pointer :: m_controller_Tol ! tolerance controller + type(SUNAdaptController), pointer :: m_controller ! h-tol controller + + type(SUNAdaptController), pointer :: s_controller_H ! step controller + type(SUNAdaptController), pointer :: s_controller_Tol ! tolerance controller + type(SUNAdaptController), pointer :: s_controller ! h-tol controller + + ! General problem parameters +#if defined(SUNDIALS_INT32_T) + integer(kind=selected_int_kind(8)), parameter :: neq = 3 +#elif defined(SUNDIALS_INT64_T) + integer(kind=selected_int_kind(16)), parameter :: neq = 3 +#endif + real(c_double), parameter :: t0 = 0.0d0 ! initial time + real(c_double), parameter :: tf = 5.0d0 ! final time + integer(c_int), parameter :: num_output = 20 ! number of outputs + real(c_double), parameter :: rtol = 1.0d-4 ! relative tolerance + real(c_double), parameter :: atol = 1.0d-11 ! absolute tolerance + integer(c_int), parameter :: acc_type = ARK_ACCUMERROR_MAX ! error accumulation type + + ! Output variables + real(c_double) :: output_dt ! output interval + real(c_double) :: tout ! output time + real(c_double) :: tret(1) ! return time + integer(c_int) :: iout ! output counter + real(c_double) :: uerr, verr, werr ! solution error + + ! Error return flag + integer(c_int) :: retval = 0 + + ! File pointer for output + type(c_ptr) :: fp + + ! ----------------------- + ! Create SUNDIALS context + ! ----------------------- + + retval = FSUNContext_Create(SUN_COMM_NULL, sunctx) + call check_retval(retval, 'FSUNContext_Create') + + ! ------------------------------- + ! Create initial condition vector + ! ------------------------------- + + yvec => FN_VNew_Serial(neq, sunctx) + if (.not. associated(yvec)) then + print *, 'ERROR: N_VNew_Serial failed' + stop 1 + end if + + retval = Ytrue(T0, yvec) + call check_retval(retval, "Ytrue") + + ! ---------------------- + ! Create fast integrator + ! ---------------------- + + f_arkode_mem = FERKStepCreate(c_funloc(ff), t0, yvec, sunctx) + if (.not. c_associated(f_arkode_mem)) then + print *, 'ERROR: f_arkode_mem = NULL' + stop 1 + end if + + retval = FARKodeSStolerances(f_arkode_mem, rtol, atol) + call check_retval(retval, "FARKodeSStolerances") + + retval = FARKodeSetOrder(f_arkode_mem, 4) + call check_retval(retval, "FARKodeSetOrder") + + retval = FARKodeSetAccumulatedErrorType(f_arkode_mem, acc_type) + call check_retval(retval, "FARKodeSetAccumulatedErrorType") + + retval = FARKodeCreateMRIStepInnerStepper(f_arkode_mem, f_stepper) + call check_retval(retval, "FARKodeCreateMRIStepInnerStepper") + + ! ------------------------------ + ! Create intermediate integrator + ! ------------------------------ + + m_arkode_mem = FMRIStepCreate(c_funloc(fm), c_null_funptr, t0, yvec, f_stepper, sunctx) + if (.not. c_associated(m_arkode_mem)) then + print *, 'ERROR: m_arkode_mem = NULL' + stop 1 + end if + + retval = FARKodeSStolerances(m_arkode_mem, rtol, atol) + call check_retval(retval, "FARKodeSStolerances") + + retval = FARKodeSetOrder(m_arkode_mem, 4) + call check_retval(retval, "FARKodeSetOrder") + + retval = FARKodeSetAccumulatedErrorType(m_arkode_mem, acc_type) + call check_retval(retval, "FARKodeSetAccumulatedErrorType") + + m_controller_H => FSUNAdaptController_I(sunctx) + if (.not. associated(m_controller_H)) then + print *, 'ERROR: m_controller_H = NULL' + stop 1 + end if + + m_controller_Tol => FSUNAdaptController_I(sunctx) + if (.not. associated(m_controller_Tol)) then + print *, 'ERROR: m_controller_Tol = NULL' + stop 1 + end if + + m_controller => FSUNAdaptController_MRIHTol(m_controller_H, m_controller_Tol, sunctx) + if (.not. associated(m_controller)) then + print *, 'ERROR: m_controller = NULL' + stop 1 + end if + + retval = FARKodeSetAdaptController(m_arkode_mem, m_controller) + call check_retval(retval, "FARKodeSetAdaptController") + + retval = FARKodeCreateMRIStepInnerStepper(m_arkode_mem, m_stepper) + call check_retval(retval, "FARKodeCreateMRIStepInnerStepper") + + ! ---------------------- + ! Create slow integrator + ! ---------------------- + + s_arkode_mem = FMRIStepCreate(c_funloc(fs), c_null_funptr, t0, yvec, m_stepper, sunctx) + if (.not. c_associated(s_arkode_mem)) then + print *, 'ERROR: s_arkode_mem = NULL' + stop 1 + end if + + retval = FARKodeSStolerances(s_arkode_mem, rtol, atol) + call check_retval(retval, "FARKodeSStolerances") + + retval = FARKodeSetOrder(s_arkode_mem, 4) + call check_retval(retval, "FARKodeSetOrder") + + s_controller_H => FSUNAdaptController_I(sunctx) + if (.not. associated(s_controller_H)) then + print *, 'ERROR: s_controller_H = NULL' + stop 1 + end if + + s_controller_Tol => FSUNAdaptController_I(sunctx) + if (.not. associated(s_controller_Tol)) then + print *, 'ERROR: s_controller_Tol = NULL' + stop 1 + end if + + s_controller => FSUNAdaptController_MRIHTol(s_controller_H, s_controller_Tol, sunctx) + if (.not. associated(s_controller)) then + print *, 'ERROR: s_controller = NULL' + stop 1 + end if + + retval = FARKodeSetAdaptController(s_arkode_mem, s_controller) + call check_retval(retval, "FARKodeSetAdaptController") + + ! --------------- + ! Advance in time + ! --------------- + + output_dt = (tf - t0)/num_output ! output interval + tout = t0 + output_dt ! output time + tret = t0 ! return time + + ! Get state data to output + ydata => FN_VGetArrayPointer(yvec) + + print '(7A23)', "t", "u", "v", "w", "u err", "v err", "w err" + do iout = 1, 7*23 + write (*, '(A)', advance='no') "-" + end do + write (*, *) + print '(7ES23.15)', tret(1), ydata(1), ydata(2), ydata(3), 0.0d0, 0.0d0, 0.0d0 + + do iout = 1, num_output + + ! Stop at output time (do not interpolate) + retval = FARKodeSetStopTime(s_arkode_mem, tout) + + retval = FARKodeEvolve(s_arkode_mem, tout, yvec, tret, ARK_NORMAL) + call check_retval(retval, "FARKodeEvolve") + + ! Output solution and error + uerr = abs(utrue(tret(1)) - ydata(1)) + verr = abs(vtrue(tret(1)) - ydata(2)) + werr = abs(wtrue(tret(1)) - ydata(3)) + + print '(7ES23.15)', tret(1), ydata(1), ydata(2), ydata(3), uerr, verr, werr + + ! Update output time + tout = tout + output_dt + + end do + + do iout = 1, 7*23 + write (*, '(A)', advance='no') "-" + end do + write (*, *) + write (*, *) + + ! ---------------- + ! Final statistics + ! ---------------- + + ! Open up the file output.log for writing + retval = FSUNDIALSFileOpen("stdout", "w+", fp) + call check_retval(retval, "FSUNDIALSFileOpen") + + print '(A)', "Slow Integrator Stats" + flush (output_unit) + retval = FARKodePrintAllStats(s_arkode_mem, fp, SUN_OUTPUTFORMAT_TABLE) + write (*, *) + print '(A)', "Intermediate Integrator Stats" + flush (output_unit) + retval = FARKodePrintAllStats(m_arkode_mem, fp, SUN_OUTPUTFORMAT_TABLE) + write (*, *) + print '(A)', "Fast Integrator Stats" + flush (output_unit) + retval = FARKodePrintAllStats(f_arkode_mem, fp, SUN_OUTPUTFORMAT_TABLE) + + ! Close the file + retval = FSUNDIALSFileClose(fp) + call check_retval(retval, "FSUNDIALSFileOpen") + + ! -------- + ! Clean up + ! -------- + + ! Solution vector + call FN_VDestroy(yvec) + + ! Fast integrator + retval = FMRIStepInnerStepper_Free(f_stepper) + call FARKodeFree(f_arkode_mem) + + ! Intermediate integrator + retval = FSUNAdaptController_Destroy(m_controller) + retval = FSUNAdaptController_Destroy(m_controller_H) + retval = FSUNAdaptController_Destroy(m_controller_Tol) + retval = FMRIStepInnerStepper_Free(m_stepper) + call FARKodeFree(m_arkode_mem) + + ! Slow integrator + retval = FSUNAdaptController_Destroy(s_controller) + retval = FSUNAdaptController_Destroy(s_controller_H) + retval = FSUNAdaptController_Destroy(s_controller_Tol) + call FARKodeFree(s_arkode_mem) + + ! Context + retval = FSUNContext_Free(sunctx) + +end program main + +! ------------------------------------------------------------------------------ +! Utility to check return values +! ------------------------------------------------------------------------------ + +subroutine check_retval(retval, name) + use, intrinsic :: iso_c_binding + + character(len=*) :: name + integer(c_int) :: retval + + if (retval < 0) then + write (*, '(A,A,A,I3)') 'ERROR: ', name, ' returned ', retval + stop 1 + end if +end subroutine check_retval diff --git a/examples/arkode/F2003_serial/ark_kpr_nestedmri_f2003.out b/examples/arkode/F2003_serial/ark_kpr_nestedmri_f2003.out new file mode 100644 index 0000000000..9a6dee1c1e --- /dev/null +++ b/examples/arkode/F2003_serial/ark_kpr_nestedmri_f2003.out @@ -0,0 +1,78 @@ + t u v w u err v err w err +----------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0.000000000000000E+00 1.581138830084190E+00 1.732050807568877E+00 1.732050807568877E+00 0.000000000000000E+00 0.000000000000000E+00 0.000000000000000E+00 + 2.500000000000000E-01 1.575354291246483E+00 1.691373126460135E+00 9.812421429231667E-01 8.614969168394548E-04 2.345080409599243E-03 2.419407022538012E-02 + 5.000000000000000E-01 1.560212294026093E+00 1.084576713219838E+00 1.207272508262092E+00 1.450691686633254E-03 9.735026934536206E-03 2.997874834915581E-02 + 7.500000000000000E-01 1.535821076747637E+00 1.467789591527085E+00 1.255006960906892E+00 2.309097989505071E-03 1.257411189143820E-02 3.526051581286360E-02 + 1.000000000000000E+00 1.503887082550468E+00 1.643393179226049E+00 1.535654981910508E+00 2.814995809440779E-03 1.525150440227940E-02 3.101170591704605E-02 + 1.250000000000000E+00 1.465905643537446E+00 1.089933329662596E+00 1.084638429275984E+00 2.992304312449567E-03 2.789958266923698E-02 4.466716150204997E-02 + 1.500000000000000E+00 1.423229745439111E+00 1.425917760680276E+00 1.339651334373588E+00 3.433705005265075E-03 2.577827207164485E-02 4.443172398449646E-02 + 1.750000000000000E+00 1.378128849503047E+00 1.705806280347786E+00 1.350088006444161E+00 4.215887592317724E-03 2.584462291202727E-02 5.098823106377615E-02 + 2.000000000000000E+00 1.333355012586035E+00 1.542435734349903E+00 9.112070209266151E-01 5.273607206037756E-03 3.464627842147050E-02 9.439169201282838E-02 + 2.250000000000000E+00 1.291537836005878E+00 1.307772985752775E+00 1.107535568317707E+00 6.889362045145342E-03 5.013750111947490E-02 9.666829292882495E-02 + 2.500000000000000E+00 1.255098553621510E+00 1.035657788229420E+00 9.250541164345283E-01 9.586463379376520E-03 7.658378239399588E-02 1.565644027369089E-01 + 2.750000000000000E+00 1.228934066560649E+00 1.090949490777971E+00 1.585342217044049E+00 1.116625626963286E-02 9.140812086615169E-02 1.042801863250207E-01 + 3.000000000000000E+00 1.216068981183502E+00 1.092743725552892E+00 1.119044383134361E+00 1.071696253275123E-02 1.079411365878937E-01 1.262647701802380E-01 + 3.250000000000000E+00 1.215535683841296E+00 1.240138856057141E+00 1.634277111133497E+00 1.040687678694474E-02 9.750154196508354E-02 6.240302147277599E-02 + 3.500000000000000E+00 1.230683988799683E+00 1.411798205839374E+00 1.537522817681417E+00 6.963641325548942E-03 8.309509943358773E-02 4.096413595028614E-02 + 3.750000000000000E+00 1.255243587698301E+00 1.364045071871572E+00 1.405428626185899E+00 5.597528910060712E-03 7.816284969822607E-02 3.565721522248300E-02 + 4.000000000000000E+00 1.287688262349332E+00 9.732518195985076E-01 1.683597645865881E+00 5.825629948773337E-03 9.562343869545264E-02 3.551174121668166E-02 + 4.250000000000000E+00 1.327308264061362E+00 1.671985598563317E+00 9.485164357756218E-01 5.716962647833945E-03 5.329655557719581E-02 5.599299185700490E-02 + 4.500000000000000E+00 1.371496246048615E+00 1.600815391511029E+00 1.478302429626823E+00 4.949212472938891E-03 4.982578455617181E-02 2.187914362084453E-02 + 4.750000000000000E+00 1.417737644421425E+00 1.510885615348590E+00 1.158937763182584E+00 3.107553495641957E-03 4.466421552633992E-02 8.800123298265516E-03 + 5.000000000000000E+00 1.460953298338492E+00 1.470134387042476E+00 1.643590770857166E+00 2.546306284485933E-03 3.678699021167353E-02 1.189678851420295E-03 +----------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Slow Integrator Stats +Current time = 5 +Steps = 242 +Step attempts = 312 +Stability limited steps = 0 +Accuracy limited steps = 312 +Error test fails = 70 +NLS step fails = 0 +Inequality constraint fails = 0 +Initial step size = 0.025 +Last step size = 0.0258711979041237 +Current step size = 0.0258711979041237 +Explicit slow RHS fn evals = 1491 +Implicit slow RHS fn evals = 0 +Inner stepper failures = 0 +NLS iters = 0 +NLS fails = 0 +NLS iters per step = 0 +LS setups = 0 + +Intermediate Integrator Stats +Current time = 5 +Steps = 2982 +Step attempts = 3169 +Stability limited steps = 0 +Accuracy limited steps = 3169 +Error test fails = 187 +NLS step fails = 0 +Inequality constraint fails = 0 +Initial step size = 0.0005 +Last step size = 0.00517423958082474 +Current step size = 0.00517423958082474 +Explicit slow RHS fn evals = 15901 +Implicit slow RHS fn evals = 0 +Inner stepper failures = 0 +NLS iters = 0 +NLS fails = 0 +NLS iters per step = 0 +LS setups = 0 + +Fast Integrator Stats +Current time = 5 +Steps = 84773 +Step attempts = 119399 +Stability limited steps = 0 +Accuracy limited steps = 119399 +Error test fails = 34626 +NLS step fails = 0 +Inequality constraint fails = 0 +Initial step size = 3.4322590648825e-09 +Last step size = 0.00046218630415673 +Current step size = 0.00046218630415673 +RHS fn evals = 484178 diff --git a/src/arkode/arkode_mristep.c b/src/arkode/arkode_mristep.c index be24650148..a178420166 100644 --- a/src/arkode/arkode_mristep.c +++ b/src/arkode/arkode_mristep.c @@ -150,8 +150,8 @@ void* MRIStepCreate(ARKRhsFn fse, ARKRhsFn fsi, sunrealtype t0, N_Vector y0, } /* Allocate the general MRI stepper vectors using y0 as a template */ - /* NOTE: Fse, Fsi, inner_forcing, cvals, Xvecs, sdata, zpred and zcor will - be allocated later on (based on the MRI method) */ + /* NOTE: Fse, Fsi, inner_forcing, sdata, zpred and zcor will be allocated + later on (based on the MRI method) */ /* Copy the slow RHS functions into stepper memory */ step_mem->fse = fse; @@ -212,8 +212,9 @@ void* MRIStepCreate(ARKRhsFn fse, ARKRhsFn fsi, sunrealtype t0, N_Vector y0, step_mem->nls_fails = 0; step_mem->inner_fails = 0; - /* Initialize fused op work space with sufficient storage for - at least filling the full RHS on an ImEx problem */ + /* Initialize fused op work space with sufficient storage for at least filling + the full RHS on an ImEx problem -- must be allocate here as the full RHS + is called before mriStep_Init when nesting MRI methods */ step_mem->nfusedopvecs = 3; step_mem->cvals = NULL; step_mem->cvals = (sunrealtype*)calloc(step_mem->nfusedopvecs, @@ -1074,37 +1075,47 @@ int mriStep_Init(ARKodeMem ark_mem, sunrealtype tout, int init_type) } ark_mem->lrw += step_mem->MRIC->stages; - /* Allocate reusable arrays for fused vector interface */ - if (step_mem->cvals) - { - free(step_mem->cvals); - ark_mem->lrw -= step_mem->nfusedopvecs; - } - if (step_mem->Xvecs) - { - free(step_mem->Xvecs); - ark_mem->liw -= step_mem->nfusedopvecs; - } - step_mem->nfusedopvecs = 2 * step_mem->MRIC->stages + 2 + step_mem->nforcing; - step_mem->cvals = (sunrealtype*)calloc(step_mem->nfusedopvecs, - sizeof(*step_mem->cvals)); - if (step_mem->cvals == NULL) - { - arkProcessError(ark_mem, ARK_MEM_FAIL, __LINE__, __func__, __FILE__, - MSG_ARK_MEM_FAIL); - return (ARK_MEM_FAIL); - } - ark_mem->lrw += step_mem->nfusedopvecs; + /* Allocate reusable arrays for fused vector operations */ + int fused_workspace_size = + SUNMAX(3, 2 * step_mem->MRIC->stages + 2 + step_mem->nforcing); - step_mem->Xvecs = (N_Vector*)calloc(step_mem->nfusedopvecs, - sizeof(*step_mem->Xvecs)); - if (step_mem->Xvecs == NULL) + if (step_mem->nfusedopvecs < fused_workspace_size) { - arkProcessError(ark_mem, ARK_MEM_FAIL, __LINE__, __func__, __FILE__, - MSG_ARK_MEM_FAIL); - return (ARK_MEM_FAIL); + if (step_mem->cvals) + { + free(step_mem->cvals); + step_mem->cvals = NULL; + ark_mem->lrw -= step_mem->nfusedopvecs; + } + if (step_mem->Xvecs) + { + free(step_mem->Xvecs); + step_mem->Xvecs = NULL; + ark_mem->liw -= step_mem->nfusedopvecs; + } + step_mem->nfusedopvecs = 0; + + step_mem->cvals = (sunrealtype*)calloc(fused_workspace_size, + sizeof(*step_mem->cvals)); + if (step_mem->cvals == NULL) + { + arkProcessError(ark_mem, ARK_MEM_FAIL, __LINE__, __func__, __FILE__, + MSG_ARK_MEM_FAIL); + return (ARK_MEM_FAIL); + } + + step_mem->Xvecs = (N_Vector*)calloc(fused_workspace_size, + sizeof(*step_mem->Xvecs)); + if (step_mem->Xvecs == NULL) + { + arkProcessError(ark_mem, ARK_MEM_FAIL, __LINE__, __func__, __FILE__, + MSG_ARK_MEM_FAIL); + return (ARK_MEM_FAIL); + } + step_mem->nfusedopvecs = fused_workspace_size; + ark_mem->lrw += fused_workspace_size; + ark_mem->liw += fused_workspace_size; } - ark_mem->liw += step_mem->nfusedopvecs; /* Retrieve/store method and embedding orders now that tables are finalized */ step_mem->stages = step_mem->MRIC->stages; @@ -4914,12 +4925,13 @@ int mriStep_SetInnerForcing(ARKodeMem ark_mem, sunrealtype tshift, has a stale forcing function */ ark_mem->fn_is_current = SUNFALSE; - /* If cvals and Xvecs are not allocated then mriStep_Init has not been - called and the number of stages has not been set yet. These arrays will - be allocated in mriStep_Init and take into account the value of nforcing. - On subsequent calls will check if enough space has allocated in case - nforcing has increased since the original allocation. */ - if (step_mem->cvals != NULL && step_mem->Xvecs != NULL) + /* If the coupling table is NULL, then mriStep_Init has not been called and + the number of stages has not been set yet. In this case, the workspace + arrays for fused vector operations will be re-allocated in mriStep_Init + if necessary to account the value of nforcing. On subsequent calls we + check if enough space has already been allocated in case nforcing has + increased since the original allocation. */ + if (step_mem->MRIC) { /* check if there are enough reusable arrays for fused operations */ if ((step_mem->nfusedopvecs - nvecs) < (2 * step_mem->MRIC->stages + 2)) diff --git a/src/arkode/fmod_int32/CMakeLists.txt b/src/arkode/fmod_int32/CMakeLists.txt index bdb7e82804..c404b55cf3 100644 --- a/src/arkode/fmod_int32/CMakeLists.txt +++ b/src/arkode/fmod_int32/CMakeLists.txt @@ -39,6 +39,7 @@ sundials_add_f2003_library( LINK_LIBRARIES PUBLIC sundials_fcore_mod OBJECT_LIBRARIES sundials_fnvecserial_mod_obj + sundials_fsunadaptcontrollermrihtol_mod_obj sundials_fsunadaptcontrollersoderlind_mod_obj sundials_fsunadaptcontrollerimexgus_mod_obj sundials_fsunmatrixband_mod_obj diff --git a/src/arkode/fmod_int64/CMakeLists.txt b/src/arkode/fmod_int64/CMakeLists.txt index bdb7e82804..c404b55cf3 100644 --- a/src/arkode/fmod_int64/CMakeLists.txt +++ b/src/arkode/fmod_int64/CMakeLists.txt @@ -39,6 +39,7 @@ sundials_add_f2003_library( LINK_LIBRARIES PUBLIC sundials_fcore_mod OBJECT_LIBRARIES sundials_fnvecserial_mod_obj + sundials_fsunadaptcontrollermrihtol_mod_obj sundials_fsunadaptcontrollersoderlind_mod_obj sundials_fsunadaptcontrollerimexgus_mod_obj sundials_fsunmatrixband_mod_obj diff --git a/test/answers b/test/answers index 7768e33034..9193ddcb1c 160000 --- a/test/answers +++ b/test/answers @@ -1 +1 @@ -Subproject commit 7768e33034c1f6b3c80d8eb9c6a7357c68dc036a +Subproject commit 9193ddcb1c8d7c26d122c0f7608285d24b2d1646