diff --git a/LIB/EQUATION/ACMnew/module_ACM.f90 b/LIB/EQUATION/ACMnew/module_ACM.f90 index 65c076ac..3325c993 100644 --- a/LIB/EQUATION/ACMnew/module_ACM.f90 +++ b/LIB/EQUATION/ACMnew/module_ACM.f90 @@ -83,7 +83,7 @@ module module_acm real(kind=rk) :: mask_volume=0.0_rk, meanflow_channel(1:3) = 0.0_rk logical :: use_passive_scalar = .false. - integer(kind=ik) :: N_scalars = 0, nsave_stats = 999999 + integer(kind=ik) :: N_scalars = 0 real(kind=rk), allocatable :: schmidt_numbers(:), x0source(:), y0source(:), & z0source(:), scalar_Ceta(:), widthsource(:) character(len=cshort), allocatable :: scalar_inicond(:), scalar_source_type(:) @@ -192,9 +192,9 @@ subroutine READ_PARAMETERS_ACM( filename, N_mask_components, g ) character(len=*), intent(in) :: filename integer(kind=ik) :: mpicode, nx_max, n_entries - real(kind=rk) :: dx_min, dt_min + real(kind=rk) :: dx_min, dt_min_c0, dt_min_vpm, dt_min_nu character(len=cshort) :: Bs_str, Bs_conc - character(len=16834) :: input_files + character(len=400) :: input_files character(len=12) :: timestamp character(:), allocatable :: Bs_short real(kind=rk), dimension(3) :: ddx @@ -348,7 +348,6 @@ subroutine READ_PARAMETERS_ACM( filename, N_mask_components, g ) call read_param_mpi(FILE, 'Time', 'time_max', params_acm%T_end, 1.0_rk ) - call read_param_mpi(FILE, 'Statistics', 'nsave_stats', params_acm%nsave_stats, 999999 ) call read_param_mpi(FILE, 'FreeFlightSolver', 'use_free_flight_solver', params_acm%use_free_flight_solver, .false. ) call read_param_mpi(FILE, 'Blocks', 'max_treelevel', params_acm%Jmax, 1 ) @@ -428,11 +427,23 @@ subroutine READ_PARAMETERS_ACM( filename, N_mask_components, g ) ! uniqueGrid modification nx_max = maxval( (params_acm%Bs) * 2**(params_acm%Jmax) ) + ! print some time-step related information if (params_acm%c_0 > 0.0_rk) then - dt_min = params_acm%CFL*dx_min/params_acm%c_0 + dt_min_c0 = params_acm%CFL*dx_min/params_acm%c_0 else - dt_min = 0.0_rk + dt_min_c0 = 0.0_rk endif + if (params_acm%nu > 0.0_rk) then + dt_min_nu = params_acm%CFL_nu*dx_min**2/params_acm%nu + else + dt_min_nu = 0.0_rk + endif + if (params_acm%penalization .and. params_acm%C_eta > 0.0_rk) then + dt_min_vpm = params_acm%CFL_eta*params_acm%C_eta + else + dt_min_vpm = 0.0_rk + endif + ! nice to have this elsewhere in the ACM module: params_acm%dx_min = dx_min @@ -442,15 +453,17 @@ subroutine READ_PARAMETERS_ACM( filename, N_mask_components, g ) if (params_acm%mpirank==0) then write(*,'(80("<"))') - write(*,*) "Some information:" - write(*,'("c0=",g12.4," C_eta=",g12.4," CFL=",g12.4)') params_acm%c_0, params_acm%C_eta, params_acm%CFL - write(*,'("dx_min=",g12.4," dt(CFL,c0,dx_min)=",g12.4)') dx_min, dt_min - write(*,'("if all blocks were at Jmax, the resolution would be nx=",i5)') nx_max + write(*,'(A)') "Some information:" + write(*,'(" c0=",g12.4," CFL=",g12.4, "CFL_eta=",g12.4, "CFL_nu=",g12.4)') params_acm%c_0, params_acm%CFL, params_acm%CFL_eta, params_acm%CFL_nu + write(*,'(" dx_min=",g12.4)') dx_min + write(*,'(" dt(CFL,c0,dx_min)=",g12.4)') dt_min_c0 + write(*,'(" dt(CFL_nu,nu,dx_min)=",g12.4)') dt_min_nu + write(*,'(" dt(CFL_vpm,C_eta,dx_min)=",g12.4)') dt_min_vpm + write(*,'(" if all blocks were at Jmax, the resolution would be nx=",i5)') nx_max if (params_acm%penalization) then - write(*,'("C_eta=",es12.4," K_eta=",es12.4)') params_acm%C_eta, sqrt(params_acm%C_eta*params_acm%nu)/dx_min + write(*,'(" C_eta=",es12.4," K_eta=",es12.4)') params_acm%C_eta, sqrt(params_acm%C_eta*params_acm%nu)/dx_min endif - write(*,'("N_mask_components=",i1)') N_mask_components - write(*,'("N_scalars=",i2)') params_acm%N_scalars + write(*,'("N_mask_components=",i1, " N_scalars=",i1, " N_time_statistics=",i1)') N_mask_components, params_acm%N_scalars, params_acm%N_time_statistics write(*,'(80("<"))') endif diff --git a/LIB/EQUATION/ACMnew/rhs_ACM.f90 b/LIB/EQUATION/ACMnew/rhs_ACM.f90 index 7fb29c4a..6b62572b 100644 --- a/LIB/EQUATION/ACMnew/rhs_ACM.f90 +++ b/LIB/EQUATION/ACMnew/rhs_ACM.f90 @@ -817,6 +817,7 @@ end subroutine RHS_2D_acm subroutine RHS_3D_acm(g, Bs, dx, x0, phi, order_discretization, time, rhs, mask, n_domain) + use module_operators implicit none !> grid parameter @@ -875,6 +876,9 @@ subroutine RHS_3D_acm(g, Bs, dx, x0, phi, order_discretization, time, rhs, mask, real(kind=rk), parameter :: a_FD6(-3:3) = (/-1.0_rk/60.0_rk, 3.0_rk/20.0_rk, -3.0_rk/4.0_rk, 0.0_rk, 3.0_rk/4.0_rk, -3.0_rk/20.0_rk, 1.0_rk/60.0_rk/) ! 1st derivative real(kind=rk), parameter :: b_FD6(-3:3) = (/ 1.0_rk/90.0_rk, -3.0_rk/20.0_rk, 3.0_rk/2.0_rk, -49.0_rk/18.0_rk, 3.0_rk/2.0_rk, -3.0_rk/20.0_rk, 1.0_rk/90.0_rk/) ! 2nd derivative + real(kind=rk), allocatable, dimension(:) :: FD1_l, FD2 + integer(kind=ik) :: FD1_ls, FD1_le, FD2_s, FD2_e + ! set parameters for readability c_0 = params_acm%c_0 @@ -1453,7 +1457,131 @@ subroutine RHS_3D_acm(g, Bs, dx, x0, phi, order_discretization, time, rhs, mask, endif case default - call abort(441167, "3d Discretization unkown "//order_discretization//", I ll walk into the light now." ) + ! call abort(44167, "3d Discretization unknown "// trim(order_discretization)//", I'll walk into the light now.") + + ! Flexible operator version for 3D ACM using generalized stencils + + call setup_FD1_left_stencil(order_discretization, FD1_l, FD1_ls, FD1_le) + call setup_FD2_stencil(order_discretization, FD2, FD2_s, FD2_e) + + if (params_acm%skew_symmetry) then + do iz = g+1, Bs(3)+g + do iy = g+1, Bs(2)+g + do ix = g+1, Bs(1)+g + u = phi(ix,iy,iz,1) + v = phi(ix,iy,iz,2) + w = phi(ix,iy,iz,3) + p = phi(ix,iy,iz,4) + + chi = mask(ix,iy,iz,1) * C_eta_inv + penalx = -chi * (u - mask(ix,iy,iz,2)) + penaly = -chi * (v - mask(ix,iy,iz,3)) + penalz = -chi * (w - mask(ix,iy,iz,4)) + + ! Generalized first derivatives + u_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,1)) * dx_inv + u_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,1)) * dy_inv + u_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,1)) * dz_inv + + v_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,2)) * dx_inv + v_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,2)) * dy_inv + v_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,2)) * dz_inv + + w_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,3)) * dx_inv + w_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,3)) * dy_inv + w_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,3)) * dz_inv + + p_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,4)) * dx_inv + p_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,4)) * dy_inv + p_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,4)) * dz_inv + + ! Generalized nonlinear terms + uu_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,1)*phi(ix+FD1_ls:ix+FD1_le,iy,iz,1)) * dx_inv + uv_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,1)*phi(ix,iy+FD1_ls:iy+FD1_le,iz,2)) * dy_inv + uw_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,1)*phi(ix,iy,iz+FD1_ls:iz+FD1_le,3)) * dz_inv + + vu_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,2)*phi(ix+FD1_ls:ix+FD1_le,iy,iz,1)) * dx_inv + vv_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,2)*phi(ix,iy+FD1_ls:iy+FD1_le,iz,2)) * dy_inv + vw_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,2)*phi(ix,iy,iz+FD1_ls:iz+FD1_le,3)) * dz_inv + + wu_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,3)*phi(ix+FD1_ls:ix+FD1_le,iy,iz,1)) * dx_inv + wv_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,3)*phi(ix,iy+FD1_ls:iy+FD1_le,iz,2)) * dy_inv + ww_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,3)*phi(ix,iy,iz+FD1_ls:iz+FD1_le,3)) * dz_inv + + ! Generalized second derivatives + u_dxdx = sum(FD2(FD2_s:FD2_e) * phi(ix+FD2_s:ix+FD2_e,iy,iz,1)) * dx2_inv + u_dydy = sum(FD2(FD2_s:FD2_e) * phi(ix,iy+FD2_s:iy+FD2_e,iz,1)) * dy2_inv + u_dzdz = sum(FD2(FD2_s:FD2_e) * phi(ix,iy,iz+FD2_s:iz+FD2_e,1)) * dz2_inv + + v_dxdx = sum(FD2(FD2_s:FD2_e) * phi(ix+FD2_s:ix+FD2_e,iy,iz,2)) * dx2_inv + v_dydy = sum(FD2(FD2_s:FD2_e) * phi(ix,iy+FD2_s:iy+FD2_e,iz,2)) * dy2_inv + v_dzdz = sum(FD2(FD2_s:FD2_e) * phi(ix,iy,iz+FD2_s:iz+FD2_e,2)) * dz2_inv + + w_dxdx = sum(FD2(FD2_s:FD2_e) * phi(ix+FD2_s:ix+FD2_e,iy,iz,3)) * dx2_inv + w_dydy = sum(FD2(FD2_s:FD2_e) * phi(ix,iy+FD2_s:iy+FD2_e,iz,3)) * dy2_inv + w_dzdz = sum(FD2(FD2_s:FD2_e) * phi(ix,iy,iz+FD2_s:iz+FD2_e,3)) * dz2_inv + + ! Skew-symmetric formulation + rhs(ix,iy,iz,1) = -0.5_rk*(uu_dx + uv_dy + uw_dz + u*u_dx + v*u_dy + w*u_dz) -p_dx + nu*(u_dxdx + u_dydy + u_dzdz) + penalx + rhs(ix,iy,iz,2) = -0.5_rk*(vu_dx + vv_dy + vw_dz + u*v_dx + v*v_dy + w*v_dz) -p_dy + nu*(v_dxdx + v_dydy + v_dzdz) + penaly + rhs(ix,iy,iz,3) = -0.5_rk*(wu_dx + wv_dy + ww_dz + u*w_dx + v*w_dy + w*w_dz) -p_dz + nu*(w_dxdx + w_dydy + w_dzdz) + penalz + rhs(ix,iy,iz,4) = -(c_0**2)*(u_dx + v_dy + w_dz) - gamma*p + end do + end do + end do + else + do iz = g+1, Bs(3)+g + do iy = g+1, Bs(2)+g + do ix = g+1, Bs(1)+g + u = phi(ix,iy,iz,1) + v = phi(ix,iy,iz,2) + w = phi(ix,iy,iz,3) + p = phi(ix,iy,iz,4) + + chi = mask(ix,iy,iz,1) * C_eta_inv + penalx = -chi * (u - mask(ix,iy,iz,2)) + penaly = -chi * (v - mask(ix,iy,iz,3)) + penalz = -chi * (w - mask(ix,iy,iz,4)) + + ! Generalized first derivatives + u_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,1)) * dx_inv + u_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,1)) * dy_inv + u_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,1)) * dz_inv + + v_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,2)) * dx_inv + v_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,2)) * dy_inv + v_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,2)) * dz_inv + + w_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,3)) * dx_inv + w_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,3)) * dy_inv + w_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,3)) * dz_inv + + p_dx = sum(FD1_l(FD1_ls:FD1_le) * phi(ix+FD1_ls:ix+FD1_le,iy,iz,4)) * dx_inv + p_dy = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy+FD1_ls:iy+FD1_le,iz,4)) * dy_inv + p_dz = sum(FD1_l(FD1_ls:FD1_le) * phi(ix,iy,iz+FD1_ls:iz+FD1_le,4)) * dz_inv + + ! Generalized second derivatives + u_dxdx = sum(FD2(FD2_s:FD2_e) * phi(ix+FD2_s:ix+FD2_e,iy,iz,1)) * dx2_inv + u_dydy = sum(FD2(FD2_s:FD2_e) * phi(ix,iy+FD2_s:iy+FD2_e,iz,1)) * dy2_inv + u_dzdz = sum(FD2(FD2_s:FD2_e) * phi(ix,iy,iz+FD2_s:iz+FD2_e,1)) * dz2_inv + + v_dxdx = sum(FD2(FD2_s:FD2_e) * phi(ix+FD2_s:ix+FD2_e,iy,iz,2)) * dx2_inv + v_dydy = sum(FD2(FD2_s:FD2_e) * phi(ix,iy+FD2_s:iy+FD2_e,iz,2)) * dy2_inv + v_dzdz = sum(FD2(FD2_s:FD2_e) * phi(ix,iy,iz+FD2_s:iz+FD2_e,2)) * dz2_inv + + w_dxdx = sum(FD2(FD2_s:FD2_e) * phi(ix+FD2_s:ix+FD2_e,iy,iz,3)) * dx2_inv + w_dydy = sum(FD2(FD2_s:FD2_e) * phi(ix,iy+FD2_s:iy+FD2_e,iz,3)) * dy2_inv + w_dzdz = sum(FD2(FD2_s:FD2_e) * phi(ix,iy,iz+FD2_s:iz+FD2_e,3)) * dz2_inv + + ! Standard formulation + rhs(ix,iy,iz,1) = (-u*u_dx - v*u_dy - w*u_dz) -p_dx + nu*(u_dxdx + u_dydy + u_dzdz) + penalx + rhs(ix,iy,iz,2) = (-u*v_dx - v*v_dy - w*v_dz) -p_dy + nu*(v_dxdx + v_dydy + v_dzdz) + penaly + rhs(ix,iy,iz,3) = (-u*w_dx - v*w_dy - w*w_dz) -p_dz + nu*(w_dxdx + w_dydy + w_dzdz) + penalz + rhs(ix,iy,iz,4) = -(c_0**2)*(u_dx + v_dy + w_dz) - gamma*p + end do + end do + end do + endif end select @@ -1483,7 +1611,6 @@ subroutine RHS_3D_acm(g, Bs, dx, x0, phi, order_discretization, time, rhs, mask, ! -------------------------------------------------------------------------- ! HIT linear forcing - ! ATTENTION! This is at last position because I modify phi to avoid a 3-nested do-loop to subtract mean-flow ! -------------------------------------------------------------------------- if (params_acm%HIT_linear_forcing) then G_gain = params_acm%HIT_gain diff --git a/LIB/EQUATION/ACMnew/sponge.f90 b/LIB/EQUATION/ACMnew/sponge.f90 index 44c00aac..3604c7fb 100644 --- a/LIB/EQUATION/ACMnew/sponge.f90 +++ b/LIB/EQUATION/ACMnew/sponge.f90 @@ -78,22 +78,30 @@ subroutine sponge_3D(sponge, x0, dx, Bs, g) if (params_acm%sponge_type == "rect") then - ! rectangular sponge with 45deg edges - do iz = g+1, Bs(3)+g - z = dble(iz-(g+1)) * dx(3) + x0(3) - do iy = g+1, Bs(2)+g - y = dble(iy-(g+1)) * dx(2) + x0(2) - do ix = g+1, Bs(1)+g - x = dble(ix-(g+1)) * dx(1) + x0(1) - - ! distance to borders of domain - tmp = minval( (/x,y,z,-(x-params_acm%domain_size(1)),& - -(y-params_acm%domain_size(2)),-(z-params_acm%domain_size(3))/) ) - sponge(ix,iy,iz) = smoothstep( tmp, 0.5_rk*params_acm%L_sponge, 0.5_rk*params_acm%L_sponge) + ! check if this block is in sponge layer + ! find the point in the block closest to the domain boundary + tmp = min(minval(x0), minval(params_acm%domain_size - (x0 + dx*dble(Bs)))) + if (tmp > params_acm%L_sponge) then + sponge = 0.0_rk + else + ! rectangular sponge with 45deg edges + do iz = g+1, Bs(3)+g + z = dble(iz-(g+1)) * dx(3) + x0(3) + do iy = g+1, Bs(2)+g + y = dble(iy-(g+1)) * dx(2) + x0(2) + do ix = g+1, Bs(1)+g + x = dble(ix-(g+1)) * dx(1) + x0(1) + + ! distance to borders of domain + tmp = minval( (/x,y,z,-(x-params_acm%domain_size(1)),& + -(y-params_acm%domain_size(2)),-(z-params_acm%domain_size(3))/) ) + + sponge(ix,iy,iz) = smoothstep( tmp, 0.5_rk*params_acm%L_sponge, 0.5_rk*params_acm%L_sponge) + end do end do end do - end do + endif ! sponge for using with symmetry_BC ! insect is supposed to be at y=0 @@ -149,6 +157,7 @@ subroutine sponge_3D(sponge, x0, dx, Bs, g) ! p-norm sponge. The shape of the sponge is dictated as the p-norm ! https://de.wikipedia.org/wiki/P-Norm ! which is a nice and simple way to get a rectangle with round corners. + ! For 2 it is the eucledian norm (circle), for infinity it is a rectangle and for everything in between a rounded rectangle if ( maxval(abs(params_acm%domain_size-params_acm%domain_size(1))) > 1.0e-10_rk) then call abort(1610184,"ERROR: for the p-norm sponge, the domain has to be same size in all directions.") @@ -158,21 +167,31 @@ subroutine sponge_3D(sponge, x0, dx, Bs, g) pinv = 1.0_rk / p offset = 0.5_rk * params_acm%domain_size(1) - do iz = g+1, Bs(3)+g - z = (dble(iz-(g+1)) * dx(3) + x0(3) - offset)**p - do iy = g+1, Bs(2)+g - y = (dble(iy-(g+1)) * dx(2) + x0(2) - offset)**p - do ix = g+1, Bs(1)+g - x = (dble(ix-(g+1)) * dx(1) + x0(1) - offset)**p - - ! distance to borders of domain - tmp = -( (x + y + z)**pinv - offset) - - sponge(ix,iy,iz) = smoothstep( tmp, 0.5_rk*params_acm%L_sponge, & - 0.5_rk*params_acm%L_sponge) + ! check if this block is in sponge layer + ! Find the point in the block furthest from the domain center + x = max(abs(x0(1)-offset), abs(x0(1)+dx(1)*dble(Bs(1))-offset))**p + y = max(abs(x0(2)-offset), abs(x0(2)+dx(2)*dble(Bs(2))-offset))**p + z = max(abs(x0(3)-offset), abs(x0(3)+dx(3)*dble(Bs(3))-offset))**p + tmp = offset - (x + y + z)**pinv ! we invert distance to center to distance from boundary + if (tmp > params_acm%L_sponge) then + sponge = 0.0_rk + else + do iz = g+1, Bs(3)+g + z = (dble(iz-(g+1)) * dx(3) + x0(3) - offset)**p + do iy = g+1, Bs(2)+g + y = (dble(iy-(g+1)) * dx(2) + x0(2) - offset)**p + do ix = g+1, Bs(1)+g + x = (dble(ix-(g+1)) * dx(1) + x0(1) - offset)**p + + ! distance to borders of domain + tmp = -( (x + y + z)**pinv - offset) + + sponge(ix,iy,iz) = smoothstep( tmp, 0.5_rk*params_acm%L_sponge, & + 0.5_rk*params_acm%L_sponge) + end do end do end do - end do + endif elseif (params_acm%sponge_type == "p-norm-insect-centered") then ! p-norm sponge. The shape of the sponge is dictated as the p-norm diff --git a/LIB/EQUATION/ACMnew/time_statistics_ACM.f90 b/LIB/EQUATION/ACMnew/time_statistics_ACM.f90 index 5c566ecd..1af04ba7 100644 --- a/LIB/EQUATION/ACMnew/time_statistics_ACM.f90 +++ b/LIB/EQUATION/ACMnew/time_statistics_ACM.f90 @@ -34,7 +34,7 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) ! non-ghost point has the coordinate x0, from then on its just cartesian with dx spacing real(kind=rk), intent(in) :: x0(1:3), dx(1:3) - integer(kind=ik) :: iB, ix, iy, iz, Bs(3), i_ts, N_offset + integer(kind=ik) :: iB, ix, iy, iz, Bs(3), i_ts, N_offset, mean_idx1, mean_idx2, j real(kind=rk) :: time_diff ! compute the size of blocks @@ -45,13 +45,12 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) N_offset = params_acm%dim + 1 + params_acm%N_scalars time_diff = time-time_start - do i_ts = 1, params_acm%N_time_statistics - ! at the beginning of the simulation, initialize the array to zero - if ( abs(time - time_start) < 1.0e-12_rk ) then - u(:,:,:,N_offset + i_ts) = 0.0_rk - cycle - end if + ! if time is right at time_start, we do nothing, we already set the values to zero or load them in earlier + if ( time - time_start < 1.0e-12_rk ) then + return + end if + do i_ts = 1, params_acm%N_time_statistics select case (trim(params_acm%time_statistics_names(i_ts))) case ("ux-int") ! compute the integral of ux over time @@ -61,16 +60,14 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * u(:,:,:,1) case ("ux-var") ! compute the variance of ux over time using Welford's method - ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "ux-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "ux-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'ux-avg' or 'ux-mean' right after 'ux-var' in time_statistics_names.") - end if + ! This needs the computation of the average somewhere after this field + call find_single_mean_index(i_ts, "ux-avg", "ux-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'ux-avg' or 'ux-mean' somewhere after 'ux-var' in time_statistics_names.", 251016_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (u(:,:,:,1) - u(:,:,:,N_offset + i_ts + 1)) * & - (u(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*u(:,:,:,1)) + (u(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*u(:,:,:,1)) case ("uy-int") ! compute the integral of uy over time @@ -80,16 +77,14 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * u(:,:,:,2) case ("uy-var") ! compute the variance of uy over time using Welford's method - ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "uy-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "uy-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'uy-avg' or 'uy-mean' right after 'uy-var' in time_statistics_names.") - end if + ! This needs the computation of the average somewhere after this field + call find_single_mean_index(i_ts, "uy-avg", "uy-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'uy-avg' or 'uy-mean' somewhere after 'uy-var' in time_statistics_names.", 251016_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (u(:,:,:,2) - u(:,:,:,N_offset + i_ts + 1)) * & - (u(:,:,:,2) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*u(:,:,:,2)) + (u(:,:,:,2) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,2) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*u(:,:,:,2)) case ("uz-int") if (params_acm%dim == 2) call abort(250915, "[TIME_STATISTICS_ACM]: uz not available in 2D simulations.") @@ -102,16 +97,14 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) case ("uz-var") if (params_acm%dim == 2) call abort(250916, "[TIME_STATISTICS_ACM]: uz not available in 2D simulations.") ! compute the variance of uz over time using Welford's method - ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "uz-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "uz-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'uz-avg' or 'uz-mean' right after 'uz-var' in time_statistics_names.") - end if + ! This needs the computation of the average somewhere after this field + call find_single_mean_index(i_ts, "uz-avg", "uz-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'uz-avg' or 'uz-mean' somewhere after 'uz-var' in time_statistics_names.", 251016_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (u(:,:,:,3) - u(:,:,:,N_offset + i_ts + 1)) * & - (u(:,:,:,3) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*u(:,:,:,3)) + (u(:,:,:,3) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,3) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*u(:,:,:,3)) case ("p-int") ! compute the integral of p over time @@ -121,16 +114,14 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * u(:,:,:,params_acm%dim+1) case ("p-var") ! compute the variance of p over time using Welford's method - ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "p-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "p-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'p-avg' or 'p-mean' right after 'p-var' in time_statistics_names.") - end if + ! This needs the computation of the average somewhere after this field + call find_single_mean_index(i_ts, "p-avg", "p-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'p-avg' or 'p-mean' somewhere after 'p-var' in time_statistics_names.", 251016_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (u(:,:,:,params_acm%dim+1) - u(:,:,:,N_offset + i_ts + 1)) * & - (u(:,:,:,params_acm%dim+1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*u(:,:,:,params_acm%dim+1)) + (u(:,:,:,params_acm%dim+1) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,params_acm%dim+1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*u(:,:,:,params_acm%dim+1)) case ("umag-int") ! compute the integral of velocity magnitude over time @@ -144,19 +135,17 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * sqrt( work(:,:,:,1) ) case ("umag-var") ! compute the variance of velocity magnitude over time using Welford's method - ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "umag-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "umag-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'umag-avg' or 'umag-mean' right after 'umag-var' in time_statistics_names.") - end if + ! This needs the computation of the average somewhere after this field + call find_single_mean_index(i_ts, "umag-avg", "umag-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'umag-avg' or 'umag-mean' somewhere after 'umag-var' in time_statistics_names.", 251016_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x work (:,:,:,1) = u(:,:,:,1)**2 + u(:,:,:,2)**2 if (params_acm%dim == 3) work(:,:,:,1) = work(:,:,:,1) + u(:,:,:,3)**2 work(:,:,:,1) = sqrt( work(:,:,:,1) ) ! current umag value u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (work(:,:,:,1) - u(:,:,:,N_offset + i_ts + 1)) * & - (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*work(:,:,:,1)) + (work(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*work(:,:,:,1)) case ("divu-int", "div-int") ! compute the integral, average or minmax of div(u) over time @@ -168,18 +157,29 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) case ("divu-var", "div-var") ! compute the variance of div(u) over time using Welford's method - ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "divu-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "div-avg" .and. & - trim(params_acm%time_statistics_names(i_ts+1)) /= "divu-mean" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "div-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'divu-avg', 'div-avg', 'divu-mean', or 'div-mean' right after 'divu-var' or 'div-var' in time_statistics_names.") + ! This needs the computation of the average somewhere after this field + ! NOTE: Using manual search loop instead of find_single_mean_index helper because + ! this case has 4 possible mean names (divu-avg, div-avg, divu-mean, div-mean) + ! and the helper function only supports 2 name options + mean_idx1 = -1 + do j = i_ts + 1, params_acm%N_time_statistics + if (trim(params_acm%time_statistics_names(j)) == "divu-avg" .or. & + trim(params_acm%time_statistics_names(j)) == "div-avg" .or. & + trim(params_acm%time_statistics_names(j)) == "divu-mean" .or. & + trim(params_acm%time_statistics_names(j)) == "div-mean") then + mean_idx1 = j + exit + end if + end do + if (mean_idx1 == -1) then + call abort(251016, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'divu-avg', 'div-avg', 'divu-mean', or 'div-mean' somewhere after 'divu-var' or 'div-var' in time_statistics_names.") end if ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x call compute_divergence(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (work(:,:,:,1) - u(:,:,:,N_offset + i_ts + 1)) * & - (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*work(:,:,:,1)) + (work(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*work(:,:,:,1)) case ("divu-minmax", "div-minmax") ! compute the minmax of div(u) over time call compute_divergence(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) @@ -202,20 +202,31 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) call compute_vorticity(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,:)) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) case ("vort-var", "vor-var") - if (params_acm%dim == 3) call abort(250916, "[TIME_STATISTICS_ACM]: vorticity is not a scalar in 3D simulations - use vorx, vory, vorz or vorabs instead.") + if (params_acm%dim == 3) call abort(251016, "[TIME_STATISTICS_ACM]: vorticity is not a scalar in 3D simulations - use vorx, vory, vorz or vorabs instead.") ! compute the variance of vorticity magnitude over time using Welford's method ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "vort-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "vor-avg" .and. & - trim(params_acm%time_statistics_names(i_ts+1)) /= "vort-mean" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "vor-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'vort-avg', 'vor-avg', 'vort-mean', or 'vor-mean' right after 'vort-var' or 'vor-var' in time_statistics_names.") + ! NOTE: Using manual search loop instead of find_single_mean_index helper because + ! this case has 4 possible mean names (vort-avg, vor-avg, vort-mean, vor-mean) + ! and the helper function only supports 2 name options + mean_idx1 = -1 + do j = i_ts + 1, params_acm%N_time_statistics + if (trim(params_acm%time_statistics_names(j)) == "vort-avg" .or. & + trim(params_acm%time_statistics_names(j)) == "vor-avg" .or. & + trim(params_acm%time_statistics_names(j)) == "vort-mean" .or. & + trim(params_acm%time_statistics_names(j)) == "vor-mean") then + mean_idx1 = j + exit + end if + end do + if (mean_idx1 == -1) then + call abort(251016, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'vort-avg', 'vor-avg', 'vort-mean', or 'vor-mean' AFTER 'vort-var' or 'vor-var' in time_statistics_names.") end if ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x call compute_vorticity(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,:)) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (work(:,:,:,1) - u(:,:,:,N_offset + i_ts + 1)) * & - (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*work(:,:,:,1)) + (work(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*work(:,:,:,1)) case ("vorabs-int") if (params_acm%dim == 2) call abort(250916, "[TIME_STATISTICS_ACM]: vorticity is a scalar in 2D simulations - use vor instead.") @@ -228,20 +239,39 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) call compute_vorticity_abs(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) case ("vorabs-var") - if (params_acm%dim == 2) call abort(250916, "[TIME_STATISTICS_ACM]: vorticity is a scalar in 2D simulations - use vor instead.") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: vorticity is a scalar in 2D simulations - use vor instead.") ! compute the variance of vorticity magnitude over time using Welford's method ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "vorabs-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "vorabs-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'vorabs-avg' or 'vorabs-mean' right after 'vorabs-var' in time_statistics_names.") - end if + call find_single_mean_index(i_ts, "vorabs-avg", "vorabs-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'vorabs-avg' or 'vorabs-mean' AFTER 'vorabs-var' in time_statistics_names.", 251016_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x call compute_vorticity_abs(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (work(:,:,:,1) - u(:,:,:,N_offset + i_ts + 1)) * & - (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*work(:,:,:,1)) - + (work(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*work(:,:,:,1)) + + ! Helicity magnitude cases + case ("helabs-int") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: helabs not available in 2D simulations.") + ! compute the integral of helicity magnitude over time + call compute_helicity_abs(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) + u(:,:,:,N_offset + i_ts) = u(:,:,:,N_offset + i_ts) + dt* work(:,:,:,1) + case ("helabs-avg", "helabs-mean") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: helabs not available in 2D simulations.") + ! compute the average of helicity magnitude over time + call compute_helicity_abs(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + case ("helabs-var") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: helabs not available in 2D simulations.") + ! compute the variance of helicity magnitude over time using Welford's method + call find_single_mean_index(i_ts, "helabs-avg", "helabs-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'helabs-avg' or 'helabs-mean' AFTER 'helabs-var' in time_statistics_names.", 251021_ik) + call compute_helicity_abs(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & + (work(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*work(:,:,:,1)) + case ("dissipation-int") ! compute the integral of dissipation over time call compute_dissipation(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) @@ -253,16 +283,14 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) case ("dissipation-var") ! compute the variance of dissipation over time using Welford's method ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_acm%N_time_statistics .or. & - (trim(params_acm%time_statistics_names(i_ts+1)) /= "dissipation-avg" .and. trim(params_acm%time_statistics_names(i_ts+1)) /= "dissipation-mean")) then - call abort(2153001, "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'dissipation-avg' or 'dissipation-mean' right after 'dissipation-var' in time_statistics_names.") - end if + call find_single_mean_index(i_ts, "dissipation-avg", "dissipation-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the variance together with the mean value. Insert 'dissipation-avg' or 'dissipation-mean' AFTER 'dissipation-var' in time_statistics_names.", 251016_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x call compute_dissipation(u(:,:,:,1:params_acm%dim), dx, Bs, g, params_acm%discretization, work(:,:,:,1)) u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & - (work(:,:,:,1) - u(:,:,:,N_offset + i_ts + 1)) * & - (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) - dt/(time_diff)*work(:,:,:,1)) + (work(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (work(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*work(:,:,:,1)) ! Velocity derivative cases case ("uxdx-avg", "uxdx-mean") @@ -374,10 +402,162 @@ subroutine TIME_STATISTICS_ACM( time, dt, time_start, u, g, x0, dx, work, mask ) work(:,:,:,1) = u(:,:,:,3)**2 u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + ! Cross-product velocity component averages + case ("uxuy-avg", "uxuy-mean") + ! compute the average of ux*uy over time + work(:,:,:,1) = u(:,:,:,1) * u(:,:,:,2) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + case ("uxuz-avg", "uxuz-mean") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: uxuz-avg not available in 2D simulations.") + ! compute the average of ux*uz over time + work(:,:,:,1) = u(:,:,:,1) * u(:,:,:,3) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + case ("uyuz-avg", "uyuz-mean") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: uyuz-avg not available in 2D simulations.") + ! compute the average of uy*uz over time + work(:,:,:,1) = u(:,:,:,2) * u(:,:,:,3) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + + ! Velocity-pressure cross-product averages + case ("uxp-avg", "uxp-mean") + ! compute the average of ux*p over time + work(:,:,:,1) = u(:,:,:,1) * u(:,:,:,params_acm%dim+1) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + case ("uyp-avg", "uyp-mean") + ! compute the average of uy*p over time + work(:,:,:,1) = u(:,:,:,2) * u(:,:,:,params_acm%dim+1) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + case ("uzp-avg", "uzp-mean") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: uzp-avg not available in 2D simulations.") + ! compute the average of uz*p over time + work(:,:,:,1) = u(:,:,:,3) * u(:,:,:,params_acm%dim+1) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/time_diff * u(:,:,:,N_offset + i_ts) + dt/time_diff * work(:,:,:,1) + + ! Covariance cases using Welford's online algorithm + case ("uxuy-cov") + ! compute the covariance of ux and uy over time using Welford's method + ! This needs the computation of both averages somewhere after this field + call find_single_mean_index(i_ts, "ux-avg", "ux-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'ux-avg' or 'ux-mean' somewhere after 'uxuy-cov' in time_statistics_names.", 251016_ik) + call find_single_mean_index(i_ts, "uy-avg", "uy-mean", mean_idx2, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'uy-avg' or 'uy-mean' somewhere after 'uxuy-cov' in time_statistics_names.", 251016_ik) + ! cov_new = (time_diff-dt)/time_diff*cov_old + dt/time_diff*(x - mean_x_old)*(y - mean_y_new) + ! mean_x_new = (time_diff-dt)/time_diff*mean_x_old + dt/time_diff*x + ! mean_y_new = (time_diff-dt)/time_diff*mean_y_old + dt/time_diff*y + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & + (u(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,2) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx2) - dt/(time_diff)*u(:,:,:,2)) + case ("uxuz-cov") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: uxuz-cov not available in 2D simulations.") + ! compute the covariance of ux and uz over time using Welford's method + ! This needs the computation of both averages somewhere after this field + call find_single_mean_index(i_ts, "ux-avg", "ux-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'ux-avg' or 'ux-mean' somewhere after 'uxuz-cov' in time_statistics_names.", 251016_ik) + call find_single_mean_index(i_ts, "uz-avg", "uz-mean", mean_idx2, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'uz-avg' or 'uz-mean' somewhere after 'uxuz-cov' in time_statistics_names.", 251016_ik) + ! cov_new = (time_diff-dt)/time_diff*cov_old + dt/time_diff*(x - mean_x_old)*(y - mean_y_new) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & + (u(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,3) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx2) - dt/(time_diff)*u(:,:,:,3)) + case ("uyuz-cov") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: uyuz-cov not available in 2D simulations.") + ! compute the covariance of uy and uz over time using Welford's method + ! This needs the computation of both averages somewhere after this field + call find_single_mean_index(i_ts, "uy-avg", "uy-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'uy-avg' or 'uy-mean' somewhere after 'uyuz-cov' in time_statistics_names.", 251016_ik) + call find_single_mean_index(i_ts, "uz-avg", "uz-mean", mean_idx2, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'uz-avg' or 'uz-mean' somewhere after 'uyuz-cov' in time_statistics_names.", 251016_ik) + ! cov_new = (time_diff-dt)/time_diff*cov_old + dt/time_diff*(x - mean_x_old)*(y - mean_y_new) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & + (u(:,:,:,2) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,3) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx2) - dt/(time_diff)*u(:,:,:,3)) + + ! Velocity-pressure covariances + case ("uxp-cov") + ! compute the covariance of ux and p over time using Welford's method + ! This needs the computation of both averages somewhere after this field + call find_single_mean_index(i_ts, "ux-avg", "ux-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'ux-avg' or 'ux-mean' somewhere after 'uxp-cov' in time_statistics_names.", 251016_ik) + call find_single_mean_index(i_ts, "p-avg", "p-mean", mean_idx2, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'p-avg' or 'p-mean' somewhere after 'uxp-cov' in time_statistics_names.", 251016_ik) + ! cov_new = (time_diff-dt)/time_diff*cov_old + dt/time_diff*(x - mean_x_old)*(y - mean_y_new) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & + (u(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,params_acm%dim+1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx2) - dt/(time_diff)*u(:,:,:,params_acm%dim+1)) + case ("uyp-cov") + ! compute the covariance of uy and p over time using Welford's method + ! This needs the computation of both averages somewhere after this field + call find_single_mean_index(i_ts, "uy-avg", "uy-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'uy-avg' or 'uy-mean' somewhere after 'uyp-cov' in time_statistics_names.", 251016_ik) + call find_single_mean_index(i_ts, "p-avg", "p-mean", mean_idx2, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'p-avg' or 'p-mean' somewhere after 'uyp-cov' in time_statistics_names.", 251016_ik) + ! cov_new = (time_diff-dt)/time_diff*cov_old + dt/time_diff*(x - mean_x_old)*(y - mean_y_new) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & + (u(:,:,:,2) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,params_acm%dim+1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx2) - dt/(time_diff)*u(:,:,:,params_acm%dim+1)) + case ("uzp-cov") + if (params_acm%dim == 2) call abort(251016, "[TIME_STATISTICS_ACM]: uzp-cov not available in 2D simulations.") + ! compute the covariance of uz and p over time using Welford's method + ! This needs the computation of both averages somewhere after this field + call find_single_mean_index(i_ts, "uz-avg", "uz-mean", mean_idx1, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'uz-avg' or 'uz-mean' somewhere after 'uzp-cov' in time_statistics_names.", 251016_ik) + call find_single_mean_index(i_ts, "p-avg", "p-mean", mean_idx2, & + "[TIME_STATISTICS_ACM]: You need to compute the covariance together with both mean values. Insert 'p-avg' or 'p-mean' somewhere after 'uzp-cov' in time_statistics_names.", 251016_ik) + ! cov_new = (time_diff-dt)/time_diff*cov_old + dt/time_diff*(x - mean_x_old)*(y - mean_y_new) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* & + (u(:,:,:,3) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,params_acm%dim+1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx2) - dt/(time_diff)*u(:,:,:,params_acm%dim+1)) + case default call abort(2153000, "[TIME_STATISTICS_ACM]: time_statistics_name "// & trim(params_acm%time_statistics_names(i_ts))//" not recognized.") end select end do -end subroutine TIME_STATISTICS_ACM \ No newline at end of file +end subroutine TIME_STATISTICS_ACM + +!----------------------------------------------------------------------------- +! Helper subroutine to find a single mean value index after a given position +! +! This helper function was created to eliminate code duplication across variance +! and covariance calculations. Previously, each variance/covariance case contained +! identical search loops to find the corresponding mean values. This helper: +! +! 1. Reduces code duplication by centralizing the search logic +! 2. Provides consistent validation behavior across all cases +! 3. Enables flexible positioning of mean values anywhere AFTER the variance/covariance +! 4. Supports exactly 2 possible mean names (e.g., "ux-avg", "ux-mean") +! 5. Provides consistent error handling with custom messages and error codes +! +! Note: Cases with more than 2 possible mean names (like vort-var with 4 options: +! vort-avg, vor-avg, vort-mean, vor-mean) still use manual search loops since +! this helper is designed for the common case of 2 name variants. +!----------------------------------------------------------------------------- +subroutine find_single_mean_index(current_idx, mean_name1, mean_name2, mean_idx, error_message, error_code) + implicit none + + integer(kind=ik), intent(in) :: current_idx + character(len=*), intent(in) :: mean_name1, mean_name2 ! Two possible names (e.g., "ux-avg", "ux-mean") + integer(kind=ik), intent(out) :: mean_idx + character(len=*), intent(in) :: error_message + integer(kind=ik), intent(in) :: error_code + + integer(kind=ik) :: j + + mean_idx = -1 ! Initialize to not found + + ! Search for either mean name after current index + do j = current_idx + 1, params_acm%N_time_statistics + if (trim(params_acm%time_statistics_names(j)) == trim(mean_name1) .or. & + trim(params_acm%time_statistics_names(j)) == trim(mean_name2)) then + mean_idx = j + exit + end if + end do + + ! If mean is not found, abort with error + if (mean_idx == -1) then + call abort(error_code, error_message) + end if + +end subroutine find_single_mean_index diff --git a/LIB/EQUATION/convection-diffusion/module_ConvDiff_new.f90 b/LIB/EQUATION/convection-diffusion/module_ConvDiff_new.f90 index a51c9b3e..25e96fa8 100644 --- a/LIB/EQUATION/convection-diffusion/module_ConvDiff_new.f90 +++ b/LIB/EQUATION/convection-diffusion/module_ConvDiff_new.f90 @@ -226,26 +226,23 @@ subroutine PREPARE_SAVE_DATA_convdiff( time, u, g, x0, dx, work ) endif endif - ! saving of velocity field + ! saving of velocity field, this is done similar to save_data_ACM if (name(1:2) == 'ux' .or. name(1:2) == 'uy' .or. name(1:2) == 'uz') then read( name(4:4), * ) i_var if (params_convdiff%dim == 2) then + ! let's save a 2D velocity field, for this we need two work arrays if (size(work,4) - k < 1) then - call abort(250922,"CONVDIFF: Not enough space to compute velocity") + call abort(250922,"CONVDIFF: Not enough space to compute velocity, put atleast one other save variable afterwards.") endif - if (k > params_convdiff%N_scalars+2) then - call abort(250922, "CONVDIFF: You try to save a variable that does not exist") - endif - call create_velocity_field_2d( time, g, Bs, dx, x0, work(:,:,:,k), i_var, u(:,:,1,i_var) ) + call create_velocity_field_2d( time, g, Bs, dx, x0, work(:,:,1,k:k+1), i_var, u(:,:,1,i_var) ) elseif (params_convdiff%dim == 3) then + ! let's save a 3D velocity field, for this we need three work arrays if (size(work,4) - k < 2) then - call abort(250922,"CONVDIFF: Not enough space to compute velocity") - endif - if (k > params_convdiff%N_scalars+3) then - call abort(250922, "CONVDIFF: You try to save a variable that does not exist " // trim(name) ) + call abort(250922,"CONVDIFF: Not enough space to compute velocity, put atleast two other save variables afterwards.") endif call create_velocity_field_3d( time, g, Bs, dx, x0, work(:,:,:,k:k+2), i_var ) endif + ! now copy the correct component to the correct place if (name(1:2) == 'uy') then work(:,:,:,k) = work(:,:,:,k+1) elseif (name(1:2) == 'uz') then @@ -258,7 +255,7 @@ subroutine PREPARE_SAVE_DATA_convdiff( time, u, g, x0, dx, work ) ! if any of those endings is in the name, then it is a timestatistics variable if (index(name, "-avg") > 0 .or. index(name, "-mean") > 0 .or. index(name, "-var") > 0 & - .or. index(name, "-minmax") > 0 .or. index(name, "-min") > 0 .or. index(name, "-max") > 0) then + .or. index(name, "-minmax") > 0 .or. index(name, "-min") > 0 .or. index(name, "-max") > 0 .or. index(name, "-cov") > 0) then ! now we have to find the index of it do i_time_statistics = 1, params_convdiff%N_time_statistics if (name == trim(params_convdiff%time_statistics_names(i_time_statistics))) exit @@ -399,7 +396,7 @@ subroutine INICOND_convdiff( time, u, g, x0, dx ) ! non-ghost point has the coordinate x0, from then on its just cartesian with dx spacing real(kind=rk), intent(in) :: x0(1:3), dx(1:3) - integer(kind=ik) :: ix, iy, iz, i, iblob, bx, by, bz + integer(kind=ik) :: ix, iy, iz, i, iblob, bx, by, bz, i_time_statistics integer(kind=ik), dimension(3) :: Bs real(kind=rk) :: x, y, c0x, c0y, z, c0z, lambd, delta @@ -542,6 +539,13 @@ subroutine INICOND_convdiff( time, u, g, x0, dx ) enddo + ! initialize time statistics + if (params_convdiff%N_time_statistics > 0) then + do i_time_statistics = 1, params_convdiff%N_time_statistics + u(:,:,:, params_convdiff%N_scalars + i_time_statistics) = 0.0_rk + end do + endif + end subroutine INICOND_convdiff diff --git a/LIB/EQUATION/convection-diffusion/rhs_convdiff.f90 b/LIB/EQUATION/convection-diffusion/rhs_convdiff.f90 index 1ce59fa4..038a1bbc 100644 --- a/LIB/EQUATION/convection-diffusion/rhs_convdiff.f90 +++ b/LIB/EQUATION/convection-diffusion/rhs_convdiff.f90 @@ -419,14 +419,15 @@ end subroutine RHS_convdiff_new +!> Create a 2D velocity field for advecting a scalar field subroutine create_velocity_field_2D( time, g, Bs, dx, x0, u0, i, u ) implicit none - real(kind=rk), intent(in) :: time - integer(kind=ik), intent(in) :: g, i - integer(kind=ik), dimension(3), intent(in) :: Bs - real(kind=rk), intent(in) :: dx(1:2), x0(1:2) - real(kind=rk), intent(inout) :: u0(:,:,:) - real(kind=rk), intent(in) :: u(:,:) + real(kind=rk), intent(in) :: time !< current time + integer(kind=ik), intent(in) :: g, i !< ghost cells, scalar index (in case we solve several transport equations which different scalars) + integer(kind=ik), dimension(3), intent(in) :: Bs !< block size + real(kind=rk), intent(in) :: dx(1:2), x0(1:2) !< grid spacing and origin of the block + real(kind=rk), intent(inout) :: u0(:,:,:) !< output velocity field + real(kind=rk), intent(in) :: u(:,:) !< scalar input field (for some velocity fields) ! note you cannot change these values without recomputing the coefficients real(kind=rk), parameter :: tau= 0.30_rk, t0=0.0_rk, t1=0.55_rk, t2=1.0_rk, u1=1.0_rk, u2=-1.2221975311385904 real(kind=rk) :: u_this @@ -522,13 +523,14 @@ subroutine create_velocity_field_2D( time, g, Bs, dx, x0, u0, i, u ) +!> Create a 3D velocity field for advecting a scalar field subroutine create_velocity_field_3D( time, g, Bs, dx, x0, u0, i ) implicit none - real(kind=rk), intent(in) :: time - integer(kind=ik), intent(in) :: g, i - integer(kind=ik), dimension(3), intent(in) :: Bs - real(kind=rk), intent(in) :: dx(1:3), x0(1:3) - real(kind=rk), intent(inout) :: u0(:,:,:,1:) + real(kind=rk), intent(in) :: time !< current time + integer(kind=ik), intent(in) :: g, i !< ghost cells, scalar index + integer(kind=ik), dimension(3), intent(in) :: Bs !< block size + real(kind=rk), intent(in) :: dx(1:3), x0(1:3) !< grid spacing and origin of the block + real(kind=rk), intent(inout) :: u0(:,:,:,1:) !< output velocity field integer(kind=ik) :: ix, iy, iz real(kind=rk) :: x, y, z, c0x, c0y, c0z, T diff --git a/LIB/EQUATION/convection-diffusion/time_statistics_convdiff.f90 b/LIB/EQUATION/convection-diffusion/time_statistics_convdiff.f90 index 2dc2eab3..28a87a7f 100644 --- a/LIB/EQUATION/convection-diffusion/time_statistics_convdiff.f90 +++ b/LIB/EQUATION/convection-diffusion/time_statistics_convdiff.f90 @@ -34,7 +34,7 @@ subroutine TIME_STATISTICS_convdiff( time, dt, time_start, u, g, x0, dx, work, m ! non-ghost point has the coordinate x0, from then on its just cartesian with dx spacing real(kind=rk), intent(in) :: x0(1:3), dx(1:3) - integer(kind=ik) :: iB, ix, iy, iz, Bs(3), i_ts, N_offset, i_scalar + integer(kind=ik) :: iB, ix, iy, iz, Bs(3), i_ts, N_offset, i_scalar, mean_idx1 real(kind=rk) :: time_diff character(len=cshort) :: name_phi, stat_name @@ -79,15 +79,18 @@ subroutine TIME_STATISTICS_convdiff( time, dt, time_start, u, g, x0, dx, work, m u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* u(:,:,:,1) elseif (stat_name == trim(name_phi) // "-var") then ! compute the variance of phi over time using Welford's method - ! This needs the computation of the average in the variable afterwards to work - if (i_ts == params_convdiff%N_time_statistics .or. (trim(params_convdiff%time_statistics_names(i_ts+1)) /= trim(name_phi) // "-avg" .and. trim(params_convdiff%time_statistics_names(i_ts+1)) /= trim(name_phi) // "-mean")) then - call abort(2153001, "[TIME_STATISTICS_CONVDIFF]: You need to compute the variance together with the mean value. Insert 'phi-avg' right after 'phi-var' in time_statistics_names.") - end if + ! This needs the computation of the average somewhere after this field + call find_single_mean_index(i_ts, trim(name_phi)//"-avg", trim(name_phi)//"-mean", mean_idx1, & + "[TIME_STATISTICS_CONVDIFF]: You need to compute the variance together with the mean value. Insert '"//trim(name_phi)//"-avg' or '"//trim(name_phi)//"-mean' somewhere after '"//trim(name_phi)//"-var' in time_statistics_names.", 251023_ik) ! var_new = (time_diff-dt)/time_diff*var_old + dt/time_diff*(x - mean_old)*(x - mean_new) ! mean_new = (time_diff-dt)/time_diff*mean_old + dt/time_diff*x u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff) * & - (u(:,:,:,1) - u(:,:,:,N_offset + i_ts + 1)) * & - (u(:,:,:,1) - ((time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts + 1) + dt/(time_diff)*u(:,:,:,1))) + (u(:,:,:,1) - u(:,:,:,N_offset + mean_idx1)) * & + (u(:,:,:,1) - (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + mean_idx1) - dt/(time_diff)*u(:,:,:,1)) + elseif (stat_name == trim(name_phi) // "-avg" .or. & + stat_name == trim(name_phi) // "-mean") then + ! compute the average of phi over time (required for variance) + u(:,:,:,N_offset + i_ts) = (time_diff-dt)/(time_diff)*u(:,:,:,N_offset + i_ts) + dt/(time_diff)* u(:,:,:,1) elseif (stat_name == trim(name_phi) // "-minmax") then ! compute the minmax of phi over time do iz = merge(1,g+1,params_convdiff%dim==2), merge(1,Bs(3)+g,params_convdiff%dim==2) @@ -182,4 +185,37 @@ subroutine TIME_STATISTICS_convdiff( time, dt, time_start, u, g, x0, dx, work, m end if end do -end subroutine TIME_STATISTICS_convdiff \ No newline at end of file +end subroutine TIME_STATISTICS_convdiff + + +!----------------------------------------------------------------------------- +! Helper subroutine, returns the index of the AVERAGE of a quantity in the list of variables to save. +! For example, if the vector is +! params_convdiff%time_statistics_names = [phi1, ux, ux-mean ....] +! then the call to this routine +! call find_single_mean_index(2, 'mean', 'avg', mean_idx, "Did not find it", 17) +! will return the index "3". +! Note: oddly, the same search on the array +! params_convdiff%time_statistics_names = [ux-mean, ux, phi1 ....] +! will fail. +!----------------------------------------------------------------------------- +subroutine find_single_mean_index(current_idx, mean_name1, mean_name2, mean_idx, error_message, error_code) + implicit none + integer(kind=ik), intent(in) :: current_idx + character(len=*), intent(in) :: mean_name1, mean_name2 + integer(kind=ik), intent(out) :: mean_idx + character(len=*), intent(in) :: error_message + integer(kind=ik), intent(in) :: error_code + integer(kind=ik) :: j + mean_idx = -1 + do j = current_idx + 1, params_convdiff%N_time_statistics + if (trim(params_convdiff%time_statistics_names(j)) == trim(mean_name1) .or. & + trim(params_convdiff%time_statistics_names(j)) == trim(mean_name2)) then + mean_idx = j + exit + end if + end do + if (mean_idx == -1) then + call abort(error_code, error_message) + end if +end subroutine find_single_mean_index diff --git a/LIB/EQUATION/insects/kineloader.f90 b/LIB/EQUATION/insects/kineloader.f90 index 8a2b51dd..822ed166 100644 --- a/LIB/EQUATION/insects/kineloader.f90 +++ b/LIB/EQUATION/insects/kineloader.f90 @@ -13,16 +13,27 @@ subroutine load_kine_init(kine) read (10, *) t_period ! stroke period in s, for normalization read (10, *) r_wing ! wing length in mm, for normalization read (10, *) kine%nk - if (kine%nk > nhrmt_max) then - write(*,*) "WARNING(load_kine_init): adjusting the value kine%nk to nhrmt_max" - kine%nk = nhrmt_max - endif write(*,'("nk=",i5," t_period=",g12.4," r_wing=",g12.4)') kine%nk, t_period, r_wing endif call MPI_BCAST(kine%nk,1,MPI_INTEGER,0,MPI_COMM_WORLD,mpicode) nk = kine%nk + ! Allocate the kineloader arrays + if (.not. allocated(kine%vec_t)) allocate(kine%vec_t(max(1,nk))) + if (.not. allocated(kine%vec_phi)) allocate(kine%vec_phi(max(1,nk))) + if (.not. allocated(kine%vec_alpha)) allocate(kine%vec_alpha(max(1,nk))) + if (.not. allocated(kine%vec_theta)) allocate(kine%vec_theta(max(1,nk))) + if (.not. allocated(kine%vec_pitch)) allocate(kine%vec_pitch(max(1,nk))) + if (.not. allocated(kine%vec_vert)) allocate(kine%vec_vert(max(1,nk))) + if (.not. allocated(kine%vec_horz)) allocate(kine%vec_horz(max(1,nk))) + if (.not. allocated(kine%vec_phi_dt)) allocate(kine%vec_phi_dt(max(1,nk))) + if (.not. allocated(kine%vec_alpha_dt)) allocate(kine%vec_alpha_dt(max(1,nk))) + if (.not. allocated(kine%vec_theta_dt)) allocate(kine%vec_theta_dt(max(1,nk))) + if (.not. allocated(kine%vec_pitch_dt)) allocate(kine%vec_pitch_dt(max(1,nk))) + if (.not. allocated(kine%vec_vert_dt)) allocate(kine%vec_vert_dt(max(1,nk))) + if (.not. allocated(kine%vec_horz_dt)) allocate(kine%vec_horz_dt(max(1,nk))) + ! intitalize vectors as zero (note: not all of the space is actually used) kine%vec_t = 0.0_rk kine%vec_vert = 0.0_rk @@ -66,19 +77,19 @@ subroutine load_kine_init(kine) kine%vec_horz_dt = kine%vec_horz_dt * t_period / r_wing endif - call MPI_BCAST( kine%vec_t, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_phi, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_alpha, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_theta, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_pitch, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_vert, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_horz, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_phi_dt, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_alpha_dt, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_theta_dt, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_pitch_dt, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_vert_dt, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) - call MPI_BCAST( kine%vec_horz_dt, nhrmt_max, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_t, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_phi, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_alpha, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_theta, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_pitch, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_vert, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_horz, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_phi_dt, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_alpha_dt, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_theta_dt, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_pitch_dt, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_vert_dt, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) + call MPI_BCAST( kine%vec_horz_dt, nk, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, mpicode ) kine%initialized = .true. if (root) write(*,*) "done initializing kineoader!" diff --git a/LIB/EQUATION/insects/module_insects.f90 b/LIB/EQUATION/insects/module_insects.f90 index cb43bd43..80ee85ae 100644 --- a/LIB/EQUATION/insects/module_insects.f90 +++ b/LIB/EQUATION/insects/module_insects.f90 @@ -38,9 +38,6 @@ module module_insects ! arrays for fourier coefficients are fixed size (avoiding issues with allocatable ! elements in derived datatypes) this is their length: integer, parameter :: nfft_max = 1024 - ! Maximum number of Hermite interpolation nodes (hardcoded because of sxf90 compiler requirements) - ! JB: Array sizes resulting from this number result currently in 50% of WABBITs program size - integer, parameter :: nhrmt_max = 10000 ! Allocatable arrays used in Insect object ! this will hold the surface markers and their normals used for particles: @@ -84,8 +81,9 @@ module module_insects type wingkinematics ! Fourier coefficients real(kind=rk) :: a0_alpha=0.0_rk, a0_phi=0.0_rk, a0_theta=0.0_rk - real(kind=rk), dimension(1:nfft_max) :: ai_phi=0.0_rk, bi_phi=0.0_rk, ai_theta=0.0_rk, & - bi_theta=0.0_rk, ai_alpha=0.0_rk, bi_alpha=0.0_rk + ! JB: Made allocatable to avoid bloating the object file size + real(kind=rk), allocatable :: ai_phi(:), bi_phi(:), ai_theta(:), & + bi_theta(:), ai_alpha(:), bi_alpha(:) integer :: nfft_phi=0, nfft_alpha=0, nfft_theta=0 ! coefficients are read only once from file (or set differently) logical :: initialized = .false. @@ -93,10 +91,11 @@ module module_insects character(len=clong) :: infile_convention="", infile_type="", infile_units="", infile="" ! variables for kineloader (which uses non-periodic hermite interpolation) integer :: nk=0 - real(kind=rk), dimension (1:nhrmt_max) :: vec_t=0.0_rk, & - vec_phi=0.0_rk,vec_alpha=0.0_rk,vec_theta=0.0_rk,vec_pitch=0.0_rk,vec_vert=0.0_rk,vec_horz=0.0_rk, & - vec_phi_dt=0.0_rk,vec_alpha_dt=0.0_rk,vec_theta_dt=0.0_rk,vec_pitch_dt=0.0_rk,vec_vert_dt=0.0_rk, & - vec_horz_dt=0.0_rk + ! JB: Made allocatable to avoid bloating the object file size + real(kind=rk), allocatable :: vec_t(:), & + vec_phi(:),vec_alpha(:),vec_theta(:),vec_pitch(:),vec_vert(:),vec_horz(:), & + vec_phi_dt(:),vec_alpha_dt(:),vec_theta_dt(:),vec_pitch_dt(:),vec_vert_dt(:), & + vec_horz_dt(:) end type @@ -197,13 +196,12 @@ module module_insects !------------------------------------------------------------- ! wing shape fourier coefficients. Note notation: ! R = a0/2 + SUM ( ai cos(2pi*i) + bi sin(2pi*i) ) - ! to avoid compatibility issues, the array is of fixed size, although only - ! the first nftt_wings entries will be used - real(kind=rk), dimension(1:nfft_max,1:4) :: ai_wings=0.0_rk, bi_wings=0.0_rk + ! JB: Made allocatable to avoid bloating the object file size + real(kind=rk), allocatable :: ai_wings(:,:), bi_wings(:,:) real(kind=rk), dimension(1:4) :: a0_wings=0.0_rk ! fill the R0(theta) array once, then only table-lookup instead of Fseries - ! JB: This array increases WABBIT program size quite a bit, consider making it allocatable - real(kind=rk), dimension(1:25000,1:4) :: R0_table=0.0_rk + ! JB: Made allocatable to avoid bloating the object file size + real(kind=rk), allocatable :: R0_table(:,:) ! describes the origin of the wings system real(kind=rk), dimension(1:4) :: xc=0.0_rk, yc=0.0_rk ! number of fft coefficients for wing geometry diff --git a/LIB/EQUATION/insects/wings_geometry.f90 b/LIB/EQUATION/insects/wings_geometry.f90 index 7e5c1bd1..0c532527 100644 --- a/LIB/EQUATION/insects/wings_geometry.f90 +++ b/LIB/EQUATION/insects/wings_geometry.f90 @@ -46,6 +46,9 @@ subroutine draw_insect_wings(time, xx0, ddx, mask, mask_color, us, Insect, delet endif endif + ! sometimes we have the geometry type insect but it has no wings (for example for fractal_tree), we then want to skip the rest + if (.not. Insect%RightWing=="yes" .and. .not. Insect%LeftWing=="yes" .and. Insect%RightWing2=="yes" .and. Insect%LeftWing2=="yes") return + if ((dabs(Insect%time-time)>1.0d-10) .and. root) then write(*,'("error! time=",es15.8," but Insect%time=",es15.8)') time, Insect%time write(*,'("Did you call Update_Insect before draw_insect_wings?")') @@ -1526,6 +1529,21 @@ subroutine Setup_Wing_Fourier_coefficients(Insect, wingID) return endif + ! Allocate wing Fourier coefficient arrays if not already allocated + if (.not. allocated(Insect%ai_wings)) then + allocate(Insect%ai_wings(nfft_max, 4)) + Insect%ai_wings = 0.0_rk + endif + if (.not. allocated(Insect%bi_wings)) then + allocate(Insect%bi_wings(nfft_max, 4)) + Insect%bi_wings = 0.0_rk + endif + ! Allocate R0_table if not already allocated + if (.not. allocated(Insect%R0_table)) then + allocate(Insect%R0_table(25000, 4)) + Insect%R0_table = 0.0_rk + endif + Insect%a0_wings(wingID) = 0.0_rk Insect%ai_wings(:,wingID) = 0.0_rk Insect%bi_wings(:,wingID) = 0.0_rk diff --git a/LIB/EQUATION/insects/wings_motion.f90 b/LIB/EQUATION/insects/wings_motion.f90 index 70e7c0d2..f7a4d117 100644 --- a/LIB/EQUATION/insects/wings_motion.f90 +++ b/LIB/EQUATION/insects/wings_motion.f90 @@ -142,16 +142,22 @@ subroutine FlappingMotion(time, Insect, protocoll, phi, alpha, theta, phi_dt, & !------------------------------------------------------------------------------------ call read_param_mpi(kinefile,"kinematics","nfft_phi",kine%nfft_phi,0) + if (.not. allocated(kine%ai_phi)) allocate(kine%ai_phi(max(1,kine%nfft_phi))) + if (.not. allocated(kine%bi_phi)) allocate(kine%bi_phi(max(1,kine%nfft_phi))) call read_param_mpi(kinefile,"kinematics","a0_phi",kine%a0_phi,0.0_rk) call read_param_mpi(kinefile,"kinematics","ai_phi",kine%ai_phi(1:kine%nfft_phi)) call read_param_mpi(kinefile,"kinematics","bi_phi",kine%bi_phi(1:kine%nfft_phi)) call read_param_mpi(kinefile,"kinematics","nfft_alpha",kine%nfft_alpha,0) + if (.not. allocated(kine%ai_alpha)) allocate(kine%ai_alpha(max(1,kine%nfft_alpha))) + if (.not. allocated(kine%bi_alpha)) allocate(kine%bi_alpha(max(1,kine%nfft_alpha))) call read_param_mpi(kinefile,"kinematics","a0_alpha",kine%a0_alpha,0.0_rk) call read_param_mpi(kinefile,"kinematics","ai_alpha",kine%ai_alpha(1:kine%nfft_alpha)) call read_param_mpi(kinefile,"kinematics","bi_alpha",kine%bi_alpha(1:kine%nfft_alpha)) call read_param_mpi(kinefile,"kinematics","nfft_theta",kine%nfft_theta,0) + if (.not. allocated(kine%ai_theta)) allocate(kine%ai_theta(max(1,kine%nfft_theta))) + if (.not. allocated(kine%bi_theta)) allocate(kine%bi_theta(max(1,kine%nfft_theta))) call read_param_mpi(kinefile,"kinematics","a0_theta",kine%a0_theta,0.0_rk) call read_param_mpi(kinefile,"kinematics","ai_theta",kine%ai_theta(1:kine%nfft_theta)) call read_param_mpi(kinefile,"kinematics","bi_theta",kine%bi_theta(1:kine%nfft_theta)) @@ -359,6 +365,14 @@ subroutine FlappingMotion(time, Insect, protocoll, phi, alpha, theta, phi_dt, & kine%nfft_theta = 10 kine%nfft_phi = 10 + ! Allocate arrays + if (.not. allocated(kine%ai_phi)) allocate(kine%ai_phi(kine%nfft_phi)) + if (.not. allocated(kine%bi_phi)) allocate(kine%bi_phi(kine%nfft_phi)) + if (.not. allocated(kine%ai_alpha)) allocate(kine%ai_alpha(kine%nfft_alpha)) + if (.not. allocated(kine%bi_alpha)) allocate(kine%bi_alpha(kine%nfft_alpha)) + if (.not. allocated(kine%ai_theta)) allocate(kine%ai_theta(kine%nfft_theta)) + if (.not. allocated(kine%bi_theta)) allocate(kine%bi_theta(kine%nfft_theta)) + kine%a0_phi =25.4649398 kine%a0_alpha =-0.3056968 kine%a0_theta =-17.8244658 ! - sign (Dmitry, 10 Nov 2013) @@ -401,6 +415,14 @@ subroutine FlappingMotion(time, Insect, protocoll, phi, alpha, theta, phi_dt, & kine%nfft_theta = 10 kine%nfft_phi = 10 + ! Allocate arrays + if (.not. allocated(kine%ai_phi)) allocate(kine%ai_phi(kine%nfft_phi)) + if (.not. allocated(kine%bi_phi)) allocate(kine%bi_phi(kine%nfft_phi)) + if (.not. allocated(kine%ai_alpha)) allocate(kine%ai_alpha(kine%nfft_alpha)) + if (.not. allocated(kine%bi_alpha)) allocate(kine%bi_alpha(kine%nfft_alpha)) + if (.not. allocated(kine%ai_theta)) allocate(kine%ai_theta(kine%nfft_theta)) + if (.not. allocated(kine%bi_theta)) allocate(kine%bi_theta(kine%nfft_theta)) + kine%a0_phi =38.2280144124915 kine%a0_alpha =1.09156750841542 kine%a0_theta =-17.0396438317138 diff --git a/LIB/INDICATORS/refinementIndicator_tree.f90 b/LIB/INDICATORS/refinementIndicator_tree.f90 index 97352408..93be8d2d 100644 --- a/LIB/INDICATORS/refinementIndicator_tree.f90 +++ b/LIB/INDICATORS/refinementIndicator_tree.f90 @@ -193,6 +193,12 @@ subroutine refinementIndicator_tree(params, hvy_block, tree_ID, indicator) ! sync light data, as only root sets random refinement call MPI_BCAST( lgt_block(:, IDX_REFINE_STS), size(lgt_block,1), MPI_INTEGER4, 0, WABBIT_COMM, ierr ) + ! In some cases the refinement_status is set up by a routine other than this one. This is the case + ! in some forest processing (=handling multiple trees). In such a case, we do nothing here (in particular + ! we do not reset the refinement status) + case ('nothing (external)') + return + case default call abort("ERROR: refine_tree: the refinement indicator is unkown") diff --git a/LIB/MAIN/main.f90 b/LIB/MAIN/main.f90 index e103d44e..c09ff8ca 100644 --- a/LIB/MAIN/main.f90 +++ b/LIB/MAIN/main.f90 @@ -56,7 +56,7 @@ program main !!!!!! => renaming: hvy_tmp -> hvy_work ! time loop variables - real(kind=rk) :: time, time_start, output_time + real(kind=rk) :: time, output_time integer(kind=ik) :: iteration ! filename of *.ini file used to read parameters character(len=clong) :: filename @@ -177,16 +177,6 @@ program main ! On all blocks, set the initial condition (incl. synchronize ghosts) call setInitialCondition_tree( params, hvy_block, tree_ID_flow, params%adapt_inicond, time, iteration, hvy_mask, hvy_tmp, hvy_work=hvy_work) - !******************************************************************* - ! initial time statistics (integral, average, minmax or similar over time) - initializes the arrays (to 0) - !******************************************************************* - ! backup startup time, usefull for time statistics (averaging) - time_start = time - if (params%time_statistics) then - ! no sync needed as all values will simply be set to 0 - call time_statistics_wrapper(time, dt, time_start, params, hvy_block, hvy_tmp, hvy_mask, tree_ID_flow) - end if - if ((.not. params%read_from_files .or. params%adapt_inicond).and.(time>=params%write_time_first)) then ! NOTE: new versions (>16/12/2017) call physics module routines call prepare_save_data. These ! routines create the fields to be stored in the work array hvy_work in the first 1:params%N_fields_saved @@ -236,14 +226,14 @@ program main !******************************************************************* ! initial statistics (usefull for checking IC) !******************************************************************* - t4 = MPI_wtime() if ( (modulo(iteration, params%nsave_stats)==0).or.(abs(time - params%next_stats_time)<1e-12_rk) ) then + t4 = MPI_wtime() ! we need to sync ghost nodes for some derived qtys, for sure call sync_ghosts_RHS_tree( params, hvy_block, tree_ID_flow ) call statistics_wrapper(time, 0.0_rk, params, hvy_block, hvy_tmp, hvy_mask, tree_ID_flow) + call toc( "TOPLEVEL: statistics", 13, MPI_wtime()-t4) endif - call toc( "TOPLEVEL: statistics", 13, MPI_wtime()-t4) ! timing @@ -292,9 +282,9 @@ program main ! and use the "everywhere" indicator. Note: after resuming a run from backup, this works as well, because ! the refinement_flag is 0 and this results in "significant" refining in fact all blocks. if (params%refinement_indicator == "significant" .and. iteration == 0) then - call refine_tree( params, hvy_block, "everywhere", tree_ID=tree_ID_flow, error_OOM=error_OOM, check_full_tree=.true.) + call refine_tree( params, hvy_block, "everywhere", tree_ID=tree_ID_flow, error_OOM=error_OOM, check_full_tree=.true., time=time ) else - call refine_tree( params, hvy_block, params%refinement_indicator, tree_ID=tree_ID_flow, error_OOM=error_OOM, check_full_tree=.true.) + call refine_tree( params, hvy_block, params%refinement_indicator, tree_ID=tree_ID_flow, error_OOM=error_OOM, check_full_tree=.true., time=time ) endif ! if refine_tree runs out-of-memory (OOM), it does not actually do the refinement, but returns after realizing ! there won't be enough mem. We can thus jump to 17 to save a backup and terminate. @@ -364,20 +354,23 @@ program main ! synching of n_eqn_rhs is enough, as time statistics need no sync call sync_ghosts_RHS_tree( params, hvy_block(:,:,:,1:params%n_eqn_rhs,:), tree_ID_flow ) - call time_statistics_wrapper(time, dt, time_start, params, hvy_block, hvy_tmp, hvy_mask, tree_ID_flow) + call time_statistics_wrapper(time, dt, params, hvy_block, hvy_tmp, hvy_mask, tree_ID_flow) end if !******************************************************************* ! statistics !******************************************************************* - t4 = MPI_wtime() if ( (modulo(iteration, params%nsave_stats)==0).or.(abs(time - params%next_stats_time)<1e-12_rk) ) then + t4 = MPI_wtime() ! we need to sync ghost nodes for some derived qtys, for sure call sync_ghosts_RHS_tree( params, hvy_block, tree_ID_flow ) call statistics_wrapper(time, dt, params, hvy_block, hvy_tmp, hvy_mask, tree_ID_flow) + call toc( "TOPLEVEL: statistics", 13, MPI_wtime()-t4) + + ! update next stats time + if (abs(time - params%next_stats_time)<1e-12_rk) params%next_stats_time = params%next_stats_time + params%tsave_stats endif - call toc( "TOPLEVEL: statistics", 13, MPI_wtime()-t4) ! if multiple time steps are performed on the same grid, we have to be careful ! not to skip past saving time intervals. Therefore, if it is time to save, we @@ -400,12 +393,38 @@ program main call createMask_tree(params, time, hvy_mask, hvy_tmp) ! actual coarsening (including the mask function) - call adapt_tree( time, params, hvy_block, tree_ID_flow, params%coarsening_indicator, hvy_tmp, & - hvy_mask=hvy_mask, hvy_work=hvy_work, neqn_WD=params%n_eqn-params%N_time_statistics) + + ! High-level optimization for time statistics: + ! Check if any time statistics components should be adapted (threshold_state_vector_component > 0) + ! Time statistics are always at the end of the state vector + ! If we do not want to adapt them, we can reduce the workload of adapt_tree significantly. + if (params%time_statistics .and. params%N_time_statistics > 0 .and. & + any(params%threshold_state_vector_component(params%n_eqn-params%N_time_statistics+1:params%n_eqn) > 0)) then + ! Include time statistics in adaptation + call adapt_tree( time, params, hvy_block, tree_ID_flow, params%coarsening_indicator, hvy_tmp, & + hvy_mask=hvy_mask, hvy_work=hvy_work, neqn_adapt=params%n_eqn) + else + ! Exclude time statistics from adaptation to reduce workload + call adapt_tree( time, params, hvy_block, tree_ID_flow, params%coarsening_indicator, hvy_tmp, & + hvy_mask=hvy_mask, hvy_work=hvy_work, neqn_adapt=params%n_eqn-params%N_time_statistics) + endif else ! actual coarsening (no mask function is required) - call adapt_tree( time, params, hvy_block, tree_ID_flow, params%coarsening_indicator, hvy_tmp, & - hvy_work=hvy_work, neqn_WD=params%n_eqn-params%N_time_statistics) + + ! High-level optimization for time statistics: + ! Check if any time statistics components should be adapted (threshold_state_vector_component > 0) + ! Time statistics are always at the end of the state vector + ! If we do not want to adapt them, we can reduce the workload of adapt_tree significantly. + if (params%time_statistics .and. params%N_time_statistics > 0 .and. & + any(params%threshold_state_vector_component(params%n_eqn-params%N_time_statistics+1:params%n_eqn) > 0)) then + ! Include time statistics in adaptation + call adapt_tree( time, params, hvy_block, tree_ID_flow, params%coarsening_indicator, hvy_tmp, & + hvy_work=hvy_work, neqn_adapt=params%n_eqn) + else + ! Exclude time statistics from adaptation to reduce workload + call adapt_tree( time, params, hvy_block, tree_ID_flow, params%coarsening_indicator, hvy_tmp, & + hvy_work=hvy_work, neqn_adapt=params%n_eqn-params%N_time_statistics) + endif endif endif call toc( "TOPLEVEL: adapt mesh", 14, MPI_wtime()-t4) @@ -483,13 +502,16 @@ program main !******************************************************************* - ! statistics ( last time ) + ! statistics ( last time - always done if statistics is enabled, but skipped if it was already done at this time to prevent duplicates ) !******************************************************************* - if ( (modulo(iteration, params%nsave_stats)==0).or.(abs(time - params%next_stats_time)<1e-12_rk) ) then + if ( (params%nsave_stats /= 99999999_ik .or. (abs(params%tsave_stats - 9999999.9_rk) > 1e-12)) & + .and. .not. (modulo(iteration, params%nsave_stats) == 0 .or. abs(time - params%next_stats_time + params%tsave_stats)<1e-12_rk ) ) then + t4 = MPI_wtime() ! we need to sync ghost nodes for some derived qtys, for sure call sync_ghosts_RHS_tree( params, hvy_block, tree_ID_flow) call statistics_wrapper(time, dt, params, hvy_block, hvy_tmp, hvy_mask, tree_ID_flow) + call toc( "TOPLEVEL: statistics", 13, MPI_wtime()-t4) endif diff --git a/LIB/MESH/adapt_tree.f90 b/LIB/MESH/adapt_tree.f90 index 6a4fb94b..06653a37 100644 --- a/LIB/MESH/adapt_tree.f90 +++ b/LIB/MESH/adapt_tree.f90 @@ -8,7 +8,7 @@ ! !> \note It is well possible to start with a very fine mesh and end up with only one active !! block after this routine. You do *NOT* have to call it several times. -subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy_work, hvy_mask, ignore_coarsening, ignore_maxlevel, init_full_tree_grid, neqn_WD ) +subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy_work, hvy_mask, ignore_coarsening, ignore_maxlevel, init_full_tree_grid, neqn_adapt ) ! it is not technically required to include the module here, but for VS code it reduces the number of wrong "errors" use module_params @@ -36,17 +36,19 @@ subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy logical, intent(in), optional :: ignore_maxlevel !> Maybe we already have a full tree grid, so we do not need to initialize it logical, intent(in), optional :: init_full_tree_grid - !> In rare cases, not all equations need to be wavelet decomposed, this saves a lot of extra work. - !> If not present, all equations are used. Applying this decomposes only SC and reconstructs by copying - integer(kind=ik), optional, intent(in) :: neqn_WD + !> In some cases, not all component of the state vector need to be wavelet decomposed, and in those cases + !> excluding those saves a lot of extra work. If present, the code wavelet decomposes the first [1:neqn_adapt components] of + !> the vector. For the remaining ones [neqn_adapt+1:neqn], only scaling function coefficients are computed (and no WC); reconstruction + !> is done by simple copying. If neqn_adapt is not present, all components are fully decomposed (SC AND WC computed). + integer(kind=ik), optional, intent(in) :: neqn_adapt ! loop variables integer(kind=ik) :: iteration, k, lgt_id real(kind=rk) :: t_block, t_all, t_loop - integer(kind=ik) :: Jmax_active, Jmin_active, level, ierr, k1, hvy_id, neqn_WD_apply + integer(kind=ik) :: Jmax_active, Jmin_active, level, ierr, k1, hvy_id, neqn_adapt_apply logical :: ignore_coarsening_apply, ignore_maxlevel_apply, initFullTreeGrid, iterate, toBeManipulated integer(kind=ik) :: level_me, ref_stat, Jmin, lgt_n_old, g_this - character(len=clong) :: toc_statement + character(len=clong) :: format_string integer(kind=tsize) :: treecode real(kind=rk) :: thresh(1:params%n_eqn), thresh_old(1:params%n_eqn), thresh_save(1:params%n_eqn+1) @@ -81,8 +83,8 @@ subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy if (present(ignore_maxlevel)) ignore_maxlevel_apply = ignore_maxlevel initFullTreeGrid = .true. if (present(init_full_tree_grid)) initFullTreeGrid = init_full_tree_grid - neqn_WD_apply = size(hvy_block, 4) - if (present(neqn_WD)) neqn_WD_apply = neqn_WD + neqn_adapt_apply = size(hvy_block, 4) + if (present(neqn_adapt)) neqn_adapt_apply = neqn_adapt ! To avoid that the incoming hvy_neighbor array and active lists are outdated @@ -97,7 +99,7 @@ subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy ! Blocks always pass their SC to their mother (pendant to excuteCoarsening of old function), new blocks only synch from medium neighbors to avoid CE ! This is repeated until all blocks on the layer JMin are present with wavelet decomposed values t_block = MPI_Wtime() - call wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init_full_tree_grid=initFullTreeGrid) + call wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init_full_tree_grid=initFullTreeGrid, neqn_adapt=neqn_adapt_apply, time=time) call toc( "adapt_tree (decompose_tree)", 102, MPI_Wtime()-t_block ) ! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -234,13 +236,13 @@ subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy ! In a special case reconstruction can be skipped as it has already been done, therefore the if-condition if (.not. ((indicator == "threshold-cvs" .or. indicator == "threshold-image-denoise") .and. ignore_coarsening_apply)) then ! generally, we wavelet reconstruct all equations. In some rare cases, only a subset is required, for example for time statistics - ! in this case, we reconstruct only the first neqn_WD_apply equations and the rest is copied from the backup - ! Note that hvy_tmp contains the original values, so we copy them back after reconstructing the first neqn_WD_apply equations - call wavelet_reconstruct_full_tree(params, hvy_block(:,:,:,1:neqn_WD_apply,:), hvy_tmp(:,:,:,1:neqn_WD_apply,:), tree_ID) - if (neqn_WD_apply < size(hvy_block, 4)) then + ! in this case, we reconstruct only the first neqn_adapt_apply equations and the rest is copied from the backup + ! Note that hvy_tmp contains the original values, so we copy them back after reconstructing the first neqn_adapt_apply equations + call wavelet_reconstruct_full_tree(params, hvy_block(:,:,:,1:neqn_adapt_apply,:), hvy_tmp(:,:,:,1:neqn_adapt_apply,:), tree_ID, time=time) + if (neqn_adapt_apply < size(hvy_block, 4)) then do k = 1, hvy_n(tree_ID) hvy_ID = hvy_active(k, tree_ID) - hvy_block(:, :, : ,neqn_WD_apply+1:size(hvy_block, 4), hvy_ID) = hvy_tmp(:, :, : ,neqn_WD_apply+1:size(hvy_block, 4), hvy_ID) ! restore original values + hvy_block(:, :, : ,neqn_adapt_apply+1:size(hvy_block, 4), hvy_ID) = hvy_tmp(:, :, : ,neqn_adapt_apply+1:size(hvy_block, 4), hvy_ID) ! restore original values enddo endif endif @@ -255,7 +257,7 @@ subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy ! they may have passed several level also and non-leafs have been deleted. Rebalance for optimal loadbalancing ! ToDo: we sync directly afterwards so we could only transfer inner points, but this needs the buffering from xfer_bock_data t_block = MPI_Wtime() - call balanceLoad_tree( params, hvy_block, tree_ID) + call balanceLoad_tree( params, hvy_block, tree_ID, balance_name='adapt', time=time ) call toc( "adapt_tree (balanceLoad_tree)", 106, MPI_Wtime()-t_block ) ! synchronize ghost nodes - final synch to update all neighbours with the new values @@ -274,7 +276,7 @@ subroutine adapt_tree( time, params, hvy_block, tree_ID, indicator, hvy_tmp, hvy !! First, the full tree grid is prepared, then the leaf-layer is decomposed in one go !! in order to have load-balanced work there, then the mothers are lvl-wise updated and consequently !! decomposed until all blocks have the decomposed values. Afterwards, the grid will be in full tree format. -subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init_full_tree_grid, compute_SC_only, scaling_filter, neqn_WD, verbose_check) +subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init_full_tree_grid, compute_SC_only, scaling_filter, neqn_adapt, time, verbose_check) implicit none type (type_params), intent(in) :: params @@ -289,14 +291,18 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init logical, intent(in), optional :: verbose_check !< No matter the value, if this is present we debug !> In rare cases, not all equations need to be wavelet decomposed, this saves a lot of extra work. !> If not present, all equations are used. Applying this decomposes only SC and reconstructs by copying - integer(kind=ik), optional, intent(in) :: neqn_WD + integer(kind=ik), optional, intent(in) :: neqn_adapt + !> current simulation time, only used for development debugging output + real(kind=rk), intent(in), optional :: time - integer(kind=ik) :: k, lgt_ID, hvy_ID, lgt_n_old, g_this, level_me, ref_stat, iteration, level, neqn_WD_apply + integer(kind=ik) :: k, lgt_ID, hvy_ID, lgt_n_old, g_this, level_me, ref_stat, iteration, level, neqn_adapt_apply, ierr real(kind=rk) :: t_block, t_loop - character(len=clong) :: toc_statement + character(len=clong) :: format_string, string_prepare logical :: iterate, use_leaf_first, initFullTreeGrid, computeSCOnly real(kind=rk), dimension(:), allocatable :: scalingFilter integer(kind=ik), dimension(:), allocatable, save :: lgt_refinementStatus_backup + integer(kind=ik) :: blocks_decomposed + integer(kind=ik), allocatable, save :: blocks_decomposed_list(:) initFullTreeGrid = .true. if (present(init_full_tree_grid)) initFullTreeGrid = init_full_tree_grid @@ -311,8 +317,8 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init allocate(scalingFilter(lbound(params%HD, dim=1):ubound(params%HD, dim=1))) scalingFilter(:) = params%HD(:) endif - neqn_WD_apply = size(hvy_block, 4) - if (present(neqn_WD)) neqn_WD_apply = neqn_WD + neqn_adapt_apply = size(hvy_block, 4) + if (present(neqn_adapt)) neqn_adapt_apply = neqn_adapt ! the refinement_status is used in this routine for things other than the refinement_status ! but that may be problematic if we have set the refinement_status externally and like to keep it. @@ -405,25 +411,28 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init ! first point is SC and last WC, so we need left(HD) or right(GD) for sync g_this = max(abs(lbound(params%HD,1)),ubound(params%GD,1)) + write(string_prepare, '(A, i0)') "wavelet_decompose_syncN_lvl", level !< prepare format string for sync debug + ! ! for coarse extension we are not dependend on coarser neighbors so lets skip the syncing ! if (params%isLiftedWavelet) then - ! call sync_TMP_from_MF( params, hvy_block, tree_ID, -1, g_minus=g_this, g_plus=g_this, hvy_tmp=hvy_tmp) - ! call toc( "decompose_tree (sync TMP <- MF)", 111, MPI_Wtime()-t_block ) + ! call sync_TMP_from_MF( params, hvy_block, tree_ID, -1, g_minus=g_this, g_plus=g_this, hvy_tmp=hvy_tmp, sync_debug_name=string_prepare) + ! call toc( "decompose_tree (sync TMP <- MF)", 111, MPI_Wtime()-t_block) - ! write(toc_statement, '(A, i0, A)') "decompose_tree (it ", iteration, " sync lvl <- MF)" - ! call toc( toc_statement, 1100+iteration, MPI_Wtime()-t_block ) + ! write(format_string, '(A, i0, A)') "decompose_tree (it ", iteration, " sync lvl <- MF)" + ! call toc( format_string, 1100+iteration, MPI_Wtime()-t_block ) ! ! unlifted wavelets need coarser neighbor values for their WC so we need to sync them too ! else - call sync_TMP_from_all( params, hvy_block, tree_ID, -1, g_minus=g_this, g_plus=g_this, hvy_tmp=hvy_tmp) - call toc( "decompose_tree (sync TMP <- all)", 111, MPI_Wtime()-t_block ) + call sync_TMP_from_all( params, hvy_block, tree_ID, -1, g_minus=g_this, g_plus=g_this, hvy_tmp=hvy_tmp, sync_debug_name=string_prepare) + call toc( "decompose_tree (sync TMP <- all)", 111, MPI_Wtime()-t_block) - write(toc_statement, '(A, i0, A)') "decompose_tree (it ", iteration, " sync lvl <- all)" - call toc( toc_statement, 1100+iteration, MPI_Wtime()-t_block ) + write(format_string, '(A, i0, A)') "decompose_tree (it ", iteration, " sync lvl <- all)" + call toc( format_string, 1100+iteration, MPI_Wtime()-t_block ) ! endif ! Wavelet-transform all blocks which are untreated ! From now on until wavelet retransform hvy_block will hold the wavelet decomposed values in spaghetti form for these blocks t_block = MPI_Wtime() + blocks_decomposed = 0 do k = 1, hvy_n(tree_ID) hvy_ID = hvy_active(k, tree_ID) call hvy2lgt( lgt_ID, hvy_ID, params%rank, params%number_blocks ) @@ -431,6 +440,7 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init ! FWT required for a block that is on the level if (ref_stat == -1) then + blocks_decomposed = blocks_decomposed + 1 ! hvy_tmp now is a copy with sync'ed ghost points. ! We actually copy here a second time but this is to ensure we have the correct ghost points saved, and copying is not too expensive anyways hvy_tmp(:,:,:,1:size(hvy_block, 4),hvy_ID) = hvy_block(:,:,:,1:size(hvy_block, 4),hvy_ID) @@ -448,18 +458,37 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init lbound(params%HD, dim=1), ubound(params%HD, dim=1), do_restriction=.true.) else ! generally, we wavelet decompose all equations. In some rare cases, only a subset is required, for example for time statistics - ! in this case, we decompose only the first neqn_WD_apply equations and the rest only with the scaling function - call waveletDecomposition_block(params, hvy_block(:,:,:,1:neqn_WD_apply,hvy_ID)) - if (neqn_WD_apply < size(hvy_block, 4)) then - call blockFilterXYZ_vct( params, hvy_tmp(:,:,:,neqn_WD_apply+1:size(hvy_block, 4),hvy_ID), hvy_block(:,:,:,neqn_WD_apply+1:size(hvy_block, 4),hvy_ID), params%HD, lbound(params%HD, dim=1), ubound(params%HD, dim=1), do_restriction=.true.) + ! in this case, we decompose only the first neqn_adapt_apply equations and the rest only with the scaling function + call waveletDecomposition_block(params, hvy_block(:,:,:,1:neqn_adapt_apply,hvy_ID)) + if (neqn_adapt_apply < size(hvy_block, 4)) then + call blockFilterXYZ_vct( params, hvy_tmp(:,:,:,neqn_adapt_apply+1:size(hvy_block, 4),hvy_ID), hvy_block(:,:,:,neqn_adapt_apply+1:size(hvy_block, 4),hvy_ID), params%HD, lbound(params%HD, dim=1), ubound(params%HD, dim=1), do_restriction=.true.) endif endif endif end do call toc( "decompose_tree (FWT)", 112, MPI_Wtime()-t_block ) - write(toc_statement, '(A, i0, A)') "decompose_tree (it ", iteration, " FWT)" - call toc( toc_statement, 1150+iteration, MPI_Wtime()-t_block ) + write(format_string, '(A, i0, A)') "decompose_tree (it ", iteration, " FWT)" + call toc( format_string, 1150+iteration, MPI_Wtime()-t_block ) + + ! debug output to see how many blocks have been decomposed, gather information on rank 0 and print to file + if (params%debug_wavelet_decompose) then + if (.not. allocated(blocks_decomposed_list)) allocate(blocks_decomposed_list(1:params%number_procs)) + + call MPI_GATHER(blocks_decomposed, 1, MPI_INTEGER, blocks_decomposed_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + open(unit=99, file=trim("debug_wavelet_decompose.csv"), status="unknown", position="append") + string_prepare = "-1.0E+00," ! set negative time, just to have csv with the same length in every row + if (present(time)) write(string_prepare,'(es16.6,",")') time + if (params%number_procs == 1) then + write(99,'(A, i0, ",", i0)') trim(adjustl(string_prepare)), level, blocks_decomposed_list(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs + write(99,format_string) trim(adjustl(string_prepare)), level, blocks_decomposed_list(1), blocks_decomposed_list(2:params%number_procs) + endif + close(99) + endif + endif ! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ! coarseExtension: remove wavelet coefficients near a fine/coarse interface @@ -494,12 +523,13 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init ! Update mother blocks ! We update mother blocks level-wise, with this we always know that all blocks on this level are ready and need no lgt_data update ! This uses the already wavelet decomposed blocks and copies their SC into the new mother block + write(string_prepare, '(A, i0)') "wavelet_decompose_syncF_lvl", level !< prepare format string for sync debug t_block = MPI_Wtime() - call sync_D2M(params, hvy_block, tree_ID, sync_case="level", s_val=level) - call toc( "decompose_tree (sync level 2 mothers)", 113, MPI_Wtime()-t_block ) + call sync_D2M(params, hvy_block, tree_ID, sync_case="level", s_val=level, sync_debug_name=string_prepare) + call toc( "decompose_tree (sync level 2 mothers)", 113, MPI_Wtime()-t_block) - write(toc_statement, '(A, i0, A)') "decompose_tree (it ", iteration, " sync level 2 mothers)" - call toc( toc_statement, 1200+iteration, MPI_Wtime()-t_block ) + write(format_string, '(A, i0, A)') "decompose_tree (it ", iteration, " sync level 2 mothers)" + call toc( format_string, 1200+iteration, MPI_Wtime()-t_block ) ! iteration counter iteration = iteration + 1 @@ -507,18 +537,18 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init ! loop continues until we are on the lowest level. iterate = (level >= params%Jmin) - write(toc_statement, '(A, i0, A)') "decompose_tree (it ", iteration, " TOTAL)" - call toc( toc_statement, 1250+iteration, MPI_Wtime()-t_loop ) + write(format_string, '(A, i0, A)') "decompose_tree (it ", iteration, " TOTAL)" + call toc( format_string, 1250+iteration, MPI_Wtime()-t_loop ) if (present(verbose_check)) then write(*, '(A, i0, A, i0, A)') "Loop ", iteration, " with ", lgt_n(tree_ID), " blocks" endif ! ! for debugging purposes - savin does a sync if any ghost values are NaN - ! write( toc_statement,'(A, i6.6, A)') 'TestWD_', iteration, "000000.h5" - ! call saveHDF5_tree(toc_statement, dble(iteration), iteration, 1, params, hvy_block, tree_ID) - ! write( toc_statement,'(A, i6.6, A)') 'TestN_', iteration, "000000.h5" - ! call saveHDF5_tree(toc_statement, dble(iteration), iteration, 1, params, hvy_tmp, tree_ID) + ! write( format_string,'(A, i6.6, A)') 'TestWD_', iteration, "000000.h5" + ! call saveHDF5_tree(format_string, dble(iteration), iteration, 1, params, hvy_block, tree_ID) + ! write( format_string,'(A, i6.6, A)') 'TestN_', iteration, "000000.h5" + ! call saveHDF5_tree(format_string, dble(iteration), iteration, 1, params, hvy_tmp, tree_ID) end do ! copy the original refinement_status (when entering this routine) back to lgt_block @@ -527,10 +557,10 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init enddo ! ! for debugging purposes - savin does a sync if any ghost values are NaN - ! write( toc_statement,'(A, i6.6, A)') 'TestWD_', 100, "000000.h5" - ! call saveHDF5_tree(toc_statement, dble(100), 100, 1, params, hvy_block, tree_ID) - ! write( toc_statement,'(A, i6.6, A)') 'TestN_', 100, "000000.h5" - ! call saveHDF5_tree(toc_statement, dble(100), 100, 1, params, hvy_tmp, tree_ID) + ! write( format_string,'(A, i6.6, A)') 'TestWD_', 100, "000000.h5" + ! call saveHDF5_tree(format_string, dble(100), 100, 1, params, hvy_block, tree_ID) + ! write( format_string,'(A, i6.6, A)') 'TestN_', 100, "000000.h5" + ! call saveHDF5_tree(format_string, dble(100), 100, 1, params, hvy_tmp, tree_ID) end subroutine @@ -540,7 +570,7 @@ subroutine wavelet_decompose_full_tree(params, hvy_block, tree_ID, hvy_tmp, init !! The input has to be in full grid format in order for the mother-update cycle to work. !! Blocks are reconstructed level-wise starting and JMin and mothers are updated until all blocks are reconstructed. !! Afterwards the grid is still in full tree format, but can be pruned by deleting all non-leafs -subroutine wavelet_reconstruct_full_tree(params, hvy_block, hvy_tmp, tree_ID) +subroutine wavelet_reconstruct_full_tree(params, hvy_block, hvy_tmp, tree_ID, time) ! it is not technically required to include the module here, but for VS code it reduces the number of wrong "errors" use module_params @@ -552,11 +582,14 @@ subroutine wavelet_reconstruct_full_tree(params, hvy_block, hvy_tmp, tree_ID) !> heavy tmp data array - block data. real(kind=rk), intent(inout) :: hvy_tmp(:, :, :, :, :) integer(kind=ik), intent(in) :: tree_ID + real(kind=rk), intent(in), optional :: time !< current simulation time, only for debug output ! loop variables - integer(kind=ik) :: iteration, k, Jmax_active, Jmin_active, level, hvy_ID, lgt_ID, level_me, Jmin + integer(kind=ik) :: iteration, k, Jmax_active, Jmin_active, level, hvy_ID, lgt_ID, level_me, Jmin, ierr real(kind=rk) :: t_block, t_all, t_loop - character(len=clong) :: toc_statement + character(len=clong) :: format_string, string_prepare + integer(kind=ik) :: blocks_reconstructed + integer(kind=ik), allocatable, save :: blocks_reconstructed_list(:) ! NOTE: after 24/08/2022, the arrays lgt_active/lgt_n hvy_active/hvy_n as well as lgt_sortednumlist, @@ -577,12 +610,13 @@ subroutine wavelet_reconstruct_full_tree(params, hvy_block, hvy_tmp, tree_ID) ! synch SC and WC from coarser neighbours and same-level neighbours in order to apply the correct wavelet reconstruction ! Attention: This uses hvy_temp for coarser neighbors to predict the data, as we want the correct SC from coarser neighbors + write(string_prepare, '(A, i0)') "wavelet_reconstruct_syncN_lvl", level !< prepare format string for sync debug t_block = MPI_Wtime() - call sync_SCWC_from_MC( params, hvy_block, tree_ID, hvy_tmp, g_minus=params%g, g_plus=params%g, level=level) - call toc( "reconstruct_tree (sync lvl <- MC)", 115, MPI_Wtime()-t_block ) + call sync_SCWC_from_MC( params, hvy_block, tree_ID, hvy_tmp, g_minus=params%g, g_plus=params%g, level=level, sync_debug_name=string_prepare) + call toc( "reconstruct_tree (sync lvl <- MC)", 115, MPI_Wtime()-t_block) - write(toc_statement, '(A, i0, A)') "reconstruct_tree (it ", iteration, " sync level <- MC)" - call toc( toc_statement, 1300+iteration, MPI_Wtime()-t_block ) + write(format_string, '(A, i0, A)') "reconstruct_tree (it ", iteration, " sync level <- MC)" + call toc( format_string, 1300+iteration, MPI_Wtime()-t_block ) ! In theory, for full WT this CE_modify is only here for two reasons: ! 1. We need to clear the WC from coarser neighbors, as the values in there are interpolated SC and we need to set those to the correct WC being 0 @@ -596,6 +630,7 @@ subroutine wavelet_reconstruct_full_tree(params, hvy_block, hvy_tmp, tree_ID) ! Wavelet-reconstruct blocks on level t_block = MPI_Wtime() + blocks_reconstructed = 0 do k = 1, hvy_n(tree_ID) hvy_ID = hvy_active(k, tree_ID) call hvy2lgt( lgt_ID, hvy_ID, params%rank, params%number_blocks ) @@ -610,32 +645,53 @@ subroutine wavelet_reconstruct_full_tree(params, hvy_block, hvy_tmp, tree_ID) ! One might be concerned if we sync from hvy_tmp and do prediction/interpolation, as it does not have it's ghost layers synced ! however, we discard all interpolated values with the CE so we cleverly circumvent problems there hvy_tmp(:,:,:,1:size(hvy_block, 4),hvy_ID) = hvy_block(:,:,:,1:size(hvy_block, 4),hvy_ID) + blocks_reconstructed = blocks_reconstructed + 1 endif end do call toc( "reconstruct_tree (RWT)", 116, MPI_Wtime()-t_block ) - write(toc_statement, '(A, i0, A)') "reconstruct_tree (it ", iteration, " RWT)" - call toc( toc_statement, 1350+iteration, MPI_Wtime()-t_block ) + ! debug output to see how many blocks have been reconstructed, gather information on rank 0 and print to file + if (params%debug_wavelet_reconstruct) then + if (.not. allocated(blocks_reconstructed_list)) allocate(blocks_reconstructed_list(1:params%number_procs)) + + call MPI_GATHER(blocks_reconstructed, 1, MPI_INTEGER, blocks_reconstructed_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + open(unit=99, file=trim("debug_wavelet_reconstruct.csv"), status="unknown", position="append") + string_prepare = "-1.0E+00," ! set negative time, just to have csv with the same length in every row + if (present(time)) write(string_prepare,'(es16.6,",")') time + if (params%number_procs == 1) then + write(99,'(A, i0, ",", i0)') trim(adjustl(string_prepare)), level, blocks_reconstructed_list(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs + write(99,format_string) trim(adjustl(string_prepare)), level, blocks_reconstructed_list(1), blocks_reconstructed_list(2:params%number_procs) + endif + close(99) + endif + endif + + write(format_string, '(A, i0, A)') "reconstruct_tree (it ", iteration, " RWT)" + call toc( format_string, 1350+iteration, MPI_Wtime()-t_block ) ! now we need to update the SC of all daughters t_block = MPI_Wtime() - call sync_M2D(params, hvy_block, tree_ID, sync_case="level", s_val=level) + write(string_prepare, '(A, i0)') "wavelet_reconstruct_syncF_lvl", level !< prepare format string for sync debug + call sync_M2D(params, hvy_block, tree_ID, sync_case="level", s_val=level, sync_debug_name=string_prepare) call toc( "reconstruct_tree (sync level 2 daughters)", 117, MPI_Wtime()-t_block ) - write(toc_statement, '(A, i0, A)') "reconstruct_tree (it ", iteration, " sync level 2 daughters)" - call toc( toc_statement, 1400+iteration, MPI_Wtime()-t_block ) + write(format_string, '(A, i0, A)') "reconstruct_tree (it ", iteration, " sync level 2 daughters)" + call toc( format_string, 1400+iteration, MPI_Wtime()-t_block ) - write(toc_statement, '(A, i0, A)') "reconstruct_tree (it ", iteration, " TOTAL)" - call toc( toc_statement, 1450+iteration, MPI_Wtime()-t_loop ) + write(format_string, '(A, i0, A)') "reconstruct_tree (it ", iteration, " TOTAL)" + call toc( format_string, 1450+iteration, MPI_Wtime()-t_loop ) iteration = iteration + 1 enddo ! ! for debugging purposes - savin does a sync if any ghost values are NaN - ! write( toc_statement,'(A, i6.6, A)') 'TestWD_', 100, "000000.h5" - ! call saveHDF5_tree(toc_statement, dble(100), 100, 1, params, hvy_block, tree_ID) - ! write( toc_statement,'(A, i6.6, A)') 'TestN_', 100, "000000.h5" - ! call saveHDF5_tree(toc_statement, dble(100), 100, 1, params, hvy_tmp, tree_ID) + ! write( format_string,'(A, i6.6, A)') 'TestWD_', 100, "000000.h5" + ! call saveHDF5_tree(format_string, dble(100), 100, 1, params, hvy_block, tree_ID) + ! write( format_string,'(A, i6.6, A)') 'TestN_', 100, "000000.h5" + ! call saveHDF5_tree(format_string, dble(100), 100, 1, params, hvy_tmp, tree_ID) end subroutine @@ -654,7 +710,7 @@ subroutine init_full_tree(params, tree_ID, set_ref, verbose_check) integer(kind=ik) :: i_level, j, k, N, lgt_ID, hvy_ID, level_b, iteration, level, data_rank, lgt_merge_id, hvy_merge_id, hvy_n_old, digit, refset real(kind=rk) :: t_block, t_loop - character(len=clong) :: toc_statement + character(len=clong) :: format_string logical :: iterate ! list of block ids, proc ranks integer(kind=ik) :: lgt_daughters(1:8), rank_daughters(1:8) @@ -927,7 +983,7 @@ subroutine cvs_thresholding_local(params, hvy_block, hvy_tmp, hvy_work, indicato ! loop variables integer(kind=ik) :: iteration, k, k1, Jmax_active, Jmin_active, level, hvy_ID, lgt_ID, level_me, Jmin, g_spaghetti real(kind=rk) :: t_block, t_all, t_loop - character(len=clong) :: toc_statement + character(len=clong) :: format_string ! testing integer(kind=ik) :: ix, iy @@ -1110,7 +1166,7 @@ subroutine cvs_thresholding_global(params, hvy_block, hvy_tmp, hvy_work, indicat ! loop variables integer(kind=ik) :: iteration, k, k1, Jmax_active, Jmin_active, level, hvy_ID, lgt_ID, level_me, Jmin, g_spaghetti real(kind=rk) :: t_block, t_all, t_loop - character(len=clong) :: toc_statement + character(len=clong) :: format_string real(kind=rk) :: thresh_old(1:params%n_eqn), thresh_save(1:params%n_eqn+1), norm(1:params%n_eqn) diff --git a/LIB/MESH/balanceLoad_tree.f90 b/LIB/MESH/balanceLoad_tree.f90 index ab1cee30..be01c909 100644 --- a/LIB/MESH/balanceLoad_tree.f90 +++ b/LIB/MESH/balanceLoad_tree.f90 @@ -1,6 +1,6 @@ !> \image html balancing.svg "Load balancing" width=400 ! ******************************************************************************************** -subroutine balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement) +subroutine balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement, balance_name, time) ! it is not technically required to include the module here, but for VS code it reduces the number of wrong "errors" use module_params @@ -12,16 +12,22 @@ subroutine balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement) ! If true, we look at the refinement flags of each bock for distribution: blocks that have status +1 ! wil be refined to eight blocks, all others will just remain unchanged. logical, intent(in), optional :: balanceForRefinement + !> balanceLoad is always the same, however we want to differentiate between them, so we can assign a name + character(len=*), intent(in), optional :: balance_name + !> current simulation time, used for development output only + real(kind=rk), intent(in), optional :: time !============================================================================= integer(kind=ik) :: rank, mpirank_shouldBe, mpirank_currently, ierr, number_procs, & - k, N, l, com_i, com_N, sfc_id, & - lgt_free_id, hvy_free_id, hilbertcode(params%Jmax), lgt_ID, Nblocks_toDistribute + k, N, l, com_i, com_N, sfc_id, lgt_free_id, hvy_free_id, hilbertcode(params%Jmax), lgt_ID, Nblocks_toDistribute, & + num_blocks_count(3), i_var integer(kind=tsize) :: treecode, hilbertcode2 ! block distribution lists integer(kind=ik), allocatable, save :: blocksPerRank_balanced(:), sfc_com_list(:,:), sfc_sorted_list(:,:) real(kind=rk) :: t0, t1 logical :: balanceForRefinement2 + character(len=cshort) :: format_string, string_prepare + character(len=4) :: string_kind(3) ! NOTE: after 24/08/2022, the arrays lgt_active/lgt_n hvy_active/hvy_n as well as lgt_sortednumlist, @@ -45,6 +51,9 @@ subroutine balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement) balanceForRefinement2 = balanceForRefinement endif + ! development counters, just to see how many blocks are send/received/kept + num_blocks_count(1:3) = 0 + ! allocate block to proc lists if (.not.allocated(blocksPerRank_balanced)) allocate( blocksPerRank_balanced(1:number_procs)) ! allocate sfc com list, maximal number of communications is when every proc wants to send all of his blocks @@ -210,6 +219,20 @@ subroutine balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement) sfc_com_list(3, com_i) = sfc_sorted_list(1,k) ! light id of block end if + if (params%debug_balanceLoad) then + ! Development checks - count how many blocks are send, received or kept + if ( params%rank == mpirank_currently .and. mpirank_currently /= mpirank_shouldBe ) then + ! block is send away from this mpirank + num_blocks_count(1) = num_blocks_count(1) + 1 + elseif ( params%rank == mpirank_currently .and. mpirank_currently == mpirank_shouldBe ) then + ! block is kept on this mpirank + num_blocks_count(2) = num_blocks_count(2) + 1 + elseif ( params%rank == mpirank_shouldBe .and. mpirank_currently /= mpirank_shouldBe ) then + ! block is received by this mpirank + num_blocks_count(3) = num_blocks_count(3) + 1 + endif + endif + ! The blocksPerRank_balanced defines how many blocks this rank should have, and ! we just treated one (which either already was on the mpirank or will be on ! it after communication), so remove one item from the blocksPerRank_balanced @@ -240,6 +263,39 @@ subroutine balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement) ! the block xfer changes the light data, and afterwards active lists are outdated. call updateMetadata_tree(params, tree_ID) + ! development output, gather information on rank 0 and print to file + ! reuse blocksPerRank_balanced for this + if (params%debug_balanceLoad) then + string_kind = (/ "send", "keep", "recv" /) + + do i_var = 1, 3 + ! gather number of blocks send/received/kept on rank 0 + call MPI_GATHER(num_blocks_count(i_var), 1, MPI_INTEGER, blocksPerRank_balanced, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + if (present(balance_name)) then + string_prepare = trim(adjustl(balance_name))//"," + else + string_prepare = "balanceLoad," + endif + string_prepare = trim(adjustl(string_prepare))//trim(adjustl(string_kind(i_var)))//"," + if (present(time)) then + write(string_prepare,'(A,es12.6,",")') trim(adjustl(string_prepare)), time + else + string_prepare = trim(adjustl(string_prepare))//"-1.0E+00," ! set negative time, just to have csv with the same length in every row + endif + ! Single IO operation with dynamic format + open(unit=99, file='debug_balanceLoad.csv', status="unknown", position="append") + if (params%number_procs == 1) then + write(99, '(A, i0)') trim(adjustl(string_prepare)), blocksPerRank_balanced(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs - 1 + write(99, format_string) trim(adjustl(string_prepare)), blocksPerRank_balanced(1), blocksPerRank_balanced(2:params%number_procs) + endif + close(99) + endif + enddo + endif + ! timing call toc( "balanceLoad_tree (TOTAL)", 90, MPI_wtime()-t0 ) end subroutine balanceLoad_tree diff --git a/LIB/MESH/createEquidistantGrid_tree.f90 b/LIB/MESH/createEquidistantGrid_tree.f90 index 5ba4a41b..f0ecdf16 100644 --- a/LIB/MESH/createEquidistantGrid_tree.f90 +++ b/LIB/MESH/createEquidistantGrid_tree.f90 @@ -144,5 +144,5 @@ subroutine createEquidistantGrid_tree( params, hvy_block, Jmin, verbosity, tree_ ! the grid we created above is not at all balanced - on the contrary, it fills up CPU1, then CPU2 ! correct this by balancing: - call balanceLoad_tree(params, hvy_block, tree_ID) + call balanceLoad_tree(params, hvy_block, tree_ID, balance_name='createEquidistantGrid', time=0.0_rk ) end subroutine createEquidistantGrid_tree diff --git a/LIB/MESH/createMask_tree.f90 b/LIB/MESH/createMask_tree.f90 index 161c36b9..e4e5a765 100644 --- a/LIB/MESH/createMask_tree.f90 +++ b/LIB/MESH/createMask_tree.f90 @@ -10,11 +10,12 @@ subroutine createMask_tree(params, time, hvy_mask, hvy_tmp, all_parts) real(kind=rk), intent(inout) :: hvy_tmp(:, :, :, :, :) logical, intent(in), optional :: all_parts integer(kind=ik) :: k, lgt_id, Bs(1:3), g, hvy_id, iter, Jactive, Jmax - real(kind=rk) :: x0(1:3), dx(1:3), t0 + real(kind=rk) :: x0(1:3), dx(1:3) logical, save :: time_independent_part_ready = .false. logical :: force_all_parts + real(kind=rk) :: t_block, t_cycle - t0 = MPI_wtime() + t_cycle = MPI_wtime() Bs = params%Bs g = params%g Jactive = maxActiveLevel_tree(tree_ID_flow) @@ -31,6 +32,7 @@ subroutine createMask_tree(params, time, hvy_mask, hvy_tmp, all_parts) ! some mask functions have initialization routines (insects) which are to be called once and not for each ! block (efficiency). Usually, this would be a staging concept as well, but as only Thomas uses it anyways, cleanup ! is left as FIXME + ! You look outside the window and see Julius shaking his head in disapproval ... if (params_acm%geometry=="Insect") then call Update_Insect_wrapper(time) endif @@ -67,9 +69,10 @@ subroutine createMask_tree(params, time, hvy_mask, hvy_tmp, all_parts) !----------------------------------------------------------------------- if (((Jactive < Jmax-1 .and. params%force_maxlevel_dealiasing) .or. Jactive < Jmax) .or. (params%threshold_mask .eqv. .false.) .or. (force_all_parts)) then ! generate complete mask (may be expensive) + t_block = MPI_Wtime() call createCompleteMaskDirect_tree(params, time, hvy_mask) - call toc( "create_mask_tree (createCompleteMaskDirect_tree)", 210, MPI_Wtime()-t0 ) + call toc( "createMask_tree (createCompleteMaskDirect_tree)", 66, MPI_Wtime()-t_block ) ! we're done, all parts of mask function are created, leave routine now return @@ -84,14 +87,18 @@ subroutine createMask_tree(params, time, hvy_mask, hvy_tmp, all_parts) ! finest level (refined only where interesting). ! At most, mask in generated (Jmax-Jmin) times. if ((.not. time_independent_part_ready) .and. (params%mask_time_independent_part)) then + t_block = MPI_Wtime() call createTimeIndependentMask_tree(params, time, hvy_mask, hvy_tmp) + call toc( "createMask_tree (createTimeIndependentMask_tree)", 67, MPI_Wtime()-t_block ) time_independent_part_ready = .true. endif ! create "time-dependent-part" here, add the existing "time-independent-part" ! if it is available, return the complete mask incl. all parts + t_block = MPI_Wtime() if ( params%mask_time_dependent_part ) then + t_block = MPI_Wtime() do k = 1, hvy_n(tree_ID_flow) hvy_id = hvy_active(k, tree_ID_flow) @@ -118,9 +125,10 @@ subroutine createMask_tree(params, time, hvy_mask, hvy_tmp, all_parts) ! hvy_mask(:,:,:,6,hvy_id) = 0.0_rk ! sponge. as we keep it time-dependent, it is not required. enddo endif - + call toc( "createMask_tree (time-dependent-part)", 68, MPI_Wtime()-t_block ) if ( params%mask_time_independent_part ) then + t_block = MPI_Wtime() if (Jactive == params%Jmax ) then ! flow is on finest level: add complete mask on finest level ! this is the case when computing the right hand side. @@ -144,9 +152,10 @@ subroutine createMask_tree(params, time, hvy_mask, hvy_tmp, all_parts) else if (params%rank==0) write(*,'(A, i0)') "WARNING: mask generation with time independent part fails (flow grid neither on Jmax nor Jmax-1 but on level ", Jactive," )" endif + call toc( "createMask_tree (time-independent-part)", 69, MPI_Wtime()-t_block ) endif - call toc( "create_mask_tree (pruned_tree_logic)", 211, MPI_Wtime()-t0 ) + call toc( "createMask_tree (TOTAL)", 65, MPI_Wtime()-t_cycle ) end subroutine diff --git a/LIB/MESH/createRandomGrid_tree.f90 b/LIB/MESH/createRandomGrid_tree.f90 index 7c10e8fd..ea4f1494 100644 --- a/LIB/MESH/createRandomGrid_tree.f90 +++ b/LIB/MESH/createRandomGrid_tree.f90 @@ -67,5 +67,5 @@ subroutine createRandomGrid_tree( params, hvy_block, hvy_tmp, level_init, verbos dble(lgt_n(tree_ID)) / dble(size(lgt_block, 1)), params%max_grid_density endif - call balanceLoad_tree( params, hvy_block, tree_ID) + call balanceLoad_tree( params, hvy_block, tree_ID, balance_name='createRandomGrid', time=0.0_rk ) end subroutine createRandomGrid_tree diff --git a/LIB/MESH/executeCoarsening_tree.f90 b/LIB/MESH/executeCoarsening_tree.f90 index baa2093b..5a867e9e 100644 --- a/LIB/MESH/executeCoarsening_tree.f90 +++ b/LIB/MESH/executeCoarsening_tree.f90 @@ -122,7 +122,7 @@ end subroutine executeCoarsening_tree !> For CVS decomposition we need to update mothers from decomposed daughter values, this does exactly that -subroutine sync_D2M(params, hvy_block, tree_ID, sync_case, s_val) +subroutine sync_D2M(params, hvy_block, tree_ID, sync_case, s_val, sync_debug_name) ! it is not technically required to include the module here, but for VS code it reduces the number of wrong "errors" use module_params @@ -134,6 +134,7 @@ subroutine sync_D2M(params, hvy_block, tree_ID, sync_case, s_val) character(len=*) :: sync_case !< String representing which kind of syncing we want to do !> Additional value to be considered for syncing logic, can be level or refinement status from which should be synced, dependend on sync case integer(kind=ik), intent(in) :: s_val + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files ! loop variables integer(kind=ik) :: k, sync_case_id, nc, data_rank, n_xfer, lgt_ID, hvy_ID, level_me, ref_me @@ -192,7 +193,7 @@ subroutine sync_D2M(params, hvy_block, tree_ID, sync_case, s_val) ! actual xfer, this works on all blocks that are on this level and have a daughter n_xfer = 0 ! transfer counter - call prepare_update_family_metadata(params, tree_ID, n_xfer, sync_case="D2M_" // sync_case, ncomponents=nc, s_val=s_val) + call prepare_update_family_metadata(params, tree_ID, n_xfer, sync_case="D2M_" // sync_case, ncomponents=nc, s_val=s_val, sync_debug_name=sync_debug_name) call xfer_block_data(params, hvy_block, tree_ID, n_xfer, verbose_check=.true.) ! now the daughter blocks need to be retransformed to spaghetti @@ -220,7 +221,7 @@ end subroutine sync_D2M !> For CVS reconstruction we need to update daughters from reconstructed mothers, this does exactly that -subroutine sync_M2D(params, hvy_block, tree_ID, sync_case, s_val) +subroutine sync_M2D(params, hvy_block, tree_ID, sync_case, s_val, sync_debug_name) ! it is not technically required to include the module here, but for VS code it reduces the number of wrong "errors" use module_params @@ -232,6 +233,7 @@ subroutine sync_M2D(params, hvy_block, tree_ID, sync_case, s_val) character(len=*) :: sync_case !< String representing which kind of syncing we want to do !> Additional value to be considered for syncing logic, can be level or refinement status from which should be synced, dependend on sync case integer(kind=ik), intent(in) :: s_val + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files ! loop variables integer(kind=ik) :: k, sync_case_id, n_xfer, lgt_ID, hvy_ID, lgt_ID_m, level_m, ref_m, nc @@ -297,7 +299,7 @@ subroutine sync_M2D(params, hvy_block, tree_ID, sync_case, s_val) ! actual xfer, this works on all blocks that are on this level and have a daughter n_xfer = 0 ! transfer counter - call prepare_update_family_metadata(params, tree_ID, n_xfer, sync_case="M2D_" // sync_case, ncomponents=nc, s_val=s_val) + call prepare_update_family_metadata(params, tree_ID, n_xfer, sync_case="M2D_" // sync_case, ncomponents=nc, s_val=s_val, sync_debug_name=sync_debug_name) call xfer_block_data(params, hvy_block, tree_ID, n_xfer) !--------------------------------------------------------------------------- diff --git a/LIB/MESH/forest.f90 b/LIB/MESH/forest.f90 index dbfd6bd3..c89c9a10 100644 --- a/LIB/MESH/forest.f90 +++ b/LIB/MESH/forest.f90 @@ -215,17 +215,25 @@ subroutine add_pruned_to_full_tree( params, hvy_block, tree_ID_pruned, tree_ID_f integer(kind=ik), intent(in) :: tree_ID_pruned, tree_ID_full real(kind=rk), intent(inout) :: hvy_block(:, :, :, :, :) !< heavy data array - block data - integer(kind=ik) :: k, lgt_id, Jmax, hvy_id, rank, N, fsize, i + integer(kind=ik) :: k, lgt_id, Jmax, hvy_id, rank, N, fsize, i, ierr integer(kind=ik) :: lgt_id1, lgt_id2, hvy_id1, hvy_id2 integer(kind=ik) :: level1, level2, rank_pruned, rank_full, n_comm + integer(kind=ik) :: num_blocks_count(3), i_var ! debug counters: [send, recv, keep] logical :: exists integer(kind=ik), allocatable, save :: comm_list(:,:) integer(kind=tsize) :: treecode1, treecode2 + real(kind=rk) :: t_cycle, t_block + character(len=4) :: string_kind(3) + character(len=cshort) :: format_string fsize = params%forest_size Jmax = params%Jmax ! max treelevel rank = params%rank N = params%number_blocks + t_cycle = MPI_Wtime() + + ! debug counters: [send, recv, keep] + num_blocks_count(1:3) = 0 ! init treecode1 = 0_tsize; treecode2 = 0_tsize @@ -236,7 +244,9 @@ subroutine add_pruned_to_full_tree( params, hvy_block, tree_ID_pruned, tree_ID_f if (.not.allocated(comm_list)) allocate( comm_list( 3, params%number_procs*N ) ) + t_block = MPI_Wtime() call createActiveSortedLists_forest(params) + call toc("add_pruned_to_full_tree (createActiveSortedLists_forest)", 261, MPI_Wtime() - t_block) ! a pruned tree has fewer entries: loop over it instead of the fuller (fluid) one! ! @@ -256,6 +266,7 @@ subroutine add_pruned_to_full_tree( params, hvy_block, tree_ID_pruned, tree_ID_f ! holding the corresponding full trees block. ! Step 1a: prepare for xfer, gather all required xfers + t_block = MPI_Wtime() n_comm = 0 do k = 1, lgt_n(tree_ID_pruned) @@ -279,18 +290,36 @@ subroutine add_pruned_to_full_tree( params, hvy_block, tree_ID_pruned, tree_ID_f comm_list(1, n_comm) = rank_pruned ! sender mpirank comm_list(2, n_comm) = rank_full ! receiver mpirank comm_list(3, n_comm) = lgt_id1 ! block lgt_id to send + + ! count transferred blocks for this rank - used for development debugging output + if (params%rank == rank_pruned) then + num_blocks_count(1) = num_blocks_count(1) + 1 ! send + endif + if (params%rank == rank_full) then + num_blocks_count(2) = num_blocks_count(2) + 1 ! recv + endif + else + if (params%rank == rank_full) then + num_blocks_count(3) = num_blocks_count(3) + 1 ! keep + endif endif endif enddo + call toc("add_pruned_to_full_tree (gather comms)", 262, MPI_Wtime() - t_block) ! Step 1b: actual xfer. + t_block = MPI_Wtime() call block_xfer( params, comm_list, n_comm, hvy_block ) + call toc("add_pruned_to_full_tree (block_xfer)", 263, MPI_Wtime() - t_block) ! As some blocks have been transferred, the active lists are outdated. + t_block = MPI_Wtime() call createActiveSortedLists_tree(params, tree_ID_pruned) + call toc("add_pruned_to_full_tree (createActiveSortedLists_tree)", 264, MPI_Wtime() - t_block) ! Step 2: ADDITION. now we're sure that blocks existing in both trees are on the ! same mpirank. therefore, the responsible mpirank can just add them together. + t_block = MPI_Wtime() do k = 1, hvy_n(tree_ID_pruned) hvy_id1 = hvy_active(k, tree_ID_pruned) @@ -339,6 +368,30 @@ subroutine add_pruned_to_full_tree( params, hvy_block, tree_ID_pruned, tree_ID_f ! But that is no problem - We just ignore it endif enddo + call toc("add_pruned_to_full_tree (addition)", 265, MPI_Wtime() - t_block) + + if (params%debug_pruned2full) then + ! development output, gather information on rank 0 and print to file + string_kind = (/ "send", "recv", "keep" /) + + do i_var = 1, 3 + ! gather number of blocks transferred/added/ignored on rank 0 + call MPI_GATHER(num_blocks_count(i_var), 1, MPI_INTEGER, comm_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + ! Single IO operation with dynamic format + open(unit=99, file="debug_pruned2full.csv", status="unknown", position="append") + if (params%number_procs == 1) then + write(99, '(A,i0)') string_kind(i_var)//",", comm_list(1,1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs - 1 + write(99, format_string) string_kind(i_var)//",", comm_list(1,1), comm_list(1,2:params%number_procs) + endif + close(99) + endif + enddo + endif + + call toc("add_pruned_to_full_tree (TOTAL)", 260, MPI_Wtime() - t_cycle) end subroutine @@ -510,7 +563,7 @@ function compute_tree_L2norm(params, hvy_block, hvy_tmp, tree_ID, verbosity ) & !> This routine is usefull, when trying to go back to a old !> treestructure, after calling for example refine_trees2sametreelevel. subroutine coarse_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref, lgt_n_ref, & - hvy_block, hvy_tmp, tree_ID, verbosity) + hvy_block, hvy_tmp, tree_ID, ref_can_be_finer, verbosity) implicit none !----------------------------------------------------------------- @@ -521,16 +574,20 @@ subroutine coarse_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref, l real(kind=rk), intent(inout) :: hvy_block(:, :, :, :, :) !< heavy data array - block data integer(kind=ik), intent(inout) :: lgt_active_ref(:) !< active lists real(kind=rk), intent(inout) :: hvy_tmp(:, :, :, :, :) ! used for saving, filtering, and helper qtys + !> if true, the routine will also work if the reference mesh has some finer blocks, needs refine_tree_2_reference_mesh afterwards + logical, intent(in),optional :: ref_can_be_finer logical, intent(in),optional :: verbosity !----------------------------------------------------------------- integer(kind=ik) :: rank, level_ref, level, Jmax, lgt_id_ref, lgt_id, fsize integer(kind=ik) :: k1, k2, Nblocks_2coarsen, level_min integer(kind=tsize) :: treecode_ref, treecode - logical :: verbose, exists + logical :: verbose, exists, refCanBeFiner Jmax = params%Jmax ! max treelevel fsize= params%forest_size ! maximal number of trees in forest verbose = .false. + refCanBeFiner = .false. + if (present(ref_can_be_finer)) refCanBeFiner = ref_can_be_finer if (present(verbosity)) verbose=verbosity @@ -541,7 +598,8 @@ subroutine coarse_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref, l ! gives all blocks with identical TC/level to reference tree the status 0 and deletes all finer blocks, voila ! init full grid structure on the actual tree - call init_full_tree(params, tree_ID, set_ref=0, verbose_check=.false.) + call init_full_tree(params, tree_ID, set_ref=-1, verbose_check=.false.) + if (params%rank==0 .and. verbose) write(*,'("coarse_tree_2_reference_mesh : init_full_tree completed, size Nb=",i7)') lgt_n(tree_ID) ! init all ref stats to -1 do k1 = 1, lgt_n(tree_ID) @@ -560,14 +618,30 @@ subroutine coarse_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref, l call doesBlockExist_tree(treecode_ref, exists, lgt_id, dim=params%dim, level=level_ref, tree_id=tree_ID, max_level=params%Jmax) - if (.not. exists) then - call abort(20200806, "Something went wrong: Block on reference tree1 is too fine!!!") + ! if the block does not exists the reference block is finer than the actual tree (or Jmin is not respected) + ! we have two options: + ! 1) panic, scream and abort, because something went wrong (default) + ! 2) if refCanBeFiner is set to true, we coarsen the reference block until we find a block on the actual tree + ! or we fall below Jmin, means we just keep the finest mesh possible at that position and afterwards need to refine as well + if (.not. exists .and. .not. refCanBeFiner) then + call abort(250930, "Something went wrong: Block on reference tree1 is too fine (or lower than Jmin)!!!") + endif + if (.not. exists .and. refCanBeFiner) then + do while( .not. exists .and. level_ref >= params%Jmin ) + level_ref = level_ref - 1 + treecode_ref = tc_clear_until_level_b( treecode_ref, params%dim, level_ref, params%Jmax) + call doesBlockExist_tree(treecode_ref, exists, lgt_id, dim=params%dim, level=level_ref, tree_id=tree_ID, max_level=params%Jmax) + end do + if (.not. exists) then + call abort(250930, "Something went wrong: I have no Idea how you ended up here") + endif endif ! set lgt_id to be kept lgt_block(lgt_id, IDX_REFINE_STS) = 0 end do ! loop over reference blocks + if (params%rank==0 .and. verbose) write(*,'("coarse_tree_2_reference_mesh : marking blocks completed")') !---------------------------- !---------------------------- @@ -579,6 +653,109 @@ subroutine coarse_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref, l end subroutine + +!############################################################## +!> This function refines a tree to its reference mesh. +!> The reference mesh is saved in lgt_..._ref and stores the treecode and +!> all necessary light data. +!> All blocks on the actual tree are refined to have the same +!> treecode structure as the reference. +subroutine refine_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref, lgt_n_ref, & + hvy_block, hvy_tmp, tree_ID, ref_can_be_coarser, verbosity) + + implicit none + !----------------------------------------------------------------- + type (type_params), intent(inout) :: params !< params structure + integer(kind=ik), intent(in) :: tree_ID !< number of the tree + integer(kind=ik), intent(inout) :: lgt_n_ref !< number of light active blocks + integer(kind=ik), intent(inout) :: lgt_block_ref(:, : ) !< light data array + real(kind=rk), intent(inout) :: hvy_block(:, :, :, :, :) !< heavy data array - block data + integer(kind=ik), intent(inout) :: lgt_active_ref(:) !< active lists + real(kind=rk), intent(inout) :: hvy_tmp(:, :, :, :, :) ! used for saving, filtering, and helper qtys + !> if true, the routine will also work if the reference mesh has some coarser blocks, needs coarse_tree_2_reference_mesh afterwards + logical, intent(in),optional :: ref_can_be_coarser + logical, intent(in),optional :: verbosity + !----------------------------------------------------------------- + integer(kind=ik) :: level_ref, level, Jmax, lgt_id_ref, lgt_id, fsize + integer(kind=ik) :: k1, k2, Nblocks_2coarsen, level_min, mpierr, loop_i + integer(kind=tsize) :: treecode_ref, treecode + logical :: verbose, exists, refCanBeCoarser, continue_loop + + Jmax = params%Jmax ! max treelevel + fsize= params%forest_size ! maximal number of trees in forest + verbose = .false. + refCanBeCoarser = .false. + if (present(ref_can_be_coarser)) refCanBeCoarser = ref_can_be_coarser + + if (present(verbosity)) verbose=verbosity + + call updateMetadata_tree(params, tree_ID) + + ! Operations (addition, multiplication, etc) using two trees (=grids) require both trees (=grids) + ! to be identical. Therefore, this routine flags all blocks for refinement which are not on the reference tree, but a higher one is + + ! init full grid structure on the actual tree, so that we also check if reference mesh is coarser + if (refCanBeCoarser) call init_full_tree(params, tree_ID, set_ref=0, verbose_check=.false.) + + continue_loop = .true. + loop_i = 1 + do while (continue_loop) + continue_loop = .false. + if (params%rank==0 .and. verbose) write(*,'("refine_tree_2_reference_mesh : loop ",i3)') loop_i + + ! init all ref stats to 0 + do k1 = 1, lgt_n(tree_ID) + lgt_id = lgt_active(k1, tree_ID) + lgt_block(lgt_id, IDX_REFINE_STS) = 0 + end do + + !---------------------------- + ! loop over all blocks of the reference tree and find the corresponding block with our sorted list to be kept + do k1 = 1, lgt_n_ref + lgt_id_ref = lgt_active_ref(k1) + level_ref = lgt_block_ref(lgt_id_ref, IDX_MESH_LVL) + + ! get the treecode of the reference block + treecode_ref = get_tc(lgt_block_ref(lgt_id_ref, IDX_TC_1 : IDX_TC_2)) + + call doesBlockExist_tree(treecode_ref, exists, lgt_id, dim=params%dim, level=level_ref, tree_id=tree_ID, max_level=params%Jmax) + + ! if the block does not exists the reference block is finer than the actual tree (or Jmin is not respected) + ! we now go downwards in levels until we find the mother block and set it to be refined. + ! If this does not exist, then the reference mesh is coarser than the actual mesh or Jmin is not respected + if (.not. exists) then + do while( .not. exists .and. level_ref >= params%Jmin ) + level_ref = level_ref - 1 + treecode_ref = tc_clear_until_level_b( treecode_ref, params%dim, level_ref, params%Jmax) + call doesBlockExist_tree(treecode_ref, exists, lgt_id, dim=params%dim, level=level_ref, tree_id=tree_ID, max_level=params%Jmax) + end do + if (.not. exists) then + call abort(250930, "Something went wrong: Block on reference tree2 is too coarse (or lower than Jmin)!!!") + endif + ! set lgt_id to be refined + lgt_block(lgt_id, IDX_REFINE_STS) = 1 + endif + + end do ! loop over reference blocks + !---------------------------- + + !---------------------------- + ! refine the tagged blocks + call refine_tree( params, hvy_block, indicator='nothing (external)', tree_id=tree_ID, error_OOM=exists, check_full_tree=.false.) + !---------------------------- + + ! gather continue_loop to see if any mpirank wants to continue + call MPI_Allreduce(MPI_IN_PLACE, continue_loop, 1, MPI_LOGICAL, MPI_LOR, WABBIT_COMM, mpierr) + + end do + + if (params%rank==0 .and. verbose) write(*,'("refine_tree_2_reference_mesh : refinement completed, size Nb=",i7)') lgt_n(tree_ID) + + ! remove full tree + if (refCanBeCoarser) call prune_fulltree2leafs(params, tree_ID) + +end subroutine + !> \brief Stores the lgt data of two trees in an extra array as backup subroutine store_ref_meshes(lgt_block_ref, lgt_active_ref, lgt_n_ref, tree_ID1, tree_ID2) diff --git a/LIB/MESH/ini_file_to_params.f90 b/LIB/MESH/ini_file_to_params.f90 index 893ba8d0..30acc61d 100644 --- a/LIB/MESH/ini_file_to_params.f90 +++ b/LIB/MESH/ini_file_to_params.f90 @@ -16,7 +16,7 @@ subroutine ini_file_to_params( params, filename ) real(kind=rk) :: maxmem, mem_per_block, nstages ! string read from command line call character(len=cshort) :: memstring - integer(kind=ik) :: d,i, Nblocks_Jmax, g, Neqn, Nrk, g_RHS_min, diff_L, diff_R, Bs(1:3) + integer(kind=ik) :: d,i, Nblocks_Jmax, g, N_files, Nrk, g_RHS_min, diff_L, diff_R, Bs(1:3) rank = params%rank number_procs = params%number_procs @@ -42,6 +42,17 @@ subroutine ini_file_to_params( params, filename ) params%symmetry_vector_component = "0" call read_param_mpi(FILE, 'Domain', 'symmetry_vector_component', params%symmetry_vector_component, params%symmetry_vector_component ) + !*************************************************************************** + ! read time statistics parameters - before reading to decide how many files are read in + call read_param_mpi(FILE, 'Time-Statistics', 'time_statistics', params%time_statistics, .false.) + if (params%time_statistics) then + call read_param_mpi(FILE, 'Time-Statistics', 'N_time_statistics', params%N_time_statistics, 1) + allocate( params%time_statistics_names(1:params%N_time_statistics) ) + call read_param_mpi(FILE, 'Time-Statistics', 'time_statistics_names', params%time_statistics_names, (/ "none" /)) + call read_param_mpi(FILE, 'Time-Statistics', 'read_from_files_time_statistics', params%read_from_files_time_statistics, .false.) + call read_param_mpi(FILE, 'Time-Statistics', 'time_statistics_start_time', params%time_statistics_start_time, 0.0_rk) + endif + !************************************************************************** ! read INITIAL CONDITION parameters @@ -61,7 +72,13 @@ subroutine ini_file_to_params( params, filename ) if (params%read_from_files ) then ! read variable names - allocate( params%input_files( params%n_eqn ) ) + N_files = params%n_eqn + ! sometimes we want to read in time_statistics, sometimes we do not want that, this affects the number of files to read in + if (params%time_statistics .and. .not. params%read_from_files_time_statistics) then + N_files = N_files - params%N_time_statistics + endif + + allocate( params%input_files( N_files ) ) params%input_files = "---" call read_param_mpi(FILE, 'Physics', 'input_files', params%input_files, params%input_files, check_file_exists=.true. ) @@ -81,7 +98,8 @@ subroutine ini_file_to_params( params, filename ) call read_param_mpi(FILE, 'Discretization', 'poisson_order', params%poisson_order, "FD_4th_comp_1_3") call read_param_mpi(FILE, 'Discretization', 'poisson_cycle_end_criteria', params%poisson_cycle_end_criteria, "fixed_iterations") call read_param_mpi(FILE, 'Discretization', 'poisson_cycle_it', params%poisson_cycle_it, 6) - call read_param_mpi(FILE, 'Discretization', 'poisson_cycle_tol', params%poisson_cycle_tol, 1.0e-6_rk) + call read_param_mpi(FILE, 'Discretization', 'poisson_cycle_tol_abs', params%poisson_cycle_tol_abs, 1.0e-6_rk) + call read_param_mpi(FILE, 'Discretization', 'poisson_cycle_tol_rel', params%poisson_cycle_tol_rel, 1.0e-3_rk) call read_param_mpi(FILE, 'Discretization', 'poisson_cycle_max_it', params%poisson_cycle_max_it, 100) call read_param_mpi(FILE, 'Discretization', 'poisson_GS_it', params%poisson_GS_it, 8) call read_param_mpi(FILE, 'Discretization', 'poisson_Sync_it', params%poisson_Sync_it, 2) @@ -103,15 +121,6 @@ subroutine ini_file_to_params( params, filename ) call read_param_mpi(FILE, 'Statistics', 'nsave_stats', params%nsave_stats, 99999999_ik ) call read_param_mpi(FILE, 'Statistics', 'tsave_stats', params%tsave_stats, 9999999.9_rk ) - !*************************************************************************** - ! read time statistics parameters - call read_param_mpi(FILE, 'Time-Statistics', 'time_statistics', params%time_statistics, .false.) - if (params%time_statistics) then - call read_param_mpi(FILE, 'Time-Statistics', 'N_time_statistics', params%N_time_statistics, 1) - allocate( params%time_statistics_names(1:params%N_time_statistics) ) - call read_param_mpi(FILE, 'Time-Statistics', 'time_statistics_names', params%time_statistics_names, (/ "none" /)) - endif - !*************************************************************************** ! WABBIT needs to know about the mask function (if penalization is used): does it contain ! a time-dependent-part (e.g. moving obstacles, time-dependent forcing)? does it contain @@ -133,6 +142,13 @@ subroutine ini_file_to_params( params, filename ) call read_param_mpi(FILE, 'Debug', 'test_ghost_nodes_synch', params%test_ghost_nodes_synch, .true.) call read_param_mpi(FILE, 'Debug', 'test_wavelet_decomposition', params%test_wavelet_decomposition, .true.) + call read_param_mpi(FILE, 'Debug', 'debug_balanceLoad', params%debug_balanceLoad, .false.) + call read_param_mpi(FILE, 'Debug', 'debug_refinement', params%debug_refinement, .false.) + call read_param_mpi(FILE, 'Debug', 'debug_wavelet_decompose', params%debug_wavelet_decompose, .false.) + call read_param_mpi(FILE, 'Debug', 'debug_wavelet_reconstruct', params%debug_wavelet_reconstruct, .false.) + call read_param_mpi(FILE, 'Debug', 'debug_sync', params%debug_sync, .false.) + call read_param_mpi(FILE, 'Debug', 'debug_pruned2full', params%debug_pruned2full, .false.) + ! Hack. ! Small ascii files are written with the module_t_files, which is just a buffered wrapper. ! Instead of directly dumping the files to disk, it collects data and flushes after "flush_frequency" @@ -332,6 +348,9 @@ subroutine ini_blocks(params, FILE ) elseif (params%wavelet(4:4) == "6") then g_default = 5 g_RHS_default = 3 + elseif (params%wavelet(4:4) == "8") then + g_default = 7 + g_RHS_default = 4 else call abort(2320242, "no default specified for this wavelet...") endif @@ -420,7 +439,10 @@ subroutine ini_blocks(params, FILE ) allocate(params%threshold_state_vector_component(1:params%n_eqn)) ! as default, use ones (all components used for indicator) tmp = 1.0_rk - call read_param_mpi(FILE, 'Blocks', 'threshold_state_vector_component', tmp, tmp ) + ! only read in threshold_state_vector_component if needed + if ((params%adapt_tree .and. params%coarsening_indicator=="threshold-state-vector") .or. (params%adapt_inicond .and. params%coarsening_indicator_inicond=="threshold-state-vector")) then + call read_param_mpi(FILE, 'Blocks', 'threshold_state_vector_component', tmp, tmp ) + end if do i = 1, params%n_eqn params%threshold_state_vector_component(i) = nint(tmp(i)) enddo diff --git a/LIB/MESH/module_mesh.f90 b/LIB/MESH/module_mesh.f90 index 477bfeba..04f9ee02 100644 --- a/LIB/MESH/module_mesh.f90 +++ b/LIB/MESH/module_mesh.f90 @@ -15,9 +15,6 @@ module module_mesh use module_operators ! used in executeCoarsening_tree use module_helpers - ! used for multigrid solver - use module_fft - use module_poisson ! if the threshold_mask option is used, then the mesh module needs to create the mask function here ! hence we require the metamodule to be used. use module_physics_metamodule @@ -66,7 +63,6 @@ module module_mesh #include "check_lgt_block_synchronization.f90" #include "remove_nonperiodic_neighbors.f90" #include "forest.f90" -#include "multigrid_vcycle.f90" ! former module_initialization #include "setInitialCondition_tree.f90" diff --git a/LIB/MESH/refine_tree.f90 b/LIB/MESH/refine_tree.f90 index dc58d0f8..5b5b07c2 100644 --- a/LIB/MESH/refine_tree.f90 +++ b/LIB/MESH/refine_tree.f90 @@ -12,7 +12,7 @@ !! output: - light and heavy data arrays ! ******************************************************************************************** -subroutine refine_tree( params, hvy_block, indicator, tree_ID, error_OOM, check_full_tree) +subroutine refine_tree( params, hvy_block, indicator, tree_ID, error_OOM, check_full_tree, time) use module_indicators implicit none @@ -23,6 +23,7 @@ subroutine refine_tree( params, hvy_block, indicator, tree_ID, error_OOM, check_ integer(kind=ik), intent(in) :: tree_ID logical, intent(out) :: error_OOM !> Out-of-memory error, causes the main time loop to exit. logical, intent(in), optional :: check_full_tree !> if true, checks if a full tree can be created in deterministic fashion + real(kind=rk), intent(in), optional :: time !> current simulation time, used for development output only ! cpu time variables for running time calculation @@ -75,7 +76,7 @@ subroutine refine_tree( params, hvy_block, indicator, tree_ID, error_OOM, check_ ! which means 2**D blocks after refinement), but is better than before. ! To ensure perfect balancing, a second step is done after the actual refinement. if (indicator /= "everywhere") then - call balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement=.true.) + call balanceLoad_tree( params, hvy_block, tree_ID, balanceForRefinement=.true., balance_name='refine_pre', time=time ) endif !--------------------------------------------------------------------------- @@ -93,7 +94,7 @@ subroutine refine_tree( params, hvy_block, indicator, tree_ID, error_OOM, check_ !> (d) execute refinement, interpolate the new mesh. Blocks with status=+1 are refined, often !! that means all blocks are refined. t1 = MPI_Wtime() - call refinement_execute_tree( params, hvy_block, tree_ID ) + call refinement_execute_tree( params, hvy_block, tree_ID, time=time ) call toc( "refine_tree (refinement_execute)", 144, MPI_Wtime()-t1 ) @@ -115,7 +116,7 @@ subroutine refine_tree( params, hvy_block, indicator, tree_ID, error_OOM, check_ !! This of course implies any other indicator than "everywhere" requires balancing here. if ((params%force_maxlevel_dealiasing .eqv. .false.) .or. (indicator/="everywhere")) then t1 = MPI_Wtime() - call balanceLoad_tree( params, hvy_block, tree_ID ) + call balanceLoad_tree( params, hvy_block, tree_ID, balance_name='refine_post', time=time ) call toc( "refine_tree (balanceLoad_tree)", 145, MPI_Wtime()-t1 ) endif diff --git a/LIB/MESH/refinementExecute.f90 b/LIB/MESH/refinementExecute.f90 index 61f24e0e..f91977e8 100644 --- a/LIB/MESH/refinementExecute.f90 +++ b/LIB/MESH/refinementExecute.f90 @@ -229,27 +229,52 @@ end subroutine refineBlock2SpaghettiWD !! input: - params, light and heavy data \n !! output: - light and heavy data arrays \n ! ******************************************************************************************** -subroutine refinement_execute_tree( params, hvy_block, tree_ID ) +subroutine refinement_execute_tree( params, hvy_block, tree_ID, time ) implicit none type (type_params), intent(in) :: params !< user defined parameter structure real(kind=rk), intent(inout) :: hvy_block(:, :, :, :, :) !< heavy data array - block data integer(kind=ik), intent(in) :: tree_ID !< tree to work on + real(kind=rk), intent(in), optional :: time !< current simulation time, used for development output only - integer(kind=ik) :: i_b, lgt_id + integer(kind=ik) :: i_b, lgt_id, ierr + integer(kind=ik) :: blocks_refined + integer(kind=ik), allocatable, save :: blocks_refined_list(:) + character(len=clong) :: format_string, string_prepare ! every proc loop over its active heavy data array + blocks_refined = 0 do i_b = 1, hvy_n(tree_ID) ! light data id call hvy2lgt( lgt_id, hvy_active(i_b, tree_ID), params%rank, params%number_blocks ) ! block wants to refine if ( (lgt_block( lgt_id, idx_refine_sts) == +1) ) then + blocks_refined = blocks_refined + 1 call refineBlock(params, hvy_block, hvy_active(i_b, tree_ID), tree_ID) endif end do + if (params%debug_refinement) then + if (.not. allocated(blocks_refined_list)) allocate(blocks_refined_list(1:params%number_procs)) + + ! debug output to see how many blocks have been refined, gather information on rank 0 and print to file + call MPI_GATHER(blocks_refined, 1, MPI_INTEGER, blocks_refined_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + open(unit=99, file=trim("debug_refinement.csv"), status="unknown", position="append") + string_prepare = "-1.0E+00," ! set negative time, just to have csv with the same length in every row + if (present(time)) write(string_prepare,'(es16.6,",")') time + if (params%number_procs == 1) then + write(99,'(A, i0)') trim(adjustl(string_prepare)), blocks_refined_list(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs - 1 + write(99,format_string) trim(adjustl(string_prepare)), blocks_refined_list(1), blocks_refined_list(2:params%number_procs) + endif + close(99) + endif + endif + ! synchronize light data call synchronize_lgt_data( params, refinement_status_only=.false. ) diff --git a/LIB/MESH/setInitialCondition_tree.f90 b/LIB/MESH/setInitialCondition_tree.f90 index 13279dc3..82dc15e7 100644 --- a/LIB/MESH/setInitialCondition_tree.f90 +++ b/LIB/MESH/setInitialCondition_tree.f90 @@ -235,7 +235,7 @@ subroutine setInitialCondition_tree(params, hvy_block, tree_ID, adapt, time, ite call updateMetadata_tree(params, tree_ID) ! balance the load - call balanceLoad_tree(params, hvy_block, tree_ID) + call balanceLoad_tree(params, hvy_block, tree_ID, balance_name='setInitialCondition', time=time ) ! synchronize ghosts now, in order to start with a clean grid. NOTE this can actually be removed, but ! it is a safety issue. Better simply keep it. diff --git a/LIB/MPI/synchronize_ghosts_generic.f90 b/LIB/MPI/synchronize_ghosts_generic.f90 index 71d33a56..854c0441 100644 --- a/LIB/MPI/synchronize_ghosts_generic.f90 +++ b/LIB/MPI/synchronize_ghosts_generic.f90 @@ -1,6 +1,6 @@ !> Wrapper to synch blocks with temporary flag from finer neighbours and same-level neighbors !> Used before wavelet decomposition -subroutine sync_level_from_M(params, hvy_block, tree_ID, level, g_minus, g_plus) +subroutine sync_level_from_M(params, hvy_block, tree_ID, level, g_minus, g_plus, sync_debug_name) implicit none type (type_params), intent(in) :: params @@ -8,6 +8,7 @@ subroutine sync_level_from_M(params, hvy_block, tree_ID, level, g_minus, g_plus) integer(kind=ik), intent(in) :: tree_ID !< which tree to study integer(kind=ik), intent(in) :: level !< level value to be checked against integer(kind=ik), optional, intent(in) :: g_minus, g_plus !< Boundary sizes in case we want to send less values + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files integer(kind=ik) :: gminus, gplus gminus = params%g @@ -17,14 +18,14 @@ subroutine sync_level_from_M(params, hvy_block, tree_ID, level, g_minus, g_plus) if (present(g_plus)) gplus = g_plus call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, spaghetti_form=.false., sync_case="Monly_level", & - s_val=level) + s_val=level, sync_debug_name=sync_debug_name) end subroutine sync_level_from_M !> Wrapper to synch blocks with temporary flag from finer neighbours and same-level neighbors !> Used before wavelet decomposition -subroutine sync_TMP_from_MF(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_minus, g_plus) +subroutine sync_TMP_from_MF(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_minus, g_plus, sync_debug_name) implicit none type (type_params), intent(in) :: params @@ -34,6 +35,7 @@ subroutine sync_TMP_from_MF(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_mi !> heavy temp data array - block data of preserved values before the WD, used in adapt_tree as neighbours already might be wavelet decomposed real(kind=rk), intent(inout), optional :: hvy_tmp(:, :, :, :, :) integer(kind=ik), optional, intent(in) :: g_minus, g_plus !< Boundary sizes in case we want to send less values + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files integer(kind=ik) :: gminus, gplus gminus = params%g @@ -43,7 +45,7 @@ subroutine sync_TMP_from_MF(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_mi if (present(g_plus)) gplus = g_plus call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, spaghetti_form=.false., sync_case="MoF_ref", & - s_val=ref_check, hvy_tmp=hvy_tmp) + s_val=ref_check, hvy_tmp=hvy_tmp, sync_debug_name=sync_debug_name) end subroutine sync_TMP_from_MF @@ -51,7 +53,7 @@ end subroutine sync_TMP_from_MF !> Wrapper to synch blocks with temporary flag from all neighbors !> Used before wavelet decomposition -subroutine sync_TMP_from_all(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_minus, g_plus) +subroutine sync_TMP_from_all(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_minus, g_plus, sync_debug_name) implicit none type (type_params), intent(in) :: params @@ -61,6 +63,7 @@ subroutine sync_TMP_from_all(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_m !> heavy temp data array - block data of preserved values before the WD, used in adapt_tree as neighbours already might be wavelet decomposed real(kind=rk), intent(inout), optional :: hvy_tmp(:, :, :, :, :) integer(kind=ik), optional, intent(in) :: g_minus, g_plus !< Boundary sizes in case we want to send less values + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files integer(kind=ik) :: gminus, gplus gminus = params%g @@ -70,7 +73,7 @@ subroutine sync_TMP_from_all(params, hvy_block, tree_ID, ref_check, hvy_tmp, g_m if (present(g_plus)) gplus = g_plus call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, spaghetti_form=.false., sync_case="full_ref", & - s_val=ref_check, hvy_tmp=hvy_tmp) + s_val=ref_check, hvy_tmp=hvy_tmp, sync_debug_name=sync_debug_name) end subroutine sync_TMP_from_all @@ -78,7 +81,7 @@ end subroutine sync_TMP_from_all !> Wrapper to synch from coarser neighbours and same-level neighbors !! Used after coarse extension to update SC and WC, coarse neighbours need to be synched from hvy_tmp -subroutine sync_SCWC_from_MC(params, hvy_block, tree_ID, hvy_tmp, g_minus, g_plus, level) +subroutine sync_SCWC_from_MC(params, hvy_block, tree_ID, hvy_tmp, g_minus, g_plus, level, sync_debug_name) implicit none type (type_params), intent(in) :: params @@ -87,6 +90,7 @@ subroutine sync_SCWC_from_MC(params, hvy_block, tree_ID, hvy_tmp, g_minus, g_plu !> heavy temp data array - block data of preserved values before the WD, used in adapt_tree as neighbours already might be wavelet decomposed real(kind=rk), intent(inout) :: hvy_tmp(:, :, :, :, :) integer(kind=ik), optional, intent(in) :: g_minus, g_plus, level + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files integer(kind=ik) :: gminus, gplus gminus = params%g @@ -98,17 +102,17 @@ subroutine sync_SCWC_from_MC(params, hvy_block, tree_ID, hvy_tmp, g_minus, g_plu ! if we passed on the level then sync is level-wise if (present(level)) then call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, spaghetti_form=.true., sync_case="MoC_level", & - s_val=level, hvy_tmp=hvy_tmp) + s_val=level, hvy_tmp=hvy_tmp, sync_debug_name=sync_debug_name) else call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, spaghetti_form=.true., sync_case="MoC", & - hvy_tmp=hvy_tmp) + hvy_tmp=hvy_tmp, sync_debug_name=sync_debug_name) endif end subroutine sync_SCWC_from_MC !> Wrapper to synch all ghost-point patches -subroutine sync_ghosts_tree(params, hvy_block, tree_ID, g_minus, g_plus, ignore_Filter) +subroutine sync_ghosts_tree(params, hvy_block, tree_ID, g_minus, g_plus, ignore_Filter, sync_debug_name) implicit none type (type_params), intent(in) :: params @@ -116,6 +120,7 @@ subroutine sync_ghosts_tree(params, hvy_block, tree_ID, g_minus, g_plus, ignore_ integer(kind=ik), intent(in) :: tree_ID !< which tree to study integer(kind=ik), optional, intent(in) :: g_minus, g_plus !< set specific ghost node sizes to be synced logical, optional, intent(in) :: ignore_Filter !< ignore restriction filter, defaults to false + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files integer(kind=ik) :: gminus, gplus logical :: do_ignore_Filter @@ -128,7 +133,7 @@ subroutine sync_ghosts_tree(params, hvy_block, tree_ID, g_minus, g_plus, ignore_ if (present(ignore_Filter)) do_ignore_Filter = ignore_Filter ! set level to -1 to enable synching between all, set stati to send to all levels - call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, sync_case="full", spaghetti_form=.false., ignore_Filter=do_ignore_Filter) + call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, sync_case="full", spaghetti_form=.false., ignore_Filter=do_ignore_Filter, sync_debug_name=sync_debug_name) end subroutine sync_ghosts_tree @@ -141,13 +146,14 @@ end subroutine sync_ghosts_tree ! ! One might ask, why for RHS with only +-shaped stencils we still synch the diagonals (edges/corners) as well? ! This is, because within the synching we apply the coarsening and interpolation filters at the interfaces, which need the other stencils as well! -subroutine sync_ghosts_RHS_tree(params, hvy_block, tree_ID, g_minus, g_plus) +subroutine sync_ghosts_RHS_tree(params, hvy_block, tree_ID, g_minus, g_plus, sync_debug_name) implicit none type (type_params), intent(in) :: params real(kind=rk), intent(inout) :: hvy_block(:, :, :, :, :) !< heavy data array - block data integer(kind=ik), intent(in) :: tree_ID !< which tree to study integer(kind=ik), optional, intent(in) :: g_minus, g_plus + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files integer(kind=ik) :: gminus, gplus gminus = params%g @@ -157,7 +163,7 @@ subroutine sync_ghosts_RHS_tree(params, hvy_block, tree_ID, g_minus, g_plus) if (present(g_plus)) gplus = g_plus ! set level to -1 to enable synching between all, set stati to send to all levels - call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, sync_case="full_leaf", spaghetti_form=.false., ignore_Filter=.true.) + call sync_ghosts_generic(params, hvy_block, tree_ID, g_minus=gminus, g_plus=gplus, sync_case="full_leaf", spaghetti_form=.false., ignore_Filter=.true., sync_debug_name=sync_debug_name) end subroutine @@ -167,7 +173,7 @@ subroutine sync_ghosts_RHS_tree(params, hvy_block, tree_ID, g_minus, g_plus) !! In order to avoid confusion wrapper functions should be used everywhere in order to implement !! specific versions. This also means that parameter changes only have to be changed in the wrappers subroutine sync_ghosts_generic( params, hvy_block, tree_ID, sync_case, spaghetti_form, & - g_minus, g_plus, s_val, hvy_tmp, verbose_check, ignore_Filter) + g_minus, g_plus, s_val, hvy_tmp, verbose_check, ignore_Filter, sync_debug_name) ! it is not technically required to include the module here, but for VS code it reduces the number of wrong "errors" use module_params @@ -187,11 +193,15 @@ subroutine sync_ghosts_generic( params, hvy_block, tree_ID, sync_case, spaghetti integer(kind=ik), intent(in), optional :: s_val logical, intent(in), optional :: ignore_Filter !< If set, coarsening will be done only with loose downsampling, not applying HD filter even in the case of lifted wavelets integer(kind=ik), optional, intent(in) :: g_minus, g_plus !< Synch only so many ghost points + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files - integer(kind=ik) :: count_send_total, Nstages, istage, gminus, gplus, k, hvy_id, lgt_id, i_dim + integer(kind=ik) :: count_send_total, Nstages, istage, gminus, gplus, k, hvy_id, lgt_id, i_dim, ierr real(kind=rk) :: t0, t1, t2, x0(1:3), dx(1:3), tolerance character(len=clong) :: toc_statement integer(kind=2) :: n_domain(1:3) + character(len=clong) :: format_string, string_prepare + integer(kind=ik) :: points_synched + integer(kind=ik), allocatable, save :: points_synched_list(:) t0 = MPI_wtime() @@ -279,6 +289,38 @@ subroutine sync_ghosts_generic( params, hvy_block, tree_ID, sync_case, spaghetti call bound_cond_generic(params, hvy_block, tree_ID, sync_case, spaghetti_form, s_val=s_val, edges_only=istage>=2) call toc( "sync ghosts (bound_cond_generic)", 83, MPI_wtime()-t2 ) + !*************************************************************************** + ! (iv) debugging output (optional) - print to files how many data was sent/received + !*************************************************************************** + if (params%debug_sync .and. present(sync_debug_name)) then + if (.not. allocated(points_synched_list)) allocate(points_synched_list(1:params%number_procs)) + + ! debug send volume + call MPI_GATHER(sum(data_send_counter), 1, MPI_INTEGER, points_synched_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + open(unit=99, file=trim("debug_sync.csv"), status="unknown", position="append") + if (params%number_procs == 1) then + write(99,'(A, i0, ",", i0)') trim(adjustl(sync_debug_name))//",send,", istage, points_synched_list(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs + write(99,format_string) trim(adjustl(sync_debug_name))//",send,", istage, points_synched_list(1), points_synched_list(2:params%number_procs) + endif + close(99) + endif + ! debug recv volume + call MPI_GATHER(sum(data_recv_counter), 1, MPI_INTEGER, points_synched_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + open(unit=99, file=trim("debug_sync.csv"), status="unknown", position="append") + if (params%number_procs == 1) then + write(99,'(A, i0, ",", i0)') trim(adjustl(sync_debug_name))//",recv,", istage, points_synched_list(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs + write(99,format_string) trim(adjustl(sync_debug_name))//",recv,", istage, points_synched_list(1), points_synched_list(2:params%number_procs) + endif + close(99) + endif + endif + write(toc_statement, '(A, i0, A)') "sync ghosts (stage ", istage, " TOTAL)" call toc( toc_statement, 83+istage, MPI_wtime()-t1 ) diff --git a/LIB/MPI/xfer_block_data.f90 b/LIB/MPI/xfer_block_data.f90 index e616f744..022f0796 100644 --- a/LIB/MPI/xfer_block_data.f90 +++ b/LIB/MPI/xfer_block_data.f90 @@ -576,7 +576,7 @@ subroutine finalize_xfer_mpi(params, isend, irecv, verbose_check) ! - saving of all metadata ! - computing of buffer sizes for metadata for both sending and receiving ! This is done strictly locally so no MPI needed here -subroutine prepare_update_family_metadata(params, tree_ID, count_send, sync_case, ncomponents, s_val) +subroutine prepare_update_family_metadata(params, tree_ID, count_send, sync_case, ncomponents, s_val, sync_debug_name) implicit none @@ -589,6 +589,7 @@ subroutine prepare_update_family_metadata(params, tree_ID, count_send, sync_case !> Additional value to be considered for syncing logic, can be level or refinement status to which should be synced, dependend on sync case integer(kind=ik), intent(in), optional :: s_val + character(len=*), optional, intent(in) :: sync_debug_name !< name to be used in debug output files ! Following are global data used but defined in module_mpi: ! data_recv_counter, data_send_counter @@ -600,6 +601,9 @@ subroutine prepare_update_family_metadata(params, tree_ID, count_send, sync_case integer(kind=ik) :: hvy_ID_n, lgt_ID_n, level_n, ref_n, rank_n integer(kind=tsize) :: tc logical :: b_send, b_recv + character(len=clong) :: format_string, string_prepare + integer(kind=ik) :: points_synched + integer(kind=ik), allocatable, save :: points_synched_list(:) !----------------------------------------------------------------------- ! set up constant arrays @@ -811,6 +815,36 @@ subroutine prepare_update_family_metadata(params, tree_ID, count_send, sync_case endif ! check if daughters exist end do ! loop over all heavy active + if (params%debug_sync .and. present(sync_debug_name)) then + if (.not. allocated(points_synched_list)) allocate(points_synched_list(1:params%number_procs)) + + ! debug send volume + call MPI_GATHER(sum(data_send_counter), 1, MPI_INTEGER, points_synched_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + open(unit=99, file=trim("debug_sync.csv"), status="unknown", position="append") + ! 0 is send as stage + if (params%number_procs == 1) then + write(99,'(A, i0, ",", i0)') trim(adjustl(sync_debug_name))//",send,", 0, points_synched_list(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs + write(99,format_string) trim(adjustl(sync_debug_name))//",send,", 0, points_synched_list(1), points_synched_list(2:params%number_procs) + endif + close(99) + endif + ! debug recv volume + call MPI_GATHER(sum(data_recv_counter), 1, MPI_INTEGER, points_synched_list, 1, MPI_INTEGER, 0, WABBIT_COMM, ierr) + if (params%rank == 0) then + open(unit=99, file=trim("debug_sync.csv"), status="unknown", position="append") + ! 0 is send as stage + if (params%number_procs == 1) then + write(99,'(A, i0, ",", i0)') trim(adjustl(sync_debug_name))//",recv,", 0, points_synched_list(1) + else + write(format_string, '("(A, i0,",i0,"("","",i0))")') params%number_procs + write(99,format_string) trim(adjustl(sync_debug_name))//",recv,", 0, points_synched_list(1), points_synched_list(2:params%number_procs) + endif + close(99) + endif + endif ! NOTE: this feature is against wabbits memory policy: we try to allocate the ! whole memory of the machine on startup, then work with that. however, we have to diff --git a/LIB/OPERATORS/module_operators.f90 b/LIB/OPERATORS/module_operators.f90 index 00df205f..ebaae3f2 100644 --- a/LIB/OPERATORS/module_operators.f90 +++ b/LIB/OPERATORS/module_operators.f90 @@ -33,6 +33,19 @@ module module_operators real(kind=rk), parameter :: FD2_C6(-3:3) = (/ 2.0_rk, -27.0_rk, 270.0_rk, -490.0_rk, 270.0_rk, -27.0_rk, 2.0_rk/) / 180.0_rk ! 6th order central difference for second derivative real(kind=rk), parameter :: FD2_C8(-4:4) = (/ -9.0_rk, 128.0_rk, -1008.0_rk, 8064.0_rk, -14350.0_rk, 8064.0_rk, -1008.0_rk, 128.0_rk, -9.0_rk /) / 5040_rk ! 8th order central difference for second derivative +! Composite second derivative stencils (from module_poisson) +real(kind=rk), parameter :: FD2_COMP3_1_2(-3:3) = (/ -2.0_rk, 9.0_rk, 18.0_rk, -50.0_rk, 18.0_rk, 9.0_rk, -2.0_rk /) / 36.0_rk ! 3th order composite difference (-1,2) +real(kind=rk), parameter :: FD2_COMP4_0_4(-4:4) = (/ -75.0_rk, 544.0_rk, -1776.0_rk, 3552.0_rk, -4490.0_rk, 3552.0_rk, -1776.0_rk, 544.0_rk, -75.0_rk /) / 144.0_rk ! 4th order composite difference (0,4) +real(kind=rk), parameter :: FD2_COMP4_2_2(-4:4) = (/ 1.0_rk, -16.0_rk, 64.0_rk, 16.0_rk, -130.0_rk, 16.0_rk, 64.0_rk, -16.0_rk, 1.0_rk /) / 144.0_rk ! 4th order composite difference (2,2) +real(kind=rk), parameter :: FD2_COMP4_1_3(-4:4) = (/ 3.0_rk, -8.0_rk, -24.0_rk, 264.0_rk, -470.0_rk, 264.0_rk, -24.0_rk, -8.0_rk, 3.0_rk /) / 144.0_rk ! 4th order composite difference (1,3) +real(kind=rk), parameter :: FD2_COMP5_2_3(-5:5) = (/ -6.0_rk, 105.0_rk, -590.0_rk, 1440.0_rk, 1620.0_rk, -5138.0_rk, 1620.0_rk, 1440.0_rk, -590.0_rk, 105.0_rk, -6.0_rk /) / 3600.0_rk ! 5th order composite difference (2,3) +real(kind=rk), parameter :: FD2_COMP6_3_3(-6:6) = (/ 1.0_rk, -18.0_rk, 171.0_rk, -810.0_rk, 1935.0_rk, 828.0_rk, -4214.0_rk, 828.0_rk, 1935.0_rk, -810.0_rk, 171.0_rk, -18.0_rk, 1.0_rk /) / 3600.0_rk ! 6th order composite difference (3,3) +real(kind=rk), parameter :: FD2_COMP6_2_4(-6:6) = (/ 2.0_rk, -40.0_rk, 217.0_rk, -520.0_rk, 270.0_rk, 4656.0_rk, -9170.0_rk, 4656.0_rk, 270.0_rk, -520.0_rk, 217.0_rk, -40.0_rk, 2.0_rk /) / 3600.0_rk ! 6th order composite difference (2,4) +real(kind=rk), parameter :: FD2_COMP6_1_5(-6:6) = (/ 20.0_rk, 4.0_rk, -955.0_rk, 5300.0_rk, -15300.0_rk, 31560.0_rk, -41258.0_rk, 31560.0_rk, -15300.0_rk, 5300.0_rk, -955.0_rk, 4.0_rk, 20.0_rk /) / 3600.0_rk ! 6th order composite difference (1,5) +real(kind=rk), parameter :: FD2_COMP6_0_6(-6:6) = (/ -1470.0_rk, 14184.0_rk, -63495.0_rk, 176200.0_rk, -342450.0_rk, 501840.0_rk, -569618.0_rk, 501840.0_rk, -342450.0_rk, 176200.0_rk, -63495.0_rk, 14184.0_rk, -1470.0_rk /) / 3600.0_rk ! 6th order composite difference (0,6) +real(kind=rk), parameter :: FD2_COMP7_3_4(-7:7) = (/ -12.0_rk, 238.0_rk, -2436.0_rk, 13713.0_rk, -45612.0_rk, 83874.0_rk, 84924.0_rk, -269378.0_rk, 84924.0_rk, 83874.0_rk, -45612.0_rk, 13713.0_rk, -2436.0_rk, 238.0_rk, -12.0_rk /) / 176400.0_rk ! 7th order composite difference (3,4) +real(kind=rk), parameter :: FD2_COMP8_3_5(-8:8) = (/ 15.0_rk, -330.0_rk, 3760.0_rk, -21966.0_rk, 74760.0_rk, -155610.0_rk, 142800.0_rk, 767730.0_rk, -1622318.0_rk, 767730.0_rk, 142800.0_rk, -155610.0_rk, 74760.0_rk, -21966.0_rk, 3760.0_rk, -330.0_rk, 15.0_rk /) / 705600.0_rk ! 8th order composite difference (3,5) + ! Cross derivative coefficients (mixed partials d²/dxdy, d²/dxdz, d²/dydz) ! These are applied in a cross-shaped pattern along diagonals ! 2nd order cross derivative: 3-point stencil applied cross-wise @@ -43,18 +56,22 @@ module module_operators ! Forward finite difference stencils for first derivative (used at left boundaries) ! R0 = starting from point 0, using points to the right -real(kind=rk), parameter :: FD1_R0_1(0:1) = (/ -1.0_rk, 1.0_rk /) ! 1st order forward difference (2-point) -real(kind=rk), parameter :: FD1_R0_2(0:2) = (/ -3.0_rk, 6.0_rk, -1.0_rk /) / 2.0_rk ! 2nd order forward difference (3-point) -real(kind=rk), parameter :: FD1_R0_3(0:3) = (/ -11.0_rk, 18.0_rk, -9.0_rk, 2.0_rk /) / 6.0_rk ! 3rd order forward difference (4-point) -real(kind=rk), parameter :: FD1_R0_4(0:4) = (/ -25.0_rk, 48.0_rk, -36.0_rk, 16.0_rk, -3.0_rk /) / 12.0_rk ! 4th order forward difference (5-point) -real(kind=rk), parameter :: FD1_R0_5(0:5) = (/ -137.0_rk, 300.0_rk, -300.0_rk, 200.0_rk, -75.0_rk, 12.0_rk /) / 60.0_rk ! 5th order forward difference (6-point) -real(kind=rk), parameter :: FD1_R0_6(0:6) = (/ -147.0_rk, 360.0_rk, -450.0_rk, 400.0_rk, -225.0_rk, 72.0_rk, -10.0_rk /) / 60.0_rk ! 6th order forward difference (7-point) +real(kind=rk), parameter :: FD1_COMP1_0_1(0:1) = (/ -1.0_rk, 1.0_rk /) ! 1st order forward difference (2-point) +real(kind=rk), parameter :: FD1_COMP2_0_2(0:2) = (/ -3.0_rk, 6.0_rk, -1.0_rk /) / 2.0_rk ! 2nd order forward difference (3-point) +real(kind=rk), parameter :: FD1_COMP3_0_3(0:3) = (/ -11.0_rk, 18.0_rk, -9.0_rk, 2.0_rk /) / 6.0_rk ! 3rd order forward difference (4-point) +real(kind=rk), parameter :: FD1_COMP4_0_4(0:4) = (/ -25.0_rk, 48.0_rk, -36.0_rk, 16.0_rk, -3.0_rk /) / 12.0_rk ! 4th order forward difference (5-point) +real(kind=rk), parameter :: FD1_COMP5_0_5(0:5) = (/ -137.0_rk, 300.0_rk, -300.0_rk, 200.0_rk, -75.0_rk, 12.0_rk /) / 60.0_rk ! 5th order forward difference (6-point) +real(kind=rk), parameter :: FD1_COMP6_0_6(0:6) = (/ -147.0_rk, 360.0_rk, -450.0_rk, 400.0_rk, -225.0_rk, 72.0_rk, -10.0_rk /) / 60.0_rk ! 6th order forward difference (7-point) ! Biased finite difference stencils for first derivative (used near boundaries) ! R1 = starting from point -1, using mostly points to the right -real(kind=rk), parameter :: FD1_R1_3(-1:2) = (/ -2.0_rk, -3.0_rk, 6.0_rk, -1.0_rk /) / 6.0_rk ! 3rd order biased difference (4-point, stencil from -1 to +2) -real(kind=rk), parameter :: FD1_R1_4(-1:3) = (/ -3.0_rk, -10.0_rk, 18.0_rk, -6.0_rk, 1.0_rk /) / 12.0_rk ! 4th order biased difference (5-point, stencil from -1 to +3) -real(kind=rk), parameter :: FD1_R2_6(-2:4) = (/ 2.0_rk, -24.0_rk, -35.0_rk, 80.0_rk, -30.0_rk, 8.0_rk, -1.0_rk /) / 60.0_rk ! 6th order biased difference (7-point, stencil from -2 to +4) +real(kind=rk), parameter :: FD1_COMP3_1_2(-1:2) = (/ -2.0_rk, -3.0_rk, 6.0_rk, -1.0_rk /) / 6.0_rk ! 3rd order biased difference (4-point, stencil from -1 to +2) +real(kind=rk), parameter :: FD1_COMP4_1_3(-1:3) = (/ -3.0_rk, -10.0_rk, 18.0_rk, -6.0_rk, 1.0_rk /) / 12.0_rk ! 4th order biased difference (5-point, stencil from -1 to +3) +real(kind=rk), parameter :: FD1_COMP5_2_3(-2:3) = (/ 3.0_rk, -30.0_rk, -20.0_rk, 60.0_rk, -15.0_rk, 2.0_rk /) / 60.0_rk ! 5th order biased difference (6-point, stencil from -2 to +3) +real(kind=rk), parameter :: FD1_COMP6_1_5(-1:5) = (/ -10.0_rk, -77.0_rk, 150.0_rk, -100.0_rk, 50.0_rk, -15.0_rk, 2.0_rk /) / 60.0_rk ! 6th order biased difference (7-point, stencil from -1 to +5) +real(kind=rk), parameter :: FD1_COMP6_2_4(-2:4) = (/ 2.0_rk, -24.0_rk, -35.0_rk, 80.0_rk, -30.0_rk, 8.0_rk, -1.0_rk /) / 60.0_rk ! 6th order biased difference (7-point, stencil from -2 to +4) +real(kind=rk), parameter :: FD1_COMP7_3_4(-3:4) = (/ -4.0_rk, 42.0_rk, -252.0_rk, -105.0_rk, 420.0_rk, -126.0_rk, 28.0_rk, -3.0_rk /) / 420.0_rk ! 7th order biased difference (8-point, stencil from -3 to +4) +real(kind=rk), parameter :: FD1_COMP8_3_5(-3:5) = (/ -5.0_rk, 60.0_rk, -420.0_rk, -378.0_rk, 1050.0_rk, -420.0_rk, 140.0_rk, -30.0_rk, 3.0_rk /) / 840.0_rk ! 8th order biased difference (9-point, stencil from -3 to +5) !********************************************************************************************** ! These are the important routines that are visible to WABBIT: @@ -105,21 +122,46 @@ subroutine setup_FD1_left_stencil(FD1_order_discretization_in, FD1_l_array, l_st l_end = 4 allocate(FD1_l_array(l_start:l_end)) FD1_l_array(:) = FD1_C8(:) + case("FD_3th_comp_1_2") + l_start = -2 + l_end = 1 + allocate(FD1_l_array(l_start:l_end)) + FD1_l_array(:) = FD1_COMP3_1_2(-l_start:-l_end:-1) ! left side is reversed case("FD_4th_comp_0_4") l_start = -4 l_end = 0 allocate(FD1_l_array(l_start:l_end)) - FD1_l_array(:) = -FD1_R0_4(-l_start:-l_end:-1) ! left side is reversed + FD1_l_array(:) = -FD1_COMP4_0_4(-l_start:-l_end:-1) ! left side is reversed case("FD_4th_comp_1_3") l_start = -3 l_end = 1 allocate(FD1_l_array(l_start:l_end)) - FD1_l_array(:) = -FD1_R1_4(-l_start:-l_end:-1) ! left side is reversed + FD1_l_array(:) = -FD1_COMP4_1_3(-l_start:-l_end:-1) ! left side is reversed + case("FD_5th_comp_2_3") + l_start = -3 + l_end = 2 + allocate(FD1_l_array(l_start:l_end)) + FD1_l_array(:) = -FD1_COMP5_2_3(-l_start:-l_end:-1) ! left side is reversed + case("FD_6th_comp_1_5") + l_start = -5 + l_end = 1 + allocate(FD1_l_array(l_start:l_end)) + FD1_l_array(:) = -FD1_COMP6_1_5(-l_start:-l_end:-1) ! left side is reversed case("FD_6th_comp_2_4") l_start = -4 l_end = 2 allocate(FD1_l_array(l_start:l_end)) - FD1_l_array(:) = -FD1_R2_6(-l_start:-l_end:-1) ! left side is reversed + FD1_l_array(:) = -FD1_COMP6_2_4(-l_start:-l_end:-1) ! left side is reversed + case("FD_7th_comp_3_4") + l_start = -4 + l_end = 3 + allocate(FD1_l_array(l_start:l_end)) + FD1_l_array(:) = -FD1_COMP7_3_4(-l_start:-l_end:-1) ! left side is reversed + case("FD_8th_comp_3_5") + l_start = -5 + l_end = 3 + allocate(FD1_l_array(l_start:l_end)) + FD1_l_array(:) = -FD1_COMP8_3_5(-l_start:-l_end:-1) ! left side is reversed case default call abort(250615, "ERROR: order of FD1 discretization not known: " // trim(FD1_order_discretization_in)) end select @@ -158,21 +200,46 @@ subroutine setup_FD1_right_stencil(FD1_order_discretization_in, FD1_r_array, r_s r_end = 4 allocate(FD1_r_array(r_start:r_end)) FD1_r_array(:) = FD1_C8(:) + case("FD_3th_comp_1_2") + r_start = -1 + r_end = 2 + allocate(FD1_r_array(r_start:r_end)) + FD1_r_array(:) = FD1_COMP3_1_2(:) case("FD_4th_comp_0_4") r_start = 0 r_end = 4 allocate(FD1_r_array(r_start:r_end)) - FD1_r_array(:) = FD1_R0_4(:) + FD1_r_array(:) = FD1_COMP4_0_4(:) case("FD_4th_comp_1_3") r_start = -1 r_end = 3 allocate(FD1_r_array(r_start:r_end)) - FD1_r_array(:) = FD1_R1_4(:) + FD1_r_array(:) = FD1_COMP4_1_3(:) + case("FD_5th_comp_2_3") + r_start = -2 + r_end = 3 + allocate(FD1_r_array(r_start:r_end)) + FD1_r_array(:) = FD1_COMP5_2_3(:) case("FD_6th_comp_2_4") r_start = -2 r_end = 4 allocate(FD1_r_array(r_start:r_end)) - FD1_r_array(:) = FD1_R2_6(:) + FD1_r_array(:) = FD1_COMP6_2_4(:) + case("FD_6th_comp_1_5") + r_start = -1 + r_end = 5 + allocate(FD1_r_array(r_start:r_end)) + FD1_r_array(:) = FD1_COMP6_1_5(:) + case("FD_7th_comp_3_4") + r_start = -3 + r_end = 4 + allocate(FD1_r_array(r_start:r_end)) + FD1_r_array(:) = FD1_COMP7_3_4(:) + case("FD_8th_comp_3_5") + r_start = -3 + r_end = 5 + allocate(FD1_r_array(r_start:r_end)) + FD1_r_array(:) = FD1_COMP8_3_5(:) case default call abort(250615, "ERROR: order of FD1 discretization not known: " // trim(FD1_order_discretization_in)) end select @@ -181,12 +248,24 @@ end subroutine setup_FD1_right_stencil !********************************************************************************************** !> \brief setup the FD2 stencils !> \details This routine sets up the second derivative stencils based on the discretization order -subroutine setup_FD2_stencil(FD2_order_discretization_in, FD2_array, fd2_start, fd2_end) +subroutine setup_FD2_stencil(FD2_order_discretization_in, FD2_array, fd2_start, fd2_end, use_exact_discretization) implicit none character(len=*), intent(in) :: FD2_order_discretization_in !> order of the second derivative discretization real(kind=rk), allocatable, intent(inout) :: FD2_array(:) !> second derivative stencil array integer(kind=ik), intent(inout) :: fd2_start, fd2_end !> second derivative stencil bounds + logical, intent(in), optional :: use_exact_discretization !> if true, use exact composite stencils; if false (default), use central stencils for composite orders + + logical :: use_exact + + ! We need the second order stencils for two reasons: + ! 1) to compute the diffusion term + ! 2) to solve the pressure-poisson equation + ! For the first case, operator compatibility is not too important and we choose the best available stencil (central one) + ! For the second case, operator compatibility is crucial and we need the correct stencils + ! default behaviour is dictated by RHS, so for now we need to set use_exact_discretization = .true. when wanting to choose the coinciding stencil + use_exact = .false. + if (present(use_exact_discretization)) use_exact = use_exact_discretization if (allocated(FD2_array)) deallocate(FD2_array) @@ -196,21 +275,163 @@ subroutine setup_FD2_stencil(FD2_order_discretization_in, FD2_array, fd2_start, fd2_end = 1 allocate(FD2_array(fd2_start:fd2_end)) FD2_array(:) = FD2_C2(:) - case("FD_4th_central", "FD_4th_comp_0_4", "FD_4th_comp_1_3", "FD_4th_comp_2_2") + case("FD_3rd_comp_1_2") + if (use_exact) then + fd2_start = -3 + fd2_end = 3 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP3_1_2(:) + ! no corresponding 3rd order central stencil, fallback to 4th order + else + fd2_start = -2 + fd2_end = 2 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C4(:) + endif + case("FD_4th_central") fd2_start = -2 fd2_end = 2 allocate(FD2_array(fd2_start:fd2_end)) FD2_array(:) = FD2_C4(:) - case("FD_6th_central", "FD_6th_comp_0_6", "FD_6th_comp_1_5", "FD_6th_comp_2_4") + case("FD_4th_comp_0_4") + if (use_exact) then + fd2_start = -4 + fd2_end = 4 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP4_0_4(:) + else + ! fall back to central 4th order stencil + fd2_start = -2 + fd2_end = 2 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C4(:) + endif + case("FD_4th_comp_2_2") + if (use_exact) then + fd2_start = -4 + fd2_end = 4 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP4_2_2(:) + else + ! fall back to central 4th order stencil + fd2_start = -2 + fd2_end = 2 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C4(:) + endif + case("FD_4th_comp_1_3") + if (use_exact) then + fd2_start = -4 + fd2_end = 4 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP4_1_3(:) + else + ! fall back to central 4th order stencil + fd2_start = -2 + fd2_end = 2 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C4(:) + endif + case("FD_5th_comp_2_3") + if (use_exact) then + fd2_start = -5 + fd2_end = 5 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP5_2_3(:) + ! no corresponding 5th order central stencil, fallback to 6th order + else + fd2_start = -3 + fd2_end = 3 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C6(:) + endif + case("FD_6th_central") fd2_start = -3 fd2_end = 3 allocate(FD2_array(fd2_start:fd2_end)) FD2_array(:) = FD2_C6(:) + case("FD_6th_comp_3_3") + if (use_exact) then + fd2_start = -6 + fd2_end = 6 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP6_3_3(:) + else + ! fall back to central 6th order stencil + fd2_start = -3 + fd2_end = 3 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C6(:) + endif + case("FD_6th_comp_2_4") + if (use_exact) then + fd2_start = -6 + fd2_end = 6 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP6_2_4(:) + else + ! fall back to central 6th order stencil + fd2_start = -3 + fd2_end = 3 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C6(:) + endif + case("FD_6th_comp_1_5") + if (use_exact) then + fd2_start = -6 + fd2_end = 6 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP6_1_5(:) + else + ! fall back to central 6th order stencil + fd2_start = -3 + fd2_end = 3 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C6(:) + endif + case("FD_6th_comp_0_6") + if (use_exact) then + fd2_start = -6 + fd2_end = 6 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP6_0_6(:) + else + ! fall back to central 6th order stencil + fd2_start = -3 + fd2_end = 3 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C6(:) + endif + case("FD_7th_comp_3_4") + if (use_exact) then + fd2_start = -7 + fd2_end = 7 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP7_3_4(:) + ! no corresponding 7th order central stencil, fallback to 8th order + else + fd2_start = -4 + fd2_end = 4 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C8(:) + endif case("FD_8th_central") fd2_start = -4 fd2_end = 4 allocate(FD2_array(fd2_start:fd2_end)) FD2_array(:) = FD2_C8(:) + case("FD_8th_comp_3_5") + if (use_exact) then + fd2_start = -8 + fd2_end = 8 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_COMP8_3_5(:) + else + fd2_start = -4 + fd2_end = 4 + allocate(FD2_array(fd2_start:fd2_end)) + FD2_array(:) = FD2_C8(:) + endif case default call abort(250616, "ERROR: order of FD2 discretization not known: " // trim(FD2_order_discretization_in)) end select diff --git a/LIB/PARAMS/module_ini_files_parser.f90 b/LIB/PARAMS/module_ini_files_parser.f90 index 12fa0ff7..f2f0767e 100644 --- a/LIB/PARAMS/module_ini_files_parser.f90 +++ b/LIB/PARAMS/module_ini_files_parser.f90 @@ -369,7 +369,7 @@ subroutine param_dbl (PARAMS, section, keyword, params_real, defaultvalue) ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section), trim(keyword), adjustl(trim(value)) + write (*,FORMAT1) trim(section), trim(keyword), trim(adjustl(value)) endif end subroutine param_dbl @@ -420,7 +420,7 @@ subroutine param_sgl (PARAMS, section, keyword, params_real, defaultvalue) ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section), trim(keyword), adjustl(trim(value)) + write (*,FORMAT1) trim(section), trim(keyword), trim(adjustl(value)) endif end subroutine param_sgl @@ -470,7 +470,7 @@ subroutine param_str (PARAMS, section, keyword, params_string, defaultvalue, che ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section), trim(keyword), adjustl(trim(value)) + write (*,FORMAT1) trim(section), trim(keyword), trim(adjustl(value)) endif end subroutine param_str @@ -538,7 +538,7 @@ subroutine param_vct (PARAMS, section, keyword, params_vector, defaultvalue) ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section), trim(keyword), adjustl(trim(value)) + write (*,FORMAT1) trim(section), trim(keyword), trim(adjustl(value)) endif end subroutine param_vct @@ -627,7 +627,7 @@ subroutine param_vct_str (PARAMS, section, keyword, params_vector, defaultvalue, ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section),trim(keyword),adjustl(trim(value)) + write (*,FORMAT1) trim(section),trim(keyword),trim(adjustl(value)) endif end subroutine param_vct_str @@ -665,7 +665,7 @@ subroutine param_int(PARAMS, section, keyword, params_int, defaultvalue) ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section),trim(keyword),adjustl(trim(value)) + write (*,FORMAT1) trim(section),trim(keyword),trim(adjustl(value)) endif end subroutine param_int @@ -709,7 +709,7 @@ subroutine param_bool(PARAMS, section, keyword, params_bool, defaultvalue) ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section),trim(keyword),adjustl(trim(value)) + write (*,FORMAT1) trim(section),trim(keyword),trim(adjustl(value)) endif end subroutine param_bool @@ -783,7 +783,7 @@ subroutine param_vct_bool (PARAMS, section, keyword, params_vector, defaultvalue ! in verbose mode, inform about what we did if (verbosity) then - write (*,FORMAT1) trim(section),trim(keyword),adjustl(trim(value)) + write (*,FORMAT1) trim(section),trim(keyword),trim(adjustl(value)) endif end subroutine param_vct_bool diff --git a/LIB/PARAMS/module_params.f90 b/LIB/PARAMS/module_params.f90 index 642f6884..3323f997 100644 --- a/LIB/PARAMS/module_params.f90 +++ b/LIB/PARAMS/module_params.f90 @@ -89,7 +89,8 @@ module module_params integer(kind=ik) :: poisson_stencil_size=0 character(len=cshort) :: poisson_cycle_end_criteria="not-initialized" integer(kind=ik) :: poisson_cycle_it=0 - real(kind=rk) :: poisson_cycle_tol=0.0_rk + real(kind=rk) :: poisson_cycle_tol_abs=0.0_rk + real(kind=rk) :: poisson_cycle_tol_rel=0.0_rk integer(kind=ik) :: poisson_cycle_max_it=0 integer(kind=ik) :: poisson_GS_it=0 integer(kind=ik) :: poisson_Sync_it=0 @@ -141,6 +142,8 @@ module module_params logical :: time_statistics = .false. integer(kind=ik) :: N_time_statistics = 0 character(len=cshort), allocatable :: time_statistics_names(:) + logical :: read_from_files_time_statistics = .false. + real(kind=rk) :: time_statistics_start_time = 0.0_rk ! ------------------------------------------------------------------------------------- ! MPI @@ -173,10 +176,13 @@ module module_params logical :: use_iteration_as_fileid = .false. ! ------------------------------------------------------------------------------------- - ! unit test + ! unit tests and debugging ! ------------------------------------------------------------------------------------- logical :: test_treecode=.false., test_ghost_nodes_synch=.true., test_wavelet_decomposition=.true. + logical :: debug_balanceLoad = .false., debug_refinement = .false., debug_wavelet_decompose = .false. + logical :: debug_wavelet_reconstruct = .false., debug_sync = .false., debug_pruned2full = .false. + ! ------------------------------------------------------------------------------------- ! filter ! ------------------------------------------------------------------------------------- diff --git a/LIB/POISSON/module_poisson.f90 b/LIB/POISSON/module_poisson.f90 index d7474015..98dadd65 100644 --- a/LIB/POISSON/module_poisson.f90 +++ b/LIB/POISSON/module_poisson.f90 @@ -14,6 +14,10 @@ module module_poisson use module_forestMetaData use module_timing ! debug module use module_treelib ! module with evrything related to treecodes (encoding, decoding, neighbors, etc) + use module_mesh ! needed for multigrid + use module_mpi ! needed for multigrid + use module_fft ! needed for multigrid + use module_operators, only: setup_FD2_stencil ! for finite difference stencils implicit none @@ -34,7 +38,7 @@ module module_poisson ! different stencils, either for one-dimensional cross-stencil form or tensorial form real(kind=rk), allocatable :: stencil(:), stencil_tensor_2D(:,:), stencil_tensor_3D(:,:,:), stencil_RHS_tensor_2D(:,:), stencil_RHS_tensor_3D(:,:,:) real(kind=rk) :: stencil_RHS - integer(kind=ik) :: stencil_size, stencil_RHS_size + integer(kind=ik) :: stencil_RHS_size logical :: use_tensor @@ -42,17 +46,22 @@ module module_poisson !--------------------------------------------------------------------------------------------- ! public parts of this module - PUBLIC :: GS_iteration_level, GS_iteration_ref, GS_compute_residual, GS_compute_Ax, CG_solve_poisson_level0, setup_Laplacian_stencils + PUBLIC :: GS_iteration_level, GS_iteration_ref, GS_compute_residual, GS_compute_Ax, CG_solve_poisson_level0, setup_Laplacian_stencils, multigrid_vcycle, multigrid_solve contains +#include "multigrid_vcycle.f90" + subroutine setup_Laplacian_stencils(params, g) implicit none !> parameter struct type (type_params), intent(inout) :: params integer(kind=ik), intent(inout), optional :: g ! ghost points, if present then we set it if we require more points + ! local variables for setup_FD2_stencil + integer(kind=ik) :: fd2_start, fd2_end + ! prepare stencil if (allocated(stencil)) deallocate(stencil) if (allocated(stencil_tensor_2D)) deallocate(stencil_tensor_2D) @@ -60,95 +69,19 @@ subroutine setup_Laplacian_stencils(params, g) if (allocated(stencil_RHS_tensor_2D)) deallocate(stencil_RHS_tensor_2D) if (allocated(stencil_RHS_tensor_3D)) deallocate(stencil_RHS_tensor_3D) - if (params%poisson_order == "FD_2nd_central") then - allocate(stencil(-1:1)) - stencil = (/ 1.0_rk, -2.0_rk, 1.0_rk /) - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 1 - stencil_size = 1 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_4th_central") then - allocate(stencil(-2:2)) - stencil = (/ -1.0_rk, 16.0_rk, -30.0_rk, 16.0_rk, -1.0_rk /) / 12.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 2 - stencil_size = 2 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_6th_central") then - allocate(stencil(-3:3)) - stencil = (/ 2.0_rk, -27.0_rk, 270.0_rk, -490.0_rk, 270.0_rk, -27.0_rk, 2.0_rk/) / 180.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 3 - stencil_size = 3 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_8th_central") then - allocate(stencil(-4:4)) - stencil = (/ -9.0_rk, 128.0_rk, -1008.0_rk, 8064.0_rk, -14350.0_rk, 8064.0_rk, -1008.0_rk, 128.0_rk, -9.0_rk /) / 5040_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 4 - stencil_size = 4 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_4th_comp_0_4") then - allocate(stencil(-4:4)) - stencil = (/ -75.0_rk, 544.0_rk, -1776.0_rk, 3552.0_rk, -4490.0_rk, 3552.0_rk, -1776.0_rk, 544.0_rk, -75.0_rk /) / 144.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 4 - stencil_size = 4 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_4th_comp_2_2") then - allocate(stencil(-4:4)) - stencil = (/ 1.0_rk, -16.0_rk, 64.0_rk, 16.0_rk, -130.0_rk, 16.0_rk, 64.0_rk, -16.0_rk, 1.0_rk /) / 144.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 4 - stencil_size = 4 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_4th_comp_1_3") then - allocate(stencil(-4:4)) - stencil = (/ 3.0_rk, -8.0_rk, -24.0_rk, 264.0_rk, -470.0_rk, 264.0_rk, -24.0_rk, -8.0_rk, 3.0_rk /) / 144.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 4 - stencil_size = 4 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_6th_comp_3_3") then - allocate(stencil(-6:6)) - stencil = (/ 1.0_rk, -18.0_rk, 171.0_rk, -810.0_rk, 1935.0_rk, 828.0_rk, -4214.0_rk, 828.0_rk, 1935.0_rk, -810.0_rk, 171.0_rk, -18.0_rk, 1.0_rk /) / 3600.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 6 - stencil_size = 6 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_6th_comp_2_4") then - allocate(stencil(-6:6)) - stencil = (/ 2.0_rk, -40.0_rk, 217.0_rk, -520.0_rk, 270.0_rk, 4656.0_rk, -9170.0_rk, 4656.0_rk, 270.0_rk, -520.0_rk, 217.0_rk, -40.0_rk, 2.0_rk /) / 3600.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 6 - stencil_size = 6 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_6th_comp_1_5") then - allocate(stencil(-6:6)) - stencil = (/ 20.0_rk, 4.0_rk, -955.0_rk, 5300.0_rk, -15300.0_rk, 31560.0_rk, -41258.0_rk, 31560.0_rk, -15300.0_rk, 5300.0_rk, -955.0_rk, 4.0_rk, 20.0_rk /) / 3600.0_rk - stencil_RHS = 1.0_rk - params%poisson_stencil_size = 6 - stencil_size = 6 - stencil_RHS_size = 0 - use_tensor = .false. - elseif (params%poisson_order == "FD_6th_comp_0_6") then - allocate(stencil(-6:6)) - stencil = (/ -1470.0_rk, 14184.0_rk, -63495.0_rk, 176200.0_rk, -342450.0_rk, 501840.0_rk, -569618.0_rk, 501840.0_rk, -342450.0_rk, 176200.0_rk, -63495.0_rk, 14184.0_rk, -1470.0_rk /) / 3600.0_rk + ! check if we have a standard stencil (1D cross-shaped) or the special Mehrstellenverfahren + if (.not. params%poisson_order == "FD_6th_mehrstellen") then + ! use standard 1D stencils via module_operators setup_FD2_stencil + ! for Poisson solver, we want to use exact discretizations for composite stencils to preserve compatibility between operators + call setup_FD2_stencil(params%poisson_order, stencil, fd2_start, fd2_end, use_exact_discretization=.true.) + + ! set module variables for compatibility with existing code stencil_RHS = 1.0_rk - params%poisson_stencil_size = 6 - stencil_size = 6 + params%poisson_stencil_size = max(abs(fd2_start), abs(fd2_end)) stencil_RHS_size = 0 use_tensor = .false. - elseif (params%poisson_order == "FD_6th_mehrstellen") then + else + ! handle the special Mehrstellenverfahren case with tensor stencils if (params%dim == 2) then allocate(stencil_tensor_2D(-1:1, -1:1)) allocate(stencil_RHS_tensor_2D(-2:2, -2:2)) @@ -201,11 +134,8 @@ subroutine setup_Laplacian_stencils(params, g) 0.0_rk, 0.0_rk, 0.0_rk, 0.0_rk, 0.0_rk /), (/5,5,5/) ) / 720.0_rk endif params%poisson_stencil_size = 1 - stencil_size = 1 stencil_RHS_size = 2 use_tensor = .true. - else - call abort(250612, "I don't know about this laplacian discretization order: "//trim(adjustl(params%poisson_order))//" in GS_compute_residual") endif if (present(g)) then @@ -320,19 +250,163 @@ subroutine GS_iteration(params, u, b, dx, sweep_forward, filter_offset) do ic = 1, size(u,4) if (.not. use_tensor) then if (params%dim == 2) then - do iy = is(2), ie(2), dir - do ix = is(1), ie(1), dir - u(ix,iy,1,ic) = (-sum(stencil * u(ix-a:ix+a,iy,1,ic)) - sum(stencil * u(ix,iy-a:iy+a,1,ic)) + b(ix,iy,1,ic)*dx(1)*dx(1) + params%dim*stencil(0)*u(ix,iy,1,ic)) / (stencil(0)*params%dim) - enddo - enddo + select case(a) + case(1) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-1)*u(ix-1,iy,1,ic) - stencil(1)*u(ix+1,iy,1,ic) & + - stencil(-1)*u(ix,iy-1,1,ic) - stencil(1)*u(ix,iy+1,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case(2) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-2)*u(ix-2,iy,1,ic) - stencil(-1)*u(ix-1,iy,1,ic) - stencil(1)*u(ix+1,iy,1,ic) - stencil(2)*u(ix+2,iy,1,ic) & + - stencil(-2)*u(ix,iy-2,1,ic) - stencil(-1)*u(ix,iy-1,1,ic) - stencil(1)*u(ix,iy+1,1,ic) - stencil(2)*u(ix,iy+2,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case(3) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-3)*u(ix-3,iy,1,ic) - stencil(-2)*u(ix-2,iy,1,ic) - stencil(-1)*u(ix-1,iy,1,ic) - stencil(1)*u(ix+1,iy,1,ic) - stencil(2)*u(ix+2,iy,1,ic) - stencil(3)*u(ix+3,iy,1,ic) & + - stencil(-3)*u(ix,iy-3,1,ic) - stencil(-2)*u(ix,iy-2,1,ic) - stencil(-1)*u(ix,iy-1,1,ic) - stencil(1)*u(ix,iy+1,1,ic) - stencil(2)*u(ix,iy+2,1,ic) - stencil(3)*u(ix,iy+3,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case(4) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-4)*u(ix-4,iy,1,ic) - stencil(-3)*u(ix-3,iy,1,ic) - stencil(-2)*u(ix-2,iy,1,ic) - stencil(-1)*u(ix-1,iy,1,ic) - stencil(1)*u(ix+1,iy,1,ic) - stencil(2)*u(ix+2,iy,1,ic) - stencil(3)*u(ix+3,iy,1,ic) - stencil(4)*u(ix+4,iy,1,ic) & + - stencil(-4)*u(ix,iy-4,1,ic) - stencil(-3)*u(ix,iy-3,1,ic) - stencil(-2)*u(ix,iy-2,1,ic) - stencil(-1)*u(ix,iy-1,1,ic) - stencil(1)*u(ix,iy+1,1,ic) - stencil(2)*u(ix,iy+2,1,ic) - stencil(3)*u(ix,iy+3,1,ic) - stencil(4)*u(ix,iy+4,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case(5) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-5)*u(ix-5,iy,1,ic) - stencil(-4)*u(ix-4,iy,1,ic) - stencil(-3)*u(ix-3,iy,1,ic) - stencil(-2)*u(ix-2,iy,1,ic) - stencil(-1)*u(ix-1,iy,1,ic) & + - stencil(1)*u(ix+1,iy,1,ic) - stencil(2)*u(ix+2,iy,1,ic) - stencil(3)*u(ix+3,iy,1,ic) - stencil(4)*u(ix+4,iy,1,ic) - stencil(5)*u(ix+5,iy,1,ic) & + - stencil(-5)*u(ix,iy-5,1,ic) - stencil(-4)*u(ix,iy-4,1,ic) - stencil(-3)*u(ix,iy-3,1,ic) - stencil(-2)*u(ix,iy-2,1,ic) - stencil(-1)*u(ix,iy-1,1,ic) & + - stencil(1)*u(ix,iy+1,1,ic) - stencil(2)*u(ix,iy+2,1,ic) - stencil(3)*u(ix,iy+3,1,ic) - stencil(4)*u(ix,iy+4,1,ic) - stencil(5)*u(ix,iy+5,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case(6) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-6)*u(ix-6,iy,1,ic) - stencil(-5)*u(ix-5,iy,1,ic) - stencil(-4)*u(ix-4,iy,1,ic) - stencil(-3)*u(ix-3,iy,1,ic) - stencil(-2)*u(ix-2,iy,1,ic) - stencil(-1)*u(ix-1,iy,1,ic) & + - stencil(1)*u(ix+1,iy,1,ic) - stencil(2)*u(ix+2,iy,1,ic) - stencil(3)*u(ix+3,iy,1,ic) - stencil(4)*u(ix+4,iy,1,ic) - stencil(5)*u(ix+5,iy,1,ic) - stencil(6)*u(ix+6,iy,1,ic) & + - stencil(-6)*u(ix,iy-6,1,ic) - stencil(-5)*u(ix,iy-5,1,ic) - stencil(-4)*u(ix,iy-4,1,ic) - stencil(-3)*u(ix,iy-3,1,ic) - stencil(-2)*u(ix,iy-2,1,ic) - stencil(-1)*u(ix,iy-1,1,ic) & + - stencil(1)*u(ix,iy+1,1,ic) - stencil(2)*u(ix,iy+2,1,ic) - stencil(3)*u(ix,iy+3,1,ic) - stencil(4)*u(ix,iy+4,1,ic) - stencil(5)*u(ix,iy+5,1,ic) - stencil(6)*u(ix,iy+6,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case(7) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-7)*u(ix-7,iy,1,ic) - stencil(-6)*u(ix-6,iy,1,ic) - stencil(-5)*u(ix-5,iy,1,ic) - stencil(-4)*u(ix-4,iy,1,ic) - stencil(-3)*u(ix-3,iy,1,ic) - stencil(-2)*u(ix-2,iy,1,ic) - stencil(-1)*u(ix-1,iy,1,ic) & + - stencil(1)*u(ix+1,iy,1,ic) - stencil(2)*u(ix+2,iy,1,ic) - stencil(3)*u(ix+3,iy,1,ic) - stencil(4)*u(ix+4,iy,1,ic) - stencil(5)*u(ix+5,iy,1,ic) - stencil(6)*u(ix+6,iy,1,ic) - stencil(7)*u(ix+7,iy,1,ic) & + - stencil(-7)*u(ix,iy-7,1,ic) - stencil(-6)*u(ix,iy-6,1,ic) - stencil(-5)*u(ix,iy-5,1,ic) - stencil(-4)*u(ix,iy-4,1,ic) - stencil(-3)*u(ix,iy-3,1,ic) - stencil(-2)*u(ix,iy-2,1,ic) - stencil(-1)*u(ix,iy-1,1,ic) & + - stencil(1)*u(ix,iy+1,1,ic) - stencil(2)*u(ix,iy+2,1,ic) - stencil(3)*u(ix,iy+3,1,ic) - stencil(4)*u(ix,iy+4,1,ic) - stencil(5)*u(ix,iy+5,1,ic) - stencil(6)*u(ix,iy+6,1,ic) - stencil(7)*u(ix,iy+7,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case(8) + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = ( & + - stencil(-8)*u(ix-8,iy,1,ic) - stencil(-7)*u(ix-7,iy,1,ic) - stencil(-6)*u(ix-6,iy,1,ic) - stencil(-5)*u(ix-5,iy,1,ic) - stencil(-4)*u(ix-4,iy,1,ic) - stencil(-3)*u(ix-3,iy,1,ic) - stencil(-2)*u(ix-2,iy,1,ic) - stencil(-1)*u(ix-1,iy,1,ic) & + - stencil(1)*u(ix+1,iy,1,ic) - stencil(2)*u(ix+2,iy,1,ic) - stencil(3)*u(ix+3,iy,1,ic) - stencil(4)*u(ix+4,iy,1,ic) - stencil(5)*u(ix+5,iy,1,ic) - stencil(6)*u(ix+6,iy,1,ic) - stencil(7)*u(ix+7,iy,1,ic) - stencil(8)*u(ix+8,iy,1,ic) & + - stencil(-8)*u(ix,iy-8,1,ic) - stencil(-7)*u(ix,iy-7,1,ic) - stencil(-6)*u(ix,iy-6,1,ic) - stencil(-5)*u(ix,iy-5,1,ic) - stencil(-4)*u(ix,iy-4,1,ic) - stencil(-3)*u(ix,iy-3,1,ic) - stencil(-2)*u(ix,iy-2,1,ic) - stencil(-1)*u(ix,iy-1,1,ic) & + - stencil(1)*u(ix,iy+1,1,ic) - stencil(2)*u(ix,iy+2,1,ic) - stencil(3)*u(ix,iy+3,1,ic) - stencil(4)*u(ix,iy+4,1,ic) - stencil(5)*u(ix,iy+5,1,ic) - stencil(6)*u(ix,iy+6,1,ic) - stencil(7)*u(ix,iy+7,1,ic) - stencil(8)*u(ix,iy+8,1,ic) & + + b(ix,iy,1,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo + case default + do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,1,ic) = (-sum(stencil * u(ix-a:ix+a,iy,1,ic)) - sum(stencil * u(ix,iy-a:iy+a,1,ic)) + b(ix,iy,1,ic)*dx(1)*dx(1) + params%dim*stencil(0)*u(ix,iy,1,ic)) / (stencil(0)*params%dim) + enddo; enddo + end select else - do iz = is(3), ie(3), dir - do iy = is(2), ie(2), dir - do ix = is(1), ie(1), dir - u(ix,iy,iz,ic) = (-sum(stencil * u(ix-a:ix+a,iy,iz,ic)) - sum(stencil * u(ix,iy-a:iy+a,iz,ic)) - sum(stencil * u(ix,iy,iz-a:iz+a,ic)) + b(ix,iy,iz,ic)*dx(1)*dx(1) + params%dim*stencil(0)*u(ix,iy,iz,ic)) / (stencil(0)*params%dim) - enddo - enddo - enddo + ! hardcoding of several stencil sizes, as this is critical for performance + select case(a) + case(1) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-1) * u(ix-1,iy,iz,ic) + stencil(1) * u(ix+1,iy,iz,ic)) & + - (stencil(-1) * u(ix,iy-1,iz,ic) + stencil(1) * u(ix,iy+1,iz,ic)) & + - (stencil(-1) * u(ix,iy,iz-1,ic) + stencil(1) * u(ix,iy,iz+1,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case(2) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-2) * u(ix-2,iy,iz,ic) + stencil(-1) * u(ix-1,iy,iz,ic) + stencil(1) * u(ix+1,iy,iz,ic) + stencil(2) * u(ix+2,iy,iz,ic)) & + - (stencil(-2) * u(ix,iy-2,iz,ic) + stencil(-1) * u(ix,iy-1,iz,ic) + stencil(1) * u(ix,iy+1,iz,ic) + stencil(2) * u(ix,iy+2,iz,ic)) & + - (stencil(-2) * u(ix,iy,iz-2,ic) + stencil(-1) * u(ix,iy,iz-1,ic) + stencil(1) * u(ix,iy,iz+1,ic) + stencil(2) * u(ix,iy,iz+2,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case(3) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-3) * u(ix-3,iy,iz,ic) + stencil(-2) * u(ix-2,iy,iz,ic) + stencil(-1) * u(ix-1,iy,iz,ic) + stencil(1) * u(ix+1,iy,iz,ic) + stencil(2) * u(ix+2,iy,iz,ic) + stencil(3) * u(ix+3,iy,iz,ic)) & + - (stencil(-3) * u(ix,iy-3,iz,ic) + stencil(-2) * u(ix,iy-2,iz,ic) + stencil(-1) * u(ix,iy-1,iz,ic) + stencil(1) * u(ix,iy+1,iz,ic) + stencil(2) * u(ix,iy+2,iz,ic) + stencil(3) * u(ix,iy+3,iz,ic)) & + - (stencil(-3) * u(ix,iy,iz-3,ic) + stencil(-2) * u(ix,iy,iz-2,ic) + stencil(-1) * u(ix,iy,iz-1,ic) + stencil(1) * u(ix,iy,iz+1,ic) + stencil(2) * u(ix,iy,iz+2,ic) + stencil(3) * u(ix,iy,iz+3,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case(4) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-4) * u(ix-4,iy,iz,ic) + stencil(-3) * u(ix-3,iy,iz,ic) + stencil(-2) * u(ix-2,iy,iz,ic) + stencil(-1) * u(ix-1,iy,iz,ic) + & + stencil(1) * u(ix+1,iy,iz,ic) + stencil(2) * u(ix+2,iy,iz,ic) + stencil(3) * u(ix+3,iy,iz,ic) + stencil(4) * u(ix+4,iy,iz,ic)) & + - (stencil(-4) * u(ix,iy-4,iz,ic) + stencil(-3) * u(ix,iy-3,iz,ic) + stencil(-2) * u(ix,iy-2,iz,ic) + stencil(-1) * u(ix,iy-1,iz,ic) + & + stencil(1) * u(ix,iy+1,iz,ic) + stencil(2) * u(ix,iy+2,iz,ic) + stencil(3) * u(ix,iy+3,iz,ic) + stencil(4) * u(ix,iy+4,iz,ic)) & + - (stencil(-4) * u(ix,iy,iz-4,ic) + stencil(-3) * u(ix,iy,iz-3,ic) + stencil(-2) * u(ix,iy,iz-2,ic) + stencil(-1) * u(ix,iy,iz-1,ic) + & + stencil(1) * u(ix,iy,iz+1,ic) + stencil(2) * u(ix,iy,iz+2,ic) + stencil(3) * u(ix,iy,iz+3,ic) + stencil(4) * u(ix,iy,iz+4,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case(5) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-5) * u(ix-5,iy,iz,ic) + stencil(-4) * u(ix-4,iy,iz,ic) + stencil(-3) * u(ix-3,iy,iz,ic) + stencil(-2) * u(ix-2,iy,iz,ic) + stencil(-1) * u(ix-1,iy,iz,ic) + & + stencil(1) * u(ix+1,iy,iz,ic) + stencil(2) * u(ix+2,iy,iz,ic) + stencil(3) * u(ix+3,iy,iz,ic) + stencil(4) * u(ix+4,iy,iz,ic) + stencil(5) * u(ix+5,iy,iz,ic)) & + - (stencil(-5) * u(ix,iy-5,iz,ic) + stencil(-4) * u(ix,iy-4,iz,ic) + stencil(-3) * u(ix,iy-3,iz,ic) + stencil(-2) * u(ix,iy-2,iz,ic) + stencil(-1) * u(ix,iy-1,iz,ic) + & + stencil(1) * u(ix,iy+1,iz,ic) + stencil(2) * u(ix,iy+2,iz,ic) + stencil(3) * u(ix,iy+3,iz,ic) + stencil(4) * u(ix,iy+4,iz,ic) + stencil(5) * u(ix,iy+5,iz,ic)) & + - (stencil(-5) * u(ix,iy,iz-5,ic) + stencil(-4) * u(ix,iy,iz-4,ic) + stencil(-3) * u(ix,iy,iz-3,ic) + stencil(-2) * u(ix,iy,iz-2,ic) + stencil(-1) * u(ix,iy,iz-1,ic) + & + stencil(1) * u(ix,iy,iz+1,ic) + stencil(2) * u(ix,iy,iz+2,ic) + stencil(3) * u(ix,iy,iz+3,ic) + stencil(4) * u(ix,iy,iz+4,ic) + stencil(5) * u(ix,iy,iz+5,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case(6) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-6) * u(ix-6,iy,iz,ic) + stencil(-5) * u(ix-5,iy,iz,ic) + stencil(-4) * u(ix-4,iy,iz,ic) + stencil(-3) * u(ix-3,iy,iz,ic) + stencil(-2) * u(ix-2,iy,iz,ic) + stencil(-1) * u(ix-1,iy,iz,ic) + & + stencil(1) * u(ix+1,iy,iz,ic) + stencil(2) * u(ix+2,iy,iz,ic) + stencil(3) * u(ix+3,iy,iz,ic) + stencil(4) * u(ix+4,iy,iz,ic) + stencil(5) * u(ix+5,iy,iz,ic) + stencil(6) * u(ix+6,iy,iz,ic)) & + - (stencil(-6) * u(ix,iy-6,iz,ic) + stencil(-5) * u(ix,iy-5,iz,ic) + stencil(-4) * u(ix,iy-4,iz,ic) + stencil(-3) * u(ix,iy-3,iz,ic) + stencil(-2) * u(ix,iy-2,iz,ic) + stencil(-1) * u(ix,iy-1,iz,ic) + & + stencil(1) * u(ix,iy+1,iz,ic) + stencil(2) * u(ix,iy+2,iz,ic) + stencil(3) * u(ix,iy+3,iz,ic) + stencil(4) * u(ix,iy+4,iz,ic) + stencil(5) * u(ix,iy+5,iz,ic) + stencil(6) * u(ix,iy+6,iz,ic)) & + - (stencil(-6) * u(ix,iy,iz-6,ic) + stencil(-5) * u(ix,iy,iz-5,ic) + stencil(-4) * u(ix,iy,iz-4,ic) + stencil(-3) * u(ix,iy,iz-3,ic) + stencil(-2) * u(ix,iy,iz-2,ic) + stencil(-1) * u(ix,iy,iz-1,ic) + & + stencil(1) * u(ix,iy,iz+1,ic) + stencil(2) * u(ix,iy,iz+2,ic) + stencil(3) * u(ix,iy,iz+3,ic) + stencil(4) * u(ix,iy,iz+4,ic) + stencil(5) * u(ix,iy,iz+5,ic) + stencil(6) * u(ix,iy,iz+6,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case(7) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-7) * u(ix-7,iy,iz,ic) + stencil(-6) * u(ix-6,iy,iz,ic) + stencil(-5) * u(ix-5,iy,iz,ic) + stencil(-4) * u(ix-4,iy,iz,ic) + stencil(-3) * u(ix-3,iy,iz,ic) + stencil(-2) * u(ix-2,iy,iz,ic) + stencil(-1) * u(ix-1,iy,iz,ic) + & + stencil(1) * u(ix+1,iy,iz,ic) + stencil(2) * u(ix+2,iy,iz,ic) + stencil(3) * u(ix+3,iy,iz,ic) + stencil(4) * u(ix+4,iy,iz,ic) + stencil(5) * u(ix+5,iy,iz,ic) + stencil(6) * u(ix+6,iy,iz,ic) + stencil(7) * u(ix+7,iy,iz,ic)) & + - (stencil(-7) * u(ix,iy-7,iz,ic) + stencil(-6) * u(ix,iy-6,iz,ic) + stencil(-5) * u(ix,iy-5,iz,ic) + stencil(-4) * u(ix,iy-4,iz,ic) + stencil(-3) * u(ix,iy-3,iz,ic) + stencil(-2) * u(ix,iy-2,iz,ic) + stencil(-1) * u(ix,iy-1,iz,ic) + & + stencil(1) * u(ix,iy+1,iz,ic) + stencil(2) * u(ix,iy+2,iz,ic) + stencil(3) * u(ix,iy+3,iz,ic) + stencil(4) * u(ix,iy+4,iz,ic) + stencil(5) * u(ix,iy+5,iz,ic) + stencil(6) * u(ix,iy+6,iz,ic) + stencil(7) * u(ix,iy+7,iz,ic)) & + - (stencil(-7) * u(ix,iy,iz-7,ic) + stencil(-6) * u(ix,iy,iz-6,ic) + stencil(-5) * u(ix,iy,iz-5,ic) + stencil(-4) * u(ix,iy,iz-4,ic) + stencil(-3) * u(ix,iy,iz-3,ic) + stencil(-2) * u(ix,iy,iz-2,ic) + stencil(-1) * u(ix,iy,iz-1,ic) + & + stencil(1) * u(ix,iy,iz+1,ic) + stencil(2) * u(ix,iy,iz+2,ic) + stencil(3) * u(ix,iy,iz+3,ic) + stencil(4) * u(ix,iy,iz+4,ic) + stencil(5) * u(ix,iy,iz+5,ic) + stencil(6) * u(ix,iy,iz+6,ic) + stencil(7) * u(ix,iy,iz+7,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case(8) + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = ( & + - (stencil(-8) * u(ix-8,iy,iz,ic) + stencil(-7) * u(ix-7,iy,iz,ic) + stencil(-6) * u(ix-6,iy,iz,ic) + stencil(-5) * u(ix-5,iy,iz,ic) + stencil(-4) * u(ix-4,iy,iz,ic) + stencil(-3) * u(ix-3,iy,iz,ic) + stencil(-2) * u(ix-2,iy,iz,ic) + stencil(-1) * u(ix-1,iy,iz,ic) + & + stencil(1) * u(ix+1,iy,iz,ic) + stencil(2) * u(ix+2,iy,iz,ic) + stencil(3) * u(ix+3,iy,iz,ic) + stencil(4) * u(ix+4,iy,iz,ic) + stencil(5) * u(ix+5,iy,iz,ic) + stencil(6) * u(ix+6,iy,iz,ic) + stencil(7) * u(ix+7,iy,iz,ic) + stencil(8) * u(ix+8,iy,iz,ic)) & + - (stencil(-8) * u(ix,iy-8,iz,ic) + stencil(-7) * u(ix,iy-7,iz,ic) + stencil(-6) * u(ix,iy-6,iz,ic) + stencil(-5) * u(ix,iy-5,iz,ic) + stencil(-4) * u(ix,iy-4,iz,ic) + stencil(-3) * u(ix,iy-3,iz,ic) + stencil(-2) * u(ix,iy-2,iz,ic) + stencil(-1) * u(ix,iy-1,iz,ic) + & + stencil(1) * u(ix,iy+1,iz,ic) + stencil(2) * u(ix,iy+2,iz,ic) + stencil(3) * u(ix,iy+3,iz,ic) + stencil(4) * u(ix,iy+4,iz,ic) + stencil(5) * u(ix,iy+5,iz,ic) + stencil(6) * u(ix,iy+6,iz,ic) + stencil(7) * u(ix,iy+7,iz,ic) + stencil(8) * u(ix,iy+8,iz,ic)) & + - (stencil(-8) * u(ix,iy,iz-8,ic) + stencil(-7) * u(ix,iy,iz-7,ic) + stencil(-6) * u(ix,iy,iz-6,ic) + stencil(-5) * u(ix,iy,iz-5,ic) + stencil(-4) * u(ix,iy,iz-4,ic) + stencil(-3) * u(ix,iy,iz-3,ic) + stencil(-2) * u(ix,iy,iz-2,ic) + stencil(-1) * u(ix,iy,iz-1,ic) + & + stencil(1) * u(ix,iy,iz+1,ic) + stencil(2) * u(ix,iy,iz+2,ic) + stencil(3) * u(ix,iy,iz+3,ic) + stencil(4) * u(ix,iy,iz+4,ic) + stencil(5) * u(ix,iy,iz+5,ic) + stencil(6) * u(ix,iy,iz+6,ic) + stencil(7) * u(ix,iy,iz+7,ic) + stencil(8) * u(ix,iy,iz+8,ic)) & + + b(ix,iy,iz,ic)*dx(1)*dx(1)) / (stencil(0)*params%dim) + enddo; enddo; enddo + case default + do iz = is(3), ie(3), dir; do iy = is(2), ie(2), dir; do ix = is(1), ie(1), dir + u(ix,iy,iz,ic) = (-sum(stencil * u(ix-a:ix+a,iy,iz,ic)) - sum(stencil * u(ix,iy-a:iy+a,iz,ic)) - sum(stencil * u(ix,iy,iz-a:iz+a,ic)) + b(ix,iy,iz,ic)*dx(1)*dx(1) + params%dim*stencil(0)*u(ix,iy,iz,ic)) / (stencil(0)*params%dim) + enddo; enddo; enddo + end select endif else if (params%dim == 2) then @@ -386,7 +460,7 @@ subroutine GS_compute_residual(params, u, b, r, dx, recompute_b) if (params%dim == 2) then do iy = params%g+1, params%g+params%bs(2) do ix = params%g+1, params%g+params%bs(1) - r(ix,iy,1,ic) = b(ix,iy,1,ic) + Ax_factor * (sum(stencil * u(ix-a:ix+a,iy,1,ic)) / (dx(1)*dx(1)) \ + r(ix,iy,1,ic) = b(ix,iy,1,ic) + Ax_factor * (sum(stencil * u(ix-a:ix+a,iy,1,ic)) / (dx(1)*dx(1)) & + sum(stencil * u(ix,iy-a:iy+a,1,ic)) / (dx(2)*dx(2))) enddo enddo @@ -394,7 +468,7 @@ subroutine GS_compute_residual(params, u, b, r, dx, recompute_b) do iz = params%g+1, params%g+params%bs(3) do iy = params%g+1, params%g+params%bs(2) do ix = params%g+1, params%g+params%bs(1) - r(ix,iy,iz,ic) = b(ix,iy,iz,ic) + Ax_factor * (sum(stencil * u(ix-a:ix+a,iy,iz,ic)) / (dx(1)*dx(1)) \ + r(ix,iy,iz,ic) = b(ix,iy,iz,ic) + Ax_factor * (sum(stencil * u(ix-a:ix+a,iy,iz,ic)) / (dx(1)*dx(1)) & + sum(stencil * u(ix,iy-a:iy+a,iz,ic)) / (dx(2)*dx(2)) + sum(stencil * u(ix,iy,iz-a:iz+a,ic)) / (dx(3)*dx(3))) enddo enddo @@ -513,7 +587,6 @@ subroutine GS_compute_Ax(params, u, ddu, dx, apply_B_RHS) subroutine CG_solve_poisson_level0(params, u, f, dx, tol, max_iter) - use module_params implicit none ! Arguments @@ -589,7 +662,6 @@ end subroutine CG_solve_poisson_level0 ! synch routine for block on lowest level subroutine sync_block_level0(params, u, sync_sides_only) - use module_params implicit none ! Arguments diff --git a/LIB/MESH/multigrid_vcycle.f90 b/LIB/POISSON/multigrid_vcycle.f90 similarity index 94% rename from LIB/MESH/multigrid_vcycle.f90 rename to LIB/POISSON/multigrid_vcycle.f90 index cf812cd7..0c0595c4 100644 --- a/LIB/MESH/multigrid_vcycle.f90 +++ b/LIB/POISSON/multigrid_vcycle.f90 @@ -9,7 +9,7 @@ subroutine multigrid_solve(params, hvy_sol, hvy_RHS, hvy_work, tree_ID, init_0, logical, intent(in), optional :: verbose integer(kind=ik) :: k_block, lgt_ID, hvy_id, ic, i_cycle - real(kind=rk) :: dx(1:3), x0(1:3), residual(1:4*size(hvy_sol,4)) + real(kind=rk) :: dx(1:3), x0(1:3), residual(1:4*size(hvy_sol,4)), norm_sol(1:size(hvy_sol,4)) real(kind=rk) :: t_block logical :: verbose_apply, init0 @@ -39,6 +39,9 @@ subroutine multigrid_solve(params, hvy_sol, hvy_RHS, hvy_work, tree_ID, init_0, call toc( "Sync Layer", 10010, MPI_Wtime()-t_block ) endif + ! compute norm of RHS for relative tolerances + call componentWiseNorm_tree(params, hvy_RHS(:,:,:,1:size(hvy_RHS,4),:), tree_ID, "Linfty", norm_sol(1:size(hvy_RHS,4)), threshold_state_vector=.false.) + ! prepare full tree grid, this will be populated along the way ! blocks are practically empty, but we fill them along the way so ref flag will be set to 0 to allow synching call init_full_tree(params, tree_ID, set_ref=0) @@ -63,11 +66,12 @@ subroutine multigrid_solve(params, hvy_sol, hvy_RHS, hvy_work, tree_ID, init_0, ! choose between different end criteria: ! fixed_iterations - do a set number of V-cycles - ! tolerance - stop when residuals are below a certain threshold or maximum number of iterations is reached + ! tolerance - stop when absolute or relative residuals are below a certain threshold or maximum number of iterations is reached if (params%poisson_cycle_end_criteria == "fixed_iterations") then if (i_cycle >= params%poisson_cycle_it) exit elseif (params%poisson_cycle_end_criteria == "tolerance") then - if (all(residual(1:size(hvy_sol,4)) < params%poisson_cycle_tol)) exit + if (all(residual(1:size(hvy_sol,4)) < params%poisson_cycle_tol_abs)) exit + if (all(residual(1:size(hvy_sol,4))/norm_sol(1:size(hvy_sol,4)) < params%poisson_cycle_tol_rel) .and. all(norm_sol(1:size(hvy_sol,4)) > 1e-8)) exit else call abort(250903, "Don't know how to stop! Please choose between 'fixed_iterations' and 'tolerance', thank you :)") endif @@ -161,6 +165,25 @@ subroutine multigrid_vcycle(params, hvy_sol, hvy_RHS, hvy_work, tree_ID, verbose call sync_ghosts_tree(params, hvy_sol(:,:,:,1:nc,:), tree_ID) call toc( "Sync Layer", 10010, MPI_Wtime()-t_block ) + ! JB Comment: I think this is not necessary, we do coarse extension but only keep the block values and not the decomposed ones, so we should reconstruct the original values later on without any problems + ! ! we need to preserve b + ! ! Usually we could restore it later with b = r + Ax, however, with coarse extension we have to alter the residual and would not get back the original b + ! ! downwards - no sweeps are done, we only restrict the residual down to the lowest level + ! ! compute the residual r = b - Ax to pass it downwards + ! t_block = MPI_Wtime() + ! do k_block = 1, hvy_n(tree_ID) + ! hvy_id = hvy_active(k_block, tree_ID) + ! call hvy2lgt( lgt_id, hvy_id, params%rank, params%number_blocks ) + + ! ! b only defined on leaf layer + ! if (.not. block_is_leaf(params, hvy_id)) cycle + + ! ! store b + ! hvy_work(:,:,:,nc+1:2*nc,hvy_id) = hvy_RHS(:,:,:,1:nc,hvy_id) + ! enddo + ! call toc( "GS Downwards - Store b", 10020, MPI_Wtime()-t_block ) + + ! downwards - no sweeps are done, we only restrict the residual down to the lowest level ! compute the residual r = b - Ax to pass it downwards t_block = MPI_Wtime() @@ -199,6 +222,7 @@ subroutine multigrid_vcycle(params, hvy_sol, hvy_RHS, hvy_work, tree_ID, verbose ! for all levels we are not solving Au = b, but rather Ae=r ! only on the last upwards step will we iterate on the original problem, so we need to backup u + ! starting from here, hvy_work contains the backed-up solution t_block = MPI_Wtime() do k_block = 1, hvy_n(tree_ID) hvy_id = hvy_active(k_block, tree_ID) @@ -235,8 +259,10 @@ subroutine multigrid_vcycle(params, hvy_sol, hvy_RHS, hvy_work, tree_ID, verbose ! recompute RHS b = r + Ax, call GS_compute_residual(params, hvy_work(:,:,:,1:nc,hvy_id), hvy_RHS(:,:,:,1:nc,hvy_id), hvy_RHS(:,:,:,1:nc,hvy_id), dx, recompute_b=.true.) + ! ! restore full RHS b + ! hvy_RHS(:,:,:,1:nc,hvy_id) = hvy_work(:,:,:,nc+1:2*nc,hvy_id) - ! add reconstructed solution to residual to previous solution + ! add previous solution to reconstructed solution hvy_sol(:,:,:,1:nc,hvy_id) = hvy_sol(:,:,:,1:nc,hvy_id) + hvy_work(:,:,:,1:nc,hvy_id) endif enddo diff --git a/LIB/POSTPROCESSING/compute_vorticity_post.f90 b/LIB/POSTPROCESSING/compute_vorticity_post.f90 index 0faf37b4..8ee0cb65 100644 --- a/LIB/POSTPROCESSING/compute_vorticity_post.f90 +++ b/LIB/POSTPROCESSING/compute_vorticity_post.f90 @@ -97,35 +97,50 @@ subroutine compute_vorticity_post(params) ! is used in ghost nodes sync'ing if (order == "2") then params%order_discretization = "FD_2nd_central" + params%wavelet = "CDF20" ! wavelet used for adaptive syncing params%g = 2_ik elseif (order == "4") then params%order_discretization = "FD_4th_central" - params%g = 2_ik + params%wavelet = "CDF40" ! wavelet used for adaptive syncing + params%g = 3_ik elseif (order == "4op") then params%order_discretization = "FD_4th_central_optimized" + params%wavelet = "CDF40" ! wavelet used for adaptive syncing params%g = 3_ik elseif (order == "4_comp_13") then params%order_discretization = "FD_4th_comp_1_3" + params%wavelet = "CDF40" ! wavelet used for adaptive syncing params%g = 3_ik + if (operator == "--dissipation") params%g = 4_ik ! dissipation uses L=DG with different stencil width elseif (order == "4_comp_04") then params%order_discretization = "FD_4th_comp_0_4" + params%wavelet = "CDF40" ! wavelet used for adaptive syncing params%g = 4_ik elseif (order == "6") then params%order_discretization = "FD_6th_central" + params%wavelet = "CDF60" ! wavelet used for adaptive syncing params%g = 3_ik elseif (order == "6_comp_24") then params%order_discretization = "FD_6th_comp_2_4" + params%wavelet = "CDF60" ! wavelet used for adaptive syncing params%g = 4_ik + if (operator == "--dissipation") params%g = 6_ik ! dissipation uses L=DG with different stencil width elseif (order == "6_comp_15") then params%order_discretization = "FD_6th_comp_1_5" + params%wavelet = "CDF60" ! wavelet used for adaptive syncing params%g = 5_ik + if (operator == "--dissipation") params%g = 6_ik ! dissipation uses L=DG with different stencil width elseif (order == "6_comp_06") then params%order_discretization = "FD_6th_comp_0_6" + params%wavelet = "CDF60" ! wavelet used for adaptive syncing params%g = 6_ik + elseif (order == "8") then + params%order_discretization = "FD_8th_central" + params%wavelet = "CDF80" ! wavelet used for adaptive syncing + params%g = 4_ik else - call abort(8765,"chosen discretization order invalid or not (yet) implemented. choose between 6 (FD_6th_central), 4 (FD_4th_central), 4op (FD_4th_central_optimized), 6_comp_24 (FD_6th_comp_2_4), 6_comp_15 (FD_6th_comp_1_5), 6_comp_06 (FD_6th_comp_0_6) and 2 (FD_2nd_central)") + call abort(8765,"chosen discretization order invalid or not (yet) implemented. choose between 2 (FD_2nd_central), 4 (FD_4th_central), 6 (FD_6th_central), 4op (FD_4th_central_optimized), 4_comp_13 (FD_4th_comp_1_3), 4_comp_04 (FD_4th_comp_0_4), 6_comp_24 (FD_6th_comp_2_4), 6_comp_15 (FD_6th_comp_1_5) and 6_comp_06 (FD_6th_comp_0_6)") end if - params%wavelet="CDF20" ! wavelet is not used params%Jmax = tc_length params%n_eqn = params%dim diff --git a/LIB/POSTPROCESSING/post_add_two_masks.f90 b/LIB/POSTPROCESSING/post_add_two_masks.f90 index efafec3d..550e7c4c 100644 --- a/LIB/POSTPROCESSING/post_add_two_masks.f90 +++ b/LIB/POSTPROCESSING/post_add_two_masks.f90 @@ -70,7 +70,7 @@ subroutine post_add_two_masks(params) end if params%domain_size = domain params%Jmax = max( tc_length2, tc_length1 ) - params%Jmin = 1 + params%Jmin = 0 params%n_eqn = 1 allocate(params%threshold_state_vector_component(params%n_eqn)) @@ -101,8 +101,8 @@ subroutine post_add_two_masks(params) hvy_n = 0 tree_n = 0 ! reset number of trees in forest - call readHDF5vct_tree((/fname1/), params, hvy_block, tree_ID=1, verbosity=.false.) - call readHDF5vct_tree((/fname2/), params, hvy_block, tree_ID=2, verbosity=.false.) + call readHDF5vct_tree((/fname1/), params, hvy_block, tree_ID=1, verbosity=.true.) + call readHDF5vct_tree((/fname2/), params, hvy_block, tree_ID=2, verbosity=.true.) call createActiveSortedLists_forest(params) @@ -128,10 +128,15 @@ subroutine post_add_two_masks(params) call store_ref_meshes( lgt_block_ref, lgt_active_ref, lgt_n_ref, tree_ID1=1, tree_ID2=2) ! before applying wavelet filters we need to sync call sync_ghosts_tree(params, hvy_block, tree_ID=1) - ! make grid1 equal to grid 1 (by only coarsening). - ! Should refinement be required error will be thrown. + ! make grid1 equal to grid 1 (by coarsening and refining). + ! something is not working here yet but I don't have the brain for it currently + if (params%rank==0) write(*,'(A)') "Coarsening grid 1 to grid 2" call coarse_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref(:,2), lgt_n_ref(2), & - hvy_block, hvy_tmp, tree_ID=1, verbosity=.true.) + hvy_block, hvy_tmp, tree_ID=1, ref_can_be_finer=.true., verbosity=.true.) + + if (params%rank==0) write(*,'(A)') "Refining grid 1 to grid 2" + call refine_tree_2_reference_mesh(params, lgt_block_ref, lgt_active_ref(:,2), lgt_n_ref(2), & + hvy_block, hvy_tmp, tree_ID=1, ref_can_be_coarser=.false., verbosity=.true.) case ("--test_operations") ! this tests inplace vs out of place operations diff --git a/LIB/POSTPROCESSING/post_compression_unit_test.f90 b/LIB/POSTPROCESSING/post_compression_unit_test.f90 index da21f7e7..f9be14b9 100644 --- a/LIB/POSTPROCESSING/post_compression_unit_test.f90 +++ b/LIB/POSTPROCESSING/post_compression_unit_test.f90 @@ -133,7 +133,7 @@ subroutine post_compression_unit_test(params) open(14, file="header.txt", status='replace') write (14,'("Bs=",i3," Jmax=",i2," dim=",i1," eps-norm=",A," eps-normalized=",L1)') Bs(1), params%Jmax, params%dim, & - adjustl(trim(params%eps_norm)), params%eps_normalized + trim(adjustl(params%eps_norm)), params%eps_normalized close(14) endif diff --git a/LIB/POSTPROCESSING/post_mean.f90 b/LIB/POSTPROCESSING/post_mean.f90 index 2de48c8c..a0a60a4a 100644 --- a/LIB/POSTPROCESSING/post_mean.f90 +++ b/LIB/POSTPROCESSING/post_mean.f90 @@ -1,6 +1,7 @@ subroutine post_mean(params) use module_globals use module_params + use module_operators use module_mesh use mpi use module_forestMetaData @@ -10,22 +11,11 @@ subroutine post_mean(params) type (type_params), intent(inout) :: params !> parameter struct real(kind=rk), allocatable :: hvy_block(:, :, :, :, :) - integer(kind=ik) :: tree_ID=1, hvy_id - - integer(hsize_t), dimension(4) :: size_field - integer(hid_t) :: file_id - integer(kind=ik) :: lgt_id, k, nz, iteration, dim, g - integer(kind=ik), dimension(3) :: Bs - real(kind=rk), dimension(3) :: x0, dx - real(kind=rk), dimension(3) :: domain - real(kind=rk) :: time - integer(hsize_t), dimension(2) :: dims_treecode - integer(kind=ik), allocatable :: tree(:), sum_tree(:), blocks_per_rank(:) - - real(kind=rk) :: x,y,z - real(kind=rk) :: maxi,mini,squari,meani,qi,inti - real(kind=rk) :: maxl,minl,squarl,meanl,ql - integer(kind=ik) :: ix,iy,iz,mpicode, ioerr, rank, i, tc_length + integer(kind=ik) :: tree_ID=1 + integer(kind=ik) :: rank, iteration + real(kind=rk) :: time + real(kind=rk) :: norms(1:5) + logical :: exist ! this routine works only on one tree allocate( hvy_n(1), lgt_n(1) ) @@ -36,8 +26,7 @@ subroutine post_mean(params) call get_command_argument(2,fname) if (fname =='--help' .or. fname=='--h') then if (rank==0) then - write(*,*) "WABBIT postprocessing: compute mean value of field. We also output the volume integral" - write(*,*) "(Lx*Ly*Lz*mean(field) to the file results.txt" + write(*,*) "WABBIT postprocessing: compute mean value of field. We output all values to a file." write(*,*) "mpi_command -n number_procs ./wabbit-post --mean filename.h5 result.txt" end if return @@ -47,66 +36,45 @@ subroutine post_mean(params) call check_file_exists( fname ) ! add some parameters from the file - call read_attributes(fname, lgt_n(tree_ID), time, iteration, domain, Bs, tc_length, dim, & + call read_attributes(fname, lgt_n(tree_ID), time, iteration, params%domain_size, params%Bs, params%Jmax, params%dim, & periodic_BC=params%periodic_BC, symmetry_BC=params%symmetry_BC) - params%Bs = Bs params%n_eqn = 1 params%g = 2_ik params%order_predictor = "multiresolution_2nd" - params%Jmax = tc_length - params%domain_size(1) = domain(1) - params%domain_size(2) = domain(2) - params%domain_size(3) = domain(3) params%number_blocks = ceiling( real(lgt_n(tree_ID)) / real(params%number_procs) ) + allocate(params%threshold_state_vector_component(1:params%n_eqn)) + params%threshold_state_vector_component = 1 call allocate_forest(params, hvy_block) ! read input data call readHDF5vct_tree( (/fname/), params, hvy_block, tree_ID) - ! compute an additional quantity that depends also on the position - ! (the others are translation invariant) - if (params%dim == 3) then - nz = Bs(3) - else - nz = 1 - end if - - meanl = 0.0_rk - - g = params%g - do k = 1, hvy_n(tree_ID) - hvy_id = hvy_active(k,tree_ID) - - call hvy2lgt(lgt_id, hvy_id, params%rank, params%number_blocks) - call get_block_spacing_origin( params, lgt_id, x0, dx ) - - if (params%dim == 3) then - meanl = meanl + sum( hvy_block(g+1:Bs(1)+g, g+1:Bs(2)+g, g+1:Bs(3)+g, 1, hvy_id))*dx(1)*dx(2)*dx(3) - else - meanl = meanl + sum( hvy_block(g+1:Bs(1)+g, g+1:Bs(2)+g, 1, 1, hvy_id))*dx(1)*dx(2) - endif - end do - - call MPI_REDUCE(meanl,meani,1,MPI_DOUBLE_PRECISION,MPI_SUM,0,WABBIT_COMM,mpicode) - - if (params%dim == 3) then - meani = meani / (params%domain_size(1)*params%domain_size(2)*params%domain_size(3)) - inti = meani*product(domain) - else - meani = meani / (params%domain_size(1)*params%domain_size(2)) - inti = meani*product(domain(1:2)) - endif + ! use functions from module_operators + call componentWiseNorm_tree(params, hvy_block, tree_ID, "Mean", norms(1:1), threshold_state_vector=.false.) + call componentWiseNorm_tree(params, hvy_block, tree_ID, "L1", norms(2:2), threshold_state_vector=.false.) + call componentWiseNorm_tree(params, hvy_block, tree_ID, "L2", norms(3:3), threshold_state_vector=.false.) + call componentWiseNorm_tree(params, hvy_block, tree_ID, "Linfty", norms(4:4), threshold_state_vector=.false.) + call componentWiseNorm_tree(params, hvy_block, tree_ID, "H1", norms(5:5), threshold_state_vector=.false.) if (rank == 0) then - write(*,*) "Computed mean value is: ", meani - write(*,*) "Computed integral value is: ", inti + write(*,'(A, es16.8)') "Mean: ", norms(1) + write(*,'(A, es16.8)') "L1 norm: ", norms(2) + write(*,'(A, es16.8)') "L2 norm: ", norms(3) + write(*,'(A, es16.8)') "LInfty norm: ", norms(4) + write(*,'(A, es16.8)') "H1 norm: ", norms(5) ! write volume integral to disk call get_command_argument(3,fname_out) - open(14,file=fname_out, status='replace') - write(14,*) inti - close(14) + inquire ( file=fname_out, exist=exist ) + if (exist) then + open(14,file=fname_out, status='replace') + write(14,'(A)') "# Mean, L1 norm, L2 norm, LInfty norm, H1 norm" + write(14,'(5(es16.8, ", "))') norms(1), norms(2), norms(3), norms(4), norms(5) + close(14) + else + write(*,'(A)') "Output file "//trim(adjustl(fname_out))//" does not exist. Not writing output. Use e.g. touch" + endif endif end subroutine post_mean diff --git a/LIB/POSTPROCESSING/proto_pressure_multigrid.f90 b/LIB/POSTPROCESSING/proto_pressure_multigrid.f90 index 555a12fc..ac505316 100644 --- a/LIB/POSTPROCESSING/proto_pressure_multigrid.f90 +++ b/LIB/POSTPROCESSING/proto_pressure_multigrid.f90 @@ -144,6 +144,7 @@ end subroutine compute_divergence_tree end select params%poisson_coarsest = "FFT" params%FFT_accuracy = "FD" ! FD or spectral + params%poisson_Sync_it = 2 Bs = params%Bs @@ -160,7 +161,7 @@ end subroutine compute_divergence_tree ! allocate data call allocate_forest(params, hvy_block, hvy_tmp=hvy_tmp, hvy_work=hvy_work, nrhs_slots1=1, neqn_hvy_tmp=8) - ! read input data, if comparison file is given, we read it in as well + ! read input data if (exist_p) then call readHDF5vct_tree( (/file_ux, file_uy, file_uz, file_p/), params, hvy_block, tree_ID) else @@ -549,6 +550,7 @@ end subroutine compute_projection call sync_ghosts_tree( params, hvy_block(:,:,:,params%dim+1:params%dim+1,:), tree_ID_flow ) call compute_projection(params, hvy_block(:,:,:,1:params%dim,:), hvy_block(:,:,:,params%dim+1:params%dim+1,:), hvy_block(:,:,:,1:params%dim,:), order_disc_pressure, tree_ID_flow, 1.0_rk) + ! --- following block is for debugging the helmholtz projection ---- ! call sync_ghosts_tree( params, hvy_block(:,:,:,1:params%dim,:), tree_ID_flow ) ! write(fname, '(A, i12.12, A)') "ux_", int(100*1.0e6),".h5" ! call saveHDF5_tree(fname, 100.0_rk, 0, 1, params, hvy_block, tree_ID ) @@ -568,6 +570,7 @@ end subroutine compute_projection ! call saveHDF5_tree(fname, 100.0_rk, 0, 1, params, hvy_block, tree_ID ) ! write(fname, '(A, i12.12, A)') "u2y_", int(100*1.0e6),".h5" ! call saveHDF5_tree(fname, 100.0_rk, 0, 2, params, hvy_block, tree_ID ) + ! ----------------------------------------------------------------- ! we have to recompute p call sync_ghosts_tree( params, hvy_block(:,:,:,1:params%dim,:), tree_ID_flow ) @@ -633,24 +636,34 @@ end subroutine compute_projection !*********************************************************************** ! Timestep - Euler Explicit or RK2 / Heun's method !*********************************************************************** - ! ! possibility to set different stencil for intermediate projections - ! params%poisson_order = "FD_6th_mehrstellen" - ! call setup_laplacian_stencils(params, params%g) + t4 = MPI_wtime() ! RK_generic + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_block(:,:,:,1:params%dim,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) ! caluclate timestep, here we use that of ACM and set the speed of sound to close to 0 call calculate_time_step(params, time, iteration, hvy_block, dt, tree_ID) ! compute RHS for the first stage without pressure gradient + t3 = MPI_wtime() call compute_NSI_RHS(params, hvy_block(:,:,:,1:params%dim,:), hvy_mask, hvy_work(:,:,:,1:params%dim,:,1), order_disc_nonlinear, tree_ID_flow, nu, C_eta ) + call toc( "timestep: compute_NSI_RHS", 22, MPI_wtime()-t3) ! compute RHS for the pressure Poisson equation, solve the Poisson equation and add pressure gradient + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_work(:,:,:,1:params%dim,:,1), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call compute_divergence_tree(params, hvy_work(:,:,:,1:params%dim,:,1), hvy_tmp(:,:,:,1,:), order_disc_pressure, tree_ID_flow) - call multigrid_solve(params, hvy_tmp(:,:,:,4:4,:), hvy_tmp(:,:,:,1:1,:), hvy_tmp(:,:,:,3:size(hvy_tmp,4),:), tree_ID_flow, verbose=.false.) - call compute_projection(params, hvy_work(:,:,:,1:params%dim,:,1), hvy_tmp(:,:,:,4:4,:), hvy_work(:,:,:,1:params%dim,:,1), order_disc_pressure, tree_ID_flow, 1.0_rk) + call toc( "timestep: compute_divergence_tree", 23, MPI_wtime()-t3) + t3 = MPI_wtime() + call multigrid_solve(params, hvy_tmp(:,:,:,params%dim+1:params%dim+1,:), hvy_tmp(:,:,:,1:1,:), hvy_tmp(:,:,:,params%dim+2:size(hvy_tmp,4),:), tree_ID_flow, verbose=.false.) + call toc( "timestep: multigrid_solve", 24, MPI_wtime()-t3) + t3 = MPI_wtime() + call compute_projection(params, hvy_work(:,:,:,1:params%dim,:,1), hvy_tmp(:,:,:,params%dim+1:params%dim+1,:), hvy_work(:,:,:,1:params%dim,:,1), order_disc_pressure, tree_ID_flow, 1.0_rk) + call toc( "timestep: compute_projection", 25, MPI_wtime()-t3) ! compute k_1, k_2, .... (coefficients for final stage) do j = 2, size(params%butcher_tableau, 1) - 1 @@ -671,14 +684,26 @@ end subroutine compute_projection + dt * params%butcher_tableau(j,l) * hvy_work(g(1)+1:g(1)+bs(1),g(2)+1:g(2)+bs(2),g(3)+1:g(3)+bs(3),1:params%dim,hvy_id,l-1) end do end do + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_tmp(:,:,:,1:params%dim,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) ! compute RHS for the current stage without pressure gradient + t3 = MPI_wtime() call compute_NSI_RHS(params, hvy_tmp(:,:,:,1:params%dim,:), hvy_mask, hvy_work(:,:,:,1:params%dim,:,j), order_disc_nonlinear, tree_ID_flow, nu, C_eta ) + call toc( "timestep: compute_NSI_RHS", 22, MPI_wtime()-t3) ! compute RHS for the pressure Poisson equation, solve the Poisson equation and add pressure gradient + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_work(:,:,:,1:params%dim,:,j), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call compute_divergence_tree(params, hvy_work(:,:,:,1:params%dim,:,j), hvy_tmp(:,:,:,1,:), order_disc_pressure, tree_ID_flow) + call toc( "timestep: compute_divergence_tree", 23, MPI_wtime()-t3) + t3 = MPI_wtime() call multigrid_solve(params, hvy_tmp(:,:,:,params%dim+1:params%dim+1,:), hvy_tmp(:,:,:,1:1,:), hvy_tmp(:,:,:,params%dim+2:size(hvy_tmp,4),:), tree_ID_flow, verbose=.false.) + call toc( "timestep: multigrid_solve", 24, MPI_wtime()-t3) + t3 = MPI_wtime() call compute_projection(params, hvy_work(:,:,:,1:params%dim,:,j), hvy_tmp(:,:,:,params%dim+1:params%dim+1,:), hvy_work(:,:,:,1:params%dim,:,j), order_disc_pressure, tree_ID_flow, 1.0_rk) + call toc( "timestep: compute_projection", 25, MPI_wtime()-t3) enddo ! final stage (actual final update of state vector) @@ -699,7 +724,9 @@ end subroutine compute_projection + dt*params%butcher_tableau(size(params%butcher_tableau,1),j) * hvy_work(g(1)+1:g(1)+bs(1),g(2)+1:g(2)+bs(2),g(3)+1:g(3)+bs(3),1:params%dim,hvy_id,j-1) end do end do + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_block(:,:,:,1:params%dim,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) ! write(fname, '(A, i12.12, A)') "ux_", int(time*1.0e6),".h5" ! call saveHDF5_tree(fname, time, 0, 1, params, hvy_block, tree_ID ) @@ -729,39 +756,68 @@ end subroutine compute_projection ! let's do some regular projections - this can be changed by a parameter if (modulo(iteration, params%nprojection_NSI)==0) then ! Maybe u is not completely divergence free in the discrete sense, so we do one projection in order to get a divergence free velocity field + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_block(:,:,:,1:params%dim,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call compute_divergence_tree(params, hvy_block(:,:,:,1:params%dim,:), hvy_tmp(:,:,:,params%dim+2,:), order_disc_pressure, tree_ID_flow) + call toc( "timestep: compute_divergence_tree", 23, MPI_wtime()-t3) + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_tmp(:,:,:,params%dim+2:params%dim+2,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call multigrid_solve(params, hvy_block(:,:,:,params%dim+1:params%dim+1,:), hvy_tmp(:,:,:,params%dim+2:params%dim+2,:), hvy_tmp(:,:,:,params%dim+3:size(hvy_tmp,4),:), tree_ID_flow, init_0=.true., verbose=.false.) + call toc( "timestep: multigrid_solve", 24, MPI_wtime()-t3) + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_block(:,:,:,params%dim+1:params%dim+1,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call compute_projection(params, hvy_block(:,:,:,1:params%dim,:), hvy_block(:,:,:,params%dim+1:params%dim+1,:), hvy_block(:,:,:,1:params%dim,:), order_disc_pressure, tree_ID_flow, 1.0_rk) + call toc( "timestep: compute_projection", 25, MPI_wtime()-t3) endif ! we reconstruct the pressure if we do statistics or save data to have it in respective accuracy if (it_is_time_to_save_data .or. (modulo(iteration, params%nsave_stats)==0).or.(abs(time - params%next_stats_time)<1e-12_rk)) then + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_block(:,:,:,1:params%dim,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call compute_NSI_RHS(params, hvy_block(:,:,:,1:params%dim,:), hvy_mask, hvy_tmp(:,:,:,1:params%dim,:), order_disc_nonlinear, tree_ID_flow, nu, C_eta) + call toc( "timestep: compute_NSI_RHS", 22, MPI_wtime()-t3) + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_tmp(:,:,:,1:params%dim,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call compute_divergence_tree(params, hvy_tmp(:,:,:,1:params%dim,:), hvy_tmp(:,:,:,params%dim+2,:), order_disc_pressure, tree_ID_flow) + call toc( "timestep: compute_divergence_tree", 23, MPI_wtime()-t3) + t3 = MPI_wtime() call sync_ghosts_tree( params, hvy_tmp(:,:,:,params%dim+2:params%dim+2,:), tree_ID_flow ) + call toc( "timestep: sync_ghost_tree", 21, MPI_wtime()-t3) + t3 = MPI_wtime() call multigrid_solve(params, hvy_tmp(:,:,:,params%dim+1:params%dim+1,:), hvy_tmp(:,:,:,params%dim+2:params%dim+2,:), hvy_tmp(:,:,:,params%dim+3:size(hvy_tmp,4),:), tree_ID_flow) + call toc( "timestep: multigrid_solve", 24, MPI_wtime()-t3) do k_block = 1, hvy_n(tree_ID) hvy_id = hvy_active(k_block, tree_ID) hvy_block(g(1)+1:g(1)+bs(1),g(2)+1:g(2)+bs(2),g(3)+1:g(3)+bs(3),params%dim+1,hvy_id) = hvy_tmp(g(1)+1:g(1)+bs(1),g(2)+1:g(2)+bs(2),g(3)+1:g(3)+bs(3),4,hvy_id) enddo endif + call toc( "TOPLEVEL: time stepper", 11, MPI_wtime()-t4) + !******************************************************************* ! statistics !******************************************************************* - t_block = MPI_wtime() - if ( (modulo(iteration, params%nsave_stats)==0).or.(abs(time - params%next_stats_time)<1e-12_rk) ) then + if ( (modulo(iteration, params%nsave_stats)==0).or.(abs(time - params%next_stats_time)<1e-12_rk) .or. abs(time - params%time_max) < 1e-12_rk ) then + t4 = MPI_wtime() ! we need to sync ghost nodes for some derived qtys, for sure call sync_ghosts_RHS_tree( params, hvy_block, tree_ID_flow ) call statistics_wrapper(time, dt, params, hvy_block, hvy_tmp, hvy_mask, tree_ID_flow) + call toc( "TOPLEVEL: statistics", 13, MPI_wtime()-t4) + + ! update next stats time + if (abs(time - params%next_stats_time)<1e-12_rk) params%next_stats_time = params%next_stats_time + params%tsave_stats endif - call toc( "TOPLEVEL: statistics", 13, MPI_wtime()-t_block) !*********************************************************************** ! Adapt mesh - (coarsening where possible) diff --git a/LIB/TIME/calculate_time_step.f90 b/LIB/TIME/calculate_time_step.f90 index e589465d..91db2cd8 100644 --- a/LIB/TIME/calculate_time_step.f90 +++ b/LIB/TIME/calculate_time_step.f90 @@ -5,12 +5,12 @@ subroutine calculate_time_step( params, time, iteration, hvy_block, dt, tree_ID !-------------------------------------------------------------- implicit none - type (type_params), intent(in):: params !< user defined parameter structure - real(kind=rk), intent(in) :: time !< current time of the simulation - integer(kind=ik), intent(in) :: iteration - real(kind=rk), intent(in) :: hvy_block(:, :, :, :, :) !< heavy data array contains the block data of the statevector - integer(kind=ik), intent(in) :: tree_ID - real(kind=rk), intent(out) :: dt !< time step dt + type (type_params), intent(inout) :: params !< user defined parameter structure + real(kind=rk), intent(in) :: time !< current time of the simulation + integer(kind=ik), intent(in) :: iteration + real(kind=rk), intent(in) :: hvy_block(:, :, :, :, :) !< heavy data array contains the block data of the statevector + integer(kind=ik), intent(in) :: tree_ID + real(kind=rk), intent(out) :: dt !< time step dt !-------------------------------------------------------------- ! MPI error variable integer(kind=ik) :: ierr, Jmax, k, lgt_id, hvy_id @@ -74,6 +74,12 @@ subroutine calculate_time_step( params, time, iteration, hvy_block, dt, tree_ID end if end if + if ( params%time_statistics) then + ! time step should hit time_statistics_start_time exactly + if ( time+dt > params%time_statistics_start_time .and. time params%time_max .and. time<=params%time_max) dt = params%time_max - time diff --git a/LIB/TIME/statistics_wrapper.f90 b/LIB/TIME/statistics_wrapper.f90 index ab9e71a2..b46cdd1b 100644 --- a/LIB/TIME/statistics_wrapper.f90 +++ b/LIB/TIME/statistics_wrapper.f90 @@ -88,11 +88,11 @@ subroutine statistics_wrapper(time, dt, params, hvy_block, hvy_tmp, hvy_mask, tr end subroutine statistics_wrapper -subroutine time_statistics_wrapper(time, dt, time_start, params, hvy_block, hvy_tmp, hvy_mask, tree_ID) +subroutine time_statistics_wrapper(time, dt, params, hvy_block, hvy_tmp, hvy_mask, tree_ID) implicit none - real(kind=rk), intent(in) :: time, dt, time_start + real(kind=rk), intent(in) :: time, dt type (type_params), intent(inout) :: params !> user defined parameter structure real(kind=rk), intent(inout) :: hvy_tmp(:, :, :, :, :) !> heavy work data array - block data real(kind=rk), intent(inout) :: hvy_block(:, :, :, :, :) !> heavy data array - block data @@ -121,7 +121,7 @@ subroutine time_statistics_wrapper(time, dt, time_start, params, hvy_block, hvy_ hvy_id_mask = hvy_id endif - call TIME_STATISTICS_meta(params%physics_type, time, dt, time_start, hvy_block(:,:,:,:, hvy_id), params%g, x0, dx, & + call TIME_STATISTICS_meta(params%physics_type, time, dt, params%time_statistics_start_time, hvy_block(:,:,:,:, hvy_id), params%g, x0, dx, & hvy_tmp(:,:,:,:, hvy_id), hvy_mask(:,:,:,:, hvy_id_mask)) enddo diff --git a/LIB/TIMING/module_timing.f90 b/LIB/TIMING/module_timing.f90 index 02831783..1b1b0716 100644 --- a/LIB/TIMING/module_timing.f90 +++ b/LIB/TIMING/module_timing.f90 @@ -53,6 +53,7 @@ subroutine reset_all_timings() ! 50- 55 createActiveSortedLists ! 57- 59 updateMetadata_tree ! 60- 63 synchronize_lgt_data + ! 65- 69 createMask_tree ! 70- 74 xfer_block_data ! 80- 85 sync ghosts ! 90- 92 balanceLoad_tree @@ -64,7 +65,7 @@ subroutine reset_all_timings() ! 150- 158 coarseExtensionUpdate_tree ! 160- 165 executeCoarsening_tree ! 200-1000 Other - ! 250- 258 forest + ! 250- 270 forest ! 350- 354 module_MOR ! 1000-XXXX Miscellaneous ! 1001-1002 Commented block based toc in coarsening indicator diff --git a/LIB/WAVELETS/module_wavelets.f90 b/LIB/WAVELETS/module_wavelets.f90 index b05e0ff9..de85f83c 100644 --- a/LIB/WAVELETS/module_wavelets.f90 +++ b/LIB/WAVELETS/module_wavelets.f90 @@ -1713,12 +1713,6 @@ subroutine WaveReconstruction_dim1( params, u_wc, SC_reconstruct, WC_reconstruct if (.not. allocated(params%HD)) call abort(1717229, "Wavelet setup not called?!") if (.not. allocated(params%GD)) call abort(1717231, "Wavelet setup not called?!") - ! For the reconstruction of the wavelets all points are shifted by one one the SC positions - ! therefore, if the first point in the ghost layer would be a WC, it is ignored - ! This is done to align the WC with the correct positions. However, as the left bound of the GR filter defines the minimum g, - ! we apply a trick here, to ignore the first point in order to reduce the ghost point size, because it is always zero - ! The respecting buffers are zero-padded in order to account for this shift. - maxn = maxval((/ nx, ny, nz /)) if (allocated(buffer1)) then if (size(buffer1, dim=1)