From 0c2adb55bd927da0aa0f7ccb9a28d514ae8efe22 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Mon, 6 Oct 2025 16:15:51 +0200 Subject: [PATCH 01/22] pool of temp vectors; tmp_host_real_grid done --- src/detail/tmp_vector_pool.hpp | 117 +++++++++++++++++++++++++++++++ src/impl/particles_impl.ipp | 36 +++++----- src/impl/particles_impl_coal.ipp | 42 ++++++----- src/impl/particles_impl_sync.ipp | 16 +++-- src/particles.tpp | 1 + 5 files changed, 168 insertions(+), 44 deletions(-) create mode 100644 src/detail/tmp_vector_pool.hpp diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp new file mode 100644 index 000000000..d3cc3b036 --- /dev/null +++ b/src/detail/tmp_vector_pool.hpp @@ -0,0 +1,117 @@ +#pragma once +#include +#include +#include + + +namespace libcloudphxx +{ + namespace lgrngn + { + template + class tmp_vector_pool { + struct entry { + vec_t vec; + bool in_use = false; + entry(size_t n) : vec(n) {} + }; + std::vector pool; + public: + // tmp_vector_pool(size_t pool_size, size_t vec_size) { + // for (size_t i = 0; i < pool_size; ++i) + // pool.emplace_back(vec_size); + // } + + // Add a new vector to the pool + // void add_vector(size_t vec_size) { + // pool.emplace_back(vec_size); + // } + void add_vector() { + pool.emplace_back(); + } + + void resize(size_t n) { + for (size_t i = 0; i < pool.size(); ++i) { + pool[i].vec.resize(n); + } + } + + // Acquire an available vector, returns its index + size_t acquire() { + for (size_t i = 0; i < pool.size(); ++i) { + if (!pool[i].in_use) { + pool[i].in_use = true; + return i; + } + } + assert(false && "No available temporary vectors in pool!"); + return size_t(-1); + } + + // Release a vector by index + void release(size_t idx) { + assert(idx < pool.size() && pool[idx].in_use); + pool[idx].in_use = false; + } + + // Access vector by index + vec_t& get(size_t idx) { + assert(idx < pool.size() && pool[idx].in_use); + return pool[idx].vec; + } + + std::pair get() { + const size_t idx = acquire(); + assert(idx < pool.size() && pool[idx].in_use); + return std::make_pair(idx, pool[idx].vec); + } + + // RAII guard + class guard { + tmp_vector_pool& pool; + size_t idx; + bool valid; + public: + guard(tmp_vector_pool& pool_) + : pool(pool_), idx(pool_.acquire()), valid(true) {} + ~guard() { if (valid) pool.release(idx); } + guard(const guard&) = delete; + guard& operator=(const guard&) = delete; + guard(guard&& other) noexcept : pool(other.pool), idx(other.idx), valid(other.valid) { + other.valid = false; + } + guard& operator=(guard&& other) noexcept { + if (this != &other) { + if (valid) pool.release(idx); + pool = other.pool; + idx = other.idx; + valid = other.valid; + other.valid = false; + } + return *this; + } + vec_t& get() { return pool.get(idx); } + // vec_t* operator->() { return &pool.get(idx); } + vec_t& operator*() { return pool.get(idx); } + }; + + guard get_guard() { + return guard(*this); + } + }; + }; +}; + + +// Usage example 1: +{ + tmp_vector_pool::guard tmp(pool); + thrust::device_vector& tmp = guard.get(); + // Use *tmp or tmp.get() as a thrust::device_vector& +} // Automatically released here + +// Usage example 2: +size_t idx = pool.acquire(); +thrust::device_vector& tmp = pool.get(idx); +// Use tmp... +pool.release(idx); // Mark as available again \ No newline at end of file diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index deef715d4..c7aa5b5fb 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -196,26 +196,24 @@ namespace libcloudphxx > chem_stepper; // temporary data - thrust::host_vector + tmp_vector_pool> tmp_host_real_grid, tmp_host_real_cell; - thrust::host_vector + tmp_vector_pool> tmp_host_size_cell; - thrust_device::vector + tmp_vector_pool> tmp_device_real_part, - tmp_device_real_part1, - tmp_device_real_part2, - tmp_device_real_part3, - tmp_device_real_part4, - tmp_device_real_part5, - tmp_device_real_cell, - tmp_device_real_cell1, - tmp_device_real_cell2, - &u01; // uniform random numbers between 0 and 1 // TODO: use the tmp array as rand argument? - thrust_device::vector - tmp_device_n_part, - &un; // uniform natural random numbers between 0 and max value of unsigned int - thrust_device::vector + // tmp_device_real_part1, + // tmp_device_real_part2, + // tmp_device_real_part3, + // tmp_device_real_part4, + // tmp_device_real_part5, + tmp_device_real_cell; + // tmp_device_real_cell1, + // tmp_device_real_cell2, + tmp_vector_pool> + tmp_device_n_part; + tmp_vector_pool> tmp_device_size_cell, tmp_device_size_part; @@ -278,10 +276,10 @@ namespace libcloudphxx // --- methods --- // fills u01 with n random real numbers uniformly distributed in range [0,1) - void rand_u01(thrust_size_t n) { rng.generate_n(u01, n); } + void rand_u01(thrust_device::vector &u01, thrust_size_t n) { rng.generate_n(u01, n); } // fills un with n random integers uniformly distributed on the whole integer range - void rand_un(thrust_size_t n) { rng.generate_n(un, n); } + void rand_un(thrust_device::vector &un, thrust_size_t n) { rng.generate_n(un, n); } // max(1, n) int m1(int n) { return n == 0 ? 1 : n; } @@ -307,9 +305,7 @@ namespace libcloudphxx zero(0), n_part(0), sorted(false), - u01(tmp_device_real_part), n_user_params(_opts_init.kernel_parameters.size()), - un(tmp_device_n_part), rng(_opts_init.rng_seed), src_stp_ctr(0), rlx_stp_ctr(0), diff --git a/src/impl/particles_impl_coal.ipp b/src/impl/particles_impl_coal.ipp index 205ccacb2..0d085d1fa 100644 --- a/src/impl/particles_impl_coal.ipp +++ b/src/impl/particles_impl_coal.ipp @@ -310,9 +310,6 @@ namespace libcloudphxx ); // nancheck(off, "off - droplet index within a cell"); - // tossing n_part/2 random numbers for comparing with probability of collisions in a pair of droplets - rand_u01(n_part); - // colliding typedef thrust::permutation_iterator< typename thrust_device::vector::iterator, @@ -352,22 +349,29 @@ namespace libcloudphxx > > zip_rw_t; - zip_ro_t zip_ro_it( - thrust::make_tuple( - // u01 - u01.begin(), - // scl - thrust::make_permutation_iterator(scl.begin(), sorted_ijk.begin()), - // ix - zero, - zero+1, - // cid - thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin()), - thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin())+1, - // dv - thrust::make_permutation_iterator(dv.begin(), sorted_ijk.begin()) - ) - ); + // tossing n_part/2 random numbers for comparing with probability of collisions in a pair of droplets + { + auto u01g = tmp_device_real_part.get_guard(); + thrust_device::vector &u01 = u01g.get(); + rand_u01(u01, n_part); + + zip_ro_t zip_ro_it( + thrust::make_tuple( + // u01 + u01.begin(), + // scl + thrust::make_permutation_iterator(scl.begin(), sorted_ijk.begin()), + // ix + zero, + zero+1, + // cid + thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin()), + thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin())+1, + // dv + thrust::make_permutation_iterator(dv.begin(), sorted_ijk.begin()) + ) + ); + } auto zip_ro_calc_it = thrust::make_zip_iterator( diff --git a/src/impl/particles_impl_sync.ipp b/src/impl/particles_impl_sync.ipp index 6c6ae98ba..b96dd561b 100644 --- a/src/impl/particles_impl_sync.ipp +++ b/src/impl/particles_impl_sync.ipp @@ -21,10 +21,13 @@ namespace libcloudphxx assert(to.size() >= l2e[&to].size()); + auto host_consec_g = tmp_host_real_grid.get_guard(); + thrust::host_vector &host_consec = host_consec_g.get(); + thrust::transform( l2e[&to].begin(), l2e[&to].end(), #if defined(__NVCC__) // TODO: better condition (same addressing space) - tmp_host_real_grid.begin(), + host_consec.begin(), #else to.begin(), #endif @@ -32,7 +35,7 @@ namespace libcloudphxx ); #if defined(__NVCC__) - thrust::copy(tmp_host_real_grid.begin(), tmp_host_real_grid.begin() + l2e[&to].size(), to.begin()); + thrust::copy(host_consec.begin(), host_consec.begin() + l2e[&to].size(), to.begin()); #endif } @@ -44,15 +47,18 @@ namespace libcloudphxx { if (to.is_null()) return; + auto host_consec_g = tmp_host_real_grid.get_guard(); + thrust::host_vector &host_consec = host_consec_g.get(); + #if defined(__NVCC__) - assert(from.size() <= tmp_host_real_grid.size()); - thrust::copy(from.begin(), from.end(), tmp_host_real_grid.begin()); + assert(from.size() <= host_consec.size()); + thrust::copy(from.begin(), from.end(), host_consec.begin()); #endif thrust::transform( l2e[&from].begin(), l2e[&from].end(), #if defined(__NVCC__) // TODO: better condition (same addressing space) - tmp_host_real_grid.begin(), + host_consec.begin(), #else from.begin(), #endif diff --git a/src/particles.tpp b/src/particles.tpp index 477f7a295..a3ad121b1 100644 --- a/src/particles.tpp +++ b/src/particles.tpp @@ -28,6 +28,7 @@ #include "detail/kernel_interpolation.hpp" #include "detail/functors_host.hpp" #include "detail/ran_with_mpi.hpp" +#include "detail/tmp_vector_pool.hpp" //kernel definitions #include "detail/kernel_definitions/hall_efficiencies.hpp" From 7aa5fce1fe99463a8ea8e00fb9afe56982420e37 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Mon, 6 Oct 2025 18:08:30 +0200 Subject: [PATCH 02/22] pool of temp vectors; impl_init_n done --- src/impl/particles_impl_init_n.ipp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/impl/particles_impl_init_n.ipp b/src/impl/particles_impl_init_n.ipp index e409ee831..d8628c164 100644 --- a/src/impl/particles_impl_init_n.ipp +++ b/src/impl/particles_impl_init_n.ipp @@ -50,9 +50,14 @@ namespace libcloudphxx ) { // temporary space on the host - thrust::host_vector tmp_real(n_part_to_init); - thrust::host_vector tmp_ijk(n_part_to_init); - thrust::host_vector &tmp_rhod(tmp_host_real_cell); + auto tmp_real_g = tmp_host_real_part.get_guard(); + thrust::host_vector &tmp_real = tmp_real_g.get(); + + auto tmp_ijk_g = tmp_host_size_part.get_guard(); + thrust::host_vector &tmp_ijk = tmp_ijk_g.get(); + + auto tmp_rhod_g = tmp_host_real_cell.get_guard(); + thrust::host_vector &tmp_rhod = tmp_rhod_g.get(); thrust::copy( rhod.begin(), rhod.end(), // from @@ -70,7 +75,7 @@ namespace libcloudphxx // evaluating n_of_lnrd_stp thrust::transform( - tmp_real.begin(), tmp_real.end(), // input + tmp_real.begin(), tmp_real.begin() + n_part_to_init, // input tmp_real.begin(), // output detail::eval_and_multiply(n_of_lnrd_stp, multiplier) ); @@ -82,7 +87,7 @@ namespace libcloudphxx // correcting STP -> actual ambient conditions if(!opts_init.aerosol_independent_of_rhod) thrust::transform( - tmp_real.begin(), tmp_real.end(), // input - 1st arg + tmp_real.begin(), tmp_real.begin() + n_part_to_init, // input - 1st arg thrust::make_permutation_iterator( // input - 2nd arg tmp_rhod.begin(), tmp_ijk.begin() @@ -94,7 +99,7 @@ namespace libcloudphxx // accounting for the aerosol concentration profile if(opts_init.aerosol_conc_factor.size()>0) thrust::transform( - tmp_real.begin(), tmp_real.end(), // input - 1st arg + tmp_real.begin(), tmp_real.begin() + n_part_to_init, // input - 1st arg thrust::make_permutation_iterator( // input - 2nd arg opts_init.aerosol_conc_factor.begin(), thrust::make_transform_iterator(tmp_ijk.begin(), arg::_1 % opts_init.nz) // k index @@ -114,7 +119,7 @@ namespace libcloudphxx ); thrust::transform( - tmp_real.begin(), tmp_real.end(), + tmp_real.begin(), tmp_real.begin() + n_part_to_init, thrust::make_permutation_iterator( // input - 2nd arg tmp_rhod.begin(), tmp_ijk.begin() @@ -127,7 +132,7 @@ namespace libcloudphxx // host -> device (includes casting from real_t to uint! and rounding) thrust::copy( thrust::make_transform_iterator(tmp_real.begin(), arg::_1 + real_t(0.5)), - thrust::make_transform_iterator(tmp_real.end(), arg::_1 + real_t(0.5)), + thrust::make_transform_iterator(tmp_real.begin() + n_part_to_init, arg::_1 + real_t(0.5)), n.begin() + n_part_old ); } From eb624336faee0c1aefc91ee33723ccf25e1fb663 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Mon, 6 Oct 2025 18:26:31 +0200 Subject: [PATCH 03/22] pool of temp vectors; tmp_host_real_cell --- src/detail/tmp_vector_pool.hpp | 10 +++++----- src/impl/particles_impl.ipp | 6 ++++-- src/impl/particles_impl_fill_outbuf.ipp | 6 +++--- src/impl/particles_impl_init_hskpng_ncell.ipp | 2 -- src/particles_ctor.ipp | 7 +++++-- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp index d3cc3b036..1548003ea 100644 --- a/src/detail/tmp_vector_pool.hpp +++ b/src/detail/tmp_vector_pool.hpp @@ -17,10 +17,10 @@ namespace libcloudphxx }; std::vector pool; public: - // tmp_vector_pool(size_t pool_size, size_t vec_size) { - // for (size_t i = 0; i < pool_size; ++i) - // pool.emplace_back(vec_size); - // } + tmp_vector_pool(size_t pool_size = 1) { + for (size_t i = 0; i < pool_size; ++i) + pool.emplace_back(); + } // Add a new vector to the pool // void add_vector(size_t vec_size) { @@ -114,4 +114,4 @@ namespace libcloudphxx size_t idx = pool.acquire(); thrust::device_vector& tmp = pool.get(idx); // Use tmp... -pool.release(idx); // Mark as available again \ No newline at end of file +pool.release(idx); // Mark as available again diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index c7aa5b5fb..e38e099e5 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -336,7 +336,9 @@ namespace libcloudphxx adve_scheme(_opts_init.adve_scheme), allow_sstp_cond(_opts_init.sstp_cond > 1 || _opts_init.variable_dt_switch), allow_sstp_chem(_opts_init.sstp_chem > 1 || _opts_init.variable_dt_switch), - pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)) // coal prob can be greater than one only in sd_conc simulations + pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)), // coal prob can be greater than one only in sd_conc simulations + tmp_device_real_part(6), + tmp_device_real_cell(3) { // set 0 dev_count to mark that its not a multi_CUDA spawn @@ -503,7 +505,7 @@ namespace libcloudphxx void init_kernel(); void init_vterm(); - void fill_outbuf(); + void fill_outbuf(thrust::host_vector&); std::vector fill_attr_outbuf(const std::string&); void mpi_exchange(); diff --git a/src/impl/particles_impl_fill_outbuf.ipp b/src/impl/particles_impl_fill_outbuf.ipp index ecb71bca9..3763c392f 100644 --- a/src/impl/particles_impl_fill_outbuf.ipp +++ b/src/impl/particles_impl_fill_outbuf.ipp @@ -10,9 +10,9 @@ namespace libcloudphxx namespace lgrngn { template - void particles_t::impl::fill_outbuf() + void particles_t::impl::fill_outbuf(thrust::host_vector &outbuf) { - thrust::fill(tmp_host_real_cell.begin(), tmp_host_real_cell.end(), 0); + thrust::fill(outbuf.begin(), outbuf.end(), 0); #if defined(__NVCC__) thrust::copy( @@ -31,7 +31,7 @@ namespace libcloudphxx count_mom.begin(), // input - begin count_mom.begin() + count_n, // input - end thrust::make_permutation_iterator( // output - tmp_host_real_cell.begin(), // data + outbuf.begin(), // data pi.begin() // permutation ) ); diff --git a/src/impl/particles_impl_init_hskpng_ncell.ipp b/src/impl/particles_impl_init_hskpng_ncell.ipp index 2b92d6601..45cc9ca6a 100644 --- a/src/impl/particles_impl_init_hskpng_ncell.ipp +++ b/src/impl/particles_impl_init_hskpng_ncell.ipp @@ -24,8 +24,6 @@ namespace libcloudphxx // initialising device temporary arrays tmp_device_real_cell.resize(n_cell); - tmp_device_real_cell1.resize(n_cell); - tmp_device_real_cell2.resize(n_cell); tmp_device_size_cell.resize(n_cell); tmp_host_size_cell.resize(n_cell); tmp_host_real_cell.resize(n_cell); diff --git a/src/particles_ctor.ipp b/src/particles_ctor.ipp index bb22aa7e4..3a5278719 100644 --- a/src/particles_ctor.ipp +++ b/src/particles_ctor.ipp @@ -82,10 +82,13 @@ namespace libcloudphxx template real_t *particles_t::outbuf() { - pimpl->fill_outbuf(); + auto outbuf_g = pimpl->tmp_host_real_cell.get_guard(); + thrust::host_vector &outbuf = outbuf_g.get(); + + pimpl->fill_outbuf(outbuf); // restore the count_num and count_ijk arrays pimpl->hskpng_count(); - return &(*(pimpl->tmp_host_real_cell.begin())); + return &(*(outbuf.begin())); } template From c55c3e8dc7bf0e673f2247a9b9a7b2be37de1a5a Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Mon, 6 Oct 2025 18:30:35 +0200 Subject: [PATCH 04/22] pool of temp vectors; tmp_host_real_cell --- src/particles_multi_gpu_diag.ipp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/particles_multi_gpu_diag.ipp b/src/particles_multi_gpu_diag.ipp index b3dad92cc..af70ac2f5 100644 --- a/src/particles_multi_gpu_diag.ipp +++ b/src/particles_multi_gpu_diag.ipp @@ -209,9 +209,12 @@ namespace libcloudphxx for(auto &p : pimpl->particles) // TODO: perform this copy in parallell? { + auto outbuf_g = p->pimpl->tmp_host_real_cell.get_guard(); + thrust::host_vector &outbuf = outbuf_g.get(); + thrust::copy( - p->pimpl->tmp_host_real_cell.begin(), - p->pimpl->tmp_host_real_cell.end(), + outbuf.begin(), + outbuf.end(), pimpl->real_n_cell_tot.begin() + p->pimpl->n_cell_bfr ); } From f7b93f67c31ff69639a5c7569d9c0638856b6e07 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 7 Oct 2025 10:10:55 +0200 Subject: [PATCH 05/22] pool of temp vectors; tmp_device_real_part p.1 --- src/detail/tmp_vector_pool.hpp | 5 +++++ src/impl/particles_impl.ipp | 27 ++++++++++++++--------- src/impl/particles_impl_adve.ipp | 9 +++++--- src/impl/particles_impl_bcnd.ipp | 3 ++- src/impl/particles_impl_chem_ante.ipp | 12 +++++++++- src/impl/particles_impl_chem_dissoc.ipp | 2 +- src/impl/particles_impl_chem_henry.ipp | 2 +- src/impl/particles_impl_chem_react.ipp | 5 +++-- src/impl/particles_impl_chem_strength.ipp | 2 +- src/impl/particles_impl_hskpng_resize.ipp | 6 ++--- src/impl/particles_impl_moms.ipp | 17 +++++++++----- src/particles_diag.ipp | 6 ++++- src/particles_step.ipp | 1 + 13 files changed, 67 insertions(+), 30 deletions(-) diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp index 1548003ea..3f0b7616f 100644 --- a/src/detail/tmp_vector_pool.hpp +++ b/src/detail/tmp_vector_pool.hpp @@ -90,6 +90,11 @@ namespace libcloudphxx } return *this; } + // void release() { + // pool = nullptr; + // idx = 4444444; + // valid = false; + // } vec_t& get() { return pool.get(idx); } // vec_t* operator->() { return &pool.get(idx); } vec_t& operator*() { return pool.get(idx); } diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index e38e099e5..7918556c0 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -217,6 +217,13 @@ namespace libcloudphxx tmp_device_size_cell, tmp_device_size_part; + // guards for temp vectors that are used in multiple functions and need to stay unchanged inbetween + std::uniqe_ptr< + tmp_vector_pool>::guard + > n_filtered_gp, + V_gp; + + // to simplify foreach calls const thrust::counting_iterator zero; @@ -268,7 +275,7 @@ namespace libcloudphxx // std::set*> distmem_size_vctrs; // no size vectors copied? // // vetors that are not in distmem_real_vctrs that need to be resized when the number of SDs changes, these are helper variables - std::set*> resize_real_vctrs; +// std::set*> resize_real_vctrs; // std::set*> resize_n_vctrs; std::set*> resize_size_vctrs; @@ -337,8 +344,8 @@ namespace libcloudphxx allow_sstp_cond(_opts_init.sstp_cond > 1 || _opts_init.variable_dt_switch), allow_sstp_chem(_opts_init.sstp_chem > 1 || _opts_init.variable_dt_switch), pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)), // coal prob can be greater than one only in sd_conc simulations - tmp_device_real_part(6), - tmp_device_real_cell(3) + //tmp_device_real_part(6), + tmp_device_real_cell(3) // 3 temporary vectors of this type; NOTE: default is 1 { // set 0 dev_count to mark that its not a multi_CUDA spawn @@ -423,18 +430,17 @@ namespace libcloudphxx // initializing distmem_n_vctrs - list of n_t vectors with properties of SDs that have to be copied/removed/recycled when a SD is copied/removed/recycled distmem_n_vctrs.insert(&n); - // real vctrs that need to be resized but do need to be copied in distmem - resize_real_vctrs.insert(&tmp_device_real_part); + // init number of temporary real vctrs if(opts_init.chem_switch || allow_sstp_cond || n_dims >= 2) - resize_real_vctrs.insert(&tmp_device_real_part1); + tmp_device_real_part.add_vector(); if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch) - resize_real_vctrs.insert(&tmp_device_real_part2); + tmp_device_real_part.add_vector(); if(allow_sstp_cond && opts_init.exact_sstp_cond) { - resize_real_vctrs.insert(&tmp_device_real_part3); - resize_real_vctrs.insert(&tmp_device_real_part4); + tmp_device_real_part.add_vector(); + tmp_device_real_part.add_vector(); if(opts_init.const_p) - resize_real_vctrs.insert(&tmp_device_real_part5); + tmp_device_real_part.add_vector(); } resize_size_vctrs.insert(&ijk); @@ -600,6 +606,7 @@ namespace libcloudphxx void chem_dissoc(); void chem_react(const real_t &dt); void chem_cleanup(); + void chem_post_step(); thrust_size_t rcyc(); void bcnd(); diff --git a/src/impl/particles_impl_adve.ipp b/src/impl/particles_impl_adve.ipp index e1036155a..c50af0657 100644 --- a/src/impl/particles_impl_adve.ipp +++ b/src/impl/particles_impl_adve.ipp @@ -179,10 +179,13 @@ namespace libcloudphxx namespace arg = thrust::placeholders; // old positions storage + auto x_old_g = tmp_device_real_part.get_guard(), + y_old_g = tmp_device_real_part.get_guard(), + z_old_g = tmp_device_real_part.get_guard(); thrust_device::vector - &x_old(tmp_device_real_part), - &y_old(tmp_device_real_part2), - &z_old(tmp_device_real_part1); + &x_old(x_old_g.get()), + &y_old(y_old_g.get()), + &z_old(z_old_g.get()); // shift to coordiante system starting at halo's left edge thrust::transform(x.begin(), x.end(), x.begin(), arg::_1 + real_t(halo_size) * opts_init.dx); diff --git a/src/impl/particles_impl_bcnd.ipp b/src/impl/particles_impl_bcnd.ipp index 0bf8b8165..7dabb7844 100644 --- a/src/impl/particles_impl_bcnd.ipp +++ b/src/impl/particles_impl_bcnd.ipp @@ -194,7 +194,8 @@ namespace libcloudphxx { namespace arg = thrust::placeholders; - thrust_device::vector &n_filtered(tmp_device_real_part); + auto n_filtered_g = tmp_device_real_part.get_guard(); + thrust_device::vector &n_filtered = n_filtered_g.get(); thrust::fill(n_filtered.begin(), n_filtered.end(), 0.); diff --git a/src/impl/particles_impl_chem_ante.ipp b/src/impl/particles_impl_chem_ante.ipp index c16bd4185..2352d4be7 100644 --- a/src/impl/particles_impl_chem_ante.ipp +++ b/src/impl/particles_impl_chem_ante.ipp @@ -56,13 +56,23 @@ namespace libcloudphxx ); } + template + void particles_t::impl::chem_post_step() + { + if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); + V_gp.reset(); // release temorary array used to store volume in chemistry + } + template void particles_t::impl::chem_vol_ante() { if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); //calculate new drop volumes (to be used in chem) - thrust_device::vector &V(tmp_device_real_part); + V_gp.reset(&(tmp_device_real_part.get_guard())); + thrust_device::vector &V = V_gp->get(); + + thrust::transform( rw2.begin(), rw2.end(), // input diff --git a/src/impl/particles_impl_chem_dissoc.ipp b/src/impl/particles_impl_chem_dissoc.ipp index 393d6abc8..dbe54e634 100644 --- a/src/impl/particles_impl_chem_dissoc.ipp +++ b/src/impl/particles_impl_chem_dissoc.ipp @@ -145,7 +145,7 @@ namespace libcloudphxx { using namespace common::molar_mass; // M-prefixed - thrust_device::vector &V(tmp_device_real_part); + thrust_device::vector &V = V_gp->get(); const thrust_device::vector &chem_flag(tmp_device_n_part); if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); diff --git a/src/impl/particles_impl_chem_henry.ipp b/src/impl/particles_impl_chem_henry.ipp index 4759a706e..d5630d61f 100644 --- a/src/impl/particles_impl_chem_henry.ipp +++ b/src/impl/particles_impl_chem_henry.ipp @@ -250,7 +250,7 @@ namespace libcloudphxx using namespace common::dissoc; // K-prefixed const thrust_device::vector &chem_flag(tmp_device_n_part); - const thrust_device::vector &V(tmp_device_real_part); + const thrust_device::vector &V = V_gp->get(); if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); diff --git a/src/impl/particles_impl_chem_react.ipp b/src/impl/particles_impl_chem_react.ipp index 5f13c3682..4dd9ba7e2 100644 --- a/src/impl/particles_impl_chem_react.ipp +++ b/src/impl/particles_impl_chem_react.ipp @@ -259,13 +259,14 @@ namespace libcloudphxx { using namespace common::molar_mass; // M-prefixed - thrust_device::vector &V(tmp_device_real_part); + thrust_device::vector &V = V_gp->get(); thrust_device::vector &chem_flag(tmp_device_n_part); //non-equilibrium chemical reactions (oxidation) if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); - thrust_device::vector &old_S_VI(tmp_device_real_part1); + auto old_S_VI_g = tmp_device_real_part.get_guard(); + thrust_device::vector &old_S_VI = old_S_VI_g.get(); // copy old H2SO4 values to allow dry radii recalculation thrust::copy( diff --git a/src/impl/particles_impl_chem_strength.ipp b/src/impl/particles_impl_chem_strength.ipp index 43338ea9f..c7ac2519b 100644 --- a/src/impl/particles_impl_chem_strength.ipp +++ b/src/impl/particles_impl_chem_strength.ipp @@ -71,7 +71,7 @@ namespace libcloudphxx void particles_t::impl::chem_flag_ante() { thrust_device::vector &chem_flag(tmp_device_n_part); - thrust_device::vector &V(tmp_device_real_part); + thrust_device::vector &V = V_gp->get(); typedef thrust::permutation_iterator< typename thrust_device::vector::iterator, diff --git a/src/impl/particles_impl_hskpng_resize.ipp b/src/impl/particles_impl_hskpng_resize.ipp index 15828069d..f7cbb2c22 100644 --- a/src/impl/particles_impl_hskpng_resize.ipp +++ b/src/impl/particles_impl_hskpng_resize.ipp @@ -19,13 +19,13 @@ namespace libcloudphxx pair.first->resize(n_part, pair.second); } - for(auto &vec: resize_real_vctrs) - vec->resize(n_part); +// for(auto &vec: resize_real_vctrs) +// vec->resize(n_part); + tmp_device_real_part.resize(n_part); for(auto &vec: resize_size_vctrs) vec->resize(n_part); - // its unsigned int vector, probably only one we will use, hence no resize_t_vctrs helper used tmp_device_n_part.resize(n_part); } }; diff --git a/src/impl/particles_impl_moms.ipp b/src/impl/particles_impl_moms.ipp index 38d9b87dc..ffe7ffba8 100644 --- a/src/impl/particles_impl_moms.ipp +++ b/src/impl/particles_impl_moms.ipp @@ -51,7 +51,8 @@ namespace libcloudphxx { hskpng_sort(); - thrust_device::vector &n_filtered(tmp_device_real_part); + n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + thrust_device::vector &n_filtered = n_filtered_gp->get(); thrust::copy( n.begin(), n.end(), @@ -72,7 +73,8 @@ namespace libcloudphxx hskpng_sort(); // transforming n -> n if within range, else 0 - thrust_device::vector &n_filtered(tmp_device_real_part); + n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + thrust_device::vector &n_filtered = n_filtered_gp->get(); if(!cons) thrust::transform( @@ -111,7 +113,8 @@ namespace libcloudphxx { hskpng_sort(); - thrust_device::vector &n_filtered(tmp_device_real_part); + n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + thrust_device::vector &n_filtered = n_filtered_gp->get(); thrust::transform( n.begin(), n.end(), // input - 1st arg @@ -133,7 +136,8 @@ namespace libcloudphxx { hskpng_sort(); - thrust_device::vector &n_filtered(tmp_device_real_part); + n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + thrust_device::vector &n_filtered = n_filtered_gp->get(); { namespace arg = thrust::placeholders; @@ -197,8 +201,7 @@ namespace libcloudphxx { assert(selected_before_counting); - // same as above - thrust_device::vector &n_filtered(tmp_device_real_part); + thrust_device::vector &n_filtered = n_filtered_gp->get(); typedef thrust::permutation_iterator< typename thrust_device::vector::const_iterator, @@ -226,6 +229,8 @@ namespace libcloudphxx count_mom.begin() ); + n_filtered_gp.reset(); // n_filtered not needed anymore, destroy the guard to a tmp array that stored it + count_n = it_pair.first - count_ijk.begin(); #if !defined(NDEBUG) { diff --git a/src/particles_diag.ipp b/src/particles_diag.ipp index 698c8d2f2..39c255791 100644 --- a/src/particles_diag.ipp +++ b/src/particles_diag.ipp @@ -164,7 +164,8 @@ namespace libcloudphxx namespace arg = thrust::placeholders; assert(pimpl->selected_before_counting); - thrust_device::vector &n_filtered(pimpl->tmp_device_real_part); + auto n_filtered_g = pimpl->tmp_device_real_part.get_guard(); + thrust_device::vector &n_filtered = n_filtered_g.get(); // similar to hskpng_count pimpl->hskpng_sort(); @@ -250,6 +251,9 @@ namespace libcloudphxx // intentionally using the same tmp vector as inside moms_cmp below thrust_device::vector &RH_minus_Sc(pimpl->tmp_device_real_part); + auto RH_minus_Sc_g = pimpl->tmp_device_real_part.get_guard(); + thrust_device::vector &RH_minus_Sc = RH_minus_Sc_g.get(); + // computing RH_minus_Sc for each particle thrust::transform( pimpl->rd3.begin(), pimpl->rd3.end(), // input - 1st arg diff --git a/src/particles_step.ipp b/src/particles_step.ipp index 689ce8a21..d5c19143c 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -260,6 +260,7 @@ namespace libcloudphxx //cleanup - TODO think of something better pimpl->chem_cleanup(); } + pimpl->chem_post_step(); } } From 9cfddb2722b9cc42be05723bb679469ab3f426ab Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 7 Oct 2025 11:55:49 +0200 Subject: [PATCH 06/22] pool of temp vectors; tmp_device_real_part p.2 --- src/detail/tmp_vector_pool.hpp | 6 ++++ src/impl/particles_impl.ipp | 6 +++- src/impl/particles_impl_coal.ipp | 12 ++++--- src/impl/particles_impl_cond.ipp | 3 +- src/impl/particles_impl_cond_sstp.ipp | 6 ++-- src/impl/particles_impl_hskpng_turb_vel.ipp | 3 +- src/impl/particles_impl_mass_dens.ipp | 3 +- .../particles_impl_reserve_hskpng_npart.ipp | 10 ------ ...articles_impl_src_dry_distros_matching.ipp | 3 +- src/impl/particles_impl_sstp.ipp | 31 ++++++++++++++++++- .../particles_impl_update_incloud_time.ipp | 3 +- 11 files changed, 63 insertions(+), 23 deletions(-) diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp index 3f0b7616f..8ba7fa8a3 100644 --- a/src/detail/tmp_vector_pool.hpp +++ b/src/detail/tmp_vector_pool.hpp @@ -36,6 +36,12 @@ namespace libcloudphxx } } + void reserve(size_t n) { + for (size_t i = 0; i < pool.size(); ++i) { + pool[i].vec.reserve(n); + } + } + // Acquire an available vector, returns its index size_t acquire() { for (size_t i = 0; i < pool.size(); ++i) { diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 7918556c0..f53ac9f0a 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -221,7 +221,11 @@ namespace libcloudphxx std::uniqe_ptr< tmp_vector_pool>::guard > n_filtered_gp, - V_gp; + V_gp, + sstp_dlt_rv_gp, + sstp_dlt_th_gp, + sstp_dlt_rhod_gp, + sstp_dlt_p_gp; // to simplify foreach calls diff --git a/src/impl/particles_impl_coal.ipp b/src/impl/particles_impl_coal.ipp index 0d085d1fa..64e6c2767 100644 --- a/src/impl/particles_impl_coal.ipp +++ b/src/impl/particles_impl_coal.ipp @@ -272,12 +272,16 @@ namespace libcloudphxx nancheck_range(count_mom.begin(), count_mom.begin() + count_n, "count_mom storing scale_factors"); // references to tmp data + auto scl_g = tmp_device_real_cell.get_guard(), + col_g = tmp_device_real_part.get_guard(); + auto off_g = tmp_device_size_cell.get_guard(), + thrust_device::vector - &scl(tmp_device_real_cell), // scale factor for probablility - &col(tmp_device_real_part); // number of collisions, used in chemistry, NOTE: it's the same as u01, so it overwrites already used random numbers - // 1st one of a pair stores number of collisions, 2nd one stores info on which one has greater multiplicity + &scl(scl_g.get()), // scale factor for probablility + &col(col_g.get()); // number of collisions, used in chemistry, NOTE: it's the same as u01, so it overwrites already used random numbers + // 1st one of a pair stores number of collisions, 2nd one stores info on which one has greater multiplicity thrust_device::vector - &off(tmp_device_size_cell); // offset for getting index of particle within a cell + &off(off_g.get()); // offset for getting index of particle within a cell // laying out scale factor onto ijk grid // fill with 0s if not all cells will be updated in the following copy diff --git a/src/impl/particles_impl_cond.ipp b/src/impl/particles_impl_cond.ipp index c3fdfa618..abdd22c46 100644 --- a/src/impl/particles_impl_cond.ipp +++ b/src/impl/particles_impl_cond.ipp @@ -55,7 +55,8 @@ namespace libcloudphxx // TODO: both calls almost identical, use std::bind or sth? if(turb_cond) { - thrust_device::vector &RH_plus_ssp(tmp_device_real_part2); + auto RH_plus_ssp_g = tmp_device_real_part.get_guard(); + thrust_device::vector &RH_plus_ssp = RH_plus_ssp_g.get(); thrust::transform( ssp.begin(), ssp.end(), thrust::make_permutation_iterator(RH.begin(), ijk.begin()), diff --git a/src/impl/particles_impl_cond_sstp.ipp b/src/impl/particles_impl_cond_sstp.ipp index a4f08d891..76d118b56 100644 --- a/src/impl/particles_impl_cond_sstp.ipp +++ b/src/impl/particles_impl_cond_sstp.ipp @@ -85,7 +85,8 @@ namespace libcloudphxx // prerequisite hskpng_sort(); // particle's local change in rv - thrust_device::vector &pdrv(tmp_device_real_part4); + auto pdrv_g = tmp_device_real_part.get_guard(); + thrust_device::vector &pdrv = pdrv_g.get(); // -rw3_old thrust::transform( thrust::make_transform_iterator(rw2.begin(), detail::rw2torw3()), @@ -95,7 +96,8 @@ namespace libcloudphxx ); // vector for each particle's T - thrust_device::vector &Tp(tmp_device_real_part3); + auto Tp_g = tmp_device_real_part.get_guard(); + thrust_device::vector &Tp = Tp_g.get(); // calc Tp if(opts_init.th_dry) diff --git a/src/impl/particles_impl_hskpng_turb_vel.ipp b/src/impl/particles_impl_hskpng_turb_vel.ipp index e20221a9e..12fdf9d29 100644 --- a/src/impl/particles_impl_hskpng_turb_vel.ipp +++ b/src/impl/particles_impl_hskpng_turb_vel.ipp @@ -67,7 +67,8 @@ namespace libcloudphxx detail::common__turbulence__tau() ); - thrust_device::vector &r_normal(tmp_device_real_part); + auto r_normal_g = tmp_device_real_part.get_guard(); + thrust_device::vector &r_normal = r_normal_g.get(); thrust_device::vector * vel_turbs_vctrs_a[] = {&up, &wp, &vp}; for(int i = (only_vertical ? 1 : 0); i < (only_vertical ? 2 : n_dims); ++i) { diff --git a/src/impl/particles_impl_mass_dens.ipp b/src/impl/particles_impl_mass_dens.ipp index 8e02b4e44..4fa8dd28b 100644 --- a/src/impl/particles_impl_mass_dens.ipp +++ b/src/impl/particles_impl_mass_dens.ipp @@ -36,7 +36,8 @@ namespace libcloudphxx assert(selected_before_counting); //TODO: force moms_all() before mass density estimation? // same as above - thrust_device::vector &n_filtered(tmp_device_real_part); + auto n_filtered_g = tmp_device_real_part.get_guard(); + thrust_device::vector &n_filtered = n_filtered_g.get(); // number of SD in each cell casted to real_t thrust_device::vector &count_num_real_t(tmp_device_real_cell); diff --git a/src/impl/particles_impl_reserve_hskpng_npart.ipp b/src/impl/particles_impl_reserve_hskpng_npart.ipp index 6c6530b0e..f74ded896 100644 --- a/src/impl/particles_impl_reserve_hskpng_npart.ipp +++ b/src/impl/particles_impl_reserve_hskpng_npart.ipp @@ -53,24 +53,14 @@ namespace libcloudphxx if(opts_init.diag_incloud_time) incloud_time.reserve(opts_init.n_sd_max); - { - tmp_device_real_part1.reserve(opts_init.n_sd_max); - } - if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch) - { - tmp_device_real_part2.reserve(opts_init.n_sd_max); - } if(allow_sstp_cond && opts_init.exact_sstp_cond) { - tmp_device_real_part3.reserve(opts_init.n_sd_max); - tmp_device_real_part4.reserve(opts_init.n_sd_max); sstp_tmp_rv.reserve(opts_init.n_sd_max); sstp_tmp_th.reserve(opts_init.n_sd_max); sstp_tmp_rh.reserve(opts_init.n_sd_max); if(opts_init.const_p) // in const_p pressure is not diagnostic (it's constant) - in per-particle sub-stepping it has to be substepped and we need two vectors to do that { sstp_tmp_p.reserve(opts_init.n_sd_max); - tmp_device_real_part5.reserve(opts_init.n_sd_max); } } // reserve memory for in/out buffers diff --git a/src/impl/particles_impl_src_dry_distros_matching.ipp b/src/impl/particles_impl_src_dry_distros_matching.ipp index 542ee39fd..fbc808842 100644 --- a/src/impl/particles_impl_src_dry_distros_matching.ipp +++ b/src/impl/particles_impl_src_dry_distros_matching.ipp @@ -60,7 +60,8 @@ namespace libcloudphxx thrust::sequence(sorted_id.begin(), sorted_id.end()); // tmp vector with sorted rd3 - thrust_device::vector &sorted_rd3(tmp_device_real_part); + auto sorted_rd3_g = tmp_device_real_part.get_guard(); + thrust_device::vector &sorted_rd3 = sorted_rd3_g.get(); // use sorted_rd3 as tmp copy of rd3 thrust::copy( diff --git a/src/impl/particles_impl_sstp.ipp b/src/impl/particles_impl_sstp.ipp index e2dee1ea8..4bc6c7118 100644 --- a/src/impl/particles_impl_sstp.ipp +++ b/src/impl/particles_impl_sstp.ipp @@ -117,11 +117,28 @@ namespace libcloudphxx thrust_device::vector *scl[n] = { &rv, &th, &rhod, &p }, *tmp[n] = { &sstp_tmp_rv, &sstp_tmp_th, &sstp_tmp_rh, &sstp_tmp_p }, - *dlt[n] = { &tmp_device_real_part, &tmp_device_real_part1, &tmp_device_real_part2, &tmp_device_real_part5 }; + *dlt[n]; + + // acquire temporary arrays to store sstp_delta + if (step == 0) + { + sstp_dlt_rv_gp.reset(&(tmp_device_real_part.get_guard())); + dlt[0] = &sstp_dlt_rv_gp->get(); + sstp_dlt_th_gp.reset(&(tmp_device_real_part.get_guard())); + dlt[1] = &sstp_dlt_th_gp->get(); + sstp_dlt_rhod_gp.reset(&(tmp_device_real_part.get_guard())); + dlt[2] = &sstp_dlt_rhod_gp->get(); + if(opts_init.const_p) + { + sstp_dlt_p_gp.reset(&(tmp_device_real_part.get_guard())); + dlt[3] = &sstp_dlt_p_gp->get(); + } + } for (int ix = 0; ix < (opts_init.const_p ? n : n-1); ++ix) { const real_t sstp = sstp_cond; + if (step == 0) { // sstp_tmp_scl = dscl_adv (i.e. delta, i.e. new - old) @@ -152,6 +169,18 @@ namespace libcloudphxx ); } } + + // release temporary arrays that stored sstp_delta + if (step == sstp_cond-1) + { + sstp_dlt_rv_gp.reset(); + sstp_dlt_th_gp.reset(); + sstp_dlt_rhod_gp.reset(); + if(opts_init.const_p) + { + sstp_dlt_p_gp.reset(); + } + } } diff --git a/src/impl/particles_impl_update_incloud_time.ipp b/src/impl/particles_impl_update_incloud_time.ipp index 75dbec831..66a6a06ae 100644 --- a/src/impl/particles_impl_update_incloud_time.ipp +++ b/src/impl/particles_impl_update_incloud_time.ipp @@ -36,7 +36,8 @@ namespace libcloudphxx void particles_t::impl::update_incloud_time(const real_t &dt) { // tmp vector to store crit radius of each SD - thrust_device::vector &rc2(tmp_device_real_part); + auto rc2_g = tmp_device_real_part.get_guard(); + thrust_device::vector &rc2 = rc2_g.get(); // computing rc2 thrust::transform( From 18ddd999416e3e150d199787a821ba1944a56a5d Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 7 Oct 2025 12:32:19 +0200 Subject: [PATCH 07/22] pool of temp vectors; tmp_device_real_cell p.1 --- src/impl/particles_impl.ipp | 9 +++------ src/impl/particles_impl_ante_adding_SD.ipp | 4 +++- src/impl/particles_impl_post_adding_SD.ipp | 4 +++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index f53ac9f0a..2eb5b6314 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -84,6 +84,7 @@ namespace libcloudphxx sstp_tmp_rh, // ditto for rho sstp_tmp_p, // ditto for pressure incloud_time; // time this SD has been within a cloud + // TODO: sstp_tmp_X could be reused after condensation; create additional temporary arrays within tmp_device_real_part and use them as sstp_tmp (add a guard that protects them during condesnation, just like sstp_tmp_X_gp). same goes for sstp_tmp_chem_X // dry radii distribution characteristics real_t log_rd_min, // logarithm of the lower bound of the distr @@ -203,11 +204,6 @@ namespace libcloudphxx tmp_host_size_cell; tmp_vector_pool> tmp_device_real_part, - // tmp_device_real_part1, - // tmp_device_real_part2, - // tmp_device_real_part3, - // tmp_device_real_part4, - // tmp_device_real_part5, tmp_device_real_cell; // tmp_device_real_cell1, // tmp_device_real_cell2, @@ -225,7 +221,8 @@ namespace libcloudphxx sstp_dlt_rv_gp, sstp_dlt_th_gp, sstp_dlt_rhod_gp, - sstp_dlt_p_gp; + sstp_dlt_p_gp, + drv_gp; // to simplify foreach calls diff --git a/src/impl/particles_impl_ante_adding_SD.ipp b/src/impl/particles_impl_ante_adding_SD.ipp index b77502a11..83c1daf19 100644 --- a/src/impl/particles_impl_ante_adding_SD.ipp +++ b/src/impl/particles_impl_ante_adding_SD.ipp @@ -14,7 +14,9 @@ namespace libcloudphxx { // --- calc liquid water content before src --- hskpng_sort(); - thrust_device::vector &drv(tmp_device_real_cell1); // NOTE: this can't be changed by any function called before a call to after_adding_SD... + drv_gp.reset(&(tmp_device_real_cell.get_guard())); + thrust_device::vector &drv = drv_gp->get(); + thrust::fill(drv.begin(), drv.end(), real_t(0.)); moms_all(); diff --git a/src/impl/particles_impl_post_adding_SD.ipp b/src/impl/particles_impl_post_adding_SD.ipp index 9bebe5352..feb2b6774 100644 --- a/src/impl/particles_impl_post_adding_SD.ipp +++ b/src/impl/particles_impl_post_adding_SD.ipp @@ -12,7 +12,7 @@ namespace libcloudphxx template void particles_t::impl::post_adding_SD() { - thrust_device::vector &drv(tmp_device_real_cell1); // NOTE: this can't be changed by any function called since ante_adding_SD() was called..... + thrust_device::vector &drv = drv_gp->get(); // --- after source particles are no longer sorted --- sorted = false; @@ -52,6 +52,8 @@ namespace libcloudphxx // init _old values in per-particle substepping init_sstp(); + + drv_gp.reset(); // release the tmp array that stored drv } }; }; From 7db779b8efc1cb36920065564f3c721dfc4b54b3 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 7 Oct 2025 13:26:17 +0200 Subject: [PATCH 08/22] pool of temp vectors; tmp_device_real_cell p.2 --- src/detail/tmp_vector_pool.hpp | 10 +++++----- src/impl/particles_impl_chem_henry.ipp | 6 ++++-- src/impl/particles_impl_cond.ipp | 11 +++++++---- src/impl/particles_impl_cond_sstp.ipp | 6 ++++-- src/impl/particles_impl_hskpng_mfp.ipp | 8 +++++--- src/impl/particles_impl_hskpng_turb_ss.ipp | 3 ++- src/impl/particles_impl_hskpng_turb_vel.ipp | 3 ++- src/impl/particles_impl_init_count_num.ipp | 3 ++- src/impl/particles_impl_init_n.ipp | 3 ++- src/impl/particles_impl_mass_dens.ipp | 3 ++- src/impl/particles_impl_update_th_rv.ipp | 3 ++- 11 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp index 8ba7fa8a3..8a7519f8f 100644 --- a/src/detail/tmp_vector_pool.hpp +++ b/src/detail/tmp_vector_pool.hpp @@ -66,11 +66,11 @@ namespace libcloudphxx return pool[idx].vec; } - std::pair get() { - const size_t idx = acquire(); - assert(idx < pool.size() && pool[idx].in_use); - return std::make_pair(idx, pool[idx].vec); - } + // std::pair get() { + // const size_t idx = acquire(); + // assert(idx < pool.size() && pool[idx].in_use); + // return std::make_pair(idx, pool[idx].vec); + // } // RAII guard class guard { diff --git a/src/impl/particles_impl_chem_henry.ipp b/src/impl/particles_impl_chem_henry.ipp index d5630d61f..02b110f16 100644 --- a/src/impl/particles_impl_chem_henry.ipp +++ b/src/impl/particles_impl_chem_henry.ipp @@ -330,8 +330,10 @@ namespace libcloudphxx hskpng_sort(); // temporarily needed to store old mass per cell - thrust_device::vector &mass_old(tmp_device_real_cell); - thrust_device::vector &mass_new(tmp_device_real_cell1); + auto mass_old_g = tmp_device_real_cell.get_guard(); + auto mass_new_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &mass_old(mass_old_g->get()); + thrust_device::vector &mass_new(mass_new_g->get()); for (int i = 0; i < chem_gas_n; ++i) { diff --git a/src/impl/particles_impl_cond.ipp b/src/impl/particles_impl_cond.ipp index abdd22c46..6ef590c44 100644 --- a/src/impl/particles_impl_cond.ipp +++ b/src/impl/particles_impl_cond.ipp @@ -18,12 +18,15 @@ namespace libcloudphxx namespace arg = thrust::placeholders; - thrust_device::vector &lambda_D(tmp_device_real_cell1); // real_cell used in cond.ipp - thrust_device::vector &lambda_K(tmp_device_real_cell2); // real_cell used in cond.ipp - + auto lambda_D_g = tmp_device_real_cell.get_guard(); + auto lambda_K_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &lambda_D(lambda_D_g->get()); + thrust_device::vector &lambda_K(lambda_K_g->get()); + // --- calc liquid water content before cond --- hskpng_sort(); - thrust_device::vector &drv(tmp_device_real_cell); + auto drv_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &drv = drv_g->get(); // calculating the 3rd wet moment before condensation moms_all(); diff --git a/src/impl/particles_impl_cond_sstp.ipp b/src/impl/particles_impl_cond_sstp.ipp index 76d118b56..c8b0a8e03 100644 --- a/src/impl/particles_impl_cond_sstp.ipp +++ b/src/impl/particles_impl_cond_sstp.ipp @@ -38,8 +38,10 @@ namespace libcloudphxx const RH_iter &rhi ) { - thrust_device::vector &lambda_D(tmp_device_real_cell1); // real_cell used in cond.ipp - thrust_device::vector &lambda_K(tmp_device_real_cell2); // real_cell used in cond.ipp + auto lambda_D_g = tmp_device_real_cell.get_guard(); + auto lambda_K_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &lambda_D(lambda_D_g->get()); + thrust_device::vector &lambda_K(lambda_K_g->get()); auto hlpr_zip_iter = thrust::make_zip_iterator(thrust::make_tuple( sstp_tmp_rh.begin(), diff --git a/src/impl/particles_impl_hskpng_mfp.ipp b/src/impl/particles_impl_hskpng_mfp.ipp index 1cf02535b..176c3f9ca 100644 --- a/src/impl/particles_impl_hskpng_mfp.ipp +++ b/src/impl/particles_impl_hskpng_mfp.ipp @@ -40,9 +40,11 @@ namespace libcloudphxx template void particles_t::impl::hskpng_mfp() - { - thrust_device::vector &lambda_D(tmp_device_real_cell1); // real_cell used in cond.ipp - thrust_device::vector &lambda_K(tmp_device_real_cell2); // real_cell used in cond.ipp + { + auto lambda_D_g = tmp_device_real_cell.get_guard(); + auto lambda_K_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &lambda_D(lambda_D_g->get()); + thrust_device::vector &lambda_K(lambda_K_g->get()); thrust::transform(T.begin(), T.end(), lambda_D.begin(), detail::common__mean_free_path__lambda_D()); thrust::transform(T.begin(), T.end(), p.begin(), lambda_K.begin(), detail::common__mean_free_path__lambda_K()); diff --git a/src/impl/particles_impl_hskpng_turb_ss.ipp b/src/impl/particles_impl_hskpng_turb_ss.ipp index f14d8ea74..558c689ff 100644 --- a/src/impl/particles_impl_hskpng_turb_ss.ipp +++ b/src/impl/particles_impl_hskpng_turb_ss.ipp @@ -44,7 +44,8 @@ namespace libcloudphxx template void particles_t::impl::hskpng_turb_dot_ss() { - thrust_device::vector &tau_rlx(tmp_device_real_cell); // tau_rlx needs to have length n_cell + auto tau_rlx_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &tau_rlx = tau_rlx_g->get(); #if !defined(NDEBUG) // fill with a dummy value for debugging thrust::fill(tau_rlx.begin(), tau_rlx.end(), -44); diff --git a/src/impl/particles_impl_hskpng_turb_vel.ipp b/src/impl/particles_impl_hskpng_turb_vel.ipp index 12fdf9d29..3693a82a8 100644 --- a/src/impl/particles_impl_hskpng_turb_vel.ipp +++ b/src/impl/particles_impl_hskpng_turb_vel.ipp @@ -53,7 +53,8 @@ namespace libcloudphxx { namespace arg = thrust::placeholders; - thrust_device::vector &tau(tmp_device_real_cell); + auto tau_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &tau(tau_g->get()); thrust_device::vector &tke(diss_rate); // should be called after hskpng_tke, which replaces diss_rate with tke thrust::transform(tke.begin(), tke.end(), diff --git a/src/impl/particles_impl_init_count_num.ipp b/src/impl/particles_impl_init_count_num.ipp index 7b5e22eeb..a1fa141c8 100644 --- a/src/impl/particles_impl_init_count_num.ipp +++ b/src/impl/particles_impl_init_count_num.ipp @@ -77,7 +77,8 @@ namespace libcloudphxx template void particles_t::impl::init_count_num_hlpr(const real_t &conc, const thrust_size_t &const_multi) { - thrust_device::vector &concentration(tmp_device_real_cell); + auto concentration_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &concentration(concentration_g->get()); thrust::fill(concentration.begin(), concentration.end(), conc); conc_to_number(concentration); diff --git a/src/impl/particles_impl_init_n.ipp b/src/impl/particles_impl_init_n.ipp index d8628c164..2e34c71c7 100644 --- a/src/impl/particles_impl_init_n.ipp +++ b/src/impl/particles_impl_init_n.ipp @@ -152,7 +152,8 @@ namespace libcloudphxx void particles_t::impl::init_n_dry_sizes(const real_t &conc, const thrust_size_t &sd_count) { namespace arg = thrust::placeholders; - thrust_device::vector &concentration(tmp_device_real_cell); + auto concentration_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &concentration(concentration_g->get()); thrust::fill(concentration.begin(), concentration.end(), conc); conc_to_number(concentration); thrust::transform( diff --git a/src/impl/particles_impl_mass_dens.ipp b/src/impl/particles_impl_mass_dens.ipp index 4fa8dd28b..cfae76741 100644 --- a/src/impl/particles_impl_mass_dens.ipp +++ b/src/impl/particles_impl_mass_dens.ipp @@ -40,7 +40,8 @@ namespace libcloudphxx thrust_device::vector &n_filtered = n_filtered_g.get(); // number of SD in each cell casted to real_t - thrust_device::vector &count_num_real_t(tmp_device_real_cell); + auto count_num_real_t_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &count_num_real_t(count_num_real_t_g.get()); // get number of SD in each cell hskpng_count(); diff --git a/src/impl/particles_impl_update_th_rv.ipp b/src/impl/particles_impl_update_th_rv.ipp index 238465344..db4dc1734 100644 --- a/src/impl/particles_impl_update_th_rv.ipp +++ b/src/impl/particles_impl_update_th_rv.ipp @@ -100,7 +100,8 @@ namespace libcloudphxx if(!sorted) throw std::runtime_error("libcloudph++: update_uh_rv called on an unsorted set"); // cell-wise change in state - thrust_device::vector &dstate(tmp_device_real_cell); + auto dstate_g = tmp_device_real_cell.get_guard(); + thrust_device::vector &dstate(dstate_g->get()); // init dstate with 0s thrust::fill(dstate.begin(), dstate.end(), real_t(0)); // calc sum of pdstate in each cell From 22e92df0607f927df850eb3658d5d9b8127bb447 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 7 Oct 2025 13:33:59 +0200 Subject: [PATCH 09/22] pool of temp vectors; tmp_device_n_part --- src/impl/particles_impl.ipp | 6 ++++-- src/impl/particles_impl_chem_ante.ipp | 3 +-- src/impl/particles_impl_chem_dissoc.ipp | 2 +- src/impl/particles_impl_chem_henry.ipp | 2 +- src/impl/particles_impl_chem_react.ipp | 2 +- src/impl/particles_impl_chem_strength.ipp | 3 ++- src/particles_step.ipp | 1 - 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 2eb5b6314..60b906531 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -205,8 +205,6 @@ namespace libcloudphxx tmp_vector_pool> tmp_device_real_part, tmp_device_real_cell; - // tmp_device_real_cell1, - // tmp_device_real_cell2, tmp_vector_pool> tmp_device_n_part; tmp_vector_pool> @@ -224,6 +222,10 @@ namespace libcloudphxx sstp_dlt_p_gp, drv_gp; + std::uniqe_ptr< + tmp_vector_pool>::guard + > chem_flag_gp, + // to simplify foreach calls const thrust::counting_iterator zero; diff --git a/src/impl/particles_impl_chem_ante.ipp b/src/impl/particles_impl_chem_ante.ipp index 2352d4be7..ebaa188d3 100644 --- a/src/impl/particles_impl_chem_ante.ipp +++ b/src/impl/particles_impl_chem_ante.ipp @@ -61,6 +61,7 @@ namespace libcloudphxx { if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); V_gp.reset(); // release temorary array used to store volume in chemistry + chem_flag_gp.reset(); // release temorary array used to store chem flag } template @@ -72,8 +73,6 @@ namespace libcloudphxx V_gp.reset(&(tmp_device_real_part.get_guard())); thrust_device::vector &V = V_gp->get(); - - thrust::transform( rw2.begin(), rw2.end(), // input V.begin(), // output diff --git a/src/impl/particles_impl_chem_dissoc.ipp b/src/impl/particles_impl_chem_dissoc.ipp index dbe54e634..a010768f5 100644 --- a/src/impl/particles_impl_chem_dissoc.ipp +++ b/src/impl/particles_impl_chem_dissoc.ipp @@ -146,7 +146,7 @@ namespace libcloudphxx using namespace common::molar_mass; // M-prefixed thrust_device::vector &V = V_gp->get(); - const thrust_device::vector &chem_flag(tmp_device_n_part); + thrust_device::vector &chem_flag(chem_flag_gp->get()); if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); diff --git a/src/impl/particles_impl_chem_henry.ipp b/src/impl/particles_impl_chem_henry.ipp index 02b110f16..9cacb58db 100644 --- a/src/impl/particles_impl_chem_henry.ipp +++ b/src/impl/particles_impl_chem_henry.ipp @@ -249,7 +249,7 @@ namespace libcloudphxx using namespace common::molar_mass; // M-prefixed using namespace common::dissoc; // K-prefixed - const thrust_device::vector &chem_flag(tmp_device_n_part); + thrust_device::vector &chem_flag(chem_flag_gp->get()); const thrust_device::vector &V = V_gp->get(); if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); diff --git a/src/impl/particles_impl_chem_react.ipp b/src/impl/particles_impl_chem_react.ipp index 4dd9ba7e2..247674b69 100644 --- a/src/impl/particles_impl_chem_react.ipp +++ b/src/impl/particles_impl_chem_react.ipp @@ -260,7 +260,7 @@ namespace libcloudphxx using namespace common::molar_mass; // M-prefixed thrust_device::vector &V = V_gp->get(); - thrust_device::vector &chem_flag(tmp_device_n_part); + thrust_device::vector &chem_flag(chem_flag_gp->get()); //non-equilibrium chemical reactions (oxidation) if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); diff --git a/src/impl/particles_impl_chem_strength.ipp b/src/impl/particles_impl_chem_strength.ipp index c7ac2519b..2ed8b17a9 100644 --- a/src/impl/particles_impl_chem_strength.ipp +++ b/src/impl/particles_impl_chem_strength.ipp @@ -70,7 +70,8 @@ namespace libcloudphxx template void particles_t::impl::chem_flag_ante() { - thrust_device::vector &chem_flag(tmp_device_n_part); + chem_flag_gp.reset(&(tmp_device_n_part.get_guard())); + thrust_device::vector &chem_flag(chem_flag_gp->get()); thrust_device::vector &V = V_gp->get(); typedef thrust::permutation_iterator< diff --git a/src/particles_step.ipp b/src/particles_step.ipp index d5c19143c..6b068622c 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -231,7 +231,6 @@ namespace libcloudphxx pimpl->chem_vol_ante(); // set flag for those SD that are big enough to have chemical reactions pimpl->chem_flag_ante(); - // NOTE: volume and flag are stored in temporary arrays (tmp_device_real_part and tmp_device_n_part), so these arrays should not be changed until chemistry is done if (opts.chem_dsl) { From 7c4d199ff25de952ed4349806aaa9dde006829f8 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 7 Oct 2025 13:42:41 +0200 Subject: [PATCH 10/22] pool of temp vectors; tmp_device_size_cell --- src/impl/particles_impl_init_dry_sd_conc.ipp | 3 ++- src/impl/particles_impl_init_ijk.ipp | 3 ++- src/impl/particles_impl_rlx_dry_distros.ipp | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/impl/particles_impl_init_dry_sd_conc.ipp b/src/impl/particles_impl_init_dry_sd_conc.ipp index fa4ebb2fe..f60556596 100644 --- a/src/impl/particles_impl_init_dry_sd_conc.ipp +++ b/src/impl/particles_impl_init_dry_sd_conc.ipp @@ -48,7 +48,8 @@ namespace libcloudphxx // rd3 temporarily means logarithm of radius! thrust_device::vector &lnrd(rd3); - thrust_device::vector &ptr(tmp_device_size_cell); + auto ptr_g = tmp_device_size_cell.get_guard(); + thrust_device::vector &ptr(ptr_g->get()); thrust::exclusive_scan(count_num.begin(), count_num.end(), ptr.begin()); // number of SDs to init in cells up to (i-1), TODO: same is done in init_xyz, store it? // shifting from [0,1] to [log(rd_min),log(rd_max)] and storing into rd3 diff --git a/src/impl/particles_impl_init_ijk.ipp b/src/impl/particles_impl_init_ijk.ipp index ee2282a33..f83746275 100644 --- a/src/impl/particles_impl_init_ijk.ipp +++ b/src/impl/particles_impl_init_ijk.ipp @@ -35,7 +35,8 @@ namespace libcloudphxx template void particles_t::impl::init_ijk() { - thrust_device::vector &ptr(tmp_device_size_cell); + auto ptr_g = tmp_device_size_cell.get_guard(); + thrust_device::vector &ptr(ptr_g->get()); thrust::exclusive_scan(count_num.begin(), count_num.end(), ptr.begin()); // number of SDs in cells to init up to (i-1) // fill ijk with cell number of each SD diff --git a/src/impl/particles_impl_rlx_dry_distros.ipp b/src/impl/particles_impl_rlx_dry_distros.ipp index 528b9eca7..e76a34c1d 100644 --- a/src/impl/particles_impl_rlx_dry_distros.ipp +++ b/src/impl/particles_impl_rlx_dry_distros.ipp @@ -165,7 +165,8 @@ namespace libcloudphxx // horizontal sum of this moment thrust::fill(hor_sum.begin(), hor_sum.end(), 0); - thrust_device::vector &count_k(tmp_device_size_cell); // NOTE: tmp_device_size_cell is also used in some other inits, careful not to overwrite it! + auto count_k_gp = tmp_device_size_cell.get_guard(); + thrust_device::vector &count_k(count_k_gp->get()); thrust::transform(count_ijk.begin(), count_ijk.begin() + count_n, count_k.begin(), arg::_1 % opts_init.nz); thrust::sort_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin()); @@ -224,7 +225,8 @@ namespace libcloudphxx n.resize(n_part); // --- init k --- - thrust_device::vector &ptr(tmp_device_size_cell); + auto ptr_g = tmp_device_size_cell.get_guard(); + thrust_device::vector &ptr(ptr_g->get()); thrust::exclusive_scan(n_SD_to_create.begin(), n_SD_to_create.end(), ptr.begin()); // number of SDs in cells to init up to (i-1) thrust::for_each( From ace5ff11f29ec4258e944d48303450741a2e1f6a Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 7 Oct 2025 14:03:21 +0200 Subject: [PATCH 11/22] pool of temp vectors; tmp_device_size_part --- src/impl/particles_impl.ipp | 1 - src/impl/particles_impl_hskpng_resize.ipp | 3 ++- src/impl/particles_impl_hskpng_vterm.ipp | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 60b906531..5e2deaa16 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -449,7 +449,6 @@ namespace libcloudphxx resize_size_vctrs.insert(&ijk); resize_size_vctrs.insert(&sorted_ijk); resize_size_vctrs.insert(&sorted_id); - resize_size_vctrs.insert(&tmp_device_size_part); if (opts_init.nx != 0) resize_size_vctrs.insert(&i); if (opts_init.ny != 0) resize_size_vctrs.insert(&j); if (opts_init.nz != 0) resize_size_vctrs.insert(&k); diff --git a/src/impl/particles_impl_hskpng_resize.ipp b/src/impl/particles_impl_hskpng_resize.ipp index f7cbb2c22..6dcc8b523 100644 --- a/src/impl/particles_impl_hskpng_resize.ipp +++ b/src/impl/particles_impl_hskpng_resize.ipp @@ -22,11 +22,12 @@ namespace libcloudphxx // for(auto &vec: resize_real_vctrs) // vec->resize(n_part); tmp_device_real_part.resize(n_part); + tmp_device_n_part.resize(n_part); + tmp_device_size_part.resize(n_part); for(auto &vec: resize_size_vctrs) vec->resize(n_part); - tmp_device_n_part.resize(n_part); } }; }; diff --git a/src/impl/particles_impl_hskpng_vterm.ipp b/src/impl/particles_impl_hskpng_vterm.ipp index 765d7768c..c25bcfcb1 100644 --- a/src/impl/particles_impl_hskpng_vterm.ipp +++ b/src/impl/particles_impl_hskpng_vterm.ipp @@ -143,7 +143,8 @@ namespace libcloudphxx if(opts_init.terminal_velocity == vt_t::beard77fast) //use cached vt at sea level { - thrust_device::vector &vt0_bin(tmp_device_size_part); + auto vt0_bin_gp = tmp_device_size_part.get_guard(); + thrust_device::vector &vt0_bin(vt0_bin_gp->get()); // get cached bin number thrust::transform_if( rw2.begin(), rw2.end(), @@ -195,7 +196,8 @@ namespace libcloudphxx if(opts_init.terminal_velocity == vt_t::beard77fast) //use cached vt at sea level { - thrust_device::vector &vt0_bin(tmp_device_size_part); + auto vt0_bin_gp = tmp_device_size_part.get_guard(); + thrust_device::vector &vt0_bin(vt0_bin_gp->get()); // get cached bin number thrust::transform( rw2.begin(), rw2.end(), From c57cc57bc435f8e294a6a9b5a8ce02ce3689609f Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Wed, 8 Oct 2025 17:12:50 +0200 Subject: [PATCH 12/22] fix rand usage --- src/detail/tmp_vector_pool.hpp | 22 +++++++++---------- src/impl/particles_impl.ipp | 12 +++++----- src/impl/particles_impl_hskpng_sort.ipp | 4 +++- .../particles_impl_init_dry_const_multi.ipp | 4 +++- src/impl/particles_impl_init_dry_sd_conc.ipp | 6 +++-- src/impl/particles_impl_init_xyz.ipp | 4 +++- src/impl/particles_impl_rlx_dry_distros.ipp | 4 +++- ...articles_impl_src_dry_distros_matching.ipp | 4 +++- 8 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp index 8a7519f8f..54b082aea 100644 --- a/src/detail/tmp_vector_pool.hpp +++ b/src/detail/tmp_vector_pool.hpp @@ -114,15 +114,15 @@ namespace libcloudphxx }; -// Usage example 1: -{ - tmp_vector_pool::guard tmp(pool); - thrust::device_vector& tmp = guard.get(); - // Use *tmp or tmp.get() as a thrust::device_vector& -} // Automatically released here +// // Usage example 1: +// { +// tmp_vector_pool::guard tmp(pool); +// thrust::device_vector& tmp = guard.get(); +// // Use *tmp or tmp.get() as a thrust::device_vector& +// } // Automatically released here -// Usage example 2: -size_t idx = pool.acquire(); -thrust::device_vector& tmp = pool.get(idx); -// Use tmp... -pool.release(idx); // Mark as available again +// // Usage example 2: +// size_t idx = pool.acquire(); +// thrust::device_vector& tmp = pool.get(idx); +// // Use tmp... +// pool.release(idx); // Mark as available again diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 5e2deaa16..3ede52bb9 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -212,8 +212,9 @@ namespace libcloudphxx tmp_device_size_part; // guards for temp vectors that are used in multiple functions and need to stay unchanged inbetween - std::uniqe_ptr< - tmp_vector_pool>::guard + // tmp_vector_pool>::guard asd; + std::unique_ptr< + typename tmp_vector_pool>::guard > n_filtered_gp, V_gp, sstp_dlt_rv_gp, @@ -222,10 +223,9 @@ namespace libcloudphxx sstp_dlt_p_gp, drv_gp; - std::uniqe_ptr< - tmp_vector_pool>::guard - > chem_flag_gp, - + std::unique_ptr< + typename tmp_vector_pool>::guard + > chem_flag_gp; // to simplify foreach calls const thrust::counting_iterator zero; diff --git a/src/impl/particles_impl_hskpng_sort.ipp b/src/impl/particles_impl_hskpng_sort.ipp index 4f4abcd55..ef0fd53ab 100644 --- a/src/impl/particles_impl_hskpng_sort.ipp +++ b/src/impl/particles_impl_hskpng_sort.ipp @@ -28,7 +28,9 @@ namespace libcloudphxx else { // generating a random sorting key - rand_un(n_part); + auto un_g = tmp_device_n_part.get_guard(); + thrust_device::vector &un = un_g.get(); + rand_un(un, n_part); // sorting the sequence with the random key thrust::sort_by_key( diff --git a/src/impl/particles_impl_init_dry_const_multi.ipp b/src/impl/particles_impl_init_dry_const_multi.ipp index 28f86cf44..01f0072dd 100644 --- a/src/impl/particles_impl_init_dry_const_multi.ipp +++ b/src/impl/particles_impl_init_dry_const_multi.ipp @@ -55,7 +55,9 @@ namespace libcloudphxx detail::calc_CDF(n_of_lnrd_stp, log_rd_min, log_rd_max, config.bin_precision, cdf); // tossing random numbers - rand_u01(n_part_to_init); + auto u01g = tmp_device_real_part.get_guard(); + thrust_device::vector &u01 = u01g.get(); + rand_u01(u01, n_part_to_init); // rd3 temporarily means logarithm of radius! thrust_device::vector &lnrd(rd3); diff --git a/src/impl/particles_impl_init_dry_sd_conc.ipp b/src/impl/particles_impl_init_dry_sd_conc.ipp index f60556596..477ceac52 100644 --- a/src/impl/particles_impl_init_dry_sd_conc.ipp +++ b/src/impl/particles_impl_init_dry_sd_conc.ipp @@ -43,13 +43,15 @@ namespace libcloudphxx void particles_t::impl::init_dry_sd_conc() { // tossing random numbers - rand_u01(n_part_to_init); + auto u01g = tmp_device_real_part.get_guard(); + thrust_device::vector &u01 = u01g.get(); + rand_u01(u01, n_part_to_init); // rd3 temporarily means logarithm of radius! thrust_device::vector &lnrd(rd3); auto ptr_g = tmp_device_size_cell.get_guard(); - thrust_device::vector &ptr(ptr_g->get()); + thrust_device::vector &ptr(ptr_g.get()); thrust::exclusive_scan(count_num.begin(), count_num.end(), ptr.begin()); // number of SDs to init in cells up to (i-1), TODO: same is done in init_xyz, store it? // shifting from [0,1] to [log(rd_min),log(rd_max)] and storing into rd3 diff --git a/src/impl/particles_impl_init_xyz.ipp b/src/impl/particles_impl_init_xyz.ipp index 23978d31e..9ade86240 100644 --- a/src/impl/particles_impl_init_xyz.ipp +++ b/src/impl/particles_impl_init_xyz.ipp @@ -53,7 +53,9 @@ namespace libcloudphxx if (n[ix] == 0) continue; // tossing random numbers - rand_u01(n_part_to_init); + auto u01g = tmp_device_real_part.get_guard(); + thrust_device::vector &u01 = u01g.get(); + rand_u01(u01, n_part_to_init); // shifting from [0,1] to random position within respective cell // TODO: now the rand range is [0,1), include this here diff --git a/src/impl/particles_impl_rlx_dry_distros.ipp b/src/impl/particles_impl_rlx_dry_distros.ipp index e76a34c1d..3f0bd629d 100644 --- a/src/impl/particles_impl_rlx_dry_distros.ipp +++ b/src/impl/particles_impl_rlx_dry_distros.ipp @@ -265,7 +265,9 @@ namespace libcloudphxx // --- init of i and j --- // tossing random numbers [0,1) TODO: do it once for all bins - rand_u01(n_part_to_init * (n_dims)); // random numbers for: i, rd, j (j only in 3D) + auto u01g = tmp_device_real_part.get_guard(); + thrust_device::vector &u01 = u01g.get(); + rand_u01(u01, n_part_to_init * (n_dims)); // random numbers for: i, rd, j (j only in 3D) thrust::transform(u01.begin(), u01.begin() + n_part_to_init, i.begin() + n_part_old, detail::multiply_by_constant_and_cast(opts_init.nx)); if(n_dims==3) thrust::transform(u01.begin() + 2*n_part_to_init, u01.begin() + 3*n_part_to_init, j.begin() + n_part_old, detail::multiply_by_constant_and_cast(opts_init.ny)); diff --git a/src/impl/particles_impl_src_dry_distros_matching.ipp b/src/impl/particles_impl_src_dry_distros_matching.ipp index fbc808842..d7318d50a 100644 --- a/src/impl/particles_impl_src_dry_distros_matching.ipp +++ b/src/impl/particles_impl_src_dry_distros_matching.ipp @@ -309,7 +309,9 @@ namespace libcloudphxx // randomly select which old SD will be increased // overwrites sorted_rd3 - rand_u01(count_bins); + auto u01g = tmp_device_real_part.get_guard(); + thrust_device::vector &u01 = u01g.get(); + rand_u01(u01, count_bins); // TODO: merge the following transforms into one From 20d6fae124db2c19e66597b6df81e73a289fbc1b Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Wed, 8 Oct 2025 17:28:36 +0200 Subject: [PATCH 13/22] compilation fixes --- src/impl/particles_impl.ipp | 2 + src/impl/particles_impl_chem_strength.ipp | 2 +- src/impl/particles_impl_coal.ipp | 44 +++++++++---------- src/impl/particles_impl_hskpng_resize.ipp | 2 + src/impl/particles_impl_hskpng_vterm.ipp | 8 ++-- src/impl/particles_impl_init_ijk.ipp | 2 +- src/impl/particles_impl_init_n.ipp | 2 +- .../particles_impl_reserve_hskpng_npart.ipp | 2 + src/impl/particles_impl_rlx_dry_distros.ipp | 6 +-- 9 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 3ede52bb9..856a943ca 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -198,9 +198,11 @@ namespace libcloudphxx // temporary data tmp_vector_pool> + tmp_host_real_part, tmp_host_real_grid, tmp_host_real_cell; tmp_vector_pool> + tmp_host_size_part, tmp_host_size_cell; tmp_vector_pool> tmp_device_real_part, diff --git a/src/impl/particles_impl_chem_strength.ipp b/src/impl/particles_impl_chem_strength.ipp index 2ed8b17a9..9bfa5250d 100644 --- a/src/impl/particles_impl_chem_strength.ipp +++ b/src/impl/particles_impl_chem_strength.ipp @@ -70,7 +70,7 @@ namespace libcloudphxx template void particles_t::impl::chem_flag_ante() { - chem_flag_gp.reset(&(tmp_device_n_part.get_guard())); + chem_flag_gp = std::move(tmp_device_n_part.get_guard()); thrust_device::vector &chem_flag(chem_flag_gp->get()); thrust_device::vector &V = V_gp->get(); diff --git a/src/impl/particles_impl_coal.ipp b/src/impl/particles_impl_coal.ipp index 64e6c2767..f276de331 100644 --- a/src/impl/particles_impl_coal.ipp +++ b/src/impl/particles_impl_coal.ipp @@ -274,7 +274,7 @@ namespace libcloudphxx // references to tmp data auto scl_g = tmp_device_real_cell.get_guard(), col_g = tmp_device_real_part.get_guard(); - auto off_g = tmp_device_size_cell.get_guard(), + auto off_g = tmp_device_size_cell.get_guard(); thrust_device::vector &scl(scl_g.get()), // scale factor for probablility @@ -354,28 +354,26 @@ namespace libcloudphxx > zip_rw_t; // tossing n_part/2 random numbers for comparing with probability of collisions in a pair of droplets - { - auto u01g = tmp_device_real_part.get_guard(); - thrust_device::vector &u01 = u01g.get(); - rand_u01(u01, n_part); - - zip_ro_t zip_ro_it( - thrust::make_tuple( - // u01 - u01.begin(), - // scl - thrust::make_permutation_iterator(scl.begin(), sorted_ijk.begin()), - // ix - zero, - zero+1, - // cid - thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin()), - thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin())+1, - // dv - thrust::make_permutation_iterator(dv.begin(), sorted_ijk.begin()) - ) - ); - } + auto u01g = tmp_device_real_part.get_guard(); + thrust_device::vector &u01 = u01g.get(); + rand_u01(u01, n_part); + + zip_ro_t zip_ro_it( + thrust::make_tuple( + // u01 + u01.begin(), + // scl + thrust::make_permutation_iterator(scl.begin(), sorted_ijk.begin()), + // ix + zero, + zero+1, + // cid + thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin()), + thrust::make_permutation_iterator(off.begin(), sorted_ijk.begin())+1, + // dv + thrust::make_permutation_iterator(dv.begin(), sorted_ijk.begin()) + ) + ); auto zip_ro_calc_it = thrust::make_zip_iterator( diff --git a/src/impl/particles_impl_hskpng_resize.ipp b/src/impl/particles_impl_hskpng_resize.ipp index 6dcc8b523..0b0354487 100644 --- a/src/impl/particles_impl_hskpng_resize.ipp +++ b/src/impl/particles_impl_hskpng_resize.ipp @@ -24,6 +24,8 @@ namespace libcloudphxx tmp_device_real_part.resize(n_part); tmp_device_n_part.resize(n_part); tmp_device_size_part.resize(n_part); + tmp_host_size_part.resize(n_part); + tmp_host_real_part.resize(n_part); for(auto &vec: resize_size_vctrs) vec->resize(n_part); diff --git a/src/impl/particles_impl_hskpng_vterm.ipp b/src/impl/particles_impl_hskpng_vterm.ipp index c25bcfcb1..4eeeed148 100644 --- a/src/impl/particles_impl_hskpng_vterm.ipp +++ b/src/impl/particles_impl_hskpng_vterm.ipp @@ -143,8 +143,8 @@ namespace libcloudphxx if(opts_init.terminal_velocity == vt_t::beard77fast) //use cached vt at sea level { - auto vt0_bin_gp = tmp_device_size_part.get_guard(); - thrust_device::vector &vt0_bin(vt0_bin_gp->get()); + auto vt0_bin_g = tmp_device_size_part.get_guard(); + thrust_device::vector &vt0_bin(vt0_bin_g.get()); // get cached bin number thrust::transform_if( rw2.begin(), rw2.end(), @@ -196,8 +196,8 @@ namespace libcloudphxx if(opts_init.terminal_velocity == vt_t::beard77fast) //use cached vt at sea level { - auto vt0_bin_gp = tmp_device_size_part.get_guard(); - thrust_device::vector &vt0_bin(vt0_bin_gp->get()); + auto vt0_bin_g = tmp_device_size_part.get_guard(); + thrust_device::vector &vt0_bin(vt0_bin_g.get()); // get cached bin number thrust::transform( rw2.begin(), rw2.end(), diff --git a/src/impl/particles_impl_init_ijk.ipp b/src/impl/particles_impl_init_ijk.ipp index f83746275..a23e82462 100644 --- a/src/impl/particles_impl_init_ijk.ipp +++ b/src/impl/particles_impl_init_ijk.ipp @@ -36,7 +36,7 @@ namespace libcloudphxx void particles_t::impl::init_ijk() { auto ptr_g = tmp_device_size_cell.get_guard(); - thrust_device::vector &ptr(ptr_g->get()); + thrust_device::vector &ptr(ptr_g.get()); thrust::exclusive_scan(count_num.begin(), count_num.end(), ptr.begin()); // number of SDs in cells to init up to (i-1) // fill ijk with cell number of each SD diff --git a/src/impl/particles_impl_init_n.ipp b/src/impl/particles_impl_init_n.ipp index 2e34c71c7..090c857bc 100644 --- a/src/impl/particles_impl_init_n.ipp +++ b/src/impl/particles_impl_init_n.ipp @@ -51,7 +51,7 @@ namespace libcloudphxx { // temporary space on the host auto tmp_real_g = tmp_host_real_part.get_guard(); - thrust::host_vector &tmp_real = tmp_real_g.get(); + thrust::host_vector &tmp_real = tmp_real_g.get(); auto tmp_ijk_g = tmp_host_size_part.get_guard(); thrust::host_vector &tmp_ijk = tmp_ijk_g.get(); diff --git a/src/impl/particles_impl_reserve_hskpng_npart.ipp b/src/impl/particles_impl_reserve_hskpng_npart.ipp index f74ded896..7fa055bd6 100644 --- a/src/impl/particles_impl_reserve_hskpng_npart.ipp +++ b/src/impl/particles_impl_reserve_hskpng_npart.ipp @@ -44,6 +44,8 @@ namespace libcloudphxx tmp_device_real_part.reserve(opts_init.n_sd_max); tmp_device_n_part.reserve(opts_init.n_sd_max); tmp_device_size_part.reserve(opts_init.n_sd_max); + tmp_host_size_part.reserve(opts_init.n_sd_max); + tmp_host_real_part.reserve(opts_init.n_sd_max); rd3.reserve(opts_init.n_sd_max); rw2.reserve(opts_init.n_sd_max); diff --git a/src/impl/particles_impl_rlx_dry_distros.ipp b/src/impl/particles_impl_rlx_dry_distros.ipp index 3f0bd629d..aaf71f903 100644 --- a/src/impl/particles_impl_rlx_dry_distros.ipp +++ b/src/impl/particles_impl_rlx_dry_distros.ipp @@ -165,8 +165,8 @@ namespace libcloudphxx // horizontal sum of this moment thrust::fill(hor_sum.begin(), hor_sum.end(), 0); - auto count_k_gp = tmp_device_size_cell.get_guard(); - thrust_device::vector &count_k(count_k_gp->get()); + auto count_k_g = tmp_device_size_cell.get_guard(); + thrust_device::vector &count_k(count_k_g.get()); thrust::transform(count_ijk.begin(), count_ijk.begin() + count_n, count_k.begin(), arg::_1 % opts_init.nz); thrust::sort_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin()); @@ -226,7 +226,7 @@ namespace libcloudphxx // --- init k --- auto ptr_g = tmp_device_size_cell.get_guard(); - thrust_device::vector &ptr(ptr_g->get()); + thrust_device::vector &ptr(ptr_g.get()); thrust::exclusive_scan(n_SD_to_create.begin(), n_SD_to_create.end(), ptr.begin()); // number of SDs in cells to init up to (i-1) thrust::for_each( From da4d8cfcf6c0d026c7ced8e781290c97b570c824 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Thu, 9 Oct 2025 12:38:40 +0200 Subject: [PATCH 14/22] get rid of tmp_device_size_part; use tmp_device_real_part instead; Also lft_id and rgt_id using temporary arrays, not i and tmp_device_size_part --- src/impl/particles_impl.ipp | 16 +++++++++------- src/impl/particles_impl_bcnd.ipp | 5 +++++ src/impl/particles_impl_hskpng_resize.ipp | 2 +- src/impl/particles_impl_hskpng_vterm.ipp | 10 ++++++---- src/impl/particles_impl_pack.ipp | 13 ++++++++++++- src/impl/particles_impl_post_copy.ipp | 3 +++ src/impl/particles_impl_reserve_hskpng_npart.ipp | 2 +- src/impl/particles_impl_unpack.ipp | 4 ++++ src/particles_step.ipp | 2 -- 9 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 856a943ca..019a98d22 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -210,8 +210,8 @@ namespace libcloudphxx tmp_vector_pool> tmp_device_n_part; tmp_vector_pool> - tmp_device_size_cell, - tmp_device_size_part; + tmp_device_size_cell; + // tmp_device_size_part; // guards for temp vectors that are used in multiple functions and need to stay unchanged inbetween // tmp_vector_pool>::guard asd; @@ -223,7 +223,9 @@ namespace libcloudphxx sstp_dlt_th_gp, sstp_dlt_rhod_gp, sstp_dlt_p_gp, - drv_gp; + drv_gp, + lft_id_gp, + rgt_id_gp; std::unique_ptr< typename tmp_vector_pool>::guard @@ -270,7 +272,7 @@ namespace libcloudphxx thrust_device::vector in_real_bfr, out_real_bfr; // ids of sds to be copied with distmem - thrust_device::vector &lft_id, &rgt_id; + // thrust_device::vector &lft_id, &rgt_id; // --- containters with vector pointers to help resize and copy vectors --- @@ -321,15 +323,15 @@ namespace libcloudphxx rng(_opts_init.rng_seed), src_stp_ctr(0), rlx_stp_ctr(0), - bcond(bcond), + bcond(bcond), n_x_bfr(0), n_cell_bfr(0), mpi_rank(mpi_rank), mpi_size(mpi_size), lft_x1(-1), // default to no rgt_x0(-1), // MPI boudanry - lft_id(i), // note: reuses i vector - rgt_id(tmp_device_size_part), + // lft_id(i), // note: reuses i vector + // rgt_id(tmp_device_size_part), n_x_tot(n_x_tot), halo_size(_opts_init.adve_scheme == as_t::pred_corr ? 2 : 0), halo_x( diff --git a/src/impl/particles_impl_bcnd.ipp b/src/impl/particles_impl_bcnd.ipp index 7dabb7844..69833ab0c 100644 --- a/src/impl/particles_impl_bcnd.ipp +++ b/src/impl/particles_impl_bcnd.ipp @@ -109,6 +109,11 @@ namespace libcloudphxx { namespace arg = thrust::placeholders; + lft_id_gp = std::move(tmp_device_real_part.get_guard()); + thrust_device::vector &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available + rgt_id_gp = std::move(tmp_device_real_part.get_guard()); + thrust_device::vector &rgt_id(rgt_id_gp->get()); + // save ids of SDs to copy lft_count = thrust::copy_if( zero, zero+n_part, diff --git a/src/impl/particles_impl_hskpng_resize.ipp b/src/impl/particles_impl_hskpng_resize.ipp index 0b0354487..00c0c4008 100644 --- a/src/impl/particles_impl_hskpng_resize.ipp +++ b/src/impl/particles_impl_hskpng_resize.ipp @@ -23,7 +23,7 @@ namespace libcloudphxx // vec->resize(n_part); tmp_device_real_part.resize(n_part); tmp_device_n_part.resize(n_part); - tmp_device_size_part.resize(n_part); + // tmp_device_size_part.resize(n_part); tmp_host_size_part.resize(n_part); tmp_host_real_part.resize(n_part); diff --git a/src/impl/particles_impl_hskpng_vterm.ipp b/src/impl/particles_impl_hskpng_vterm.ipp index 4eeeed148..4cbd243d3 100644 --- a/src/impl/particles_impl_hskpng_vterm.ipp +++ b/src/impl/particles_impl_hskpng_vterm.ipp @@ -143,8 +143,8 @@ namespace libcloudphxx if(opts_init.terminal_velocity == vt_t::beard77fast) //use cached vt at sea level { - auto vt0_bin_g = tmp_device_size_part.get_guard(); - thrust_device::vector &vt0_bin(vt0_bin_g.get()); + auto vt0_bin_g = tmp_device_real_part.get_guard(); // should be thrust_size_t, but we have many real_ available (do we?) + thrust_device::vector &vt0_bin(vt0_bin_g.get()); // get cached bin number thrust::transform_if( rw2.begin(), rw2.end(), @@ -196,8 +196,10 @@ namespace libcloudphxx if(opts_init.terminal_velocity == vt_t::beard77fast) //use cached vt at sea level { - auto vt0_bin_g = tmp_device_size_part.get_guard(); - thrust_device::vector &vt0_bin(vt0_bin_g.get()); + // auto vt0_bin_g = tmp_device_size_part.get_guard(); + // thrust_device::vector &vt0_bin(vt0_bin_g.get()); + auto vt0_bin_g = tmp_device_real_part.get_guard(); // should be thrust_size_t, but we have many real_ available (do we?) + thrust_device::vector &vt0_bin(vt0_bin_g.get()); // get cached bin number thrust::transform( rw2.begin(), rw2.end(), diff --git a/src/impl/particles_impl_pack.ipp b/src/impl/particles_impl_pack.ipp index 915f0e873..12ea3a2a9 100644 --- a/src/impl/particles_impl_pack.ipp +++ b/src/impl/particles_impl_pack.ipp @@ -32,6 +32,8 @@ namespace libcloudphxx assert(out_n_bfr.size() >= lft_count * distmem_n_vctrs.size()); assert(in_n_bfr.size() >= lft_count * distmem_n_vctrs.size()); + thrust_device::vector &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available + auto it = distmem_n_vctrs.begin(); while (it != distmem_n_vctrs.end()) { @@ -48,7 +50,8 @@ namespace libcloudphxx void particles_t::impl::pack_n_rgt() { assert(out_n_bfr.size() >= rgt_count * distmem_n_vctrs.size()); - assert(in_n_bfr.size() >= rgt_count * distmem_n_vctrs.size()); + + thrust_device::vector &rgt_id(rgt_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many availableassert(in_n_bfr.size() >= rgt_count * distmem_n_vctrs.size()); auto it = distmem_n_vctrs.begin(); while (it != distmem_n_vctrs.end()) @@ -67,6 +70,8 @@ namespace libcloudphxx { assert(out_real_bfr.size() >= lft_count * distmem_real_vctrs.size()); assert(in_real_bfr.size() >= lft_count * distmem_real_vctrs.size()); + + thrust_device::vector &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available auto it = distmem_real_vctrs.begin(); while (it != distmem_real_vctrs.end()) @@ -86,6 +91,8 @@ namespace libcloudphxx assert(out_real_bfr.size() >= rgt_count * distmem_real_vctrs.size()); assert(in_real_bfr.size() >= rgt_count * distmem_real_vctrs.size()); + thrust_device::vector &rgt_id(rgt_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available + auto it = distmem_real_vctrs.begin(); while (it != distmem_real_vctrs.end()) { @@ -101,6 +108,8 @@ namespace libcloudphxx template void particles_t::impl::bcnd_remote_lft(const real_t &x0, const real_t &x1) { + thrust_device::vector &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available + thrust::transform( thrust::make_permutation_iterator(x.begin(), lft_id.begin()), thrust::make_permutation_iterator(x.begin(), lft_id.begin()) + lft_count, @@ -112,6 +121,8 @@ namespace libcloudphxx template void particles_t::impl::bcnd_remote_rgt(const real_t &x1, const real_t &x0) { + thrust_device::vector &rgt_id(rgt_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available + thrust::transform( thrust::make_permutation_iterator(x.begin(), rgt_id.begin()), thrust::make_permutation_iterator(x.begin(), rgt_id.begin()) + rgt_count, diff --git a/src/impl/particles_impl_post_copy.ipp b/src/impl/particles_impl_post_copy.ipp index b31dfa42e..4c700d84e 100644 --- a/src/impl/particles_impl_post_copy.ipp +++ b/src/impl/particles_impl_post_copy.ipp @@ -17,6 +17,9 @@ namespace libcloudphxx template void particles_t::impl::post_copy(const opts_t &opts) { + // release temporary arrays + lft_id_gp.reset(); + rgt_id_gp.reset(); // recycling out-of-domain/invalidated particles if(opts.rcyc) rcyc(); diff --git a/src/impl/particles_impl_reserve_hskpng_npart.ipp b/src/impl/particles_impl_reserve_hskpng_npart.ipp index 7fa055bd6..badebc16e 100644 --- a/src/impl/particles_impl_reserve_hskpng_npart.ipp +++ b/src/impl/particles_impl_reserve_hskpng_npart.ipp @@ -43,7 +43,7 @@ namespace libcloudphxx tmp_device_real_part.reserve(opts_init.n_sd_max); tmp_device_n_part.reserve(opts_init.n_sd_max); - tmp_device_size_part.reserve(opts_init.n_sd_max); + // tmp_device_size_part.reserve(opts_init.n_sd_max); tmp_host_size_part.reserve(opts_init.n_sd_max); tmp_host_real_part.reserve(opts_init.n_sd_max); diff --git a/src/impl/particles_impl_unpack.ipp b/src/impl/particles_impl_unpack.ipp index 1daa205d0..24271938c 100644 --- a/src/impl/particles_impl_unpack.ipp +++ b/src/impl/particles_impl_unpack.ipp @@ -121,6 +121,8 @@ namespace libcloudphxx template void particles_t::impl::flag_lft() { + thrust_device::vector &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available + thrust::copy( thrust::make_constant_iterator(0), thrust::make_constant_iterator(0) + lft_count, @@ -131,6 +133,8 @@ namespace libcloudphxx template void particles_t::impl::flag_rgt() { + thrust_device::vector &rgt_id(rgt_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available + thrust::copy( thrust::make_constant_iterator(0), thrust::make_constant_iterator(0) + rgt_count, diff --git a/src/particles_step.ipp b/src/particles_step.ipp index 6b068622c..6897b8183 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -431,8 +431,6 @@ namespace libcloudphxx else pimpl->rlx_stp_ctr = 0; //reset the counter if source was turned off // boundary condition + accumulated rainfall to be returned - // distmem version overwrites i and tmp_device_size_part - // and they both need to be unchanged untill distmem copies pimpl->bcnd(); // copy advected SDs using asynchronous MPI; From 39196824c34929c85b9fd87963bff52383e06af2 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Thu, 9 Oct 2025 17:28:19 +0200 Subject: [PATCH 15/22] n_filtered tmp arrays in diag --- src/impl/particles_impl_mass_dens.ipp | 4 ++-- src/impl/particles_impl_moms.ipp | 12 ++++++------ src/particles_diag.ipp | 15 +++++++-------- src/particles_step.ipp | 4 ++++ 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/impl/particles_impl_mass_dens.ipp b/src/impl/particles_impl_mass_dens.ipp index cfae76741..d134b7891 100644 --- a/src/impl/particles_impl_mass_dens.ipp +++ b/src/impl/particles_impl_mass_dens.ipp @@ -36,8 +36,8 @@ namespace libcloudphxx assert(selected_before_counting); //TODO: force moms_all() before mass density estimation? // same as above - auto n_filtered_g = tmp_device_real_part.get_guard(); - thrust_device::vector &n_filtered = n_filtered_g.get(); + // auto n_filtered_g = tmp_device_real_part.get_guard(); + thrust_device::vector &n_filtered = n_filtered_gp->get(); // number of SD in each cell casted to real_t auto count_num_real_t_g = tmp_device_real_cell.get_guard(); diff --git a/src/impl/particles_impl_moms.ipp b/src/impl/particles_impl_moms.ipp index ffe7ffba8..2357558da 100644 --- a/src/impl/particles_impl_moms.ipp +++ b/src/impl/particles_impl_moms.ipp @@ -51,7 +51,7 @@ namespace libcloudphxx { hskpng_sort(); - n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + n_filtered_gp = std::move(tmp_device_real_part.get_guard()); thrust_device::vector &n_filtered = n_filtered_gp->get(); thrust::copy( @@ -73,7 +73,9 @@ namespace libcloudphxx hskpng_sort(); // transforming n -> n if within range, else 0 - n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + if(!cons) + n_filtered_gp = std::move(tmp_device_real_part.get_guard()); + thrust_device::vector &n_filtered = n_filtered_gp->get(); if(!cons) @@ -113,7 +115,7 @@ namespace libcloudphxx { hskpng_sort(); - n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + n_filtered_gp = std::move(tmp_device_real_part.get_guard()); thrust_device::vector &n_filtered = n_filtered_gp->get(); thrust::transform( @@ -136,7 +138,7 @@ namespace libcloudphxx { hskpng_sort(); - n_filtered_gp.reset(&(tmp_device_real_part.get_guard())); + n_filtered_gp = std::move(tmp_device_real_part.get_guard()); thrust_device::vector &n_filtered = n_filtered_gp->get(); { @@ -229,8 +231,6 @@ namespace libcloudphxx count_mom.begin() ); - n_filtered_gp.reset(); // n_filtered not needed anymore, destroy the guard to a tmp array that stored it - count_n = it_pair.first - count_ijk.begin(); #if !defined(NDEBUG) { diff --git a/src/particles_diag.ipp b/src/particles_diag.ipp index 39c255791..c35a8fe99 100644 --- a/src/particles_diag.ipp +++ b/src/particles_diag.ipp @@ -164,8 +164,7 @@ namespace libcloudphxx namespace arg = thrust::placeholders; assert(pimpl->selected_before_counting); - auto n_filtered_g = pimpl->tmp_device_real_part.get_guard(); - thrust_device::vector &n_filtered = n_filtered_g.get(); + thrust_device::vector &n_filtered = n_filtered_gp->get(); // similar to hskpng_count pimpl->hskpng_sort(); @@ -249,8 +248,7 @@ namespace libcloudphxx void particles_t::diag_RH_ge_Sc() { // intentionally using the same tmp vector as inside moms_cmp below - thrust_device::vector &RH_minus_Sc(pimpl->tmp_device_real_part); - + // thrust_device::vector &RH_minus_Sc(pimpl->tmp_device_real_part); auto RH_minus_Sc_g = pimpl->tmp_device_real_part.get_guard(); thrust_device::vector &RH_minus_Sc = RH_minus_Sc_g.get(); @@ -281,7 +279,9 @@ namespace libcloudphxx void particles_t::diag_rw_ge_rc() { // intentionally using the same tmp vector as inside moms_cmp below - thrust_device::vector &rc2(pimpl->tmp_device_real_part); + // thrust_device::vector &rc2(pimpl->tmp_device_real_part); + auto rc2_g = pimpl->tmp_device_real_part.get_guard(); + thrust_device::vector &rc2 = rc2_g.get(); // computing rc2 for each particle thrust::transform( @@ -428,7 +428,8 @@ namespace libcloudphxx pimpl->hskpng_vterm_all(); // temporary vector to store vt - thrust::host_vector tmp_vt(pimpl->n_part); + auto tmp_vt_g = pimpl->tmp_device_real_part.get_guard(); + thrust_device::vector &tmp_vt = tmp_vt_g.get thrust::copy(pimpl->vt.begin(), pimpl->vt.end(), tmp_vt.begin()); thrust::transform( @@ -444,8 +445,6 @@ namespace libcloudphxx // copy back stored vterm thrust::copy(tmp_vt.begin(), tmp_vt.end(), pimpl->vt.begin()); - // release the memory - tmp_vt.erase(tmp_vt.begin(), tmp_vt.end()); } // get max rw in each cell diff --git a/src/particles_step.ipp b/src/particles_step.ipp index 6897b8183..e4f267d0d 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -91,6 +91,8 @@ namespace libcloudphxx */ } + n_filtered_gp.reset(); // n_filtered (used mostly in diag) not needed anymore, destroy the guard to a tmp array that stored it + if (pimpl->l2e[&pimpl->diss_rate].size() == 0) if (!diss_rate.is_null()) pimpl->init_e2l(diss_rate, &pimpl->diss_rate); @@ -312,6 +314,8 @@ namespace libcloudphxx if(opts.turb_adve && pimpl->n_dims==0) throw std::runtime_error("libcloudph++: turbulent advection does not work in 0D"); + + n_filtered_gp.reset(); // n_filtered (used mostly in diag) not needed anymore, destroy the guard to a tmp array that stored it // dt defined in opts_init can be overriden by dt in opts pimpl->adjust_timesteps(opts.dt); From 61ea8c59eaf7f93c71f3e01346c62925bc7fd1b1 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Fri, 10 Oct 2025 12:33:38 +0200 Subject: [PATCH 16/22] fix unit tests --- src/detail/tmp_vector_pool.hpp | 18 +++-- src/impl/particles_impl_ante_adding_SD.ipp | 2 +- src/impl/particles_impl_bcnd.ipp | 4 +- src/impl/particles_impl_chem_ante.ipp | 2 +- src/impl/particles_impl_chem_henry.ipp | 4 +- src/impl/particles_impl_chem_strength.ipp | 2 +- src/impl/particles_impl_coal.ipp | 7 +- src/impl/particles_impl_cond.ipp | 6 +- src/impl/particles_impl_cond_sstp.ipp | 4 +- src/impl/particles_impl_hskpng_mfp.ipp | 4 +- src/impl/particles_impl_hskpng_sort.ipp | 2 +- src/impl/particles_impl_hskpng_turb_ss.ipp | 2 +- src/impl/particles_impl_hskpng_turb_vel.ipp | 2 +- src/impl/particles_impl_hskpng_vterm.ipp | 4 +- src/impl/particles_impl_init_count_num.ipp | 2 +- src/impl/particles_impl_init_n.ipp | 2 +- src/impl/particles_impl_moms.ipp | 9 +-- src/impl/particles_impl_rlx_dry_distros.ipp | 74 +++++++++++---------- src/impl/particles_impl_sstp.ipp | 21 +++--- src/impl/particles_impl_update_th_rv.ipp | 2 +- src/particles_diag.ipp | 4 +- src/particles_step.ipp | 6 +- tests/python/unit/sstp_cond.py | 4 +- 23 files changed, 102 insertions(+), 85 deletions(-) diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp index 54b082aea..4bc4075b7 100644 --- a/src/detail/tmp_vector_pool.hpp +++ b/src/detail/tmp_vector_pool.hpp @@ -17,17 +17,14 @@ namespace libcloudphxx }; std::vector pool; public: - tmp_vector_pool(size_t pool_size = 1) { - for (size_t i = 0; i < pool_size; ++i) - pool.emplace_back(); - } + tmp_vector_pool(size_t pool_size = 1): pool(pool_size, 0) {} // Add a new vector to the pool // void add_vector(size_t vec_size) { // pool.emplace_back(vec_size); // } void add_vector() { - pool.emplace_back(); + pool.emplace_back(0); } void resize(size_t n) { @@ -44,6 +41,7 @@ namespace libcloudphxx // Acquire an available vector, returns its index size_t acquire() { + std::cerr << "tmp_vector_pool: acquiring vector from pool of size " << pool.size() << "\n"; for (size_t i = 0; i < pool.size(); ++i) { if (!pool[i].in_use) { pool[i].in_use = true; @@ -109,7 +107,17 @@ namespace libcloudphxx guard get_guard() { return guard(*this); } + guard* get_guardp() { + return new guard(*this); + } }; + + // helper function to reset a guard pointer, but first destroy the old guard + template + void reset_guardp(GuardPtr& guard_ptr, Pool& pool) { + guard_ptr.reset(); // destroy old guard + guard_ptr.reset(pool.get_guardp()); // acquire new guard + } }; }; diff --git a/src/impl/particles_impl_ante_adding_SD.ipp b/src/impl/particles_impl_ante_adding_SD.ipp index 83c1daf19..c791b7301 100644 --- a/src/impl/particles_impl_ante_adding_SD.ipp +++ b/src/impl/particles_impl_ante_adding_SD.ipp @@ -14,7 +14,7 @@ namespace libcloudphxx { // --- calc liquid water content before src --- hskpng_sort(); - drv_gp.reset(&(tmp_device_real_cell.get_guard())); + reset_guardp(drv_gp, tmp_device_real_cell); thrust_device::vector &drv = drv_gp->get(); thrust::fill(drv.begin(), drv.end(), real_t(0.)); diff --git a/src/impl/particles_impl_bcnd.ipp b/src/impl/particles_impl_bcnd.ipp index 69833ab0c..415949a31 100644 --- a/src/impl/particles_impl_bcnd.ipp +++ b/src/impl/particles_impl_bcnd.ipp @@ -109,9 +109,9 @@ namespace libcloudphxx { namespace arg = thrust::placeholders; - lft_id_gp = std::move(tmp_device_real_part.get_guard()); + reset_guardp(lft_id_gp, tmp_device_real_part); thrust_device::vector &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available - rgt_id_gp = std::move(tmp_device_real_part.get_guard()); + reset_guardp(rgt_id_gp, tmp_device_real_part); thrust_device::vector &rgt_id(rgt_id_gp->get()); // save ids of SDs to copy diff --git a/src/impl/particles_impl_chem_ante.ipp b/src/impl/particles_impl_chem_ante.ipp index ebaa188d3..a48cdf856 100644 --- a/src/impl/particles_impl_chem_ante.ipp +++ b/src/impl/particles_impl_chem_ante.ipp @@ -70,7 +70,7 @@ namespace libcloudphxx if (opts_init.chem_switch == false) throw std::runtime_error("libcloudph++: all chemistry was switched off"); //calculate new drop volumes (to be used in chem) - V_gp.reset(&(tmp_device_real_part.get_guard())); + reset_guardp(V_gp, tmp_device_real_part); thrust_device::vector &V = V_gp->get(); thrust::transform( diff --git a/src/impl/particles_impl_chem_henry.ipp b/src/impl/particles_impl_chem_henry.ipp index 9cacb58db..e0d97c427 100644 --- a/src/impl/particles_impl_chem_henry.ipp +++ b/src/impl/particles_impl_chem_henry.ipp @@ -332,8 +332,8 @@ namespace libcloudphxx // temporarily needed to store old mass per cell auto mass_old_g = tmp_device_real_cell.get_guard(); auto mass_new_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &mass_old(mass_old_g->get()); - thrust_device::vector &mass_new(mass_new_g->get()); + thrust_device::vector &mass_old(mass_old_g.get()); + thrust_device::vector &mass_new(mass_new_g.get()); for (int i = 0; i < chem_gas_n; ++i) { diff --git a/src/impl/particles_impl_chem_strength.ipp b/src/impl/particles_impl_chem_strength.ipp index 9bfa5250d..1f9b17e87 100644 --- a/src/impl/particles_impl_chem_strength.ipp +++ b/src/impl/particles_impl_chem_strength.ipp @@ -70,7 +70,7 @@ namespace libcloudphxx template void particles_t::impl::chem_flag_ante() { - chem_flag_gp = std::move(tmp_device_n_part.get_guard()); + reset_guardp(chem_flag_gp, tmp_device_n_part); thrust_device::vector &chem_flag(chem_flag_gp->get()); thrust_device::vector &V = V_gp->get(); diff --git a/src/impl/particles_impl_coal.ipp b/src/impl/particles_impl_coal.ipp index f276de331..0da22c4ed 100644 --- a/src/impl/particles_impl_coal.ipp +++ b/src/impl/particles_impl_coal.ipp @@ -278,7 +278,7 @@ namespace libcloudphxx thrust_device::vector &scl(scl_g.get()), // scale factor for probablility - &col(col_g.get()); // number of collisions, used in chemistry, NOTE: it's the same as u01, so it overwrites already used random numbers + &col(col_g.get()); // number of collisions, used in chemistry, // 1st one of a pair stores number of collisions, 2nd one stores info on which one has greater multiplicity thrust_device::vector &off(off_g.get()); // offset for getting index of particle within a cell @@ -354,8 +354,9 @@ namespace libcloudphxx > zip_rw_t; // tossing n_part/2 random numbers for comparing with probability of collisions in a pair of droplets - auto u01g = tmp_device_real_part.get_guard(); - thrust_device::vector &u01 = u01g.get(); + // auto u01g = tmp_device_real_part.get_guard(); + // thrust_device::vector &u01 = u01g.get(); + thrust_device::vector &u01(col); // inplace output - u01 needed to check if collision happened, later overwritten with the number of collisions rand_u01(u01, n_part); zip_ro_t zip_ro_it( diff --git a/src/impl/particles_impl_cond.ipp b/src/impl/particles_impl_cond.ipp index 6ef590c44..7135a5f16 100644 --- a/src/impl/particles_impl_cond.ipp +++ b/src/impl/particles_impl_cond.ipp @@ -20,13 +20,13 @@ namespace libcloudphxx auto lambda_D_g = tmp_device_real_cell.get_guard(); auto lambda_K_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &lambda_D(lambda_D_g->get()); - thrust_device::vector &lambda_K(lambda_K_g->get()); + thrust_device::vector &lambda_D(lambda_D_g.get()); + thrust_device::vector &lambda_K(lambda_K_g.get()); // --- calc liquid water content before cond --- hskpng_sort(); auto drv_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &drv = drv_g->get(); + thrust_device::vector &drv = drv_g.get(); // calculating the 3rd wet moment before condensation moms_all(); diff --git a/src/impl/particles_impl_cond_sstp.ipp b/src/impl/particles_impl_cond_sstp.ipp index c8b0a8e03..1723b4824 100644 --- a/src/impl/particles_impl_cond_sstp.ipp +++ b/src/impl/particles_impl_cond_sstp.ipp @@ -40,8 +40,8 @@ namespace libcloudphxx auto lambda_D_g = tmp_device_real_cell.get_guard(); auto lambda_K_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &lambda_D(lambda_D_g->get()); - thrust_device::vector &lambda_K(lambda_K_g->get()); + thrust_device::vector &lambda_D(lambda_D_g.get()); + thrust_device::vector &lambda_K(lambda_K_g.get()); auto hlpr_zip_iter = thrust::make_zip_iterator(thrust::make_tuple( sstp_tmp_rh.begin(), diff --git a/src/impl/particles_impl_hskpng_mfp.ipp b/src/impl/particles_impl_hskpng_mfp.ipp index 176c3f9ca..00c0151d8 100644 --- a/src/impl/particles_impl_hskpng_mfp.ipp +++ b/src/impl/particles_impl_hskpng_mfp.ipp @@ -43,8 +43,8 @@ namespace libcloudphxx { auto lambda_D_g = tmp_device_real_cell.get_guard(); auto lambda_K_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &lambda_D(lambda_D_g->get()); - thrust_device::vector &lambda_K(lambda_K_g->get()); + thrust_device::vector &lambda_D(lambda_D_g.get()); + thrust_device::vector &lambda_K(lambda_K_g.get()); thrust::transform(T.begin(), T.end(), lambda_D.begin(), detail::common__mean_free_path__lambda_D()); thrust::transform(T.begin(), T.end(), p.begin(), lambda_K.begin(), detail::common__mean_free_path__lambda_K()); diff --git a/src/impl/particles_impl_hskpng_sort.ipp b/src/impl/particles_impl_hskpng_sort.ipp index ef0fd53ab..ef3626843 100644 --- a/src/impl/particles_impl_hskpng_sort.ipp +++ b/src/impl/particles_impl_hskpng_sort.ipp @@ -29,7 +29,7 @@ namespace libcloudphxx { // generating a random sorting key auto un_g = tmp_device_n_part.get_guard(); - thrust_device::vector &un = un_g.get(); + thrust_device::vector &un = un_g.get(); rand_un(un, n_part); // sorting the sequence with the random key diff --git a/src/impl/particles_impl_hskpng_turb_ss.ipp b/src/impl/particles_impl_hskpng_turb_ss.ipp index 558c689ff..27ea0c815 100644 --- a/src/impl/particles_impl_hskpng_turb_ss.ipp +++ b/src/impl/particles_impl_hskpng_turb_ss.ipp @@ -45,7 +45,7 @@ namespace libcloudphxx void particles_t::impl::hskpng_turb_dot_ss() { auto tau_rlx_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &tau_rlx = tau_rlx_g->get(); + thrust_device::vector &tau_rlx = tau_rlx_g.get(); #if !defined(NDEBUG) // fill with a dummy value for debugging thrust::fill(tau_rlx.begin(), tau_rlx.end(), -44); diff --git a/src/impl/particles_impl_hskpng_turb_vel.ipp b/src/impl/particles_impl_hskpng_turb_vel.ipp index 3693a82a8..77dcbc6c3 100644 --- a/src/impl/particles_impl_hskpng_turb_vel.ipp +++ b/src/impl/particles_impl_hskpng_turb_vel.ipp @@ -54,7 +54,7 @@ namespace libcloudphxx namespace arg = thrust::placeholders; auto tau_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &tau(tau_g->get()); + thrust_device::vector &tau(tau_g.get()); thrust_device::vector &tke(diss_rate); // should be called after hskpng_tke, which replaces diss_rate with tke thrust::transform(tke.begin(), tke.end(), diff --git a/src/impl/particles_impl_hskpng_vterm.ipp b/src/impl/particles_impl_hskpng_vterm.ipp index 4cbd243d3..17c05ac29 100644 --- a/src/impl/particles_impl_hskpng_vterm.ipp +++ b/src/impl/particles_impl_hskpng_vterm.ipp @@ -156,7 +156,7 @@ namespace libcloudphxx // calc the vt thrust::transform_if( rw2.begin(), rw2.end(), // input - 1st arg - zip_it_t(thrust::make_tuple( + thrust::make_zip_iterator(thrust::make_tuple( thrust::make_permutation_iterator(vt_0.begin(), vt0_bin.begin()), thrust::make_permutation_iterator(p.begin(), ijk.begin()), thrust::make_permutation_iterator(rhod.begin(), ijk.begin()), @@ -209,7 +209,7 @@ namespace libcloudphxx // calc the vt thrust::transform( rw2.begin(), rw2.end(), // input - 1st arg - zip_it_t(thrust::make_tuple( + thrust::make_zip_iterator(thrust::make_tuple( thrust::make_permutation_iterator(vt_0.begin(), vt0_bin.begin()), thrust::make_permutation_iterator(p.begin(), ijk.begin()), thrust::make_permutation_iterator(rhod.begin(), ijk.begin()), diff --git a/src/impl/particles_impl_init_count_num.ipp b/src/impl/particles_impl_init_count_num.ipp index a1fa141c8..f2aa17109 100644 --- a/src/impl/particles_impl_init_count_num.ipp +++ b/src/impl/particles_impl_init_count_num.ipp @@ -78,7 +78,7 @@ namespace libcloudphxx void particles_t::impl::init_count_num_hlpr(const real_t &conc, const thrust_size_t &const_multi) { auto concentration_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &concentration(concentration_g->get()); + thrust_device::vector &concentration(concentration_g.get()); thrust::fill(concentration.begin(), concentration.end(), conc); conc_to_number(concentration); diff --git a/src/impl/particles_impl_init_n.ipp b/src/impl/particles_impl_init_n.ipp index 090c857bc..e2b3ac0be 100644 --- a/src/impl/particles_impl_init_n.ipp +++ b/src/impl/particles_impl_init_n.ipp @@ -153,7 +153,7 @@ namespace libcloudphxx { namespace arg = thrust::placeholders; auto concentration_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &concentration(concentration_g->get()); + thrust_device::vector &concentration(concentration_g.get()); thrust::fill(concentration.begin(), concentration.end(), conc); conc_to_number(concentration); thrust::transform( diff --git a/src/impl/particles_impl_moms.ipp b/src/impl/particles_impl_moms.ipp index 2357558da..b85f4b817 100644 --- a/src/impl/particles_impl_moms.ipp +++ b/src/impl/particles_impl_moms.ipp @@ -51,7 +51,8 @@ namespace libcloudphxx { hskpng_sort(); - n_filtered_gp = std::move(tmp_device_real_part.get_guard()); + // reset_guardp(n_filtered_gp, tmp_device_real_part); + reset_guardp(n_filtered_gp, tmp_device_real_part); thrust_device::vector &n_filtered = n_filtered_gp->get(); thrust::copy( @@ -74,7 +75,7 @@ namespace libcloudphxx // transforming n -> n if within range, else 0 if(!cons) - n_filtered_gp = std::move(tmp_device_real_part.get_guard()); + reset_guardp(n_filtered_gp, tmp_device_real_part); thrust_device::vector &n_filtered = n_filtered_gp->get(); @@ -115,7 +116,7 @@ namespace libcloudphxx { hskpng_sort(); - n_filtered_gp = std::move(tmp_device_real_part.get_guard()); + reset_guardp(n_filtered_gp, tmp_device_real_part); thrust_device::vector &n_filtered = n_filtered_gp->get(); thrust::transform( @@ -138,7 +139,7 @@ namespace libcloudphxx { hskpng_sort(); - n_filtered_gp = std::move(tmp_device_real_part.get_guard()); + reset_guardp(n_filtered_gp, tmp_device_real_part); thrust_device::vector &n_filtered = n_filtered_gp->get(); { diff --git a/src/impl/particles_impl_rlx_dry_distros.ipp b/src/impl/particles_impl_rlx_dry_distros.ipp index aaf71f903..b63cc8f43 100644 --- a/src/impl/particles_impl_rlx_dry_distros.ipp +++ b/src/impl/particles_impl_rlx_dry_distros.ipp @@ -165,17 +165,19 @@ namespace libcloudphxx // horizontal sum of this moment thrust::fill(hor_sum.begin(), hor_sum.end(), 0); - auto count_k_g = tmp_device_size_cell.get_guard(); - thrust_device::vector &count_k(count_k_g.get()); - thrust::transform(count_ijk.begin(), count_ijk.begin() + count_n, count_k.begin(), arg::_1 % opts_init.nz); - thrust::sort_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin()); + { + auto count_k_g = tmp_device_size_cell.get_guard(); + thrust_device::vector &count_k(count_k_g.get()); + thrust::transform(count_ijk.begin(), count_ijk.begin() + count_n, count_k.begin(), arg::_1 % opts_init.nz); + thrust::sort_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin()); - auto new_end = thrust::reduce_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin(), hor_sum_k.begin(), hor_sum_count.begin()); + auto new_end = thrust::reduce_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin(), hor_sum_k.begin(), hor_sum_count.begin()); - int number_of_levels_with_droplets = new_end.first - hor_sum_k.begin(); // number of levels with any SD, not with SD in this size and kappa range - - assert(number_of_levels_with_droplets <= opts_init.nz); - thrust::copy(hor_sum_count.begin(), hor_sum_count.begin() + number_of_levels_with_droplets, thrust::make_permutation_iterator(hor_sum.begin(), hor_sum_k.begin())); + int number_of_levels_with_droplets = new_end.first - hor_sum_k.begin(); // number of levels with any SD, not with SD in this size and kappa range + + assert(number_of_levels_with_droplets <= opts_init.nz); + thrust::copy(hor_sum_count.begin(), hor_sum_count.begin() + number_of_levels_with_droplets, thrust::make_permutation_iterator(hor_sum.begin(), hor_sum_k.begin())); + } // divide sum by the number of cells at this level // thrust::transform(hor_sum.begin(), hor_sum.end(), hor_sum.begin(), arg::_1 / (opts_init.nx * m1(opts_init.ny))); @@ -225,37 +227,39 @@ namespace libcloudphxx n.resize(n_part); // --- init k --- - auto ptr_g = tmp_device_size_cell.get_guard(); - thrust_device::vector &ptr(ptr_g.get()); - thrust::exclusive_scan(n_SD_to_create.begin(), n_SD_to_create.end(), ptr.begin()); // number of SDs in cells to init up to (i-1) - - thrust::for_each( - thrust::make_zip_iterator(thrust::make_tuple( - n_SD_to_create.begin(), ptr.begin(), zero - )), - thrust::make_zip_iterator(thrust::make_tuple( - n_SD_to_create.end(), ptr.end(), zero + opts_init.nz - )), - detail::arbitrary_sequence(&(k[n_part_old])) - ); + { + auto ptr_g = tmp_device_size_cell.get_guard(); + thrust_device::vector &ptr(ptr_g.get()); + thrust::exclusive_scan(n_SD_to_create.begin(), n_SD_to_create.end(), ptr.begin()); // number of SDs in cells to init up to (i-1) + + thrust::for_each( + thrust::make_zip_iterator(thrust::make_tuple( + n_SD_to_create.begin(), ptr.begin(), zero + )), + thrust::make_zip_iterator(thrust::make_tuple( + n_SD_to_create.end(), ptr.end(), zero + opts_init.nz + )), + detail::arbitrary_sequence(&(k[n_part_old])) + ); - // --- init multiplicities (includes casting from real to n) --- + // --- init multiplicities (includes casting from real to n) --- #if !defined(__NVCC__) - using std::min; + using std::min; #endif - thrust::for_each( - thrust::make_zip_iterator(thrust::make_tuple( - n_SD_to_create.begin(), ptr.begin(), - thrust::make_transform_iterator(hor_missing.begin(), arg::_1 / real_t(opts_init.rlx_sd_per_bin) * min(dt / opts_init.rlx_timescale, real_t(1)) + real_t(0.5)) - )), - thrust::make_zip_iterator(thrust::make_tuple( - n_SD_to_create.end(), ptr.end(), - thrust::make_transform_iterator(hor_missing.end(), arg::_1 / real_t(opts_init.rlx_sd_per_bin) * min(dt / opts_init.rlx_timescale, real_t(1)) + real_t(0.5)) - )), - detail::arbitrary_sequence(&(n[n_part_old])) - ); + thrust::for_each( + thrust::make_zip_iterator(thrust::make_tuple( + n_SD_to_create.begin(), ptr.begin(), + thrust::make_transform_iterator(hor_missing.begin(), arg::_1 / real_t(opts_init.rlx_sd_per_bin) * min(dt / opts_init.rlx_timescale, real_t(1)) + real_t(0.5)) + )), + thrust::make_zip_iterator(thrust::make_tuple( + n_SD_to_create.end(), ptr.end(), + thrust::make_transform_iterator(hor_missing.end(), arg::_1 / real_t(opts_init.rlx_sd_per_bin) * min(dt / opts_init.rlx_timescale, real_t(1)) + real_t(0.5)) + )), + detail::arbitrary_sequence(&(n[n_part_old])) + ); + } // detecting possible overflows of n type { diff --git a/src/impl/particles_impl_sstp.ipp b/src/impl/particles_impl_sstp.ipp index 4bc6c7118..da81a2c7b 100644 --- a/src/impl/particles_impl_sstp.ipp +++ b/src/impl/particles_impl_sstp.ipp @@ -122,19 +122,22 @@ namespace libcloudphxx // acquire temporary arrays to store sstp_delta if (step == 0) { - sstp_dlt_rv_gp.reset(&(tmp_device_real_part.get_guard())); - dlt[0] = &sstp_dlt_rv_gp->get(); - sstp_dlt_th_gp.reset(&(tmp_device_real_part.get_guard())); - dlt[1] = &sstp_dlt_th_gp->get(); - sstp_dlt_rhod_gp.reset(&(tmp_device_real_part.get_guard())); - dlt[2] = &sstp_dlt_rhod_gp->get(); + reset_guardp(sstp_dlt_rv_gp, tmp_device_real_part); + reset_guardp(sstp_dlt_th_gp, tmp_device_real_part); + reset_guardp(sstp_dlt_rhod_gp, tmp_device_real_part); if(opts_init.const_p) { - sstp_dlt_p_gp.reset(&(tmp_device_real_part.get_guard())); - dlt[3] = &sstp_dlt_p_gp->get(); + reset_guardp(sstp_dlt_p_gp, tmp_device_real_part); + dlt[3] = &(sstp_dlt_p_gp->get()); } } + dlt[0] = &(sstp_dlt_rv_gp->get()); + dlt[1] = &(sstp_dlt_th_gp->get()); + dlt[2] = &(sstp_dlt_rhod_gp->get()); + if(opts_init.const_p) + dlt[3] = &(sstp_dlt_p_gp->get()); + for (int ix = 0; ix < (opts_init.const_p ? n : n-1); ++ix) { const real_t sstp = sstp_cond; @@ -177,9 +180,7 @@ namespace libcloudphxx sstp_dlt_th_gp.reset(); sstp_dlt_rhod_gp.reset(); if(opts_init.const_p) - { sstp_dlt_p_gp.reset(); - } } } diff --git a/src/impl/particles_impl_update_th_rv.ipp b/src/impl/particles_impl_update_th_rv.ipp index db4dc1734..833ebf480 100644 --- a/src/impl/particles_impl_update_th_rv.ipp +++ b/src/impl/particles_impl_update_th_rv.ipp @@ -101,7 +101,7 @@ namespace libcloudphxx // cell-wise change in state auto dstate_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &dstate(dstate_g->get()); + thrust_device::vector &dstate(dstate_g.get()); // init dstate with 0s thrust::fill(dstate.begin(), dstate.end(), real_t(0)); // calc sum of pdstate in each cell diff --git a/src/particles_diag.ipp b/src/particles_diag.ipp index c35a8fe99..29a2df0e9 100644 --- a/src/particles_diag.ipp +++ b/src/particles_diag.ipp @@ -164,7 +164,7 @@ namespace libcloudphxx namespace arg = thrust::placeholders; assert(pimpl->selected_before_counting); - thrust_device::vector &n_filtered = n_filtered_gp->get(); + thrust_device::vector &n_filtered = pimpl->n_filtered_gp->get(); // similar to hskpng_count pimpl->hskpng_sort(); @@ -429,7 +429,7 @@ namespace libcloudphxx // temporary vector to store vt auto tmp_vt_g = pimpl->tmp_device_real_part.get_guard(); - thrust_device::vector &tmp_vt = tmp_vt_g.get + thrust_device::vector &tmp_vt = tmp_vt_g.get(); thrust::copy(pimpl->vt.begin(), pimpl->vt.end(), tmp_vt.begin()); thrust::transform( diff --git a/src/particles_step.ipp b/src/particles_step.ipp index e4f267d0d..e683de04f 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -91,7 +91,7 @@ namespace libcloudphxx */ } - n_filtered_gp.reset(); // n_filtered (used mostly in diag) not needed anymore, destroy the guard to a tmp array that stored it + pimpl->n_filtered_gp.reset(); // n_filtered (used mostly in diag) not needed anymore, destroy the guard to a tmp array that stored it if (pimpl->l2e[&pimpl->diss_rate].size() == 0) if (!diss_rate.is_null()) pimpl->init_e2l(diss_rate, &pimpl->diss_rate); @@ -314,8 +314,8 @@ namespace libcloudphxx if(opts.turb_adve && pimpl->n_dims==0) throw std::runtime_error("libcloudph++: turbulent advection does not work in 0D"); - - n_filtered_gp.reset(); // n_filtered (used mostly in diag) not needed anymore, destroy the guard to a tmp array that stored it + + pimpl->n_filtered_gp.reset(); // n_filtered (used mostly in diag) not needed anymore, destroy the guard to a tmp array that stored it // dt defined in opts_init can be overriden by dt in opts pimpl->adjust_timesteps(opts.dt); diff --git a/tests/python/unit/sstp_cond.py b/tests/python/unit/sstp_cond.py index 7ac0295a9..629f79127 100644 --- a/tests/python/unit/sstp_cond.py +++ b/tests/python/unit/sstp_cond.py @@ -1,5 +1,6 @@ import sys sys.path.insert(0, "../../bindings/python/") +sys.path.insert(0, "../../../build/bindings/python/") from libcloudphxx import lgrngn @@ -118,7 +119,8 @@ def test(turb_cond): prtcls.diag_all() prtcls.diag_wet_mom(3); wet_post_adve_cond = copy(frombuffer(prtcls.outbuf()).reshape(opts_init.nx, opts_init.nz)) - assert allclose(wet_post_adve, wet_post_adve_cond, atol=0, rtol=3e-2) + print(wet_post_adve, wet_post_adve_cond) + assert allclose(wet_post_adve, wet_post_adve_cond, atol=0, rtol=5e-2) test(False) test(True) From cc46914ac03022e94ea4f0558b189f3f89227818 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Fri, 10 Oct 2025 16:36:19 +0200 Subject: [PATCH 17/22] mcuda outbuf fixes --- src/impl/particles_impl.ipp | 4 ++++ src/impl/particles_impl_fill_outbuf.ipp | 13 ++++++------- src/particles_multi_gpu_diag.ipp | 11 ++++++++--- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 019a98d22..458381859 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -227,6 +227,10 @@ namespace libcloudphxx lft_id_gp, rgt_id_gp; + std::unique_ptr< + typename tmp_vector_pool>::guard + > outbuf_gp; + std::unique_ptr< typename tmp_vector_pool>::guard > chem_flag_gp; diff --git a/src/impl/particles_impl_fill_outbuf.ipp b/src/impl/particles_impl_fill_outbuf.ipp index 3763c392f..abf937cb3 100644 --- a/src/impl/particles_impl_fill_outbuf.ipp +++ b/src/impl/particles_impl_fill_outbuf.ipp @@ -14,18 +14,17 @@ namespace libcloudphxx { thrust::fill(outbuf.begin(), outbuf.end(), 0); -#if defined(__NVCC__) +#if !defined(__NVCC__) + thrust_device::vector &pi(count_ijk); +#else + auto pi_g = tmp_host_size_cell.get_guard(); + thrust::host_vector &pi(pi_g.get()); thrust::copy( count_ijk.begin(), count_ijk.end(), // from - tmp_host_size_cell.begin() + pi.begin() ); #endif -#if !defined(__NVCC__) - thrust_device::vector &pi(count_ijk); -#else - thrust::host_vector &pi(tmp_host_size_cell); -#endif thrust::copy( count_mom.begin(), // input - begin diff --git a/src/particles_multi_gpu_diag.ipp b/src/particles_multi_gpu_diag.ipp index af70ac2f5..2a096964e 100644 --- a/src/particles_multi_gpu_diag.ipp +++ b/src/particles_multi_gpu_diag.ipp @@ -197,11 +197,16 @@ namespace libcloudphxx std::vector threads; for (int i = 0; i < this->opts_init->dev_count; ++i) { + auto &ipimpl = pimpl->particles[i]->pimpl; + reset_guardp(ipimpl->outbuf_gp, ipimpl->tmp_host_real_cell); + thrust::host_vector &outbuf(ipimpl->outbuf_gp->get()); + threads.emplace_back( detail::set_device_and_run, i, std::bind( &particles_t::impl::fill_outbuf, - &(*(pimpl->particles[i]->pimpl)) + &(*(pimpl->particles[i]->pimpl)), + outbuf ) ); } @@ -209,14 +214,14 @@ namespace libcloudphxx for(auto &p : pimpl->particles) // TODO: perform this copy in parallell? { - auto outbuf_g = p->pimpl->tmp_host_real_cell.get_guard(); - thrust::host_vector &outbuf = outbuf_g.get(); + thrust::host_vector &outbuf(p->pimpl->outbuf_gp->get()); thrust::copy( outbuf.begin(), outbuf.end(), pimpl->real_n_cell_tot.begin() + p->pimpl->n_cell_bfr ); + p->pimpl->outbuf_gp.reset(); // release the guard } return &(*(pimpl->real_n_cell_tot.begin())); } From 513b58d9f4367a73df54d61a3399acf5f9e6771a Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Mon, 13 Oct 2025 12:20:43 +0200 Subject: [PATCH 18/22] distmem: 1 more tmp vector; larger epsilon in lgrngn_cond --- src/impl/particles_impl.ipp | 5 ++--- tests/python/physics/lgrngn_cond.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 458381859..4cabd2048 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -79,12 +79,11 @@ namespace libcloudphxx wp, // turbulent perturbation of velocity ssp, // turbulent perturbation of supersaturation dot_ssp, // time derivative of the turbulent perturbation of supersaturation - sstp_tmp_rv, // either rv_old or advection-caused change in water vapour mixing ratio + sstp_tmp_rv, // either rv_old or advection-caused change in water vapour mixing ratio; NOTE: not using tmp_ vectors for this, because either size of the vector is ncell (for per-cell substepping) or size is npart, but value needs to be remembered between model steps (for per-particle) sstp_tmp_th, // ditto for theta sstp_tmp_rh, // ditto for rho sstp_tmp_p, // ditto for pressure incloud_time; // time this SD has been within a cloud - // TODO: sstp_tmp_X could be reused after condensation; create additional temporary arrays within tmp_device_real_part and use them as sstp_tmp (add a guard that protects them during condesnation, just like sstp_tmp_X_gp). same goes for sstp_tmp_chem_X // dry radii distribution characteristics real_t log_rd_min, // logarithm of the lower bound of the distr @@ -444,7 +443,7 @@ namespace libcloudphxx // init number of temporary real vctrs if(opts_init.chem_switch || allow_sstp_cond || n_dims >= 2) tmp_device_real_part.add_vector(); - if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch) + if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch || distmem()) tmp_device_real_part.add_vector(); if(allow_sstp_cond && opts_init.exact_sstp_cond) { diff --git a/tests/python/physics/lgrngn_cond.py b/tests/python/physics/lgrngn_cond.py index cfa25b557..10483d057 100644 --- a/tests/python/physics/lgrngn_cond.py +++ b/tests/python/physics/lgrngn_cond.py @@ -124,8 +124,8 @@ def test(RH_formula, _step_count, substep_count, exact_substep, constp, opts_dt) ss_post_cond = supersaturation(prtcls) print("supersaturation after condensation", ss_post_cond, th[0], rv[0]) - assert(abs(th[0] - exp_th[constp]) < 1e-4 * exp_th[constp]) - assert(abs(rv[0] - exp_rv[constp]) < 1e-3 * exp_rv[constp]) + assert(abs(th[0] - exp_th[constp]) < 1e-3 * exp_th[constp]) + assert(abs(rv[0] - exp_rv[constp]) < 1e-2 * exp_rv[constp]) rv_diff = rv_init.copy() - rv[0].copy() # change to subsaturated air - test evaporation From 9cce252c7cf28affb95aaaa9c3b18928670f5a0d Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Wed, 15 Oct 2025 10:01:47 +0200 Subject: [PATCH 19/22] condensation: lambda_D and lambda_K living for entire time of condensation --- src/detail/tmp_vector_pool.hpp | 2 +- src/impl/particles_impl.ipp | 4 +++- src/impl/particles_impl_cond.ipp | 6 ++---- src/impl/particles_impl_cond_sstp.ipp | 7 ++----- src/impl/particles_impl_hskpng_mfp.ipp | 8 ++++---- src/impl/particles_impl_hskpng_resize.ipp | 1 - src/impl/particles_impl_sstp.ipp | 3 --- src/particles_step.ipp | 4 ++++ tests/python/physics/lgrngn_cond.py | 8 ++++---- 9 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp index 4bc4075b7..f53ca9dfe 100644 --- a/src/detail/tmp_vector_pool.hpp +++ b/src/detail/tmp_vector_pool.hpp @@ -41,7 +41,7 @@ namespace libcloudphxx // Acquire an available vector, returns its index size_t acquire() { - std::cerr << "tmp_vector_pool: acquiring vector from pool of size " << pool.size() << "\n"; +// std::cerr << "tmp_vector_pool: acquiring vector from pool of size " << pool.size() << "\n"; for (size_t i = 0; i < pool.size(); ++i) { if (!pool[i].in_use) { pool[i].in_use = true; diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 4cabd2048..53647c1d7 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -224,7 +224,9 @@ namespace libcloudphxx sstp_dlt_p_gp, drv_gp, lft_id_gp, - rgt_id_gp; + rgt_id_gp, + lambda_D_gp, + lambda_K_gp; std::unique_ptr< typename tmp_vector_pool>::guard diff --git a/src/impl/particles_impl_cond.ipp b/src/impl/particles_impl_cond.ipp index 7135a5f16..3682e026c 100644 --- a/src/impl/particles_impl_cond.ipp +++ b/src/impl/particles_impl_cond.ipp @@ -18,10 +18,8 @@ namespace libcloudphxx namespace arg = thrust::placeholders; - auto lambda_D_g = tmp_device_real_cell.get_guard(); - auto lambda_K_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &lambda_D(lambda_D_g.get()); - thrust_device::vector &lambda_K(lambda_K_g.get()); + thrust_device::vector &lambda_D(lambda_D_gp->get()); + thrust_device::vector &lambda_K(lambda_K_gp->get()); // --- calc liquid water content before cond --- hskpng_sort(); diff --git a/src/impl/particles_impl_cond_sstp.ipp b/src/impl/particles_impl_cond_sstp.ipp index 1723b4824..1e3de43b0 100644 --- a/src/impl/particles_impl_cond_sstp.ipp +++ b/src/impl/particles_impl_cond_sstp.ipp @@ -37,11 +37,8 @@ namespace libcloudphxx const pres_iter &pi, const RH_iter &rhi ) { - - auto lambda_D_g = tmp_device_real_cell.get_guard(); - auto lambda_K_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &lambda_D(lambda_D_g.get()); - thrust_device::vector &lambda_K(lambda_K_g.get()); + thrust_device::vector &lambda_D(lambda_D_gp->get()); + thrust_device::vector &lambda_K(lambda_K_gp->get()); auto hlpr_zip_iter = thrust::make_zip_iterator(thrust::make_tuple( sstp_tmp_rh.begin(), diff --git a/src/impl/particles_impl_hskpng_mfp.ipp b/src/impl/particles_impl_hskpng_mfp.ipp index 00c0151d8..c29047db1 100644 --- a/src/impl/particles_impl_hskpng_mfp.ipp +++ b/src/impl/particles_impl_hskpng_mfp.ipp @@ -41,10 +41,10 @@ namespace libcloudphxx template void particles_t::impl::hskpng_mfp() { - auto lambda_D_g = tmp_device_real_cell.get_guard(); - auto lambda_K_g = tmp_device_real_cell.get_guard(); - thrust_device::vector &lambda_D(lambda_D_g.get()); - thrust_device::vector &lambda_K(lambda_K_g.get()); + reset_guardp(lambda_D_gp, tmp_device_real_cell); + reset_guardp(lambda_K_gp, tmp_device_real_cell); + thrust_device::vector &lambda_D(lambda_D_gp->get()); + thrust_device::vector &lambda_K(lambda_K_gp->get()); thrust::transform(T.begin(), T.end(), lambda_D.begin(), detail::common__mean_free_path__lambda_D()); thrust::transform(T.begin(), T.end(), p.begin(), lambda_K.begin(), detail::common__mean_free_path__lambda_K()); diff --git a/src/impl/particles_impl_hskpng_resize.ipp b/src/impl/particles_impl_hskpng_resize.ipp index 00c0c4008..ee0ca684b 100644 --- a/src/impl/particles_impl_hskpng_resize.ipp +++ b/src/impl/particles_impl_hskpng_resize.ipp @@ -29,7 +29,6 @@ namespace libcloudphxx for(auto &vec: resize_size_vctrs) vec->resize(n_part); - } }; }; diff --git a/src/impl/particles_impl_sstp.ipp b/src/impl/particles_impl_sstp.ipp index da81a2c7b..83bf7b30e 100644 --- a/src/impl/particles_impl_sstp.ipp +++ b/src/impl/particles_impl_sstp.ipp @@ -126,10 +126,7 @@ namespace libcloudphxx reset_guardp(sstp_dlt_th_gp, tmp_device_real_part); reset_guardp(sstp_dlt_rhod_gp, tmp_device_real_part); if(opts_init.const_p) - { reset_guardp(sstp_dlt_p_gp, tmp_device_real_part); - dlt[3] = &(sstp_dlt_p_gp->get()); - } } dlt[0] = &(sstp_dlt_rv_gp->get()); diff --git a/src/particles_step.ipp b/src/particles_step.ipp index e683de04f..d43167cc3 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -215,6 +215,10 @@ namespace libcloudphxx } } + // destroy guards to temporary arrays used in condensation (they were created in hskpng_mfp) + pimpl->lambda_D_gp.reset(); + pimpl->lambda_K_gp.reset(); + nancheck(pimpl->th, " th after cond"); nancheck(pimpl->rv, " rv after cond"); diff --git a/tests/python/physics/lgrngn_cond.py b/tests/python/physics/lgrngn_cond.py index 10483d057..0562d0259 100644 --- a/tests/python/physics/lgrngn_cond.py +++ b/tests/python/physics/lgrngn_cond.py @@ -119,13 +119,13 @@ def test(RH_formula, _step_count, substep_count, exact_substep, constp, opts_dt) exectime += timeit.timeit(wrapped, number=1) prtcls.step_async(opts) opts.cond = True - # print step, supersaturation(prtcls), temperature(prtcls), pressure(prtcls), th[0], rv[0] + # print(step, supersaturation(prtcls), temperature(prtcls), pressure(prtcls), th[0], rv[0]) ss_post_cond = supersaturation(prtcls) print("supersaturation after condensation", ss_post_cond, th[0], rv[0]) - assert(abs(th[0] - exp_th[constp]) < 1e-3 * exp_th[constp]) - assert(abs(rv[0] - exp_rv[constp]) < 1e-2 * exp_rv[constp]) + assert(abs(th[0] - exp_th[constp]) < 1e-4 * exp_th[constp]) + assert(abs(rv[0] - exp_rv[constp]) < 1e-3 * exp_rv[constp]) rv_diff = rv_init.copy() - rv[0].copy() # change to subsaturated air - test evaporation @@ -136,7 +136,7 @@ def test(RH_formula, _step_count, substep_count, exact_substep, constp, opts_dt) wrapped = wrapper(prtcls.step_sync, opts, th, rv, rhod) exectime += timeit.timeit(wrapped, number=1) prtcls.step_async(opts) - print(step, supersaturation(prtcls), temperature(prtcls), pressure(prtcls), th[0], rv[0]) + # print(step, supersaturation(prtcls), temperature(prtcls), pressure(prtcls), th[0], rv[0]) ss_post_evap = supersaturation(prtcls) print("supersaturation after evaporation", ss_post_evap, th[0], rv[0]) From 12397e0fac245e5e15c272aa4d2160994008dd5f Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Wed, 15 Oct 2025 10:57:27 +0200 Subject: [PATCH 20/22] rw_mom3 calculated once per substep of condensation --- src/impl/particles_impl.ipp | 7 +++-- src/impl/particles_impl_cond.ipp | 52 ++++++++++++++++++++++++-------- src/particles_step.ipp | 2 +- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 53647c1d7..23847e30b 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -226,7 +226,8 @@ namespace libcloudphxx lft_id_gp, rgt_id_gp, lambda_D_gp, - lambda_K_gp; + lambda_K_gp, + rw_mom3_gp; std::unique_ptr< typename tmp_vector_pool>::guard @@ -357,7 +358,7 @@ namespace libcloudphxx allow_sstp_chem(_opts_init.sstp_chem > 1 || _opts_init.variable_dt_switch), pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)), // coal prob can be greater than one only in sd_conc simulations //tmp_device_real_part(6), - tmp_device_real_cell(3) // 3 temporary vectors of this type; NOTE: default is 1 + tmp_device_real_cell(4) // 4 temporary vectors of this type; NOTE: default constructor creates 1 { // set 0 dev_count to mark that its not a multi_CUDA spawn @@ -600,7 +601,7 @@ namespace libcloudphxx void subs(const real_t &dt); void cond_dm3_helper(); - void cond(const real_t &dt, const real_t &RH_max, const bool turb_cond); + void cond(const real_t &dt, const real_t &RH_max, const bool turb_cond, const int step); void cond_sstp(const real_t &dt, const real_t &RH_max, const bool turb_cond); template void cond_sstp_hlpr(const real_t &dt, const real_t &RH_max, const thrust_device::vector &Tp, const pres_iter &pi, const RH_iter &rhi); diff --git a/src/impl/particles_impl_cond.ipp b/src/impl/particles_impl_cond.ipp index 3682e026c..9cbaa09ed 100644 --- a/src/impl/particles_impl_cond.ipp +++ b/src/impl/particles_impl_cond.ipp @@ -13,7 +13,8 @@ namespace libcloudphxx void particles_t::impl::cond( const real_t &dt, const real_t &RH_max, - const bool turb_cond + const bool turb_cond, + const int step ) { namespace arg = thrust::placeholders; @@ -25,18 +26,30 @@ namespace libcloudphxx hskpng_sort(); auto drv_g = tmp_device_real_cell.get_guard(); thrust_device::vector &drv = drv_g.get(); + if(step == 0) + reset_guardp(rw_mom3_gp, tmp_device_real_cell); + thrust_device::vector &rw_mom3 = rw_mom3_gp->get(); // calculating the 3rd wet moment before condensation - moms_all(); - moms_calc(rw2.begin(), real_t(3./2.)); - nancheck_range(count_mom.begin(), count_mom.begin() + count_n, "count_mom (3rd wet moment) before condensation"); + if(step == 0) + { + moms_all(); + moms_calc(rw2.begin(), real_t(3./2.)); + nancheck_range(count_mom.begin(), count_mom.begin() + count_n, "count_mom (3rd wet moment) before condensation"); + + // store 3rd moment of rw + // fill with 0s if not all cells will be updated in the following transform + if(count_n!=n_cell) thrust::fill(rw_mom3.begin(), rw_mom3.end(), real_t(0.)); + thrust::copy( + count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg + thrust::make_permutation_iterator(rw_mom3.begin(), count_ijk.begin()) // output + ); + } - // permute-copying the result to -dm_3 - // fill with 0s if not all cells will be updated in the following transform - if(count_n!=n_cell) thrust::fill(drv.begin(), drv.end(), real_t(0.)); + // drv = -rw_mom3 precond thrust::transform( - count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg - thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output + rw_mom3.begin(), rw_mom3.end(), + drv.begin(), thrust::negate() ); @@ -97,16 +110,31 @@ namespace libcloudphxx moms_calc(rw2.begin(), real_t(3./2.)); nancheck_range(count_mom.begin(), count_mom.begin() + count_n, "count_mom (3rd wet moment) after condensation"); + thrust::copy( + count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg + thrust::make_permutation_iterator(rw_mom3.begin(), count_ijk.begin()) // output + ); + // adding the third moment after condensation to dm_3 thrust::transform( - count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg - thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // input - 2nd arg - thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output + rw_mom3.begin(), rw_mom3.end(), + drv.begin(), + drv.begin(), thrust::plus() ); + // thrust::transform( + // count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg + // thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // input - 2nd arg + // thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output + // thrust::plus() + // ); + // update th and rv according to changes in third specific wet moment update_th_rv(drv); + + if(step == sstp_cond - 1) + rw_mom3_gp.reset(); // destroy guard to tmp array that stored 3rd moment of rw } }; }; diff --git a/src/particles_step.ipp b/src/particles_step.ipp index d43167cc3..6e11a3ad1 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -211,7 +211,7 @@ namespace libcloudphxx if(opts.turb_cond) pimpl->sstp_step_ssp(pimpl->dt / pimpl->sstp_cond); pimpl->hskpng_Tpr(); - pimpl->cond(pimpl->dt / pimpl->sstp_cond, opts.RH_max, opts.turb_cond); + pimpl->cond(pimpl->dt / pimpl->sstp_cond, opts.RH_max, opts.turb_cond, step); } } From 71c5d746d33e45baed51b1e3d7c2244e9a1f79f4 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Wed, 15 Oct 2025 11:34:17 +0200 Subject: [PATCH 21/22] condensation: store rw3 to decrease number of calculations --- src/impl/particles_impl.ipp | 6 ++- src/impl/particles_impl_cond.ipp | 78 +++++++++++++++------------ src/impl/particles_impl_cond_sstp.ipp | 66 +++++++++++++++++------ src/particles_step.ipp | 2 +- 4 files changed, 101 insertions(+), 51 deletions(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 23847e30b..2fdbdd746 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -227,7 +227,8 @@ namespace libcloudphxx rgt_id_gp, lambda_D_gp, lambda_K_gp, - rw_mom3_gp; + rw_mom3_gp, + rw3_gp; std::unique_ptr< typename tmp_vector_pool>::guard @@ -450,6 +451,7 @@ namespace libcloudphxx tmp_device_real_part.add_vector(); if(allow_sstp_cond && opts_init.exact_sstp_cond) { + tmp_device_real_part.add_vector(); tmp_device_real_part.add_vector(); tmp_device_real_part.add_vector(); if(opts_init.const_p) @@ -602,7 +604,7 @@ namespace libcloudphxx void cond_dm3_helper(); void cond(const real_t &dt, const real_t &RH_max, const bool turb_cond, const int step); - void cond_sstp(const real_t &dt, const real_t &RH_max, const bool turb_cond); + void cond_sstp(const real_t &dt, const real_t &RH_max, const bool turb_cond, const int step); template void cond_sstp_hlpr(const real_t &dt, const real_t &RH_max, const thrust_device::vector &Tp, const pres_iter &pi, const RH_iter &rhi); void update_th_rv(thrust_device::vector &); diff --git a/src/impl/particles_impl_cond.ipp b/src/impl/particles_impl_cond.ipp index 9cbaa09ed..d8c06960d 100644 --- a/src/impl/particles_impl_cond.ipp +++ b/src/impl/particles_impl_cond.ipp @@ -37,21 +37,29 @@ namespace libcloudphxx moms_calc(rw2.begin(), real_t(3./2.)); nancheck_range(count_mom.begin(), count_mom.begin() + count_n, "count_mom (3rd wet moment) before condensation"); - // store 3rd moment of rw - // fill with 0s if not all cells will be updated in the following transform - if(count_n!=n_cell) thrust::fill(rw_mom3.begin(), rw_mom3.end(), real_t(0.)); - thrust::copy( - count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg - thrust::make_permutation_iterator(rw_mom3.begin(), count_ijk.begin()) // output + // fill with 0s if not all cells have particles + if(count_n!=n_cell) + { + thrust::fill(drv.begin(), drv.end(), real_t(0.)); + thrust::fill(rw_mom3.begin(), rw_mom3.end(), real_t(0.)); + } + + // drv = -rw_mom3 precond + thrust::transform( + count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg + thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output + thrust::negate() + ); + } + else // copy rw_mom3 from previous step + { + // drv = -rw_mom3 precond + thrust::transform( + rw_mom3.begin(), rw_mom3.end(), + drv.begin(), + thrust::negate() ); } - - // drv = -rw_mom3 precond - thrust::transform( - rw_mom3.begin(), rw_mom3.end(), - drv.begin(), - thrust::negate() - ); auto hlpr_zip_iter = thrust::make_zip_iterator(thrust::make_tuple( thrust::make_permutation_iterator(rhod.begin(), ijk.begin()), @@ -110,31 +118,35 @@ namespace libcloudphxx moms_calc(rw2.begin(), real_t(3./2.)); nancheck_range(count_mom.begin(), count_mom.begin() + count_n, "count_mom (3rd wet moment) after condensation"); - thrust::copy( - count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg - thrust::make_permutation_iterator(rw_mom3.begin(), count_ijk.begin()) // output - ); - - // adding the third moment after condensation to dm_3 - thrust::transform( - rw_mom3.begin(), rw_mom3.end(), - drv.begin(), - drv.begin(), - thrust::plus() - ); + if(step < sstp_cond - 1) + { + thrust::copy( + count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg + thrust::make_permutation_iterator(rw_mom3.begin(), count_ijk.begin()) // output + ); - // thrust::transform( - // count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg - // thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // input - 2nd arg - // thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output - // thrust::plus() - // ); + // adding the third moment after condensation to dm_3 + thrust::transform( + rw_mom3.begin(), rw_mom3.end(), + drv.begin(), + drv.begin(), + thrust::plus() + ); + } + else // last step, calculate change in 3rd moment and update th and rv + { + thrust::transform( + count_mom.begin(), count_mom.begin() + count_n, // input - 1st arg + thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // input - 2nd arg + thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output + thrust::plus() + ); + rw_mom3_gp.reset(); // destroy guard to tmp array that stored 3rd moment of rw + } // update th and rv according to changes in third specific wet moment update_th_rv(drv); - if(step == sstp_cond - 1) - rw_mom3_gp.reset(); // destroy guard to tmp array that stored 3rd moment of rw } }; }; diff --git a/src/impl/particles_impl_cond_sstp.ipp b/src/impl/particles_impl_cond_sstp.ipp index 1e3de43b0..62dc8f4d4 100644 --- a/src/impl/particles_impl_cond_sstp.ipp +++ b/src/impl/particles_impl_cond_sstp.ipp @@ -76,7 +76,8 @@ namespace libcloudphxx void particles_t::impl::cond_sstp( const real_t &dt, const real_t &RH_max, - const bool turb_cond + const bool turb_cond, + const int step ) { namespace arg = thrust::placeholders; @@ -86,13 +87,28 @@ namespace libcloudphxx // particle's local change in rv auto pdrv_g = tmp_device_real_part.get_guard(); thrust_device::vector &pdrv = pdrv_g.get(); + if(step == 0) + reset_guardp(rw3_gp, tmp_device_real_part); + thrust_device::vector &rw3 = rw3_gp->get(); + // -rw3_old - thrust::transform( - thrust::make_transform_iterator(rw2.begin(), detail::rw2torw3()), - thrust::make_transform_iterator(rw2.end(), detail::rw2torw3()), - pdrv.begin(), - thrust::negate() - ); + if(step == 0) + { + thrust::transform( + thrust::make_transform_iterator(rw2.begin(), detail::rw2torw3()), + thrust::make_transform_iterator(rw2.end(), detail::rw2torw3()), + pdrv.begin(), + thrust::negate() + ); + } + else + { + thrust::transform( + rw3.begin(), rw3.end(), + pdrv.begin(), + thrust::negate() + ); + } // vector for each particle's T auto Tp_g = tmp_device_real_part.get_guard(); @@ -200,14 +216,34 @@ namespace libcloudphxx ); } - // calc rw3_new - rw3_old - thrust::transform( - thrust::make_transform_iterator(rw2.begin(), detail::rw2torw3()), - thrust::make_transform_iterator(rw2.end(), detail::rw2torw3()), - pdrv.begin(), - pdrv.begin(), - thrust::plus() - ); + // rw3_new - rw3_old + if(step < sstp_cond - 1) + { + // rw3_new + thrust::transform( + rw2.begin(), rw2.end(), + rw3.begin(), + detail::rw2torw3() + ); + + thrust::transform( + rw3.begin(), rw3.end(), + pdrv.begin(), + pdrv.begin(), + thrust::plus() + ); + } + else // last step, no need to store rw3 + { + thrust::transform( + thrust::make_transform_iterator(rw2.begin(), detail::rw2torw3()), + thrust::make_transform_iterator(rw2.end(), detail::rw2torw3()), + pdrv.begin(), + pdrv.begin(), + thrust::plus() + ); + rw3_gp.reset(); // destroy guard to tmp array that stored rw3 + } // calc - 4/3 * pi * rho_w * n * (rw3_new - rw3_old) / (dV * rhod) thrust::transform( diff --git a/src/particles_step.ipp b/src/particles_step.ipp index 6e11a3ad1..c80c80b24 100644 --- a/src/particles_step.ipp +++ b/src/particles_step.ipp @@ -196,7 +196,7 @@ namespace libcloudphxx pimpl->sstp_step_exact(step); if(opts.turb_cond) pimpl->sstp_step_ssp(pimpl->dt / pimpl->sstp_cond); - pimpl->cond_sstp(pimpl->dt / pimpl->sstp_cond, opts.RH_max, opts.turb_cond); + pimpl->cond_sstp(pimpl->dt / pimpl->sstp_cond, opts.RH_max, opts.turb_cond, step); } // copy sstp_tmp_rv and th to rv and th pimpl->update_state(pimpl->rv, pimpl->sstp_tmp_rv); From 8582e95312b3636c718ef81816490b1293af2584 Mon Sep 17 00:00:00 2001 From: Piotr Dziekan Date: Tue, 21 Oct 2025 12:01:31 +0200 Subject: [PATCH 22/22] chemistry: one more tmp vector --- src/impl/particles_impl.ipp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp index 2fdbdd746..3bd6db436 100644 --- a/src/impl/particles_impl.ipp +++ b/src/impl/particles_impl.ipp @@ -447,7 +447,7 @@ namespace libcloudphxx // init number of temporary real vctrs if(opts_init.chem_switch || allow_sstp_cond || n_dims >= 2) tmp_device_real_part.add_vector(); - if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch || distmem()) + if(opts_init.chem_switch || (allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch || distmem()) tmp_device_real_part.add_vector(); if(allow_sstp_cond && opts_init.exact_sstp_cond) {