igfuw · pdziekan · Oct 21, 2025 · Oct 6, 2025 · Oct 6, 2025 · Oct 6, 2025
diff --git a/src/detail/tmp_vector_pool.hpp b/src/detail/tmp_vector_pool.hpp
@@ -0,0 +1,136 @@
+#pragma once
+#include <thrust/device_vector.h>
+#include <vector>
+#include <cassert>
+#include <stdexcept>
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+    // Fixed-size pool of reusable temporary vectors of type vec_t.
+    // A slot is handed out by acquire() (or, preferably, by a guard) and given
+    // back by release(); the vector object itself persists between uses.
+    // get() returns references into the pool's storage and add_vector() may
+    // reallocate it, so do not grow the pool while a slot is in use.
+    template<typename vec_t>
+    class tmp_vector_pool {
+        struct entry {
+            vec_t vec;
+            bool in_use = false;
+            explicit entry(size_t n) : vec(n) {}
+        };
+        std::vector<entry> pool;
+    public:
+        // Creates pool_size empty vectors (one by default).
+        tmp_vector_pool(size_t pool_size = 1): pool(pool_size, entry(0)) {}
+
+        // Appends one more (empty) vector to the pool.
+        void add_vector() {
+            pool.emplace_back(0);
+        }
+
+        // Resizes every vector in the pool, in use or not, to n elements.
+        void resize(size_t n) {
+            for (auto &e : pool) e.vec.resize(n);
+        }
+
+        // Reserves capacity for n elements in every vector in the pool.
+        void reserve(size_t n) {
+            for (auto &e : pool) e.vec.reserve(n);
+        }
+
+        // Marks the first free vector as in use and returns its index.
+        // An exhausted pool trips the assert in debug builds and throws
+        // std::runtime_error under NDEBUG rather than returning a bad index.
+        size_t acquire() {
+            for (size_t i = 0; i < pool.size(); ++i) {
+                if (!pool[i].in_use) {
+                    pool[i].in_use = true;
+                    return i;
+                }
+            }
+            assert(false && "No available temporary vectors in pool!");
+            throw std::runtime_error("No available temporary vectors in pool!");
+        }
+
+        // Marks the vector at idx as available again; idx must be in use.
+        void release(size_t idx) {
+            assert(idx < pool.size() && pool[idx].in_use);
+            pool[idx].in_use = false;
+        }
+
+        // Returns the vector at idx; idx must currently be acquired.
+        vec_t& get(size_t idx) {
+            assert(idx < pool.size() && pool[idx].in_use);
+            return pool[idx].vec;
+        }
+
+        // Move-only handle: acquires a vector on construction and releases it
+        // on destruction unless ownership was moved to another guard.
+        class guard {
+            // Pointer rather than reference, so that move assignment rebinds
+            // the guard instead of copy-assigning one pool object over another.
+            tmp_vector_pool<vec_t>* pool;
+            size_t idx;
+            bool valid;
+        public:
+            guard(tmp_vector_pool<vec_t>& pool_)
+                : pool(&pool_), idx(pool_.acquire()), valid(true) {}
+            ~guard() { if (valid) pool->release(idx); }
+            guard(const guard&) = delete;
+            guard& operator=(const guard&) = delete;
+            guard(guard&& other) noexcept : pool(other.pool), idx(other.idx), valid(other.valid) {
+                other.valid = false;
+            }
+            guard& operator=(guard&& other) noexcept {
+                if (this != &other) {
+                    if (valid) pool->release(idx);
+                    pool = other.pool;
+                    idx = other.idx;
+                    valid = other.valid;
+                    other.valid = false;
+                }
+                return *this;
+            }
+            // Returns the guarded vector; must not be called on a moved-from guard.
+            vec_t& get() {
+                assert(valid);
+                return pool->get(idx);
+            }
+            vec_t& operator*() { return get(); }
+        };
+
+        // Returns a guard holding a freshly acquired vector.
+        guard get_guard() {
+            return guard(*this);
+        }
+        // Heap-allocates a guard; the caller owns it and must delete it
+        // (e.g. by passing it to a smart pointer's reset(), as reset_guardp does).
+        guard* get_guardp() {
+            return new guard(*this);
+        }
+    };
+
+    // Replaces the guard held by guard_ptr with a newly acquired guard from pool.
+    template<typename GuardPtr, typename Pool>
+    void reset_guardp(GuardPtr& guard_ptr, Pool& pool) {
+        guard_ptr.reset(); // destroy the old guard first, so whatever it held is released before the next acquire
+        guard_ptr.reset(pool.get_guardp()); // acquire a new guard and hand its ownership to guard_ptr
+    }
+  };
+};
+
+
+// // Usage example 1:
+// {
+//     tmp_vector_pool<thrust::device_vector<float>>::guard tmp_g(pool);
+//     thrust::device_vector<float>& tmp = tmp_g.get();
+//     // Use tmp (or *tmp_g / tmp_g.get()) as a thrust::device_vector<float>&
+// } // Automatically released here
+
+// // Usage example 2:
+// size_t idx = pool.acquire();
+// thrust::device_vector<float>& tmp = pool.get(idx);
+// // Use tmp...
+// pool.release(idx); // Mark as available again
diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp
@@ -79,7 +79,7 @@ namespace libcloudphxx
         wp,  // turbulent perturbation of velocity
         ssp, // turbulent perturbation of supersaturation
         dot_ssp, // time derivative of the turbulent perturbation of supersaturation
-        sstp_tmp_rv, // either rv_old or advection-caused change in water vapour mixing ratio
+        sstp_tmp_rv, // either rv_old or advection-caused change in water vapour mixing ratio; NOTE: not using tmp_ vectors for this, because either size of the vector is ncell (for per-cell substepping) or size is npart, but value needs to be remembered between model steps (for per-particle)
         sstp_tmp_th, // ditto for theta
         sstp_tmp_rh, // ditto for rho
         sstp_tmp_p, // ditto for pressure
@@ -196,28 +196,47 @@ namespace libcloudphxx
       > chem_stepper;
 
       // temporary data
-      thrust::host_vector<real_t>
+      tmp_vector_pool<thrust::host_vector<real_t>> 
+        tmp_host_real_part,
         tmp_host_real_grid,
         tmp_host_real_cell;
-      thrust::host_vector<thrust_size_t>
+      tmp_vector_pool<thrust::host_vector<thrust_size_t>> 
+        tmp_host_size_part,
         tmp_host_size_cell;
-      thrust_device::vector<real_t>
+      tmp_vector_pool<thrust_device::vector<real_t>>       
         tmp_device_real_part,
-        tmp_device_real_part1,  
-        tmp_device_real_part2,  
-        tmp_device_real_part3,
-        tmp_device_real_part4,
-        tmp_device_real_part5,
-        tmp_device_real_cell,
-        tmp_device_real_cell1,
-        tmp_device_real_cell2,
-        &u01;  // uniform random numbers between 0 and 1 // TODO: use the tmp array as rand argument?
-      thrust_device::vector<unsigned int>
-        tmp_device_n_part,
-        &un; // uniform natural random numbers between 0 and max value of unsigned int
-      thrust_device::vector<thrust_size_t>
-        tmp_device_size_cell,
-        tmp_device_size_part;
+        tmp_device_real_cell;
+      tmp_vector_pool<thrust_device::vector<unsigned int>>
+        tmp_device_n_part;
+      tmp_vector_pool<thrust_device::vector<thrust_size_t>>
+        tmp_device_size_cell;
+        // tmp_device_size_part;
+
+      // guards for temp vectors that are used in multiple functions and need to stay unchanged in between
+      // tmp_vector_pool<thrust_device::vector<real_t>>::guard asd;
+      std::unique_ptr<
+        typename tmp_vector_pool<thrust_device::vector<real_t>>::guard
+      > n_filtered_gp,
+        V_gp,
+        sstp_dlt_rv_gp,
+        sstp_dlt_th_gp,
+        sstp_dlt_rhod_gp,
+        sstp_dlt_p_gp,
+        drv_gp,
+        lft_id_gp,
+        rgt_id_gp,
+        lambda_D_gp,
+        lambda_K_gp,
+        rw_mom3_gp,
+        rw3_gp;
+
+      std::unique_ptr<
+        typename tmp_vector_pool<thrust::host_vector<real_t>>::guard
+      > outbuf_gp;
+
+      std::unique_ptr<
+        typename tmp_vector_pool<thrust_device::vector<unsigned int>>::guard
+      > chem_flag_gp;
 
       // to simplify foreach calls
       const thrust::counting_iterator<thrust_size_t> zero;
@@ -260,7 +279,7 @@ namespace libcloudphxx
       thrust_device::vector<real_t> in_real_bfr, out_real_bfr;
 
       // ids of sds to be copied with distmem
-      thrust_device::vector<thrust_size_t> &lft_id, &rgt_id;
+      // thrust_device::vector<thrust_size_t> &lft_id, &rgt_id;
 
       // --- containters with vector pointers to help resize and copy vectors ---
 
@@ -270,18 +289,18 @@ namespace libcloudphxx
 //      std::set<thrust_device::vector<thrust_size_t>*>  distmem_size_vctrs; // no size vectors copied?
 //
       // vetors that are not in distmem_real_vctrs that need to be resized when the number of SDs changes, these are helper variables
-      std::set<thrust_device::vector<real_t>*>         resize_real_vctrs;
+//      std::set<thrust_device::vector<real_t>*>         resize_real_vctrs;
 //      std::set<thrust_device::vector<n_t>*>            resize_n_vctrs;
       std::set<thrust_device::vector<thrust_size_t>*>  resize_size_vctrs;
 
 
       // --- methods ---
 
       // fills u01 with n random real numbers uniformly distributed in range [0,1)
-      void rand_u01(thrust_size_t n) { rng.generate_n(u01, n); }
+      void rand_u01(thrust_device::vector<real_t> &u01, thrust_size_t n) { rng.generate_n(u01, n); } // u01 is the caller-supplied vector handed to rng.generate_n
 
       // fills un with n random integers uniformly distributed on the whole integer range
-      void rand_un(thrust_size_t n) { rng.generate_n(un, n); }
+      void rand_un(thrust_device::vector<unsigned int> &un, thrust_size_t n) { rng.generate_n(un, n); } // un is the caller-supplied vector handed to rng.generate_n
 
       // max(1, n)
       int m1(int n) { return n == 0 ? 1 : n; }
@@ -307,21 +326,19 @@ namespace libcloudphxx
         zero(0),
         n_part(0),
         sorted(false), 
-        u01(tmp_device_real_part),
         n_user_params(_opts_init.kernel_parameters.size()),
-        un(tmp_device_n_part),
         rng(_opts_init.rng_seed),
         src_stp_ctr(0),
         rlx_stp_ctr(0),
-	bcond(bcond),
+	      bcond(bcond),
         n_x_bfr(0),
         n_cell_bfr(0),
         mpi_rank(mpi_rank),
         mpi_size(mpi_size),
         lft_x1(-1),  // default to no
         rgt_x0(-1),  // MPI boudanry
-        lft_id(i),   // note: reuses i vector
-        rgt_id(tmp_device_size_part),
+        // lft_id(i),   // note: reuses i vector
+        // rgt_id(tmp_device_size_part),
         n_x_tot(n_x_tot),
         halo_size(_opts_init.adve_scheme == as_t::pred_corr ? 2 : 0), 
         halo_x( 
@@ -340,7 +357,9 @@ namespace libcloudphxx
         adve_scheme(_opts_init.adve_scheme),
         allow_sstp_cond(_opts_init.sstp_cond > 1 || _opts_init.variable_dt_switch),
         allow_sstp_chem(_opts_init.sstp_chem > 1 || _opts_init.variable_dt_switch),
-        pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)) // coal prob can be greater than one only in sd_conc simulations
+        pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)), // coal prob can be greater than one only in sd_conc simulations
+        //tmp_device_real_part(6),
+        tmp_device_real_cell(4) // 4 temporary vectors of this type; NOTE: default constructor creates 1
       {
 
         // set 0 dev_count to mark that its not a multi_CUDA spawn
@@ -425,24 +444,23 @@ namespace libcloudphxx
         // initializing distmem_n_vctrs - list of n_t vectors with properties of SDs that have to be copied/removed/recycled when a SD is copied/removed/recycled
         distmem_n_vctrs.insert(&n);
 
-        // real vctrs that need to be resized but do need to be copied in distmem
-        resize_real_vctrs.insert(&tmp_device_real_part);
+        // init number of temporary real vctrs
         if(opts_init.chem_switch || allow_sstp_cond || n_dims >= 2)
-          resize_real_vctrs.insert(&tmp_device_real_part1);
-        if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch)
-          resize_real_vctrs.insert(&tmp_device_real_part2);
+          tmp_device_real_part.add_vector();
+        if(opts_init.chem_switch || (allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch || distmem())
+          tmp_device_real_part.add_vector();
         if(allow_sstp_cond && opts_init.exact_sstp_cond)
         {
-          resize_real_vctrs.insert(&tmp_device_real_part3);
-          resize_real_vctrs.insert(&tmp_device_real_part4);
+          tmp_device_real_part.add_vector();
+          tmp_device_real_part.add_vector();
+          tmp_device_real_part.add_vector();
           if(opts_init.const_p)
-            resize_real_vctrs.insert(&tmp_device_real_part5);
+            tmp_device_real_part.add_vector();
         }
 
         resize_size_vctrs.insert(&ijk);
         resize_size_vctrs.insert(&sorted_ijk);
         resize_size_vctrs.insert(&sorted_id);
-        resize_size_vctrs.insert(&tmp_device_size_part);
         if (opts_init.nx != 0) resize_size_vctrs.insert(&i);
         if (opts_init.ny != 0) resize_size_vctrs.insert(&j);
         if (opts_init.nz != 0) resize_size_vctrs.insert(&k);
@@ -507,7 +525,7 @@ namespace libcloudphxx
       void init_kernel();
       void init_vterm();
 
-      void fill_outbuf();
+      void fill_outbuf(thrust::host_vector<real_t>&);
       std::vector<real_t> fill_attr_outbuf(const std::string&);
       void mpi_exchange();
 
@@ -585,8 +603,8 @@ namespace libcloudphxx
       void subs(const real_t &dt);
 
       void cond_dm3_helper();
-      void cond(const real_t &dt, const real_t &RH_max, const bool turb_cond);
-      void cond_sstp(const real_t &dt, const real_t &RH_max, const bool turb_cond);
+      void cond(const real_t &dt, const real_t &RH_max, const bool turb_cond, const int step);
+      void cond_sstp(const real_t &dt, const real_t &RH_max, const bool turb_cond, const int step);
       template<class pres_iter, class RH_iter>
       void cond_sstp_hlpr(const real_t &dt, const real_t &RH_max, const thrust_device::vector<real_t> &Tp, const pres_iter &pi, const RH_iter &rhi);
       void update_th_rv(thrust_device::vector<real_t> &);
@@ -602,6 +620,7 @@ namespace libcloudphxx
       void chem_dissoc();
       void chem_react(const real_t &dt);
       void chem_cleanup();
+      void chem_post_step();
 
       thrust_size_t rcyc();
       void bcnd();

diff --git a/src/impl/particles_impl_adve.ipp b/src/impl/particles_impl_adve.ipp
@@ -179,10 +179,13 @@ namespace libcloudphxx
       namespace arg = thrust::placeholders;
 
       // old positions storage
+      auto x_old_g = tmp_device_real_part.get_guard(),
+           y_old_g = tmp_device_real_part.get_guard(),
+           z_old_g = tmp_device_real_part.get_guard();
       thrust_device::vector<real_t> 
-        &x_old(tmp_device_real_part),
-        &y_old(tmp_device_real_part2),
-        &z_old(tmp_device_real_part1);
+        &x_old(x_old_g.get()),
+        &y_old(y_old_g.get()),
+        &z_old(z_old_g.get());
 
       // shift to coordiante system starting at halo's left edge
       thrust::transform(x.begin(), x.end(), x.begin(), arg::_1 + real_t(halo_size) * opts_init.dx);

diff --git a/src/impl/particles_impl_ante_adding_SD.ipp b/src/impl/particles_impl_ante_adding_SD.ipp
@@ -14,7 +14,9 @@ namespace libcloudphxx
     {   
       // --- calc liquid water content before src ---
       hskpng_sort(); 
-      thrust_device::vector<real_t> &drv(tmp_device_real_cell1); // NOTE: this can't be changed by any function called before a call to after_adding_SD...
+      reset_guardp(drv_gp, tmp_device_real_cell); 
+      thrust_device::vector<real_t> &drv = drv_gp->get();
+
       thrust::fill(drv.begin(), drv.end(), real_t(0.));
 
       moms_all();

diff --git a/src/impl/particles_impl_bcnd.ipp b/src/impl/particles_impl_bcnd.ipp
@@ -109,6 +109,11 @@ namespace libcloudphxx
           {
 	          namespace arg = thrust::placeholders;
 
+            reset_guardp(lft_id_gp, tmp_device_real_part); 
+            thrust_device::vector<real_t> &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
+            reset_guardp(rgt_id_gp, tmp_device_real_part);
+            thrust_device::vector<real_t> &rgt_id(rgt_id_gp->get());
+
             // save ids of SDs to copy
             lft_count = thrust::copy_if(
               zero, zero+n_part,
@@ -194,7 +199,8 @@ namespace libcloudphxx
               {
                 namespace arg = thrust::placeholders;
 
-                thrust_device::vector<real_t> &n_filtered(tmp_device_real_part);
+                auto n_filtered_g = tmp_device_real_part.get_guard();
+                thrust_device::vector<real_t> &n_filtered = n_filtered_g.get();
 
                 thrust::fill(n_filtered.begin(), n_filtered.end(), 0.);