Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0c2adb5
pool of temp vectors; tmp_host_real_grid done
pdziekan Oct 6, 2025
7aa5fce
pool of temp vectors; impl_init_n done
pdziekan Oct 6, 2025
eb62433
pool of temp vectors; tmp_host_real_cell
pdziekan Oct 6, 2025
c55c3e8
pool of temp vectors; tmp_host_real_cell
pdziekan Oct 6, 2025
f7b93f6
pool of temp vectors; tmp_device_real_part p.1
pdziekan Oct 7, 2025
9cfddb2
pool of temp vectors; tmp_device_real_part p.2
pdziekan Oct 7, 2025
18ddd99
pool of temp vectors; tmp_device_real_cell p.1
pdziekan Oct 7, 2025
7db779b
pool of temp vectors; tmp_device_real_cell p.2
pdziekan Oct 7, 2025
22e92df
pool of temp vectors; tmp_device_n_part
pdziekan Oct 7, 2025
7c4d199
pool of temp vectors; tmp_device_size_cell
pdziekan Oct 7, 2025
ace5ff1
pool of temp vectors; tmp_device_size_part
pdziekan Oct 7, 2025
c57cc57
fix rand usage
pdziekan Oct 8, 2025
20d6fae
compilation fixes
pdziekan Oct 8, 2025
da4d8cf
get rid of tmp_device_size_part; use tmp_device_real_part instead; Al…
pdziekan Oct 9, 2025
3919682
n_filtered tmp arrays in diag
pdziekan Oct 9, 2025
61ea8c5
fix unit tests
pdziekan Oct 10, 2025
cc46914
mcuda outbuf fixes
pdziekan Oct 10, 2025
513b58d
distmem: 1 more tmp vector; larger epsilon in lgrngn_cond
pdziekan Oct 13, 2025
9cce252
condensation: lambda_D and lambda_K living for entire time of condens…
pdziekan Oct 15, 2025
12397e0
rw_mom3 calculated once per substep of condensation
pdziekan Oct 15, 2025
71c5d74
condensation: store rw3 to decrease number of calculations
pdziekan Oct 15, 2025
8582e95
chemistry: one more tmp vector
pdziekan Oct 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions src/detail/tmp_vector_pool.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#pragma once
#include <thrust/device_vector.h>
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <vector>


namespace libcloudphxx
{
namespace lgrngn
{
/// Pool of reusable temporary vectors of a single type `vec_t`.
///
/// Each pooled vector carries an "in use" flag. A vector is handed out by
/// index via acquire() (or wrapped in a `guard`) and becomes available
/// again after release(). All vectors in the pool are resized/reserved
/// together.
///
/// NOTE: add_vector() appends to the underlying std::vector of entries and
/// may therefore reallocate it; references previously obtained from get()
/// should not be kept across a call to add_vector(). Indices (and guards,
/// which store an index) stay valid.
template<typename vec_t>
class tmp_vector_pool {
  // A pooled vector together with its "currently handed out" flag.
  struct entry {
    vec_t vec;
    bool in_use = false;
    explicit entry(std::size_t n) : vec(n) {}
  };
  std::vector<entry> pool;

public:
  /// Creates a pool holding `pool_size` empty (size 0) vectors; defaults to one.
  tmp_vector_pool(std::size_t pool_size = 1) : pool(pool_size, entry(0)) {}

  /// Appends one more empty (size 0) vector to the pool.
  void add_vector() {
    pool.emplace_back(0);
  }

  /// Resizes every vector in the pool (in use or not) to `n` elements.
  void resize(std::size_t n) {
    for (auto &e : pool)
      e.vec.resize(n);
  }

  /// Reserves capacity for `n` elements in every vector in the pool.
  void reserve(std::size_t n) {
    for (auto &e : pool)
      e.vec.reserve(n);
  }

  /// Marks the first free vector as in use and returns its index.
  ///
  /// If every vector is already in use this is a programming error: the
  /// assert fires in debug builds; when asserts are compiled out, a
  /// std::runtime_error is thrown instead of returning an invalid index.
  std::size_t acquire() {
    for (std::size_t i = 0; i < pool.size(); ++i) {
      if (!pool[i].in_use) {
        pool[i].in_use = true;
        return i;
      }
    }
    assert(false && "No available temporary vectors in pool!");
    throw std::runtime_error("No available temporary vectors in pool!");
  }

  /// Marks the vector at `idx` as available again; `idx` must be in use.
  void release(std::size_t idx) {
    assert(idx < pool.size() && pool[idx].in_use);
    pool[idx].in_use = false;
  }

  /// Returns the vector at `idx`; `idx` must have been acquired.
  vec_t& get(std::size_t idx) {
    assert(idx < pool.size() && pool[idx].in_use);
    return pool[idx].vec;
  }

  /// Scoped holder of one pooled vector: acquires on construction and
  /// releases on destruction. Movable, not copyable; a moved-from guard
  /// no longer releases anything and must not be dereferenced.
  class guard {
    // Stored as a pointer rather than a reference so that move assignment
    // can rebind the guard to another pool; assigning through a reference
    // member would copy-assign the pool object itself.
    tmp_vector_pool<vec_t>* pool;
    std::size_t idx;
    bool valid;

  public:
    guard(tmp_vector_pool<vec_t>& pool_)
      : pool(&pool_), idx(pool_.acquire()), valid(true) {}

    ~guard() { if (valid) pool->release(idx); }

    guard(const guard&) = delete;
    guard& operator=(const guard&) = delete;

    guard(guard&& other) noexcept
      : pool(other.pool), idx(other.idx), valid(other.valid) {
      other.valid = false;
    }

    guard& operator=(guard&& other) noexcept {
      if (this != &other) {
        // give back the vector held so far before taking over other's
        if (valid) pool->release(idx);
        pool = other.pool;
        idx = other.idx;
        valid = other.valid;
        other.valid = false;
      }
      return *this;
    }

    /// Returns the held vector.
    vec_t& get() {
      assert(valid && "guard used after being moved from");
      return pool->get(idx);
    }

    /// Same as get().
    vec_t& operator*() {
      assert(valid && "guard used after being moved from");
      return pool->get(idx);
    }
  };

  /// Acquires a vector and returns a guard holding it.
  guard get_guard() {
    return guard(*this);
  }

  /// Acquires a vector and returns a heap-allocated guard holding it.
  /// The caller owns the returned pointer and is responsible for deleting
  /// it (e.g. by handing it straight to a smart pointer's reset()).
  guard* get_guardp() {
    return new guard(*this);
  }
};

// Re-arms a guard smart pointer with a freshly acquired guard from `pool`.
// The guard currently held (if any) is destroyed first, so that it is gone
// before get_guardp() is asked for a new one.
template<typename GuardPtr, typename Pool>
void reset_guardp(GuardPtr& guard_ptr, Pool& pool) {
  // step 1: drop whatever guard_ptr owns at the moment
  guard_ptr.reset();

  // step 2: obtain a new guard from the pool and hand its ownership to guard_ptr
  auto* const fresh_guard = pool.get_guardp();
  guard_ptr.reset(fresh_guard);
}
};
};


// // Usage example 1:
// {
// tmp_vector_pool<thrust::device_vector<float>>::guard tmp_g(pool);
// thrust::device_vector<float>& tmp = tmp_g.get();
// // Use *tmp_g or tmp_g.get() as a thrust::device_vector<float>&
// } // Automatically released here

// // Usage example 2:
// size_t idx = pool.acquire();
// thrust::device_vector<float>& tmp = pool.get(idx);
// // Use tmp...
// pool.release(idx); // Mark as available again
101 changes: 60 additions & 41 deletions src/impl/particles_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ namespace libcloudphxx
wp, // turbulent perturbation of velocity
ssp, // turbulent perturbation of supersaturation
dot_ssp, // time derivative of the turbulent perturbation of supersaturation
sstp_tmp_rv, // either rv_old or advection-caused change in water vapour mixing ratio
sstp_tmp_rv, // either rv_old or advection-caused change in water vapour mixing ratio; NOTE: not using tmp_ vectors for this, because either size of the vector is ncell (for per-cell substepping) or size is npart, but value needs to be remembered between model steps (for per-particle)
sstp_tmp_th, // ditto for theta
sstp_tmp_rh, // ditto for rho
sstp_tmp_p, // ditto for pressure
Expand Down Expand Up @@ -196,28 +196,47 @@ namespace libcloudphxx
> chem_stepper;

// temporary data
thrust::host_vector<real_t>
tmp_vector_pool<thrust::host_vector<real_t>>
tmp_host_real_part,
tmp_host_real_grid,
tmp_host_real_cell;
thrust::host_vector<thrust_size_t>
tmp_vector_pool<thrust::host_vector<thrust_size_t>>
tmp_host_size_part,
tmp_host_size_cell;
thrust_device::vector<real_t>
tmp_vector_pool<thrust_device::vector<real_t>>
tmp_device_real_part,
tmp_device_real_part1,
tmp_device_real_part2,
tmp_device_real_part3,
tmp_device_real_part4,
tmp_device_real_part5,
tmp_device_real_cell,
tmp_device_real_cell1,
tmp_device_real_cell2,
&u01; // uniform random numbers between 0 and 1 // TODO: use the tmp array as rand argument?
thrust_device::vector<unsigned int>
tmp_device_n_part,
&un; // uniform natural random numbers between 0 and max value of unsigned int
thrust_device::vector<thrust_size_t>
tmp_device_size_cell,
tmp_device_size_part;
tmp_device_real_cell;
tmp_vector_pool<thrust_device::vector<unsigned int>>
tmp_device_n_part;
tmp_vector_pool<thrust_device::vector<thrust_size_t>>
tmp_device_size_cell;
// tmp_device_size_part;

// guards for temp vectors that are used in multiple functions and need to stay unchanged in between
// tmp_vector_pool<thrust_device::vector<real_t>>::guard asd;
std::unique_ptr<
typename tmp_vector_pool<thrust_device::vector<real_t>>::guard
> n_filtered_gp,
V_gp,
sstp_dlt_rv_gp,
sstp_dlt_th_gp,
sstp_dlt_rhod_gp,
sstp_dlt_p_gp,
drv_gp,
lft_id_gp,
rgt_id_gp,
lambda_D_gp,
lambda_K_gp,
rw_mom3_gp,
rw3_gp;

std::unique_ptr<
typename tmp_vector_pool<thrust::host_vector<real_t>>::guard
> outbuf_gp;

std::unique_ptr<
typename tmp_vector_pool<thrust_device::vector<unsigned int>>::guard
> chem_flag_gp;

// to simplify foreach calls
const thrust::counting_iterator<thrust_size_t> zero;
Expand Down Expand Up @@ -260,7 +279,7 @@ namespace libcloudphxx
thrust_device::vector<real_t> in_real_bfr, out_real_bfr;

// ids of sds to be copied with distmem
thrust_device::vector<thrust_size_t> &lft_id, &rgt_id;
// thrust_device::vector<thrust_size_t> &lft_id, &rgt_id;

// --- containers with vector pointers to help resize and copy vectors ---

Expand All @@ -270,18 +289,18 @@ namespace libcloudphxx
// std::set<thrust_device::vector<thrust_size_t>*> distmem_size_vctrs; // no size vectors copied?
//
// vectors that are not in distmem_real_vctrs that need to be resized when the number of SDs changes, these are helper variables
std::set<thrust_device::vector<real_t>*> resize_real_vctrs;
// std::set<thrust_device::vector<real_t>*> resize_real_vctrs;
// std::set<thrust_device::vector<n_t>*> resize_n_vctrs;
std::set<thrust_device::vector<thrust_size_t>*> resize_size_vctrs;


// --- methods ---

// fills u01 with n random real numbers uniformly distributed in range [0,1)
void rand_u01(thrust_size_t n) { rng.generate_n(u01, n); }
void rand_u01(thrust_device::vector<real_t> &u01, thrust_size_t n) { rng.generate_n(u01, n); }

// fills un with n random integers uniformly distributed on the whole integer range
void rand_un(thrust_size_t n) { rng.generate_n(un, n); }
void rand_un(thrust_device::vector<unsigned int> &un, thrust_size_t n) { rng.generate_n(un, n); }

// max(1, n)
int m1(int n) { return n == 0 ? 1 : n; }
Expand All @@ -307,21 +326,19 @@ namespace libcloudphxx
zero(0),
n_part(0),
sorted(false),
u01(tmp_device_real_part),
n_user_params(_opts_init.kernel_parameters.size()),
un(tmp_device_n_part),
rng(_opts_init.rng_seed),
src_stp_ctr(0),
rlx_stp_ctr(0),
bcond(bcond),
bcond(bcond),
n_x_bfr(0),
n_cell_bfr(0),
mpi_rank(mpi_rank),
mpi_size(mpi_size),
lft_x1(-1), // default to no
rgt_x0(-1), // MPI boudanry
lft_id(i), // note: reuses i vector
rgt_id(tmp_device_size_part),
// lft_id(i), // note: reuses i vector
// rgt_id(tmp_device_size_part),
n_x_tot(n_x_tot),
halo_size(_opts_init.adve_scheme == as_t::pred_corr ? 2 : 0),
halo_x(
Expand All @@ -340,7 +357,9 @@ namespace libcloudphxx
adve_scheme(_opts_init.adve_scheme),
allow_sstp_cond(_opts_init.sstp_cond > 1 || _opts_init.variable_dt_switch),
allow_sstp_chem(_opts_init.sstp_chem > 1 || _opts_init.variable_dt_switch),
pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)) // coal prob can be greater than one only in sd_conc simulations
pure_const_multi (((_opts_init.sd_conc) == 0) && (_opts_init.sd_const_multi > 0 || _opts_init.dry_sizes.size() > 0)), // coal prob can be greater than one only in sd_conc simulations
//tmp_device_real_part(6),
tmp_device_real_cell(4) // 4 temporary vectors of this type; NOTE: default constructor creates 1
{

// set 0 dev_count to mark that its not a multi_CUDA spawn
Expand Down Expand Up @@ -425,24 +444,23 @@ namespace libcloudphxx
// initializing distmem_n_vctrs - list of n_t vectors with properties of SDs that have to be copied/removed/recycled when a SD is copied/removed/recycled
distmem_n_vctrs.insert(&n);

// real vctrs that need to be resized but do need to be copied in distmem
resize_real_vctrs.insert(&tmp_device_real_part);
// init number of temporary real vctrs
if(opts_init.chem_switch || allow_sstp_cond || n_dims >= 2)
resize_real_vctrs.insert(&tmp_device_real_part1);
if((allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch)
resize_real_vctrs.insert(&tmp_device_real_part2);
tmp_device_real_part.add_vector();
if(opts_init.chem_switch || (allow_sstp_cond && opts_init.exact_sstp_cond) || n_dims==3 || opts_init.turb_cond_switch || distmem())
tmp_device_real_part.add_vector();
if(allow_sstp_cond && opts_init.exact_sstp_cond)
{
resize_real_vctrs.insert(&tmp_device_real_part3);
resize_real_vctrs.insert(&tmp_device_real_part4);
tmp_device_real_part.add_vector();
tmp_device_real_part.add_vector();
tmp_device_real_part.add_vector();
if(opts_init.const_p)
resize_real_vctrs.insert(&tmp_device_real_part5);
tmp_device_real_part.add_vector();
}

resize_size_vctrs.insert(&ijk);
resize_size_vctrs.insert(&sorted_ijk);
resize_size_vctrs.insert(&sorted_id);
resize_size_vctrs.insert(&tmp_device_size_part);
if (opts_init.nx != 0) resize_size_vctrs.insert(&i);
if (opts_init.ny != 0) resize_size_vctrs.insert(&j);
if (opts_init.nz != 0) resize_size_vctrs.insert(&k);
Expand Down Expand Up @@ -507,7 +525,7 @@ namespace libcloudphxx
void init_kernel();
void init_vterm();

void fill_outbuf();
void fill_outbuf(thrust::host_vector<real_t>&);
std::vector<real_t> fill_attr_outbuf(const std::string&);
void mpi_exchange();

Expand Down Expand Up @@ -585,8 +603,8 @@ namespace libcloudphxx
void subs(const real_t &dt);

void cond_dm3_helper();
void cond(const real_t &dt, const real_t &RH_max, const bool turb_cond);
void cond_sstp(const real_t &dt, const real_t &RH_max, const bool turb_cond);
void cond(const real_t &dt, const real_t &RH_max, const bool turb_cond, const int step);
void cond_sstp(const real_t &dt, const real_t &RH_max, const bool turb_cond, const int step);
template<class pres_iter, class RH_iter>
void cond_sstp_hlpr(const real_t &dt, const real_t &RH_max, const thrust_device::vector<real_t> &Tp, const pres_iter &pi, const RH_iter &rhi);
void update_th_rv(thrust_device::vector<real_t> &);
Expand All @@ -602,6 +620,7 @@ namespace libcloudphxx
void chem_dissoc();
void chem_react(const real_t &dt);
void chem_cleanup();
void chem_post_step();

thrust_size_t rcyc();
void bcnd();
Expand Down
9 changes: 6 additions & 3 deletions src/impl/particles_impl_adve.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,13 @@ namespace libcloudphxx
namespace arg = thrust::placeholders;

// old positions storage
auto x_old_g = tmp_device_real_part.get_guard(),
y_old_g = tmp_device_real_part.get_guard(),
z_old_g = tmp_device_real_part.get_guard();
thrust_device::vector<real_t>
&x_old(tmp_device_real_part),
&y_old(tmp_device_real_part2),
&z_old(tmp_device_real_part1);
&x_old(x_old_g.get()),
&y_old(y_old_g.get()),
&z_old(z_old_g.get());

// shift to coordinate system starting at halo's left edge
thrust::transform(x.begin(), x.end(), x.begin(), arg::_1 + real_t(halo_size) * opts_init.dx);
Expand Down
4 changes: 3 additions & 1 deletion src/impl/particles_impl_ante_adding_SD.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ namespace libcloudphxx
{
// --- calc liquid water content before src ---
hskpng_sort();
thrust_device::vector<real_t> &drv(tmp_device_real_cell1); // NOTE: this can't be changed by any function called before a call to after_adding_SD...
reset_guardp(drv_gp, tmp_device_real_cell);
thrust_device::vector<real_t> &drv = drv_gp->get();

thrust::fill(drv.begin(), drv.end(), real_t(0.));

moms_all();
Expand Down
8 changes: 7 additions & 1 deletion src/impl/particles_impl_bcnd.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ namespace libcloudphxx
{
namespace arg = thrust::placeholders;

reset_guardp(lft_id_gp, tmp_device_real_part);
thrust_device::vector<real_t> &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
reset_guardp(rgt_id_gp, tmp_device_real_part);
thrust_device::vector<real_t> &rgt_id(rgt_id_gp->get());

// save ids of SDs to copy
lft_count = thrust::copy_if(
zero, zero+n_part,
Expand Down Expand Up @@ -194,7 +199,8 @@ namespace libcloudphxx
{
namespace arg = thrust::placeholders;

thrust_device::vector<real_t> &n_filtered(tmp_device_real_part);
auto n_filtered_g = tmp_device_real_part.get_guard();
thrust_device::vector<real_t> &n_filtered = n_filtered_g.get();

thrust::fill(n_filtered.begin(), n_filtered.end(), 0.);

Expand Down
Loading
Loading