Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions components/eamxx/docs/user/diags/field_contraction.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
In EAMxx, we can automatically calculate field reductions
across the horizontal columns and across the model vertical levels.
We call these horizontal and vertical reductions.
We can also automatically calculate zonal averages.
We can also automatically calculate zonal averages and histograms.
We have *experimental* support for composing diagnostics; below,
the horizontal and vertical reductions can be composed
sequentially, but if using dp- or dz-weighted reductions,
Expand Down Expand Up @@ -85,6 +85,19 @@ And so on...
| --------- | ------ | ----------- |
| `X_zonal_avg_Y_bins` | Area fraction | Average across the zonal direction |

## Histograms

We currently have a utility to calculate histograms online.
To select the histogram, you need to suffix a field name `X` with
`_histogram_` followed by the values specifying the edges of the bins,
separated by `_`. For example, the histogram specified by
`T_mid_histogram_250_270_290_310` has 5 bins effectively defined by
[$-\infty$, 250), [250, 270), [270, 290), [290, 310), and [310, $\infty$).

| Reduction | Weight | Description |
| --------- | ------ | ----------- |
| `X_histogram_V0_V1_..._VN` | 1 or 0 | Count of field values within each range |

## Example

```yaml
Expand All @@ -99,7 +112,7 @@ fields:
# in this example, we use T_mid in units of K
- T_mid_horiz_avg # K
- T_mid_vert_avg_dp_weighted # K
- T_mid_vert_sum_dp_weighted # K * Pa * s / (m * m)
- T_mid_vert_sum_dp_weighted # K * Pa * s / (m * m)
- T_mid_vert_avg_dz_weighted # K
- T_mid_vert_sum_dz_weighted # K * m
- T_mid_vert_avg # K
Expand All @@ -110,6 +123,7 @@ fields:
- T_mid_vert_avg_dp_weighted_horiz_avg # K
- T_mid_zonal_avg_180_bins # K
- T_mid_zonal_avg_90_bins # K
- T_mid_histogram_250_270_290_310 # count
output_control:
frequency: 1
frequency_units: nmonths
Expand Down
1 change: 1 addition & 0 deletions components/eamxx/src/diagnostics/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ set(DIAGNOSTIC_SRCS
field_at_height.cpp
field_at_level.cpp
field_at_pressure_level.cpp
histogram.cpp
horiz_avg.cpp
longwave_cloud_forcing.cpp
number_path.cpp
Expand Down
164 changes: 164 additions & 0 deletions components/eamxx/src/diagnostics/histogram.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#include "diagnostics/histogram.hpp"

#include <ekat_team_policy_utils.hpp>
#include <ekat_string_utils.hpp>


namespace scream {

HistogramDiag::HistogramDiag(const ekat::Comm &comm, const ekat::ParameterList &params)
: AtmosphereDiagnostic(comm, params) {
const auto &field_name = m_params.get<std::string>("field_name");
const std::string bin_config = m_params.get<std::string>("bin_configuration");
m_diag_name = field_name + "_histogram_" + bin_config;

// extract bin values from configuration, append end values, and check
const std::vector<std::string> bin_strings = ekat::split(bin_config, "_");
m_bin_reals.resize(bin_strings.size()+2);
m_bin_reals[0] = std::numeric_limits<Real>::lowest();
for (long unsigned int i=1; i < m_bin_reals.size()-1; i++)
{
m_bin_reals[i] = std::stod(bin_strings[i-1]);
EKAT_REQUIRE_MSG(m_bin_reals[i] > m_bin_reals[i-1],
"Error! HistogramDiag bin values must be monotonically "
"increasing.\n"
" - bin configuration: " + bin_config + "\n");
}
m_bin_reals.back() = std::numeric_limits<Real>::max();
}

void HistogramDiag::set_grids(const std::shared_ptr<const GridsManager> grids_manager) {
const auto &field_name = m_params.get<std::string>("field_name");
const auto &grid_name = m_params.get<std::string>("grid_name");
add_field<Required>(field_name, grid_name);
}

void HistogramDiag::initialize_impl(const RunType /*run_type*/) {
using FieldIdentifier = FieldHeader::identifier_type;
using FieldLayout = FieldIdentifier::layout_type;
using ShortFieldTagsNames::CMP;
const Field &field = get_fields_in().front();
const FieldIdentifier &field_id = field.get_header().get_identifier();
const FieldLayout &field_layout = field_id.get_layout();
EKAT_REQUIRE_MSG(field_layout.rank() >= 1 && field_layout.rank() <= 3,
"Error! Field rank not supported by HistogramDiag.\n"
" - field name: " +
field_id.name() +
"\n"
" - field layout: " +
field_layout.to_string() + "\n");

// allocate histogram field
const int num_bins = m_bin_reals.size()-1;
FieldLayout diagnostic_layout({CMP}, {num_bins}, {"bin"});
FieldIdentifier diagnostic_id(m_diag_name, diagnostic_layout,
FieldIdentifier::Units::nondimensional(),
field_id.get_grid_name());
m_diagnostic_output = Field(diagnostic_id);
m_diagnostic_output.allocate_view();

// allocate field for bin values
FieldLayout bin_values_layout({CMP}, {num_bins+1}, {"bin"});
FieldIdentifier bin_values_id(m_diag_name + "_bin_values", bin_values_layout,
field_id.get_units(), field_id.get_grid_name());
m_bin_values = Field(bin_values_id);
m_bin_values.allocate_view();

// copy bin values into field
auto bin_values_view = m_bin_values.get_view<Real *>();
auto bin_values_view_host = Kokkos::create_mirror_view(bin_values_view);
for (long unsigned int i=0; i < bin_values_layout.dim(0); i++)
bin_values_view_host(i) = m_bin_reals[i];
Kokkos::deep_copy(bin_values_view,bin_values_view_host);
}

void HistogramDiag::compute_diagnostic_impl() {
const auto &field = get_fields_in().front();
auto field_layout = field.get_header().get_identifier().get_layout();
auto histogram_layout = m_diagnostic_output.get_header().get_identifier().get_layout();
const int num_bins = histogram_layout.dim(0);

auto bin_values_view = m_bin_values.get_view<Real *>();
auto histogram_view = m_diagnostic_output.get_view<Real *>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you consider making this an int? I don't know if making an int is a good idea (not sure how it would chain against other ops like vert_avg or just even averaging in IO ops...)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question: my original implementation used ints but then I ran into IO issues. I don't quite remember the error but I can reproduce if it would be helpful?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our IO layer will likely either crash or produce garbage if we try to do the time average of an int variable. As for chaining with other diags, that's also a question mark. I am ok keeping the data type Real for now. We have lots of cleanup work to do in the diags/IO layer anyways, so we can just add this to the list...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right, that makes a lot of sense. Thanks for the clarification!

using KT = ekat::KokkosTypes<DefaultDevice>;
using TeamPolicy = Kokkos::TeamPolicy<Field::device_t::execution_space>;
using TeamMember = typename TeamPolicy::member_type;
using TPF = ekat::TeamPolicyFactory<typename KT::ExeSpace>;
switch (field_layout.rank())
{
case 1: {
const int d1 = field_layout.dim(0);
auto field_view = field.get_view<const Real *>();
TeamPolicy team_policy = TPF::get_default_team_policy(num_bins, d1);
Kokkos::parallel_for("compute_histogram_" + field.name(), team_policy,
KOKKOS_LAMBDA(const TeamMember &tm) {
const int bin_i = tm.league_rank();
const Real bin_lower = bin_values_view(bin_i);
const Real bin_upper = bin_values_view(bin_i+1);
Kokkos::parallel_reduce(Kokkos::TeamVectorRange(tm, d1),
[&](int i, Real &val) {
if ((bin_lower <= field_view(i)) && (field_view(i) < bin_upper))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if an entry falls outside of all bins? E.g., there's a fillValue, or maybe the input bins were a bit too narrow? The current impl simply "skips" this entry, without any error/warning, are you ok with that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, this impl seems to rely on the fact that bins boundaries are well ordered. Perhaps we should check that in the constructor, to avoid odd behaviors in case of a typo like F_100_200_30 (missing a 0 for the last value).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current impl simply "skips" this entry, without any error/warning, are you ok with that?

That was intentional ("closed bins" at the ends of the histogram), with the initial idea that the user could put in large magnitude values on end bins. That said, we were also looking at considering the end bins as "open" to capture all the values... I suppose the latter is necessary if someone wants to do something like normalize the histogram by the total number of values (to get an approximate PDF). I'm now in favor of the open end bins, so I'll double check with collaborators and implement the change.

Perhaps we should check that in the constructor, to avoid odd behaviors in case of a typo like F_100_200_30 (missing a 0 for the last value).

Good call, I'll implement a check that the bin boundaries are monotonically increasing

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've implement both the "infinite" extent (open ended bins) and the monotonicity check in e1a7a01... with the documentation updated in 7a873bb

val += sp(1.0);
},
histogram_view(bin_i));
});
} break;
case 2: {
const int d1 = field_layout.dim(0);
const int d2 = field_layout.dim(1);
auto field_view = field.get_view<const Real **>();
TeamPolicy team_policy = TPF::get_default_team_policy(num_bins, d1*d2);
Kokkos::parallel_for("compute_histogram_" + field.name(), team_policy,
KOKKOS_LAMBDA(const TeamMember &tm) {
const int bin_i = tm.league_rank();
const Real bin_lower = bin_values_view(bin_i);
const Real bin_upper = bin_values_view(bin_i+1);
Kokkos::parallel_reduce(Kokkos::TeamVectorRange(tm, d1*d2),
[&](int ind, Real &val) {
const int i1 = ind / d2;
const int i2 = ind % d2;
if ((bin_lower <= field_view(i1,i2)) && (field_view(i1,i2) < bin_upper))
val += sp(1.0);
},
histogram_view(bin_i));
});
} break;
case 3: {
const int d1 = field_layout.dim(0);
const int d2 = field_layout.dim(1);
const int d3 = field_layout.dim(2);
auto field_view = field.get_view<const Real ***>();
TeamPolicy team_policy = TPF::get_default_team_policy(num_bins, d1*d2*d3);
Kokkos::parallel_for("compute_histogram_" + field.name(), team_policy,
KOKKOS_LAMBDA(const TeamMember &tm) {
const int bin_i = tm.league_rank();
const Real bin_lower = bin_values_view(bin_i);
const Real bin_upper = bin_values_view(bin_i+1);
Kokkos::parallel_reduce(Kokkos::TeamVectorRange(tm, d1*d2*d3),
[&](int ind, Real &val) {
const int i1 = ind / (d2*d3);
const int ind2 = ind % (d2*d3);
const int i2 = ind2 / d3;
const int i3 = ind2 % d3;
if ((bin_lower <= field_view(i1,i2,i3)) && (field_view(i1,i2,i3) < bin_upper))
val += sp(1.0);
},
histogram_view(bin_i));
});
} break;

default:
EKAT_ERROR_MSG("Error! Unsupported field rank for histogram.\n");
}

// TODO: use device-side MPI calls
// TODO: the dev ptr causes problems; revisit this later
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at some point we should actually revisit this ;) I put this TODO here because I wasn't interested in investigating an error I was getting, but I should actually look into it again... thanks for keeping the TODO

// TODO: doing cuda-aware MPI allreduce would be ~10% faster
Kokkos::fence();
m_diagnostic_output.sync_to_host();
m_comm.all_reduce(m_diagnostic_output.template get_internal_view_data<Real, Host>(),
histogram_layout.size(), MPI_SUM);
m_diagnostic_output.sync_to_dev();
}

} // namespace scream
40 changes: 40 additions & 0 deletions components/eamxx/src/diagnostics/histogram.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef EAMXX_HISTOTGRAM_HPP
#define EAMXX_HISTOGRAM_HPP

#include "share/atm_process/atmosphere_diagnostic.hpp"

namespace scream {
/*
* This diagnostic will calculate a histogram of a field across all dimensions
* producing a one dimensional field, with CMP tag dimension named "bin", that
* indicates how many times a field value in the specified range occurred.
*/

class HistogramDiag : public AtmosphereDiagnostic {

public:
// Constructors
HistogramDiag(const ekat::Comm &comm, const ekat::ParameterList &params);

// The name of the diagnostic
std::string name() const { return m_diag_name; }

// Set the grid
void set_grids(const std::shared_ptr<const GridsManager> grids_manager);

protected:
#ifdef KOKKOS_ENABLE_CUDA
public:
#endif
void initialize_impl(const RunType /*run_type*/);
void compute_diagnostic_impl();

protected:
std::string m_diag_name;
std::vector<Real> m_bin_reals;
Field m_bin_values;
};

} // namespace scream

#endif // EAMXX_HISTOGRAM_HPP
2 changes: 2 additions & 0 deletions components/eamxx/src/diagnostics/register_diagnostics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "diagnostics/zonal_avg.hpp"
#include "diagnostics/conditional_sampling.hpp"
#include "diagnostics/binary_ops.hpp"
#include "diagnostics/histogram.hpp"

namespace scream {

Expand Down Expand Up @@ -63,6 +64,7 @@ inline void register_diagnostics () {
diag_factory.register_product("ZonalAvgDiag",&create_atmosphere_diagnostic<ZonalAvgDiag>);
diag_factory.register_product("ConditionalSampling",&create_atmosphere_diagnostic<ConditionalSampling>);
diag_factory.register_product("BinaryOpsDiag", &create_atmosphere_diagnostic<BinaryOpsDiag>);
diag_factory.register_product("HistogramDiag",&create_atmosphere_diagnostic<HistogramDiag>);
}

} // namespace scream
Expand Down
5 changes: 4 additions & 1 deletion components/eamxx/src/diagnostics/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,13 @@ CreateDiagTest(vert_contract "vert_contract_test.cpp" MPI_RANKS 1 ${SCREAM_TEST_
CreateDiagTest(vert_derivative "vert_derivative_test.cpp")

# Test zonal averaging
CreateDiagTest(zonal_avg zonal_avg_test.cpp MPI_RANKS 1 ${SCREAM_TEST_MAX_RANKS})
CreateDiagTest(zonal_avg "zonal_avg_test.cpp" MPI_RANKS 1 ${SCREAM_TEST_MAX_RANKS})

# Test conditional sampling
CreateDiagTest(conditional_sampling "conditional_sampling_test.cpp")

# Test binary ops
CreateDiagTest(binary_ops "binary_ops_test.cpp")

# Test histogram
CreateDiagTest(histogram "histogram_test.cpp" MPI_RANKS 1 ${SCREAM_TEST_MAX_RANKS})
Loading