From e91392d3f579bfa81784c43c94a1a391dbf423d2 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 8 Oct 2025 01:23:29 +0000 Subject: [PATCH 1/7] checkpoint --- cpp/CMakeLists.txt | 4 +- cpp/benchmarks/ndsh/q09.cpp | 1 + cpp/benchmarks/transform/polynomials.cpp | 1 + cpp/benchmarks/transform/transform.cpp | 1 + .../compute_checksum_jit.cpp | 1 + .../string_transforms/extract_email_jit.cpp | 1 + .../string_transforms/format_phone_jit.cpp | 1 + .../string_transforms/localize_phone_jit.cpp | 1 + cpp/include/cudf/transform.hpp | 2 + cpp/include/cudf/types.hpp | 6 + cpp/src/jit/accessors.cuh | 18 ++ cpp/src/jit/helpers.cpp | 36 --- cpp/src/jit/helpers.hpp | 8 +- cpp/src/jit/row_ir.cpp | 227 ++++++++++++------ cpp/src/jit/row_ir.hpp | 60 +++-- cpp/src/stream_compaction/filter/filter.cu | 34 ++- .../stream_compaction/filter/jit/kernel.cu | 14 +- cpp/src/transform/jit/kernel.cu | 86 +++++-- .../transform/{transform.cpp => transform.cu} | 216 +++++++++++------ cpp/tests/filter/filter_test.cpp | 2 +- cpp/tests/jit/row_ir.cpp | 4 +- cpp/tests/streams/transform_test.cpp | 1 + .../integration/unary_transform_test.cpp | 10 +- .../pylibcudf/pylibcudf/libcudf/transform.pxd | 3 +- python/pylibcudf/pylibcudf/libcudf/types.pxd | 4 + python/pylibcudf/pylibcudf/transform.pxd | 3 +- python/pylibcudf/pylibcudf/transform.pyi | 3 +- python/pylibcudf/pylibcudf/transform.pyx | 8 +- python/pylibcudf/pylibcudf/types.pxd | 1 + python/pylibcudf/pylibcudf/types.pyi | 4 + python/pylibcudf/pylibcudf/types.pyx | 3 + python/pylibcudf/tests/test_transform.py | 1 + 32 files changed, 522 insertions(+), 243 deletions(-) rename cpp/src/transform/{transform.cpp => transform.cu} (66%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b3ad7cda1b7..27dfa4c21ad 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -824,7 +824,7 @@ add_library( src/transform/nans_to_nulls.cu src/transform/one_hot_encode.cu src/transform/row_bit_count.cu - src/transform/transform.cpp + src/transform/transform.cu src/transpose/transpose.cu src/unary/cast_ops.cu src/unary/math_ops.cu @@ -854,7 +854,7 @@ set_source_files_properties( src/rolling/detail/rolling_variable_window.cu src/rolling/grouped_rolling.cu src/rolling/rolling.cu - src/transform/transform.cpp + src/transform/transform.cu PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64" ) diff --git a/cpp/benchmarks/ndsh/q09.cpp b/cpp/benchmarks/ndsh/q09.cpp index 7bb333a4956..688d3f84b4c 100644 --- a/cpp/benchmarks/ndsh/q09.cpp +++ b/cpp/benchmarks/ndsh/q09.cpp @@ -164,6 +164,7 @@ struct q9_data { false, std::nullopt, cudf::null_aware::NO, + cudf::null_output::PRESERVE, stream, mr); } diff --git a/cpp/benchmarks/transform/polynomials.cpp b/cpp/benchmarks/transform/polynomials.cpp index 5b03ae1dfbe..63c74f95b61 100644 --- a/cpp/benchmarks/transform/polynomials.cpp +++ b/cpp/benchmarks/transform/polynomials.cpp @@ -96,6 +96,7 @@ static void BM_transform_polynomials(nvbench::state& state) false, std::nullopt, cudf::null_aware::NO, + cudf::null_output::PRESERVE, launch.get_stream().get_stream()); }); } diff --git a/cpp/benchmarks/transform/transform.cpp b/cpp/benchmarks/transform/transform.cpp index a404a9e9582..27fcd95f55a 100644 --- a/cpp/benchmarks/transform/transform.cpp +++ b/cpp/benchmarks/transform/transform.cpp @@ -102,6 +102,7 @@ static void BM_transform(nvbench::state& state) false, std::nullopt, cudf::null_aware::NO, + cudf::null_output::PRESERVE, launch.get_stream().get_stream()); }); } diff --git a/cpp/examples/string_transforms/compute_checksum_jit.cpp b/cpp/examples/string_transforms/compute_checksum_jit.cpp index e23729a930d..34028a4cc9c 100644 --- a/cpp/examples/string_transforms/compute_checksum_jit.cpp +++ b/cpp/examples/string_transforms/compute_checksum_jit.cpp @@ -55,6 +55,7 @@ std::tuple, std::vector> transform( false, std::nullopt, cudf::null_aware::NO, + cudf::null_output::PRESERVE, stream, mr); diff --git a/cpp/examples/string_transforms/extract_email_jit.cpp b/cpp/examples/string_transforms/extract_email_jit.cpp index 686dc5814b3..0fb24bb79e4 100644 --- a/cpp/examples/string_transforms/extract_email_jit.cpp +++ b/cpp/examples/string_transforms/extract_email_jit.cpp @@ -70,6 +70,7 @@ __device__ void email_provider(cudf::string_view* out, false, std::nullopt, cudf::null_aware::NO, + cudf::null_output::PRESERVE, stream, mr); diff --git a/cpp/examples/string_transforms/format_phone_jit.cpp b/cpp/examples/string_transforms/format_phone_jit.cpp index dfbf106fec4..91caf5a91cf 100644 --- a/cpp/examples/string_transforms/format_phone_jit.cpp +++ b/cpp/examples/string_transforms/format_phone_jit.cpp @@ -133,6 +133,7 @@ __device__ void e164_format(void* scratch, false, scratch.data(), cudf::null_aware::NO, + cudf::null_output::PRESERVE, stream, mr); diff --git a/cpp/examples/string_transforms/localize_phone_jit.cpp b/cpp/examples/string_transforms/localize_phone_jit.cpp index cd720ec47d0..2564a4e16df 100644 --- a/cpp/examples/string_transforms/localize_phone_jit.cpp +++ b/cpp/examples/string_transforms/localize_phone_jit.cpp @@ -156,6 +156,7 @@ __device__ void format_phone(void* scratch, false, scratch.data(), cudf::null_aware::NO, + cudf::null_output::PRESERVE, stream, mr); diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 37573ae3867..0d21ec66733 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -57,6 +57,7 @@ namespace CUDF_EXPORT cudf { * @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code * @param user_data User-defined device data to pass to the UDF. * @param is_null_aware Signifies the UDF will receive row inputs as optional values + * @param null_policy Signifies if a null mask should be created for the output column * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return The column resulting from applying the transform function to @@ -69,6 +70,7 @@ std::unique_ptr transform( bool is_ptx, std::optional user_data = std::nullopt, null_aware is_null_aware = null_aware::NO, + null_output null_policy = null_output::PRESERVE, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 408594bf080..e6ffbefa7f0 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -240,6 +240,12 @@ enum class null_aware : bool { YES = 1 ///< The function is null-aware }; +/// @brief Indicates the null output policy of a function. +enum class null_output : uint8_t { + PRESERVE = 0, ///< The resulting column's nullmask is preserved + NON_NULLABLE = 1 ///< A null-mask is not produced and all values are considered valid +}; + /** * @brief Indicator for the logical data type of an element in a column. * diff --git a/cpp/src/jit/accessors.cuh b/cpp/src/jit/accessors.cuh index f31150dcb2e..35799a220eb 100644 --- a/cpp/src/jit/accessors.cuh +++ b/cpp/src/jit/accessors.cuh @@ -52,6 +52,12 @@ struct column_accessor { return inputs[index].is_null(row); } + template + static __device__ bool is_valid(ColumnView const* inputs, cudf::size_type row) + { + return inputs[index].is_valid(row); + } + template static __device__ cuda::std::optional nullable_element(ColumnView const* columns, cudf::size_type row) @@ -85,6 +91,12 @@ struct span_accessor { return inputs[index].is_null(row); } + static __device__ bool is_valid(cudf::jit::device_optional_span const* inputs, + cudf::size_type row) + { + return inputs[index].is_valid(row); + } + static __device__ cuda::std::optional nullable_element( cudf::jit::device_optional_span const* outputs, cudf::size_type row) { @@ -117,6 +129,12 @@ struct scalar_accessor { return Accessor::is_null(columns, 0); } + template + static __device__ bool is_valid(ColumnView const* columns, cudf::size_type) + { + return Accessor::is_valid(columns, 0); + } + template static __device__ decltype(auto) nullable_element(ColumnView const* columns, cudf::size_type) { diff --git a/cpp/src/jit/helpers.cpp b/cpp/src/jit/helpers.cpp index d87bc3513de..fcd85f5fbcf 100644 --- a/cpp/src/jit/helpers.cpp +++ b/cpp/src/jit/helpers.cpp @@ -39,42 +39,6 @@ typename std::vector::const_iterator get_transform_base_column( return largest; } -jitify2::StringVec build_jit_template_params( - bool has_user_data, - null_aware is_null_aware, - std::vector const& span_outputs, - std::vector const& column_outputs, - std::vector const& column_inputs) -{ - jitify2::StringVec tparams; - - tparams.emplace_back(jitify2::reflection::reflect(has_user_data)); - tparams.emplace_back(jitify2::reflection::reflect(is_null_aware == null_aware::YES)); - - std::transform(thrust::counting_iterator(0), - thrust::counting_iterator(span_outputs.size()), - std::back_inserter(tparams), - [&](auto i) { - return jitify2::reflection::Template("cudf::jit::span_accessor") - .instantiate(span_outputs[i], i); - }); - - std::transform(thrust::counting_iterator(0), - thrust::counting_iterator(column_outputs.size()), - std::back_inserter(tparams), - [&](auto i) { - return jitify2::reflection::Template("cudf::jit::column_accessor") - .instantiate(column_outputs[i], i); - }); - - std::transform(thrust::counting_iterator(0), - thrust::counting_iterator(column_inputs.size()), - std::back_inserter(tparams), - [&](auto i) { return column_inputs[i].accessor(i); }); - - return tparams; -} - std::map build_ptx_params(std::vector const& output_typenames, std::vector const& input_typenames, bool has_user_data) diff --git a/cpp/src/jit/helpers.hpp b/cpp/src/jit/helpers.hpp index 4ac2976c2fc..c0fa9004505 100644 --- a/cpp/src/jit/helpers.hpp +++ b/cpp/src/jit/helpers.hpp @@ -34,6 +34,7 @@ constexpr bool is_scalar(cudf::size_type base_column_size, cudf::size_type colum typename std::vector::const_iterator get_transform_base_column( std::vector const& inputs); + struct input_column_reflection { std::string type_name; bool is_scalar = false; @@ -49,13 +50,6 @@ struct input_column_reflection { } }; -jitify2::StringVec build_jit_template_params( - bool has_user_data, - null_aware is_null_aware, - std::vector const& span_outputs, - std::vector const& column_outputs, - std::vector const& column_inputs); - std::map build_ptx_params(std::vector const& output_typenames, std::vector const& input_typenames, bool has_user_data); diff --git a/cpp/src/jit/row_ir.cpp b/cpp/src/jit/row_ir.cpp index 11e175eda5b..a308f5a3ea7 100644 --- a/cpp/src/jit/row_ir.cpp +++ b/cpp/src/jit/row_ir.cpp @@ -30,20 +30,32 @@ namespace detail { namespace row_ir { -std::string cuda_type(type_info type) { return type_to_name(type.type); } +std::string cuda_type(cudf::data_type type, bool nullable) +{ + auto name = type_to_name(type); + return nullable ? std::format("cuda::std::optional<{}>", name) : name; +} std::string instance_context::make_tmp_id() { return std::format("{}{}", tmp_prefix_, num_tmp_vars_++); } +bool instance_context::has_nulls() const { return has_nulls_; } + +void instance_context::set_has_nulls(bool has_nulls) { has_nulls_ = has_nulls; } + void instance_context::reset() { num_tmp_vars_ = 0; } get_input::get_input(int32_t input) : id_(), input_(input), type_() {} std::string_view get_input::get_id() { return id_; } -type_info get_input::get_type() { return type_; } +data_type get_input::get_type() { return type_; } + +bool get_input::is_null_aware() { return false; } + +bool get_input::is_always_nonnullable() { return false; } void get_input::instantiate(instance_context& ctx, instance_info const& info) { @@ -58,7 +70,8 @@ std::string get_input::generate_code(instance_context& ctx, { switch (info.id) { case target::CUDA: { - return std::format("{} {} = {};", cuda_type(type_), id_, instance.inputs[input_].id); + return std::format( + "{} {} = {};", cuda_type(type_, ctx.has_nulls()), id_, instance.inputs[input_].id); } default: CUDF_FAIL("Unsupported target: " + std::to_string(static_cast(info.id)), @@ -73,7 +86,11 @@ set_output::set_output(int32_t output, std::unique_ptr source) std::string_view set_output::get_id() { return id_; } -type_info set_output::get_type() { return type_; } +data_type set_output::get_type() { return type_; } + +bool set_output::is_null_aware() { return source_->is_null_aware(); } + +bool set_output::is_always_nonnullable() { return source_->is_always_nonnullable(); } node& set_output::get_source() { return *source_; } @@ -82,9 +99,8 @@ void set_output::instantiate(instance_context& ctx, instance_info const& info) source_->instantiate(ctx, info); id_ = ctx.make_tmp_id(); auto source_type = source_->get_type(); - // output is never allowed to be nullable - type_ = type_info{source_type.type}; - output_id_ = info.outputs[output_].id; + type_ = source_type; + output_id_ = info.outputs[output_].id; } std::string set_output::generate_code(instance_context& ctx, @@ -98,8 +114,8 @@ std::string set_output::generate_code(instance_context& ctx, "{}\n" "{} {} = {};\n" "*{} = {};", + cuda_type(type_, ctx.has_nulls()), source_code, - cuda_type(this->get_type()), id_, source_->get_id(), output_id_, @@ -111,14 +127,33 @@ std::string set_output::generate_code(instance_context& ctx, } } -/// @brief returns true if the operator depends on the nullability of its operands -static bool is_nullness_dependent_operator(cudf::ast::ast_operator op) +operation::operation(opcode op, std::unique_ptr* move_begin, std::unique_ptr* move_end) + : id_(), op_(op), operands_(), type_() +{ + std::move(move_begin, move_end, std::back_inserter(operands_)); + CUDF_EXPECTS(static_cast(operands_.size()) == ast::detail::ast_operator_arity(op), + "Invalid number of arguments for operator.", + std::invalid_argument); + CUDF_EXPECTS( + operands_.size() > 0, "Operator must have at least one operand", std::invalid_argument); +} + +operation::operation(opcode op, std::vector> operands) + : operation(op, operands.data(), operands.data() + operands.size()) +{ +} + +std::string_view operation::get_id() { return id_; } + +data_type operation::get_type() { return type_; } + +bool operation::is_null_aware() { - switch (op) { + switch (op_) { + case ast::ast_operator::IS_NULL: case ast::ast_operator::NULL_EQUAL: case ast::ast_operator::NULL_LOGICAL_AND: - case ast::ast_operator::NULL_LOGICAL_OR: - case ast::ast_operator::IS_NULL: return true; + case ast::ast_operator::NULL_LOGICAL_OR: return true; case ast::ast_operator::ADD: case ast::ast_operator::SUB: @@ -165,36 +200,75 @@ static bool is_nullness_dependent_operator(cudf::ast::ast_operator op) case ast::ast_operator::NOT: case ast::ast_operator::CAST_TO_INT64: case ast::ast_operator::CAST_TO_UINT64: - case ast::ast_operator::CAST_TO_FLOAT64: return false; + case ast::ast_operator::CAST_TO_FLOAT64: + return std::any_of( + operands_.begin(), operands_.end(), [](auto& op) { return op->is_null_aware(); }); default: CUDF_UNREACHABLE("Unrecognized operator type."); } } -operation::operation(opcode op, std::unique_ptr* move_begin, std::unique_ptr* move_end) - : id_(), op_(op), operands_(), type_() +bool operation::is_always_nonnullable() { - std::move(move_begin, move_end, std::back_inserter(operands_)); - CUDF_EXPECTS(static_cast(operands_.size()) == ast::detail::ast_operator_arity(op), - "Invalid number of arguments for operator.", - std::invalid_argument); - CUDF_EXPECTS( - operands_.size() > 0, "Operator must have at least one operand", std::invalid_argument); + switch (op_) { + case ast::ast_operator::IS_NULL: return true; - CUDF_EXPECTS(!is_nullness_dependent_operator(op), - "Nullness-dependent operators are not supported", - std::invalid_argument); -} + case ast::ast_operator::NULL_EQUAL: + case ast::ast_operator::NULL_LOGICAL_AND: + case ast::ast_operator::NULL_LOGICAL_OR: + case ast::ast_operator::ADD: + case ast::ast_operator::SUB: + case ast::ast_operator::MUL: + case ast::ast_operator::DIV: + case ast::ast_operator::TRUE_DIV: + case ast::ast_operator::FLOOR_DIV: + case ast::ast_operator::MOD: + case ast::ast_operator::PYMOD: + case ast::ast_operator::POW: + case ast::ast_operator::NOT_EQUAL: + case ast::ast_operator::EQUAL: + case ast::ast_operator::LESS: + case ast::ast_operator::GREATER: + case ast::ast_operator::LESS_EQUAL: + case ast::ast_operator::GREATER_EQUAL: + case ast::ast_operator::BITWISE_AND: + case ast::ast_operator::BITWISE_OR: + case ast::ast_operator::BITWISE_XOR: + case ast::ast_operator::LOGICAL_AND: + case ast::ast_operator::LOGICAL_OR: + case ast::ast_operator::IDENTITY: + case ast::ast_operator::SIN: + case ast::ast_operator::COS: + case ast::ast_operator::TAN: + case ast::ast_operator::ARCSIN: + case ast::ast_operator::ARCCOS: + case ast::ast_operator::ARCTAN: + case ast::ast_operator::SINH: + case ast::ast_operator::COSH: + case ast::ast_operator::TANH: + case ast::ast_operator::ARCSINH: + case ast::ast_operator::ARCCOSH: + case ast::ast_operator::ARCTANH: + case ast::ast_operator::EXP: + case ast::ast_operator::LOG: + case ast::ast_operator::SQRT: + case ast::ast_operator::CBRT: + case ast::ast_operator::CEIL: + case ast::ast_operator::FLOOR: + case ast::ast_operator::ABS: + case ast::ast_operator::RINT: + case ast::ast_operator::BIT_INVERT: + case ast::ast_operator::NOT: + case ast::ast_operator::CAST_TO_INT64: + case ast::ast_operator::CAST_TO_UINT64: + case ast::ast_operator::CAST_TO_FLOAT64: + return std::all_of( + operands_.begin(), operands_.end(), [](auto& op) { return op->is_always_nonnullable(); }); -operation::operation(opcode op, std::vector> operands) - : operation(op, operands.data(), operands.data() + operands.size()) -{ + default: CUDF_UNREACHABLE("Unrecognized operator type."); + } } -std::string_view operation::get_id() { return id_; } - -type_info operation::get_type() { return type_; } - opcode operation::get_opcode() const { return op_; } std::span const> operation::get_operands() const { return operands_; } @@ -209,19 +283,10 @@ void operation::instantiate(instance_context& ctx, instance_info const& info) std::vector operand_types; for (auto& arg : operands_) { - operand_types.emplace_back(arg->get_type().type); + operand_types.emplace_back(arg->get_type()); } - auto type_id = ast::detail::ast_operator_return_type(op_, operand_types).id(); - - // all decimal operation result types should have scale equal to the minimum scale of the operands - auto scale = std::accumulate(operand_types.begin() + 1, - operand_types.end(), - operand_types.front().scale(), - [](auto const& a, auto const& b) { return std::min(a, b.scale()); }); - - auto dt = cudf::data_type{type_id}; - type_ = type_info{cudf::is_fixed_point(dt) ? cudf::data_type{type_id, scale} : dt}; + type_ = ast::detail::ast_operator_return_type(op_, operand_types); } std::string operation::generate_code(instance_context& ctx, @@ -249,11 +314,12 @@ std::string operation::generate_code(instance_context& ctx, }); auto cuda = std::format( - "{} {} = cudf::ast::detail::operator_functor{{}}({});", - cuda_type(type_), + "auto {} = cudf::ast::detail::operator_functor{{}}({});", + cuda_type(type_, ctx.has_nulls()), id_, ast::detail::ast_operator_string(op_), + ctx.has_nulls(), operands_str); return cuda; } @@ -308,7 +374,7 @@ void ast_converter::add_input_var(ast_column_input_spec const& in, ast_args cons // TODO(lamarrr): consider mangling column name to make debugging easier auto id = std::format("in_{}", input_vars_.size()); auto type = args.table.column(in.column).type(); - input_vars_.emplace_back(std::move(id), type_info{type}); + input_vars_.emplace_back(std::move(id), type); } void ast_converter::add_input_var(ast_scalar_input_spec const& in, @@ -316,7 +382,7 @@ void ast_converter::add_input_var(ast_scalar_input_spec const& in, { auto id = std::format("in_{}", input_vars_.size()); auto type = in.ref.get().type(); - input_vars_.emplace_back(std::move(id), type_info{type}); + input_vars_.emplace_back(std::move(id), type); } void ast_converter::add_output_var() @@ -350,9 +416,9 @@ column_view get_column_view(ast_scalar_input_spec const& spec, ast_args const& a return spec.broadcast_column->view(); } -void ast_converter::generate_code(target target_id, - ast::expression const& expr, - ast_args const& args) +std::tuple ast_converter::generate_code(target target_id, + ast::expression const& expr, + ast_args const& args) { auto output_expr_ir = expr.accept(*this); output_irs_.emplace_back(std::make_unique(0, std::move(output_expr_ir))); @@ -363,10 +429,10 @@ void ast_converter::generate_code(target target_id, }); if (!uses_input_table && args.table.num_columns() > 0) { - // this means none of the inputs tables to the IR are actually used in the expression. In order - // to still run the transform-equivalent operation of AST, we need to add one of the table's - // columns as an unused input. This is done because the output size of a transform is determined - // by the largest input column. + // this means none of the inputs tables to the IR are actually used in the expression. In + // order to still run the transform-equivalent operation of AST, we need to add one of the + // table's columns as an unused input. This is done because the output size of a transform is + // determined by the largest input column. input_specs_.emplace_back(ast_column_input_spec{ast::table_reference::LEFT, 0}); } @@ -375,12 +441,33 @@ void ast_converter::generate_code(target target_id, dispatch_input_spec(input, [this](auto&... args) { add_input_var(args...); }, args); } + bool has_nullable_inputs = + std::any_of(input_specs_.begin(), input_specs_.end(), [&](auto const& input) { + return dispatch_input_spec( + input, [](auto&... args) { return get_column_view(args...); }, args) + .nullable(); + }); + // add 1 auto-deduced output variable add_output_var(); instance_context instance_ctx; instance_info instance{input_vars_, output_vars_}; + auto is_null_aware = + std::any_of( + output_irs_.cbegin(), output_irs_.cend(), [](auto& ir) { return ir->is_null_aware(); }) + ? null_aware::YES + : null_aware::NO; + + bool output_is_always_non_nullable = std::all_of( + output_irs_.cbegin(), output_irs_.cend(), [](auto& ir) { return ir->is_always_nonnullable(); }); + + bool may_evaluate_null = !output_is_always_non_nullable && has_nullable_inputs; + auto null_policy = may_evaluate_null ? null_output::PRESERVE : null_output::NON_NULLABLE; + + instance_ctx.set_has_nulls(is_null_aware == null_aware::YES); + // instantiate the IR nodes for (auto& ir : output_irs_) { ir->instantiate(instance_ctx, instance); @@ -401,12 +488,12 @@ void ast_converter::generate_code(target target_id, auto const& var = output_vars_[i]; auto const& ir = output_irs_[i]; auto output_type = ir->get_type(); - return std::format("{}* {}", cuda_type(output_type), var.id); + return std::format("{}* {}", cuda_type(output_type, instance_ctx.has_nulls()), var.id); }; auto input_decl = [&](size_t i) { auto const& var = input_vars_[i]; - return std::format("{} {}", cuda_type(var.type), var.id); + return std::format("{} {}", cuda_type(var.type, instance_ctx.has_nulls()), var.id); }; std::vector params_decls; @@ -443,6 +530,8 @@ return; )***", params_decl, body); + + return {is_null_aware, null_policy}; } break; } @@ -454,7 +543,6 @@ return; // Due to the AST expression tree structure, we can't generate the IR without the target // tables - transform_args ast_converter::compute_column(target target_id, ast::expression const& expr, ast_args const& args, @@ -463,12 +551,10 @@ transform_args ast_converter::compute_column(target target_id, { ast_converter converter{stream, mr}; - // TODO(lamarrr): support null-sensitive operators - // TODO(lamarrr): consider deduplicating ast expression's input column references. See // TransformTest/1.DeeplyNestedArithmeticLogicalExpression for reference - converter.generate_code(target_id, expr, args); + auto [is_null_aware, null_output] = converter.generate_code(target_id, expr, args); std::vector columns; std::vector> scalar_columns; @@ -484,15 +570,17 @@ transform_args ast_converter::compute_column(target target_id, } } - auto output_column_type = converter.output_irs_[0]->get_type(); + auto& out = converter.output_irs_[0]; + auto output_column_type = out->get_type(); transform_args transform{std::move(scalar_columns), std::move(columns), std::move(converter.code_), - output_column_type.type, + output_column_type, false, std::nullopt, - null_aware::NO}; + is_null_aware, + null_output}; if (get_context().dump_codegen()) { std::cout << "Generated code for transform: " << transform.udf << std::endl; @@ -509,12 +597,15 @@ filter_args ast_converter::filter(target target_id, rmm::device_async_resource_ref mr) { ast_converter converter{stream, mr}; - converter.generate_code(target_id, expr, args); + auto [is_null_aware, _null_output] = converter.generate_code(target_id, expr, args); CUDF_EXPECTS(converter.output_irs_.size() == 1, "Filter expression must return a single output.", std::invalid_argument); - CUDF_EXPECTS(converter.output_irs_[0]->get_type().type == data_type{type_id::BOOL8}, + + auto& out_ir = converter.output_irs_[0]; + + CUDF_EXPECTS(out_ir->get_type() == data_type{type_id::BOOL8}, "Filter expression must return a boolean type.", std::invalid_argument); @@ -546,7 +637,7 @@ filter_args ast_converter::filter(target target_id, std::move(filter_columns), false, std::nullopt, - null_aware::NO}; + is_null_aware}; if (get_context().dump_codegen()) { std::cout << "Generated code for filter: " << filter.predicate_udf << std::endl; diff --git a/cpp/src/jit/row_ir.hpp b/cpp/src/jit/row_ir.hpp index d3e09a54a11..cca504a83bb 100644 --- a/cpp/src/jit/row_ir.hpp +++ b/cpp/src/jit/row_ir.hpp @@ -39,19 +39,12 @@ enum class target { CUDA = 0 /// < CUDA C++ }; -/** - * @brief The type information of the variable used in the IR. - */ -struct type_info { - data_type type = data_type{type_id::EMPTY}; ///< The data type of the variable -}; - /** * @brief The information about the variable used in the IR. */ struct var_info { - std::string id = {}; ///< The variable identifier - type_info type = {}; ///< The type information of the variable + std::string id = {}; ///< The variable identifier + data_type type = data_type{type_id::EMPTY}; ///< The data type of the variable }; /** @@ -86,6 +79,7 @@ struct instance_context { private: int32_t num_tmp_vars_ = 0; ///< The number of temporary variables generated std::string tmp_prefix_ = "tmp_"; ///< The prefix for temporary variable identifiers + bool has_nulls_ = false; ///< True if expressions involve null values public: instance_context() = default; ///< Default constructor @@ -106,6 +100,10 @@ struct instance_context { */ std::string make_tmp_id(); + [[nodiscard]] bool has_nulls() const; + + void set_has_nulls(bool has_nulls); + void reset(); }; @@ -120,7 +118,18 @@ struct node { * @brief Get the type info of the IR node * @return The type information of the IR node */ - virtual type_info get_type() = 0; + virtual data_type get_type() = 0; + + /** + * @brief Returns `false` if this node forwards nulls from its inputs to its output + */ + virtual bool is_null_aware() = 0; + + /** + * @brief Returns true if this node always produces a non-nullable output even if its inputs are + * nullable + */ + virtual bool is_always_nonnullable() = 0; /** * @brief Instantiate the IR node with the given context and instance information, setting up any @@ -157,7 +166,7 @@ struct get_input final : node { private: std::string id_; ///< The identifier of the IR node int32_t input_; ///< The index of the input variable - type_info type_; ///< The type information of the IR node + data_type type_; ///< The type information of the IR node public: /** @@ -184,7 +193,11 @@ struct get_input final : node { /** * @copydoc node::get_type */ - type_info get_type() override; + data_type get_type() override; + + [[nodiscard]] bool is_null_aware() override; + + [[nodiscard]] bool is_always_nonnullable() override; /** * @copydoc node::instantiate @@ -207,7 +220,7 @@ struct set_output final : node { std::string id_; ///< The identifier of the IR node int32_t output_; ///< The index of the output variable std::unique_ptr source_; ///< The source IR node from which the value is taken - type_info type_; ///< The type information of the IR node + data_type type_; ///< The type information of the IR node std::string output_id_; ///< The identifier of the output variable public: @@ -236,7 +249,11 @@ struct set_output final : node { /** * @copydoc node::get_type */ - type_info get_type() override; + data_type get_type() override; + + [[nodiscard]] bool is_null_aware() override; + + [[nodiscard]] bool is_always_nonnullable() override; /** * @brief Get the source IR node from which the value is taken @@ -264,7 +281,7 @@ struct operation final : node { std::string id_; ///< The identifier of the IR node opcode op_; ///< The operation code std::vector> operands_; ///< The operands of the operation - type_info type_; ///< The type information of the IR node + data_type type_; ///< The type information of the IR node operation(opcode op, std::unique_ptr* move_begin, std::unique_ptr* move_end); @@ -320,7 +337,11 @@ struct operation final : node { /** * @copydoc node::get_type */ - type_info get_type() override; + data_type get_type() override; + + [[nodiscard]] bool is_null_aware() override; + + [[nodiscard]] bool is_always_nonnullable() override; /** * @brief Get the operation code of the operation @@ -381,6 +402,7 @@ struct transform_args { bool is_ptx = false; ///< Whether the transform is a PTX kernel std::optional user_data = std::nullopt; ///< User data to pass to the transform null_aware is_null_aware = null_aware::NO; ///< Whether the transform is null-aware + null_output null_policy = null_output::PRESERVE; ///< Null-transformation policy }; /** @@ -464,14 +486,15 @@ struct ast_converter { void add_output_var(); - void generate_code(target target, ast::expression const& expr, ast_args const& args); + std::tuple generate_code(target target, + ast::expression const& expr, + ast_args const& args); public: /** * @brief Convert an AST `compute_column` expression to a `cudf::transform` * @param target The target for which the IR is generated * @param expr The AST expression to convert - * @param null_aware Whether to use null-aware operators * @param args The arguments needed to resolve the AST expression * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table's device memory @@ -487,7 +510,6 @@ struct ast_converter { * @brief Convert an AST `filter` expression to a `cudf::filter` * @param target The target for which the IR is generated * @param expr The AST expression to convert - * @param null_aware Whether to use null-aware operators * @param args The arguments needed to resolve the AST expression * @param filter_table The table to be filtered * @param stream CUDA stream used for device memory operations and kernel launches. diff --git a/cpp/src/stream_compaction/filter/filter.cu b/cpp/src/stream_compaction/filter/filter.cu index 4de9c7633a2..e547db07d2c 100644 --- a/cpp/src/stream_compaction/filter/filter.cu +++ b/cpp/src/stream_compaction/filter/filter.cu @@ -213,6 +213,33 @@ jitify2::Kernel get_kernel(std::string const& kernel_name, std::string const& cu .get_kernel(kernel_name, {}, {{"cudf/detail/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}); } +jitify2::StringVec build_jit_template_params( + null_aware is_null_aware, + bool has_user_data, + std::vector const& span_outputs, + std::vector const& column_inputs) +{ + jitify2::StringVec tparams; + + tparams.emplace_back(jitify2::reflection::reflect(is_null_aware)); + tparams.emplace_back(jitify2::reflection::reflect(has_user_data)); + + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(span_outputs.size()), + std::back_inserter(tparams), + [&](auto i) { + return jitify2::reflection::Template("cudf::jit::span_accessor") + .instantiate(span_outputs[i], i); + }); + + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(column_inputs.size()), + std::back_inserter(tparams), + [&](auto i) { return column_inputs[i].accessor(i); }); + + return tparams; +} + jitify2::ConfiguredKernel build_kernel(std::string const& kernel_name, size_type base_column_size, std::vector const& span_outputs, @@ -236,11 +263,10 @@ jitify2::ConfiguredKernel build_kernel(std::string const& kernel_name, : cudf::jit::parse_single_function_cuda(udf, "GENERIC_FILTER_OP"); return get_kernel(jitify2::reflection::Template(kernel_name) - .instantiate(cudf::jit::build_jit_template_params( - has_user_data, + .instantiate(build_jit_template_params( is_null_aware, + has_user_data, span_outputs, - {}, cudf::jit::reflect_input_columns(base_column_size, input_columns))), cuda_source) ->configure_1d_max_occupancy(0, 0, nullptr, stream.value()); @@ -271,6 +297,8 @@ std::vector> filter_operation( stream, mr); + // [ ] handle null-masks and null-aware properly; use null_policy + cudf::jit::device_span const filter_indices_span{filter_indices.data(), filter_indices.size()}; diff --git a/cpp/src/stream_compaction/filter/jit/kernel.cu b/cpp/src/stream_compaction/filter/jit/kernel.cu index 67c4d9ad808..24b52813060 100644 --- a/cpp/src/stream_compaction/filter/jit/kernel.cu +++ b/cpp/src/stream_compaction/filter/jit/kernel.cu @@ -37,7 +37,7 @@ namespace cudf { namespace filtering { namespace jit { -template +template CUDF_KERNEL void kernel(cudf::jit::device_optional_span const* outputs, cudf::column_device_view_core const* inputs, void* user_data) @@ -54,7 +54,7 @@ CUDF_KERNEL void kernel(cudf::jit::device_optional_span cons for (auto i = start; i < size; i += stride) { bool applies = false; - if constexpr (!is_null_aware) { + if constexpr (is_null_aware == null_aware::NO) { auto const any_null = (false || ... || In::is_null(inputs, i)); if (!any_null) { @@ -64,12 +64,16 @@ CUDF_KERNEL void kernel(cudf::jit::device_optional_span cons GENERIC_FILTER_OP(&applies, In::element(inputs, i)...); } } - } else { + } else { // is_null_aware == null_aware::YES + cuda::std::optional nullable_applies; + if constexpr (has_user_data) { - GENERIC_FILTER_OP(user_data, i, &applies, In::nullable_element(inputs, i)...); + GENERIC_FILTER_OP(user_data, i, &nullable_applies, In::nullable_element(inputs, i)...); } else { - GENERIC_FILTER_OP(&applies, In::nullable_element(inputs, i)...); + GENERIC_FILTER_OP(&nullable_applies, In::nullable_element(inputs, i)...); } + + applies = nullable_applies.value_or(false); } output[i] = applies ? static_cast(i) : NOT_APPLIED; diff --git a/cpp/src/transform/jit/kernel.cu b/cpp/src/transform/jit/kernel.cu index 5ac76ed65a9..64a04db6868 100644 --- a/cpp/src/transform/jit/kernel.cu +++ b/cpp/src/transform/jit/kernel.cu @@ -37,9 +37,14 @@ namespace cudf { namespace transformation { namespace jit { -template +template CUDF_KERNEL void kernel(cudf::mutable_column_device_view_core const* outputs, cudf::column_device_view_core const* inputs, + bool* null_mask, void* user_data) { // inputs to JITIFY kernels have to be either sized-integral types or pointers. Structs or @@ -50,28 +55,42 @@ CUDF_KERNEL void kernel(cudf::mutable_column_device_view_core const* outputs, auto const size = outputs[0].size(); for (auto i = start; i < size; i += stride) { - if constexpr (!is_null_aware) { - if (Out::is_null(outputs, i)) { continue; } + if constexpr (is_null_aware == null_aware::NO) { + auto const is_valid = (true && ... && In::is_valid(inputs, i)); + if constexpr (may_evaluate_null) { null_mask[i] = is_valid; } + + if (!is_valid) { continue; } if constexpr (has_user_data) { GENERIC_TRANSFORM_OP(user_data, i, &Out::element(outputs, i), In::element(inputs, i)...); } else { GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::element(inputs, i)...); } - } else { + + } else { // is_null_aware == null_aware::YES + cuda::std::optional result; + if constexpr (has_user_data) { - GENERIC_TRANSFORM_OP( - user_data, i, &Out::element(outputs, i), In::nullable_element(inputs, i)...); + GENERIC_TRANSFORM_OP(user_data, i, &result, In::nullable_element(inputs, i)...); } else { - GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::nullable_element(inputs, i)...); + GENERIC_TRANSFORM_OP(&result, In::nullable_element(inputs, i)...); } + + Out::assign(outputs, i, *result); + + if constexpr (may_evaluate_null) { null_mask[i] = result.has_value(); } } } } -template +template CUDF_KERNEL void fixed_point_kernel(cudf::mutable_column_device_view_core const* outputs, cudf::column_device_view_core const* inputs, + bool* null_mask, void* user_data) { auto const start = cudf::detail::grid_1d::global_thread_id(); @@ -80,10 +99,14 @@ CUDF_KERNEL void fixed_point_kernel(cudf::mutable_column_device_view_core const* auto const output_scale = static_cast(outputs[0].type().scale()); for (auto i = start; i < size; i += stride) { - typename Out::type result{numeric::scaled_integer{0, output_scale}}; + if constexpr (is_null_aware == null_aware::NO) { + auto const is_valid = (true && ... && In::is_valid(inputs, i)); + + if constexpr (may_evaluate_null) { null_mask[i] = is_valid; } + + if (!is_valid) { continue; } - if constexpr (!is_null_aware) { - if (Out::is_null(outputs, i)) { continue; } + typename Out::type result{numeric::scaled_integer{0, output_scale}}; if constexpr (has_user_data) { GENERIC_TRANSFORM_OP(user_data, i, &result, In::element(inputs, i)...); @@ -91,21 +114,33 @@ CUDF_KERNEL void fixed_point_kernel(cudf::mutable_column_device_view_core const* GENERIC_TRANSFORM_OP(&result, In::element(inputs, i)...); } - } else { + Out::assign(outputs, i, result); + + } else { // is_null_aware == null_aware::YES + cuda::std::optional result{ + typename Out::type{numeric::scaled_integer{0, output_scale}}}; + if constexpr (has_user_data) { GENERIC_TRANSFORM_OP(user_data, i, &result, In::nullable_element(inputs, i)...); } else { GENERIC_TRANSFORM_OP(&result, In::nullable_element(inputs, i)...); } - } - Out::assign(outputs, i, result); + Out::assign(outputs, i, *result); + + if constexpr (may_evaluate_null) { null_mask[i] = result.has_value(); } + } } } -template +template CUDF_KERNEL void span_kernel(cudf::jit::device_optional_span const* outputs, cudf::column_device_view_core const* inputs, + bool* null_mask, void* user_data) { auto const start = cudf::detail::grid_1d::global_thread_id(); @@ -113,21 +148,30 @@ CUDF_KERNEL void span_kernel(cudf::jit::device_optional_span auto const size = outputs[0].size(); for (auto i = start; i < size; i += stride) { - if constexpr (!is_null_aware) { - if (Out::is_null(outputs, i)) { continue; } + if constexpr (is_null_aware == null_aware::NO) { + auto const is_valid = (true && ... && In::is_valid(inputs, i)); + + if constexpr (may_evaluate_null) { null_mask[i] = is_valid; } + + if (!is_valid) { continue; } if constexpr (has_user_data) { GENERIC_TRANSFORM_OP(user_data, i, &Out::element(outputs, i), In::element(inputs, i)...); } else { GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::element(inputs, i)...); } - } else { + } else { // is_null_aware == null_aware::YES + cuda::std::optional result; + if constexpr (has_user_data) { - GENERIC_TRANSFORM_OP( - user_data, i, &Out::element(outputs, i), In::nullable_element(inputs, i)...); + GENERIC_TRANSFORM_OP(user_data, i, &result, In::nullable_element(inputs, i)...); } else { - GENERIC_TRANSFORM_OP(&Out::element(outputs, i), In::nullable_element(inputs, i)...); + GENERIC_TRANSFORM_OP(&result, In::nullable_element(inputs, i)...); } + + Out::assign(outputs, i, *result); + + if constexpr (may_evaluate_null) { null_mask[i] = result.has_value(); } } } } diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cu similarity index 66% rename from cpp/src/transform/transform.cpp rename to cpp/src/transform/transform.cu index 5c1fde01b1d..ad701090e97 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -46,13 +47,52 @@ jitify2::Kernel get_kernel(std::string const& kernel_name, std::string const& cu .get_kernel(kernel_name, {}, {{"cudf/detail/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}); } +jitify2::StringVec build_jit_template_params( + null_aware is_null_aware, + bool may_evaluate_null, + bool has_user_data, + std::vector const& span_outputs, + std::vector const& column_outputs, + std::vector const& column_inputs) +{ + jitify2::StringVec tparams; + + tparams.emplace_back(jitify2::reflection::reflect(is_null_aware)); + tparams.emplace_back(jitify2::reflection::reflect(may_evaluate_null)); + tparams.emplace_back(jitify2::reflection::reflect(has_user_data)); + + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(span_outputs.size()), + std::back_inserter(tparams), + [&](auto i) { + return jitify2::reflection::Template("cudf::jit::span_accessor") + .instantiate(span_outputs[i], i); + }); + + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(column_outputs.size()), + std::back_inserter(tparams), + [&](auto i) { + return jitify2::reflection::Template("cudf::jit::column_accessor") + .instantiate(column_outputs[i], i); + }); + + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(column_inputs.size()), + std::back_inserter(tparams), + [&](auto i) { return column_inputs[i].accessor(i); }); + + return tparams; +} + jitify2::ConfiguredKernel build_transform_kernel( std::string const& kernel_name, size_type base_column_size, std::vector const& output_columns, std::vector const& input_columns, - bool has_user_data, null_aware is_null_aware, + bool may_evaluate_null, + bool has_user_data, std::string const& udf, bool is_ptx, rmm::cuda_stream_view stream, @@ -68,10 +108,10 @@ jitify2::ConfiguredKernel build_transform_kernel( : cudf::jit::parse_single_function_cuda(udf, "GENERIC_TRANSFORM_OP"); return get_kernel(jitify2::reflection::Template(kernel_name) - .instantiate(cudf::jit::build_jit_template_params( - has_user_data, + .instantiate(build_jit_template_params( is_null_aware, - {}, + may_evaluate_null, + has_user_data, cudf::jit::column_type_names(output_columns), cudf::jit::reflect_input_columns(base_column_size, input_columns))), cuda_source) @@ -82,8 +122,9 @@ jitify2::ConfiguredKernel build_span_kernel(std::string const& kernel_name, size_type base_column_size, std::vector const& span_outputs, std::vector const& input_columns, - bool has_user_data, null_aware is_null_aware, + bool may_evaluate_null, + bool has_user_data, std::string const& udf, bool is_ptx, rmm::cuda_stream_view stream, @@ -99,8 +140,9 @@ jitify2::ConfiguredKernel build_span_kernel(std::string const& kernel_name, return get_kernel(jitify2::reflection::Template(kernel_name) .instantiate(cudf::jit::build_jit_template_params( - has_user_data, is_null_aware, + may_evaluate_null, + has_user_data, span_outputs, {}, cudf::jit::reflect_input_columns(base_column_size, input_columns))), @@ -111,6 +153,7 @@ jitify2::ConfiguredKernel build_span_kernel(std::string const& kernel_name, void launch_column_output_kernel(jitify2::ConfiguredKernel& kernel, std::vector const& output_columns, std::vector const& input_columns, + std::optional null_mask, std::optional user_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -121,11 +164,12 @@ void launch_column_output_kernel(jitify2::ConfiguredKernel& kernel, auto [input_handles, inputs] = cudf::jit::column_views_to_device(input_columns, stream, mr); - mutable_column_device_view const* outputs_ptr = outputs.data(); - column_device_view const* inputs_ptr = inputs.data(); - void* p_user_data = user_data.value_or(nullptr); + mutable_column_device_view const* p_outputs = outputs.data(); + column_device_view const* p_inputs = inputs.data(); + bool* p_null_mask = null_mask.value_or(nullptr); + void* p_user_data = user_data.value_or(nullptr); - std::array args{&outputs_ptr, &inputs_ptr, &p_user_data}; + std::array args{&p_outputs, &p_inputs, &p_null_mask, &p_user_data}; kernel->launch_raw(args.data()); } @@ -133,56 +177,46 @@ void launch_column_output_kernel(jitify2::ConfiguredKernel& kernel, template void launch_span_kernel(jitify2::ConfiguredKernel& kernel, device_span output, - bitmask_type* null_mask, std::vector const& input_cols, + std::optional null_mask, std::optional user_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { auto outputs = cudf::jit::to_device_vector( std::vector{cudf::jit::device_optional_span{ - cudf::jit::device_span{output.data(), output.size()}, null_mask}}, + cudf::jit::device_span{output.data(), output.size()}, nullptr}}, stream, mr); auto [input_handles, inputs] = cudf::jit::column_views_to_device(input_cols, stream, mr); - cudf::jit::device_optional_span const* outputs_ptr = outputs.data(); - column_device_view const* inputs_ptr = inputs.data(); - void* p_user_data = user_data.value_or(nullptr); + cudf::jit::device_optional_span const* p_outputs = outputs.data(); + column_device_view const* p_inputs = inputs.data(); + bool* p_null_mask = null_mask.value_or(nullptr); + void* p_user_data = user_data.value_or(nullptr); - std::array args{&outputs_ptr, &inputs_ptr, &p_user_data}; + std::array args{&p_outputs, &p_inputs, &p_null_mask, &p_user_data}; kernel->launch_raw(args.data()); } -std::tuple make_transform_null_mask( - column_view base_column, - std::vector const& inputs, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +bool may_evaluate_null(column_view base_column, + std::vector const& inputs, + null_aware is_null_aware, + null_output null_out) { - // collect the non-scalar elements that contribute to the resulting bitmask - std::vector bitmask_columns; - - // to handle null masks for scalars, we just check if the scalar element is null. If it is null, - // then all the rows of the transform output will be null. This helps us prevent creating - // column-sized bitmasks for each scalar. - for (column_view const& col : inputs) { - if (cudf::jit::is_scalar(base_column.size(), col.size())) { - // all nulls - if (col.has_nulls()) { - return std::make_tuple( - create_null_mask(base_column.size(), mask_state::ALL_NULL, stream, mr), - base_column.size()); - } - } else { - bitmask_columns.emplace_back(col); - } + // null-aware UDFs will evaluate nulls unless explicitly marked as not producing nulls + if (is_null_aware == null_aware::YES) { + return null_out != null_output::NON_NULLABLE; + } else { + /// null-unaware UDFs will evaluate nulls if any input is nullable unless explicitly marked + /// as not producing nulls + bool any_nullable = + std::any_of(inputs.begin(), inputs.end(), [](auto const& col) { return col.nullable(); }); + return any_nullable && null_out == null_output::PRESERVE; } - - return cudf::bitmask_and(table_view{bitmask_columns}, stream, mr); } std::unique_ptr transform_operation(column_view base_column, @@ -192,19 +226,17 @@ std::unique_ptr transform_operation(column_view base_column, bool is_ptx, std::optional user_data, null_aware is_null_aware, + null_output null_policy, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - rmm::device_buffer null_mask{}; - cudf::size_type null_count{0}; - if (is_null_aware == null_aware::NO) { - std::tie(null_mask, null_count) = make_transform_null_mask(base_column, inputs, stream, mr); - } else { - null_mask = create_null_mask(base_column.size(), mask_state::UNALLOCATED, stream, mr); - } - auto output = make_fixed_width_column( - output_type, base_column.size(), std::move(null_mask), null_count, stream, mr); + output_type, base_column.size(), cudf::mask_state::UNALLOCATED, stream, mr); + + auto may_return_nulls = may_evaluate_null(base_column, inputs, is_null_aware, null_policy); + auto bool_null_mask = may_return_nulls + ? std::optional>(base_column.size(), stream, mr) + : std::nullopt; auto kernel = build_transform_kernel(is_fixed_point(output_type) ? "cudf::transformation::jit::fixed_point_kernel" @@ -212,14 +244,34 @@ std::unique_ptr transform_operation(column_view base_column, base_column.size(), {*output}, inputs, - user_data.has_value(), is_null_aware, + may_return_nulls, + user_data.has_value(), udf, is_ptx, stream, mr); - launch_column_output_kernel(kernel, {*output}, inputs, user_data, stream, mr); + launch_column_output_kernel( + kernel, + {*output}, + inputs, + bool_null_mask ? std::optional(bool_null_mask->data()) : std::nullopt, + user_data, + stream, + mr); + + if (bool_null_mask) { + auto [null_mask, null_count] = detail::valid_if( + bool_null_mask->begin(), + bool_null_mask->end(), + [] __device__(bool element) { return element; }, + stream, + mr); + + output->set_null_mask(std::move(null_mask), null_count); + } + return output; } @@ -229,24 +281,22 @@ std::unique_ptr string_view_operation(column_view base_column, bool is_ptx, std::optional user_data, null_aware is_null_aware, + null_output null_policy, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - rmm::device_buffer null_mask{}; - cudf::size_type null_count{0}; - - if (is_null_aware == null_aware::NO) { - std::tie(null_mask, null_count) = make_transform_null_mask(base_column, inputs, stream, mr); - } else { - null_mask = create_null_mask(base_column.size(), mask_state::UNALLOCATED, stream, mr); - } + auto may_return_nulls = may_evaluate_null(base_column, inputs, is_null_aware, null_policy); + auto bool_null_mask = may_return_nulls + ? std::optional>(base_column.size(), stream, mr) + : std::nullopt; auto kernel = build_span_kernel("cudf::transformation::jit::span_kernel", base_column.size(), {"cudf::string_view"}, inputs, - user_data.has_value(), is_null_aware, + may_return_nulls, + user_data.has_value(), udf, is_ptx, stream, @@ -254,17 +304,27 @@ std::unique_ptr string_view_operation(column_view base_column, rmm::device_uvector string_views(base_column.size(), stream, mr); - launch_span_kernel(kernel, - string_views, - static_cast(null_mask.data()), - inputs, - user_data, - stream, - mr); + launch_span_kernel( + kernel, + string_views, + inputs, + bool_null_mask ? std::optional(bool_null_mask->data()) : std::nullopt, + user_data, + stream, + mr); auto output = make_strings_column(string_views, string_view{}, stream, mr); - output->set_null_mask(std::move(null_mask), null_count); + if (bool_null_mask) { + auto [null_mask, null_count] = detail::valid_if( + bool_null_mask->begin(), + bool_null_mask->end(), + [] __device__(bool element) { return element; }, + stream, + mr); + + output->set_null_mask(std::move(null_mask), null_count); + } return output; } @@ -307,6 +367,7 @@ std::unique_ptr transform(std::vector const& inputs, bool is_ptx, std::optional user_data, null_aware is_null_aware, + null_output null_policy, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -321,11 +382,19 @@ std::unique_ptr transform(std::vector const& inputs, transformation::jit::perform_checks(*base_column, output_type, inputs); if (is_fixed_width(output_type)) { - return transformation::jit::transform_operation( - *base_column, output_type, inputs, udf, is_ptx, user_data, is_null_aware, stream, mr); + return transformation::jit::transform_operation(*base_column, + output_type, + inputs, + udf, + is_ptx, + user_data, + is_null_aware, + null_policy, + stream, + mr); } else if (output_type.id() == type_id::STRING) { return transformation::jit::string_view_operation( - *base_column, inputs, udf, is_ptx, user_data, is_null_aware, stream, mr); + *base_column, inputs, udf, is_ptx, user_data, is_null_aware, null_policy, stream, mr); } else { CUDF_FAIL("Unsupported output type for transform operation"); } @@ -339,11 +408,13 @@ std::unique_ptr transform(std::vector const& inputs, bool is_ptx, std::optional user_data, null_aware is_null_aware, + null_output null_policy, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::transform(inputs, udf, output_type, is_ptx, user_data, is_null_aware, stream, mr); + return detail::transform( + inputs, udf, output_type, is_ptx, user_data, is_null_aware, null_policy, stream, mr); } std::unique_ptr compute_column_jit(table_view const& table, @@ -361,6 +432,7 @@ std::unique_ptr compute_column_jit(table_view const& table, args.is_ptx, args.user_data, args.is_null_aware, + args.null_policy, stream, mr); } diff --git a/cpp/tests/filter/filter_test.cpp b/cpp/tests/filter/filter_test.cpp index 4fe17424c40..74ec1c116a0 100644 --- a/cpp/tests/filter/filter_test.cpp +++ b/cpp/tests/filter/filter_test.cpp @@ -141,7 +141,7 @@ __device__ void is_even(bool* out, int32_t a) { *out = (a % 2 == 0); } CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result[0]->view()); std::string null_cuda = R"***( -__device__ void is_even(bool* out, cuda::std::optional a) { *out = a.has_value() && (*a % 2 == 0); } +__device__ void is_even(cuda::std::optional* out, cuda::std::optional a) { *out = a.has_value() && (*a % 2 == 0); } )***"; auto null_result = cudf::filter({a}, null_cuda, {a}, false, std::nullopt, cudf::null_aware::YES); diff --git a/cpp/tests/jit/row_ir.cpp b/cpp/tests/jit/row_ir.cpp index 73f4f2ffa93..46090d66b2a 100644 --- a/cpp/tests/jit/row_ir.cpp +++ b/cpp/tests/jit/row_ir.cpp @@ -275,6 +275,7 @@ TEST_F(RowIRCudaCodeGenTest, AstConversionBasic) ASSERT_EQ(transform_args.scalar_columns[0]->view().size(), 1); EXPECT_FALSE(transform_args.is_ptx); EXPECT_EQ(transform_args.is_null_aware, null_aware::NO); + EXPECT_EQ(transform_args.null_policy, null_output::PRESERVE); EXPECT_EQ(transform_args.output_type, data_type{type_id::INT32}); ASSERT_EQ(transform_args.columns.size(), 2); @@ -308,7 +309,8 @@ return; transform_args.output_type, transform_args.is_ptx, transform_args.user_data, - transform_args.is_null_aware); + transform_args.is_null_aware, + transform_args.null_policy); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } diff --git a/cpp/tests/streams/transform_test.cpp b/cpp/tests/streams/transform_test.cpp index 86d5fb18b6f..ffcbcbd23dd 100644 --- a/cpp/tests/streams/transform_test.cpp +++ b/cpp/tests/streams/transform_test.cpp @@ -39,6 +39,7 @@ void test_udf(char const* udf, Data data_init, cudf::size_type size, bool is_ptx is_ptx, std::nullopt, cudf::null_aware::NO, + cudf::null_output::PRESERVE, cudf::test::get_default_stream()); } diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp index ab82c94c348..8c1dba4ab8e 100644 --- a/cpp/tests/transform/integration/unary_transform_test.cpp +++ b/cpp/tests/transform/integration/unary_transform_test.cpp @@ -803,7 +803,7 @@ TEST_F(NullTest, ColumnNulls_And_ScalarNull) TEST_F(NullTest, IsNull) { auto udf = R"***( - __device__ inline void is_null(bool * output, cuda::std::optional input) + __device__ inline void is_null(cuda::std::optional* output, cuda::std::optional input) { *output = !input.has_value(); } @@ -820,7 +820,8 @@ TEST_F(NullTest, IsNull) cudf::data_type(cudf::type_id::BOOL8), false, std::nullopt, - cudf::null_aware::YES); + cudf::null_aware::YES, + cudf::null_output::NON_NULLABLE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -829,7 +830,7 @@ TEST_F(NullTest, NullProject) { auto udf = R"***( __device__ inline void null_lerp( - float* output, + cuda::std::optional* output, cuda::std::optional low, cuda::std::optional high, cuda::std::optional t @@ -864,7 +865,8 @@ return l - t * l + t * h; cudf::data_type(cudf::type_id::FLOAT32), false, std::nullopt, - cudf::null_aware::YES); + cudf::null_aware::YES, + cudf::null_output::NON_NULLABLE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*cuda_result, *expected); } diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index 84ff7c809a3..93c62511b38 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type, null_aware +from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type, null_aware, null_output from rmm.librmm.device_buffer cimport device_buffer from rmm.librmm.cuda_stream_view cimport cuda_stream_view @@ -49,6 +49,7 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: bool is_ptx, optional[void *] user_data, null_aware is_null_aware, + null_output null_policy, cuda_stream_view stream, device_memory_resource* mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/types.pxd b/python/pylibcudf/pylibcudf/libcudf/types.pxd index 727bc4e926c..5a71240224e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/types.pxd @@ -53,6 +53,10 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: NO YES + cpdef enum class null_output(int8_t): + PRESERVE + NON_NULLABLE + cpdef enum class nan_equality(bool): ALL_EQUAL UNEQUAL diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd index d52b6e34ee7..c2803040cbe 100644 --- a/python/pylibcudf/pylibcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/transform.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. from libcpp cimport bool -from pylibcudf.libcudf.types cimport bitmask_type, data_type, null_aware +from pylibcudf.libcudf.types cimport bitmask_type, data_type, null_aware, null_output from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -33,6 +33,7 @@ cpdef Column transform( DataType output_type, bool is_ptx, null_aware is_null_aware, + null_output null_policy, Stream stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi index 34684d35323..79eddc835b9 100644 --- a/python/pylibcudf/pylibcudf/transform.pyi +++ b/python/pylibcudf/pylibcudf/transform.pyi @@ -6,7 +6,7 @@ from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.table import Table -from pylibcudf.types import DataType, NullAware +from pylibcudf.types import DataType, NullAware, NullOutput def nans_to_nulls( input: Column, stream: Stream | None = None @@ -33,6 +33,7 @@ def transform( output_type: DataType, is_ptx: bool, null_aware: NullAware = NullAware.NO, + null_output: NullOutput = NullOutput.PRESERVE, stream: Stream | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index e9b96a5ebe0..1a68b8aabc2 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -23,7 +23,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .expressions cimport Expression from .gpumemoryview cimport gpumemoryview -from .types cimport DataType, null_aware +from .types cimport DataType, null_aware, null_output from .utils cimport _get_stream, _get_memory_resource __all__ = [ @@ -185,6 +185,7 @@ cpdef Column transform( DataType output_type, bool is_ptx, null_aware is_null_aware, + null_output null_policy, Stream stream=None, DeviceMemoryResource mr=None, ): @@ -205,6 +206,9 @@ cpdef Column transform( is_null_aware: NullAware If `NO`, the UDF gets non-nullable parameters If `YES`, the UDF gets nullable parameters + null_policy: NullOutput + If `PRESERVE`, null-masks are produced if necessary + If `NON_NULLABLE`, null-masks are not produced stream : Stream | None CUDA stream on which to perform the operation. mr : DeviceMemoryResource | None @@ -220,6 +224,7 @@ cpdef Column transform( cdef string c_transform_udf = transform_udf.encode() cdef bool c_is_ptx = is_ptx cdef null_aware c_is_null_aware = is_null_aware + cdef null_output c_null_policy = null_policy cdef optional[void *] user_data stream = _get_stream(stream) @@ -236,6 +241,7 @@ cpdef Column transform( c_is_ptx, user_data, c_is_null_aware, + c_null_policy, stream.view(), mr.get_mr() ) diff --git a/python/pylibcudf/pylibcudf/types.pxd b/python/pylibcudf/pylibcudf/types.pxd index 2a263862b59..c4bb007d8bb 100644 --- a/python/pylibcudf/pylibcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/types.pxd @@ -13,6 +13,7 @@ from pylibcudf.libcudf.types cimport ( null_order, null_policy, null_aware, + null_output, order, size_type, sorted, diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi index b56dc54dbcc..b06a0241e80 100644 --- a/python/pylibcudf/pylibcudf/types.pyi +++ b/python/pylibcudf/pylibcudf/types.pyi @@ -33,6 +33,10 @@ class NullAware(IntEnum): NO = ... YES = ... +class NullOutput(IntEnum): + PRESERVE = ... + NON_NULLABLE = ... + class NullOrder(IntEnum): AFTER = ... BEFORE = ... diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx index 2dce0dc2080..3f8bb991896 100644 --- a/python/pylibcudf/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -19,6 +19,7 @@ from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint, i from pylibcudf.libcudf.types import nan_equality as NanEquality # no-cython-lint, isort:skip from pylibcudf.libcudf.types import null_equality as NullEquality # no-cython-lint, isort:skip from pylibcudf.libcudf.types import null_aware as NullAware # no-cython-lint, isort:skip +from pylibcudf.libcudf.types import null_output as NullOutput # no-cython-lint, isort:skip from pylibcudf.libcudf.types import null_order as NullOrder # no-cython-lint, isort:skip from pylibcudf.libcudf.types import order as Order # no-cython-lint, isort:skip from pylibcudf.libcudf.types import sorted as Sorted # no-cython-lint, isort:skip @@ -89,6 +90,7 @@ __all__ = [ "NullEquality", "NullOrder", "NullAware", + "NullOutput", "NullPolicy", "Order", "SIZE_TYPE", @@ -380,6 +382,7 @@ MaskState.__str__ = MaskState.__repr__ NanEquality.__str__ = NanEquality.__repr__ NullEquality.__str__ = NullEquality.__repr__ NullAware.__str__ = NullAware.__repr__ +NullOutput.__str__ = NullOutput.__repr__ NullOrder.__str__ = NullOrder.__repr__ Order.__str__ = Order.__repr__ Sorted.__str__ = Sorted.__repr__ diff --git a/python/pylibcudf/tests/test_transform.py b/python/pylibcudf/tests/test_transform.py index 2d0a9e188fd..de89a6e2c8e 100644 --- a/python/pylibcudf/tests/test_transform.py +++ b/python/pylibcudf/tests/test_transform.py @@ -103,5 +103,6 @@ def op(a, b, c): output_type=plc.DataType(plc.TypeId.FLOAT64), is_ptx=True, is_null_aware=plc.types.NullAware.NO, + null_policy=plc.types.NullOutput.PRESERVE, ) assert_column_eq(expect, got) From a6ce5b2b041901b438012abb20f7be75913fc88e Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 8 Oct 2025 02:06:10 +0000 Subject: [PATCH 2/7] checkpoint --- cpp/src/stream_compaction/filter/filter.cu | 13 ++++++----- cpp/src/transform/transform.cu | 25 +++++++++++----------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/src/stream_compaction/filter/filter.cu b/cpp/src/stream_compaction/filter/filter.cu index e547db07d2c..1b881549d74 100644 --- a/cpp/src/stream_compaction/filter/filter.cu +++ b/cpp/src/stream_compaction/filter/filter.cu @@ -125,6 +125,7 @@ struct filter_dispatcher { stencil_iterator, stream, mr); + return cudf::make_strings_column(filtered, cudf::string_view{nullptr, 0}, stream, mr); } @@ -170,11 +171,11 @@ void launch_filter_kernel(jitify2::ConfiguredKernel& kernel, auto [input_handles, inputs] = cudf::jit::column_views_to_device(input_columns, stream, mr); - cudf::jit::device_optional_span const* outputs_ptr = outputs.data(); - column_device_view const* inputs_ptr = inputs.data(); - void* p_user_data = user_data.value_or(nullptr); + cudf::jit::device_optional_span const* p_outputs = outputs.data(); + column_device_view const* p_inputs = inputs.data(); + void* p_user_data = user_data.value_or(nullptr); - std::array args{&outputs_ptr, &inputs_ptr, &p_user_data}; + std::array args{&p_outputs, &p_inputs, &p_user_data}; kernel->launch_raw(args.data()); } @@ -217,7 +218,7 @@ jitify2::StringVec build_jit_template_params( null_aware is_null_aware, bool has_user_data, std::vector const& span_outputs, - std::vector const& column_inputs) + std::vector const& column_inputs) { jitify2::StringVec tparams; @@ -297,8 +298,6 @@ std::vector> filter_operation( stream, mr); - // [ ] handle null-masks and null-aware properly; use null_policy - cudf::jit::device_span const filter_indices_span{filter_indices.data(), filter_indices.size()}; diff --git a/cpp/src/transform/transform.cu b/cpp/src/transform/transform.cu index ad701090e97..a7f1bf418ea 100644 --- a/cpp/src/transform/transform.cu +++ b/cpp/src/transform/transform.cu @@ -53,7 +53,7 @@ jitify2::StringVec build_jit_template_params( bool has_user_data, std::vector const& span_outputs, std::vector const& column_outputs, - std::vector const& column_inputs) + std::vector const& column_inputs) { jitify2::StringVec tparams; @@ -113,6 +113,7 @@ jitify2::ConfiguredKernel build_transform_kernel( may_evaluate_null, has_user_data, cudf::jit::column_type_names(output_columns), + {}, cudf::jit::reflect_input_columns(base_column_size, input_columns))), cuda_source) ->configure_1d_max_occupancy(0, 0, nullptr, stream.value()); @@ -139,7 +140,7 @@ jitify2::ConfiguredKernel build_span_kernel(std::string const& kernel_name, : cudf::jit::parse_single_function_cuda(udf, "GENERIC_TRANSFORM_OP"); return get_kernel(jitify2::reflection::Template(kernel_name) - .instantiate(cudf::jit::build_jit_template_params( + .instantiate(build_jit_template_params( is_null_aware, may_evaluate_null, has_user_data, @@ -234,9 +235,9 @@ std::unique_ptr transform_operation(column_view base_column, output_type, base_column.size(), cudf::mask_state::UNALLOCATED, stream, mr); auto may_return_nulls = may_evaluate_null(base_column, inputs, is_null_aware, null_policy); - auto bool_null_mask = may_return_nulls - ? std::optional>(base_column.size(), stream, mr) - : std::nullopt; + auto bool_null_mask = + may_return_nulls ? std::make_optional>(base_column.size(), stream, mr) + : std::nullopt; auto kernel = build_transform_kernel(is_fixed_point(output_type) ? "cudf::transformation::jit::fixed_point_kernel" @@ -263,8 +264,8 @@ std::unique_ptr transform_operation(column_view base_column, if (bool_null_mask) { auto [null_mask, null_count] = detail::valid_if( - bool_null_mask->begin(), - bool_null_mask->end(), + bool_null_mask->begin(), + bool_null_mask->end(), [] __device__(bool element) { return element; }, stream, mr); @@ -286,9 +287,9 @@ std::unique_ptr string_view_operation(column_view base_column, rmm::device_async_resource_ref mr) { auto may_return_nulls = may_evaluate_null(base_column, inputs, is_null_aware, null_policy); - auto bool_null_mask = may_return_nulls - ? std::optional>(base_column.size(), stream, mr) - : std::nullopt; + auto bool_null_mask = + may_return_nulls ? std::make_optional>(base_column.size(), stream, mr) + : std::nullopt; auto kernel = build_span_kernel("cudf::transformation::jit::span_kernel", base_column.size(), @@ -317,8 +318,8 @@ std::unique_ptr string_view_operation(column_view base_column, if (bool_null_mask) { auto [null_mask, null_count] = detail::valid_if( - bool_null_mask->begin(), - bool_null_mask->end(), + bool_null_mask->begin(), + bool_null_mask->end(), [] __device__(bool element) { return element; }, stream, mr); From 5223b286258ae050473565e155f4a4df3b297434 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 8 Oct 2025 02:10:52 +0000 Subject: [PATCH 3/7] checkpoint --- cpp/src/jit/row_ir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/jit/row_ir.cpp b/cpp/src/jit/row_ir.cpp index a308f5a3ea7..81c55714627 100644 --- a/cpp/src/jit/row_ir.cpp +++ b/cpp/src/jit/row_ir.cpp @@ -314,7 +314,7 @@ std::string operation::generate_code(instance_context& ctx, }); auto cuda = std::format( - "auto {} = cudf::ast::detail::operator_functor{{}}({});", cuda_type(type_, ctx.has_nulls()), id_, From 58e0dc68be0bdbbbb5da4092d890ec68791335ad Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 8 Oct 2025 02:16:13 +0000 Subject: [PATCH 4/7] checkpoint --- python/pylibcudf/pylibcudf/libcudf/types.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/types.pxd b/python/pylibcudf/pylibcudf/libcudf/types.pxd index 5a71240224e..22a4afe91f2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/types.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. from libc.stddef cimport size_t -from libc.stdint cimport int32_t, uint32_t +from libc.stdint cimport int32_t, uint32_t, int8_t from libcpp cimport bool from pylibcudf.exception_handler cimport libcudf_exception_handler From 75dc984767f00be2eded907533af3083ce120de3 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 8 Oct 2025 02:19:20 +0000 Subject: [PATCH 5/7] checkpoint --- cpp/src/jit/row_ir.hpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/cpp/src/jit/row_ir.hpp b/cpp/src/jit/row_ir.hpp index cca504a83bb..99b4942fd99 100644 --- a/cpp/src/jit/row_ir.hpp +++ b/cpp/src/jit/row_ir.hpp @@ -195,8 +195,14 @@ struct get_input final : node { */ data_type get_type() override; + /** + * @copydoc node::is_null_aware + */ [[nodiscard]] bool is_null_aware() override; + /** + * @copydoc node::is_always_nonnullable + */ [[nodiscard]] bool is_always_nonnullable() override; /** @@ -251,8 +257,14 @@ struct set_output final : node { */ data_type get_type() override; + /** + * @copydoc node::is_null_aware + */ [[nodiscard]] bool is_null_aware() override; + /** + * @copydoc node::is_always_nonnullable + */ [[nodiscard]] bool is_always_nonnullable() override; /** @@ -339,8 +351,14 @@ struct operation final : node { */ data_type get_type() override; + /** + * @copydoc node::is_null_aware + */ [[nodiscard]] bool is_null_aware() override; + /** + * @copydoc node::is_always_nonnullable + */ [[nodiscard]] bool is_always_nonnullable() override; /** From b9fabe030cf415dddc1bd43fe030e8aec6092fde Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 8 Oct 2025 10:35:46 +0000 Subject: [PATCH 6/7] update tests --- cpp/src/jit/row_ir.cpp | 2 +- cpp/src/transform/transform.cu | 2 +- cpp/tests/ast/transform_tests.cpp | 36 +++++++++++-------- cpp/tests/jit/row_ir.cpp | 2 +- .../pylibcudf/pylibcudf/libcudf/transform.pxd | 3 +- 5 files changed, 27 insertions(+), 18 deletions(-) diff --git a/cpp/src/jit/row_ir.cpp b/cpp/src/jit/row_ir.cpp index 81c55714627..3414becf8a3 100644 --- a/cpp/src/jit/row_ir.cpp +++ b/cpp/src/jit/row_ir.cpp @@ -114,8 +114,8 @@ std::string set_output::generate_code(instance_context& ctx, "{}\n" "{} {} = {};\n" "*{} = {};", - cuda_type(type_, ctx.has_nulls()), source_code, + cuda_type(type_, ctx.has_nulls()), id_, source_->get_id(), output_id_, diff --git a/cpp/src/transform/transform.cu b/cpp/src/transform/transform.cu index a7f1bf418ea..a5b019e58b3 100644 --- a/cpp/src/transform/transform.cu +++ b/cpp/src/transform/transform.cu @@ -112,8 +112,8 @@ jitify2::ConfiguredKernel build_transform_kernel( is_null_aware, may_evaluate_null, has_user_data, - cudf::jit::column_type_names(output_columns), {}, + cudf::jit::column_type_names(output_columns), cudf::jit::reflect_input_columns(base_column_size, input_columns))), cuda_source) ->configure_1d_max_occupancy(0, 0, nullptr, stream.value()); diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index a3c5c590413..1e554602f81 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -71,8 +71,6 @@ struct executor_jit { using Executors = cudf::test::Types; -using AstTransformTest = TransformTest; - TYPED_TEST_SUITE(TransformTest, Executors); TYPED_TEST(TransformTest, ColumnReference) @@ -145,8 +143,10 @@ TYPED_TEST(TransformTest, NullLiteral) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } -TEST_F(AstTransformTest, IsNull) +TYPED_TEST(TransformTest, IsNull) { + using Executor = TypeParam; + auto c_0 = column_wrapper{{0, 1, 2, 0}, {0, 1, 1, 0}}; auto table = cudf::table_view{{c_0}}; @@ -156,18 +156,18 @@ TEST_F(AstTransformTest, IsNull) auto literal = cudf::ast::literal(literal_value); auto expression = cudf::ast::operation(cudf::ast::ast_operator::IS_NULL, literal); - auto result = executor_ast::compute_column(table, expression); + auto result = Executor::compute_column(table, expression); auto expected1 = column_wrapper({0, 0, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result->view(), verbosity); literal_value.set_valid_async(false); - result = executor_ast::compute_column(table, expression); + result = Executor::compute_column(table, expression); auto expected2 = column_wrapper({1, 1, 1, 1}, cudf::test::iterators::no_nulls()); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected2, result->view(), verbosity); auto col_ref_0 = cudf::ast::column_reference(0); auto expression2 = cudf::ast::operation(cudf::ast::ast_operator::IS_NULL, col_ref_0); - result = executor_ast::compute_column(table, expression2); + result = Executor::compute_column(table, expression2); auto expected3 = column_wrapper({1, 0, 0, 1}, cudf::test::iterators::no_nulls()); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected3, result->view(), verbosity); } @@ -778,8 +778,10 @@ TYPED_TEST(TransformTest, PyMod) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } -TEST_F(AstTransformTest, BasicEqualityNullEqualNoNulls) +TYPED_TEST(TransformTest, BasicEqualityNullEqualNoNulls) { + using Executor = TypeParam; + auto c_0 = column_wrapper{3, 20, 1, 50}; auto c_1 = column_wrapper{3, 7, 1, 0}; auto table = cudf::table_view{{c_0, c_1}}; @@ -789,7 +791,7 @@ TEST_F(AstTransformTest, BasicEqualityNullEqualNoNulls) auto expression = cudf::ast::operation(cudf::ast::ast_operator::NULL_EQUAL, col_ref_0, col_ref_1); auto expected = column_wrapper{true, false, true, false}; - auto result = executor_ast::compute_column(table, expression); + auto result = Executor::compute_column(table, expression); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } @@ -812,8 +814,10 @@ TYPED_TEST(TransformTest, BasicEqualityNormalEqualWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } -TEST_F(AstTransformTest, BasicEqualityNulls) +TYPED_TEST(TransformTest, BasicEqualityNulls) { + using Executor = TypeParam; + auto c_0 = column_wrapper{{3, 20, 1, 2, 50}, {1, 1, 0, 1, 0}}; auto c_1 = column_wrapper{{3, 7, 1, 2, 0}, {1, 1, 1, 0, 0}}; auto table = cudf::table_view{{c_0, c_1}}; @@ -823,7 +827,7 @@ TEST_F(AstTransformTest, BasicEqualityNulls) auto expression = cudf::ast::operation(cudf::ast::ast_operator::NULL_EQUAL, col_ref_0, col_ref_1); auto expected = column_wrapper{{true, false, false, false, true}, {1, 1, 1, 1, 1}}; - auto result = executor_ast::compute_column(table, expression); + auto result = Executor::compute_column(table, expression); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } @@ -891,8 +895,10 @@ TYPED_TEST(TransformTest, BasicAdditionLargeNulls) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } -TEST_F(AstTransformTest, NullLogicalAnd) +TYPED_TEST(TransformTest, NullLogicalAnd) { + using Executor = TypeParam; + auto c_0 = column_wrapper{{false, false, true, true, false, false, true, true}, {1, 1, 1, 1, 1, 0, 0, 0}}; auto c_1 = column_wrapper{{false, true, false, true, true, true, false, true}, @@ -906,13 +912,15 @@ TEST_F(AstTransformTest, NullLogicalAnd) auto expected = column_wrapper{{false, false, false, true, false, false, false, true}, {1, 1, 1, 1, 1, 0, 1, 0}}; - auto result = executor_ast::compute_column(table, expression); + auto result = Executor::compute_column(table, expression); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } -TEST_F(AstTransformTest, NullLogicalOr) +TYPED_TEST(TransformTest, NullLogicalOr) { + using Executor = TypeParam; + auto c_0 = column_wrapper{{false, false, true, true, false, false, true, true}, {1, 1, 1, 1, 1, 0, 1, 0}}; auto c_1 = column_wrapper{{false, true, false, true, true, true, false, true}, @@ -926,7 +934,7 @@ TEST_F(AstTransformTest, NullLogicalOr) auto expected = column_wrapper{{false, true, true, true, false, true, true, true}, {1, 1, 1, 1, 0, 1, 1, 0}}; - auto result = executor_ast::compute_column(table, expression); + auto result = Executor::compute_column(table, expression); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } diff --git a/cpp/tests/jit/row_ir.cpp b/cpp/tests/jit/row_ir.cpp index 46090d66b2a..bb70a1043ba 100644 --- a/cpp/tests/jit/row_ir.cpp +++ b/cpp/tests/jit/row_ir.cpp @@ -275,7 +275,7 @@ TEST_F(RowIRCudaCodeGenTest, AstConversionBasic) ASSERT_EQ(transform_args.scalar_columns[0]->view().size(), 1); EXPECT_FALSE(transform_args.is_ptx); EXPECT_EQ(transform_args.is_null_aware, null_aware::NO); - EXPECT_EQ(transform_args.null_policy, null_output::PRESERVE); + EXPECT_EQ(transform_args.null_policy, null_output::NON_NULLABLE); EXPECT_EQ(transform_args.output_type, data_type{type_id::INT32}); ASSERT_EQ(transform_args.columns.size(), 2); diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index 93c62511b38..3e4686ea94a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -11,7 +11,8 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type, null_aware, null_output +from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type +from pylibcudf.libcudf.types cimport null_aware, null_output from rmm.librmm.device_buffer cimport device_buffer from rmm.librmm.cuda_stream_view cimport cuda_stream_view From 88a2e06955ac43c3012ea63f9e1eccb585caada8 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Wed, 8 Oct 2025 12:17:20 +0000 Subject: [PATCH 7/7] added Null-based filter expression tests --- cpp/src/jit/row_ir.cpp | 2 - cpp/src/jit/row_ir.hpp | 21 +++++-- cpp/tests/filter/filter_test.cpp | 99 ++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 8 deletions(-) diff --git a/cpp/src/jit/row_ir.cpp b/cpp/src/jit/row_ir.cpp index 3414becf8a3..9bacf686097 100644 --- a/cpp/src/jit/row_ir.cpp +++ b/cpp/src/jit/row_ir.cpp @@ -45,8 +45,6 @@ bool instance_context::has_nulls() const { return has_nulls_; } void instance_context::set_has_nulls(bool has_nulls) { has_nulls_ = has_nulls; } -void instance_context::reset() { num_tmp_vars_ = 0; } - get_input::get_input(int32_t input) : id_(), input_(input), type_() {} std::string_view get_input::get_id() { return id_; } diff --git a/cpp/src/jit/row_ir.hpp b/cpp/src/jit/row_ir.hpp index 99b4942fd99..bdff893ef96 100644 --- a/cpp/src/jit/row_ir.hpp +++ b/cpp/src/jit/row_ir.hpp @@ -79,7 +79,7 @@ struct instance_context { private: int32_t num_tmp_vars_ = 0; ///< The number of temporary variables generated std::string tmp_prefix_ = "tmp_"; ///< The prefix for temporary variable identifiers - bool has_nulls_ = false; ///< True if expressions involve null values + bool has_nulls_ = false; ///< If expressions involve null values public: instance_context() = default; ///< Default constructor @@ -100,11 +100,16 @@ struct instance_context { */ std::string make_tmp_id(); + /** + * @brief Returns true if expressions involve null values + */ [[nodiscard]] bool has_nulls() const; + /** + * @brief Sets whether expressions involve null values + * @param has_nulls True if expressions involve null values + */ void set_has_nulls(bool has_nulls); - - void reset(); }; struct node { @@ -121,13 +126,17 @@ struct node { virtual data_type get_type() = 0; /** - * @brief Returns `false` if this node forwards nulls from its inputs to its output + * @brief Returns `false` if this node forwards nulls from its inputs to its output. + * i.e. `ADD` operator is not null-aware because if any of its inputs is null, the output is null. + * but `NULL_EQUAL` operator is null-aware because it can produce a non-null output even if its + * inputs are null. */ virtual bool is_null_aware() = 0; /** - * @brief Returns true if this node always produces a non-nullable output even if its inputs are - * nullable + * @brief Returns `true` if this node always produces a non-nullable output even if its inputs are + * nullable, i.e. `IS_NULL` operator produces a non-nullable boolean output regardless of the + * nullability of its input. */ virtual bool is_always_nonnullable() = 0; diff --git a/cpp/tests/filter/filter_test.cpp b/cpp/tests/filter/filter_test.cpp index 74ec1c116a0..b848c08f03d 100644 --- a/cpp/tests/filter/filter_test.cpp +++ b/cpp/tests/filter/filter_test.cpp @@ -22,6 +22,7 @@ #include #include +#include namespace filters { @@ -287,6 +288,104 @@ __device__ void filter(bool* out, CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_timezones, result[1]->view()); } +struct ast_expression_executor { + static std::unique_ptr filter(cudf::ast::expression const& expr, + cudf::table_view const& table) + { + auto booleans = cudf::compute_column(table, expr); + return cudf::apply_boolean_mask(table, booleans->view()); + } +}; + +struct jit_expression_executor { + static std::unique_ptr filter(cudf::ast::expression const& expr, + cudf::table_view const& table) + { + return cudf::filter(table, expr, table); + } +}; + +template +struct FilterExpressionTest : public cudf::test::BaseFixture { + std::unique_ptr a = + cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7, 8}, + {1, 1, 1, 1, 1, 1, 1, 0}} + .release(); + std::unique_ptr b = + cudf::test::fixed_width_column_wrapper{{1, 8, 3, 4, 5, 6, 7, 8}, + {1, 1, 1, 1, 1, 1, 1, 0}} + .release(); + std::unique_ptr bool_a = + cudf::test::fixed_width_column_wrapper{ + {false, false, true, true, false, false, true, true}, {1, 1, 1, 1, 1, 0, 0, 0}} + .release(); + std::unique_ptr bool_b = + cudf::test::fixed_width_column_wrapper{ + {false, true, false, true, true, true, false, true}, {1, 1, 1, 1, 0, 1, 1, 0}} + .release(); + + cudf::table_view table = cudf::table_view({a->view(), b->view(), bool_a->view(), bool_b->view()}); +}; + +using Executors = cudf::test::Types; + +TYPED_TEST_SUITE(FilterExpressionTest, Executors); + +TYPED_TEST(FilterExpressionTest, IsNull) +{ + using Executor = TypeParam; + + auto tree = cudf::ast::tree(); + auto& ref_0 = tree.push(cudf::ast::column_reference(0)); + auto& is_null_expr = tree.push(cudf::ast::operation(cudf::ast::ast_operator::IS_NULL, ref_0)); + auto& filter_expr = tree.push(cudf::ast::operation(cudf::ast::ast_operator::NOT, is_null_expr)); + auto result = Executor::filter(filter_expr, this->table); + auto expected = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->get_column(0).view()); +} + +TYPED_TEST(FilterExpressionTest, NullEqual) +{ + using Executor = TypeParam; + + auto tree = cudf::ast::tree(); + auto& ref_0 = tree.push(cudf::ast::column_reference(0)); + auto& ref_1 = tree.push(cudf::ast::column_reference(1)); + auto& null_equal_expr = + tree.push(cudf::ast::operation(cudf::ast::ast_operator::NULL_EQUAL, ref_0, ref_1)); + auto result = Executor::filter(null_equal_expr, this->table); + auto expected = cudf::test::fixed_width_column_wrapper{1, 3, 4, 5, 6, 7, 8}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->get_column(0).view()); +} + +TYPED_TEST(FilterExpressionTest, NullLogicalAnd) +{ + using Executor = TypeParam; + + auto tree = cudf::ast::tree(); + auto& ref_2 = tree.push(cudf::ast::column_reference(2)); + auto& ref_3 = tree.push(cudf::ast::column_reference(3)); + auto& and_expr = + tree.push(cudf::ast::operation(cudf::ast::ast_operator::NULL_LOGICAL_AND, ref_2, ref_3)); + auto result = Executor::filter(and_expr, this->table); + auto expected = cudf::test::fixed_width_column_wrapper{4}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->get_column(0).view()); +} + +TYPED_TEST(FilterExpressionTest, NullLogicalOr) +{ + using Executor = TypeParam; + + auto tree = cudf::ast::tree(); + auto& ref_2 = tree.push(cudf::ast::column_reference(2)); + auto& ref_3 = tree.push(cudf::ast::column_reference(3)); + auto& or_expr = + tree.push(cudf::ast::operation(cudf::ast::ast_operator::NULL_LOGICAL_OR, ref_2, ref_3)); + auto result = Executor::filter(or_expr, this->table); + auto expected = cudf::test::fixed_width_column_wrapper{2, 3, 4, 6}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->get_column(0).view()); +} + } // namespace filters CUDF_TEST_PROGRAM_MAIN()