From 485b53c85b7b1cf56d54697e371800670d912d2b Mon Sep 17 00:00:00 2001 From: dvermd <315743+dvermd@users.noreply.github.com> Date: Tue, 5 Aug 2025 08:30:49 +0200 Subject: [PATCH 1/5] fix 128bit ctlz intrinsic UB --- src/intrinsic/mod.rs | 114 ++++++++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index db9f32bad5a..b5e15b3a359 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -405,7 +405,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc | sym::saturating_sub => { match int_type_width_signed(args[0].layout.ty, self) { Some((width, signed)) => match name { - sym::ctlz | sym::cttz => { + sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()), + + sym::cttz => { let func = self.current_func(); let then_block = func.new_block("then"); let else_block = func.new_block("else"); @@ -426,11 +428,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc // in the state need to be updated. 
self.switch_to_block(else_block); - let zeros = match name { - sym::ctlz => self.count_leading_zeroes(width, arg), - sym::cttz => self.count_trailing_zeroes(width, arg), - _ => unreachable!(), - }; + let zeros = self.count_trailing_zeroes(width, arg); self.llbb().add_assignment(None, result, zeros); self.llbb().end_with_jump(None, after_block); @@ -440,7 +438,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc result.to_rvalue() } - sym::ctlz_nonzero => self.count_leading_zeroes(width, args[0].immediate()), + sym::ctlz_nonzero => { + self.count_leading_zeroes_nonzero(width, args[0].immediate()) + } sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()), sym::ctpop => self.pop_count(args[0].immediate()), sym::bswap => { @@ -877,16 +877,46 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { + // if arg is 0, early return 0, else call count_leading_zeroes_nonzero to compute leading zeros + let func = self.current_func(); + let then_block = func.new_block("then"); + let else_block = func.new_block("else"); + let after_block = func.new_block("after"); + + let result = func.new_local(None, self.u32_type, "zeros"); + let zero = self.cx.gcc_zero(arg.get_type()); + let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero); + self.llbb().end_with_conditional(None, cond, then_block, else_block); + + let zero_result = self.cx.gcc_uint(self.u32_type, width); + then_block.add_assignment(None, result, zero_result); + then_block.end_with_jump(None, after_block); + + // NOTE: since jumps were added in a place count_leading_zeroes_nonzero() does not expect, + // the current block in the state need to be updated. 
+ self.switch_to_block(else_block); + + let zeros = self.count_leading_zeroes_nonzero(width, arg); + self.llbb().add_assignment(None, result, zeros); + self.llbb().end_with_jump(None, after_block); + + // NOTE: since jumps were added in a place rustc does not + // expect, the current block in the state need to be updated. + self.switch_to_block(after_block); + + result.to_rvalue() + } + + fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { // TODO(antoyo): use width? - let arg_type = arg.get_type(); let result_type = self.u32_type; + let mut arg_type = arg.get_type(); let arg = if arg_type.is_signed(self.cx) { - let new_type = arg_type.to_unsigned(self.cx); - self.gcc_int_cast(arg, new_type) + arg_type = arg_type.to_unsigned(self.cx); + self.gcc_int_cast(arg, arg_type) } else { arg }; - let arg_type = arg.get_type(); let count_leading_zeroes = // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here // instead of using is_uint(). @@ -900,51 +930,45 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { "__builtin_clzll" } else if width == 128 { - // Algorithm from: https://stackoverflow.com/a/28433850/389119 - let array_type = self.context.new_array_type(None, arg_type, 3); + // arg is guaranteed to not be 0, so either its 64 high or 64 low bits are not 0 + // __buildin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0, + // else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0 + // because arg is not 0. 
+ let result = self.current_func() - .new_local(None, array_type, "count_loading_zeroes_results"); + .new_local(None, result_type, "count_leading_zeroes_results"); + let ctlz_then_block = self.current_func().new_block("ctlz_then"); + let ctlz_else_block = self.current_func().new_block("ctlz_else"); + let ctlz_after_block = self.current_func().new_block("ctlz_after") + ; let sixty_four = self.const_uint(arg_type, 64); let shift = self.lshr(arg, sixty_four); let high = self.gcc_int_cast(shift, self.u64_type); - let low = self.gcc_int_cast(arg, self.u64_type); - - let zero = self.context.new_rvalue_zero(self.usize_type); - let one = self.context.new_rvalue_one(self.usize_type); - let two = self.context.new_rvalue_from_long(self.usize_type, 2); let clzll = self.context.get_builtin_function("__builtin_clzll"); - let first_elem = self.context.new_array_access(None, result, zero); - let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type); - self.llbb() - .add_assignment(self.location, first_elem, first_value); - - let second_elem = self.context.new_array_access(self.location, result, one); - let cast = self.gcc_int_cast(self.context.new_call(self.location, clzll, &[low]), arg_type); - let second_value = self.add(cast, sixty_four); - self.llbb() - .add_assignment(self.location, second_elem, second_value); - - let third_elem = self.context.new_array_access(self.location, result, two); - let third_value = self.const_uint(arg_type, 128); - self.llbb() - .add_assignment(self.location, third_elem, third_value); + let zero_hi = self.const_uint(high.get_type(), 0); + let cond = self.gcc_icmp(IntPredicate::IntNE, high, zero_hi); + self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block); + self.switch_to_block(ctlz_then_block); - let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high); - let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, 
self.u64_type, low); - let not_low_and_not_high = not_low & not_high; - let index = not_high + not_low_and_not_high; - // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in - // gcc. - // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the - // compilation stage. - let index = self.context.new_cast(self.location, index, self.i32_type); + let result_128 = + self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type); - let res = self.context.new_array_access(self.location, result, index); + ctlz_then_block.add_assignment(self.location, result, result_128); + ctlz_then_block.end_with_jump(self.location, ctlz_after_block); - return self.gcc_int_cast(res.to_rvalue(), result_type); + self.switch_to_block(ctlz_else_block); + let low = self.gcc_int_cast(arg, self.u64_type); + let low_leading_zeroes = + self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type); + let sixty_four_result_type = self.const_uint(result_type, 64); + let result_128 = self.add(low_leading_zeroes, sixty_four_result_type); + ctlz_else_block.add_assignment(self.location, result, result_128); + ctlz_else_block.end_with_jump(self.location, ctlz_after_block); + self.switch_to_block(ctlz_after_block); + return result.to_rvalue(); } else { let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll"); From e4fd312a1674559b440ef3fb840dd04ba4f28e65 Mon Sep 17 00:00:00 2001 From: dvermd <315743+dvermd@users.noreply.github.com> Date: Thu, 7 Aug 2025 10:08:23 +0200 Subject: [PATCH 2/5] fix 128bit cttz intrinsic UB --- src/intrinsic/mod.rs | 146 +++++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 75 deletions(-) diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index b5e15b3a359..b1ce33da3a7 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -407,41 +407,13 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc Some((width, 
signed)) => match name { sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()), - sym::cttz => { - let func = self.current_func(); - let then_block = func.new_block("then"); - let else_block = func.new_block("else"); - let after_block = func.new_block("after"); - - let arg = args[0].immediate(); - let result = func.new_local(None, self.u32_type, "zeros"); - let zero = self.cx.gcc_zero(arg.get_type()); - let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero); - self.llbb().end_with_conditional(None, cond, then_block, else_block); - - let zero_result = self.cx.gcc_uint(self.u32_type, width); - then_block.add_assignment(None, result, zero_result); - then_block.end_with_jump(None, after_block); - - // NOTE: since jumps were added in a place - // count_leading_zeroes() does not expect, the current block - // in the state need to be updated. - self.switch_to_block(else_block); - - let zeros = self.count_trailing_zeroes(width, arg); - self.llbb().add_assignment(None, result, zeros); - self.llbb().end_with_jump(None, after_block); - - // NOTE: since jumps were added in a place rustc does not - // expect, the current block in the state need to be updated. 
- self.switch_to_block(after_block); - - result.to_rvalue() - } sym::ctlz_nonzero => { self.count_leading_zeroes_nonzero(width, args[0].immediate()) } - sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()), + sym::cttz => self.count_trailing_zeroes(width, args[0].immediate()), + sym::cttz_nonzero => { + self.count_trailing_zeroes_nonzero(width, args[0].immediate()) + } sym::ctpop => self.pop_count(args[0].immediate()), sym::bswap => { if width == 8 { @@ -983,16 +955,46 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.context.new_cast(self.location, res, result_type) } - fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { - let arg_type = arg.get_type(); + fn count_trailing_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { + // if arg is 0, early return width, else call count_trailing_zeroes_nonzero to compute trailing zeros + let func = self.current_func(); + let then_block = func.new_block("then"); + let else_block = func.new_block("else"); + let after_block = func.new_block("after"); + + let result = func.new_local(None, self.u32_type, "zeros"); + let zero = self.cx.gcc_zero(arg.get_type()); + let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero); + self.llbb().end_with_conditional(None, cond, then_block, else_block); + + let zero_result = self.cx.gcc_uint(self.u32_type, width); + then_block.add_assignment(None, result, zero_result); + then_block.end_with_jump(None, after_block); + + // NOTE: since jumps were added in a place count_trailing_zeroes_nonzero() does not expect, + // the current block in the state need to be updated. + self.switch_to_block(else_block); + + let zeros = self.count_trailing_zeroes_nonzero(width, arg); + self.llbb().add_assignment(None, result, zeros); + self.llbb().end_with_jump(None, after_block); + + // NOTE: since jumps were added in a place rustc does not + // expect, the current block in the state need to be updated. 
+ self.switch_to_block(after_block); + + result.to_rvalue() + } + + fn count_trailing_zeroes_nonzero(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { let result_type = self.u32_type; + let mut arg_type = arg.get_type(); let arg = if arg_type.is_signed(self.cx) { - let new_type = arg_type.to_unsigned(self.cx); - self.gcc_int_cast(arg, new_type) + arg_type = arg_type.to_unsigned(self.cx); + self.gcc_int_cast(arg, arg_type) } else { arg }; - let arg_type = arg.get_type(); let (count_trailing_zeroes, expected_type) = // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here // instead of using is_uint(). @@ -1007,50 +1009,44 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { ("__builtin_ctzll", self.cx.ulonglong_type) } else if arg_type.is_u128(self.cx) { - // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119 - let array_type = self.context.new_array_type(None, arg_type, 3); + // arg is guaranteed to no be 0, so either its 64 high or 64 low bits are not 0 + // __buildin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0, + // else call it on the 64 high bits and add 64. In the else case, 64 high bits can't be 0 + // because arg is not 0. 
+ let result = self.current_func() - .new_local(None, array_type, "count_loading_zeroes_results"); + .new_local(None, result_type, "count_trailing_zeroes_results"); + + let ctlz_then_block = self.current_func().new_block("cttz_then"); + let ctlz_else_block = self.current_func().new_block("cttz_else"); + let ctlz_after_block = self.current_func().new_block("cttz_after"); + let ctzll = self.context.get_builtin_function("__builtin_ctzll"); - let sixty_four = self.gcc_int(arg_type, 64); - let shift = self.gcc_lshr(arg, sixty_four); - let high = self.gcc_int_cast(shift, self.u64_type); let low = self.gcc_int_cast(arg, self.u64_type); + let sixty_four = self.const_uint(arg_type, 64); + let shift = self.lshr(arg, sixty_four); + let high = self.gcc_int_cast(shift, self.u64_type); + let zero_low = self.const_uint(low.get_type(), 0); + let cond = self.gcc_icmp(IntPredicate::IntNE, low, zero_low); + self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block); + self.switch_to_block(ctlz_then_block); - let zero = self.context.new_rvalue_zero(self.usize_type); - let one = self.context.new_rvalue_one(self.usize_type); - let two = self.context.new_rvalue_from_long(self.usize_type, 2); + let result_128 = + self.gcc_int_cast(self.context.new_call(None, ctzll, &[low]), result_type); - let ctzll = self.context.get_builtin_function("__builtin_ctzll"); + ctlz_then_block.add_assignment(self.location, result, result_128); + ctlz_then_block.end_with_jump(self.location, ctlz_after_block); - let first_elem = self.context.new_array_access(self.location, result, zero); - let first_value = self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[low]), arg_type); - self.llbb() - .add_assignment(self.location, first_elem, first_value); - - let second_elem = self.context.new_array_access(self.location, result, one); - let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[high]), arg_type), sixty_four); - self.llbb() - 
.add_assignment(self.location, second_elem, second_value); - - let third_elem = self.context.new_array_access(self.location, result, two); - let third_value = self.gcc_int(arg_type, 128); - self.llbb() - .add_assignment(self.location, third_elem, third_value); - - let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low); - let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high); - let not_low_and_not_high = not_low & not_high; - let index = not_low + not_low_and_not_high; - // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in - // gcc. - // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the - // compilation stage. - let index = self.context.new_cast(self.location, index, self.i32_type); - - let res = self.context.new_array_access(self.location, result, index); - - return self.gcc_int_cast(res.to_rvalue(), result_type); + self.switch_to_block(ctlz_else_block); + let high_trailing_zeroes = + self.gcc_int_cast(self.context.new_call(None, ctzll, &[high]), result_type); + + let sixty_four_result_type = self.const_uint(result_type, 64); + let result_128 = self.add(high_trailing_zeroes, sixty_four_result_type); + ctlz_else_block.add_assignment(self.location, result, result_128); + ctlz_else_block.end_with_jump(self.location, ctlz_after_block); + self.switch_to_block(ctlz_after_block); + return result.to_rvalue(); } else { let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll"); From 8f21e24bfeec2e43928e225b5b45575020599e36 Mon Sep 17 00:00:00 2001 From: dvermd <315743+dvermd@users.noreply.github.com> Date: Sun, 23 Nov 2025 09:59:58 +0100 Subject: [PATCH 3/5] factorize count leading and trailing zeros code --- src/intrinsic/mod.rs | 300 ++++++++++++++++++------------------------- 1 file changed, 124 insertions(+), 176 deletions(-) diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index 
b1ce33da3a7..f50d096e2e8 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -848,8 +848,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.gcc_int_cast(result, result_type) } - fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { - // if arg is 0, early return 0, else call count_leading_zeroes_nonzero to compute leading zeros + fn count_zeroes(&mut self, width: u64, arg: RValue<'gcc>, count_leading: bool) -> RValue<'gcc> { + // if arg is 0, early return width, else call count_leading_zeroes_nonzero or count_trailing_zeroes_nonzero let func = self.current_func(); let then_block = func.new_block("then"); let else_block = func.new_block("else"); @@ -864,11 +864,15 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { then_block.add_assignment(None, result, zero_result); then_block.end_with_jump(None, after_block); - // NOTE: since jumps were added in a place count_leading_zeroes_nonzero() does not expect, + // NOTE: since jumps were added in a place count_xxxxing_zeroes_nonzero() does not expect, // the current block in the state need to be updated.
self.switch_to_block(else_block); - let zeros = self.count_leading_zeroes_nonzero(width, arg); + let zeros = if count_leading { + self.count_leading_zeroes_nonzero(width, arg) + } else { + self.count_trailing_zeroes_nonzero(width, arg) + }; self.llbb().add_assignment(None, result, zeros); self.llbb().end_with_jump(None, after_block); @@ -879,7 +883,30 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { result.to_rvalue() } - fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { + fn count_zeroes_nonzero( + &mut self, + width: u64, + arg: RValue<'gcc>, + count_leading: bool, + ) -> RValue<'gcc> { + // Pre-condition: arg is guaranteed to not be 0 by caller + fn use_builtin_function<'a, 'gcc, 'tcx>( + builder: &mut Builder<'a, 'gcc, 'tcx>, + builtin: &str, + arg: RValue<'gcc>, + arg_type: gccjit::Type<'gcc>, + expected_type: gccjit::Type<'gcc>, + ) -> RValue<'gcc> { + let arg = if arg_type != expected_type { + builder.context.new_cast(builder.location, arg, expected_type) + } else { + arg + }; + let builtin = builder.context.get_builtin_function(builtin); + let res = builder.context.new_call(builder.location, builtin, &[arg]); + builder.context.new_cast(builder.location, res, builder.u32_type) + } + // TODO(antoyo): use width? let result_type = self.u32_type; let mut arg_type = arg.get_type(); @@ -889,186 +916,107 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } else { arg }; - let count_leading_zeroes = - // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here - // instead of using is_uint(). 
- if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) { - "__builtin_clz" - } - else if arg_type.is_ulong(self.cx) { - "__builtin_clzl" - } - else if arg_type.is_ulonglong(self.cx) { - "__builtin_clzll" - } - else if width == 128 { - // arg is guaranteed to not be 0, so either its 64 high or 64 low bits are not 0 - // __buildin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0, - // else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0 - // because arg is not 0. - - let result = self.current_func() - .new_local(None, result_type, "count_leading_zeroes_results"); - - let ctlz_then_block = self.current_func().new_block("ctlz_then"); - let ctlz_else_block = self.current_func().new_block("ctlz_else"); - let ctlz_after_block = self.current_func().new_block("ctlz_after") - ; - let sixty_four = self.const_uint(arg_type, 64); - let shift = self.lshr(arg, sixty_four); - let high = self.gcc_int_cast(shift, self.u64_type); - - let clzll = self.context.get_builtin_function("__builtin_clzll"); - - let zero_hi = self.const_uint(high.get_type(), 0); - let cond = self.gcc_icmp(IntPredicate::IntNE, high, zero_hi); - self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block); - self.switch_to_block(ctlz_then_block); - - let result_128 = - self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type); - - ctlz_then_block.add_assignment(self.location, result, result_128); - ctlz_then_block.end_with_jump(self.location, ctlz_after_block); - - self.switch_to_block(ctlz_else_block); - let low = self.gcc_int_cast(arg, self.u64_type); - let low_leading_zeroes = - self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type); - let sixty_four_result_type = self.const_uint(result_type, 64); - let result_128 = self.add(low_leading_zeroes, sixty_four_result_type); - ctlz_else_block.add_assignment(self.location, result, result_128); - 
ctlz_else_block.end_with_jump(self.location, ctlz_after_block); - self.switch_to_block(ctlz_after_block); - return result.to_rvalue(); - } - else { - let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll"); - let arg = self.context.new_cast(self.location, arg, self.ulonglong_type); - let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64; - let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8); - let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff; - return self.context.new_cast(self.location, res, result_type); + // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here + // instead of using is_uint(). + if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) { + let builtin = if count_leading { "__builtin_clz" } else { "__builtin_ctz" }; + use_builtin_function(self, builtin, arg, arg_type, self.cx.uint_type) + } else if arg_type.is_ulong(self.cx) { + let builtin = if count_leading { "__builtin_clzl" } else { "__builtin_ctzl" }; + use_builtin_function(self, builtin, arg, arg_type, self.cx.uint_type) + } else if arg_type.is_ulonglong(self.cx) { + let builtin = if count_leading { "__builtin_clzll" } else { "__builtin_ctzll" }; + use_builtin_function(self, builtin, arg, arg_type, self.cx.uint_type) + } else if width == 128 { + // arg is guaranteed to not be 0, so either its 64 high or 64 low bits are not 0 + // __builtin_clzll is UB when called with 0, so call it on the 64 high bits if they are not 0, + // else call it on the 64 low bits and add 64. In the else case, 64 low bits can't be 0 + // because arg is not 0. + // __builtin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0, + // else call it on the 64 high bits and add 64. In the else case, 64 high bits can't be 0 + // because arg is not 0.
+ + let result = self.current_func().new_local(None, result_type, "count_zeroes_results"); + + let cz_then_block = self.current_func().new_block("cz_then"); + let cz_else_block = self.current_func().new_block("cz_else"); + let cz_after_block = self.current_func().new_block("cz_after"); + + let low = self.gcc_int_cast(arg, self.u64_type); + let sixty_four = self.const_uint(arg_type, 64); + let shift = self.lshr(arg, sixty_four); + let high = self.gcc_int_cast(shift, self.u64_type); + + let (first, second, builtin) = if count_leading { + (low, high, self.context.get_builtin_function("__builtin_clzll")) + } else { + (high, low, self.context.get_builtin_function("__builtin_ctzll")) }; - let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes); - let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]); - self.context.new_cast(self.location, res, result_type) - } - fn count_trailing_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { - // if arg is 0, early return width, else call count_trailing_zeroes_nonzero to compute trailing zeros - let func = self.current_func(); - let then_block = func.new_block("then"); - let else_block = func.new_block("else"); - let after_block = func.new_block("after"); - - let result = func.new_local(None, self.u32_type, "zeros"); - let zero = self.cx.gcc_zero(arg.get_type()); - let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero); - self.llbb().end_with_conditional(None, cond, then_block, else_block); + let zero_64 = self.const_uint(self.u64_type, 0); + let cond = self.gcc_icmp(IntPredicate::IntNE, second, zero_64); + self.llbb().end_with_conditional(self.location, cond, cz_then_block, cz_else_block); + self.switch_to_block(cz_then_block); + + let result_128 = + self.gcc_int_cast(self.context.new_call(None, builtin, &[second]), result_type); + + cz_then_block.add_assignment(self.location, result, result_128); + cz_then_block.end_with_jump(self.location, cz_after_block); + + 
self.switch_to_block(cz_else_block); + let count_more_zeroes = + self.gcc_int_cast(self.context.new_call(None, builtin, &[first]), result_type); + let sixty_four_result_type = self.const_uint(result_type, 64); + let count_result_type = self.add(count_more_zeroes, sixty_four_result_type); + cz_else_block.add_assignment(self.location, result, count_result_type); + cz_else_block.end_with_jump(self.location, cz_after_block); + self.switch_to_block(cz_after_block); + result.to_rvalue() + } else { + let byte_diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64; + let diff = self.context.new_rvalue_from_long(self.int_type, byte_diff * 8); + let ull_arg = self.context.new_cast(self.location, arg, self.ulonglong_type); - let zero_result = self.cx.gcc_uint(self.u32_type, width); - then_block.add_assignment(None, result, zero_result); - then_block.end_with_jump(None, after_block); + let res = if count_leading { + let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll"); + self.context.new_call(self.location, count_leading_zeroes, &[ull_arg]) - diff + } else { + let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll"); + let mask = self.context.new_rvalue_from_long(arg_type, -1); // To get the value with all bits set. + let masked = mask + & self.context.new_unary_op( + self.location, + UnaryOp::BitwiseNegate, + arg_type, + arg, + ); + let cond = + self.context.new_comparison(self.location, ComparisonOp::Equals, masked, mask); + let diff = diff * self.context.new_cast(self.location, cond, self.int_type); - // NOTE: since jumps were added in a place count_trailing_zeroes_nonzero() does not expect, - // the current block in the state need to be updated. 
- self.switch_to_block(else_block); + self.context.new_call(self.location, count_trailing_zeroes, &[ull_arg]) - diff + }; + self.context.new_cast(self.location, res, result_type) + } + } - let zeros = self.count_trailing_zeroes_nonzero(width, arg); - self.llbb().add_assignment(None, result, zeros); - self.llbb().end_with_jump(None, after_block); + fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { + self.count_zeroes(width, arg, true) + } - // NOTE: since jumps were added in a place rustc does not - // expect, the current block in the state need to be updated. - self.switch_to_block(after_block); + fn count_leading_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { + // Pre-condition: arg is guaranteed to not be 0 by caller, else count_leading_zeroes should be used + self.count_zeroes_nonzero(width, arg, true) + } - result.to_rvalue() + fn count_trailing_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { + self.count_zeroes(width, arg, false) } - fn count_trailing_zeroes_nonzero(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { - let result_type = self.u32_type; - let mut arg_type = arg.get_type(); - let arg = if arg_type.is_signed(self.cx) { - arg_type = arg_type.to_unsigned(self.cx); - self.gcc_int_cast(arg, arg_type) - } else { - arg - }; - let (count_trailing_zeroes, expected_type) = - // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here - // instead of using is_uint(). - if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) { - // NOTE: we don't need to & 0xFF for uchar because the result is undefined on zero.
- ("__builtin_ctz", self.cx.uint_type) - } - else if arg_type.is_ulong(self.cx) { - ("__builtin_ctzl", self.cx.ulong_type) - } - else if arg_type.is_ulonglong(self.cx) { - ("__builtin_ctzll", self.cx.ulonglong_type) - } - else if arg_type.is_u128(self.cx) { - // arg is guaranteed to no be 0, so either its 64 high or 64 low bits are not 0 - // __buildin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0, - // else call it on the 64 high bits and add 64. In the else case, 64 high bits can't be 0 - // because arg is not 0. - - let result = self.current_func() - .new_local(None, result_type, "count_trailing_zeroes_results"); - - let ctlz_then_block = self.current_func().new_block("cttz_then"); - let ctlz_else_block = self.current_func().new_block("cttz_else"); - let ctlz_after_block = self.current_func().new_block("cttz_after"); - let ctzll = self.context.get_builtin_function("__builtin_ctzll"); - - let low = self.gcc_int_cast(arg, self.u64_type); - let sixty_four = self.const_uint(arg_type, 64); - let shift = self.lshr(arg, sixty_four); - let high = self.gcc_int_cast(shift, self.u64_type); - let zero_low = self.const_uint(low.get_type(), 0); - let cond = self.gcc_icmp(IntPredicate::IntNE, low, zero_low); - self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block); - self.switch_to_block(ctlz_then_block); - - let result_128 = - self.gcc_int_cast(self.context.new_call(None, ctzll, &[low]), result_type); - - ctlz_then_block.add_assignment(self.location, result, result_128); - ctlz_then_block.end_with_jump(self.location, ctlz_after_block); - - self.switch_to_block(ctlz_else_block); - let high_trailing_zeroes = - self.gcc_int_cast(self.context.new_call(None, ctzll, &[high]), result_type); - - let sixty_four_result_type = self.const_uint(result_type, 64); - let result_128 = self.add(high_trailing_zeroes, sixty_four_result_type); - ctlz_else_block.add_assignment(self.location, result, result_128); - 
ctlz_else_block.end_with_jump(self.location, ctlz_after_block); - self.switch_to_block(ctlz_after_block); - return result.to_rvalue(); - } - else { - let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll"); - let arg_size = arg_type.get_size(); - let casted_arg = self.context.new_cast(self.location, arg, self.ulonglong_type); - let byte_diff = self.ulonglong_type.get_size() as i64 - arg_size as i64; - let diff = self.context.new_rvalue_from_long(self.int_type, byte_diff * 8); - let mask = self.context.new_rvalue_from_long(arg_type, -1); // To get the value with all bits set. - let masked = mask & self.context.new_unary_op(self.location, UnaryOp::BitwiseNegate, arg_type, arg); - let cond = self.context.new_comparison(self.location, ComparisonOp::Equals, masked, mask); - let diff = diff * self.context.new_cast(self.location, cond, self.int_type); - let res = self.context.new_call(self.location, count_trailing_zeroes, &[casted_arg]) - diff; - return self.context.new_cast(self.location, res, result_type); - }; - let count_trailing_zeroes = self.context.get_builtin_function(count_trailing_zeroes); - let arg = if arg_type != expected_type { - self.context.new_cast(self.location, arg, expected_type) - } else { - arg - }; - let res = self.context.new_call(self.location, count_trailing_zeroes, &[arg]); - self.context.new_cast(self.location, res, result_type) + fn count_trailing_zeroes_nonzero(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { + // Pre-condition: arg is guaranteed to not be 0 by caller, else count_trailing_zeroes should be used + self.count_zeroes_nonzero(width, arg, false) } fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> { From d7023631df345a927ae0c331cd1a7c0f55bac842 Mon Sep 17 00:00:00 2001 From: dvermd <315743+dvermd@users.noreply.github.com> Date: Mon, 17 Nov 2025 06:01:06 +0100 Subject: [PATCH 4/5] add ctlz and cttz tests --- tests/run/int_intrinsics.rs | 72 +++++++++++++++++++++++++++++++++++++ 1 file
changed, 72 insertions(+) create mode 100644 tests/run/int_intrinsics.rs diff --git a/tests/run/int_intrinsics.rs b/tests/run/int_intrinsics.rs new file mode 100644 index 00000000000..c3d363c8428 --- /dev/null +++ b/tests/run/int_intrinsics.rs @@ -0,0 +1,72 @@ +// Compiler: +// +// Run-time: +#![feature(core_intrinsics, intrinsics)] +#![no_main] + +use std::intrinsics::black_box; + +#[rustc_intrinsic] +pub const fn ctlz<T: Copy>(_x: T) -> u32; + +#[rustc_intrinsic] +pub const fn cttz<T: Copy>(_x: T) -> u32; + +#[no_mangle] +extern "C" fn main(_argc: i32, _argv: *const *const u8) -> i32 { + macro_rules! check { + ($func_name:ident, $input:expr, $expected:expr, $res_ident:ident) => {{ + $res_ident += 1; + if $func_name(black_box($input)) != $expected { + return $res_ident; + } + }}; + } + let mut res = 0; + check!(ctlz, 0_u128, 128_u32, res); + check!(ctlz, 1_u128, 127_u32, res); + check!(ctlz, 0x4000_0000_0000_0000_0000_0000_0000_0000_u128, 1_u32, res); + check!(ctlz, 0x8000_0000_0000_0000_0000_0000_0000_0000_u128, 0_u32, res); + check!(cttz, 0_u128, 128_u32, res); + check!(cttz, 1_u128, 0_u32, res); + check!(cttz, 2_u128, 1_u32, res); + check!(cttz, 0x8000_0000_0000_0000_0000_0000_0000_0000_u128, 127_u32, res); + + check!(ctlz, 0_u64, 64_u32, res); + check!(ctlz, 1_u64, 63_u32, res); + check!(ctlz, 0x4000_0000_0000_0000_u64, 1_u32, res); + check!(ctlz, 0x8000_0000_0000_0000_u64, 0_u32, res); + check!(cttz, 0_u64, 64_u32, res); + check!(cttz, 1_u64, 0_u32, res); + check!(cttz, 2_u64, 1_u32, res); + check!(cttz, 0x8000_0000_0000_0000_u64, 63_u32, res); + + check!(ctlz, 0_u32, 32_u32, res); + check!(ctlz, 1_u32, 31_u32, res); + check!(ctlz, 0x4000_0000_u32, 1_u32, res); + check!(ctlz, 0x8000_0000_u32, 0_u32, res); + check!(cttz, 0_u32, 32_u32, res); + check!(cttz, 1_u32, 0_u32, res); + check!(cttz, 2_u32, 1_u32, res); + check!(cttz, 0x8000_0000_u32, 31_u32, res); + + check!(ctlz, 0_u16, 16_u32, res); + check!(ctlz, 1_u16, 15_u32, res); + check!(ctlz, 0x4000_u16, 1_u32, res); +
check!(ctlz, 0x8000_u16, 0_u32, res); + check!(cttz, 0_u16, 16_u32, res); + check!(cttz, 1_u16, 0_u32, res); + check!(cttz, 2_u16, 1_u32, res); + check!(cttz, 0x8000_u16, 15_u32, res); + + check!(ctlz, 0_u8, 8_u32, res); + check!(ctlz, 1_u8, 7_u32, res); + check!(ctlz, 0x40_u8, 1_u32, res); + check!(ctlz, 0x80_u8, 0_u32, res); + check!(cttz, 0_u8, 8_u32, res); + check!(cttz, 1_u8, 0_u32, res); + check!(cttz, 2_u8, 1_u32, res); + check!(cttz, 0x80_u8, 7_u32, res); + + 0 +} From a9b15b61033f246d04fe7e568b8d13ecb3da7f06 Mon Sep 17 00:00:00 2001 From: dvermd <315743+dvermd@users.noreply.github.com> Date: Mon, 17 Nov 2025 06:02:25 +0100 Subject: [PATCH 5/5] add ubsan to Debug test Profile --- tests/lang_tests_common.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/lang_tests_common.rs b/tests/lang_tests_common.rs index 9abe97b1087..983ffc2b640 100644 --- a/tests/lang_tests_common.rs +++ b/tests/lang_tests_common.rs @@ -115,7 +115,19 @@ pub fn main_inner(profile: Profile) { } } match profile { - Profile::Debug => {} + Profile::Debug => { + if test_target.is_ok() { + // m68k doesn't have lubsan for now + compiler.args(["-C", "llvm-args=sanitize-undefined"]); + } else { + compiler.args([ + "-C", + "llvm-args=sanitize-undefined", + "-C", + "link-args=-lubsan", + ]); + } + } Profile::Release => { compiler.args(["-C", "opt-level=3", "-C", "lto=no"]); }