Skip to content

Commit 5df3c66

Browse files
committed
Optimize multiplication and division operations
1 parent caf0167 commit 5df3c66

File tree

8 files changed

+475
-271
lines changed

8 files changed

+475
-271
lines changed

include/slimcpplib/long_fixdiv.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,10 +157,13 @@ constexpr long_fixed_divider<type_t> long_fixed_divider<type_t>::create(const ty
157157
// Gutter lines ending in '-' precede pre-commit statements, those ending in
// '+' precede post-commit statements; the notes below describe the
// post-commit body.
//
// divide(): multiplies `dividend` by the member `multiplier` with mul(), which
// overwrites mul_lo with its result_lo and returns its result_hi (kept in
// mul_hi). When `addition` is non-zero it is added to mul_lo with add(), and
// the carry flag add() returns is added to mul_hi. The function returns
// mul_hi shifted right by `shift`.
// NOTE(review): presumably this is division by a precomputed reciprocal set up
// in create() - confirm there; only the arithmetic above is visible here.
template<typename type_t>
158158
constexpr type_t long_fixed_divider<type_t>::divide(const type_t& dividend) const noexcept
159159
{
160-
type_t mul_hi = addition;
161-
const type_t mul_lo = mulc(dividend, multiplier, mul_hi);
160+
type_t mul_lo = dividend;
161+
type_t mul_hi = mul(mul_lo, multiplier);
162+
163+
if (addition != 0)
164+
mul_hi += add(mul_lo, addition);
162165

163-
return mul_hi >> shift;
// mul_hi is a local, so it is shifted in place; this assumes type_t's
// operator>>= yields the shifted value - TODO confirm for long integer types.
166+
return mul_hi >>= shift;
164167
}
165168

166169

include/slimcpplib/long_int.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,11 @@ constexpr long_int_t<native_t, size>& long_int_t<native_t, size>::negate() noexc
219219
{
220220
bool borrow = true;
221221

222-
for (uint_t n = 0; n < std::size(digits); ++n)
223-
digits[n] = ~subb<native_t>(digits[n], 0, borrow);
222+
for (uint_t n = 0; n < std::size(digits); ++n) {
223+
224+
borrow = subb<native_t>(digits[n], 0, borrow);
225+
digits[n] = ~digits[n];
226+
}
224227

225228
return *this;
226229
}

include/slimcpplib/long_io.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ inline std::basic_istream<char_t, traits_t>& operator>>(std::basic_istream<char_
139139
for (uint_t digit_idx = 0; digit_idx < digit_count; ++digit_idx) {
140140

141141
long_uint_t carry = impl::from_char(input[digit_idx]);
142-
value = mulc(value, long_uint_t(base), carry);
142+
carry = mulc(value, long_uint_t(base), carry);
143143

144144
if (carry > 0)
145145
throw std::ios::failure("Input integer value is out of range.");

include/slimcpplib/long_math.h

Lines changed: 57 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -169,17 +169,23 @@ constexpr type_t shr2(type_t value_hi, type_t value_lo, uint_t shift) noexcept;
169169
// add with carry
170170

171171
// The line after a '-' gutter marker is the pre-commit declaration; lines
// after '+' markers are the post-commit ones.
//
// add(): adds value2 to value1 in place and returns the carry-out flag (the
// definition returns `value1 < value2` evaluated after the addition).
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
172-
constexpr type_t addc(type_t value1, type_t value2, bool& carry) noexcept;
172+
constexpr bool add(type_t& value1, type_t value2) noexcept;
// addc(): like add(), but the incoming `carry` flag is also added to value1;
// returns true when either of the two additions carried out.
173+
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
174+
constexpr bool addc(type_t& value1, type_t value2, bool carry) noexcept;
173175

174176
// subtract with borrow
175177

176178
// The line after a '-' gutter marker is the pre-commit declaration; lines
// after '+' markers are the post-commit ones.
//
// sub(): subtracts value2 from value1 in place and returns the borrow flag
// (the definition returns true when the result is greater than the original
// value1).
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
177-
constexpr type_t subb(type_t value1, type_t value2, bool& borrow) noexcept;
179+
constexpr bool sub(type_t& value1, type_t value2) noexcept;
// subb(): like sub(), but the incoming `borrow` flag is also subtracted from
// value1; returns true when either of the two subtractions borrowed.
180+
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
181+
constexpr bool subb(type_t& value1, type_t value2, bool borrow) noexcept;
178182

179183
// multiply with carry
180184

181185
// The line after a '-' gutter marker is the pre-commit declaration; lines
// after '+' markers are the post-commit ones.
//
// mul(): multiplies value1 by value2; the definition stores its result_lo back
// into value1 and returns its result_hi.
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
182-
constexpr type_t mulc(type_t value1, type_t value2, type_t& carry) noexcept;
186+
constexpr type_t mul(type_t& value1, type_t value2) noexcept;
// mulc(): like mul(), but `carry` is added to result_lo before it is stored
// into value1, and the carry flag of that addition is added to the returned
// result_hi.
187+
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
188+
constexpr type_t mulc(type_t& value1, type_t value2, type_t carry) noexcept;
183189

184190
// divide with remainder
185191

@@ -367,44 +373,58 @@ constexpr type_t shr2(type_t value_hi, type_t value_lo, uint_t shift) noexcept
367373

368374
////////////////////////////////////////////////////////////////////////////////////////////////////
369375
// Lines after '-' gutter markers belong to the removed pre-commit addc();
// lines after '+' markers form the post-commit add().
//
// add(): adds value2 to value1 in place. Returns `value1 < value2` evaluated
// after the addition; for a wrapping unsigned type_t that is true exactly when
// the sum overflowed, i.e. the carry out.
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int>>
370-
constexpr type_t addc(type_t value1, const type_t value2, bool& carry) noexcept
376+
constexpr bool add(type_t& value1, type_t value2) noexcept
371377
{
372-
bool carry_new = false;
378+
value1 += value2;
// value2 was passed by value, so it still holds the original addend here.
379+
return value1 < value2;
380+
}
381+
373382

374-
type_t result = value1;
375-
result += value2;
376-
carry_new = carry_new || (result < value2);
377-
result += carry;
378-
carry_new = carry_new || (result < type_t(carry));
379383

380-
carry = carry_new;
381-
return result;
384+
////////////////////////////////////////////////////////////////////////////////////////////////////
385+
// addc(): adds value2 and then the incoming `carry` flag to value1, in place.
// Returns true when either addition left value1 smaller than the operand just
// added (value2, then type_t(carry)), i.e. when a carry out occurred.
// Lines consisting of a number followed by '+' are diff gutter markers.
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int>>
386+
constexpr bool addc(type_t& value1, type_t value2, bool carry) noexcept
387+
{
388+
value1 += value2;
// carry produced by adding value2
389+
bool carry_new = value1 < value2;
390+
value1 += carry;
// carry produced by adding the incoming flag; merged with the first one
391+
carry_new = carry_new || (value1 < type_t(carry));
392+
393+
return carry_new;
382394
}
383395

384396

385397

386398
////////////////////////////////////////////////////////////////////////////////////////////////////
387399
// Lines after '-' gutter markers belong to the removed pre-commit subb();
// lines after '+' markers form the post-commit sub().
//
// sub(): subtracts value2 from value1 in place. Returns true when the result
// is greater than the original value1; for a wrapping unsigned type_t that is
// exactly when the subtraction borrowed.
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int>>
388-
constexpr type_t subb(type_t value1, type_t value2, bool& borrow) noexcept
400+
constexpr bool sub(type_t& value1, type_t value2) noexcept
389401
{
390-
bool borrow_new = false;
// keep the minuend: value1 is overwritten by the subtraction below
402+
const type_t tmp = value1;
403+
value1 -= value2;
404+
return value1 > tmp;
405+
}
406+
391407

392-
type_t result = value1;
393-
result -= value2;
394-
borrow_new = borrow_new || (result > value1);
395-
type_t result_tmp = result;
396-
result -= borrow;
397-
borrow_new = borrow_new || (result > result_tmp);
398408

399-
borrow = borrow_new;
400-
return result;
409+
////////////////////////////////////////////////////////////////////////////////////////////////////
410+
// subb(): subtracts value2 and then the incoming `borrow` flag from value1, in
// place. Returns true when either subtraction left value1 greater than it was
// immediately before that step, i.e. when a borrow occurred.
// Lines consisting of a number followed by '+' are diff gutter markers.
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int>>
411+
constexpr bool subb(type_t& value1, type_t value2, bool borrow) noexcept
412+
{
413+
type_t tmp = value1;
414+
value1 -= value2;
// borrow produced by subtracting value2
415+
bool borrow_new = value1 > tmp;
// re-snapshot so the second comparison only sees the effect of `borrow`
416+
tmp = value1;
417+
value1 -= borrow;
418+
borrow_new = borrow_new || (value1 > tmp);
419+
420+
return borrow_new;
401421
}
402422

403423

404424

405425
////////////////////////////////////////////////////////////////////////////////////////////////////
406-
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
407-
constexpr type_t mulc_classic(type_t value1, type_t value2, type_t& carry) noexcept
426+
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int>>
427+
constexpr type_t mul(type_t& value1, type_t value2) noexcept
408428
{
409429
const type_t value1_lo = half_lo(value1);
410430
const type_t value1_hi = half_hi(value1);
@@ -419,88 +439,34 @@ constexpr type_t mulc_classic(type_t value1, type_t value2, type_t& carry) noexc
419439
const type_t result_lo = half_make_hi(half_lo(t2)) + half_lo(t0);
420440
const type_t result_hi = t3 + half_hi(t1);
421441

422-
bool add_carry = false;
423-
const type_t result = addc(result_lo, carry, add_carry);
424-
carry = result_hi + add_carry;
442+
value1 = result_lo;
425443

426-
return result;
444+
return result_hi;
427445
}
428446

429447

430448

431449
////////////////////////////////////////////////////////////////////////////////////////////////////
432-
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int> = 0>
433-
constexpr type_t mulc_karatsuba(type_t value1, type_t value2, type_t& carry) noexcept
450+
// Reading guide: lines after 'N-' gutter markers are removed pre-commit code
// (the body of mulc_karatsuba and, near the end, the old mulc wrapper); lines
// after 'N+' markers, together with the unmarked context lines, form the
// post-commit mulc().
//
// Post-commit mulc(): splits value1 and value2 with half_lo()/half_hi(), forms
// the four partial products t0..t3 (each folding in a half of the previous
// one), assembles result_lo and result_hi from them, adds `carry` to result_lo
// with add() and adds the returned carry flag to result_hi. result_lo is
// written back to value1 and result_hi is returned.
// NOTE(review): half_lo/half_hi/half_make_hi are defined outside this view;
// presumably they select or position half of type_t's bits - confirm.
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int>>
451+
constexpr type_t mulc(type_t& value1, type_t value2, type_t carry) noexcept
434452
{
435453
const type_t value1_lo = half_lo(value1);
436454
const type_t value1_hi = half_hi(value1);
437455
const type_t value2_lo = half_lo(value2);
438456
const type_t value2_hi = half_hi(value2);
439457

440-
const type_t x = value1_hi * value2_hi;
441-
const type_t y = value1_lo * value2_lo;
442-
443-
const type_t a_plus_b = value1_lo + value1_hi;
444-
const type_t c_plus_d = value2_lo + value2_hi;
445-
assert(half_hi(a_plus_b) <= 1);
446-
assert(half_hi(c_plus_d) <= 1);
447-
const type_t a_plus_b_lo = half_lo(a_plus_b);
448-
const type_t a_plus_b_hi = half_hi(a_plus_b);
449-
const type_t c_plus_d_lo = half_lo(c_plus_d);
450-
const type_t c_plus_d_hi = half_hi(c_plus_d);
451-
452-
type_t z_lo = a_plus_b_lo * c_plus_d_lo;
453-
type_t z_hi = 0;
454-
455-
if (a_plus_b_hi != 0) {
456-
457-
bool add_carry = false;
458-
z_lo = addc(z_lo, half_make_hi(c_plus_d_lo), add_carry);
459-
z_hi += add_carry;
460-
}
461-
462-
if (c_plus_d_hi != 0) {
463-
464-
bool add_carry = false;
465-
z_lo = addc(z_lo, half_make_hi(a_plus_b_lo), add_carry);
466-
z_hi += add_carry;
467-
}
468-
469-
if (half_hi(a_plus_b) != 0 && half_hi(c_plus_d) != 0)
470-
++z_hi;
471-
472-
bool sub_borrow = false;
473-
z_lo = subb(z_lo, x, sub_borrow);
474-
z_hi -= sub_borrow;
475-
sub_borrow = false;
476-
z_lo = subb(z_lo, y, sub_borrow);
477-
z_hi -= sub_borrow;
478-
assert(z_hi <= 1);
479-
480-
z_hi = shl2(z_hi, z_lo, bit_count_v<type_t> / 2);
481-
z_lo <<= bit_count_v<type_t> / 2;
482-
483-
type_t result_lo = y;
484-
type_t result_hi = x;
485-
486-
bool add_carry = false;
487-
result_lo = addc(result_lo, z_lo, add_carry);
488-
result_hi += z_hi + add_carry;
489-
add_carry = false;
490-
result_lo = addc(result_lo, carry, add_carry);
491-
carry = result_hi + add_carry;
492-
493-
return result_lo;
494-
}
// Post-commit body resumes: partial products, each adding a half of the
// previous term so the halves can be recombined below.
458+
const type_t t0 = value1_lo * value2_lo;
459+
const type_t t1 = value1_hi * value2_lo + half_hi(t0);
460+
const type_t t2 = value1_lo * value2_hi + half_lo(t1);
461+
const type_t t3 = value1_hi * value2_hi + half_hi(t2);
495462

463+
type_t result_lo = half_make_hi(half_lo(t2)) + half_lo(t0);
464+
type_t result_hi = t3 + half_hi(t1);
496465

// fold the incoming carry into result_lo; add() reports whether that addition
// carried out, and the flag is propagated into result_hi
466+
result_hi += add(result_lo, carry);
467+
value1 = result_lo;
497468

498-
////////////////////////////////////////////////////////////////////////////////////////////////////
499-
template<typename type_t, std::enable_if_t<is_unsigned_v<type_t>, int>>
500-
constexpr type_t mulc(type_t value1, type_t value2, type_t& carry) noexcept
501-
{
502-
//return mulc_karatsuba(value1, value2, carry);
503-
return mulc_classic(value1, value2, carry);
469+
return result_hi;
504470
}
505471

506472

0 commit comments

Comments
 (0)