大数乘法,如何捕捉溢出
我正在寻找一个有效的(可选的标准,优雅和易于实现)的解决scheme来乘以相对较大的数字,并将结果存储为一个或多个整数:
假设我有两个64位整数声明如下:
uint64_t a = xxx, b = yyy;
当我做a*b
,我怎么能检测到操作结果溢出,在这种情况下存储的地方进行?
请注意,我不想使用任何大数量的库,因为我存储数字的方式受到限制。
检测:
x = a * b; if (a != 0 && x / a != b) { // overflow handling }
编辑:固定除以0(谢谢马克!)
计算进位是相当复杂的。 一种方法是将两个操作数分为半个字,然后对半个字应用长乘法 :
uint64_t hi(uint64_t x) { return x >> 32; } uint64_t lo(uint64_t x) { return ((1L << 32) - 1) & x; } void multiply(uint64_t a, uint64_t b) { // actually uint32_t would do, but the casting is annoying uint64_t s0, s1, s2, s3; uint64_t x = lo(a) * lo(b); s0 = lo(x); x = hi(a) * lo(b) + hi(x); s1 = lo(x); s2 = hi(x); x = s1 + lo(a) * hi(b); s1 = lo(x); x = s2 + hi(a) * hi(b) + hi(x); s2 = lo(x); s3 = hi(x); uint64_t result = s1 << 32 | s0; uint64_t carry = s3 << 32 | s2; }
要看到部分款项本身都不会溢出,我们考虑最坏的情况:
x = s2 + hi(a) * hi(b) + hi(x)
让B = 1 << 32.然后我们有
x <= (B - 1) + (B - 1)(B - 1) + (B - 1) <= B*B - 1 < B*B
我相信这会起作用 – 至less它能处理Sjlver的testing案例。 除此之外,它没有经过testing(甚至可能不会编译,因为我手边没有C ++编译器)。
这个想法是使用下面的事实,这是积分操作是真实的:
a*b > c
当且仅当a > c/b
/
在这里是整体划分。
检查正数的溢出的伪代码如下:
如果(a> max_int64 / b),则“溢出”,否则“确定” 。
要处理零和负数,你应该添加更多的检查。
C代码非负a
和b
如下:
if (b > 0 && a > 18446744073709551615 / b) { // overflow handling }; else { c = a * b; }
注意:
18446744073709551615 == (1<<64)-1
为了计算进位,我们可以使用方法将数字拆分成两个32位数字,并在纸上进行相乘。 我们需要拆分数字以避免溢出。
代码如下:
// split input numbers into 32-bit digits uint64_t a0 = a & ((1LL<<32)-1); uint64_t a1 = a >> 32; uint64_t b0 = b & ((1LL<<32)-1); uint64_t b1 = b >> 32; // The following 3 lines of code is to calculate the carry of d1 // (d1 - 32-bit second digit of result, and it can be calculated as d1=d11+d12), // but to avoid overflow. // Actually rewriting the following 2 lines: // uint64_t d1 = (a0 * b0 >> 32) + a1 * b0 + a0 * b1; // uint64_t c1 = d1 >> 32; uint64_t d11 = a1 * b0 + (a0 * b0 >> 32); uint64_t d12 = a0 * b1; uint64_t c1 = (d11 > 18446744073709551615 - d12) ? 1 : 0; uint64_t d2 = a1 * b1 + c1; uint64_t carry = d2; // needed carry stored here
虽然对这个问题还有其他几个答案,但我其中有几个是完全没有经过testing的代码,至今还没有人对不同的可能选项进行充分的比较。
出于这个原因,我编写并testing了几个可能的实现(最后一个是基于OpenBSD的代码 , 这里在Reddit上讨论)。 代码如下:
/* Multiply with overflow checking, emulating clang's builtin function * * __builtin_umull_overflow * * This code benchmarks five possible schemes for doing so. */ #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <limits.h> #ifndef BOOL #define BOOL int #endif // Option 1, check for overflow a wider type // - Often fastest and the least code, especially on modern compilers // - When long is a 64-bit int, requires compiler support for 128-bits // ints (requires GCC >= 3.0 or Clang) #if LONG_BIT > 32 typedef __uint128_t long_overflow_t ; #else typedef uint64_t long_overflow_t; #endif BOOL umull_overflow1(unsigned long lhs, unsigned long rhs, unsigned long* result) { long_overflow_t prod = (long_overflow_t)lhs * (long_overflow_t)rhs; *result = (unsigned long) prod; return (prod >> LONG_BIT) != 0; } // Option 2, perform long multiplication using a smaller type // - Sometimes the fastest (eg, when mulitply on longs is a library // call). // - Performs at most three multiplies, and sometimes only performs one. // - Highly portable code; works no matter how many bits unsigned long is BOOL umull_overflow2(unsigned long lhs, unsigned long rhs, unsigned long* result) { const unsigned long HALFSIZE_MAX = (1ul << LONG_BIT/2) - 1ul; unsigned long lhs_high = lhs >> LONG_BIT/2; unsigned long lhs_low = lhs & HALFSIZE_MAX; unsigned long rhs_high = rhs >> LONG_BIT/2; unsigned long rhs_low = rhs & HALFSIZE_MAX; unsigned long bot_bits = lhs_low * rhs_low; if (!(lhs_high || rhs_high)) { *result = bot_bits; return 0; } BOOL overflowed = lhs_high && rhs_high; unsigned long mid_bits1 = lhs_low * rhs_high; unsigned long mid_bits2 = lhs_high * rhs_low; *result = bot_bits + ((mid_bits1+mid_bits2) << LONG_BIT/2); return overflowed || *result < bot_bits || (mid_bits1 >> LONG_BIT/2) != 0 || (mid_bits2 >> LONG_BIT/2) != 0; } // Option 3, perform long multiplication using a smaller type (this code is // very similar to option 2, but calculates overflow using a different but // equivalent method). // - Sometimes the fastest (eg, when mulitply on longs is a library // call; clang likes this code). // - Performs at most three multiplies, and sometimes only performs one. // - Highly portable code; works no matter how many bits unsigned long is BOOL umull_overflow3(unsigned long lhs, unsigned long rhs, unsigned long* result) { const unsigned long HALFSIZE_MAX = (1ul << LONG_BIT/2) - 1ul; unsigned long lhs_high = lhs >> LONG_BIT/2; unsigned long lhs_low = lhs & HALFSIZE_MAX; unsigned long rhs_high = rhs >> LONG_BIT/2; unsigned long rhs_low = rhs & HALFSIZE_MAX; unsigned long lowbits = lhs_low * rhs_low; if (!(lhs_high || rhs_high)) { *result = lowbits; return 0; } BOOL overflowed = lhs_high && rhs_high; unsigned long midbits1 = lhs_low * rhs_high; unsigned long midbits2 = lhs_high * rhs_low; unsigned long midbits = midbits1 + midbits2; overflowed = overflowed || midbits < midbits1 || midbits > HALFSIZE_MAX; unsigned long product = lowbits + (midbits << LONG_BIT/2); overflowed = overflowed || product < lowbits; *result = product; return overflowed; } // Option 4, checks for overflow using division // - Checks for overflow using division // - Division is slow, especially if it is a library call BOOL umull_overflow4(unsigned long lhs, unsigned long rhs, unsigned long* result) { *result = lhs * rhs; return rhs > 0 && (SIZE_MAX / rhs) < lhs; } // Option 5, checks for overflow using division // - Checks for overflow using division // - Avoids division when the numbers are "small enough" to trivially // rule out overflow // - Division is slow, especially if it is a library call BOOL umull_overflow5(unsigned long lhs, unsigned long rhs, unsigned long* result) { const unsigned long MUL_NO_OVERFLOW = (1ul << LONG_BIT/2) - 1ul; *result = lhs * rhs; return (lhs >= MUL_NO_OVERFLOW || rhs >= MUL_NO_OVERFLOW) && rhs > 0 && SIZE_MAX / rhs < lhs; } #ifndef umull_overflow #define umull_overflow2 #endif /* * This benchmark code performs a multiply at all bit sizes, * essentially assuming that sizes are logarithmically distributed. */ int main() { unsigned long i, j, k; int count = 0; unsigned long mult; unsigned long total = 0; for (k = 0; k < 0x40000000 / LONG_BIT / LONG_BIT; ++k) for (i = 0; i != LONG_MAX; i = i*2+1) for (j = 0; j != LONG_MAX; j = j*2+1) { count += umull_overflow(i+k, j+k, &mult); total += mult; } printf("%d overflows (total %lu)\n", count, total); }
下面是结果,使用各种编译器和系统进行testing(在这种情况下,所有的testing都是在OS X上完成的,但在BSD或Linux系统上的结果应该是相似的):
+------------------+----------+----------+----------+----------+----------+ | | Option 1 | Option 2 | Option 3 | Option 4 | Option 5 | | | BigInt | LngMult1 | LngMult2 | Div | OptDiv | +------------------+----------+----------+----------+----------+----------+ | Clang 3.5 i386 | 1.610 | 3.217 | 3.129 | 4.405 | 4.398 | | GCC 4.9.0 i386 | 1.488 | 3.469 | 5.853 | 4.704 | 4.712 | | GCC 4.2.1 i386 | 2.842 | 4.022 | 3.629 | 4.160 | 4.696 | | GCC 4.2.1 PPC32 | 8.227 | 7.756 | 7.242 | 20.632 | 20.481 | | GCC 3.3 PPC32 | 5.684 | 9.804 | 11.525 | 21.734 | 22.517 | +------------------+----------+----------+----------+----------+----------+ | Clang 3.5 x86_64 | 1.584 | 2.472 | 2.449 | 9.246 | 7.280 | | GCC 4.9 x86_64 | 1.414 | 2.623 | 4.327 | 9.047 | 7.538 | | GCC 4.2.1 x86_64 | 2.143 | 2.618 | 2.750 | 9.510 | 7.389 | | GCC 4.2.1 PPC64 | 13.178 | 8.994 | 8.567 | 37.504 | 29.851 | +------------------+----------+----------+----------+----------+----------+
基于这些结果,我们可以得出几点结论:
- 显然,基于分工的方法虽然简单便捷,但速度缓慢。
- 没有一种技术在所有情况下都是明显的赢家。
- 在现代编译器中,如果可以使用,那么use-a-greater-int方法是最好的
- 在较老的编译器上,长乘法是最好的
- 令人惊讶的是,GCC 4.9.0在GCC 4.2.1上的性能回归,而GCC 4.2.1在GCC 3.3上的性能回归
一个版本,也适用于== 0:
x = a * b; if (a != 0 && x / a != b) { // overflow handling }
如果您不仅需要检测溢出,而且还需要捕获进位,最好将您的数字分成32位。 代码是一场噩梦; 接下来的只是一个草图:
#include <stdint.h> uint64_t mul(uint64_t a, uint64_t b) { uint32_t ah = a >> 32; uint32_t al = a; // truncates: now a = al + 2**32 * ah uint32_t bh = b >> 32; uint32_t bl = b; // truncates: now b = bl + 2**32 * bh // a * b = 2**64 * ah * bh + 2**32 * (ah * bl + bh * al) + al * bl uint64_t partial = (uint64_t) al * (uint64_t) bl; uint64_t mid1 = (uint64_t) ah * (uint64_t) bl; uint64_t mid2 = (uint64_t) al * (uint64_t) bh; uint64_t carry = (uint64_t) ah * (uint64_t) bh; // add high parts of mid1 and mid2 to carry // add low parts of mid1 and mid2 to partial, carrying // any carry bits into carry... }
问题不仅在于部分产品,而且还有一个事实是任何一笔款项都可能溢出。
如果我必须这样做,我会用本地汇编语言编写一个扩展的例程。 也就是说,例如,将两个64位整数相乘得到一个128位结果,存储在两个64位寄存器中。 所有合理的硬件在一个本地乘法指令中提供了这个function – 它不仅可以从C中访问
这是那些最优雅和易于编程的解决scheme实际上是使用汇编语言的罕见情况之一。 但它肯定不是便携式的:-(
这一天我一直在处理这个问题,我不得不说,这让我印象深刻的是,我看到人们说最好的方法来知道溢出是否是分裂的结果,这是完全没有效率的,不必要。 这个function的要点是它必须尽可能快。
有两种溢出检测的选项:
1º-如果可能的话,创build结果variables的倍数乘以乘数,例如:
struct INT32struct {INT16 high, low;}; typedef union { struct INT32struct s; INT32 ll; } INT32union; INT16 mulFunction(INT16 a, INT16 b) { INT32union result.ll = a * b; //32Bits result if(result.s.high > 0) Overflow(); return (result.s.low) }
你会马上知道是否溢出,代码是最快的,而不用写入机器代码。 根据编译器的不同,这些代码可以在机器代码中得到改进。
2º-不可能创build一个乘数variables的两倍大的结果variables:那么你应该玩如果条件来确定最佳path。 继续举例说明:
INT32 mulFunction(INT32 a, INT32 b) { INT32union s_a.ll = abs(a); INT32union s_b.ll = abs(b); //32Bits result INT32union result; if(s_a.s.hi > 0 && s_b.s.hi > 0) { Overflow(); } else if (s_a.s.hi > 0) { INT32union res1.ll = s_a.s.hi * s_b.s.lo; INT32union res2.ll = s_a.s.lo * s_b.s.lo; if (res1.hi == 0) { result.s.lo = res1.s.lo + res2.s.hi; if (result.s.hi == 0) { result.s.ll = result.s.lo << 16 + res2.s.lo; if ((ashi >> 15) ^ (bshi >> 15) == 1) { result.s.ll = -result.s.ll; } return result.s.ll }else { Overflow(); } }else { Overflow(); } }else if (s_b.s.hi > 0) { //Same code changing a with b }else { return (s_a.lo * s_b.lo); } }
我希望这个代码可以帮助你有一个相当有效的程序,我希望代码是清楚的,如果不是,我会把一些coments。
最好的祝福。
也许解决这个问题的最好方法是有一个函数,它将两个UInt64相乘,并产生一对UInt64,UInt128结果的上半部分和下半部分。 这里是解决scheme,包括一个函数,它以hex显示结果。 我想你可能更喜欢C ++解决scheme,但是我有一个工作的Swift-Solution,它显示了如何pipe理这个问题:
func hex128 (_ hi: UInt64, _ lo: UInt64) -> String { var s: String = String(format: "%08X", hi >> 32) + String(format: "%08X", hi & 0xFFFFFFFF) + String(format: "%08X", lo >> 32) + String(format: "%08X", lo & 0xFFFFFFFF) return (s) } func mul64to128 (_ multiplier: UInt64, _ multiplicand : UInt64) -> (result_hi: UInt64, result_lo: UInt64) { let x: UInt64 = multiplier let x_lo: UInt64 = (x & 0xffffffff) let x_hi: UInt64 = x >> 32 let y: UInt64 = multiplicand let y_lo: UInt64 = (y & 0xffffffff) let y_hi: UInt64 = y >> 32 let mul_lo: UInt64 = (x_lo * y_lo) let mul_hi: UInt64 = (x_hi * y_lo) + (mul_lo >> 32) let mul_carry: UInt64 = (x_lo * y_hi) + (mul_hi & 0xffffffff) let result_hi: UInt64 = (x_hi * y_hi) + (mul_hi >> 32) + (mul_carry >> 32) let result_lo: UInt64 = (mul_carry << 32) + (mul_lo & 0xffffffff) return (result_hi, result_lo) }
这是一个例子来validation,该函数的工作原理:
var c: UInt64 = 0 var d: UInt64 = 0 (c, d) = mul64to128(0x1234567890123456, 0x9876543210987654) // 0AD77D742CE3C72E45FD10D81D28D038 is the result of the above example print(hex128(c, d)) (c, d) = mul64to128(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF) // FFFFFFFFFFFFFFFE0000000000000001 is the result of the above example print(hex128(c, d))
这是检测两个无符号整数的乘法是否溢出的技巧。
我们观察到,如果我们将N位宽的二进制数乘以M位宽的二进制数,那么乘积不会超过N + M位。
例如,如果要求我们用一个二十九位数乘以三位数,我们知道这不会溢出三十二位。
#include <stdlib.h> #include <stdio.h> int might_be_mul_oflow(unsigned long a, unsigned long b) { if (!a || !b) return 0; a = a | (a >> 1) | (a >> 2) | (a >> 4) | (a >> 8) | (a >> 16) | (a >> 32); b = b | (b >> 1) | (b >> 2) | (b >> 4) | (b >> 8) | (b >> 16) | (b >> 32); for (;;) { unsigned long na = a << 1; if (na <= a) break; a = na; } return (a & b) ? 1 : 0; } int main(int argc, char **argv) { unsigned long a, b; char *endptr; if (argc < 3) { printf("supply two unsigned long integers in C form\n"); return EXIT_FAILURE; } a = strtoul(argv[1], &endptr, 0); if (*endptr != 0) { printf("%s is garbage\n", argv[1]); return EXIT_FAILURE; } b = strtoul(argv[2], &endptr, 0); if (*endptr != 0) { printf("%s is garbage\n", argv[2]); return EXIT_FAILURE; } if (might_be_mul_oflow(a, b)) printf("might be multiplication overflow\n"); { unsigned long c = a * b; printf("%lu * %lu = %lu\n", a, b, c); if (a != 0 && c / a != b) printf("confirmed multiplication overflow\n"); } return 0; }
less量的testing:(在64位系统上):
$ ./uflow 0x3 0x3FFFFFFFFFFFFFFF 3 * 4611686018427387903 = 13835058055282163709 $ ./uflow 0x7 0x3FFFFFFFFFFFFFFF 可能是乘法溢出 7 * 4611686018427387903 = 13835058055282163705 确认了乘法溢出 $ ./uflow 0x4 0x3FFFFFFFFFFFFFFF 可能是乘法溢出 4 * 4611686018427387903 = 18446744073709551612 $ ./uflow 0x5 0x3FFFFFFFFFFFFFFF 可能是乘法溢出 5 * 4611686018427387903 = 4611686018427387899 确认了乘法溢出
might_be_mul_oflow
中的步骤几乎肯定might_be_mul_oflow
区testing要慢,至less在桌面工作站,服务器和移动设备中使用的主stream处理器上。 在没有良好的分支支持的芯片上,这可能是有用的。
我觉得还有另一种方法来做这个早期的拒绝testing。
-
我们从一对数字
arng
和brng
开始,它们被初始化为0x7FFF...FFFF
和1
。 -
如果
a <= arng
和b <= brng
我们可以断定没有溢出。 -
否则,我们向右移动,并向左移动
brng
,向brng
添加一位,使得它们是0x3FFF...FFFF
和3
。 -
如果
arng
为零,则结束; 否则重复2。
该函数现在看起来像:
int might_be_mul_oflow(unsigned long a, unsigned long b) { if (!a || !b) return 0; { unsigned long arng = ULONG_MAX >> 1; unsigned long brng = 1; while (arng != 0) { if (a <= arng && b <= brng) return 0; arng >>= 1; brng <<= 1; brng |= 1; } return 1; } }
如果你只是想检测溢出,如何转换为双倍,做乘法,如果
| X | <2 ^ 53,转换为int64
| X | <2 ^ 63,使用int64进行乘法运算
否则产生任何你想要的错误?
这似乎工作:
int64_t safemult(int64_t a, int64_t b) { double dx; dx = (double)a * (double)b; if ( fabs(dx) < (double)9007199254740992 ) return (int64_t)dx; if ( (double)INT64_MAX < fabs(dx) ) return INT64_MAX; return a*b; }