X-Git-Url: https://ginac.de/CLN/cln.git//cln.git?a=blobdiff_plain;f=src%2Fbase%2Fcl_low.h;h=98b371e5d06d24987687d22a471ce41795b811c2;hb=c84c6db5d56829d69083c819688a973867694a2a;hp=2ab26f418b9e891894fdfe5a99e73151a83902ae;hpb=dd9e0f894eec7e2a8cf85078330ddc0a6639090b;p=cln.git diff --git a/src/base/cl_low.h b/src/base/cl_low.h index 2ab26f4..98b371e 100644 --- a/src/base/cl_low.h +++ b/src/base/cl_low.h @@ -3,6 +3,7 @@ #ifndef _CL_LOW_H #define _CL_LOW_H +namespace cln { // Determines the sign of a 16-bit number. // sign_of(wert) @@ -132,7 +133,7 @@ inline uint32 mulu16 (uint16 arg1, uint16 arg2) ); return _prod; } -#elif defined(__GNUC__) && defined(__i386__) && !defined(NO_ASM) +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && !defined(NO_ASM) inline uint32 mulu16 (uint16 arg1, uint16 arg2) { register uint16 _hi; @@ -186,7 +187,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) ); return _prod; } -#elif defined(__sparc__) +#elif defined(__sparc__) && !defined(NO_ASM) extern "C" uint32 mulu32_unchecked (uint32 x, uint32 y); // extern in Assembler #else // Wir können dafür auch die Bibliotheksroutine des C-Compilers nehmen: @@ -235,15 +236,15 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) }) #elif defined(__GNUC__) && defined(__sparc64__) && !defined(NO_ASM) #define mulu32(x,y,hi_zuweisung,lo_zuweisung) \ - ({ var register uint64 _hi; \ - var register uint64 _lo; \ - __asm__("umul %2,%3,%1\n\trd %y,%0" \ - : "=r" (_hi), "=r" (_lo) \ + ({ var register uint64 _prod; \ + __asm__("umul %1,%2,%0" \ + : "=r" (_prod) \ : "r" ((uint32)(x)), "r" ((uint32)(y)) \ ); \ - hi_zuweisung (uint32)_hi; lo_zuweisung (uint32)_lo; \ + hi_zuweisung (uint32)(_prod>>32); \ + lo_zuweisung (uint32)(_prod); \ }) -#elif defined(__GNUC__) && defined(__sparc__) +#elif defined(__GNUC__) && defined(__sparc__) && !defined(NO_ASM) #define mulu32(x,y,hi_zuweisung,lo_zuweisung) \ ({ lo_zuweisung mulu32_(x,y); /* extern in Assembler */ \ {var register uint32 _hi __asm__("%g1"); \ @@ -255,7 +256,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) {var register uint32 _hi __asm__("%r1"/*"%a2"*/); \ hi_zuweisung _hi; \ }}) -#elif defined(__GNUC__) && defined(__i386__) && !defined(NO_ASM) +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && !defined(NO_ASM) #define mulu32(x,y,hi_zuweisung,lo_zuweisung) \ ({ var register uint32 _hi; \ var register uint32 _lo; \ @@ -294,7 +295,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) #else #define mulu32(x,y,hi_zuweisung,lo_zuweisung) \ { lo_zuweisung mulu32_(x,y); hi_zuweisung mulu32_high; } - #if defined(__m68k__) || defined(__sparc__) || defined(__sparc64__) || defined(__arm__) || (defined(__i386__) && !defined(WATCOM) && !defined(MICROSOFT)) || defined(__mips__) || defined(__hppa__) + #if (defined(__m68k__) || defined(__sparc__) || defined(__sparc64__) || defined(__arm__) || (defined(__i386__) && !defined(WATCOM) && !defined(MICROSOFT)) || defined(__x86_64__) || defined(__mips__) || defined(__hppa__)) && !defined(NO_ASM) // mulu32_ extern in Assembler #if defined(__sparc__) || defined(__sparc64__) extern "C" uint32 _get_g1 (void); @@ -313,7 +314,17 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // mulu32_w(arg1,arg2) // > arg1, arg2 : zwei 32-Bit-Zahlen // < result : eine 64-Bit-Zahl -#if defined(__GNUC__) +#if defined(__GNUC__) && defined(__sparc64__) && !defined(NO_ASM) + // Prefer the umul instruction over the mulx instruction (overkill). + #define mulu32_w(x,y) \ + ({ var register uint64 _prod; \ + __asm__("umul %1,%2,%0" \ + : "=r" (_prod) \ + : "r" ((uint32)(x)), "r" ((uint32)(y)) \ + ); \ + _prod; \ + }) +#elif defined(__GNUC__) #define mulu32_w(x,y) ((uint64)(uint32)(x) * (uint64)(uint32)(y)) #else extern "C" uint64 mulu32_w (uint32 arg1, uint32 arg2); @@ -343,12 +354,33 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) hi_zuweisung _hi; \ lo_zuweisung _lo; \ }) -#elif defined(__GNUC__) && defined(__sparc64__) +#elif defined(__GNUC__) && defined(__sparc64__) && !defined(NO_ASM) #define mulu64(x,y,hi_zuweisung,lo_zuweisung) \ ({ lo_zuweisung mulu64_(x,y); /* extern in Assembler */ \ {var register uint64 _hi __asm__("%g2"); \ hi_zuweisung _hi; \ }}) +#elif defined(__GNUC__) && defined(__x86_64__) && !defined(NO_ASM) + #define mulu64(x,y,hi_zuweisung,lo_zuweisung) \ + ({ var register uint64 _hi; \ + var register uint64 _lo; \ + __asm__("mulq %2" \ + : "=d" /* %rdx */ (_hi), "=a" /* %rax */ (_lo) \ + : "rm" ((uint64)(x)), "1" /* %rax */ ((uint64)(y)) \ + ); \ + hi_zuweisung _hi; lo_zuweisung _lo; \ + }) +#elif defined(__GNUC__) && defined(__ia64__) && !defined(NO_ASM) + #define mulu64(x,y,hi_zuweisung,lo_zuweisung) \ + ({ var register uint64 _x = (x); \ + var register uint64 _y = (y); \ + var register uint64 _hi; \ + __asm__("xma.hu %0 = %1, %2, f0" \ + : "=f" (_hi) \ + : "f" ((uint64)(_x)), "f" ((uint64)(_y)) \ + ); \ + hi_zuweisung _hi; lo_zuweisung ((uint64)(_x)*(uint64)(_y));\ + }) #else #define mulu64(x,y,hi_zuweisung,lo_zuweisung) \ { lo_zuweisung mulu64_(x,y); hi_zuweisung mulu64_high; } @@ -414,7 +446,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) q_zuweisung (uint16)__q; \ r_zuweisung (uint16)__r; \ }) -#elif defined(__GNUC__) && (defined(__sparc__) || defined(__sparc64__)) +#elif defined(__GNUC__) && (defined(__sparc__) || defined(__sparc64__)) && !defined(NO_ASM) #define divu_3216_1616(x,y,q_zuweisung,r_zuweisung) \ ({ var uint32 __qr = divu_3216_1616_(x,y); /* extern in Assembler */\ q_zuweisung low16(__qr); \ @@ -431,7 +463,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) q_zuweisung low16(__qr); \ r_zuweisung high16(__qr); \ }) -#elif defined(__GNUC__) && defined(__i386__) && !defined(NO_ASM) +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && !defined(NO_ASM) #define divu_3216_1616(x,y,q_zuweisung,r_zuweisung) \ ({var uint32 __x = (x); \ var uint16 __y = (y); \ @@ -446,9 +478,9 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) }) #elif defined(__GNUC__) && defined(__arm__) && 0 // see comment cl_asm_arm.cc #define divu_3216_1616(x,y,q_zuweisung,r_zuweisung) \ - { var uint32 _q = divu_3216_1616_(x,y); /* extern in Assembler */ \ - var register uint32 _r __asm__("%r1"/*"%a2"*/); \ - q_zuweisung _q; r_zuweisung _r; \ + { var uint32 __q = divu_3216_1616_(x,y); /* extern in Assembler */ \ + var register uint32 __r __asm__("%r1"/*"%a2"*/); \ + q_zuweisung __q; r_zuweisung __r; \ } #elif defined(__GNUC__) && !defined(__arm__) #define divu_3216_1616(x,y,q_zuweisung,r_zuweisung) \ @@ -458,13 +490,13 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) q_zuweisung __q; \ r_zuweisung (__x - __q * __y); \ }) -#elif defined(__sparc__) || defined(__sparc64__) +#elif (defined(__sparc__) || defined(__sparc64__)) && !defined(NO_ASM) #define divu_3216_1616(x,y,q_zuweisung,r_zuweisung) \ { var uint32 __qr = divu_3216_1616_(x,y); /* extern in Assembler */ \ q_zuweisung low16(__qr); \ r_zuweisung high16(__qr); \ } -#elif defined(__arm__) +#elif defined(__arm__) && !defined(NO_ASM) #define divu_3216_1616(x,y,q_zuweisung,r_zuweisung) \ { q_zuweisung divu_3216_1616_(x,y); /* extern in Assembler */ \ r_zuweisung divu_16_rest; \ @@ -503,7 +535,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) q_zuweisung (uint32)__q; \ r_zuweisung (uint16)__r; \ }) -#elif defined(__sparc__) || defined(__sparc64__) || defined(__i386__) +#elif defined(__sparc__) || defined(__sparc64__) || defined(__i386__) || defined(__x86_64__) #define divu_3216_3216 divu_3232_3232 #else // Methode: (beta = 2^16) @@ -553,7 +585,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) q_zuweisung (uint32)__q; \ r_zuweisung (uint32)__r; \ }) -#elif defined(__sparc__) || defined(__sparc64__) || defined(__i386__) +#elif defined(__sparc__) || defined(__sparc64__) || defined(__i386__) || defined(__x86_64__) #define divu_3232_3232(x,y,q_zuweisung,r_zuweisung) \ divu_6432_3232(0,x,y,q_zuweisung,r_zuweisung) #define divu_3232_3232_(x,y) divu_6432_3232_(0,x,y) @@ -665,7 +697,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) q_zuweisung (uint32)__q; \ r_zuweisung (uint32)__r; \ }) -#elif defined(__GNUC__) && (defined(__sparc__) || defined(__sparc64__)) +#elif defined(__GNUC__) && (defined(__sparc__) || defined(__sparc64__)) && !defined(NO_ASM) #define divu_6432_3232(xhi,xlo,y,q_zuweisung,r_zuweisung) \ ({ var uint32 _q = divu_6432_3232_(xhi,xlo,y); /* extern in Assembler */\ var register uint32 _r __asm__("%g1"); \ @@ -677,7 +709,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) var register uint32 _r __asm__("%r1"/*"%a2"*/); \ q_zuweisung _q; r_zuweisung _r; \ }) -#elif defined(__GNUC__) && defined(__i386__) && !defined(NO_ASM) +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && !defined(NO_ASM) #define divu_6432_3232(xhi,xlo,y,q_zuweisung,r_zuweisung) \ ({var uint32 __xhi = (xhi); \ var uint32 __xlo = (xlo); \ @@ -722,7 +754,7 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) #else #define divu_6432_3232(xhi,xlo,y,q_zuweisung,r_zuweisung) \ { q_zuweisung divu_6432_3232_(xhi,xlo,y); r_zuweisung divu_32_rest; } - #if defined(__m68k__) || defined(__sparc__) || defined(__sparc64__) || defined(__arm__) || (defined(__i386__) && !defined(WATCOM) && !defined(MICROSOFT)) || defined(__hppa__) + #if (defined(__m68k__) || defined(__sparc__) || defined(__sparc64__) || defined(__arm__) || (defined(__i386__) && !defined(WATCOM) && !defined(MICROSOFT)) || defined(__x86_64__) || defined(__hppa__)) && !defined(NO_ASM) // divu_6432_3232_ extern in Assembler #if defined(__sparc__) || defined(__sparc64__) extern "C" uint32 _get_g1 (void); @@ -746,13 +778,54 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // < uint32 q: floor(x/y) // < uint32 r: x mod y // < x = q*y+r -#if defined(__GNUC__) +#if defined(__GNUC__) && defined(__sparc64__) && !defined(NO_ASM) + // Prefer the udiv and umul instructions over the udivx and mulx instructions + // (overkill). + #define divu_6432_3232_w(x,y,q_zuweisung,r_zuweisung) \ + ({var uint64 __x = (x); \ + var uint32 __xhi = high32(__x); \ + var uint32 __xlo = low32(__x); \ + var uint32 __y = (y); \ + var uint64 __q; \ + var uint64 __r; \ + __asm__ __volatile__ ( \ + "wr %2,%%g0,%%y\n\t" \ + "udiv %3,%4,%0\n\t" \ + "umul %0,%4,%1" \ + "sub %3,%1,%1" \ + : "=&r" (__q), "=&r" (__r) \ + : "r" (__xhi), "r" (__xlo), "r" (__y)); \ + q_zuweisung (uint32)__q; \ + r_zuweisung (uint32)__r; \ + }) +#elif defined(__GNUC__) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__sparc64__)) + // On __alpha__, computing the remainder by multiplication is just two + // instructions, compared to the __remqu (libc) function call for the % + // operator. + // On __ia64__, computing the remainder by multiplication is just four + // instructions, compared to the __umoddi3 (libgcc) function call for the % + // operator. + // On __mips64__, computing the remainder by multiplication is just two + // instructions, compared to the __umoddi3 (libgcc) function call for the % + // operator. + // On __sparc64__, computing the remainder by multiplication uses a 32-bit + // multiplication instruction, compared to a 64-bit multiplication when the % + // operator is used. #define divu_6432_3232_w(x,y,q_zuweisung,r_zuweisung) \ ({var uint64 __x = (x); \ var uint32 __y = (y); \ var uint32 __q = floor(__x,(uint64)__y); \ q_zuweisung __q; r_zuweisung (uint32)__x - __q * __y; \ }) +#elif defined(__GNUC__) && defined(__x86_64__) + // On __x86_64__, gcc 4.0 performs both quotient and remainder computation + // in a single instruction. + #define divu_6432_3232_w(x,y,q_zuweisung,r_zuweisung) \ + ({var uint64 __x = (x); \ + var uint32 __y = (y); \ + var uint32 __q = floor(__x,(uint64)__y); \ + q_zuweisung __q; r_zuweisung __x % (uint64)__y; \ + }) #else #define divu_6432_3232_w(x,y,q_zuweisung,r_zuweisung) \ { var uint64 __x = (x); \ @@ -760,22 +833,125 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) } #endif +// Dividiert eine 64-Bit-Zahl durch eine 32-Bit-Zahl und +// liefert einen 64-Bit-Quotienten und einen 32-Bit-Rest. +// divu_6432_6432(x,y,q=,r=); +// > uint64 x: Zähler +// > uint32 y: Nenner +// > Es sei bekannt, daß y>0. +// < uint64 q: floor(x/y) +// < uint32 r: x mod y +// < x = q*y+r +#if defined(__GNUC__) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__sparc64__)) + // On __alpha__, computing the remainder by multiplication is just two + // instructions, compared to the __remqu (libc) function call for the % + // operator. + // On __ia64__, computing the remainder by multiplication is just four + // instructions, compared to the __umoddi3 (libgcc) function call for the % + // operator. + // On __mips64__, computing the remainder by multiplication is just two + // instructions, compared to the __umoddi3 (libgcc) function call for the % + // operator. + // On __sparc64__, computing the remainder by multiplication uses a 32-bit + // multiplication instruction, compared to a 64-bit multiplication when the % + // operator is used. + #define divu_6432_6432(x,y,q_zuweisung,r_zuweisung) \ + ({var uint64 _x = (x); \ + var uint32 _y = (y); \ + var uint64 _q; \ + q_zuweisung _q = floor(_x,(uint64)_y); \ + r_zuweisung low32(_x) - low32(_q) * _y; \ + }) +#elif defined(__GNUC__) && defined(__x86_64__) + // On __x86_64__, gcc 4.0 performs both quotient and remainder computation + // in a single instruction. + #define divu_6432_6432(x,y,q_zuweisung,r_zuweisung) \ + ({var uint64 _x = (x); \ + var uint32 _y = (y); \ + q_zuweisung floor(_x,(uint64)_y); \ + r_zuweisung _x % (uint64)_y; \ + }) +#else + // Methode: (beta = 2^32) + // x = x1*beta+x0 schreiben. + // Division mit Rest: x1 = q1*y + r1, wobei 0 <= x1 < beta <= beta*y. + // Also 0 <= q1 < beta, 0 <= r1 < y. + // Division mit Rest: (r1*beta+x0) = q0*y + r0, wobei 0 <= r1*beta+x0 < beta*y. + // Also 0 <= q0 < beta, 0 <= r0 < y + // und x = x1*beta+x0 = (q1*beta+q0)*y + r0. + // Setze q := q1*beta+q0 und r := r0. + #if defined(__GNUC__) + #define divu_6432_6432(x,y,q_zuweisung,r_zuweisung) \ + ({var uint64 _x = (x); \ + var uint32 _y = (y); \ + var uint32 _q1; \ + var uint32 _q0; \ + var uint32 _r1; \ + divu_6432_3232(0,high32(_x),_y, _q1 = , _r1 = ); \ + divu_6432_3232(_r1,low32(_x),_y, _q0 = , r_zuweisung); \ + q_zuweisung highlow64(_q1,_q0); \ + }) + #else + #define divu_6432_6432(x,y,q_zuweisung,r_zuweisung) \ + {var uint64 _x = (x); \ + var uint32 _y = (y); \ + var uint32 _q1; \ + var uint32 _q0; \ + var uint32 _r1; \ + divu_6432_3232(0,high32(_x),_y, _q1 = , _r1 = ); \ + divu_6432_3232(_r1,low32(_x),_y, _q0 = , r_zuweisung); \ + q_zuweisung highlow64(_q1,_q0); \ + } + #endif +#endif + // Dividiert eine 64-Bit-Zahl durch eine 64-Bit-Zahl und // liefert einen 64-Bit-Quotienten und einen 64-Bit-Rest. // divu_6464_6464(x,y,q=,r=); // > uint64 x: Zähler // > uint64 y: Nenner -// Es sei bekannt, daß y>0. +// > Es sei bekannt, daß y>0. // < uint64 q: floor(x/y) // < uint64 r: x mod y // < x = q*y+r -#if defined(__alpha__) || 1 +#if defined(__GNUC__) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__sparc64__)) + // On __alpha__, computing the remainder by multiplication is just two + // instructions, compared to the __remqu (libc) function call for the % + // operator. + // On __ia64__, computing the remainder by multiplication is just four + // instructions, compared to the __umoddi3 (libgcc) function call for the % + // operator. + // On __mips64__, computing the remainder by multiplication is just two + // instructions, compared to the __umoddi3 (libgcc) function call for the % + // operator. + // On __sparc64__, it doesn't matter. #define divu_6464_6464(x,y,q_zuweisung,r_zuweisung) \ - { var uint64 __x = (x); \ - var uint64 __y = (y); \ - q_zuweisung (__x / __y); \ - r_zuweisung (__x % __y); \ - } + ({var uint64 _x = (x); \ + var uint64 _y = (y); \ + var uint64 _q; \ + q_zuweisung _q = floor(_x,_y); \ + r_zuweisung _x - _q * _y; \ + }) +#elif defined(__GNUC__) && (defined(__sparc64__) || defined(__x86_64__)) + // On __sparc64__, it doesn't matter. + // On __x86_64__, gcc 4.0 performs both quotient and remainder computation + // in a single instruction. + #define divu_6464_6464(x,y,q_zuweisung,r_zuweisung) \ + ({var uint64 _x = (x); \ + var uint64 _y = (y); \ + q_zuweisung floor(_x,_y); \ + r_zuweisung _x % _y; \ + }) +#else + // For unknown CPUs, we don't know whether gcc's __udivdi3 function plus a + // multiplication is slower or faster than our own divu_6464_6464_ routine. + // Anyway, call our own routine. + extern "C" uint64 divu_6464_6464_ (uint64 x, uint64 y); // -> Quotient q + extern "C" uint64 divu_64_rest; // -> Rest r + #define divu_6464_6464(x,y,q_zuweisung,r_zuweisung) \ + { q_zuweisung divu_6464_6464_(x,y); r_zuweisung divu_64_rest; } + #define NEED_VAR_divu_64_rest + #define NEED_FUNCTION_divu_6464_6464_ #endif // Dividiert eine 128-Bit-Zahl durch eine 64-Bit-Zahl und @@ -789,9 +965,29 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // < x = q*y+r extern "C" uint64 divu_12864_6464_ (uint64 xhi, uint64 xlo, uint64 y); // -> Quotient q extern "C" uint64 divu_64_rest; // -> Rest r +#if defined(__GNUC__) && defined(__x86_64__) && !defined(NO_ASM) + #define divu_12864_6464(xhi,xlo,y,q_zuweisung,r_zuweisung) \ + ({var uint64 __xhi = (xhi); \ + var uint64 __xlo = (xlo); \ + var uint64 __y = (y); \ + var uint64 __q; \ + var uint64 __r; \ + __asm__ __volatile__ ( \ + "divq %4" \ + : "=a" /* %rax */ (__q), "=d" /* %rdx */ (__r) \ + : "1" /* %rdx */ (__xhi), "0" /* %rax */ (__xlo), "rm" (__y) \ + ); \ + q_zuweisung __q; \ + r_zuweisung __r; \ + }) + #define divu_12864_64364_(xhi,xlo,y) \ + ({var uint64 ___q; divu_12864_6464(xhi,xlo,y,___q=,); ___q; }) +#else #define divu_12864_6464(xhi,xlo,y,q_zuweisung,r_zuweisung) \ { q_zuweisung divu_12864_6464_(xhi,xlo,y); r_zuweisung divu_64_rest; } + #define NEED_VAR_divu_64_rest #define NEED_FUNCTION_divu_12864_6464_ +#endif #endif /* HAVE_FAST_LONGLONG */ @@ -1030,9 +1226,9 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // < size: >0, <=8, mit 2^(size-1) <= digit < 2^size #if defined(__GNUC__) && defined(__m68k__) && !defined(NO_ASM) #define integerlength8(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("bfffo %1{#0:#8},%0" : "=d" (zero_counter) : "dm" ((uint8)(digit)) ); \ - size_zuweisung (8-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("bfffo %1{#0:#8},%0" : "=d" (_zero_counter) : "dm" ((uint8)(digit)) ); \ + size_zuweisung (8-_zero_counter); \ } #elif defined(__sparc__) && !defined(__sparc64__) #define integerlength8(digit,size_zuweisung) \ @@ -1042,16 +1238,16 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) integerlength16((uint16)(digit),size_zuweisung) #else #define integerlength8(digit,size_zuweisung) \ - { var uintC bitsize = 1; \ - var uintL x8 = (uint8)(digit); \ - /* x8 hat höchstens 8 Bits. */\ - if (x8 >= bit(4)) { x8 = x8>>4; bitsize += 4; } \ - /* x8 hat höchstens 4 Bits. */\ - if (x8 >= bit(2)) { x8 = x8>>2; bitsize += 2; } \ - /* x8 hat höchstens 2 Bits. */\ - if (x8 >= bit(1)) { /* x8 = x8>>1; */ bitsize += 1; } \ - /* x8 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ - size_zuweisung bitsize; \ + { var uintC _bitsize = 1; \ + var uintL _x8 = (uint8)(digit); \ + /* _x8 hat höchstens 8 Bits. */\ + if (_x8 >= bit(4)) { _x8 = _x8>>4; _bitsize += 4; } \ + /* _x8 hat höchstens 4 Bits. */\ + if (_x8 >= bit(2)) { _x8 = _x8>>2; _bitsize += 2; } \ + /* _x8 hat höchstens 2 Bits. */\ + if (_x8 >= bit(1)) { /* _x8 = _x8>>1; */ _bitsize += 1; } \ + /* _x8 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ + size_zuweisung _bitsize; \ } #endif @@ -1062,40 +1258,40 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // < size: >0, <=16, mit 2^(size-1) <= digit < 2^size #if defined(__GNUC__) && defined(__m68k__) && !defined(NO_ASM) #define integerlength16(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("bfffo %1{#0:#16},%0" : "=d" (zero_counter) : "dm" ((uint16)(digit)) ); \ - size_zuweisung (16-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("bfffo %1{#0:#16},%0" : "=d" (_zero_counter) : "dm" ((uint16)(digit)) ); \ + size_zuweisung (16-_zero_counter); \ } #elif defined(__sparc__) && !defined(__sparc64__) #define integerlength16(digit,size_zuweisung) \ integerlength32((uint32)(digit),size_zuweisung) // siehe unten #elif defined(__GNUC__) && defined(__i386__) && !defined(NO_ASM) #define integerlength16(digit,size_zuweisung) \ - { var uintW one_position; /* Position der führenden 1 */\ - __asm__("bsrw %1,%0" : "=r" (one_position) : "r" ((uint16)(digit)) ); \ - size_zuweisung (1+one_position); \ + { var uintW _one_position; /* Position der führenden 1 */\ + __asm__("bsrw %1,%0" : "=r" (_one_position) : "r" ((uint16)(digit)) ); \ + size_zuweisung (1+_one_position); \ } // Die weiteren kommen von gcc/longlong.h : #elif defined(__GNUC__) && defined(__ibm032__) && !defined(NO_ASM) // RT/ROMP #define integerlength16(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("clz %0,%1" : "=r" (zero_counter) : "r" ((uint32)(digit)) ); \ - size_zuweisung (16-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("clz %0,%1" : "=r" (_zero_counter) : "r" ((uint32)(digit)) ); \ + size_zuweisung (16-_zero_counter); \ } #else #define integerlength16(digit,size_zuweisung) \ - { var uintC bitsize = 1; \ - var uintWL x16 = (uint16)(digit); \ - /* x16 hat höchstens 16 Bits. */\ - if (x16 >= bit(8)) { x16 = x16>>8; bitsize += 8; } \ - /* x16 hat höchstens 8 Bits. */\ - if (x16 >= bit(4)) { x16 = x16>>4; bitsize += 4; } \ - /* x16 hat höchstens 4 Bits. */\ - if (x16 >= bit(2)) { x16 = x16>>2; bitsize += 2; } \ - /* x16 hat höchstens 2 Bits. */\ - if (x16 >= bit(1)) { /* x16 = x16>>1; */ bitsize += 1; } \ - /* x16 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ - size_zuweisung bitsize; \ + { var uintC _bitsize = 1; \ + var uintWL _x16 = (uint16)(digit); \ + /* _x16 hat höchstens 16 Bits. */\ + if (_x16 >= bit(8)) { _x16 = _x16>>8; _bitsize += 8; } \ + /* _x16 hat höchstens 8 Bits. */\ + if (_x16 >= bit(4)) { _x16 = _x16>>4; _bitsize += 4; } \ + /* _x16 hat höchstens 4 Bits. */\ + if (_x16 >= bit(2)) { _x16 = _x16>>2; _bitsize += 2; } \ + /* _x16 hat höchstens 2 Bits. */\ + if (_x16 >= bit(1)) { /* _x16 = _x16>>1; */ _bitsize += 1; } \ + /* _x16 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ + size_zuweisung _bitsize; \ } #endif @@ -1106,9 +1302,9 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // < size: >0, <=32, mit 2^(size-1) <= digit < 2^size #if defined(__GNUC__) && defined(__m68k__) && !defined(NO_ASM) #define integerlength32(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("bfffo %1{#0:#32},%0" : "=d" (zero_counter) : "dm" ((uint32)(digit)) ); \ - size_zuweisung (32-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("bfffo %1{#0:#32},%0" : "=d" (_zero_counter) : "dm" ((uint32)(digit)) ); \ + size_zuweisung (32-_zero_counter); \ } #elif defined(__sparc__) && !defined(__sparc64__) && defined(FAST_DOUBLE) #define integerlength32(digit,size_zuweisung) \ @@ -1125,9 +1321,9 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) } #elif defined(__GNUC__) && defined(__i386__) && !defined(NO_ASM) #define integerlength32(digit,size_zuweisung) \ - { var uintL one_position; /* Position der führenden 1 */\ - __asm__("bsrl %1,%0" : "=r" (one_position) : "rm" ((uint32)(digit)) ); \ - size_zuweisung (1+one_position); \ + { var uintL _one_position; /* Position der führenden 1 */\ + __asm__("bsrl %1,%0" : "=r" (_one_position) : "rm" ((uint32)(digit)) ); \ + size_zuweisung (1+_one_position); \ } #elif defined(__hppa__) && !defined(NO_ASM) #define integerlength32(digit,size_zuweisung) \ @@ -1136,63 +1332,64 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // Die weiteren kommen von gcc/longlong.h : #elif defined(__GNUC__) && (defined(__a29k__) || defined(___AM29K__)) && !defined(NO_ASM) #define integerlength32(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("clz %0,%1" : "=r" (zero_counter) : "r" ((uint32)(digit)) ); \ - size_zuweisung (32-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("clz %0,%1" : "=r" (_zero_counter) : "r" ((uint32)(digit)) ); \ + size_zuweisung (32-_zero_counter); \ } #elif defined(__GNUC__) && defined(__gmicro__) && !defined(NO_ASM) #define integerlength32(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("bsch/1 %1,%0" : "=g" (zero_counter) : "g" ((uint32)(digit)) ); \ - size_zuweisung (32-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("bsch/1 %1,%0" : "=g" (_zero_counter) : "g" ((uint32)(digit)) ); \ + size_zuweisung (32-_zero_counter); \ } #elif defined(__GNUC__) && defined(__rs6000__) && !defined(NO_ASM) #ifdef _AIX // old assembler syntax #define integerlength32(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("cntlz %0,%1" : "=r" (zero_counter) : "r" ((uint32)(digit)) ); \ - size_zuweisung (32-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("cntlz %0,%1" : "=r" (_zero_counter) : "r" ((uint32)(digit)) ); \ + size_zuweisung (32-_zero_counter); \ } #else // new assembler syntax #define integerlength32(digit,size_zuweisung) \ - { var uintL zero_counter; /* zählt die führenden Nullbits in digit */\ - __asm__("cntlzw %0,%1" : "=r" (zero_counter) : "r" ((uint32)(digit)) ); \ - size_zuweisung (32-zero_counter); \ + { var uintL _zero_counter; /* zählt die führenden Nullbits in digit */\ + __asm__("cntlzw %0,%1" : "=r" (_zero_counter) : "r" ((uint32)(digit)) ); \ + size_zuweisung (32-_zero_counter); \ } #endif #elif defined(__GNUC__) && defined(__m88k__) && !defined(NO_ASM) #define integerlength32(digit,size_zuweisung) \ - { var uintL one_position; /* Position der führenden 1 */\ - __asm__("ff1 %0,%1" : "=r" (one_position) : "r" ((uint32)(digit)) ); \ - size_zuweisung (1+one_position); \ + { var uintL _one_position; /* Position der führenden 1 */\ + __asm__("ff1 %0,%1" : "=r" (_one_position) : "r" ((uint32)(digit)) ); \ + size_zuweisung (1+_one_position); \ } #elif defined(__GNUC__) && defined(__ibm032__) && !defined(NO_ASM) // RT/ROMP #define integerlength32(digit,size_zuweisung) \ - { var uintL x32 = (uint32)(digit); \ - if (x32 >= bit(16)) \ - { integerlength16(x32>>16,size_zuweisung 16 + ); } \ + { var uintL _x32 = (uint32)(digit); \ + if (_x32 >= bit(16)) \ + { integerlength16(_x32>>16,size_zuweisung 16 + ); } \ else \ - { integerlength16(x32,size_zuweisung); } \ + { integerlength16(_x32,size_zuweisung); } \ } #else #define integerlength32(digit,size_zuweisung) \ - { var uintC bitsize = 1; \ - var uintL x32 = (uint32)(digit); \ - /* x32 hat höchstens 32 Bits. */\ - if (x32 >= bit(16)) { x32 = x32>>16; bitsize += 16; } \ - /* x32 hat höchstens 16 Bits. */\ - if (x32 >= bit(8)) { x32 = x32>>8; bitsize += 8; } \ - /* x32 hat höchstens 8 Bits. */\ - if (x32 >= bit(4)) { x32 = x32>>4; bitsize += 4; } \ - /* x32 hat höchstens 4 Bits. */\ - if (x32 >= bit(2)) { x32 = x32>>2; bitsize += 2; } \ - /* x32 hat höchstens 2 Bits. */\ - if (x32 >= bit(1)) { /* x32 = x32>>1; */ bitsize += 1; } \ - /* x32 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ - size_zuweisung bitsize; \ + { var uintC _bitsize = 1; \ + var uintL _x32 = (uint32)(digit); \ + /* _x32 hat höchstens 32 Bits. */\ + if (_x32 >= bit(16)) { _x32 = _x32>>16; _bitsize += 16; } \ + /* _x32 hat höchstens 16 Bits. */\ + if (_x32 >= bit(8)) { _x32 = _x32>>8; _bitsize += 8; } \ + /* _x32 hat höchstens 8 Bits. */\ + if (_x32 >= bit(4)) { _x32 = _x32>>4; _bitsize += 4; } \ + /* _x32 hat höchstens 4 Bits. */\ + if (_x32 >= bit(2)) { _x32 = _x32>>2; _bitsize += 2; } \ + /* _x32 hat höchstens 2 Bits. */\ + if (_x32 >= bit(1)) { /* _x32 = _x32>>1; */ _bitsize += 1; } \ + /* _x32 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ + size_zuweisung _bitsize; \ } + #define GENERIC_INTEGERLENGTH32 #endif // Bits einer 64-Bit-Zahl zählen: @@ -1200,24 +1397,50 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // setzt size auf die höchste in digit vorkommende Bitnummer. // > digit: ein uint64 >0 // < size: >0, <=64, mit 2^(size-1) <= digit < 2^size +#ifdef GENERIC_INTEGERLENGTH32 #define integerlength64(digit,size_zuweisung) \ - { var uintC bitsize = 1; \ - var uint64 x64 = (uint64)(digit); \ - /* x64 hat höchstens 64 Bits. */\ - if (x64 >= bit(32)) { x64 = x64>>32; bitsize += 32; } \ - /* x64 hat höchstens 32 Bits. */\ - if (x64 >= bit(16)) { x64 = x64>>16; bitsize += 16; } \ - /* x64 hat höchstens 16 Bits. */\ - if (x64 >= bit(8)) { x64 = x64>>8; bitsize += 8; } \ - /* x64 hat höchstens 8 Bits. */\ - if (x64 >= bit(4)) { x64 = x64>>4; bitsize += 4; } \ - /* x64 hat höchstens 4 Bits. */\ - if (x64 >= bit(2)) { x64 = x64>>2; bitsize += 2; } \ - /* x64 hat höchstens 2 Bits. */\ - if (x64 >= bit(1)) { /* x64 = x64>>1; */ bitsize += 1; } \ - /* x64 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ - size_zuweisung bitsize; \ + { var uintC _bitsize = 1; \ + var uint64 _x64 = (uint64)(digit); \ + /* _x64 hat höchstens 64 Bits. */\ + if (_x64 >= bit(32)) { _x64 = _x64>>32; _bitsize += 32; } \ + /* _x64 hat höchstens 32 Bits. */\ + if (_x64 >= bit(16)) { _x64 = _x64>>16; _bitsize += 16; } \ + /* _x64 hat höchstens 16 Bits. */\ + if (_x64 >= bit(8)) { _x64 = _x64>>8; _bitsize += 8; } \ + /* _x64 hat höchstens 8 Bits. */\ + if (_x64 >= bit(4)) { _x64 = _x64>>4; _bitsize += 4; } \ + /* _x64 hat höchstens 4 Bits. */\ + if (_x64 >= bit(2)) { _x64 = _x64>>2; _bitsize += 2; } \ + /* _x64 hat höchstens 2 Bits. */\ + if (_x64 >= bit(1)) { /* _x64 = _x64>>1; */ _bitsize += 1; } \ + /* _x64 hat höchstens 1 Bit. Dieses Bit muß gesetzt sein. */\ + size_zuweisung _bitsize; \ } +#else + #define integerlength64(digit,size_zuweisung) \ + { var uint64 _x64 = (digit); \ + var uintC _bitsize64 = 0; \ + var uint32 _x32_from_integerlength64; \ + if (_x64 >= (1ULL << 32)) { \ + _x32_from_integerlength64 = _x64>>32; _bitsize64 += 32; \ + } else { \ + _x32_from_integerlength64 = _x64; \ + } \ + integerlength32(_x32_from_integerlength64, size_zuweisung _bitsize64 + ); \ + } +#endif + +// Bits einer uintC-Zahl zählen: +// integerlengthC(digit,size=); +// setzt size auf die höchste in digit vorkommende Bitnummer. +// > digit: ein uintC >0 +// < size: >0, <=intCsize, mit 2^(size-1) <= digit < 2^size + #if (intCsize==32) + #define integerlengthC integerlength32 + #endif + #if (intCsize==64) + #define integerlengthC integerlength64 + #endif // Hintere Nullbits eines 32-Bit-Wortes zählen: // ord2_32(digit,count=); @@ -1226,9 +1449,9 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // < count: >=0, <32, mit 2^count | digit, digit/2^count ungerade #if defined(__GNUC__) && defined(__i386__) && !defined(NO_ASM) #define ord2_32(digit,count_zuweisung) \ - { var uintL one_position; /* Position der letzten 1 */\ - __asm__("bsfl %1,%0" : "=r" (one_position) : "rm" ((uint32)(digit)) ); \ - count_zuweisung one_position; \ + { var uintL _one_position; /* Position der letzten 1 */\ + __asm__("bsfl %1,%0" : "=r" (_one_position) : "rm" ((uint32)(digit)) ); \ + count_zuweisung _one_position; \ } #define FAST_ORD2 #elif defined(__sparc__) && !defined(__sparc64__) @@ -1247,11 +1470,23 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // Sei n = ord2(x). Dann ist logxor(x,x-1) = 2^n + (2^n-1) = 2^(n+1)-1. // Also (ord2 x) = (1- (integer-length (logxor x (1- x)))) . #define ord2_32(digit,count_zuweisung) \ - { var uint32 _digit = digit ^ (digit - 1); \ + { var uint32 _digit = (digit) ^ ((digit) - 1); \ integerlength32(_digit,count_zuweisung -1 + ) \ } #endif +// Hintere Nullbits eines 64-Bit-Wortes zählen: +// ord2_64(digit,count=); +// setzt size auf die kleinste in digit vorkommende Bitnummer. +// > digit: ein uint64 >0 +// < count: >=0, <64, mit 2^count | digit, digit/2^count ungerade + // Sei n = ord2(x). Dann ist logxor(x,x-1) = 2^n + (2^n-1) = 2^(n+1)-1. + // Also (ord2 x) = (1- (integer-length (logxor x (1- x)))) . + #define ord2_64(digit,count_zuweisung) \ + { var uint64 _digit = (digit) ^ ((digit) - 1); \ + integerlength64(_digit,count_zuweisung -1 + ) \ + } + // Bits eines Wortes zählen. // logcount_NN(); @@ -1296,9 +1531,9 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) // Bits von x64 zählen: (Input x64, Output x64) #define logcount_64() \ ( /* x64 besteht aus 64 1-Bit-Zählern (0,1). */\ - x64 = (x64 & 0x5555555555555555UL) + ((x64 & 0xAAAAAAAAAAAAAAAAUL) >> 1),\ + x64 = (x64 & 0x5555555555555555ULL) + ((x64 & 0xAAAAAAAAAAAAAAAAULL) >> 1),\ /* x64 besteht aus 32 2-Bit-Zählern (0,1,2). */\ - x64 = (x64 & 0x3333333333333333UL) + ((x64 & 0xCCCCCCCCCCCCCCCCUL) >> 2),\ + x64 = (x64 & 0x3333333333333333ULL) + ((x64 & 0xCCCCCCCCCCCCCCCCULL) >> 2),\ /* x64 besteht aus 16 4-Bit-Zählern (0,1,2,3,4). */\ x64 = (uint32)(x64 + (x64 >> 32)), \ /* x64 besteht aus 8 4-Bit-Zählern (0,...,8). */\ @@ -1310,5 +1545,6 @@ inline uint32 mulu32_unchecked (uint32 arg1, uint32 arg2) /* x64 besteht aus 1 16-Bit-Zähler (0,...,64). */\ ) +} // namespace cln #endif /* _CL_LOW_H */