src/base/digitseq/cl_asm_sparc_.cc

    1 // External routines for ARILEV1.D
    2 // Processor: SPARC
    3 // Compiler: GNU-C or SUN-C
    4 // Parameter passing: in registers %o0-%o5.
    5 // Settings: intCsize=32, intDsize=32.
    6
      // sparcv8 is defined when the compiler announces a SPARC V8 target; it
      // selects the code paths below that use the umul/udiv instructions.
    7 #if defined(sparc_v8) || defined(__sparc_v8) || defined(__sparc_v8__)
    8   #define sparcv8
    9 #endif
   10
      // C(entrypoint) yields the assembler-level name of a C symbol: with a
      // leading underscore when ASM_UNDERSCORE is defined, unchanged otherwise.
      // The pre-ANSI variant pastes tokens with an empty comment.
   11 #ifdef ASM_UNDERSCORE /* SunOS 4 */
   12   #if defined(__STDC__) || defined (__cplusplus)
   13     #define C(entrypoint) _##entrypoint
   14   #else
   15     #define C(entrypoint) _/**/entrypoint
   16   #endif
   17 #else /* SunOS 5 = Solaris 2 */
   18   #define C(entrypoint) entrypoint
   19 #endif
   20
   21 // When this file is compiled into a shared library, ELF linkers need to
   22 // know which symbols are functions.
   23 #if defined(__NetBSD__) || defined(__OpenBSD__)
   24   #define DECLARE_FUNCTION(name) .type C(name),@function
   25 #elif defined(__svr4__) || defined(__ELF__)
   26   // Some preprocessors keep the backslash in place, some don't.
   27   // Some complain about the # being not in front of an ANSI C macro.
   28   // Therefore we use a dollar, which will be sed-converted to # later.
   29   #define DECLARE_FUNCTION(name) .type C(name),$function
   30 #else
   31   #define DECLARE_FUNCTION(name)
   32 #endif
   33
   34   // Markers for instructions that sit in delay slots
   35   // (such an instruction executes BEFORE the preceding branch/jump takes effect).
      // Both markers expand to nothing; they only make delay slots visible to the reader.
   36   #define _             // instruction that is always executed
   37   #define __            // instruction that is executed only if the branch is taken
   38   // Abbreviations for instructions:
   39   #define ret   jmp %i7+8    // return from subroutine
   40   #define retl  jmp %o7+8    // return from leaf subroutine (no save/restore)
  41
   42         .seg "text"
   43
      // Export the entry points of this file. The first group is exported
      // unconditionally; which direction of the remaining loops is exported
      // depends on CL_DS_BIG_ENDIAN_P.
   44         .global C(mulu16_),C(mulu32_),C(mulu32_unchecked)
   45         .global C(divu_6432_3232_),C(divu_3216_1616_)
   46         .global C(copy_loop_up),C(copy_loop_down),C(fill_loop_up),C(fill_loop_down)
   47         .global C(clear_loop_up),C(clear_loop_down)
   48         .global C(test_loop_up),C(test_loop_down)
   49         .global C(xor_loop_up),C(compare_loop_up),C(shiftleftcopy_loop_up),C(shiftxor_loop_up)
   50 #if CL_DS_BIG_ENDIAN_P
   51         .global C(or_loop_up),C(and_loop_up),C(eqv_loop_up)
   52         .global C(nand_loop_up),C(nor_loop_up),C(andc2_loop_up),C(orc2_loop_up)
   53         .global C(not_loop_up)
   54         .global C(and_test_loop_up)
   55         .global C(add_loop_down),C(addto_loop_down),C(inc_loop_down)
   56         .global C(sub_loop_down),C(subx_loop_down),C(subfrom_loop_down),C(dec_loop_down)
   57         .global C(neg_loop_down)
   58         .global C(shift1left_loop_down),C(shiftleft_loop_down),C(shiftleftcopy_loop_down)
   59         .global C(shift1right_loop_up),C(shiftright_loop_up),C(shiftrightsigned_loop_up),C(shiftrightcopy_loop_up)
   60         .global C(mulusmall_loop_down),C(mulu_loop_down),C(muluadd_loop_down),C(mulusub_loop_down)
   61         .global C(divu_loop_up),C(divucopy_loop_up)
   62 #else
   63         .global C(or_loop_down),C(xor_loop_down),C(and_loop_down),C(eqv_loop_down)
   64         .global C(nand_loop_down),C(nor_loop_down),C(andc2_loop_down),C(orc2_loop_down)
   65         .global C(not_loop_down)
   66         .global C(and_test_loop_down),C(compare_loop_down)
   67         .global C(add_loop_up),C(addto_loop_up),C(inc_loop_up)
   68         .global C(sub_loop_up),C(subx_loop_up),C(subfrom_loop_up),C(dec_loop_up)
   69         .global C(neg_loop_up)
   70         .global C(shift1left_loop_up),C(shiftleft_loop_up)
   71         .global C(shift1right_loop_down),C(shiftright_loop_down),C(shiftrightsigned_loop_down),C(shiftrightcopy_loop_down)
   72         .global C(mulusmall_loop_up),C(mulu_loop_up),C(muluadd_loop_up),C(mulusub_loop_up)
   73         .global C(divu_loop_down),C(divucopy_loop_down)
   74 #endif
  75
      // Compile-time selection of the loop implementation used by the *_loop_* routines.
   76 #define LOOP_TYPE  1    // 1: standard loops
   77                         // 2: loops without pointer updates, using only a counter
   78                         // 3: unrolled loops
   79 #define SLOW_LOOPS  0
   80 #define STANDARD_LOOPS  (LOOP_TYPE==1)
   81 #define COUNTER_LOOPS  (LOOP_TYPE==2)
   82 #define UNROLLED_LOOPS  (LOOP_TYPE==3)
   83 #define MULU32_INLINE  1  // 1: expand mulu32 calls inline inside the loops
  84
   85 // extern uint32 mulu16_ (uint16 arg1, uint16 arg2);
   86 // result := arg1*arg2.
      // Leaf routine. With sparcv8 a single umul is used; otherwise the product
      // is built from 16 mulscc (multiply step) instructions, clobbering %o2 and %y.
   87         DECLARE_FUNCTION(mulu16_)
   88 C(mulu16_:) // Input in %o0,%o1, Output in %o0
   89 #ifdef sparcv8
   90         umul    %o0,%o1,%o0
   91         retl
   92        _ nop
   93 #else
   94         mov     %o1,%y
   95         nop                     // wait cycle, needed e.g. on the SUN SPARCstation IPC
   96         andcc   %g0,%g0,%o2
   97         mulscc  %o2,%o0,%o2
   98         mulscc  %o2,%o0,%o2
   99         mulscc  %o2,%o0,%o2
  100         mulscc  %o2,%o0,%o2
  101         mulscc  %o2,%o0,%o2
  102         mulscc  %o2,%o0,%o2
  103         mulscc  %o2,%o0,%o2
  104         mulscc  %o2,%o0,%o2
  105         mulscc  %o2,%o0,%o2
  106         mulscc  %o2,%o0,%o2
  107         mulscc  %o2,%o0,%o2
  108         mulscc  %o2,%o0,%o2
  109         mulscc  %o2,%o0,%o2
  110         mulscc  %o2,%o0,%o2
  111         mulscc  %o2,%o0,%o2
  112         mulscc  %o2,%o0,%o2
  113         // The lower 17 bits of %o2 and the upper 15 bits of %y
  114         // form the result. (The other bits are zero.)
  115         rd      %y,%o0
  116         srl     %o0,17,%o0
  117         sll     %o2,15,%o2
  118         retl
  119        _ or      %o2,%o0,%o0
  120 #endif
 121
  122 // extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
  123 // 2^32*hi+lo := arg1*arg2.
      // Leaf routine. lo is returned in %o0, hi in %g1. The non-sparcv8 path uses
      // 32 mulscc steps plus a final shift step and clobbers %o2, %o3 and %y.
  124         DECLARE_FUNCTION(mulu32_)
  125 C(mulu32_:) // Input in %o0,%o1, Output in %o0,%g1
  126 #ifdef sparcv8
  127         umul    %o0,%o1,%o0
  128         retl
  129        _ rd      %y,%g1
  130 #else
  131         mov     %o1,%y
  132         sra     %o0,31,%o3      // wait cycle, needed e.g. on the SUN SPARCstation IPC (used to build the sign mask of %o0)
  133         andcc   %g0,%g0,%o2
  134         mulscc  %o2,%o0,%o2
  135         mulscc  %o2,%o0,%o2
  136         mulscc  %o2,%o0,%o2
  137         mulscc  %o2,%o0,%o2
  138         mulscc  %o2,%o0,%o2
  139         mulscc  %o2,%o0,%o2
  140         mulscc  %o2,%o0,%o2
  141         mulscc  %o2,%o0,%o2
  142         mulscc  %o2,%o0,%o2
  143         mulscc  %o2,%o0,%o2
  144         mulscc  %o2,%o0,%o2
  145         mulscc  %o2,%o0,%o2
  146         mulscc  %o2,%o0,%o2
  147         mulscc  %o2,%o0,%o2
  148         mulscc  %o2,%o0,%o2
  149         mulscc  %o2,%o0,%o2
  150         mulscc  %o2,%o0,%o2
  151         mulscc  %o2,%o0,%o2
  152         mulscc  %o2,%o0,%o2
  153         mulscc  %o2,%o0,%o2
  154         mulscc  %o2,%o0,%o2
  155         mulscc  %o2,%o0,%o2
  156         mulscc  %o2,%o0,%o2
  157         mulscc  %o2,%o0,%o2
  158         mulscc  %o2,%o0,%o2
  159         mulscc  %o2,%o0,%o2
  160         mulscc  %o2,%o0,%o2
  161         mulscc  %o2,%o0,%o2
  162         mulscc  %o2,%o0,%o2
  163         mulscc  %o2,%o0,%o2
  164         mulscc  %o2,%o0,%o2
  165         mulscc  %o2,%o0,%o2
  166         mulscc  %o2,%g0,%o2
  167         and     %o3,%o1,%o3     // %o3 = (0 if %o0>=0, %o1 if %o0<0)
  168         add     %o2,%o3,%g1     // hi
  169         retl
  170        _ rd      %y,%o0         // lo
  171 #endif
 172
  173 // extern uint32 mulu32_unchecked (uint32 x, uint32 y);
  174 // result := arg1*arg2, which is assumed to be < 2^32.
      // Leaf routine. The non-sparcv8 path puts the smaller argument into %y and
      // runs only 16 mulscc steps with the other one; it clobbers %o2 and %y.
  175         DECLARE_FUNCTION(mulu32_unchecked)
  176 C(mulu32_unchecked:) // Input in %o0,%o1, Output in %o0
  177 #ifdef sparcv8
  178         umul    %o0,%o1,%o0
  179         retl
  180        _ nop
  181 #else
  182         subcc   %o0,%o1,%g0
  183         bcc,a   1f
  184        __ mov     %o1,%y
  185         // arg1 < arg2, so one may assume arg1 < 2^16.
  186         mov     %o0,%y
  187         nop                     // wait cycle, needed e.g. on the SUN SPARCstation IPC
  188         andcc   %g0,%g0,%o2
  189         mulscc  %o2,%o1,%o2
  190         mulscc  %o2,%o1,%o2
  191         mulscc  %o2,%o1,%o2
  192         mulscc  %o2,%o1,%o2
  193         mulscc  %o2,%o1,%o2
  194         mulscc  %o2,%o1,%o2
  195         mulscc  %o2,%o1,%o2
  196         mulscc  %o2,%o1,%o2
  197         mulscc  %o2,%o1,%o2
  198         mulscc  %o2,%o1,%o2
  199         mulscc  %o2,%o1,%o2
  200         mulscc  %o2,%o1,%o2
  201         mulscc  %o2,%o1,%o2
  202         mulscc  %o2,%o1,%o2
  203         mulscc  %o2,%o1,%o2
  204         mulscc  %o2,%o1,%o2
  205         // The lower 17 bits of %o2 and the upper 15 bits of %y
  206         // form the result. (The other bits are zero.)
  207         rd      %y,%o0
  208         srl     %o0,17,%o0
  209         sll     %o2,15,%o2
  210         retl
  211        _ or      %o2,%o0,%o0
  212 1:      // arg1 >= arg2, so one may assume arg2 < 2^16.
  213         nop                     // wait cycle, needed e.g. on the SUN SPARCstation IPC
  214         andcc   %g0,%g0,%o2
  215         mulscc  %o2,%o0,%o2
  216         mulscc  %o2,%o0,%o2
  217         mulscc  %o2,%o0,%o2
  218         mulscc  %o2,%o0,%o2
  219         mulscc  %o2,%o0,%o2
  220         mulscc  %o2,%o0,%o2
  221         mulscc  %o2,%o0,%o2
  222         mulscc  %o2,%o0,%o2
  223         mulscc  %o2,%o0,%o2
  224         mulscc  %o2,%o0,%o2
  225         mulscc  %o2,%o0,%o2
  226         mulscc  %o2,%o0,%o2
  227         mulscc  %o2,%o0,%o2
  228         mulscc  %o2,%o0,%o2
  229         mulscc  %o2,%o0,%o2
  230         mulscc  %o2,%o0,%o2
  231         // The lower 17 bits of %o2 and the upper 15 bits of %y
  232         // form the result.
  233         rd      %y,%o0
  234         srl     %o0,17,%o0
  235         sll     %o2,15,%o2
  236         retl
  237        _ or      %o2,%o0,%o0
  238 #endif
 239
  240 // extern struct { uint32 q; uint32 r; } divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y);
  241 // Write x = 2^32*xhi+xlo = q*y+r. It is assumed that 0 <= x < 2^32*y .
      // Leaf routine. q is returned in %o0, r in %g1. The non-sparcv8 path is a
      // bit-by-bit division with three variants (y < 2^31, y even, y odd) and
      // uses %o3 as an additional scratch register.
  242         DECLARE_FUNCTION(divu_6432_3232_)
  243 C(divu_6432_3232_:) // Input in %o0,%o1,%o2, Output in %o0,%g1
  244 #if defined(sparcv8)
  245         // Problem: Is udiv worth using (gmp-2.0.2 doesn't use it) ??
  246         wr      %o0,%g0,%y
  247         nop                     // wait 1 | Necessary for certain sparcv8
  248         nop                     // wait 2 | processors such as Ross Hypersparc,
  249         nop                     // wait 3 | but not for most of the others.
  250         udiv    %o1,%o2,%o0     // divide x by y, %o0 := q
  251         umul    %o0,%o2,%g1     // %g1 := (q*y) mod 2^32
  252         retl
  253        _ sub     %o1,%g1,%g1    // %g1 := (xlo-q*y) mod 2^32 = r
  254 #else
  255         // %o0 = xhi, %o1 = xlo, %o2 = y
  256 // Single division steps:
  257 // %o0|%o1  is shifted left by 1 bit each time,
  258 // and a result bit (negated!) is shifted into %o1 from the right.
  259 // Depending on the outcome we continue with %o3|%o1 instead of %o0|%o1 (saves 1 'mov').
  260 // Therefore the code has to be provided twice: once with %o0, once with %o3.
  261 #define SA0(label) /* comparison step with %o0  */\
  262         subcc   %o0,%o2,%o3; \
  263         bcc     label;       \
  264        _ addxcc  %o1,%o1,%o1
  265 #define SA1(label) /* comparison step with %o3  */\
  266         subcc   %o3,%o2,%o0; \
  267         bcc     label;       \
  268        _ addxcc  %o1,%o1,%o1
  269 #define SB0() /* addition step with %o0  */\
  270         addx    %o0,%o0,%o0
  271 #define SB1() /* addition step with %o3  */\
  272         addx    %o3,%o3,%o3
  273 // Here we go:
  274         addcc   %o2,%o2,%g0     // y = %o2 < 2^31 ?
  275         bcc     Lsmalldiv       // yes -> "small" division
  276        _ andcc   %o2,1,%g0      // y = %o2 even ?
  277         be      Levendiv        // yes -> division by an even number
  278        _ srl     %o2,1,%o2
  279         // Division by an odd number:
  280         // floor(x / (2*y'-1)) = floor(floor(x/2) / y') + (0 or 1 or 2)
  281         // since  0 <= x/(2*y'-1) - x/(2*y') = x/(2*y'-1) / (2*y') = x/y / (2*y')
  282         //       < 2^32 / (2*y') < 2^32/y <= 2 .
  283         add     %o2,1,%o2       // %o2 = ceiling(y/2) = y'
  284         // Compared with Lsmalldiv this saves
  285         // one doubling of %o0|%o1 at the start: addcc %o1,%o1,%o1; SB0()
  286         // but there is more to do at the end...
  287         SA0(Lb01)               // determine bit 31 of the quotient
  288 La01:   SB0(); SA0(Lb02)        // determine bit 30 of the quotient
  289 La02:   SB0(); SA0(Lb03)        // determine bit 29 of the quotient
  290 La03:   SB0(); SA0(Lb04)        // determine bit 28 of the quotient
  291 La04:   SB0(); SA0(Lb05)        // determine bit 27 of the quotient
  292 La05:   SB0(); SA0(Lb06)        // determine bit 26 of the quotient
  293 La06:   SB0(); SA0(Lb07)        // determine bit 25 of the quotient
  294 La07:   SB0(); SA0(Lb08)        // determine bit 24 of the quotient
  295 La08:   SB0(); SA0(Lb09)        // determine bit 23 of the quotient
  296 La09:   SB0(); SA0(Lb10)        // determine bit 22 of the quotient
  297 La10:   SB0(); SA0(Lb11)        // determine bit 21 of the quotient
  298 La11:   SB0(); SA0(Lb12)        // determine bit 20 of the quotient
  299 La12:   SB0(); SA0(Lb13)        // determine bit 19 of the quotient
  300 La13:   SB0(); SA0(Lb14)        // determine bit 18 of the quotient
  301 La14:   SB0(); SA0(Lb15)        // determine bit 17 of the quotient
  302 La15:   SB0(); SA0(Lb16)        // determine bit 16 of the quotient
  303 La16:   SB0(); SA0(Lb17)        // determine bit 15 of the quotient
  304 La17:   SB0(); SA0(Lb18)        // determine bit 14 of the quotient
  305 La18:   SB0(); SA0(Lb19)        // determine bit 13 of the quotient
  306 La19:   SB0(); SA0(Lb20)        // determine bit 12 of the quotient
  307 La20:   SB0(); SA0(Lb21)        // determine bit 11 of the quotient
  308 La21:   SB0(); SA0(Lb22)        // determine bit 10 of the quotient
  309 La22:   SB0(); SA0(Lb23)        // determine bit 9 of the quotient
  310 La23:   SB0(); SA0(Lb24)        // determine bit 8 of the quotient
  311 La24:   SB0(); SA0(Lb25)        // determine bit 7 of the quotient
  312 La25:   SB0(); SA0(Lb26)        // determine bit 6 of the quotient
  313 La26:   SB0(); SA0(Lb27)        // determine bit 5 of the quotient
  314 La27:   SB0(); SA0(Lb28)        // determine bit 4 of the quotient
  315 La28:   SB0(); SA0(Lb29)        // determine bit 3 of the quotient
  316 La29:   SB0(); SA0(Lb30)        // determine bit 2 of the quotient
  317 La30:   SB0(); SA0(Lb31)        // determine bit 1 of the quotient
  318 La31:   SB0(); SA0(Lb32)        // determine bit 0 of the quotient
  319 La32:   SB0()                   // %o0 = x mod (2*y')
  320         xor     %o1,-1,%o1      // %o1 = floor( floor(x/2) / y') = floor(x/(2*y'))
  321         add     %o2,%o2,%o2
  322         sub     %o2,1,%o2       // again %o2 = 2*y'-1 = y
  323         // Convert quotient and remainder:
  324         // x = %o1 * 2*y' + %o0 = %o1 * (2*y'-1) + (%o0+%o1)
  325         // So quotient = %o1, remainder = %o0+%o1.
  326         // At most 2 more times: quotient += 1, remainder -= y.
  327         addcc   %o1,%o0,%o0     // determine remainder mod y
  328         bcc     1f              // no addition overflow -> 1f; on overflow (fall through) increment the quotient
  329        _ subcc   %o0,%o2,%o3
  330         subcc   %o3,%o2,%o0     // does the quotient have to be incremented once more?
  331         bcs     2f
  332        _ mov     %o3,%g1
  333         // increment quotient twice, remainder %o0
  334         mov     %o0,%g1
  335         retl
  336        _ add     %o1,2,%o0
  337 1:      // no addition overflow.
  338         // Because y>=2^31 the quotient has to be incremented at most 1 more time:
  339         bcs     3f              // %o0 < %o2 -> remainder %o0 and quotient %o1 OK
  340        _ mov     %o3,%g1
  341 2:      // increment quotient %o1, remainder = %o0-%o2 = %o3
  342         retl
  343        _ add     %o1,1,%o0
  344 3:      // quotient %o1 and remainder %o0 OK
  345         mov     %o0,%g1
  346         retl
  347        _ mov     %o1,%o0
  348 // Parallel track to La01..La32:
  349 Lb01:   SB1(); SA1(La02)
  350 Lb02:   SB1(); SA1(La03)
  351 Lb03:   SB1(); SA1(La04)
  352 Lb04:   SB1(); SA1(La05)
  353 Lb05:   SB1(); SA1(La06)
  354 Lb06:   SB1(); SA1(La07)
  355 Lb07:   SB1(); SA1(La08)
  356 Lb08:   SB1(); SA1(La09)
  357 Lb09:   SB1(); SA1(La10)
  358 Lb10:   SB1(); SA1(La11)
  359 Lb11:   SB1(); SA1(La12)
  360 Lb12:   SB1(); SA1(La13)
  361 Lb13:   SB1(); SA1(La14)
  362 Lb14:   SB1(); SA1(La15)
  363 Lb15:   SB1(); SA1(La16)
  364 Lb16:   SB1(); SA1(La17)
  365 Lb17:   SB1(); SA1(La18)
  366 Lb18:   SB1(); SA1(La19)
  367 Lb19:   SB1(); SA1(La20)
  368 Lb20:   SB1(); SA1(La21)
  369 Lb21:   SB1(); SA1(La22)
  370 Lb22:   SB1(); SA1(La23)
  371 Lb23:   SB1(); SA1(La24)
  372 Lb24:   SB1(); SA1(La25)
  373 Lb25:   SB1(); SA1(La26)
  374 Lb26:   SB1(); SA1(La27)
  375 Lb27:   SB1(); SA1(La28)
  376 Lb28:   SB1(); SA1(La29)
  377 Lb29:   SB1(); SA1(La30)
  378 Lb30:   SB1(); SA1(La31)
  379 Lb31:   SB1(); SA1(La32)
  380 Lb32:   SB1()                   // %o3 = x mod (2*y')
  381         xor     %o1,-1,%o1      // %o1 = floor( floor(x/2) / y') = floor(x/(2*y'))
  382         add     %o2,%o2,%o2
  383         sub     %o2,1,%o2       // again %o2 = 2*y'-1 = y
  384         // Convert quotient and remainder:
  385         // x = %o1 * 2*y' + %o3 = %o1 * (2*y'-1) + (%o3+%o1)
  386         // So quotient = %o1, remainder = %o3+%o1.
  387         // At most 2 more times: quotient += 1, remainder -= y.
  388         addcc   %o1,%o3,%o3     // determine remainder mod y
  389         bcc     1f              // no addition overflow -> 1f; on overflow (fall through) increment the quotient
  390        _ subcc   %o3,%o2,%o0
  391         subcc   %o0,%o2,%o3     // does the quotient have to be incremented once more?
  392         bcs     2f
  393        _ mov     %o0,%g1
  394         // increment quotient twice, remainder %o3
  395         mov     %o3,%g1
  396         retl
  397        _ add     %o1,2,%o0
  398 1:      // no addition overflow.
  399         // Because y>=2^31 the quotient has to be incremented at most 1 more time:
  400         bcs     3f              // %o3 < %o2 -> remainder %o3 and quotient %o1 OK
  401        _ mov     %o0,%g1
  402 2:      // increment quotient %o1, remainder = %o3-%o2 = %o0
  403         retl
  404        _ add     %o1,1,%o0
  405 3:      // quotient %o1 and remainder %o3 OK
  406         mov     %o3,%g1
  407         retl
  408        _ mov     %o1,%o0
  409 Lsmalldiv: // division by y < 2^31
  410         addcc   %o1,%o1,%o1
  411 Lc00:   SB0(); SA0(Ld01)        // determine bit 31 of the quotient
  412 Lc01:   SB0(); SA0(Ld02)        // determine bit 30 of the quotient
  413 Lc02:   SB0(); SA0(Ld03)        // determine bit 29 of the quotient
  414 Lc03:   SB0(); SA0(Ld04)        // determine bit 28 of the quotient
  415 Lc04:   SB0(); SA0(Ld05)        // determine bit 27 of the quotient
  416 Lc05:   SB0(); SA0(Ld06)        // determine bit 26 of the quotient
  417 Lc06:   SB0(); SA0(Ld07)        // determine bit 25 of the quotient
  418 Lc07:   SB0(); SA0(Ld08)        // determine bit 24 of the quotient
  419 Lc08:   SB0(); SA0(Ld09)        // determine bit 23 of the quotient
  420 Lc09:   SB0(); SA0(Ld10)        // determine bit 22 of the quotient
  421 Lc10:   SB0(); SA0(Ld11)        // determine bit 21 of the quotient
  422 Lc11:   SB0(); SA0(Ld12)        // determine bit 20 of the quotient
  423 Lc12:   SB0(); SA0(Ld13)        // determine bit 19 of the quotient
  424 Lc13:   SB0(); SA0(Ld14)        // determine bit 18 of the quotient
  425 Lc14:   SB0(); SA0(Ld15)        // determine bit 17 of the quotient
  426 Lc15:   SB0(); SA0(Ld16)        // determine bit 16 of the quotient
  427 Lc16:   SB0(); SA0(Ld17)        // determine bit 15 of the quotient
  428 Lc17:   SB0(); SA0(Ld18)        // determine bit 14 of the quotient
  429 Lc18:   SB0(); SA0(Ld19)        // determine bit 13 of the quotient
  430 Lc19:   SB0(); SA0(Ld20)        // determine bit 12 of the quotient
  431 Lc20:   SB0(); SA0(Ld21)        // determine bit 11 of the quotient
  432 Lc21:   SB0(); SA0(Ld22)        // determine bit 10 of the quotient
  433 Lc22:   SB0(); SA0(Ld23)        // determine bit 9 of the quotient
  434 Lc23:   SB0(); SA0(Ld24)        // determine bit 8 of the quotient
  435 Lc24:   SB0(); SA0(Ld25)        // determine bit 7 of the quotient
  436 Lc25:   SB0(); SA0(Ld26)        // determine bit 6 of the quotient
  437 Lc26:   SB0(); SA0(Ld27)        // determine bit 5 of the quotient
  438 Lc27:   SB0(); SA0(Ld28)        // determine bit 4 of the quotient
  439 Lc28:   SB0(); SA0(Ld29)        // determine bit 3 of the quotient
  440 Lc29:   SB0(); SA0(Ld30)        // determine bit 2 of the quotient
  441 Lc30:   SB0(); SA0(Ld31)        // determine bit 1 of the quotient
  442 Lc31:   SB0(); SA0(Ld32)        // determine bit 0 of the quotient
  443 Lc32:   mov     %o0,%g1         // store remainder from %o0 into %g1
  444         retl
  445        _ xor     %o1,-1,%o0     // quotient to %o0
  446 // Parallel track to Lc01..Lc32:
  447 Ld01:   SB1(); SA1(Lc02)
  448 Ld02:   SB1(); SA1(Lc03)
  449 Ld03:   SB1(); SA1(Lc04)
  450 Ld04:   SB1(); SA1(Lc05)
  451 Ld05:   SB1(); SA1(Lc06)
  452 Ld06:   SB1(); SA1(Lc07)
  453 Ld07:   SB1(); SA1(Lc08)
  454 Ld08:   SB1(); SA1(Lc09)
  455 Ld09:   SB1(); SA1(Lc10)
  456 Ld10:   SB1(); SA1(Lc11)
  457 Ld11:   SB1(); SA1(Lc12)
  458 Ld12:   SB1(); SA1(Lc13)
  459 Ld13:   SB1(); SA1(Lc14)
  460 Ld14:   SB1(); SA1(Lc15)
  461 Ld15:   SB1(); SA1(Lc16)
  462 Ld16:   SB1(); SA1(Lc17)
  463 Ld17:   SB1(); SA1(Lc18)
  464 Ld18:   SB1(); SA1(Lc19)
  465 Ld19:   SB1(); SA1(Lc20)
  466 Ld20:   SB1(); SA1(Lc21)
  467 Ld21:   SB1(); SA1(Lc22)
  468 Ld22:   SB1(); SA1(Lc23)
  469 Ld23:   SB1(); SA1(Lc24)
  470 Ld24:   SB1(); SA1(Lc25)
  471 Ld25:   SB1(); SA1(Lc26)
  472 Ld26:   SB1(); SA1(Lc27)
  473 Ld27:   SB1(); SA1(Lc28)
  474 Ld28:   SB1(); SA1(Lc29)
  475 Ld29:   SB1(); SA1(Lc30)
  476 Ld30:   SB1(); SA1(Lc31)
  477 Ld31:   SB1(); SA1(Lc32)
  478 Ld32:   mov     %o3,%g1         // store remainder from %o3 into %g1
  479         retl
  480        _ xor     %o1,-1,%o0     // quotient to %o0
  481 Levendiv: // Division by even y.
  482         // Divide x/2 by y/2; the quotient is OK, the remainder may have to be multiplied by 2.
  483         // %o2 = y/2 already holds.
  484         // Compared with Lsmalldiv this saves
  485         // one doubling of %o0|%o1 at the start: addcc %o1,%o1,%o1; SB0()
  486         // but at the end bit 0 of x has to be shifted into the remainder.
  487         SA0(Lf01)               // determine bit 31 of the quotient
  488 Le01:   SB0(); SA0(Lf02)        // determine bit 30 of the quotient
  489 Le02:   SB0(); SA0(Lf03)        // determine bit 29 of the quotient
  490 Le03:   SB0(); SA0(Lf04)        // determine bit 28 of the quotient
  491 Le04:   SB0(); SA0(Lf05)        // determine bit 27 of the quotient
  492 Le05:   SB0(); SA0(Lf06)        // determine bit 26 of the quotient
  493 Le06:   SB0(); SA0(Lf07)        // determine bit 25 of the quotient
  494 Le07:   SB0(); SA0(Lf08)        // determine bit 24 of the quotient
  495 Le08:   SB0(); SA0(Lf09)        // determine bit 23 of the quotient
  496 Le09:   SB0(); SA0(Lf10)        // determine bit 22 of the quotient
  497 Le10:   SB0(); SA0(Lf11)        // determine bit 21 of the quotient
  498 Le11:   SB0(); SA0(Lf12)        // determine bit 20 of the quotient
  499 Le12:   SB0(); SA0(Lf13)        // determine bit 19 of the quotient
  500 Le13:   SB0(); SA0(Lf14)        // determine bit 18 of the quotient
  501 Le14:   SB0(); SA0(Lf15)        // determine bit 17 of the quotient
  502 Le15:   SB0(); SA0(Lf16)        // determine bit 16 of the quotient
  503 Le16:   SB0(); SA0(Lf17)        // determine bit 15 of the quotient
  504 Le17:   SB0(); SA0(Lf18)        // determine bit 14 of the quotient
  505 Le18:   SB0(); SA0(Lf19)        // determine bit 13 of the quotient
  506 Le19:   SB0(); SA0(Lf20)        // determine bit 12 of the quotient
  507 Le20:   SB0(); SA0(Lf21)        // determine bit 11 of the quotient
  508 Le21:   SB0(); SA0(Lf22)        // determine bit 10 of the quotient
  509 Le22:   SB0(); SA0(Lf23)        // determine bit 9 of the quotient
  510 Le23:   SB0(); SA0(Lf24)        // determine bit 8 of the quotient
  511 Le24:   SB0(); SA0(Lf25)        // determine bit 7 of the quotient
  512 Le25:   SB0(); SA0(Lf26)        // determine bit 6 of the quotient
  513 Le26:   SB0(); SA0(Lf27)        // determine bit 5 of the quotient
  514 Le27:   SB0(); SA0(Lf28)        // determine bit 4 of the quotient
  515 Le28:   SB0(); SA0(Lf29)        // determine bit 3 of the quotient
  516 Le29:   SB0(); SA0(Lf30)        // determine bit 2 of the quotient
  517 Le30:   SB0(); SA0(Lf31)        // determine bit 1 of the quotient
  518 Le31:   SB0(); SA0(Lf32)        // determine bit 0 of the quotient
  519 Le32:   SB0()                   // determine bit 0 of the remainder
  520         mov     %o0,%g1         // store remainder from %o0 into %g1
  521         retl
  522        _ xor     %o1,-1,%o0     // quotient to %o0
  523 // Parallel track to Le01..Le32:
  524 Lf01:   SB1(); SA1(Le02)
  525 Lf02:   SB1(); SA1(Le03)
  526 Lf03:   SB1(); SA1(Le04)
  527 Lf04:   SB1(); SA1(Le05)
  528 Lf05:   SB1(); SA1(Le06)
  529 Lf06:   SB1(); SA1(Le07)
  530 Lf07:   SB1(); SA1(Le08)
  531 Lf08:   SB1(); SA1(Le09)
  532 Lf09:   SB1(); SA1(Le10)
  533 Lf10:   SB1(); SA1(Le11)
  534 Lf11:   SB1(); SA1(Le12)
  535 Lf12:   SB1(); SA1(Le13)
  536 Lf13:   SB1(); SA1(Le14)
  537 Lf14:   SB1(); SA1(Le15)
  538 Lf15:   SB1(); SA1(Le16)
  539 Lf16:   SB1(); SA1(Le17)
  540 Lf17:   SB1(); SA1(Le18)
  541 Lf18:   SB1(); SA1(Le19)
  542 Lf19:   SB1(); SA1(Le20)
  543 Lf20:   SB1(); SA1(Le21)
  544 Lf21:   SB1(); SA1(Le22)
  545 Lf22:   SB1(); SA1(Le23)
  546 Lf23:   SB1(); SA1(Le24)
  547 Lf24:   SB1(); SA1(Le25)
  548 Lf25:   SB1(); SA1(Le26)
  549 Lf26:   SB1(); SA1(Le27)
  550 Lf27:   SB1(); SA1(Le28)
  551 Lf28:   SB1(); SA1(Le29)
  552 Lf29:   SB1(); SA1(Le30)
  553 Lf30:   SB1(); SA1(Le31)
  554 Lf31:   SB1(); SA1(Le32)
  555 Lf32:   SB1()
  556         mov     %o3,%g1         // store remainder from %o3 into %g1
  557         retl
  558        _ xor     %o1,-1,%o0     // quotient to %o0
  559 #endif
 560
 561 // extern struct { uint16 q; uint16 r; } divu_3216_1616_ (uint32 x, uint16 y);
 562 // x = q*y+r schreiben. Sei bekannt, daß 0 <= x < 2^16*y .
 563         DECLARE_FUNCTION(divu_3216_1616_)
 564 C(divu_3216_1616_:) // Input in %o0,%o1, Output in %o0 (Rest und Quotient).
 565 #if defined(sparcv8)
 566         // Problem: Is udiv worth using (gmp-2.0.2 doesn't use it) ??
 567         wr      %g0,%g0,%y
 568         nop                     // wait 1
 569         nop                     // wait 2
 570         nop                     // wait 3
 571         udiv    %o0,%o1,%o0     // dividieren, Quotient nach %o0
 572         rd      %y,%o1          // Rest aus %y
 573         sll     %o1,16,%o1      // in die oberen 16 Bit schieben
 574         retl
 575        _ or      %o0,%o1,%o0
 576 #else
 577         // %o0 = x, %o1 = y
 578 // Divisions-Einzelschritte:
 579 // %o0  wird jeweils um 1 Bit nach links geschoben,
 580 // dafür wird rechts in %o1 ein Ergebnisbit (negiert!) reingeschoben.
 581 // Dann wird auf >= 2^15*y verglichen (nicht auf >= 2^16*y, weil man dann das
 582 // links herausgeschobene Bit mit vergleichen müßte!)
 583         sll %o1,16,%o1
 584         srl %o1,1,%o1           // 2^15*y
 585         sub %g0,%o1,%o2         // zum Addieren statt Subtrahieren: -2^15*y
 586         // SC0(label) subtrahiert y, schiebt Carry-Bit rechts in %o0 rein
 587         // (1 falls Subtraktion aufging, 0 sonst).
 588         // Ging die Subtraktion nicht auf, so müßte man noch 2*y addieren.
 589         // Das faßt man mit der nächsten Operation zusammen, indem man - statt
 590         // y zu subtrahieren - y addiert:
 591         // SC1(label) addiert y, schiebt Carry-Bit rechts in %o0 rein
 592         // (1 falls Subtraktion aufgegangen wäre, man also wieder im
 593         // "positiven Bereich" landet, 0 sonst).
 594 #define SC0(label) \
 595         addcc   %o0,%o2,%o0; \
 596         bcc     label;       \
 597        _ addx    %o0,%o0,%o0
 598 #define SC1(label) \
 599         addcc   %o0,%o1,%o0; \
 600         bcs     label;       \
 601        _ addx    %o0,%o0,%o0
 602         SC0(Lh01)               // Bit 15 des Quotienten bestimmen
 603 Lg01:   SC0(Lh02)               // Bit 14 des Quotienten bestimmen
 604 Lg02:   SC0(Lh03)               // Bit 13 des Quotienten bestimmen
 605 Lg03:   SC0(Lh04)               // Bit 12 des Quotienten bestimmen
 606 Lg04:   SC0(Lh05)               // Bit 11 des Quotienten bestimmen
 607 Lg05:   SC0(Lh06)               // Bit 10 des Quotienten bestimmen
 608 Lg06:   SC0(Lh07)               // Bit 9 des Quotienten bestimmen
 609 Lg07:   SC0(Lh08)               // Bit 8 des Quotienten bestimmen
 610 Lg08:   SC0(Lh09)               // Bit 7 des Quotienten bestimmen
 611 Lg09:   SC0(Lh10)               // Bit 6 des Quotienten bestimmen
 612 Lg10:   SC0(Lh11)               // Bit 5 des Quotienten bestimmen
 613 Lg11:   SC0(Lh12)               // Bit 4 des Quotienten bestimmen
 614 Lg12:   SC0(Lh13)               // Bit 3 des Quotienten bestimmen
 615 Lg13:   SC0(Lh14)               // Bit 2 des Quotienten bestimmen
 616 Lg14:   SC0(Lh15)               // Bit 1 des Quotienten bestimmen
 617 Lg15:   SC0(Lh16)               // Bit 0 des Quotienten bestimmen
 618 Lg16:   // Die oberen 16 Bit von %o0 sind der Rest,
 619         // die unteren 16 Bit von %o0 sind der Quotient.
 620         retl
 621        _ nop
 622 Lh01:   SC1(Lg02)               // Bit 14 des Quotienten bestimmen
 623 Lh02:   SC1(Lg03)               // Bit 13 des Quotienten bestimmen
 624 Lh03:   SC1(Lg04)               // Bit 12 des Quotienten bestimmen
 625 Lh04:   SC1(Lg05)               // Bit 11 des Quotienten bestimmen
 626 Lh05:   SC1(Lg06)               // Bit 10 des Quotienten bestimmen
 627 Lh06:   SC1(Lg07)               // Bit 9 des Quotienten bestimmen
 628 Lh07:   SC1(Lg08)               // Bit 8 des Quotienten bestimmen
 629 Lh08:   SC1(Lg09)               // Bit 7 des Quotienten bestimmen
 630 Lh09:   SC1(Lg10)               // Bit 6 des Quotienten bestimmen
 631 Lh10:   SC1(Lg11)               // Bit 5 des Quotienten bestimmen
 632 Lh11:   SC1(Lg12)               // Bit 4 des Quotienten bestimmen
 633 Lh12:   SC1(Lg13)               // Bit 3 des Quotienten bestimmen
 634 Lh13:   SC1(Lg14)               // Bit 2 des Quotienten bestimmen
 635 Lh14:   SC1(Lg15)               // Bit 1 des Quotienten bestimmen
 636 Lh15:   SC1(Lg16)               // Bit 0 des Quotienten bestimmen
 637 Lh16:   // Noch 2*y addieren:
 638         add %o0,%o1,%o0
 639         retl
 640        _ add %o0,%o1,%o0
 641 #endif
 642
  643 #if !defined(__GNUC__)
  644         .global C(_get_g1)
  645 // extern uint32 _get_g1 (void);
      // Returns the current contents of %g1 in %o0. Only built for non-GNU
      // compilers. NOTE(review): presumably used to fetch the second result
      // word that mulu32_ and divu_6432_3232_ leave in %g1 -- confirm against callers.
  646         DECLARE_FUNCTION(_get_g1)
  647 C(_get_g1:)
  648         retl
  649        _ mov %g1,%o0
  650 #endif
 651
  652 // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
      // Copies count 32-bit words from sourceptr to destptr, walking both
      // pointers upwards. Returns destptr advanced by count words.
      // Leaf routine; uses %o3 as scratch.
  653         DECLARE_FUNCTION(copy_loop_up)
  654 C(copy_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
  655 #if STANDARD_LOOPS
  656         andcc %o2,%o2,%g0       // count = 0 ?
  657         be 2f                   // yes -> nothing to copy
  658        _ nop
  659 1:        ld [%o0],%o3          // fetch next word
  660           add %o0,4,%o0
  661           st %o3,[%o1]          // store it
  662           subcc %o2,1,%o2
  663           bne 1b
  664          _ add %o1,4,%o1
  665 2:      retl
  666        _ mov %o1,%o0
  667 #endif
  668 #if COUNTER_LOOPS
  669         subcc %g0,%o2,%o2       // %o2 = -count
  670         be 2f
  671        _ sub %o1,4,%o1
  672         sll %o2,2,%o2           // %o2 = -4*count
  673         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
  674         sub %o1,%o2,%o1         // %o1 = &destptr[count-1]
  675 1:        ld [%o0+%o2],%o3      // fetch next digit
  676           addcc %o2,4,%o2       // "decrement" counter, advance pointer
  677           bne 1b
  678          _ st %o3,[%o1+%o2]     // store digit
  679 2:      retl
  680        _ add %o1,4,%o0
  681 #endif
 682
  683 // extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
      // Copies count 32-bit words downwards: each step reads the word just
      // below sourceptr and stores it just below destptr. Returns destptr
      // moved down by count words. Leaf routine; uses %o3 as scratch.
  684         DECLARE_FUNCTION(copy_loop_down)
  685 C(copy_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
  686 #if STANDARD_LOOPS
  687         andcc %o2,%o2,%g0       // count = 0 ?
  688         be 2f                   // yes -> nothing to copy
  689        _ sub %o0,4,%o0
  690 1:        ld [%o0],%o3          // fetch next word
  691           sub %o1,4,%o1
  692           st %o3,[%o1]          // store it
  693           subcc %o2,1,%o2
  694           bne 1b
  695          _ sub %o0,4,%o0
  696 2:      retl
  697        _ mov %o1,%o0
  698 #endif
  699 #if COUNTER_LOOPS
  700         andcc %o2,%o2,%g0
  701         be 2f
  702        _ sub %o0,4,%o0
  703         sll %o2,2,%o2           // %o2 = 4*count
  704         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
  705         sub %o1,%o2,%o1         // %o1 = &destptr[-count]
  706 1:        ld [%o0+%o2],%o3      // fetch next digit
  707           subcc %o2,4,%o2       // decrement counter, move pointer down
  708           bne 1b
  709          _ st %o3,[%o1+%o2]     // store digit
  710 2:      retl
  711        _ mov %o1,%o0
  712 #endif
 713
 714 // extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler);
     // Stores filler into count consecutive 32-bit digits starting at destptr,
     // walking upward. Returns the pointer just past the last digit written.
 715         DECLARE_FUNCTION(fill_loop_up)
 716 C(fill_loop_up:) // in: %o0 = destptr, %o1 = count, %o2 = filler; out: %o0
 717 #if STANDARD_LOOPS
 718         tst %o1                 // count = 0 ?
 719         bz 2f
 720        _ nop
 721 1:        st %o2,[%o0]          // *destptr := filler
 722           deccc %o1             // count--
 723           bnz 1b
 724          _ inc 4,%o0            // destptr++
 725 2:      retl
 726        _ nop                    // %o0 already holds the result
 727 #endif
 728 #if COUNTER_LOOPS
 729         subcc %g0,%o1,%o1       // %o1 = -count
 730         bz 2f
 731        _ dec 4,%o0              // bias destptr by one digit (undone at 2:)
 732         sll %o1,2,%o1           // %o1 = -4*count
 733         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
 734 1:        addcc %o1,4,%o1       // step index toward 0, i.e. one digit up
 735           bnz 1b
 736          _ st %o2,[%o0+%o1]     // store filler
 737 2:      retl
 738        _ inc 4,%o0              // result := &destptr[count]
 739 #endif
 740
 741 // extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler);
     // Stores filler into the count 32-bit digits just below destptr, walking
     // downward. Returns destptr lowered by count digits.
 742         DECLARE_FUNCTION(fill_loop_down)
 743 C(fill_loop_down:) // in: %o0 = destptr, %o1 = count, %o2 = filler; out: %o0
 744 #if STANDARD_LOOPS
 745         tst %o1                 // count = 0 ?
 746         bz 2f
 747        _ dec 4,%o0              // --destptr for the first store
 748 1:        st %o2,[%o0]          // *destptr := filler
 749           deccc %o1             // count--
 750           bnz 1b
 751          _ dec 4,%o0            // --destptr
 752 2:      retl
 753        _ inc 4,%o0              // undo the extra pre-decrement
 754 #endif
 755 #if COUNTER_LOOPS
 756         tst %o1                 // count = 0 ?
 757         bz 2f
 758        _ sll %o1,2,%o1          // %o1 = 4*count
 759         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
 760 1:        subcc %o1,4,%o1       // decrement index, i.e. one digit down
 761           bnz 1b
 762          _ st %o2,[%o0+%o1]     // store filler
 763 2:      retl
 764        _ nop                    // %o0 already holds the result
 765 #endif
 766
 767 // extern uintD* clear_loop_up (uintD* destptr, uintC count);
     // Zeroes count consecutive 32-bit digits starting at destptr, walking
     // upward. Returns the pointer just past the last digit cleared.
 768         DECLARE_FUNCTION(clear_loop_up)
 769 C(clear_loop_up:) // in: %o0 = destptr, %o1 = count; out: %o0
 770 #if STANDARD_LOOPS
 771         tst %o1                 // count = 0 ?
 772         bz 2f
 773        _ nop
 774 1:        clr [%o0]             // *destptr := 0
 775           deccc %o1             // count--
 776           bnz 1b
 777          _ inc 4,%o0            // destptr++
 778 2:      retl
 779        _ nop                    // %o0 already holds the result
 780 #endif
 781 #if COUNTER_LOOPS
 782         subcc %g0,%o1,%o1       // %o1 = -count
 783         bz 2f
 784        _ dec 4,%o0              // bias destptr by one digit (undone at 2:)
 785         sll %o1,2,%o1           // %o1 = -4*count
 786         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
 787 1:        addcc %o1,4,%o1       // step index toward 0, i.e. one digit up
 788           bnz 1b
 789          _ clr [%o0+%o1]        // store a zero digit
 790 2:      retl
 791        _ inc 4,%o0              // result := &destptr[count]
 792 #endif
 793
 794 // extern uintD* clear_loop_down (uintD* destptr, uintC count);
     // Zeroes the count 32-bit digits just below destptr, walking downward.
     // Returns destptr lowered by count digits.
 795         DECLARE_FUNCTION(clear_loop_down)
 796 C(clear_loop_down:) // in: %o0 = destptr, %o1 = count; out: %o0
 797 #if STANDARD_LOOPS
 798         tst %o1                 // count = 0 ?
 799         bz 2f
 800        _ dec 4,%o0              // --destptr for the first store
 801 1:        clr [%o0]             // *destptr := 0
 802           deccc %o1             // count--
 803           bnz 1b
 804          _ dec 4,%o0            // --destptr
 805 2:      retl
 806        _ inc 4,%o0              // undo the extra pre-decrement
 807 #endif
 808 #if COUNTER_LOOPS
 809         tst %o1                 // count = 0 ?
 810         bz 2f
 811        _ sll %o1,2,%o1          // %o1 = 4*count
 812         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
 813 1:        subcc %o1,4,%o1       // decrement index, i.e. one digit down
 814           bnz 1b
 815          _ clr [%o0+%o1]        // store a zero digit
 816 2:      retl
 817        _ nop                    // %o0 already holds the result
 818 #endif
 819
 820 // extern boolean test_loop_up (uintD* ptr, uintC count);
     // Scans count 32-bit digits upward from ptr. Returns 1 as soon as a
     // nonzero digit is found, 0 if every digit is zero or count is 0.
 821         DECLARE_FUNCTION(test_loop_up)
 822 C(test_loop_up:) // Input in %o0,%o1, Output in %o0
 823 #if STANDARD_LOOPS
 824         andcc %o1,%o1,%g0       // count = 0 ?
 825         be 2f
 826        _ nop
 827           ld [%o0],%o2          // preload the first digit
 828 1:        add %o0,4,%o0         // ptr++
 829           andcc %o2,%o2,%g0     // test digit
 830           bne 3f                // nonzero -> return 1
 831          _ subcc %o1,1,%o1      // count-- (its flags drive the next branch)
 832           bne,a 1b
 833          __ ld [%o0],%o2        // fetch next digit, only when looping
 834 2:      retl
 835        _ mov 0,%o0              // all digits were zero
 836 3:      retl
 837        _ mov 1,%o0              // found a nonzero digit
 838 #endif
 839 #if COUNTER_LOOPS
 840         subcc %g0,%o1,%o1       // %o1 = -count
 841         be 2f
 842        _ sll %o1,2,%o1          // %o1 = -4*count
 843         sub %o0,%o1,%o0         // %o0 = &ptr[count]
 844           ld [%o0+%o1],%o2      // fetch next digit
 845 1:        andcc %o2,%o2,%g0     // test digit
 846           bne 3f                // nonzero -> return 1
 847          _ addcc %o1,4,%o1      // step index toward 0, i.e. advance pointer
 848           bne,a 1b
 849          __ ld [%o0+%o1],%o2    // fetch next digit, only when looping
 850 2:      retl
 851        _ mov 0,%o0              // all digits were zero
 852 3:      retl
 853        _ mov 1,%o0              // found a nonzero digit
 854 #endif
 855
 856 // extern boolean test_loop_down (uintD* ptr, uintC count);
     // Scans the count 32-bit digits just below ptr, walking downward.
     // Returns 1 as soon as a nonzero digit is found, 0 if every digit is
     // zero or count is 0.
 857         DECLARE_FUNCTION(test_loop_down)
 858 C(test_loop_down:) // Input in %o0,%o1, Output in %o0
 859 #if STANDARD_LOOPS
 860         andcc %o1,%o1,%g0       // count = 0 ?
 861         be 2f
 862        _ sub %o0,4,%o0          // --ptr for the first load
 863           ld [%o0],%o2          // preload the first digit
 864 1:        sub %o0,4,%o0         // --ptr
 865           andcc %o2,%o2,%g0     // test digit
 866           bne 3f                // nonzero -> return 1
 867          _ subcc %o1,1,%o1      // count-- (its flags drive the next branch)
 868           bne,a 1b
 869          __ ld [%o0],%o2        // fetch next digit, only when looping
 870 2:      retl
 871        _ mov 0,%o0              // all digits were zero
 872 3:      retl
 873        _ mov 1,%o0              // found a nonzero digit
 874 #endif
 875 #if COUNTER_LOOPS
 876         sll %o1,2,%o1           // %o1 = 4*count
 877         sub %o0,%o1,%o0         // %o0 = &ptr[-count]
 878         subcc %o1,4,%o1         // index of the first digit; borrow means count = 0
 879         bcs 4f
 880        _ nop
 881           ld [%o0+%o1],%o2      // fetch next digit
 882 1:        subcc %o1,4,%o1       // decrement counter, move pointer down
 883           bcs 3f                // borrow: the loaded digit was the last one
 884          _ andcc %o2,%o2,%g0    // test digit; flags are consumed at 3: or by be,a
 885           be,a 1b
 886          __ ld [%o0+%o1],%o2    // fetch next digit, only when looping
 887 2:      retl
 888        _ mov 1,%o0              // found a nonzero digit
 889 3:      bne 2b                  // last digit nonzero -> return 1
 890        _ nop
 891 4:      retl
 892        _ mov 0,%o0              // all digits were zero
 893 #endif
 894
 895 #if CL_DS_BIG_ENDIAN_P
 896
 897 // extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := xptr[i] | yptr[i] for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
 898         DECLARE_FUNCTION(or_loop_up)
 899 C(or_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
 900 #if SLOW_LOOPS
 901         andcc %o2,%o2,%g0       // count = 0 ?
 902         be 2f
 903        _ nop
 904 1:        ld [%o0],%o3          // *xptr
 905           ld [%o1],%o4          // *yptr
 906           add %o1,4,%o1         // yptr++
 907           or %o3,%o4,%o3        // combine
 908           st %o3,[%o0]          // =: *xptr
 909           subcc %o2,1,%o2       // count--
 910           bne 1b
 911          _ add %o0,4,%o0        // xptr++
 912 2:      retl
 913        _ nop
 914 #endif
 915 #if STANDARD_LOOPS
 916         andcc %o2,%o2,%g0       // count = 0 ?
 917         be 2f
 918        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 919 1:        ld [%o0],%o3          // *xptr
 920           ld [%o0+%o1],%o4      // *yptr
 921           subcc %o2,1,%o2       // count-- (flags survive until the bne)
 922           or %o3,%o4,%o3        // combine
 923           st %o3,[%o0]          // =: *xptr
 924           bne 1b
 925          _ add %o0,4,%o0        // xptr++, yptr++
 926 2:      retl
 927        _ nop
 928 #endif
 929 #if COUNTER_LOOPS
 930         subcc %g0,%o2,%o2       // %o2 = -count
 931         be 2f
 932        _ sub %o0,4,%o0          // bias xptr by one digit
 933         sll %o2,2,%o2           // %o2 = -4*count
 934         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 935         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 936 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
 937           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
 938           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
 939           or %o4,%o3,%o3        // combine both
 940           bne 1b
 941          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
 942 2:      retl
 943        _ nop
 944 #endif
 945
 946 #endif
 947
 948 // extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := xptr[i] ^ yptr[i] for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
 949         DECLARE_FUNCTION(xor_loop_up)
 950 C(xor_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
 951 #if SLOW_LOOPS
 952         andcc %o2,%o2,%g0       // count = 0 ?
 953         be 2f
 954        _ nop
 955 1:        ld [%o0],%o3          // *xptr
 956           ld [%o1],%o4          // *yptr
 957           add %o1,4,%o1         // yptr++
 958           xor %o3,%o4,%o3       // combine
 959           st %o3,[%o0]          // =: *xptr
 960           subcc %o2,1,%o2       // count--
 961           bne 1b
 962          _ add %o0,4,%o0        // xptr++
 963 2:      retl
 964        _ nop
 965 #endif
 966 #if STANDARD_LOOPS
 967         andcc %o2,%o2,%g0       // count = 0 ?
 968         be 2f
 969        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 970 1:        ld [%o0],%o3          // *xptr
 971           ld [%o0+%o1],%o4      // *yptr
 972           subcc %o2,1,%o2       // count-- (flags survive until the bne)
 973           xor %o3,%o4,%o3       // combine
 974           st %o3,[%o0]          // =: *xptr
 975           bne 1b
 976          _ add %o0,4,%o0        // xptr++, yptr++
 977 2:      retl
 978        _ nop
 979 #endif
 980 #if COUNTER_LOOPS
 981         subcc %g0,%o2,%o2       // %o2 = -count
 982         be 2f
 983        _ sub %o0,4,%o0          // bias xptr by one digit
 984         sll %o2,2,%o2           // %o2 = -4*count
 985         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 986         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 987 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
 988           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
 989           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
 990           xor %o4,%o3,%o3       // combine both
 991           bne 1b
 992          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
 993 2:      retl
 994        _ nop
 995 #endif
 996
 997 #if CL_DS_BIG_ENDIAN_P
 998
 999 // extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := xptr[i] & yptr[i] for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
1000         DECLARE_FUNCTION(and_loop_up)
1001 C(and_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
1002 #if SLOW_LOOPS
1003         andcc %o2,%o2,%g0       // count = 0 ?
1004         be 2f
1005        _ nop
1006 1:        ld [%o0],%o3          // *xptr
1007           ld [%o1],%o4          // *yptr
1008           add %o1,4,%o1         // yptr++
1009           and %o3,%o4,%o3       // combine
1010           st %o3,[%o0]          // =: *xptr
1011           subcc %o2,1,%o2       // count--
1012           bne 1b
1013          _ add %o0,4,%o0        // xptr++
1014 2:      retl
1015        _ nop
1016 #endif
1017 #if STANDARD_LOOPS
1018         andcc %o2,%o2,%g0       // count = 0 ?
1019         be 2f
1020        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1021 1:        ld [%o0],%o3          // *xptr
1022           ld [%o0+%o1],%o4      // *yptr
1023           subcc %o2,1,%o2       // count-- (flags survive until the bne)
1024           and %o3,%o4,%o3       // combine
1025           st %o3,[%o0]          // =: *xptr
1026           bne 1b
1027          _ add %o0,4,%o0        // xptr++, yptr++
1028 2:      retl
1029        _ nop
1030 #endif
1031 #if COUNTER_LOOPS
1032         subcc %g0,%o2,%o2       // %o2 = -count
1033         be 2f
1034        _ sub %o0,4,%o0          // bias xptr by one digit
1035         sll %o2,2,%o2           // %o2 = -4*count
1036         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
1037         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1038 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
1039           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
1040           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
1041           and %o4,%o3,%o3       // combine both
1042           bne 1b
1043          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
1044 2:      retl
1045        _ nop
1046 #endif
1047
1048 // extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := ~(xptr[i] ^ yptr[i]) for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
1049         DECLARE_FUNCTION(eqv_loop_up)
1050 C(eqv_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
1051 #if SLOW_LOOPS
1052         andcc %o2,%o2,%g0       // count = 0 ?
1053         be 2f
1054        _ nop
1055 1:        ld [%o0],%o3          // *xptr
1056           ld [%o1],%o4          // *yptr
1057           add %o1,4,%o1         // yptr++
1058           xnor %o3,%o4,%o3      // combine
1059           st %o3,[%o0]          // =: *xptr
1060           subcc %o2,1,%o2       // count--
1061           bne 1b
1062          _ add %o0,4,%o0        // xptr++
1063 2:      retl
1064        _ nop
1065 #endif
1066 #if STANDARD_LOOPS
1067         andcc %o2,%o2,%g0       // count = 0 ?
1068         be 2f
1069        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1070 1:        ld [%o0],%o3          // *xptr
1071           ld [%o0+%o1],%o4      // *yptr
1072           subcc %o2,1,%o2       // count-- (flags survive until the bne)
1073           xnor %o3,%o4,%o3      // combine
1074           st %o3,[%o0]          // =: *xptr
1075           bne 1b
1076          _ add %o0,4,%o0        // xptr++, yptr++
1077 2:      retl
1078        _ nop
1079 #endif
1080 #if COUNTER_LOOPS
1081         subcc %g0,%o2,%o2       // %o2 = -count
1082         be 2f
1083        _ sub %o0,4,%o0          // bias xptr by one digit
1084         sll %o2,2,%o2           // %o2 = -4*count
1085         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
1086         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1087 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
1088           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
1089           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
1090           xnor %o4,%o3,%o3      // combine both
1091           bne 1b
1092          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
1093 2:      retl
1094        _ nop
1095 #endif
1096
1097 // extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := ~(xptr[i] & yptr[i]) for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
1098         DECLARE_FUNCTION(nand_loop_up)
1099 C(nand_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
1100 #if SLOW_LOOPS
1101         andcc %o2,%o2,%g0       // count = 0 ?
1102         be 2f
1103        _ nop
1104 1:        ld [%o0],%o3          // *xptr
1105           ld [%o1],%o4          // *yptr
1106           add %o1,4,%o1         // yptr++
1107           and %o3,%o4,%o3       // combine
1108           xor %o3,-1,%o3        // complement
1109           st %o3,[%o0]          // =: *xptr
1110           subcc %o2,1,%o2       // count--
1111           bne 1b
1112          _ add %o0,4,%o0        // xptr++
1113 2:      retl
1114        _ nop
1115 #endif
1116 #if STANDARD_LOOPS
1117         andcc %o2,%o2,%g0       // count = 0 ?
1118         be 2f
1119        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1120 1:        ld [%o0],%o3          // *xptr
1121           ld [%o0+%o1],%o4      // *yptr
1122           subcc %o2,1,%o2       // count-- (flags survive until the bne)
1123           and %o3,%o4,%o3       // combine
1124           xor %o3,-1,%o3        // complement
1125           st %o3,[%o0]          // =: *xptr
1126           bne 1b
1127          _ add %o0,4,%o0        // xptr++, yptr++
1128 2:      retl
1129        _ nop
1130 #endif
1131 #if COUNTER_LOOPS
1132         subcc %g0,%o2,%o2       // %o2 = -count
1133         be 2f
1134        _ sub %o0,4,%o0          // bias xptr by one digit
1135         sll %o2,2,%o2           // %o2 = -4*count
1136         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
1137         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1138 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
1139           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
1140           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
1141           and %o4,%o3,%o3       // combine both
1142           xor %o3,-1,%o3        // complement
1143           bne 1b
1144          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
1145 2:      retl
1146        _ nop
1147 #endif
1148
1149 // extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := ~(xptr[i] | yptr[i]) for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
1150         DECLARE_FUNCTION(nor_loop_up)
1151 C(nor_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
1152 #if SLOW_LOOPS
1153         andcc %o2,%o2,%g0       // count = 0 ?
1154         be 2f
1155        _ nop
1156 1:        ld [%o0],%o3          // *xptr
1157           ld [%o1],%o4          // *yptr
1158           add %o1,4,%o1         // yptr++
1159           or %o3,%o4,%o3        // combine
1160           xor %o3,-1,%o3        // complement
1161           st %o3,[%o0]          // =: *xptr
1162           subcc %o2,1,%o2       // count--
1163           bne 1b
1164          _ add %o0,4,%o0        // xptr++
1165 2:      retl
1166        _ nop
1167 #endif
1168 #if STANDARD_LOOPS
1169         andcc %o2,%o2,%g0       // count = 0 ?
1170         be 2f
1171        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1172 1:        ld [%o0],%o3          // *xptr
1173           ld [%o0+%o1],%o4      // *yptr
1174           subcc %o2,1,%o2       // count-- (flags survive until the bne)
1175           or %o3,%o4,%o3        // combine
1176           xor %o3,-1,%o3        // complement
1177           st %o3,[%o0]          // =: *xptr
1178           bne 1b
1179          _ add %o0,4,%o0        // xptr++, yptr++
1180 2:      retl
1181        _ nop
1182 #endif
1183 #if COUNTER_LOOPS
1184         subcc %g0,%o2,%o2       // %o2 = -count
1185         be 2f
1186        _ sub %o0,4,%o0          // bias xptr by one digit
1187         sll %o2,2,%o2           // %o2 = -4*count
1188         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
1189         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1190 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
1191           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
1192           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
1193           or %o4,%o3,%o3        // combine both
1194           xor %o3,-1,%o3        // complement
1195           bne 1b
1196          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
1197 2:      retl
1198        _ nop
1199 #endif
1200
1201 // extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := xptr[i] & ~yptr[i] for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
1202         DECLARE_FUNCTION(andc2_loop_up)
1203 C(andc2_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
1204 #if SLOW_LOOPS
1205         andcc %o2,%o2,%g0       // count = 0 ?
1206         be 2f
1207        _ nop
1208 1:        ld [%o0],%o3          // *xptr
1209           ld [%o1],%o4          // *yptr
1210           add %o1,4,%o1         // yptr++
1211           andn %o3,%o4,%o3      // combine: x & ~y
1212           st %o3,[%o0]          // =: *xptr
1213           subcc %o2,1,%o2       // count--
1214           bne 1b
1215          _ add %o0,4,%o0        // xptr++
1216 2:      retl
1217        _ nop
1218 #endif
1219 #if STANDARD_LOOPS
1220         andcc %o2,%o2,%g0       // count = 0 ?
1221         be 2f
1222        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1223 1:        ld [%o0],%o3          // *xptr
1224           ld [%o0+%o1],%o4      // *yptr
1225           subcc %o2,1,%o2       // count-- (flags survive until the bne)
1226           andn %o3,%o4,%o3      // combine: x & ~y
1227           st %o3,[%o0]          // =: *xptr
1228           bne 1b
1229          _ add %o0,4,%o0        // xptr++, yptr++
1230 2:      retl
1231        _ nop
1232 #endif
1233 #if COUNTER_LOOPS
1234         subcc %g0,%o2,%o2       // %o2 = -count
1235         be 2f
1236        _ sub %o0,4,%o0          // bias xptr by one digit
1237         sll %o2,2,%o2           // %o2 = -4*count
1238         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
1239         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1240 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
1241           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
1242           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
1243           andn %o4,%o3,%o3      // combine both: x & ~y
1244           bne 1b
1245          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
1246 2:      retl
1247        _ nop
1248 #endif
1249
1250 // extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Computes xptr[i] := xptr[i] | ~yptr[i] for i = 0 .. count-1 (32-bit
     // digits, ascending addresses). yptr is only read. No return value.
1251         DECLARE_FUNCTION(orc2_loop_up)
1252 C(orc2_loop_up:) // Input in %o0 (xptr), %o1 (yptr), %o2 (count)
1253 #if SLOW_LOOPS
1254         andcc %o2,%o2,%g0       // count = 0 ?
1255         be 2f
1256        _ nop
1257 1:        ld [%o0],%o3          // *xptr
1258           ld [%o1],%o4          // *yptr
1259           add %o1,4,%o1         // yptr++
1260           orn %o3,%o4,%o3       // combine: x | ~y
1261           st %o3,[%o0]          // =: *xptr
1262           subcc %o2,1,%o2       // count--
1263           bne 1b
1264          _ add %o0,4,%o0        // xptr++
1265 2:      retl
1266        _ nop
1267 #endif
1268 #if STANDARD_LOOPS
1269         andcc %o2,%o2,%g0       // count = 0 ?
1270         be 2f
1271        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1272 1:        ld [%o0],%o3          // *xptr
1273           ld [%o0+%o1],%o4      // *yptr
1274           subcc %o2,1,%o2       // count-- (flags survive until the bne)
1275           orn %o3,%o4,%o3       // combine: x | ~y
1276           st %o3,[%o0]          // =: *xptr
1277           bne 1b
1278          _ add %o0,4,%o0        // xptr++, yptr++
1279 2:      retl
1280        _ nop
1281 #endif
1282 #if COUNTER_LOOPS
1283         subcc %g0,%o2,%o2       // %o2 = -count
1284         be 2f
1285        _ sub %o0,4,%o0          // bias xptr by one digit
1286         sll %o2,2,%o2           // %o2 = -4*count
1287         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
1288         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1289 1:        ld [%o1+%o2],%o3      // fetch yptr[i]
1290           addcc %o2,4,%o2       // step index toward 0, i.e. advance pointers
1291           ld [%o0+%o2],%o4      // fetch xptr[i] (biased base + stepped index)
1292           orn %o4,%o3,%o3       // combine both: x | ~y
1293           bne 1b
1294          _ st %o3,[%o0+%o2]     // store into xptr[i]; %o1+%o2 would address yptr[i+1]
1295 2:      retl
1296        _ nop
1297 #endif
1298
1299 // extern void not_loop_up (uintD* xptr, uintC count);
     // Replaces each of the count 32-bit digits starting at xptr by its
     // bitwise complement, walking upward. No return value.
1300         DECLARE_FUNCTION(not_loop_up)
1301 C(not_loop_up:) // in: %o0 = xptr, %o1 = count
1302 #if STANDARD_LOOPS
1303         tst %o1                 // count = 0 ?
1304         bz 2f
1305        _ nop
1306 1:        ld [%o0],%o2          // digit := *xptr
1307           deccc %o1             // count-- (flags survive until the bnz)
1308           not %o2               // digit := ~digit
1309           st %o2,[%o0]          // *xptr := digit
1310           bnz 1b
1311          _ inc 4,%o0            // xptr++
1312 2:      retl
1313        _ nop
1314 #endif
1315 #if COUNTER_LOOPS
1316         subcc %g0,%o1,%o1       // %o1 = -count
1317         bz 2f
1318        _ dec 4,%o0              // bias xptr by one digit
1319         sll %o1,2,%o1           // %o1 = -4*count
1320         sub %o0,%o1,%o0         // %o0 = &xptr[count-1]
1321 1:        addcc %o1,4,%o1       // step index toward 0, i.e. one digit up
1322           ld [%o0+%o1],%o2      // fetch next digit
1323           not %o2               // digit := ~digit
1324           bnz 1b
1325          _ st %o2,[%o0+%o1]     // store digit back
1326 2:      retl
1327        _ nop
1328 #endif
1329
1330 // extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 32-bit digits upward through xptr and yptr. Returns 1 as
     // soon as some xptr[i] & yptr[i] is nonzero, 0 otherwise (also for
     // count = 0). Neither array is written.
1331         DECLARE_FUNCTION(and_test_loop_up)
1332 C(and_test_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
1333 #if STANDARD_LOOPS
1334         andcc %o2,%o2,%g0       // count = 0 ?
1335         be 2f
1336        _ nop
1337 1:        ld [%o0],%o3          // *xptr
1338           ld [%o1],%o4          // *yptr
1339           add %o0,4,%o0         // xptr++
1340           andcc %o3,%o4,%g0     // any common bit set?
1341           bne 3f                // yes -> return 1
1342          _ subcc %o2,1,%o2      // count-- (its flags drive the next branch)
1343           bne 1b
1344          _ add %o1,4,%o1        // yptr++
1345 2:      retl
1346        _ mov 0,%o0              // no common bits anywhere
1347 3:      retl
1348        _ mov 1,%o0              // found a common bit
1349 #endif
1350 #if COUNTER_LOOPS
1351         subcc %g0,%o2,%o2       // %o2 = -count
1352         be 2f
1353        _ sll %o2,2,%o2          // %o2 = -4*count
1354         sub %o0,%o2,%o0         // %o0 = &xptr[count]
1355         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1356           ld [%o0+%o2],%o3      // fetch next x digit
1357 1:        ld [%o1+%o2],%o4      // fetch matching y digit
1358           andcc %o3,%o4,%g0     // combine both, result only in the flags
1359           bne 3f                // common bit -> return 1
1360          _ addcc %o2,4,%o2      // step index toward 0, i.e. advance pointers
1361           bne,a 1b
1362          __ ld [%o0+%o2],%o3    // fetch next x digit, only when looping
1363 2:      retl
1364        _ mov 0,%o0              // no common bits anywhere
1365 3:      retl
1366        _ mov 1,%o0              // found a common bit
1367 #endif
1368
1369 #endif
1370
1371 // extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count);
     // Compares count 32-bit digits of xptr and yptr, walking upward, and
     // stops at the first pair that differs. Returns 0 if all pairs are equal
     // (or count is 0), 1 if the differing x digit is larger (unsigned),
     // -1 if it is smaller.
1372         DECLARE_FUNCTION(compare_loop_up)
1373 C(compare_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
1374 #if STANDARD_LOOPS
1375         andcc %o2,%o2,%g0       // count = 0 ?
1376         be 2f
1377        _ nop
1378           ld [%o0],%o3          // preload the first x digit
1379 1:        ld [%o1],%o4          // fetch matching y digit
1380           add %o0,4,%o0         // xptr++
1381           subcc %o3,%o4,%g0     // compare
1382           bne 3f                // digits differ; flags stay valid for 3:
1383          _ add %o1,4,%o1        // yptr++ (does not touch the flags)
1384           subcc %o2,1,%o2       // count--
1385           bne,a 1b
1386          __ ld [%o0],%o3        // fetch next x digit, only when looping
1387 2:      retl
1388        _ mov 0,%o0              // all digits equal
1389 3:      blu 4f                  // x digit < y digit (unsigned)?
1390        _ nop
1391         retl
1392        _ mov 1,%o0              // x > y
1393 4:      retl
1394        _ mov -1,%o0             // x < y
1395 #endif
1396 #if COUNTER_LOOPS
1397         subcc %g0,%o2,%o2       // %o2 = -count
1398         be 2f
1399        _ sll %o2,2,%o2          // %o2 = -4*count
1400         sub %o0,%o2,%o0         // %o0 = &xptr[count]
1401         sub %o1,%o2,%o1         // %o1 = &yptr[count]
1402           ld [%o0+%o2],%o3      // fetch next x digit
1403 1:        ld [%o1+%o2],%o4      // fetch matching y digit
1404           subcc %o3,%o4,%g0     // compare
1405           bne 3f                // digits differ
1406          _ addcc %o2,4,%o2      // step index toward 0 (overwrites the compare flags)
1407           bne,a 1b
1408          __ ld [%o0+%o2],%o3    // fetch next x digit, only when looping
1409 2:      retl
1410        _ mov 0,%o0              // all digits equal
1411 3:      subcc %o3,%o4,%g0       // compare again, the delay-slot addcc clobbered the flags
1412         blu 4f                  // x digit < y digit (unsigned)?
1413        _ nop
1414         retl
1415        _ mov 1,%o0              // x > y
1416 4:      retl
1417        _ mov -1,%o0             // x < y
1418 #endif
1419
1420 #if CL_DS_BIG_ENDIAN_P
1421
1422 // extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Adds the count 32-bit digits just below sourceptr1 and sourceptr2,
     // walking downward with carry propagation, and stores the sums just
     // below destptr. Returns the final carry (0 or 1; 0 when count is 0).
     // Clobbers %g1, and additionally %g2 in the PIC build of the unrolled
     // variant.
1423         DECLARE_FUNCTION(add_loop_down)
1424 C(add_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
1425 #if STANDARD_LOOPS
1426         andcc %o3,%o3,%g0       // count = 0 ?
1427         be 2f
1428        _ mov %g0,%g1            // Carry := 0
1429         sub %o0,4,%o0           // --sourceptr1 for the first load
1430 1:        ld [%o0],%o4          // source1-digit
1431           sub %o1,4,%o1         // --sourceptr2
1432           ld [%o1],%o5          // source2-digit
1433           subcc %g0,%g1,%g0     // move saved carry into the C flag
1434           addxcc %o4,%o5,%o4    // add with carry
1435           addx %g0,%g0,%g1      // new carry
1436           sub %o2,4,%o2         // --destptr
1437           st %o4,[%o2]          // store digit
1438           subcc %o3,1,%o3       // count--
1439           bne 1b
1440          _ sub %o0,4,%o0        // --sourceptr1
1441 2:      retl
1442        _ mov %g1,%o0            // result := carry
1443 #endif
1444 #if COUNTER_LOOPS
1445         andcc %o3,%o3,%g0       // count = 0 ?
1446         be 2f
1447        _ mov %g0,%g1            // Carry := 0
1448         sub %o0,4,%o0
1449         sub %o1,4,%o1
1450         sll %o3,2,%o3           // %o3 = 4*count
1451         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
1452         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
1453         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
1454 1:        ld [%o0+%o3],%o4      // source1-digit
1455           ld [%o1+%o3],%o5      // source2-digit
1456           subcc %g0,%g1,%g0     // move saved carry into the C flag
1457           addxcc %o4,%o5,%o4    // add with carry
1458           addx %g0,%g0,%g1      // new carry
1459           subcc %o3,4,%o3       // decrement index, i.e. one digit down
1460           bne 1b
1461          _ st %o4,[%o2+%o3]     // store digit
1462 2:      retl
1463        _ mov %g1,%o0            // result := carry
1464 #endif
1465 #if UNROLLED_LOOPS
1466         and %o3,7,%o4           // count mod 8
1467         sll %o4,2,%o5
1468         sub %o0,%o5,%o0         // %o0 = &sourceptr1[-(count mod 8)]
1469         sub %o1,%o5,%o1         // %o1 = &sourceptr2[-(count mod 8)]
1470         sub %o2,%o5,%o2         // %o2 = &destptr[-(count mod 8)]
1471         sll %o4,4,%o4           // 16 bytes of code per digit
1472 #ifdef PIC
1473         mov %o7,%g2             // save return address
1474         call 0f                 // call writes its OWN address into %o7
     // Label 1 lies 20 bytes after the call (call, delay slot, sub, jmp,
     // delay slot), so the entry for count mod 8 = 0 is call+20+4*(1+4*8).
1475        _ add %o7,152,%o5
1476 0:
1477 #else
     // Use C() so the symbol also resolves where no underscore is prepended.
     // Label 1 lies 44 bytes after the entry point; 44+4*(1+4*8) = 176.
1478         set C(add_loop_down)+176,%o5
1479 #endif
1480         sub %o5,%o4,%o5
1481         jmp %o5                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
1482        _ subcc %g0,%g0,%g0      // clear carry
1483 1:        subcc %g0,%g1,%g0     // move saved carry into the C flag
1484           ld [%o0+28],%o4       // source1-digit
1485           ld [%o1+28],%o5       // source2-digit
1486           addxcc %o5,%o4,%o5    // add with carry
1487           st %o5,[%o2+28]       // store digit
1488           ld [%o0+24],%o4       // source1-digit
1489           ld [%o1+24],%o5       // source2-digit
1490           addxcc %o5,%o4,%o5    // add with carry
1491           st %o5,[%o2+24]       // store digit
1492           ld [%o0+20],%o4       // source1-digit
1493           ld [%o1+20],%o5       // source2-digit
1494           addxcc %o5,%o4,%o5    // add with carry
1495           st %o5,[%o2+20]       // store digit
1496           ld [%o0+16],%o4       // source1-digit
1497           ld [%o1+16],%o5       // source2-digit
1498           addxcc %o5,%o4,%o5    // add with carry
1499           st %o5,[%o2+16]       // store digit
1500           ld [%o0+12],%o4       // source1-digit
1501           ld [%o1+12],%o5       // source2-digit
1502           addxcc %o5,%o4,%o5    // add with carry
1503           st %o5,[%o2+12]       // store digit
1504           ld [%o0+8],%o4        // source1-digit
1505           ld [%o1+8],%o5        // source2-digit
1506           addxcc %o5,%o4,%o5    // add with carry
1507           st %o5,[%o2+8]        // store digit
1508           ld [%o0+4],%o4        // source1-digit
1509           ld [%o1+4],%o5        // source2-digit
1510           addxcc %o5,%o4,%o5    // add with carry
1511           st %o5,[%o2+4]        // store digit
1512           ld [%o0],%o4          // source1-digit
1513           ld [%o1],%o5          // source2-digit
1514           addxcc %o5,%o4,%o5    // add with carry
1515           st %o5,[%o2]          // store digit
1516           addx %g0,%g0,%g1      // new carry
1517           sub %o0,32,%o0
1518           sub %o1,32,%o1
1519           subcc %o3,8,%o3       // at least 8 more digits to process?
1520           bcc 1b
1521          _ sub %o2,32,%o2
1522 #ifdef PIC
1523         jmp %g2+8               // %o7 was overwritten by the call above
1524 #else
1525         retl
1526 #endif
1527        _ mov %g1,%o0            // result := carry
1528 #endif
1529
1530 // extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
     // Adds the count 32-bit digits just below sourceptr into the count
     // digits just below destptr (in place), walking downward with carry
     // propagation. Returns the final carry (0 or 1; 0 when count is 0).
     // The PIC build of the unrolled variant additionally clobbers %g2.
1531         DECLARE_FUNCTION(addto_loop_down)
1532 C(addto_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1533 #if STANDARD_LOOPS
1534         andcc %o2,%o2,%g0       // count = 0 ?
1535         be 2f
1536        _ mov %g0,%o5            // Carry := 0
1537         sub %o0,4,%o0           // --sourceptr for the first load
1538 1:        ld [%o0],%o3          // source-digit
1539           sub %o1,4,%o1         // --destptr
1540           ld [%o1],%o4          // dest-digit
1541           subcc %g0,%o5,%g0     // move saved carry into the C flag
1542           addxcc %o4,%o3,%o4    // add with carry
1543           addx %g0,%g0,%o5      // new carry
1544           st %o4,[%o1]          // store digit
1545           subcc %o2,1,%o2       // count--
1546           bne 1b
1547          _ sub %o0,4,%o0        // --sourceptr
1548 2:      retl
1549        _ mov %o5,%o0            // result := carry
1550 #endif
1551 #if COUNTER_LOOPS
1552         andcc %o2,%o2,%g0       // count = 0 ?
1553         be 2f
1554        _ mov %g0,%o5            // Carry := 0
1555         sub %o0,4,%o0
1556         sub %o1,4,%o1
1557         sll %o2,2,%o2           // %o2 = 4*count
1558         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
1559         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
1560           ld [%o0+%o2],%o3      // source-digit
1561 1:        ld [%o1+%o2],%o4      // dest-digit
1562           subcc %g0,%o5,%g0     // move saved carry into the C flag
1563           addxcc %o4,%o3,%o4    // add with carry
1564           addx %g0,%g0,%o5      // new carry
1565           st %o4,[%o1+%o2]      // store digit
1566           subcc %o2,4,%o2       // decrement index, i.e. one digit down
1567           bne,a 1b
1568          __ ld [%o0+%o2],%o3    // source-digit, only when looping
1569 2:      retl
1570        _ mov %o5,%o0            // result := carry
1571 #endif
1572 #if UNROLLED_LOOPS
1573         and %o2,7,%o3           // count mod 8
1574         sll %o3,2,%o4
1575         sub %o0,%o4,%o0         // %o0 = &sourceptr[-(count mod 8)]
1576         sub %o1,%o4,%o1         // %o1 = &destptr[-(count mod 8)]
1577         sll %o3,4,%o3           // 16 bytes of code per digit
1578 #ifdef PIC
1579         mov %o7,%g2             // save return address
1580         call 0f                 // call writes its OWN address into %o7
     // Label 1 lies 20 bytes after the call (call, delay slot, sub, jmp,
     // delay slot), so the entry for count mod 8 = 0 is call+20+4*(1+4*8).
1581        _ add %o7,152,%o4
1582 0:
1583 #else
     // Use C() so the symbol also resolves where no underscore is prepended.
     // Label 1 lies 40 bytes after the entry point; 40+4*(1+4*8) = 172.
1584         set C(addto_loop_down)+172,%o4
1585 #endif
1586         sub %o4,%o3,%o4
1587         jmp %o4                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
1588        _ subcc %g0,%g0,%g0      // clear carry
1589 1:        subcc %g0,%o5,%g0     // move saved carry into the C flag
1590           ld [%o0+28],%o3       // source-digit
1591           ld [%o1+28],%o4       // dest-digit
1592           addxcc %o4,%o3,%o4    // add with carry
1593           st %o4,[%o1+28]       // store digit
1594           ld [%o0+24],%o3       // source-digit
1595           ld [%o1+24],%o4       // dest-digit
1596           addxcc %o4,%o3,%o4    // add with carry
1597           st %o4,[%o1+24]       // store digit
1598           ld [%o0+20],%o3       // source-digit
1599           ld [%o1+20],%o4       // dest-digit
1600           addxcc %o4,%o3,%o4    // add with carry
1601           st %o4,[%o1+20]       // store digit
1602           ld [%o0+16],%o3       // source-digit
1603           ld [%o1+16],%o4       // dest-digit
1604           addxcc %o4,%o3,%o4    // add with carry
1605           st %o4,[%o1+16]       // store digit
1606           ld [%o0+12],%o3       // source-digit
1607           ld [%o1+12],%o4       // dest-digit
1608           addxcc %o4,%o3,%o4    // add with carry
1609           st %o4,[%o1+12]       // store digit
1610           ld [%o0+8],%o3        // source-digit
1611           ld [%o1+8],%o4        // dest-digit
1612           addxcc %o4,%o3,%o4    // add with carry
1613           st %o4,[%o1+8]        // store digit
1614           ld [%o0+4],%o3        // source-digit
1615           ld [%o1+4],%o4        // dest-digit
1616           addxcc %o4,%o3,%o4    // add with carry
1617           st %o4,[%o1+4]        // store digit
1618           ld [%o0],%o3          // source-digit
1619           ld [%o1],%o4          // dest-digit
1620           addxcc %o4,%o3,%o4    // add with carry
1621           st %o4,[%o1]          // store digit
1622           addx %g0,%g0,%o5      // new carry
1623           sub %o0,32,%o0
1624           subcc %o2,8,%o2       // at least 8 more digits to process?
1625           bcc 1b
1626          _ sub %o1,32,%o1
1627 #ifdef PIC
1628         jmp %g2+8               // %o7 was overwritten by the call above
1629 #else
1630         retl
1631 #endif
1632        _ mov %o5,%o0            // result := carry
1633 #endif
1634
1635 // extern uintD inc_loop_down (uintD* ptr, uintC count);
     // Adds 1 to the count-digit number stored below ptr. Starts at the digit
     // ptr[-1] and walks towards lower addresses for as long as the incremented
     // digit wraps around to 0.
     // Returns 1 if all count digits wrapped (also when count = 0), else 0.
     // Lines marked _ are delay-slot instructions that always execute; lines
     // marked __ are annulled delay slots that execute only if the branch is taken.
1636         DECLARE_FUNCTION(inc_loop_down)
1637 C(inc_loop_down:) // Input in %o0,%o1, Output in %o0
1638 #if STANDARD_LOOPS
1639         andcc %o1,%o1,%g0       // count = 0 ?
1640         be 2f                   // yes -> return 1
1641        _ sub %o0,4,%o0          // %o0 = &ptr[-1]
1642 1:        ld [%o0],%o2          // fetch digit
1643           addcc %o2,1,%o2       // increment
1644           bne 3f                // result nonzero -> no carry, return 0
1645          _ st %o2,[%o0]         // store digit (on both paths)
1646           subcc %o1,1,%o1       // decrement counter
1647           bne 1b
1648          _ sub %o0,4,%o0        // step down to the next digit
1649 2:      retl
1650        _ mov 1,%o0              // carry ran through all digits
1651 3:      retl
1652        _ mov 0,%o0              // carry was absorbed
1653 #endif
1654 #if COUNTER_LOOPS
1655         andcc %o1,%o1,%g0       // count = 0 ?
1656         be 2f                   // yes -> return 1
1657        _ sub %o0,4,%o0
1658         sll %o1,2,%o1           // %o1 = 4*count
1659         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
1660           ld [%o0+%o1],%o2      // fetch digit
1661 1:        addcc %o2,1,%o2       // increment
1662           bne 3f                // result nonzero -> no carry, return 0
1663          _ st %o2,[%o0+%o1]     // store digit (on both paths)
1664           subcc %o1,4,%o1       // decrement counter, which also steps the address down
1665           bne,a 1b
1666          __ ld [%o0+%o1],%o2    // fetch next digit (only when looping)
1667 2:      retl
1668        _ mov 1,%o0              // carry ran through all digits
1669 3:      retl
1670        _ mov 0,%o0              // carry was absorbed
1671 #endif
1672
1673 // extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Subtracts two count-digit numbers digit by digit with borrow:
     //   destptr[-k] = sourceptr1[-k] - sourceptr2[-k] - borrow,  k = 1..count,
     // walking from the digits at [-1] towards lower addresses.
     // Returns the final borrow (0 or 1); 0 if count = 0.
     // Lines marked _ are delay-slot instructions that always execute.
     // The borrow is parked in %g1 whenever the loop counter update overwrites
     // the condition codes. The PIC unrolled variant additionally uses %g2 to
     // hold the return address.
1674         DECLARE_FUNCTION(sub_loop_down)
1675 C(sub_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
1676 #if STANDARD_LOOPS
1677         andcc %o3,%o3,%g0       // count = 0 ?
1678         be 2f
1679        _ mov %g0,%g1            // Carry := 0
1680         sub %o0,4,%o0
1681 1:        ld [%o0],%o4          // source1 digit
1682           sub %o1,4,%o1
1683           ld [%o1],%o5          // source2 digit
1684           subcc %g0,%g1,%g0     // C flag := (%g1 != 0), i.e. restore the borrow
1685           subxcc %o4,%o5,%o4    // subtract with borrow
1686           addx %g0,%g0,%g1      // new carry: %g1 := C flag
1687           sub %o2,4,%o2
1688           st %o4,[%o2]          // store digit
1689           subcc %o3,1,%o3
1690           bne 1b
1691          _ sub %o0,4,%o0
1692 2:      retl
1693        _ mov %g1,%o0
1694 #endif
1695 #if COUNTER_LOOPS
1696         andcc %o3,%o3,%g0       // count = 0 ?
1697         be 2f
1698        _ mov %g0,%g1            // Carry := 0
1699         sub %o0,4,%o0
1700         sub %o1,4,%o1
1701         sll %o3,2,%o3           // %o3 = 4*count
1702         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
1703         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
1704         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
1705 1:        ld [%o0+%o3],%o4      // source1 digit
1706           ld [%o1+%o3],%o5      // source2 digit
1707           subcc %g0,%g1,%g0     // C flag := (%g1 != 0), i.e. restore the borrow
1708           subxcc %o4,%o5,%o4    // subtract with borrow
1709           addx %g0,%g0,%g1      // new carry: %g1 := C flag
1710           subcc %o3,4,%o3
1711           bne 1b
1712          _ st %o4,[%o2+%o3]     // store digit (index already decremented)
1713 2:      retl
1714        _ mov %g1,%o0
1715 #endif
1716 #if UNROLLED_LOOPS
     // The first pass handles count mod 8 digits by jumping into the middle of
     // the 8-fold unrolled body; every later pass handles 8 digits.
1717         and %o3,7,%o4           // count mod 8
1718         sll %o4,2,%o5
1719         sub %o0,%o5,%o0         // %o0 = &sourceptr1[-(count mod 8)]
1720         sub %o1,%o5,%o1         // %o1 = &sourceptr2[-(count mod 8)]
1721         sub %o2,%o5,%o2         // %o2 = &destptr[-(count mod 8)]
1722         sll %o4,4,%o4           // 16 bytes of code per digit
     // %o5 := address of the final addx (the entry point for count mod 8 = 0).
1723 #ifdef PIC
1724         mov %o7,%g2             // save return address
1725         call 0f                 // %o7 := address of this call instruction (label 0 - 8)
1726        _ add %o7,152,%o5        // final addx is 38 instructions after the call
1727 0:
1728 #else
1729         set C(sub_loop_down)+176,%o5 // final addx is at byte offset 176 (set = 2 instructions)
1730 #endif
1731         sub %o5,%o4,%o5
1732         jmp %o5                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
1733        _ subcc %g0,%g0,%g0      // clear carry
1734 1:        subcc %g0,%g1,%g0     // C flag := (%g1 != 0); reached only when looping back
1735           ld [%o0+28],%o4       // source1 digit
1736           ld [%o1+28],%o5       // source2 digit
1737           subxcc %o4,%o5,%o4    // subtract with borrow
1738           st %o4,[%o2+28]       // store digit
1739           ld [%o0+24],%o4       // source1 digit
1740           ld [%o1+24],%o5       // source2 digit
1741           subxcc %o4,%o5,%o4    // subtract with borrow
1742           st %o4,[%o2+24]       // store digit
1743           ld [%o0+20],%o4       // source1 digit
1744           ld [%o1+20],%o5       // source2 digit
1745           subxcc %o4,%o5,%o4    // subtract with borrow
1746           st %o4,[%o2+20]       // store digit
1747           ld [%o0+16],%o4       // source1 digit
1748           ld [%o1+16],%o5       // source2 digit
1749           subxcc %o4,%o5,%o4    // subtract with borrow
1750           st %o4,[%o2+16]       // store digit
1751           ld [%o0+12],%o4       // source1 digit
1752           ld [%o1+12],%o5       // source2 digit
1753           subxcc %o4,%o5,%o4    // subtract with borrow
1754           st %o4,[%o2+12]       // store digit
1755           ld [%o0+8],%o4        // source1 digit
1756           ld [%o1+8],%o5        // source2 digit
1757           subxcc %o4,%o5,%o4    // subtract with borrow
1758           st %o4,[%o2+8]        // store digit
1759           ld [%o0+4],%o4        // source1 digit
1760           ld [%o1+4],%o5        // source2 digit
1761           subxcc %o4,%o5,%o4    // subtract with borrow
1762           st %o4,[%o2+4]        // store digit
1763           ld [%o0],%o4          // source1 digit
1764           ld [%o1],%o5          // source2 digit
1765           subxcc %o4,%o5,%o4    // subtract with borrow
1766           st %o4,[%o2]          // store digit
1767           addx %g0,%g0,%g1      // new carry: %g1 := C flag
1768           sub %o0,32,%o0
1769           sub %o1,32,%o1
1770           subcc %o3,8,%o3       // at least 8 more digits to process?
1771           bcc 1b
1772          _ sub %o2,32,%o2
1773 #ifdef PIC
1774         jmp %g2+8               // return via the saved return address
1775 #else
1776         retl
1777 #endif
1778        _ mov %g1,%o0
1779 #endif
1780
1781 // extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
     // Like sub_loop_down, but with an incoming borrow:
     //   destptr[-k] = sourceptr1[-k] - sourceptr2[-k] - borrow,  k = 1..count,
     // walking from the digits at [-1] towards lower addresses. The incoming
     // borrow is taken as set whenever the carry argument is nonzero.
     // When count > 0 the result is the final borrow (0 or 1).
     // Lines marked _ are delay-slot instructions that always execute.
     // The PIC unrolled variant additionally uses %g2 to hold the return address.
1782         DECLARE_FUNCTION(subx_loop_down)
1783 C(subx_loop_down:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1, Output in %o0
1784 #if STANDARD_LOOPS
1785         andcc %o3,%o3,%g0       // count = 0 ?
1786         be 2f                   // yes -> return the incoming carry unchanged
1787        _ mov %o4,%g1            // Carry
1788         sub %o0,4,%o0
1789 1:        ld [%o0],%o4          // source1 digit
1790           sub %o1,4,%o1
1791           ld [%o1],%o5          // source2 digit
1792           subcc %g0,%g1,%g0     // C flag := (%g1 != 0), i.e. restore the borrow
1793           subxcc %o4,%o5,%o4    // subtract with borrow
1794           addx %g0,%g0,%g1      // new carry: %g1 := C flag
1795           sub %o2,4,%o2
1796           st %o4,[%o2]          // store digit
1797           subcc %o3,1,%o3
1798           bne 1b
1799          _ sub %o0,4,%o0
1800 2:      retl
1801        _ mov %g1,%o0
1802 #endif
1803 #if COUNTER_LOOPS
1804         andcc %o3,%o3,%g0       // count = 0 ?
1805         be 2f                   // yes -> return the incoming carry unchanged
1806        _ mov %o4,%g1            // Carry
1807         sub %o0,4,%o0
1808         sub %o1,4,%o1
1809         sll %o3,2,%o3           // %o3 = 4*count
1810         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
1811         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
1812         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
1813 1:        ld [%o0+%o3],%o4      // source1 digit
1814           ld [%o1+%o3],%o5      // source2 digit
1815           subcc %g0,%g1,%g0     // C flag := (%g1 != 0), i.e. restore the borrow
1816           subxcc %o4,%o5,%o4    // subtract with borrow
1817           addx %g0,%g0,%g1      // new carry: %g1 := C flag
1818           subcc %o3,4,%o3
1819           bne 1b
1820          _ st %o4,[%o2+%o3]     // store digit (index already decremented)
1821 2:      retl
1822        _ mov %g1,%o0
1823 #endif
1824 #if UNROLLED_LOOPS
     // The first pass handles count mod 8 digits by jumping into the middle of
     // the 8-fold unrolled body; every later pass handles 8 digits.
     // %g1 serves as a scratch register until the first addx makes it the borrow.
1825         and %o3,7,%o5           // count mod 8
1826         sll %o5,2,%g1
1827         sub %o0,%g1,%o0         // %o0 = &sourceptr1[-(count mod 8)]
1828         sub %o1,%g1,%o1         // %o1 = &sourceptr2[-(count mod 8)]
1829         sub %o2,%g1,%o2         // %o2 = &destptr[-(count mod 8)]
1830         sll %o5,4,%o5           // 16 bytes of code per digit
     // %g1 := address of the final addx (the entry point for count mod 8 = 0).
1831 #ifdef PIC
1832         mov %o7,%g2             // save return address
1833         call 0f                 // %o7 := address of this call instruction (label 0 - 8)
1834        _ add %o7,152,%g1        // final addx is 38 instructions after the call
1835 0:
1836 #else
1837         set C(subx_loop_down)+176,%g1 // final addx is at byte offset 176 (set = 2 instructions)
1838 #endif
1839         sub %g1,%o5,%g1
1840         jmp %g1                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
1841        _ subcc %g0,%o4,%g0      // initialise carry: C flag := (%o4 != 0)
1842 1:        subcc %g0,%g1,%g0     // C flag := (%g1 != 0); reached only when looping back
1843           ld [%o0+28],%o4       // source1 digit
1844           ld [%o1+28],%o5       // source2 digit
1845           subxcc %o4,%o5,%o4    // subtract with borrow
1846           st %o4,[%o2+28]       // store digit
1847           ld [%o0+24],%o4       // source1 digit
1848           ld [%o1+24],%o5       // source2 digit
1849           subxcc %o4,%o5,%o4    // subtract with borrow
1850           st %o4,[%o2+24]       // store digit
1851           ld [%o0+20],%o4       // source1 digit
1852           ld [%o1+20],%o5       // source2 digit
1853           subxcc %o4,%o5,%o4    // subtract with borrow
1854           st %o4,[%o2+20]       // store digit
1855           ld [%o0+16],%o4       // source1 digit
1856           ld [%o1+16],%o5       // source2 digit
1857           subxcc %o4,%o5,%o4    // subtract with borrow
1858           st %o4,[%o2+16]       // store digit
1859           ld [%o0+12],%o4       // source1 digit
1860           ld [%o1+12],%o5       // source2 digit
1861           subxcc %o4,%o5,%o4    // subtract with borrow
1862           st %o4,[%o2+12]       // store digit
1863           ld [%o0+8],%o4        // source1 digit
1864           ld [%o1+8],%o5        // source2 digit
1865           subxcc %o4,%o5,%o4    // subtract with borrow
1866           st %o4,[%o2+8]        // store digit
1867           ld [%o0+4],%o4        // source1 digit
1868           ld [%o1+4],%o5        // source2 digit
1869           subxcc %o4,%o5,%o4    // subtract with borrow
1870           st %o4,[%o2+4]        // store digit
1871           ld [%o0],%o4          // source1 digit
1872           ld [%o1],%o5          // source2 digit
1873           subxcc %o4,%o5,%o4    // subtract with borrow
1874           st %o4,[%o2]          // store digit
1875           addx %g0,%g0,%g1      // new carry: %g1 := C flag
1876           sub %o0,32,%o0
1877           sub %o1,32,%o1
1878           subcc %o3,8,%o3       // at least 8 more digits to process?
1879           bcc 1b
1880          _ sub %o2,32,%o2
1881 #ifdef PIC
1882         jmp %g2+8               // return via the saved return address
1883 #else
1884         retl
1885 #endif
1886        _ mov %g1,%o0
1887 #endif
1888
1889 // extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
     // Subtracts the source number from the destination number in place:
     //   destptr[-k] = destptr[-k] - sourceptr[-k] - borrow,  k = 1..count,
     // walking from the digits at [-1] towards lower addresses.
     // Returns the final borrow (0 or 1); 0 if count = 0.
     // Lines marked _ are delay-slot instructions that always execute; lines
     // marked __ are annulled delay slots that execute only if the branch is taken.
     // The PIC unrolled variant additionally uses %g2 to hold the return address.
1890         DECLARE_FUNCTION(subfrom_loop_down)
1891 C(subfrom_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1892 #if STANDARD_LOOPS
1893         andcc %o2,%o2,%g0       // count = 0 ?
1894         be 2f
1895        _ mov %g0,%o5            // Carry := 0
1896         sub %o0,4,%o0
1897 1:        ld [%o0],%o3          // source digit
1898           sub %o1,4,%o1
1899           ld [%o1],%o4          // dest digit
1900           subcc %g0,%o5,%g0     // C flag := (%o5 != 0), i.e. restore the borrow
1901           subxcc %o4,%o3,%o4    // subtract with borrow
1902           addx %g0,%g0,%o5      // new carry: %o5 := C flag
1903           st %o4,[%o1]          // store digit
1904           subcc %o2,1,%o2
1905           bne 1b
1906          _ sub %o0,4,%o0
1907 2:      retl
1908        _ mov %o5,%o0
1909 #endif
1910 #if COUNTER_LOOPS
1911         andcc %o2,%o2,%g0       // count = 0 ?
1912         be 2f
1913        _ mov %g0,%o5            // Carry := 0
1914         sub %o0,4,%o0
1915         sub %o1,4,%o1
1916         sll %o2,2,%o2           // %o2 = 4*count
1917         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
1918         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
1919           ld [%o0+%o2],%o3      // source digit
1920 1:        ld [%o1+%o2],%o4      // dest digit
1921           subcc %g0,%o5,%g0     // C flag := (%o5 != 0), i.e. restore the borrow
1922           subxcc %o4,%o3,%o4    // subtract with borrow
1923           addx %g0,%g0,%o5      // new carry: %o5 := C flag
1924           st %o4,[%o1+%o2]      // store digit
1925           subcc %o2,4,%o2
1926           bne,a 1b
1927          __ ld [%o0+%o2],%o3    // source digit (only when looping)
1928 2:      retl
1929        _ mov %o5,%o0
1930 #endif
1931 #if UNROLLED_LOOPS
     // The first pass handles count mod 8 digits by jumping into the middle of
     // the 8-fold unrolled body; every later pass handles 8 digits.
1932         and %o2,7,%o3           // count mod 8
1933         sll %o3,2,%o4
1934         sub %o0,%o4,%o0         // %o0 = &sourceptr[-(count mod 8)]
1935         sub %o1,%o4,%o1         // %o1 = &destptr[-(count mod 8)]
1936         sll %o3,4,%o3           // 16 bytes of code per digit
     // %o4 := address of the final addx (the entry point for count mod 8 = 0).
1937 #ifdef PIC
1938         mov %o7,%g2             // save return address
1939         call 0f                 // %o7 := address of this call instruction (label 0 - 8)
1940        _ add %o7,152,%o4        // final addx is 38 instructions after the call
1941 0:
1942 #else
1943         set C(subfrom_loop_down)+172,%o4 // final addx is at byte offset 172 (set = 2 instructions)
1944 #endif
1945         sub %o4,%o3,%o4
1946         jmp %o4                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
1947        _ subcc %g0,%g0,%g0      // clear carry
1948 1:        subcc %g0,%o5,%g0     // C flag := (%o5 != 0); reached only when looping back
1949           ld [%o0+28],%o3       // source digit
1950           ld [%o1+28],%o4       // dest digit
1951           subxcc %o4,%o3,%o4    // subtract with borrow
1952           st %o4,[%o1+28]       // store digit
1953           ld [%o0+24],%o3       // source digit
1954           ld [%o1+24],%o4       // dest digit
1955           subxcc %o4,%o3,%o4    // subtract with borrow
1956           st %o4,[%o1+24]       // store digit
1957           ld [%o0+20],%o3       // source digit
1958           ld [%o1+20],%o4       // dest digit
1959           subxcc %o4,%o3,%o4    // subtract with borrow
1960           st %o4,[%o1+20]       // store digit
1961           ld [%o0+16],%o3       // source digit
1962           ld [%o1+16],%o4       // dest digit
1963           subxcc %o4,%o3,%o4    // subtract with borrow
1964           st %o4,[%o1+16]       // store digit
1965           ld [%o0+12],%o3       // source digit
1966           ld [%o1+12],%o4       // dest digit
1967           subxcc %o4,%o3,%o4    // subtract with borrow
1968           st %o4,[%o1+12]       // store digit
1969           ld [%o0+8],%o3        // source digit
1970           ld [%o1+8],%o4        // dest digit
1971           subxcc %o4,%o3,%o4    // subtract with borrow
1972           st %o4,[%o1+8]        // store digit
1973           ld [%o0+4],%o3        // source digit
1974           ld [%o1+4],%o4        // dest digit
1975           subxcc %o4,%o3,%o4    // subtract with borrow
1976           st %o4,[%o1+4]        // store digit
1977           ld [%o0],%o3          // source digit
1978           ld [%o1],%o4          // dest digit
1979           subxcc %o4,%o3,%o4    // subtract with borrow
1980           st %o4,[%o1]          // store digit
1981           addx %g0,%g0,%o5      // new carry: %o5 := C flag
1982           sub %o0,32,%o0
1983           subcc %o2,8,%o2       // at least 8 more digits to process?
1984           bcc 1b
1985          _ sub %o1,32,%o1
1986 #ifdef PIC
1987         jmp %g2+8               // return via the saved return address
1988 #else
1989         retl
1990 #endif
1991        _ mov %o5,%o0
1992 #endif
1993
1994 // extern uintD dec_loop_down (uintD* ptr, uintC count);
     // Subtracts 1 from the count-digit number stored below ptr. Starts at the
     // digit ptr[-1] and walks towards lower addresses for as long as the
     // decrement borrows (i.e. the digit was 0).
     // Returns -1 if the borrow ran through all count digits (also when
     // count = 0), else 0.
     // Lines marked _ are delay-slot instructions that always execute; lines
     // marked __ are annulled delay slots that execute only if the branch is taken.
1995         DECLARE_FUNCTION(dec_loop_down)
1996 C(dec_loop_down:) // Input in %o0,%o1, Output in %o0
1997 #if STANDARD_LOOPS
1998         andcc %o1,%o1,%g0       // count = 0 ?
1999         be 2f                   // yes -> return -1
2000        _ sub %o0,4,%o0          // %o0 = &ptr[-1]
2001 1:        ld [%o0],%o2          // fetch digit
2002           subcc %o2,1,%o2       // decrement
2003           bcc 3f                // no borrow -> return 0
2004          _ st %o2,[%o0]         // store digit (on both paths)
2005           subcc %o1,1,%o1       // decrement counter
2006           bne 1b
2007          _ sub %o0,4,%o0        // step down to the next digit
2008 2:      retl
2009        _ mov -1,%o0             // borrow ran through all digits
2010 3:      retl
2011        _ mov 0,%o0              // borrow was absorbed
2012 #endif
2013 #if COUNTER_LOOPS
2014         andcc %o1,%o1,%g0       // count = 0 ?
2015         be 2f                   // yes -> return -1
2016        _ sub %o0,4,%o0
2017         sll %o1,2,%o1           // %o1 = 4*count
2018         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
2019           ld [%o0+%o1],%o2      // fetch digit
2020 1:        subcc %o2,1,%o2       // decrement
2021           bcc 3f                // no borrow -> return 0
2022          _ st %o2,[%o0+%o1]     // store digit (on both paths)
2023           subcc %o1,4,%o1       // decrement counter, which also steps the address down
2024           bne,a 1b
2025          __ ld [%o0+%o1],%o2    // fetch next digit (only when looping)
2026 2:      retl
2027        _ mov -1,%o0             // borrow ran through all digits
2028 3:      retl
2029        _ mov 0,%o0              // borrow was absorbed
2030 #endif
2031
2032 // extern uintD neg_loop_down (uintD* ptr, uintC count);
     // Negates the count-digit number stored below ptr in place, walking from
     // ptr[-1] towards lower addresses: leading zero digits are skipped, the
     // first nonzero digit is replaced by (0 - digit), and every digit after
     // that is bitwise inverted.
     // Returns 0 if all digits were zero (also when count = 0), else -1.
     // Lines marked _ are delay-slot instructions that always execute; lines
     // marked __ are annulled delay slots that execute only if the branch is taken.
2033         DECLARE_FUNCTION(neg_loop_down)
2034 C(neg_loop_down:) // Input in %o0,%o1, Output in %o0
2035 #if STANDARD_LOOPS
2036         // search for the first nonzero digit:
2037         andcc %o1,%o1,%g0
2038         be 2f
2039        _ sub %o0,4,%o0
2040 1:        ld [%o0],%o2
2041           subcc %g0,%o2,%o2     // negate and test
2042           bne 3f
2043          _ subcc %o1,1,%o1      // decrement counter; label 3 tests these flags
2044           bne 1b
2045          _ sub %o0,4,%o0
2046 2:      retl
2047        _ mov 0,%o0              // every digit was zero
2048 3:      // first nonzero digit found; from here on there is always a borrow
2049         st %o2,[%o0]            // store the negated digit
2050         // invert all remaining digits:
2051         be 5f                   // counter already 0 -> nothing left to invert
2052        _ sub %o0,4,%o0
2053 4:        ld [%o0],%o2
2054           subcc %o1,1,%o1
2055           xor %o2,-1,%o2        // bitwise complement
2056           st %o2,[%o0]
2057           bne 4b                // still the flags of the subcc above
2058          _ sub %o0,4,%o0
2059 5:      retl
2060        _ mov -1,%o0
2061 #endif
2062 #if COUNTER_LOOPS
2063         // search for the first nonzero digit:
2064         andcc %o1,%o1,%g0
2065         be 2f
2066        _ sub %o0,4,%o0
2067         sll %o1,2,%o1           // %o1 = 4*count
2068         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
2069           ld [%o0+%o1],%o2      // fetch digit
2070 1:        subcc %g0,%o2,%o2     // negate and test
2071           bne 3f
2072          _ subcc %o1,4,%o1      // decrement counter, which also steps the address down
2073           bne,a 1b
2074          __ ld [%o0+%o1],%o2    // fetch next digit (only when looping)
2075 2:      retl
2076        _ mov 0,%o0              // every digit was zero
2077 3:      // first nonzero digit found; from here on there is always a borrow
2078         // invert all remaining digits:
2079         add %o1,4,%o1           // undo the decrement done in the delay slot
2080         st %o2,[%o0+%o1]        // store the negated digit
2081         subcc %o1,4,%o1
2082         be 5f                   // no digits left
2083        _ nop
2084           ld [%o0+%o1],%o2
2085 4:        xor %o2,-1,%o2        // bitwise complement
2086           st %o2,[%o0+%o1]
2087           subcc %o1,4,%o1
2088           bne,a 4b
2089          __ ld [%o0+%o1],%o2    // fetch next digit (only when looping)
2090 5:      retl
2091        _ mov -1,%o0
2092 #endif
2093
2094 // extern uintD shift1left_loop_down (uintD* ptr, uintC count);
     // Shifts the count-digit number stored below ptr left by one bit in place,
     // walking from ptr[-1] towards lower addresses. Each digit is doubled and
     // the bit shifted out of the previous digit is added in.
     // Returns the bit shifted out of the last digit (0 or 1); 0 if count = 0.
     // Lines marked _ are delay-slot instructions that always execute.
2095         DECLARE_FUNCTION(shift1left_loop_down)
2096 C(shift1left_loop_down:) // Input in %o0,%o1, Output in %o0
2097         andcc %o1,%o1,%g0       // count = 0 ?
2098         be 2f
2099        _ mov 0,%o3              // Carry := 0
2100         sub %o0,4,%o0
2101 1:        ld [%o0],%o2          // digit
2102           subcc %g0,%o3,%g0     // C flag := (%o3 != 0), i.e. restore the carry
2103           addxcc %o2,%o2,%o2    // shift: digit + digit + carry
2104           addx %g0,%g0,%o3      // new carry: %o3 := C flag
2105           st %o2,[%o0]          // store digit
2106           subcc %o1,1,%o1
2107           bne 1b
2108          _ sub %o0,4,%o0
2109 2:      retl
2110        _ mov %o3,%o0
2111
2112 // extern uintD shiftleft_loop_down (uintD* ptr, uintC count, uintC i, uintD carry);
     // Shifts the count-digit number stored below ptr left by i bits in place,
     // walking from ptr[-1] towards lower addresses. For each digit d:
     //   stored digit = (d << i) | carry,   new carry = d >> (32-i).
     // The carry argument is OR-ed into the first digit as given.
     // Returns the last carry; if count = 0 the carry argument is returned.
     // NOTE(review): the right-shift amount is computed as -i, so this
     // presumably expects 0 < i < 32 -- confirm against callers.
     // Lines marked _ are delay-slot instructions that always execute.
2113         DECLARE_FUNCTION(shiftleft_loop_down)
2114 C(shiftleft_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
2115         andcc %o1,%o1,%g0       // count = 0 ?
2116         be 2f
2117        _ sub %g0,%o2,%g1        // 32-i (mod 32)
2118         sub %o0,4,%o0
2119 1:        ld [%o0],%o4          // digit
2120           subcc %o1,1,%o1       // decrement counter; flags stay live until the bne
2121           sll %o4,%o2,%o5       // its low (32-i) bits
2122           or %o3,%o5,%o5        // combine with the old carry
2123           st %o5,[%o0]          // store digit
2124           srl %o4,%g1,%o3       // its top i bits give the new carry
2125           bne 1b
2126          _ sub %o0,4,%o0
2127 2:      retl
2128        _ mov %o3,%o0
2129
2130 // extern uintD shiftleftcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
     // Copies the count-digit number below sourceptr to below destptr while
     // shifting it left by i bits, walking from the digits at [-1] towards
     // lower addresses. For each source digit d:
     //   stored digit = (d << i) | carry,   new carry = d >> (32-i),
     // starting with carry = 0.
     // Returns the last carry; 0 if count = 0.
     // NOTE(review): the right-shift amount is computed as -i, so this
     // presumably expects 0 < i < 32 -- confirm against callers.
     // Lines marked _ are delay-slot instructions that always execute.
2131         DECLARE_FUNCTION(shiftleftcopy_loop_down)
2132 C(shiftleftcopy_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1,%g2, Output in %o0
2133         andcc %o2,%o2,%g0       // count = 0 ?
2134         be 2f
2135        _ mov 0,%o4              // Carry := 0
2136         sub %g0,%o3,%g1         // 32-i (mod 32)
2137         sub %o0,4,%o0
2138 1:        ld [%o0],%o5          // digit
2139           subcc %o2,1,%o2       // decrement counter; flags stay live until the bne
2140           sll %o5,%o3,%g2       // its low (32-i) bits
2141           or %o4,%g2,%g2        // combine with the old carry
2142           sub %o1,4,%o1
2143           st %g2,[%o1]          // store digit
2144           srl %o5,%g1,%o4       // its top i bits give the new carry
2145           bne 1b
2146          _ sub %o0,4,%o0
2147 2:      retl
2148        _ mov %o4,%o0
2149
2150 // extern uintD shift1right_loop_up (uintD* ptr, uintC count, uintD carry);
     // Shifts the count-digit number starting at ptr right by one bit in place,
     // walking from ptr[0] towards higher addresses. Bit 0 of the carry
     // argument is moved to bit 31 and becomes the top bit of the first digit.
     // For each digit d:
     //   stored digit = (d >> 1) | carry,   new carry = d << 31.
     // Returns the last carry (the shifted-out bit, in bit 31); if count = 0
     // this is the carry argument shifted left by 31.
     // Lines marked _ are delay-slot instructions that always execute.
2151         DECLARE_FUNCTION(shift1right_loop_up)
2152 C(shift1right_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
2153         andcc %o1,%o1,%g0       // count = 0 ?
2154         be 2f
2155        _ sll %o2,31,%o2         // carry, moved into bit 31
2156 1:        ld [%o0],%o3          // digit
2157           subcc %o1,1,%o1       // decrement counter; flags stay live until the bne
2158           srl %o3,1,%o4         // shift
2159           or %o2,%o4,%o4        // and combine with the old carry
2160           st %o4,[%o0]          // and store
2161           sll %o3,31,%o2        // new carry
2162           bne 1b
2163          _ add %o0,4,%o0
2164 2:      retl
2165        _ mov %o2,%o0
2166
2167 // extern uintD shiftright_loop_up (uintD* ptr, uintC count, uintC i);
     // Shifts the count-digit number starting at ptr right by i bits in place
     // (logical shift), walking from ptr[0] towards higher addresses.
     // For each digit d:
     //   stored digit = (d >> i) | carry,   new carry = d << (32-i),
     // starting with carry = 0.
     // Returns the last carry (the shifted-out bits, left-aligned); 0 if count = 0.
     // NOTE(review): the left-shift amount is computed as -i, so this
     // presumably expects 0 < i < 32 -- confirm against callers.
     // Lines marked _ are delay-slot instructions that always execute.
2168         DECLARE_FUNCTION(shiftright_loop_up)
2169 C(shiftright_loop_up:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
2170         sub %g0,%o2,%g1         // 32-i (mod 32)
2171         andcc %o1,%o1,%g0       // count = 0 ?
2172         be 2f
2173        _ or %g0,%g0,%o3         // Carry := 0
2174 1:        ld [%o0],%o4          // digit
2175           subcc %o1,1,%o1       // decrement counter; flags stay live until the bne
2176           srl %o4,%o2,%o5       // shift
2177           or %o3,%o5,%o5        // and combine with the old carry
2178           st %o5,[%o0]          // and store
2179           sll %o4,%g1,%o3       // new carry
2180           bne 1b
2181          _ add %o0,4,%o0
2182 2:      retl
2183        _ mov %o3,%o0
2184
2185 // extern uintD shiftrightsigned_loop_up (uintD* ptr, uintC count, uintC i);
     // Shifts the count-digit number starting at ptr right by i bits in place,
     // walking from ptr[0] towards higher addresses. The first digit is shifted
     // arithmetically (sign bits shifted in); all following digits are shifted
     // logically and receive the bits shifted out of their predecessor.
     // Returns the last carry (the shifted-out bits, left-aligned).
     // The first digit is processed before count is tested, so count must be
     // at least 1.
     // NOTE(review): the left-shift amount is computed as -i, so this
     // presumably expects 0 < i < 32 -- confirm against callers.
     // Lines marked _ are delay-slot instructions that always execute.
2186         DECLARE_FUNCTION(shiftrightsigned_loop_up)
2187 C(shiftrightsigned_loop_up:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
2188         ld [%o0],%o4            // first digit
2189         sub %g0,%o2,%g1         // 32-i (mod 32)
2190         sra %o4,%o2,%o5         // shift (arithmetic)
2191         st %o5,[%o0]            // and store
2192         sll %o4,%g1,%o3         // new carry
2193         subcc %o1,1,%o1         // any digits left?
2194         be 2f
2195        _ add %o0,4,%o0
2196 1:        ld [%o0],%o4          // digit
2197           subcc %o1,1,%o1       // decrement counter; flags stay live until the bne
2198           srl %o4,%o2,%o5       // shift (logical)
2199           or %o3,%o5,%o5        // and combine with the old carry
2200           st %o5,[%o0]          // and store
2201           sll %o4,%g1,%o3       // new carry
2202           bne 1b
2203          _ add %o0,4,%o0
2204 2:      retl
2205        _ mov %o3,%o0
2206
2207 // extern uintD shiftrightcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
     // Copies the count-digit number starting at sourceptr to destptr while
     // shifting it right by i bits (logical shift), walking from index 0
     // towards higher addresses. The initial carry is the carry argument
     // shifted left by (32-i). For each source digit d:
     //   stored digit = (d >> i) | carry,   new carry = d << (32-i).
     // Returns the last carry; if count = 0 this is the initial carry.
     // NOTE(review): the left-shift amount is computed as -i, so this
     // presumably expects 0 < i < 32 -- confirm against callers.
     // Lines marked _ are delay-slot instructions that always execute.
2208         DECLARE_FUNCTION(shiftrightcopy_loop_up)
2209 C(shiftrightcopy_loop_up:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1,%g2, Output in %o0
2210         sub %g0,%o3,%g1         // 32-i (mod 32)
2211         andcc %o2,%o2,%g0       // count = 0 ?
2212         be 2f
2213        _ sll %o4,%g1,%g2        // first carry
2214 1:        ld [%o0],%o4          // digit
2215           add %o0,4,%o0
2216           srl %o4,%o3,%o5       // shift
2217           or %g2,%o5,%o5        // and combine with the old carry
2218           st %o5,[%o1]          // and store
2219           sll %o4,%g1,%g2       // new carry
2220           subcc %o2,1,%o2
2221           bne 1b
2222          _ add %o1,4,%o1
2223 2:      retl
2224        _ mov %g2,%o0
2225
2226 // extern uintD mulusmall_loop_down (uintD digit, uintD* ptr, uintC len, uintD newdigit);
     // Multiplies the len-digit number stored below ptr in place by the small
     // factor digit (a 6-bit number, per the comments below), walking from
     // ptr[-1] towards lower addresses. newdigit is the carry added into the
     // first digit; each later digit receives the carry of its predecessor.
     // Returns the last carry; if len = 0 newdigit is returned.
     // The product is formed with six mulscc steps (one per multiplier bit
     // held in %y) followed by a final shift step.
     // Lines marked _ are delay-slot instructions that always execute.
2227         DECLARE_FUNCTION(mulusmall_loop_down)
2228 C(mulusmall_loop_down:) // Input in %o0,%o1,%o2,%o3, Output in %o0
2229         andcc %o2,%o2,%g0       // len = 0 ?
2230         be 3f
2231        _ sub %o1,4,%o1
2232 1:        // multiply the next digit [%o1] by the 6-bit number %o0
2233           // and add the small carry %o3:
2234           mov %o0,%y
2235           ld [%o1],%o4          // wait cycle!
2236           addcc %o3,%o3,%o5
2237           mulscc %o5,%o4,%o5
2238           mulscc %o5,%o4,%o5
2239           mulscc %o5,%o4,%o5
2240           mulscc %o5,%o4,%o5
2241           mulscc %o5,%o4,%o5
2242           mulscc %o5,%o4,%o5
2243           mulscc %o5,%g0,%o5
2244           // The low 26 bits of %o5 and the top 6 bits of %y
2245           // form the result. (The other bits are zero.)
2246           tst %o4               // correction if %o4 was negative
2247           bge 2f
2248          _ sra %o5,26,%o3       // top 6 bits of %o5 -> new carry
2249           add %o3,%o0,%o3       // (if %o4 was negative, add %o0 as well)
2250 2:        rd %y,%o4
2251           srl %o4,26,%o4        // top 6 bits of %y
2252           sll %o5,6,%o5         // low 26 bits of %o5
2253           or %o5,%o4,%o4        // new digit
2254           st %o4,[%o1]          // store it
2255           subcc %o2,1,%o2
2256           bne 1b
2257          _ sub %o1,4,%o1
2258 3:      retl
2259        _ mov %o3,%o0
2260
2261 // extern void mulu_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len-digit number stored below sourceptr by digit, walking
     // from sourceptr[-1] towards lower addresses. The len low product digits
     // are stored below destptr and the final carry digit is stored just below
     // them, so len+1 digits are written in total.
     // The loop body runs before len is tested, so len must be at least 1.
     // Lines marked _ are delay-slot instructions that always execute; lines
     // marked __ are annulled delay slots that execute only if the branch is taken.
2262 #if !MULU32_INLINE
     // Variant calling mulu32_ for every digit: it is passed the two factors
     // in %o0 and %o1 and its result is read from %o0 (low) and %g1 (high).
     // Uses its own register window, hence the arguments arrive in %i0-%i3.
2263         DECLARE_FUNCTION(mulu_loop_down)
2264 C(mulu_loop_down:) // Input in %i0,%i1,%i2,%i3
2265         save %sp,-96,%sp
2266         mov 0,%l0               // Carry
2267 1:        sub %i1,4,%i1
2268           ld [%i1],%o1          // next digit
2269           call C(mulu32_)       // multiply by digit (symbol named via C() like its .global)
2270          _ mov %i0,%o0
2271           addcc %l0,%o0,%o0     // and add the previous carry
2272           addx %g0,%g1,%l0      // high digit gives the new carry
2273           sub %i2,4,%i2
2274           subcc %i3,1,%i3
2275           bne 1b
2276          _ st %o0,[%i2]         // store low digit
2277         st %l0,[%i2-4]          // store the last carry
2278         ret
2279        _ restore
2280 #else
     // Leaf variant with the 32x32 -> 64 bit multiplication expanded inline.
2281         DECLARE_FUNCTION(mulu_loop_down)
2282 C(mulu_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1
2283         mov 0,%o4               // Carry
2284 1:        ld [%o1-4],%g1        // next digit
2285           // multiply by digit: %o0 * %g1 -> %o5|%g1
2286 #ifdef sparcv8
2287           sub     %o1,4,%o1
2288           umul    %g1,%o0,%g1
2289           rd      %y,%o5
2290 #else
     // 32 mulscc steps, one per bit of the multiplier in %y, then a final shift step.
2291           mov     %g1,%y
2292           sub     %o1,4,%o1     // wait cycle!
2293           andcc   %g0,%g0,%o5
2294           mulscc  %o5,%o0,%o5
2295           mulscc  %o5,%o0,%o5
2296           mulscc  %o5,%o0,%o5
2297           mulscc  %o5,%o0,%o5
2298           mulscc  %o5,%o0,%o5
2299           mulscc  %o5,%o0,%o5
2300           mulscc  %o5,%o0,%o5
2301           mulscc  %o5,%o0,%o5
2302           mulscc  %o5,%o0,%o5
2303           mulscc  %o5,%o0,%o5
2304           mulscc  %o5,%o0,%o5
2305           mulscc  %o5,%o0,%o5
2306           mulscc  %o5,%o0,%o5
2307           mulscc  %o5,%o0,%o5
2308           mulscc  %o5,%o0,%o5
2309           mulscc  %o5,%o0,%o5
2310           mulscc  %o5,%o0,%o5
2311           mulscc  %o5,%o0,%o5
2312           mulscc  %o5,%o0,%o5
2313           mulscc  %o5,%o0,%o5
2314           mulscc  %o5,%o0,%o5
2315           mulscc  %o5,%o0,%o5
2316           mulscc  %o5,%o0,%o5
2317           mulscc  %o5,%o0,%o5
2318           mulscc  %o5,%o0,%o5
2319           mulscc  %o5,%o0,%o5
2320           mulscc  %o5,%o0,%o5
2321           mulscc  %o5,%o0,%o5
2322           mulscc  %o5,%o0,%o5
2323           mulscc  %o5,%o0,%o5
2324           mulscc  %o5,%o0,%o5
2325           mulscc  %o5,%o0,%o5
2326           mulscc  %o5,%g0,%o5
2327           tst     %o0           // digit has its top bit set?
2328           bl,a    2f
2329          __ add     %o5,%g1,%o5 // then add the source digit to the high word
2330 2:        rd      %y,%g1        // low word of the product
2331 #endif
2332           addcc %o4,%g1,%g1     // and add the previous carry
2333           addx %g0,%o5,%o4      // high digit gives the new carry
2334           sub %o2,4,%o2
2335           subcc %o3,1,%o3
2336           bne 1b
2337          _ st %g1,[%o2]         // store low digit
2338         retl
2339        _ st %o4,[%o2-4]         // store the last carry
2340 #endif
2341
2342 // extern uintD muluadd_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len-digit number stored below sourceptr by digit and adds
     // the product to the len-digit number stored below destptr in place,
     // walking from the digits at [-1] towards lower addresses.
     // Returns the carry left over after the last digit.
     // The loop body runs before len is tested, so len must be at least 1.
     // Both variants use their own register window, hence the arguments arrive
     // in %i0-%i3 and the result is left in %i0 before the restore.
     // Lines marked _ are delay-slot instructions that always execute.
2343         DECLARE_FUNCTION(muluadd_loop_down)
2344 C(muluadd_loop_down:) // Input in %i0,%i1,%i2,%i3, Output in %i0
2345 #if !MULU32_INLINE
     // Variant calling mulu32_ for every digit: it is passed the two factors
     // in %o0 and %o1 and its result is read from %o0 (low) and %g1 (high).
2346         save %sp,-96,%sp
2347         mov 0,%l0               // Carry
2348 1:        sub %i1,4,%i1
2349           ld [%i1],%o1          // next source digit
2350           call C(mulu32_)       // multiply by digit (symbol named via C() like its .global)
2351          _ mov %i0,%o0
2352           sub %i2,4,%i2
2353           ld [%i2],%o1          // next dest digit
2354           addcc %l0,%o0,%o0     // and add the previous carry
2355           addx %g0,%g1,%l0      // high digit gives the new carry
2356           addcc %o1,%o0,%o0     // add the dest digit
2357           addx %g0,%l0,%l0      // fold that carry into the new carry
2358           subcc %i3,1,%i3
2359           bne 1b
2360          _ st %o0,[%i2]         // store low digit
2361         mov %l0,%i0             // last carry
2362         ret
2363        _ restore
2364 #else
     // Variant with the 32x32 -> 64 bit multiplication expanded inline.
2365         save %sp,-96,%sp
2366         mov 0,%l0               // Carry
2367 #ifndef sparcv8
2368         sra %i0,31,%l1          // 0 if %i0>=0, -1 if %i0<0
2369 #endif
2370 1:        ld [%i1-4],%o1        // next source digit
2371           sub %i1,4,%i1
2372           // multiply by digit: %i0 * %o1 -> %o2|%o0
2373 #ifdef sparcv8
2374           umul    %i0,%o1,%o0
2375           rd      %y,%o2
2376 #else
     // 32 mulscc steps, one per bit of the multiplier in %y, then a final shift step.
2377           mov     %o1,%y
2378           and     %o1,%l1,%o3   // wait cycle!
2379           andcc   %g0,%g0,%o2
2380           mulscc  %o2,%i0,%o2
2381           mulscc  %o2,%i0,%o2
2382           mulscc  %o2,%i0,%o2
2383           mulscc  %o2,%i0,%o2
2384           mulscc  %o2,%i0,%o2
2385           mulscc  %o2,%i0,%o2
2386           mulscc  %o2,%i0,%o2
2387           mulscc  %o2,%i0,%o2
2388           mulscc  %o2,%i0,%o2
2389           mulscc  %o2,%i0,%o2
2390           mulscc  %o2,%i0,%o2
2391           mulscc  %o2,%i0,%o2
2392           mulscc  %o2,%i0,%o2
2393           mulscc  %o2,%i0,%o2
2394           mulscc  %o2,%i0,%o2
2395           mulscc  %o2,%i0,%o2
2396           mulscc  %o2,%i0,%o2
2397           mulscc  %o2,%i0,%o2
2398           mulscc  %o2,%i0,%o2
2399           mulscc  %o2,%i0,%o2
2400           mulscc  %o2,%i0,%o2
2401           mulscc  %o2,%i0,%o2
2402           mulscc  %o2,%i0,%o2
2403           mulscc  %o2,%i0,%o2
2404           mulscc  %o2,%i0,%o2
2405           mulscc  %o2,%i0,%o2
2406           mulscc  %o2,%i0,%o2
2407           mulscc  %o2,%i0,%o2
2408           mulscc  %o2,%i0,%o2
2409           mulscc  %o2,%i0,%o2
2410           mulscc  %o2,%i0,%o2
2411           mulscc  %o2,%i0,%o2
2412           mulscc  %o2,%g0,%o2
2413           add     %o2,%o3,%o2   // %o3 = (0 if %i0>=0, %o1 if %i0<0)
2414           rd      %y,%o0        // low word of the product
2415 #endif
2416           sub %i2,4,%i2
2417           ld [%i2],%o1          // next dest digit
2418           addcc %l0,%o0,%o0     // and add the previous carry
2419           addx %g0,%o2,%l0      // high digit gives the new carry
2420           addcc %o1,%o0,%o0     // add the dest digit
2421           addx %g0,%l0,%l0      // fold that carry into the new carry
2422           subcc %i3,1,%i3
2423           bne 1b
2424          _ st %o0,[%i2]         // store low digit
2425         mov %l0,%i0             // last carry
2426         ret
2427        _ restore
2428 #endif
2429
2430 // extern uintD mulusub_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiply-and-subtract over two digit sequences, walking both pointers downwards
     // (each pointer is decremented by 4 before its digit is accessed).
     // For every digit: the low word of (digit * source digit + carry) is subtracted
     // from the dest digit; the high word plus the borrow of that subtraction becomes
     // the next carry. The final carry is returned. The length is only tested after
     // the first pass, so the loop body always runs at least once
     // (presumably callers pass len > 0).
2431         DECLARE_FUNCTION(mulusub_loop_down)
2432 C(mulusub_loop_down:) // Input in %i0,%i1,%i2,%i3, Output in %i0
2433 #if !MULU32_INLINE
2434         save %sp,-96,%sp
2435         mov 0,%l0               // carry
2436 1:        sub %i1,4,%i1
2437           ld [%i1],%o1          // next source digit
2438           call C(mulu32_)       // multiply by digit; C() applies the platform symbol prefix
2439          _ mov %i0,%o0
2440           sub %i2,4,%i2
2441           ld [%i2],%o1          // next dest digit
2442           addcc %l0,%o0,%o0     // add the previous carry to the low product word (%o0)
2443           addx %g0,%g1,%l0      // high product word (read from %g1) becomes the new carry
2444           subcc %o1,%o0,%o1     // subtract the low digit from the dest digit
2445           addx %g0,%l0,%l0      // fold the borrow into the new carry
2446           subcc %i3,1,%i3
2447           bne 1b
2448          _ st %o1,[%i2]         // store dest digit
2449         mov %l0,%i0             // final carry
2450         ret
2451        _ restore
2452 #else
2453         save %sp,-96,%sp
2454         mov 0,%l0               // carry
2455 #ifndef sparcv8
2456         sra %i0,31,%l1          // 0 if %i0>=0, -1 if %i0<0
2457 #endif
2458 1:        ld [%i1-4],%o1        // next source digit
2459           sub %i1,4,%i1
2460           // multiply by digit: %i0 * %o1 -> %o2|%o0
2461 #ifdef sparcv8
2462           umul    %i0,%o1,%o0
2463           rd      %y,%o2
2464 #else
2465           mov     %o1,%y
2466           and     %o1,%l1,%o3   // wait cycle! (also computes the correction term used below)
2467           andcc   %g0,%g0,%o2
               // 32 multiply steps, followed by one final step with %g0
2468           mulscc  %o2,%i0,%o2
2469           mulscc  %o2,%i0,%o2
2470           mulscc  %o2,%i0,%o2
2471           mulscc  %o2,%i0,%o2
2472           mulscc  %o2,%i0,%o2
2473           mulscc  %o2,%i0,%o2
2474           mulscc  %o2,%i0,%o2
2475           mulscc  %o2,%i0,%o2
2476           mulscc  %o2,%i0,%o2
2477           mulscc  %o2,%i0,%o2
2478           mulscc  %o2,%i0,%o2
2479           mulscc  %o2,%i0,%o2
2480           mulscc  %o2,%i0,%o2
2481           mulscc  %o2,%i0,%o2
2482           mulscc  %o2,%i0,%o2
2483           mulscc  %o2,%i0,%o2
2484           mulscc  %o2,%i0,%o2
2485           mulscc  %o2,%i0,%o2
2486           mulscc  %o2,%i0,%o2
2487           mulscc  %o2,%i0,%o2
2488           mulscc  %o2,%i0,%o2
2489           mulscc  %o2,%i0,%o2
2490           mulscc  %o2,%i0,%o2
2491           mulscc  %o2,%i0,%o2
2492           mulscc  %o2,%i0,%o2
2493           mulscc  %o2,%i0,%o2
2494           mulscc  %o2,%i0,%o2
2495           mulscc  %o2,%i0,%o2
2496           mulscc  %o2,%i0,%o2
2497           mulscc  %o2,%i0,%o2
2498           mulscc  %o2,%i0,%o2
2499           mulscc  %o2,%i0,%o2
2500           mulscc  %o2,%g0,%o2
2501           add     %o2,%o3,%o2   // %o3 = (0 if %i0>=0, %o1 if %i0<0)
2502           rd      %y,%o0
2503 #endif
2504           sub %i2,4,%i2
2505           ld [%i2],%o1          // next dest digit
2506           addcc %l0,%o0,%o0     // add the previous carry to the low product word
2507           addx %g0,%o2,%l0      // high product word becomes the new carry
2508           subcc %o1,%o0,%o1     // subtract the low digit from the dest digit
2509           addx %g0,%l0,%l0      // fold the borrow into the new carry
2510           subcc %i3,1,%i3
2511           bne 1b
2512          _ st %o1,[%i2]         // store dest digit
2513         mov %l0,%i0             // final carry
2514         ret
2515        _ restore
2516 #endif
2517
2518 // extern uintD divu_loop_up (uintD digit, uintD* ptr, uintC len);
     // Divides a digit sequence in place by a single digit, walking upwards from ptr.
     // Each step hands (remainder, current digit) and the divisor to divu_6432_3232_,
     // stores the quotient (taken from %o0) back over the digit and keeps the
     // remainder (taken from %g1) for the next step. Returns the last remainder,
     // or 0 when len is 0.
2519         DECLARE_FUNCTION(divu_loop_up)
2520 C(divu_loop_up:) // Input in %i0,%i1,%i2, Output in %i0
2521         save %sp,-96,%sp
2522         andcc %i2,%i2,%g0          // len == 0 ?
2523         be 2f
2524        _ mov 0,%g1                 // remainder (delay slot: set on both paths)
2525 1:        mov %g1,%o0              // remainder as high digit
2526           ld [%i1],%o1             // next digit as low digit
2527           call C(divu_6432_3232_)  // divide the pair by digit
2528          _ mov %i0,%o2
2529           st %o0,[%i1]             // store quotient, remainder is in %g1
2530           subcc %i2,1,%i2
2531           bne 1b
2532          _ add %i1,4,%i1
2533 2:      mov %g1,%i0                // remainder as result
2534         ret
2535        _ restore
2536
2537 // extern uintD divucopy_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Divides a digit sequence by a single digit, reading from sourceptr and writing
     // the quotient digits to destptr, both walking upwards.
     // Each step hands (remainder, source digit) and the divisor to divu_6432_3232_,
     // stores the quotient (taken from %o0) and keeps the remainder (taken from %g1)
     // for the next step. Returns the last remainder, or 0 when len is 0.
2538         DECLARE_FUNCTION(divucopy_loop_up)
2539 C(divucopy_loop_up:) // Input in %i0,%i1,%i2,%i3, Output in %i0
2540         save %sp,-96,%sp
2541         andcc %i3,%i3,%g0          // len == 0 ?
2542         be 2f
2543        _ mov 0,%g1                 // remainder (delay slot: set on both paths)
2544 1:        mov %g1,%o0              // remainder as high digit
2545           ld [%i1],%o1             // next digit as low digit
2546           call C(divu_6432_3232_)  // divide the pair by digit
2547          _ mov %i0,%o2
2548           st %o0,[%i2]             // store quotient, remainder is in %g1
2549           add %i1,4,%i1
2550           subcc %i3,1,%i3
2551           bne 1b
2552          _ add %i2,4,%i2
2553 2:      mov %g1,%i0                // remainder as result
2554         ret
2555        _ restore
2556
2557 #endif
2558
2559 #if !CL_DS_BIG_ENDIAN_P
2560
2561 // extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := x digit OR y digit.
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2562         DECLARE_FUNCTION(or_loop_down)
2563 C(or_loop_down:) // Input in %o0,%o1,%o2
2564 #if SLOW_LOOPS
2565         andcc %o2,%o2,%g0       // count == 0 ?
2566         be 2f
2567        _ sub %o0,4,%o0
2568 1:        ld [%o0],%o3          // x digit
2569           sub %o1,4,%o1
2570           ld [%o1],%o4          // y digit
2571           subcc %o2,1,%o2
2572           or %o3,%o4,%o3        // combine
2573           st %o3,[%o0]          // store into x
2574           bne 1b
2575          _ sub %o0,4,%o0
2576 2:      retl
2577        _ nop
2578 #endif
2579 #if STANDARD_LOOPS
2580         andcc %o2,%o2,%g0       // count == 0 ?
2581         be 2f
2582        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2583         sub %o0,4,%o0
2584 1:        ld [%o0],%o3          // *xptr
2585           ld [%o0+%o1],%o4      // *yptr
2586           subcc %o2,1,%o2
2587           or %o3,%o4,%o3        // combine
2588           st %o3,[%o0]          // =: *xptr
2589           bne 1b
2590          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2591 2:      retl
2592        _ nop
2593 #endif
2594 #if COUNTER_LOOPS
2595         andcc %o2,%o2,%g0       // count == 0 ?
2596         be 2f
2597        _ sll %o2,2,%o2          // %o2 = 4*count
2598         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2599         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2600 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2601           ld [%o1+%o2],%o3      // fetch y digit
2602           ld [%o0+%o2],%o4      // fetch x digit
2603           or %o4,%o3,%o3        // combine both
2604           bne 1b
2605          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2606 2:      retl
2607        _ nop
2608 #endif
2609
2610 // extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := x digit XOR y digit.
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2611         DECLARE_FUNCTION(xor_loop_down)
2612 C(xor_loop_down:) // Input in %o0,%o1,%o2
2613 #if SLOW_LOOPS
2614         andcc %o2,%o2,%g0       // count == 0 ?
2615         be 2f
2616        _ sub %o0,4,%o0
2617 1:        ld [%o0],%o3          // x digit
2618           sub %o1,4,%o1
2619           ld [%o1],%o4          // y digit
2620           subcc %o2,1,%o2
2621           xor %o3,%o4,%o3       // combine
2622           st %o3,[%o0]          // store into x
2623           bne 1b
2624          _ sub %o0,4,%o0
2625 2:      retl
2626        _ nop
2627 #endif
2628 #if STANDARD_LOOPS
2629         andcc %o2,%o2,%g0       // count == 0 ?
2630         be 2f
2631        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2632         sub %o0,4,%o0
2633 1:        ld [%o0],%o3          // *xptr
2634           ld [%o0+%o1],%o4      // *yptr
2635           subcc %o2,1,%o2
2636           xor %o3,%o4,%o3       // combine
2637           st %o3,[%o0]          // =: *xptr
2638           bne 1b
2639          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2640 2:      retl
2641        _ nop
2642 #endif
2643 #if COUNTER_LOOPS
2644         andcc %o2,%o2,%g0       // count == 0 ?
2645         be 2f
2646        _ sll %o2,2,%o2          // %o2 = 4*count
2647         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2648         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2649 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2650           ld [%o1+%o2],%o3      // fetch y digit
2651           ld [%o0+%o2],%o4      // fetch x digit
2652           xor %o4,%o3,%o3       // combine both
2653           bne 1b
2654          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2655 2:      retl
2656        _ nop
2657 #endif
2658
2659 // extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := x digit AND y digit.
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2660         DECLARE_FUNCTION(and_loop_down)
2661 C(and_loop_down:) // Input in %o0,%o1,%o2
2662 #if SLOW_LOOPS
2663         andcc %o2,%o2,%g0       // count == 0 ?
2664         be 2f
2665        _ sub %o0,4,%o0
2666 1:        ld [%o0],%o3          // x digit
2667           sub %o1,4,%o1
2668           ld [%o1],%o4          // y digit
2669           subcc %o2,1,%o2
2670           and %o3,%o4,%o3       // combine
2671           st %o3,[%o0]          // store into x
2672           bne 1b
2673          _ sub %o0,4,%o0
2674 2:      retl
2675        _ nop
2676 #endif
2677 #if STANDARD_LOOPS
2678         andcc %o2,%o2,%g0       // count == 0 ?
2679         be 2f
2680        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2681         sub %o0,4,%o0
2682 1:        ld [%o0],%o3          // *xptr
2683           ld [%o0+%o1],%o4      // *yptr
2684           subcc %o2,1,%o2
2685           and %o3,%o4,%o3       // combine
2686           st %o3,[%o0]          // =: *xptr
2687           bne 1b
2688          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2689 2:      retl
2690        _ nop
2691 #endif
2692 #if COUNTER_LOOPS
2693         andcc %o2,%o2,%g0       // count == 0 ?
2694         be 2f
2695        _ sll %o2,2,%o2          // %o2 = 4*count
2696         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2697         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2698 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2699           ld [%o1+%o2],%o3      // fetch y digit
2700           ld [%o0+%o2],%o4      // fetch x digit
2701           and %o4,%o3,%o3       // combine both
2702           bne 1b
2703          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2704 2:      retl
2705        _ nop
2706 #endif
2707
2708 // extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := NOT (x digit XOR y digit)   (xnor).
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2709         DECLARE_FUNCTION(eqv_loop_down)
2710 C(eqv_loop_down:) // Input in %o0,%o1,%o2
2711 #if SLOW_LOOPS
2712         andcc %o2,%o2,%g0       // count == 0 ?
2713         be 2f
2714        _ sub %o0,4,%o0
2715 1:        ld [%o0],%o3          // x digit
2716           sub %o1,4,%o1
2717           ld [%o1],%o4          // y digit
2718           subcc %o2,1,%o2
2719           xnor %o3,%o4,%o3      // combine
2720           st %o3,[%o0]          // store into x
2721           bne 1b
2722          _ sub %o0,4,%o0
2723 2:      retl
2724        _ nop
2725 #endif
2726 #if STANDARD_LOOPS
2727         andcc %o2,%o2,%g0       // count == 0 ?
2728         be 2f
2729        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2730         sub %o0,4,%o0
2731 1:        ld [%o0],%o3          // *xptr
2732           ld [%o0+%o1],%o4      // *yptr
2733           subcc %o2,1,%o2
2734           xnor %o3,%o4,%o3      // combine
2735           st %o3,[%o0]          // =: *xptr
2736           bne 1b
2737          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2738 2:      retl
2739        _ nop
2740 #endif
2741 #if COUNTER_LOOPS
2742         andcc %o2,%o2,%g0       // count == 0 ?
2743         be 2f
2744        _ sll %o2,2,%o2          // %o2 = 4*count
2745         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2746         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2747 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2748           ld [%o1+%o2],%o3      // fetch y digit
2749           ld [%o0+%o2],%o4      // fetch x digit
2750           xnor %o4,%o3,%o3      // combine both
2751           bne 1b
2752          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2753 2:      retl
2754        _ nop
2755 #endif
2756
2757 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := NOT (x digit AND y digit).
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2758         DECLARE_FUNCTION(nand_loop_down)
2759 C(nand_loop_down:) // Input in %o0,%o1,%o2
2760 #if SLOW_LOOPS
2761         andcc %o2,%o2,%g0       // count == 0 ?
2762         be 2f
2763        _ sub %o0,4,%o0
2764 1:        ld [%o0],%o3          // x digit
2765           sub %o1,4,%o1
2766           ld [%o1],%o4          // y digit
2767           subcc %o2,1,%o2
2768           and %o3,%o4,%o3       // combine
2769           xor %o3,-1,%o3        // complement
2770           st %o3,[%o0]          // store into x
2771           bne 1b
2772          _ sub %o0,4,%o0
2773 2:      retl
2774        _ nop
2775 #endif
2776 #if STANDARD_LOOPS
2777         andcc %o2,%o2,%g0       // count == 0 ?
2778         be 2f
2779        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2780         sub %o0,4,%o0
2781 1:        ld [%o0],%o3          // *xptr
2782           ld [%o0+%o1],%o4      // *yptr
2783           subcc %o2,1,%o2
2784           and %o3,%o4,%o3       // combine
2785           xor %o3,-1,%o3        // complement
2786           st %o3,[%o0]          // =: *xptr
2787           bne 1b
2788          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2789 2:      retl
2790        _ nop
2791 #endif
2792 #if COUNTER_LOOPS
2793         andcc %o2,%o2,%g0       // count == 0 ?
2794         be 2f
2795        _ sll %o2,2,%o2          // %o2 = 4*count
2796         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2797         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2798 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2799           ld [%o1+%o2],%o3      // fetch y digit
2800           ld [%o0+%o2],%o4      // fetch x digit
2801           and %o4,%o3,%o3       // combine both
2802           xor %o3,-1,%o3        // complement
2803           bne 1b
2804          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2805 2:      retl
2806        _ nop
2807 #endif
2808
2809 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := NOT (x digit OR y digit).
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2810         DECLARE_FUNCTION(nor_loop_down)
2811 C(nor_loop_down:) // Input in %o0,%o1,%o2
2812 #if SLOW_LOOPS
2813         andcc %o2,%o2,%g0       // count == 0 ?
2814         be 2f
2815        _ sub %o0,4,%o0
2816 1:        ld [%o0],%o3          // x digit
2817           sub %o1,4,%o1
2818           ld [%o1],%o4          // y digit
2819           subcc %o2,1,%o2
2820           or %o3,%o4,%o3        // combine
2821           xor %o3,-1,%o3        // complement
2822           st %o3,[%o0]          // store into x
2823           bne 1b
2824          _ sub %o0,4,%o0
2825 2:      retl
2826        _ nop
2827 #endif
2828 #if STANDARD_LOOPS
2829         andcc %o2,%o2,%g0       // count == 0 ?
2830         be 2f
2831        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2832         sub %o0,4,%o0
2833 1:        ld [%o0],%o3          // *xptr
2834           ld [%o0+%o1],%o4      // *yptr
2835           subcc %o2,1,%o2
2836           or %o3,%o4,%o3        // combine
2837           xor %o3,-1,%o3        // complement
2838           st %o3,[%o0]          // =: *xptr
2839           bne 1b
2840          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2841 2:      retl
2842        _ nop
2843 #endif
2844 #if COUNTER_LOOPS
2845         andcc %o2,%o2,%g0       // count == 0 ?
2846         be 2f
2847        _ sll %o2,2,%o2          // %o2 = 4*count
2848         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2849         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2850 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2851           ld [%o1+%o2],%o3      // fetch y digit
2852           ld [%o0+%o2],%o4      // fetch x digit
2853           or %o4,%o3,%o3        // combine both
2854           xor %o3,-1,%o3        // complement
2855           bne 1b
2856          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2857 2:      retl
2858        _ nop
2859 #endif
2860
2861 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := x digit AND NOT y digit.
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2862         DECLARE_FUNCTION(andc2_loop_down)
2863 C(andc2_loop_down:) // Input in %o0,%o1,%o2
2864 #if SLOW_LOOPS
2865         andcc %o2,%o2,%g0       // count == 0 ?
2866         be 2f
2867        _ sub %o0,4,%o0
2868 1:        ld [%o0],%o3          // x digit
2869           sub %o1,4,%o1
2870           ld [%o1],%o4          // y digit
2871           subcc %o2,1,%o2
2872           andn %o3,%o4,%o3      // combine: x AND NOT y
2873           st %o3,[%o0]          // store into x
2874           bne 1b
2875          _ sub %o0,4,%o0
2876 2:      retl
2877        _ nop
2878 #endif
2879 #if STANDARD_LOOPS
2880         andcc %o2,%o2,%g0       // count == 0 ?
2881         be 2f
2882        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2883         sub %o0,4,%o0
2884 1:        ld [%o0],%o3          // *xptr
2885           ld [%o0+%o1],%o4      // *yptr
2886           subcc %o2,1,%o2
2887           andn %o3,%o4,%o3      // combine: x AND NOT y
2888           st %o3,[%o0]          // =: *xptr
2889           bne 1b
2890          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2891 2:      retl
2892        _ nop
2893 #endif
2894 #if COUNTER_LOOPS
2895         andcc %o2,%o2,%g0       // count == 0 ?
2896         be 2f
2897        _ sll %o2,2,%o2          // %o2 = 4*count
2898         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2899         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2900 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2901           ld [%o1+%o2],%o3      // fetch y digit
2902           ld [%o0+%o2],%o4      // fetch x digit
2903           andn %o4,%o3,%o3      // combine both: x AND NOT y
2904           bne 1b
2905          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2906 2:      retl
2907        _ nop
2908 #endif
2909
2910 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits directly below xptr and yptr, walking downwards:
     //   x digit := x digit OR NOT y digit.
     // Three alternative implementations follow (SLOW_LOOPS, STANDARD_LOOPS,
     // COUNTER_LOOPS; presumably exactly one is enabled). All of them write the
     // result into the x sequence and leave the y sequence untouched.
2911         DECLARE_FUNCTION(orc2_loop_down)
2912 C(orc2_loop_down:) // Input in %o0,%o1,%o2
2913 #if SLOW_LOOPS
2914         andcc %o2,%o2,%g0       // count == 0 ?
2915         be 2f
2916        _ sub %o0,4,%o0
2917 1:        ld [%o0],%o3          // x digit
2918           sub %o1,4,%o1
2919           ld [%o1],%o4          // y digit
2920           subcc %o2,1,%o2
2921           orn %o3,%o4,%o3       // combine: x OR NOT y
2922           st %o3,[%o0]          // store into x
2923           bne 1b
2924          _ sub %o0,4,%o0
2925 2:      retl
2926        _ nop
2927 #endif
2928 #if STANDARD_LOOPS
2929         andcc %o2,%o2,%g0       // count == 0 ?
2930         be 2f
2931        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
2932         sub %o0,4,%o0
2933 1:        ld [%o0],%o3          // *xptr
2934           ld [%o0+%o1],%o4      // *yptr
2935           subcc %o2,1,%o2
2936           orn %o3,%o4,%o3       // combine: x OR NOT y
2937           st %o3,[%o0]          // =: *xptr
2938           bne 1b
2939          _ sub %o0,4,%o0        // xptr--, yptr-- (yptr follows via the offset in %o1)
2940 2:      retl
2941        _ nop
2942 #endif
2943 #if COUNTER_LOOPS
2944         andcc %o2,%o2,%g0       // count == 0 ?
2945         be 2f
2946        _ sll %o2,2,%o2          // %o2 = 4*count
2947         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
2948         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
2949 1:        subcc %o2,4,%o2       // decrement counter, which is also the byte offset
2950           ld [%o1+%o2],%o3      // fetch y digit
2951           ld [%o0+%o2],%o4      // fetch x digit
2952           orn %o4,%o3,%o3       // combine both: x OR NOT y
2953           bne 1b
2954          _ st %o3,[%o0+%o2]     // store into x, the same destination as the other variants
2955 2:      retl
2956        _ nop
2957 #endif
2958
2959 // extern void not_loop_down (uintD* xptr, uintC count);
     // Complements, in place, each of the count digits directly below xptr,
     // walking downwards. Does nothing when count is 0.
     // Two alternative implementations follow (STANDARD_LOOPS, COUNTER_LOOPS).
2960         DECLARE_FUNCTION(not_loop_down)
2961 C(not_loop_down:) // Input in %o0,%o1
2962 #if STANDARD_LOOPS
2963         andcc %o1,%o1,%g0       // count == 0 ?
2964         be 2f
2965        _ sub %o0,4,%o0
2966 1:        ld [%o0],%o2          // fetch digit
2967           subcc %o1,1,%o1
2968           xor %o2,-1,%o2        // complement
2969           st %o2,[%o0]          // store digit
2970           bne 1b
2971          _ sub %o0,4,%o0
2972 2:      retl
2973        _ nop
2974 #endif
2975 #if COUNTER_LOOPS
2976         andcc %o1,%o1,%g0       // count == 0 ?
2977         be 2f
2978        _ sll %o1,2,%o1          // %o1 = 4*count
2979         sub %o0,%o1,%o0         // %o0 = &xptr[-count]
2980 1:        subcc %o1,4,%o1       // decrement counter, which is also the byte offset
2981           ld [%o0+%o1],%o2      // fetch next digit
2982           xor %o2,-1,%o2        // complement
2983           bne 1b
2984          _ st %o2,[%o0+%o1]     // store digit
2985 2:      retl
2986        _ nop
2987 #endif
2988
2989 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks downwards over the count digits directly below xptr and yptr and
     // returns 1 as soon as some pair has (x digit AND y digit) != 0; returns 0 if
     // no pair does, or if count is 0. Nothing is written to memory.
     // Two alternative implementations follow (STANDARD_LOOPS, COUNTER_LOOPS).
2990         DECLARE_FUNCTION(and_test_loop_down)
2991 C(and_test_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
2992 #if STANDARD_LOOPS
2993         andcc %o2,%o2,%g0       // count == 0 ?
2994         be 4f
2995        _ sub %o0,4,%o0
2996 1:        ld [%o0],%o3          // x digit
2997           sub %o1,4,%o1
2998           ld [%o1],%o4          // y digit
2999           subcc %o2,1,%o2
3000           be 3f                 // last pair: decide at label 3
3001          _ andcc %o3,%o4,%g0    // test the pair (flags are consumed at the branch target)
3002           be 1b                 // no common bit: continue
3003          _ sub %o0,4,%o0
3004 2:      retl
3005        _ mov 1,%o0
3006 3:      bne 2b                  // last pair had a common bit -> 1
3007        _ nop
3008 4:      retl
3009        _ mov 0,%o0
3010 #endif
3011 #if COUNTER_LOOPS
3012         sll %o2,2,%o2           // %o2 = 4*count
3013         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
3014         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
3015         subcc %o2,4,%o2
3016         bcs 2f                  // count was 0
3017        _ nop
3018           ld [%o0+%o2],%o3      // fetch next digit
3019 1:        ld [%o1+%o2],%o4      // fetch another digit
3020           andcc %o3,%o4,%g0     // combine both
3021           bne 3f
3022          _ subcc %o2,4,%o2      // decrement counter, decrement pointer
3023           bcc,a 1b
3024          __ ld [%o0+%o2],%o3    // fetch next digit (only executed when the branch is taken)
3025 2:      retl
3026        _ mov 0,%o0
3027 3:      retl
3028        _ mov 1,%o0
3029 #endif
3030
3031 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Compares the count digits directly below xptr with those below yptr,
     // walking downwards, and stops at the first pair that differs.
     // Returns 0 if all pairs are equal (or count is 0), 1 if the differing
     // x digit is larger (unsigned comparison), -1 if it is smaller.
     // Two alternative implementations follow (STANDARD_LOOPS, COUNTER_LOOPS).
3032         DECLARE_FUNCTION(compare_loop_down)
3033 C(compare_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
3034 #if STANDARD_LOOPS
3035         andcc %o2,%o2,%g0       // count == 0 ?
3036         be 2f
3037        _ nop
3038 1:        ld [%o0-4],%o3        // x digit
3039           ld [%o1-4],%o4        // y digit
3040           subcc %o3,%o4,%g0     // compare
3041           bne 3f
3042          _ sub %o0,4,%o0
3043           subcc %o2,1,%o2
3044           bne 1b
3045          _ sub %o1,4,%o1
3046 2:      retl
3047        _ mov 0,%o0
3048 3:      blu 4f                  // flags still come from the digit comparison
3049        _ nop
3050         retl
3051        _ mov 1,%o0
3052 4:      retl
3053        _ mov -1,%o0
3054 #endif
3055 #if COUNTER_LOOPS
3056         sll %o2,2,%o2           // %o2 = 4*count
3057         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
3058         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
3059         subcc %o2,4,%o2
3060         bcs 5f                  // count was 0
3061        _ nop
3062           ld [%o0+%o2],%o3      // fetch next digit
3063 1:        ld [%o1+%o2],%o4      // fetch another digit
3064           subcc %o2,4,%o2       // decrement counter, decrement pointer
3065           bcs 4f                // that was the last pair: decide at label 4
3066          _ subcc %o3,%o4,%g0    // compare
3067           be,a 1b
3068          __ ld [%o0+%o2],%o3    // fetch next digit (only executed when the branch is taken)
3069 2:      blu 3f
3070        _ nop
3071         retl
3072        _ mov 1,%o0
3073 3:      retl
3074        _ mov -1,%o0
3075 4:      bne 2b                  // last pair differs: pick the sign at label 2
3076        _ nop
3077 5:      retl
3078        _ mov 0,%o0
3079 #endif
3080
3081 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Adds two digit sequences of count digits with carry propagation, walking
     // upwards, and stores the sums at destptr. Returns the final carry (0 or 1);
     // returns 0 when count is 0.
     // Three alternative implementations follow (STANDARD_LOOPS, COUNTER_LOOPS,
     // UNROLLED_LOOPS). The unrolled one enters an 8-fold unrolled loop body by a
     // computed jump, so its address offsets depend on the exact instruction count;
     // its PIC path additionally uses %g2 to hold the return address.
3082         DECLARE_FUNCTION(add_loop_up)
3083 C(add_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
3084 #if STANDARD_LOOPS
3085         andcc %o3,%o3,%g0       // count == 0 ?
3086         be 2f
3087        _ subcc %g0,%g0,%g0      // carry := 0
3088 1:        ld [%o0],%o4          // source1 digit
3089           add %o0,4,%o0
3090           ld [%o1],%o5          // source2 digit
3091           add %o1,4,%o1
3092           addxcc %o4,%o5,%o4    // add
3093           addx %g0,%g0,%g1      // new carry
3094           st %o4,[%o2]          // store digit
3095           add %o2,4,%o2
3096           subcc %o3,1,%o3
3097           bne 1b
3098          _ subcc %g0,%g1,%g0    // carry (moved back from %g1 into the carry flag)
3099 2:      retl
3100        _ addx %g0,%g0,%o0
3101 #endif
3102 #if COUNTER_LOOPS
3103         subcc %g0,%o3,%o3       // %o3 = -count
3104         be 2f
3105        _ mov %g0,%g1            // carry := 0
3106         sll %o3,2,%o3           // %o3 = -4*count
3107         sub %o2,4,%o2
3108         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
3109         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
3110         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
3111 1:        ld [%o0+%o3],%o4      // source1 digit
3112           ld [%o1+%o3],%o5      // source2 digit
3113           subcc %g0,%g1,%g0     // carry
3114           addxcc %o4,%o5,%o4    // add
3115           addx %g0,%g0,%g1      // new carry
3116           addcc %o3,4,%o3       // decrement counter, increment pointer
3117           bne 1b
3118          _ st %o4,[%o2+%o3]     // store digit
3119 2:      retl
3120        _ mov %g1,%o0
3121 #endif
3122 #if UNROLLED_LOOPS
3123         and %o3,7,%o4           // count mod 8
3124         sll %o4,2,%o5
3125         add %o0,%o5,%o0         // %o0 = &sourceptr1[count mod 8]
3126         add %o1,%o5,%o1         // %o1 = &sourceptr2[count mod 8]
3127         add %o2,%o5,%o2         // %o2 = &destptr[count mod 8]
3128         sll %o4,4,%o4
3129 #ifdef PIC
3130         mov %o7,%g2             // save return address
3131         call 0f                 // puts the address of this call (label 0 minus 8) into %o7
3132        _ add %o7,152,%o5        // %o5 = (label 1)+4*(1+4*8): 8 bytes to label 0, 12 more to label 1, then 132
3133 0:
3134 #else
3135         set C(add_loop_up)+176,%o5 // same target; assumes `set` assembles to two instructions
3136 #endif
3137         sub %o5,%o4,%o5
3138         jmp %o5                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
3139        _ subcc %g0,%g0,%g0      // clear carry
3140 1:        subcc %g0,%g1,%g0     // carry
3141           ld [%o0-32],%o4       // source1 digit
3142           ld [%o1-32],%o5       // source2 digit
3143           addxcc %o5,%o4,%o5    // add
3144           st %o5,[%o2-32]       // store digit
3145           ld [%o0-28],%o4       // source1 digit
3146           ld [%o1-28],%o5       // source2 digit
3147           addxcc %o5,%o4,%o5    // add
3148           st %o5,[%o2-28]       // store digit
3149           ld [%o0-24],%o4       // source1 digit
3150           ld [%o1-24],%o5       // source2 digit
3151           addxcc %o5,%o4,%o5    // add
3152           st %o5,[%o2-24]       // store digit
3153           ld [%o0-20],%o4       // source1 digit
3154           ld [%o1-20],%o5       // source2 digit
3155           addxcc %o5,%o4,%o5    // add
3156           st %o5,[%o2-20]       // store digit
3157           ld [%o0-16],%o4       // source1 digit
3158           ld [%o1-16],%o5       // source2 digit
3159           addxcc %o5,%o4,%o5    // add
3160           st %o5,[%o2-16]       // store digit
3161           ld [%o0-12],%o4       // source1 digit
3162           ld [%o1-12],%o5       // source2 digit
3163           addxcc %o5,%o4,%o5    // add
3164           st %o5,[%o2-12]       // store digit
3165           ld [%o0-8],%o4        // source1 digit
3166           ld [%o1-8],%o5        // source2 digit
3167           addxcc %o5,%o4,%o5    // add
3168           st %o5,[%o2-8]        // store digit
3169           ld [%o0-4],%o4        // source1 digit
3170           ld [%o1-4],%o5        // source2 digit
3171           addxcc %o5,%o4,%o5    // add
3172           st %o5,[%o2-4]        // store digit
3173           addx %g0,%g0,%g1      // new carry
3174           add %o0,32,%o0
3175           add %o1,32,%o1
3176           subcc %o3,8,%o3       // at least 8 more digits to process?
3177           bcc 1b
3178          _ add %o2,32,%o2
3179 #ifdef PIC
3180         jmp %g2+8               // return through the address saved in %g2
3181 #else
3182         retl
3183 #endif
3184        _ mov %g1,%o0
3185 #endif
3186
3187 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Adds the count digits at sourceptr into the count digits at destptr with
     // carry propagation, walking upwards. Returns the final carry (0 or 1);
     // returns 0 when count is 0.
     // Three alternative implementations follow (STANDARD_LOOPS, COUNTER_LOOPS,
     // UNROLLED_LOOPS). The unrolled one enters an 8-fold unrolled loop body by a
     // computed jump, so its address offsets depend on the exact instruction count;
     // its PIC path additionally uses %g2 to hold the return address.
3188         DECLARE_FUNCTION(addto_loop_up)
3189 C(addto_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
3190 #if STANDARD_LOOPS
3191         andcc %o2,%o2,%g0       // count == 0 ?
3192         be 2f
3193        _ mov %g0,%o5            // carry := 0
3194 1:        ld [%o0],%o3          // source digit
3195           add %o0,4,%o0
3196           ld [%o1],%o4          // dest digit
3197           subcc %g0,%o5,%g0     // carry (moved from %o5 into the carry flag)
3198           addxcc %o4,%o3,%o4    // add
3199           addx %g0,%g0,%o5      // new carry
3200           st %o4,[%o1]          // store digit
3201           subcc %o2,1,%o2
3202           bne 1b
3203          _ add %o1,4,%o1
3204 2:      retl
3205        _ mov %o5,%o0
3206 #endif
3207 #if COUNTER_LOOPS
3208         subcc %g0,%o2,%o2       // %o2 = -count
3209         be 2f
3210        _ mov %g0,%o5            // carry := 0
3211         sll %o2,2,%o2           // %o2 = -4*count
3212         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
3213         sub %o1,%o2,%o1         // %o1 = &destptr[count]
3214           ld [%o0+%o2],%o3      // source digit
3215 1:        ld [%o1+%o2],%o4      // dest digit
3216           subcc %g0,%o5,%g0     // carry
3217           addxcc %o4,%o3,%o4    // add
3218           addx %g0,%g0,%o5      // new carry
3219           st %o4,[%o1+%o2]      // store digit
3220           addcc %o2,4,%o2       // decrement counter, increment pointer
3221           bne,a 1b
3222          __ ld [%o0+%o2],%o3    // source digit (only executed when the branch is taken)
3223 2:      retl
3224        _ mov %o5,%o0
3225 #endif
3226 #if UNROLLED_LOOPS
3227         and %o2,7,%o3           // count mod 8
3228         sll %o3,2,%o4
3229         add %o0,%o4,%o0         // %o0 = &sourceptr[count mod 8]
3230         add %o1,%o4,%o1         // %o1 = &destptr[count mod 8]
3231         sll %o3,4,%o3
3232 #ifdef PIC
3233         mov %o7,%g2             // save return address
3234         call 0f                 // puts the address of this call (label 0 minus 8) into %o7
3235        _ add %o7,152,%o4        // %o4 = (label 1)+4*(1+4*8): 8 bytes to label 0, 12 more to label 1, then 132
3236 0:
3237 #else
3238         set C(addto_loop_up)+172,%o4 // same target; assumes `set` assembles to two instructions
3239 #endif
3240         sub %o4,%o3,%o4
3241         jmp %o4                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
3242        _ subcc %g0,%g0,%g0      // clear carry
3243 1:        subcc %g0,%o5,%g0     // carry
3244           ld [%o0-32],%o3       // source digit
3245           ld [%o1-32],%o4       // dest digit
3246           addxcc %o4,%o3,%o4    // add
3247           st %o4,[%o1-32]       // store digit
3248           ld [%o0-28],%o3       // source digit
3249           ld [%o1-28],%o4       // dest digit
3250           addxcc %o4,%o3,%o4    // add
3251           st %o4,[%o1-28]       // store digit
3252           ld [%o0-24],%o3       // source digit
3253           ld [%o1-24],%o4       // dest digit
3254           addxcc %o4,%o3,%o4    // add
3255           st %o4,[%o1-24]       // store digit
3256           ld [%o0-20],%o3       // source digit
3257           ld [%o1-20],%o4       // dest digit
3258           addxcc %o4,%o3,%o4    // add
3259           st %o4,[%o1-20]       // store digit
3260           ld [%o0-16],%o3       // source digit
3261           ld [%o1-16],%o4       // dest digit
3262           addxcc %o4,%o3,%o4    // add
3263           st %o4,[%o1-16]       // store digit
3264           ld [%o0-12],%o3       // source digit
3265           ld [%o1-12],%o4       // dest digit
3266           addxcc %o4,%o3,%o4    // add
3267           st %o4,[%o1-12]       // store digit
3268           ld [%o0-8],%o3        // source digit
3269           ld [%o1-8],%o4        // dest digit
3270           addxcc %o4,%o3,%o4    // add
3271           st %o4,[%o1-8]        // store digit
3272           ld [%o0-4],%o3        // source digit
3273           ld [%o1-4],%o4        // dest digit
3274           addxcc %o4,%o3,%o4    // add
3275           st %o4,[%o1-4]        // store digit
3276           addx %g0,%g0,%o5      // new carry
3277           add %o0,32,%o0
3278           subcc %o2,8,%o2       // at least 8 more digits to process?
3279           bcc 1b
3280          _ add %o1,32,%o1
3281 #ifdef PIC
3282         jmp %g2+8               // return through the address saved in %g2
3283 #else
3284         retl
3285 #endif
3286        _ mov %o5,%o0
3287 #endif
3288
3289 // extern uintD inc_loop_up (uintD* ptr, uintC count);  Adds 1 to the count-digit number stored at ascending addresses from ptr and returns the carry out of the last digit.
3290         DECLARE_FUNCTION(inc_loop_up)
3291 C(inc_loop_up:) // Input: %o0 = ptr, %o1 = count. Output: %o0 = 1 if every digit wrapped to 0 (or count == 0), else 0.
3292 #if STANDARD_LOOPS
3293         andcc %o1,%o1,%g0       // count == 0 ?
3294         be 2f                   // yes -> return 1
3295        _ nop
3296           ld [%o0],%o2          // fetch first digit
3297 1:        add %o0,4,%o0         // advance pointer
3298           addcc %o2,1,%o2       // increment digit
3299           bne 3f                // result nonzero -> carry stops here, return 0
3300          _ st %o2,[%o0-4]       // store digit (delay slot, always executed)
3301           subcc %o1,1,%o1       // decrement counter
3302           bne,a 1b              // more digits -> continue
3303          __ ld [%o0],%o2        // fetch next digit (only executed when looping)
3304 2:      retl
3305        _ mov 1,%o0              // carry ran off the end
3306 3:      retl
3307        _ mov 0,%o0              // no carry out
3308 #endif
3309 #if COUNTER_LOOPS
3310         subcc %g0,%o1,%o1       // %o1 = -count
3311         be 2f                   // count == 0 -> return 1
3312        _ sll %o1,2,%o1          // %o1 = -4*count
3313         sub %o0,%o1,%o0         // %o0 = &ptr[count]
3314           ld [%o0+%o1],%o2      // fetch first digit
3315 1:        addcc %o2,1,%o2       // increment
3316           bne 3f                // result nonzero -> carry stops here, return 0
3317          _ st %o2,[%o0+%o1]     // store digit (delay slot, always executed)
3318           addcc %o1,4,%o1       // step the negative byte offset towards 0: counts down, moves up in memory
3319           bne,a 1b              // offset not yet 0 -> continue
3320          __ ld [%o0+%o1],%o2    // fetch next digit (only executed when looping)
3321 2:      retl
3322        _ mov 1,%o0              // carry ran off the end
3323 3:      retl
3324        _ mov 0,%o0              // no carry out
3325 #endif
3326
3327 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);  Stores sourceptr1[k] - sourceptr2[k] - borrow into destptr[k] for k = 0..count-1 (ascending addresses) and returns the final borrow (0 or 1).
3328         DECLARE_FUNCTION(sub_loop_up)
3329 C(sub_loop_up:) // Input: %o0 = sourceptr1, %o1 = sourceptr2, %o2 = destptr, %o3 = count; clobbers %g1 (and %g2 in the PIC unrolled variant); Output in %o0
3330 #if STANDARD_LOOPS
3331         andcc %o3,%o3,%g0       // count == 0 ?
3332         be 2f                   // yes -> return the cleared carry
3333        _ subcc %g0,%g0,%g0      // carry := 0
3334 1:        ld [%o0],%o4          // source1 digit
3335           add %o0,4,%o0
3336           ld [%o1],%o5          // source2 digit
3337           add %o1,4,%o1
3338           subxcc %o4,%o5,%o4    // subtract with borrow
3339           addx %g0,%g0,%g1      // save new carry in %g1
3340           st %o4,[%o2]          // store digit
3341           add %o2,4,%o2
3342           subcc %o3,1,%o3       // decrement counter (overwrites the carry flag)
3343           bne 1b
3344          _ subcc %g0,%g1,%g0    // restore carry flag from %g1
3345 2:      retl
3346        _ addx %g0,%g0,%o0       // return carry flag as 0/1
3347 #endif
3348 #if COUNTER_LOOPS
3349         subcc %g0,%o3,%o3       // %o3 = -count
3350         be 2f                   // count == 0 -> return 0
3351        _ mov %g0,%g1            // carry := 0
3352         sll %o3,2,%o3           // %o3 = -4*count
3353         sub %o2,4,%o2           // bias destptr by one digit: the store happens after the offset is advanced
3354         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
3355         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
3356         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
3357 1:        ld [%o0+%o3],%o4      // source1 digit
3358           ld [%o1+%o3],%o5      // source2 digit
3359           subcc %g0,%g1,%g0     // carry flag := %g1
3360           subxcc %o4,%o5,%o4    // subtract with borrow
3361           addx %g0,%g0,%g1      // new carry
3362           addcc %o3,4,%o3       // advance offset towards 0
3363           bne 1b
3364          _ st %o4,[%o2+%o3]     // store digit (delay slot, always executed)
3365 2:      retl
3366        _ mov %g1,%o0            // return carry
3367 #endif
3368 #if UNROLLED_LOOPS
3369         and %o3,7,%o4           // count mod 8
3370         sll %o4,2,%o5
3371         add %o0,%o5,%o0         // %o0 = &sourceptr1[count mod 8]
3372         add %o1,%o5,%o1         // %o1 = &sourceptr2[count mod 8]
3373         add %o2,%o5,%o2         // %o2 = &destptr[count mod 8]
3374         sll %o4,4,%o4           // 16 bytes of code (4 instructions) per digit group
3375 #ifdef PIC
3376         mov %o7,%g2             // save return address
3377         call 0f                 // sets %o7 = address of this call instruction (label 0 is 8 bytes further on)
3378        _ add %o7,152,%o5        // %o5 = address of the addx behind the 8 digit groups = (label 0)+144 = call+152
3379 0:
3380 #else
3381         set C(sub_loop_up)+176,%o5 // address of the addx behind the 8 digit groups; C() applies the platform symbol prefix
3382 #endif
3383         sub %o5,%o4,%o5         // back up one group per leftover digit
3384         jmp %o5                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
3385        _ subcc %g0,%g0,%g0      // clear carry
3386 1:        subcc %g0,%g1,%g0     // carry flag := %g1
3387           ld [%o0-32],%o4       // source1 digit
3388           ld [%o1-32],%o5       // source2 digit
3389           subxcc %o4,%o5,%o4    // subtract with borrow
3390           st %o4,[%o2-32]       // store digit
3391           ld [%o0-28],%o4       // source1 digit
3392           ld [%o1-28],%o5       // source2 digit
3393           subxcc %o4,%o5,%o4    // subtract with borrow
3394           st %o4,[%o2-28]       // store digit
3395           ld [%o0-24],%o4       // source1 digit
3396           ld [%o1-24],%o5       // source2 digit
3397           subxcc %o4,%o5,%o4    // subtract with borrow
3398           st %o4,[%o2-24]       // store digit
3399           ld [%o0-20],%o4       // source1 digit
3400           ld [%o1-20],%o5       // source2 digit
3401           subxcc %o4,%o5,%o4    // subtract with borrow
3402           st %o4,[%o2-20]       // store digit
3403           ld [%o0-16],%o4       // source1 digit
3404           ld [%o1-16],%o5       // source2 digit
3405           subxcc %o4,%o5,%o4    // subtract with borrow
3406           st %o4,[%o2-16]       // store digit
3407           ld [%o0-12],%o4       // source1 digit
3408           ld [%o1-12],%o5       // source2 digit
3409           subxcc %o4,%o5,%o4    // subtract with borrow
3410           st %o4,[%o2-12]       // store digit
3411           ld [%o0-8],%o4        // source1 digit
3412           ld [%o1-8],%o5        // source2 digit
3413           subxcc %o4,%o5,%o4    // subtract with borrow
3414           st %o4,[%o2-8]        // store digit
3415           ld [%o0-4],%o4        // source1 digit
3416           ld [%o1-4],%o5        // source2 digit
3417           subxcc %o4,%o5,%o4    // subtract with borrow
3418           st %o4,[%o2-4]        // store digit
3419           addx %g0,%g0,%g1      // new carry
3420           add %o0,32,%o0
3421           add %o1,32,%o1
3422           subcc %o3,8,%o3       // at least 8 more digits to process?
3423           bcc 1b                // yes (no borrow) -> run a full pass of 8
3424          _ add %o2,32,%o2
3425 #ifdef PIC
3426         jmp %g2+8               // return through the saved return address
3427 #else
3428         retl
3429 #endif
3430        _ mov %g1,%o0            // return carry
3431 #endif
3432
3433 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);  Stores sourceptr1[k] - sourceptr2[k] - borrow into destptr[k] for k = 0..count-1 (ascending addresses), starting with the incoming carry as borrow, and returns the final borrow.
3434         DECLARE_FUNCTION(subx_loop_up)
3435 C(subx_loop_up:) // Input: %o0 = sourceptr1, %o1 = sourceptr2, %o2 = destptr, %o3 = count, %o4 = carry (nonzero means borrow); clobbers %g1 (and %g2 in the PIC unrolled variant); Output in %o0
3436 #if STANDARD_LOOPS
3437         andcc %o3,%o3,%g0       // count == 0 ?
3438         be 2f                   // yes -> return the incoming carry as 0/1
3439        _ subcc %g0,%o4,%g0      // carry flag := (incoming carry != 0)
3440 1:        ld [%o0],%o4          // source1 digit
3441           add %o0,4,%o0
3442           ld [%o1],%o5          // source2 digit
3443           add %o1,4,%o1
3444           subxcc %o4,%o5,%o4    // subtract with borrow
3445           addx %g0,%g0,%g1      // save new carry in %g1
3446           st %o4,[%o2]          // store digit
3447           add %o2,4,%o2
3448           subcc %o3,1,%o3       // decrement counter (overwrites the carry flag)
3449           bne 1b
3450          _ subcc %g0,%g1,%g0    // restore carry flag from %g1
3451 2:      retl
3452        _ addx %g0,%g0,%o0       // return carry flag as 0/1
3453 #endif
3454 #if COUNTER_LOOPS
3455         subcc %g0,%o3,%o3       // %o3 = -count
3456         be 2f                   // count == 0 -> return the incoming carry unchanged
3457        _ mov %o4,%g1            // carry := incoming carry
3458         sll %o3,2,%o3           // %o3 = -4*count
3459         sub %o2,4,%o2           // bias destptr by one digit: the store happens after the offset is advanced
3460         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
3461         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
3462         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
3463 1:        ld [%o0+%o3],%o4      // source1 digit
3464           ld [%o1+%o3],%o5      // source2 digit
3465           subcc %g0,%g1,%g0     // carry flag := (%g1 != 0)
3466           subxcc %o4,%o5,%o4    // subtract with borrow
3467           addx %g0,%g0,%g1      // new carry
3468           addcc %o3,4,%o3       // advance offset towards 0
3469           bne 1b
3470          _ st %o4,[%o2+%o3]     // store digit (delay slot, always executed)
3471 2:      retl
3472        _ mov %g1,%o0            // return carry
3473 #endif
3474 #if UNROLLED_LOOPS
3475         and %o3,7,%o5           // count mod 8
3476         sll %o5,2,%g1
3477         add %o0,%g1,%o0         // %o0 = &sourceptr1[count mod 8]
3478         add %o1,%g1,%o1         // %o1 = &sourceptr2[count mod 8]
3479         add %o2,%g1,%o2         // %o2 = &destptr[count mod 8]
3480         sll %o5,4,%o5           // 16 bytes of code (4 instructions) per digit group
3481 #ifdef PIC
3482         mov %o7,%g2             // save return address
3483         call 0f                 // sets %o7 = address of this call instruction (label 0 is 8 bytes further on)
3484        _ add %o7,152,%g1        // %g1 = address of the addx behind the 8 digit groups = (label 0)+144 = call+152
3485 0:
3486 #else
3487         set C(subx_loop_up)+176,%g1 // address of the addx behind the 8 digit groups; C() applies the platform symbol prefix
3488 #endif
3489         sub %g1,%o5,%g1         // back up one group per leftover digit
3490         jmp %g1                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
3491        _ subcc %g0,%o4,%g0      // initialise carry flag from the incoming carry
3492 1:        subcc %g0,%g1,%g0     // carry flag := %g1
3493           ld [%o0-32],%o4       // source1 digit
3494           ld [%o1-32],%o5       // source2 digit
3495           subxcc %o4,%o5,%o4    // subtract with borrow
3496           st %o4,[%o2-32]       // store digit
3497           ld [%o0-28],%o4       // source1 digit
3498           ld [%o1-28],%o5       // source2 digit
3499           subxcc %o4,%o5,%o4    // subtract with borrow
3500           st %o4,[%o2-28]       // store digit
3501           ld [%o0-24],%o4       // source1 digit
3502           ld [%o1-24],%o5       // source2 digit
3503           subxcc %o4,%o5,%o4    // subtract with borrow
3504           st %o4,[%o2-24]       // store digit
3505           ld [%o0-20],%o4       // source1 digit
3506           ld [%o1-20],%o5       // source2 digit
3507           subxcc %o4,%o5,%o4    // subtract with borrow
3508           st %o4,[%o2-20]       // store digit
3509           ld [%o0-16],%o4       // source1 digit
3510           ld [%o1-16],%o5       // source2 digit
3511           subxcc %o4,%o5,%o4    // subtract with borrow
3512           st %o4,[%o2-16]       // store digit
3513           ld [%o0-12],%o4       // source1 digit
3514           ld [%o1-12],%o5       // source2 digit
3515           subxcc %o4,%o5,%o4    // subtract with borrow
3516           st %o4,[%o2-12]       // store digit
3517           ld [%o0-8],%o4        // source1 digit
3518           ld [%o1-8],%o5        // source2 digit
3519           subxcc %o4,%o5,%o4    // subtract with borrow
3520           st %o4,[%o2-8]        // store digit
3521           ld [%o0-4],%o4        // source1 digit
3522           ld [%o1-4],%o5        // source2 digit
3523           subxcc %o4,%o5,%o4    // subtract with borrow
3524           st %o4,[%o2-4]        // store digit
3525           addx %g0,%g0,%g1      // new carry
3526           add %o0,32,%o0
3527           add %o1,32,%o1
3528           subcc %o3,8,%o3       // at least 8 more digits to process?
3529           bcc 1b                // yes (no borrow) -> run a full pass of 8
3530          _ add %o2,32,%o2
3531 #ifdef PIC
3532         jmp %g2+8               // return through the saved return address
3533 #else
3534         retl
3535 #endif
3536        _ mov %g1,%o0            // return carry
3537 #endif
3538
3539 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);  Replaces destptr[k] by destptr[k] - sourceptr[k] - borrow for k = 0..count-1 (ascending addresses) and returns the final borrow (0 or 1).
3540         DECLARE_FUNCTION(subfrom_loop_up)
3541 C(subfrom_loop_up:) // Input: %o0 = sourceptr, %o1 = destptr, %o2 = count; clobbers %g2 in the PIC unrolled variant; Output in %o0
3542 #if STANDARD_LOOPS
3543         andcc %o2,%o2,%g0       // count == 0 ?
3544         be 2f                   // yes -> return 0
3545        _ mov %g0,%o5            // carry := 0
3546 1:        ld [%o0],%o3          // source digit
3547           add %o0,4,%o0
3548           ld [%o1],%o4          // dest digit
3549           subcc %g0,%o5,%g0     // carry flag := %o5
3550           subxcc %o4,%o3,%o4    // dest - source - borrow
3551           addx %g0,%g0,%o5      // new carry
3552           st %o4,[%o1]          // store digit
3553           subcc %o2,1,%o2       // decrement counter
3554           bne 1b
3555          _ add %o1,4,%o1
3556 2:      retl
3557        _ mov %o5,%o0            // return carry
3558 #endif
3559 #if COUNTER_LOOPS
3560         subcc %g0,%o2,%o2       // %o2 = -count
3561         be 2f                   // count == 0 -> return 0
3562        _ mov %g0,%o5            // carry := 0
3563         sll %o2,2,%o2           // %o2 = -4*count
3564         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
3565         sub %o1,%o2,%o1         // %o1 = &destptr[count]
3566           ld [%o0+%o2],%o3      // source digit
3567 1:        ld [%o1+%o2],%o4      // dest digit
3568           subcc %g0,%o5,%g0     // carry flag := %o5
3569           subxcc %o4,%o3,%o4    // dest - source - borrow
3570           addx %g0,%g0,%o5      // new carry
3571           st %o4,[%o1+%o2]      // store digit
3572           addcc %o2,4,%o2       // advance offset towards 0
3573           bne,a 1b
3574          __ ld [%o0+%o2],%o3    // source digit (only executed when looping)
3575 2:      retl
3576        _ mov %o5,%o0            // return carry
3577 #endif
3578 #if UNROLLED_LOOPS
3579         and %o2,7,%o3           // count mod 8
3580         sll %o3,2,%o4
3581         add %o0,%o4,%o0         // %o0 = &sourceptr[count mod 8]
3582         add %o1,%o4,%o1         // %o1 = &destptr[count mod 8]
3583         sll %o3,4,%o3           // 16 bytes of code (4 instructions) per digit group
3584 #ifdef PIC
3585         mov %o7,%g2             // save return address
3586         call 0f                 // sets %o7 = address of this call instruction (label 0 is 8 bytes further on)
3587        _ add %o7,152,%o4        // %o4 = address of the addx behind the 8 digit groups = (label 0)+144 = call+152
3588 0:
3589 #else
3590         set C(subfrom_loop_up)+172,%o4 // address of the addx behind the 8 digit groups; C() applies the platform symbol prefix
3591 #endif
3592         sub %o4,%o3,%o4         // back up one group per leftover digit
3593         jmp %o4                 // jump to (label 1)+4*(1+4*8-4*(count mod 8))
3594        _ subcc %g0,%g0,%g0      // clear carry
3595 1:        subcc %g0,%o5,%g0     // carry flag := %o5
3596           ld [%o0-32],%o3       // source digit
3597           ld [%o1-32],%o4       // dest digit
3598           subxcc %o4,%o3,%o4    // dest - source - borrow
3599           st %o4,[%o1-32]       // store digit
3600           ld [%o0-28],%o3       // source digit
3601           ld [%o1-28],%o4       // dest digit
3602           subxcc %o4,%o3,%o4    // dest - source - borrow
3603           st %o4,[%o1-28]       // store digit
3604           ld [%o0-24],%o3       // source digit
3605           ld [%o1-24],%o4       // dest digit
3606           subxcc %o4,%o3,%o4    // dest - source - borrow
3607           st %o4,[%o1-24]       // store digit
3608           ld [%o0-20],%o3       // source digit
3609           ld [%o1-20],%o4       // dest digit
3610           subxcc %o4,%o3,%o4    // dest - source - borrow
3611           st %o4,[%o1-20]       // store digit
3612           ld [%o0-16],%o3       // source digit
3613           ld [%o1-16],%o4       // dest digit
3614           subxcc %o4,%o3,%o4    // dest - source - borrow
3615           st %o4,[%o1-16]       // store digit
3616           ld [%o0-12],%o3       // source digit
3617           ld [%o1-12],%o4       // dest digit
3618           subxcc %o4,%o3,%o4    // dest - source - borrow
3619           st %o4,[%o1-12]       // store digit
3620           ld [%o0-8],%o3        // source digit
3621           ld [%o1-8],%o4        // dest digit
3622           subxcc %o4,%o3,%o4    // dest - source - borrow
3623           st %o4,[%o1-8]        // store digit
3624           ld [%o0-4],%o3        // source digit
3625           ld [%o1-4],%o4        // dest digit
3626           subxcc %o4,%o3,%o4    // dest - source - borrow
3627           st %o4,[%o1-4]        // store digit
3628           addx %g0,%g0,%o5      // new carry
3629           add %o0,32,%o0
3630           subcc %o2,8,%o2       // at least 8 more digits to process?
3631           bcc 1b                // yes (no borrow) -> run a full pass of 8
3632          _ add %o1,32,%o1
3633 #ifdef PIC
3634         jmp %g2+8               // return through the saved return address
3635 #else
3636         retl
3637 #endif
3638        _ mov %o5,%o0            // return carry
3639 #endif
3640
3641 // extern uintD dec_loop_up (uintD* ptr, uintC count);  Subtracts 1 from the count-digit number stored at ascending addresses from ptr and returns the borrow out of the last digit.
3642         DECLARE_FUNCTION(dec_loop_up)
3643 C(dec_loop_up:) // Input: %o0 = ptr, %o1 = count. Output: %o0 = -1 if every digit borrowed (or count == 0), else 0.
3644 #if STANDARD_LOOPS
3645         andcc %o1,%o1,%g0       // count == 0 ?
3646         be 2f                   // yes -> return -1
3647        _ nop
3648           ld [%o0],%o2          // fetch first digit
3649 1:        add %o0,4,%o0         // advance pointer
3650           subcc %o2,1,%o2       // decrement digit
3651           bcc 3f                // no borrow -> done, return 0
3652          _ st %o2,[%o0-4]       // store digit (delay slot, always executed)
3653           subcc %o1,1,%o1       // decrement counter
3654           bne,a 1b              // more digits -> continue
3655          __ ld [%o0],%o2        // fetch next digit (only executed when looping)
3656 2:      retl
3657        _ mov -1,%o0             // borrow ran off the end
3658 3:      retl
3659        _ mov 0,%o0              // no borrow out
3660 #endif
3661 #if COUNTER_LOOPS
3662         subcc %g0,%o1,%o1       // %o1 = -count
3663         be 2f                   // count == 0 -> return -1
3664        _ sll %o1,2,%o1          // %o1 = -4*count
3665         sub %o0,%o1,%o0         // %o0 = &ptr[count]
3666           ld [%o0+%o1],%o2      // fetch first digit
3667 1:        subcc %o2,1,%o2       // decrement
3668           bcc 3f                // no borrow -> done, return 0
3669          _ st %o2,[%o0+%o1]     // store digit (delay slot, always executed)
3670           addcc %o1,4,%o1       // step the negative byte offset towards 0: counts down, moves up in memory
3671           bne,a 1b              // offset not yet 0 -> continue
3672          __ ld [%o0+%o1],%o2    // fetch next digit (only executed when looping)
3673 2:      retl
3674        _ mov -1,%o0             // borrow ran off the end
3675 3:      retl
3676        _ mov 0,%o0              // no borrow out
3677 #endif
3678
3679 // extern uintD neg_loop_up (uintD* ptr, uintC count);  Negates in place the count-digit number stored at ascending addresses from ptr.
3680         DECLARE_FUNCTION(neg_loop_up)
3681 C(neg_loop_up:) // Input: %o0 = ptr, %o1 = count. Output: %o0 = 0 if all digits were zero (or count == 0), else -1.
3682 #if STANDARD_LOOPS
3683         // search for the first digit /= 0:
3684         andcc %o1,%o1,%g0       // count == 0 ?
3685         be 2f                   // yes -> return 0
3686        _ add %o0,4,%o0          // pointer runs one digit ahead of the digit being examined
3687 1:        ld [%o0-4],%o2        // fetch digit
3688           subcc %g0,%o2,%o2     // negate and test
3689           bne 3f                // nonzero -> found
3690          _ subcc %o1,1,%o1      // decrement counter; these flags are tested at label 3 and by the bne below
3691           bne 1b                // digits left -> keep searching
3692          _ add %o0,4,%o0
3693 2:      retl
3694        _ mov 0,%o0              // number was zero
3695 3:      // first digit /= 0 found, from now on there are carries
3696         // negate 1 digit, invert all other digits:
3697         be 5f                   // that was the last digit -> done
3698        _ st %o2,[%o0-4]         // store the negated digit (delay slot, always executed)
3699 4:        ld [%o0],%o2          // fetch next digit
3700           subcc %o1,1,%o1       // decrement counter
3701           xor %o2,-1,%o2        // invert
3702           st %o2,[%o0]          // store
3703           bne 4b
3704          _ add %o0,4,%o0
3705 5:      retl
3706        _ mov -1,%o0             // number was nonzero
3707 #endif
3708 #if COUNTER_LOOPS
3709         // search for the first digit /= 0:
3710         subcc %g0,%o1,%o1       // %o1 = -count
3711         be 2f                   // count == 0 -> return 0
3712        _ sll %o1,2,%o1          // %o1 = -4*count
3713         sub %o0,%o1,%o0         // %o0 = &ptr[count]
3714           ld [%o0+%o1],%o2      // fetch digit
3715 1:        subcc %g0,%o2,%o2     // negate, test
3716           bne 3f                // nonzero -> found
3717          _ addcc %o1,4,%o1      // step the negative byte offset towards 0: counts down, moves up in memory
3718           bne,a 1b              // digits left -> keep searching
3719          __ ld [%o0+%o1],%o2    // fetch next digit (only executed when looping)
3720 2:      retl
3721        _ mov 0,%o0              // number was zero
3722 3:      // first digit /= 0 found, from now on there are carries
3723         // invert all other digits:
3724         sub %o1,4,%o1           // undo the advance done in the delay slot above
3725         st %o2,[%o0+%o1]        // store the negated digit
3726         addcc %o1,4,%o1         // advance again; zero means that was the last digit
3727         be 5f
3728        _ nop
3729           ld [%o0+%o1],%o2      // fetch next digit
3730 4:        xor %o2,-1,%o2        // invert
3731           st %o2,[%o0+%o1]      // store
3732           addcc %o1,4,%o1       // advance offset towards 0
3733           bne,a 4b
3734          __ ld [%o0+%o1],%o2    // fetch next digit (only executed when looping)
3735 5:      retl
3736        _ mov -1,%o0             // number was nonzero
3737 #endif
3738
3739 // extern uintD shift1left_loop_up (uintD* ptr, uintC count);  Shifts the count digits at ascending addresses from ptr left by 1 bit in place and returns the bit shifted out of the last digit.
3740         DECLARE_FUNCTION(shift1left_loop_up)
3741 C(shift1left_loop_up:) // Input: %o0 = ptr, %o1 = count. Output: %o0 = carry out (0 or 1; 0 when count == 0)
3742         andcc %o1,%o1,%g0       // count == 0 ?
3743         be 2f                   // yes -> return 0
3744        _ mov 0,%o3              // carry := 0
3745 1:        ld [%o0],%o2          // digit
3746           subcc %g0,%o3,%g0     // carry flag := %o3
3747           addxcc %o2,%o2,%o2    // shift: digit + digit + carry
3748           addx %g0,%g0,%o3      // new carry
3749           st %o2,[%o0]          // store digit
3750           subcc %o1,1,%o1       // decrement counter
3751           bne 1b
3752          _ add %o0,4,%o0
3753 2:      retl
3754        _ mov %o3,%o0            // return carry
3755
3756 // extern uintD shiftleft_loop_up (uintD* ptr, uintC count, uintC i, uintD carry);  Shifts the count digits at ascending addresses from ptr left by i bits in place, ORs the incoming carry into the first digit, and returns the bits shifted out of the last digit.
3757         DECLARE_FUNCTION(shiftleft_loop_up)
3758 C(shiftleft_loop_up:) // Input: %o0 = ptr, %o1 = count, %o2 = i, %o3 = carry; clobbers %g1; Output in %o0 (the incoming carry when count == 0)
3759         andcc %o1,%o1,%g0       // count == 0 ?
3760         be 2f
3761        _ sub %g0,%o2,%g1        // 32-i (mod 32)
3762 1:        ld [%o0],%o4          // digit
3763           subcc %o1,1,%o1       // decrement counter (the shifts and or below leave the flags alone)
3764           sll %o4,%o2,%o5       // its lower (32-i) bits
3765           or %o3,%o5,%o5        // combine with the old carry
3766           st %o5,[%o0]          // store digit
3767           srl %o4,%g1,%o3       // its highest i bits become the new carry
3768           bne 1b
3769          _ add %o0,4,%o0
3770 2:      retl
3771        _ mov %o3,%o0            // return carry
3772
3773 #endif
3774
3775 // extern uintD shiftleftcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i);  Copies count digits from sourceptr to destptr (both ascending) shifted left by i bits and returns the bits shifted out of the last digit.
3776         DECLARE_FUNCTION(shiftleftcopy_loop_up)
3777 C(shiftleftcopy_loop_up:) // Input: %o0 = sourceptr, %o1 = destptr, %o2 = count, %o3 = i; clobbers %g1,%g2; Output in %o0 (0 when count == 0)
3778         andcc %o2,%o2,%g0       // count == 0 ?
3779         be 2f
3780        _ mov 0,%o4              // carry := 0
3781         sub %g0,%o3,%g1         // 32-i (mod 32)
3782 1:        ld [%o0],%o5          // digit
3783           subcc %o2,1,%o2       // decrement counter (the following instructions leave the flags alone)
3784           sll %o5,%o3,%g2       // its lower (32-i) bits
3785           or %o4,%g2,%g2        // combine with the old carry
3786           st %g2,[%o1]          // store digit
3787           add %o1,4,%o1
3788           srl %o5,%g1,%o4       // its highest i bits become the new carry
3789           bne 1b
3790          _ add %o0,4,%o0
3791 2:      retl
3792        _ mov %o4,%o0            // return carry
3793
3794 #if !CL_DS_BIG_ENDIAN_P
3795
3796 // extern uintD shift1right_loop_down (uintD* ptr, uintC count, uintD carry);  Shifts the count digits just below ptr (descending addresses) right by 1 bit in place, feeding the incoming carry into the top bit of the first digit.
3797         DECLARE_FUNCTION(shift1right_loop_down)
3798 C(shift1right_loop_down:) // Input: %o0 = ptr, %o1 = count, %o2 = carry. Output: %o0 = bit shifted out of the last digit, placed in bit 31
3799         andcc %o1,%o1,%g0       // count == 0 ?
3800         be 2f
3801        _ sll %o2,31,%o2         // move carry into bit 31
3802         sub %o0,4,%o0           // step down to the first digit
3803 1:        ld [%o0],%o3          // digit
3804           subcc %o1,1,%o1       // decrement counter (the following instructions leave the flags alone)
3805           srl %o3,1,%o4         // shift
3806           or %o2,%o4,%o4        // and combine with the old carry
3807           st %o4,[%o0]          // and store
3808           sll %o3,31,%o2        // new carry
3809           bne 1b
3810          _ sub %o0,4,%o0
3811 2:      retl
3812        _ mov %o2,%o0            // return carry
3813
3814 // extern uintD shiftright_loop_down (uintD* ptr, uintC count, uintC i);  Shifts the count digits just below ptr (descending addresses) right by i bits in place and returns the bits shifted out of the last digit, left-aligned.
3815         DECLARE_FUNCTION(shiftright_loop_down)
3816 C(shiftright_loop_down:) // Input: %o0 = ptr, %o1 = count, %o2 = i; clobbers %g1; Output in %o0 (0 when count == 0)
3817         sub %g0,%o2,%g1         // 32-i (mod 32)
3818         andcc %o1,%o1,%g0       // count == 0 ?
3819         be 2f
3820        _ or %g0,%g0,%o3         // carry := 0
3821         sub %o0,4,%o0           // step down to the first digit
3822 1:        ld [%o0],%o4          // digit
3823           subcc %o1,1,%o1       // decrement counter (the following instructions leave the flags alone)
3824           srl %o4,%o2,%o5       // shift
3825           or %o3,%o5,%o5        // and combine with the old carry
3826           st %o5,[%o0]          // and store
3827           sll %o4,%g1,%o3       // new carry
3828           bne 1b
3829          _ sub %o0,4,%o0
3830 2:      retl
3831        _ mov %o3,%o0            // return carry
3832
3833 // extern uintD shiftrightsigned_loop_down (uintD* ptr, uintC count, uintC i);  Like a right shift by i bits over the count digits just below ptr, but the first digit is shifted arithmetically (sign bits shifted in).
3834         DECLARE_FUNCTION(shiftrightsigned_loop_down)
3835 C(shiftrightsigned_loop_down:) // Input: %o0 = ptr, %o1 = count, %o2 = i; clobbers %g1; Output in %o0. The first digit is processed before count is tested, so count == 0 is not handled.
3836         ld [%o0-4],%o4          // first digit
3837         sub %g0,%o2,%g1         // 32-i (mod 32)
3838         sra %o4,%o2,%o5         // arithmetic shift
3839         st %o5,[%o0-4]          // and store
3840         sll %o4,%g1,%o3         // new carry
3841         subcc %o1,1,%o1         // one digit done; was it the only one?
3842         be 2f
3843        _ sub %o0,8,%o0          // point at the second digit
3844 1:        ld [%o0],%o4          // digit
3845           subcc %o1,1,%o1       // decrement counter (the following instructions leave the flags alone)
3846           srl %o4,%o2,%o5       // shift
3847           or %o3,%o5,%o5        // and combine with the old carry
3848           st %o5,[%o0]          // and store
3849           sll %o4,%g1,%o3       // new carry
3850           bne 1b
3851          _ sub %o0,4,%o0
3852 2:      retl
3853        _ mov %o3,%o0            // return carry
3854
3855 // extern uintD shiftrightcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);  Copies the count digits just below sourceptr to just below destptr (both descending) shifted right by i bits, starting from the incoming carry.
3856         DECLARE_FUNCTION(shiftrightcopy_loop_down)
3857 C(shiftrightcopy_loop_down:) // Input: %o0 = sourceptr, %o1 = destptr, %o2 = count, %o3 = i, %o4 = carry; clobbers %g1,%g2; Output in %o0 = bits shifted out of the last digit, left-aligned
3858         sub %g0,%o3,%g1         // 32-i (mod 32)
3859         andcc %o2,%o2,%g0       // count == 0 ?
3860         be 2f
3861        _ sll %o4,%g1,%g2        // first carry = incoming carry shifted left by 32-i
3862           sub %o0,4,%o0         // step down to the first source digit
3863 1:        ld [%o0],%o4          // digit
3864           sub %o1,4,%o1         // step down to the destination digit
3865           srl %o4,%o3,%o5       // shift
3866           or %g2,%o5,%o5        // and combine with the old carry
3867           st %o5,[%o1]          // and store
3868           sll %o4,%g1,%g2       // new carry
3869           subcc %o2,1,%o2       // decrement counter
3870           bne 1b
3871          _ sub %o0,4,%o0
3872 2:      retl
3873        _ mov %g2,%o0            // return carry
3874
3875 // extern uintD mulusmall_loop_up (uintD digit, uintD* ptr, uintC len, uintD newdigit);  Multiplies the len digits at ascending addresses from ptr in place by the small number digit, adding newdigit as the initial carry, and returns the final carry.
3876         DECLARE_FUNCTION(mulusmall_loop_up)
3877 C(mulusmall_loop_up:) // Input: %o0 = digit, %o1 = ptr, %o2 = len, %o3 = newdigit. Output: %o0 = last carry (newdigit when len == 0)
3878         andcc %o2,%o2,%g0       // len == 0 ?
3879         be 3f
3880        _ nop
3881 1:        // multiply the next digit [%o1] by the 6-bit number %o0
3882           // and add the small carry %o3:
3883           mov %o0,%y            // multiplier (6 bits) into %y
3884           ld [%o1],%o4          // wait cycle! (loads the digit while the %y write settles)
3885           addcc %o3,%o3,%o5     // start the partial product with 2*carry -- presumably to offset the extra shift of the final mulscc; TODO confirm
3886           mulscc %o5,%o4,%o5    // six multiply steps, one per multiplier bit
3887           mulscc %o5,%o4,%o5
3888           mulscc %o5,%o4,%o5
3889           mulscc %o5,%o4,%o5
3890           mulscc %o5,%o4,%o5
3891           mulscc %o5,%o4,%o5
3892           mulscc %o5,%g0,%o5    // final step with a zero multiplicand (shift only)
3893           // The 26 lower bits of %o5 and the 6 upper bits of %y
3894           // form the result. (The other bits are zero.)
3895           tst %o4               // correction if %o4 was negative
3896           bge 2f
3897          _ sra %o5,26,%o3       // 6 upper bits of %o5 -> new carry
3898           add %o3,%o0,%o3       // (if %o4 was negative, additionally + %o0)
3899 2:        rd %y,%o4
3900           srl %o4,26,%o4        // 6 upper bits of %y
3901           sll %o5,6,%o5         // 26 lower bits of %o5
3902           or %o5,%o4,%o4        // new digit
3903           st %o4,[%o1]          // store
3904           subcc %o2,1,%o2       // decrement counter
3905           bne 1b
3906          _ add %o1,4,%o1
3907 3:      retl
3908        _ mov %o3,%o0            // return carry
3909
3910 // extern void mulu_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);  Multiplies the len digits at sourceptr (ascending) by digit, stores the len low result digits at destptr and the final carry at destptr[len]. The loop body runs before len is tested, so len == 0 is not handled.
3911 #if !MULU32_INLINE
3912         DECLARE_FUNCTION(mulu_loop_up)
3913 C(mulu_loop_up:) // Input: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len (after the save); calls mulu32_
3914         save %sp,-96,%sp        // new register window and minimal stack frame
3915         mov 0,%l0               // carry
3916 1:        ld [%i1],%o1          // next digit
3917           add %i1,4,%i1
3918           call C(mulu32_)       // multiply by digit; C() applies the platform symbol prefix, matching the .global declaration
3919          _ mov %i0,%o0          // first argument = digit (delay slot)
3920           addcc %l0,%o0,%o0     // add the previous carry to the low digit
3921           addx %g0,%g1,%l0      // high digit (read from %g1 after the call) plus carry bit gives the new carry
3922           st %o0,[%i2]          // store low digit
3923           subcc %i3,1,%i3       // decrement counter
3924           bne 1b
3925          _ add %i2,4,%i2
3926         st %l0,[%i2]            // store last carry
3927         ret
3928        _ restore
3929 #else
3930         DECLARE_FUNCTION(mulu_loop_up)
3931 C(mulu_loop_up:) // Input: %o0 = digit, %o1 = sourceptr, %o2 = destptr, %o3 = len; clobbers %g1
3932         mov 0,%o4               // carry
3933 1:        ld [%o1],%g1          // next digit
3934           // multiply by digit: %o0 * %g1 -> %o5|%g1
3935 #ifdef sparcv8
3936           add     %o1,4,%o1
3937           umul    %g1,%o0,%g1   // low 32 bits of the product
3938           rd      %y,%o5        // high 32 bits of the product
3939 #else
3940           mov     %g1,%y        // multiplier into %y
3941           add     %o1,4,%o1     // wait cycle!
3942           andcc   %g0,%g0,%o5   // partial product := 0, condition codes cleared
3943           mulscc  %o5,%o0,%o5   // 32 multiply steps, one per multiplier bit
3944           mulscc  %o5,%o0,%o5
3945           mulscc  %o5,%o0,%o5
3946           mulscc  %o5,%o0,%o5
3947           mulscc  %o5,%o0,%o5
3948           mulscc  %o5,%o0,%o5
3949           mulscc  %o5,%o0,%o5
3950           mulscc  %o5,%o0,%o5
3951           mulscc  %o5,%o0,%o5
3952           mulscc  %o5,%o0,%o5
3953           mulscc  %o5,%o0,%o5
3954           mulscc  %o5,%o0,%o5
3955           mulscc  %o5,%o0,%o5
3956           mulscc  %o5,%o0,%o5
3957           mulscc  %o5,%o0,%o5
3958           mulscc  %o5,%o0,%o5
3959           mulscc  %o5,%o0,%o5
3960           mulscc  %o5,%o0,%o5
3961           mulscc  %o5,%o0,%o5
3962           mulscc  %o5,%o0,%o5
3963           mulscc  %o5,%o0,%o5
3964           mulscc  %o5,%o0,%o5
3965           mulscc  %o5,%o0,%o5
3966           mulscc  %o5,%o0,%o5
3967           mulscc  %o5,%o0,%o5
3968           mulscc  %o5,%o0,%o5
3969           mulscc  %o5,%o0,%o5
3970           mulscc  %o5,%o0,%o5
3971           mulscc  %o5,%o0,%o5
3972           mulscc  %o5,%o0,%o5
3973           mulscc  %o5,%o0,%o5
3974           mulscc  %o5,%o0,%o5
3975           mulscc  %o5,%g0,%o5   // final step with a zero multiplicand (shift only)
3976           tst     %o0           // mulscc treats the multiplicand as signed
3977           bl,a    2f            // digit negative -> correct the high word
3978          __ add     %o5,%g1,%o5 // add the source digit (only executed when the branch is taken)
3979 2:        rd      %y,%g1        // low 32 bits of the product
3980 #endif
3981           addcc %o4,%g1,%g1     // add the previous carry to the low digit
3982           addx %g0,%o5,%o4      // high digit plus carry bit gives the new carry
3983           st %g1,[%o2]          // store low digit
3984           subcc %o3,1,%o3       // decrement counter
3985           bne 1b
3986          _ add %o2,4,%o2
3987         retl
3988        _ st %o4,[%o2]           // store last carry
3989 #endif
3990
3991 // extern uintD muluadd_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);  Adds digit times the len digits at sourceptr to the len digits at destptr in place (both ascending) and returns the final carry. The loop body runs before len is tested, so len == 0 is not handled.
3992         DECLARE_FUNCTION(muluadd_loop_up)
3993 C(muluadd_loop_up:) // Input: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len (after the save); Output in %i0, which is the caller's %o0 after restore
3994 #if !MULU32_INLINE
3995         save %sp,-96,%sp        // new register window and minimal stack frame
3996         mov 0,%l0               // carry
3997 1:        ld [%i1],%o1          // next source digit
3998           add %i1,4,%i1
3999           call C(mulu32_)       // multiply by digit; C() applies the platform symbol prefix, matching the .global declaration
4000          _ mov %i0,%o0          // first argument = digit (delay slot)
4001           ld [%i2],%o1          // next dest digit
4002           addcc %l0,%o0,%o0     // add the previous carry to the low digit
4003           addx %g0,%g1,%l0      // high digit (read from %g1 after the call) plus carry bit gives the new carry
4004           addcc %o1,%o0,%o0     // add the dest digit
4005           addx %g0,%l0,%l0      // propagate that carry bit as well
4006           st %o0,[%i2]          // store low digit
4007           subcc %i3,1,%i3       // decrement counter
4008           bne 1b
4009          _ add %i2,4,%i2
4010         mov %l0,%i0             // last carry
4011         ret
4012        _ restore
4013 #else
4014         save %sp,-96,%sp        // new register window and minimal stack frame
4015         mov 0,%l0               // carry
4016 #ifndef sparcv8
4017         sra %i0,31,%l1          // 0 if %i0>=0, -1 if %i0<0
4018 #endif
4019 1:        ld [%i1],%o1          // next source digit
4020           add %i1,4,%i1
4021           // multiply by digit: %i0 * %o1 -> %o2|%o0
4022 #ifdef sparcv8
4023           umul    %i0,%o1,%o0   // low 32 bits of the product
4024           rd      %y,%o2        // high 32 bits of the product
4025 #else
4026           mov     %o1,%y        // multiplier into %y
4027           and     %o1,%l1,%o3   // wait cycle! (also prepares the sign correction term)
4028           andcc   %g0,%g0,%o2   // partial product := 0, condition codes cleared
4029           mulscc  %o2,%i0,%o2   // 32 multiply steps, one per multiplier bit
4030           mulscc  %o2,%i0,%o2
4031           mulscc  %o2,%i0,%o2
4032           mulscc  %o2,%i0,%o2
4033           mulscc  %o2,%i0,%o2
4034           mulscc  %o2,%i0,%o2
4035           mulscc  %o2,%i0,%o2
4036           mulscc  %o2,%i0,%o2
4037           mulscc  %o2,%i0,%o2
4038           mulscc  %o2,%i0,%o2
4039           mulscc  %o2,%i0,%o2
4040           mulscc  %o2,%i0,%o2
4041           mulscc  %o2,%i0,%o2
4042           mulscc  %o2,%i0,%o2
4043           mulscc  %o2,%i0,%o2
4044           mulscc  %o2,%i0,%o2
4045           mulscc  %o2,%i0,%o2
4046           mulscc  %o2,%i0,%o2
4047           mulscc  %o2,%i0,%o2
4048           mulscc  %o2,%i0,%o2
4049           mulscc  %o2,%i0,%o2
4050           mulscc  %o2,%i0,%o2
4051           mulscc  %o2,%i0,%o2
4052           mulscc  %o2,%i0,%o2
4053           mulscc  %o2,%i0,%o2
4054           mulscc  %o2,%i0,%o2
4055           mulscc  %o2,%i0,%o2
4056           mulscc  %o2,%i0,%o2
4057           mulscc  %o2,%i0,%o2
4058           mulscc  %o2,%i0,%o2
4059           mulscc  %o2,%i0,%o2
4060           mulscc  %o2,%i0,%o2
4061           mulscc  %o2,%g0,%o2   // final step with a zero multiplicand (shift only)
4062           add     %o2,%o3,%o2   // %o3 = (0 if %i0>=0, %o1 if %i0<0)
4063           rd      %y,%o0        // low 32 bits of the product
4064 #endif
4065           ld [%i2],%o1          // next dest digit
4066           addcc %l0,%o0,%o0     // add the previous carry to the low digit
4067           addx %g0,%o2,%l0      // high digit plus carry bit gives the new carry
4068           addcc %o1,%o0,%o0     // add the dest digit
4069           addx %g0,%l0,%l0      // propagate that carry bit as well
4070           st %o0,[%i2]          // store low digit
4071           subcc %i3,1,%i3       // decrement counter
4072           bne 1b
4073          _ add %i2,4,%i2
4074         mov %l0,%i0             // last carry
4075         ret
4076        _ restore
4077 #endif
4078
4079 // extern uintD mulusub_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);  // dest[] -= digit * source[] over len digits at ascending addresses; returns the final carry
4080         DECLARE_FUNCTION(mulusub_loop_up)
4081 C(mulusub_loop_up:) // Input in %i0,%i1,%i2,%i3, Output in %i0; len (%i3) is not tested for 0, the loop body always runs at least once
4082 #if !MULU32_INLINE
4083         save %sp,-96,%sp
4084         mov 0,%l0               // carry
4085 1:        ld [%i1],%o1          // next source digit (second argument of the call)
4086           add %i1,4,%i1
4087           call C(mulu32_)       // multiply by digit; C() applies the platform symbol prefix, matching the exported name
4088          _ mov %i0,%o0          // delay slot: first argument = digit
4089           ld [%i2],%o1          // next dest digit
4090           addcc %l0,%o0,%o0     // add the previous carry to the low digit of the product
4091           addx %g0,%g1,%l0      // high digit (read from %g1 after the call) plus carry bit gives the new carry
4092           subcc %o1,%o0,%o1     // subtract the low digit from the dest digit
4093           addx %g0,%l0,%l0      // a borrow from the subtraction increments the carry
4094           st %o1,[%i2]         // store dest digit
4095           subcc %i3,1,%i3       // one digit done
4096           bne 1b
4097          _ add %i2,4,%i2        // delay slot: advance destptr
4098         mov %l0,%i0             // final carry
4099         ret
4100        _ restore
4101 #else
4102         save %sp,-96,%sp
4103         mov 0,%l0               // carry
4104 #ifndef sparcv8
4105         sra %i0,31,%l1          // 0 if %i0>=0, -1 if %i0<0
4106 #endif
4107 1:        ld [%i1],%o1          // next source digit
4108           add %i1,4,%i1
4109           // multiply by digit: %i0 * %o1 -> %o2|%o0
4110 #ifdef sparcv8
4111           umul    %i0,%o1,%o0
4112           rd      %y,%o2
4113 #else
4114           mov     %o1,%y
4115           and     %o1,%l1,%o3   // wait cycle! (also prepares the correction term %o3)
4116           andcc   %g0,%g0,%o2   // clear the partial product and the condition codes
4117           mulscc  %o2,%i0,%o2   // 32 multiply steps with %i0 follow
4118           mulscc  %o2,%i0,%o2
4119           mulscc  %o2,%i0,%o2
4120           mulscc  %o2,%i0,%o2
4121           mulscc  %o2,%i0,%o2
4122           mulscc  %o2,%i0,%o2
4123           mulscc  %o2,%i0,%o2
4124           mulscc  %o2,%i0,%o2
4125           mulscc  %o2,%i0,%o2
4126           mulscc  %o2,%i0,%o2
4127           mulscc  %o2,%i0,%o2
4128           mulscc  %o2,%i0,%o2
4129           mulscc  %o2,%i0,%o2
4130           mulscc  %o2,%i0,%o2
4131           mulscc  %o2,%i0,%o2
4132           mulscc  %o2,%i0,%o2
4133           mulscc  %o2,%i0,%o2
4134           mulscc  %o2,%i0,%o2
4135           mulscc  %o2,%i0,%o2
4136           mulscc  %o2,%i0,%o2
4137           mulscc  %o2,%i0,%o2
4138           mulscc  %o2,%i0,%o2
4139           mulscc  %o2,%i0,%o2
4140           mulscc  %o2,%i0,%o2
4141           mulscc  %o2,%i0,%o2
4142           mulscc  %o2,%i0,%o2
4143           mulscc  %o2,%i0,%o2
4144           mulscc  %o2,%i0,%o2
4145           mulscc  %o2,%i0,%o2
4146           mulscc  %o2,%i0,%o2
4147           mulscc  %o2,%i0,%o2
4148           mulscc  %o2,%i0,%o2
4149           mulscc  %o2,%g0,%o2   // last step with a zero operand
4150           add     %o2,%o3,%o2   // %o3 = (0 if %i0>=0, %o1 if %i0<0)
4151           rd      %y,%o0        // low digit of the product
4152 #endif
4153           ld [%i2],%o1          // next dest digit
4154           addcc %l0,%o0,%o0     // add the previous carry to the low digit of the product
4155           addx %g0,%o2,%l0      // high digit plus carry bit gives the new carry
4156           subcc %o1,%o0,%o1     // subtract the low digit from the dest digit
4157           addx %g0,%l0,%l0      // a borrow from the subtraction increments the carry
4158           st %o1,[%i2]          // store dest digit
4159           subcc %i3,1,%i3       // one digit done
4160           bne 1b
4161          _ add %i2,4,%i2        // delay slot: advance destptr
4162         mov %l0,%i0             // final carry
4163         ret
4164        _ restore
4165 #endif
4166
4167 // extern uintD divu_loop_down (uintD digit, uintD* ptr, uintC len);  // divides the len digits below ptr by digit in place, walking to lower addresses; returns the remainder
4168         DECLARE_FUNCTION(divu_loop_down)
4169 C(divu_loop_down:) // Input in %i0,%i1,%i2, Output in %i0
4170         save %sp,-96,%sp
4171         andcc %i2,%i2,%g0           // len == 0 ?
4172         be 2f                       // yes -> return remainder 0
4173        _ mov 0,%g1                 // remainder
4174 1:        mov %g1,%o0              // remainder as high digit
4175           ld [%i1-4],%o1           // next digit as low digit
4176           call C(divu_6432_3232_)  // divide both together by digit
4177          _ mov %i0,%o2             // delay slot: third argument = digit
4178           st %o0,[%i1-4]           // store quotient, remainder in %g1
4179           subcc %i2,1,%i2          // one digit done
4180           bne 1b
4181          _ sub %i1,4,%i1           // delay slot: step ptr down by one digit
4182 2:      mov %g1,%i0                // remainder as result
4183         ret
4184        _ restore
4185
4186 // extern uintD divucopy_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);  // divides the len digits below sourceptr by digit, stores the quotient digits below destptr; returns the remainder
4187         DECLARE_FUNCTION(divucopy_loop_down)
4188 C(divucopy_loop_down:) // Input in %i0,%i1,%i2,%i3, Output in %i0
4189         save %sp,-96,%sp
4190         andcc %i3,%i3,%g0           // len == 0 ?
4191         be 2f                       // yes -> return remainder 0
4192        _ mov 0,%g1                 // remainder
4193 1:        mov %g1,%o0              // remainder as high digit
4194           ld [%i1-4],%o1           // next digit as low digit
4195           call C(divu_6432_3232_)  // divide both together by digit
4196          _ mov %i0,%o2             // delay slot: third argument = digit
4197           sub %i2,4,%i2            // step destptr down by one digit before storing
4198           st %o0,[%i2]             // store quotient, remainder in %g1
4199           subcc %i3,1,%i3          // one digit done
4200           bne 1b
4201          _ sub %i1,4,%i1           // delay slot: step sourceptr down by one digit
4202 2:      mov %g1,%i0                // remainder as result
4203         ret
4204        _ restore
4205
4206 #endif
4207
4208 // extern void shiftxor_loop_up (uintD* xptr, const uintD* yptr, uintC count, uintC i);  // xors the count digits at yptr, shifted left by i bits, into the digits at xptr (ascending addresses); when count != 0 the digit at xptr[count] is written too
4209         DECLARE_FUNCTION(shiftxor_loop_up)
4210 C(shiftxor_loop_up:) // Input in %o0,%o1,%o2,%o3, modifies %g1,%g2 (and uses %o4,%o5 as scratch)
4211         andcc %o2,%o2,%g0        // count == 0 ?
4212         be 2f                    // yes -> nothing to do
4213        _ sub %g0,%o3,%g1        // 32-i (mod 32)
4214         sub %o1,%o0,%o1         // yptr-xptr, so that [%o0+%o1] addresses the y digit matching [%o0]
4215         ld [%o0],%o4            // fetch *xptr
4216 1:        ld [%o0+%o1],%o5      // fetch *yptr
4217           subcc %o2,1,%o2       // count down; the condition codes stay untouched until the bne below
4218           sll %o5,%o3,%g2       // its low (32-i) bits
4219           xor %o4,%g2,%o4       // combine with the modified *xptr
4220           st %o4,[%o0]          // and store
4221           add %o0,4,%o0         // advance xptr (and, through the offset in %o1, yptr)
4222           srl %o5,%g1,%g2       // highest i bits of *yptr
4223           ld [%o0],%o4          // already combine them with the next *xptr
4224           bne 1b
4225          _ xor %o4,%g2,%o4      // (the xor of that combination, executed in the delay slot on every pass)
4226         st %o4,[%o0]            // and store
4227 2:      retl
4228        _ nop
4229