cl_asm_sparc64_.cc

   1 // Externe Routinen zu ARILEV1.D
   2 // Prozessor: SPARC 64-bit
   3 // Compiler: GNU-C oder ...
   4 // Parameter-Übergabe: in Registern %o0-%o5.
   5 // Parameter-Übergabe: in Registern %o0-%o5.
   6 //   Argumente vom Typ uint8, uint16, uint32 sind bereits vom Aufrufer zu
   7 //   uint64 umgewandelt worden (zero-extend, "srl reg,0,reg").
   8 //   Argumente vom Typ sint8, sint16, sint32 sind bereits vom Aufrufer zu
   9 //   sint64 umgewandelt worden (sign-extend, "sra reg,0,reg").
  10 //   Ergebnisse vom Typ uint8, uint16, uint32 müssen vor Rückgabe zu uint64
  11 //   umgewandelt werden (zero-extend, "srl reg,0,reg").
  12 //   Ergebnisse vom Typ sint8, sint16, sint32 müssen vor Rückgabe zu sint64
  13 //   umgewandelt werden (sign-extend, "sra reg,0,reg").
  14 // Einstellungen: intCsize=32, intDsize=32.
  15
  16 #ifdef ASM_UNDERSCORE
  17   #define C(entrypoint) _##entrypoint   // symbol name gets a leading underscore
  18 #else
  19   #define C(entrypoint) entrypoint      // symbol name is used unchanged
  20 #endif
  21
  22 // When this file is compiled into a shared library, ELF linkers need to
  23 // know which symbols are functions. DECLARE_FUNCTION(name) emits the
  24 #if defined(__NetBSD__) || defined(__OpenBSD__)
  25   #define DECLARE_FUNCTION(name) .type C(name),@function
  26 #elif defined(__svr4__) || defined(__ELF__)
  27   // Some preprocessors keep the backslash in place, some don't.
  28   // Some complain about the # being not in front of an ANSI C macro.
  29   // Therefore we use a dollar, which will be sed-converted to # later.
  30   #define DECLARE_FUNCTION(name) .type C(name),$function
  31 #else
  32   #define DECLARE_FUNCTION(name)   // expands to nothing on all other platforms
  33 #endif
  34
  35   // Indicators for instructions in delay slots
  36   // (these are executed BEFORE the preceding instruction):
  37   #define _             // instruction that is always executed
  38   #define __            // instruction that is executed only when the branch is taken
  39   // Abbreviations for instructions:
  40   #define ret   jmp %i7+8    // return from subroutine
  41   #define retl  jmp %o7+8    // return from leaf subroutine (no save/restore)
  42
  43         .seg "text"             // everything below is assembled into the text segment
  44
  45         .register %g2,#scratch  // %g2 is used as a scratch register (mulu64_ returns its high word there)
  46
  47         .global C(mulu16_),C(mulu32_),C(mulu32_unchecked),C(mulu64_)   // multiplication primitives
  48         .global C(divu_6432_3232_),C(divu_3216_1616_)                  // division primitives
  49         .global C(copy_loop_up),C(copy_loop_down),C(fill_loop_up),C(fill_loop_down)
  50         .global C(clear_loop_up),C(clear_loop_down)
  51         .global C(test_loop_up),C(test_loop_down)
  52         .global C(xor_loop_up),C(compare_loop_up),C(shiftleftcopy_loop_up),C(shiftxor_loop_up)
  53 #if CL_DS_BIG_ENDIAN_P
  54         .global C(or_loop_up),C(and_loop_up),C(eqv_loop_up)             // exported only if CL_DS_BIG_ENDIAN_P is set
  55         .global C(nand_loop_up),C(nor_loop_up),C(andc2_loop_up),C(orc2_loop_up)
  56         .global C(not_loop_up)
  57         .global C(and_test_loop_up)
  58         .global C(add_loop_down),C(addto_loop_down),C(inc_loop_down)
  59         .global C(sub_loop_down),C(subx_loop_down),C(subfrom_loop_down),C(dec_loop_down)
  60         .global C(neg_loop_down)
  61         .global C(shift1left_loop_down),C(shiftleft_loop_down),C(shiftleftcopy_loop_down)
  62         .global C(shift1right_loop_up),C(shiftright_loop_up),C(shiftrightsigned_loop_up),C(shiftrightcopy_loop_up)
  63         .global C(mulusmall_loop_down),C(mulu_loop_down),C(muluadd_loop_down),C(mulusub_loop_down)
  64 #else
  65         .global C(or_loop_down),C(xor_loop_down),C(and_loop_down),C(eqv_loop_down)   // exported only if CL_DS_BIG_ENDIAN_P is not set
  66         .global C(nand_loop_down),C(nor_loop_down),C(andc2_loop_down),C(orc2_loop_down)
  67         .global C(not_loop_down)
  68         .global C(and_test_loop_down),C(compare_loop_down)
  69         .global C(add_loop_up),C(addto_loop_up),C(inc_loop_up)
  70         .global C(sub_loop_up),C(subx_loop_up),C(subfrom_loop_up),C(dec_loop_up)
  71         .global C(neg_loop_up)
  72         .global C(shift1left_loop_up),C(shiftleft_loop_up)
  73         .global C(shift1right_loop_down),C(shiftright_loop_down),C(shiftrightsigned_loop_down),C(shiftrightcopy_loop_down)
  74         .global C(mulusmall_loop_up),C(mulu_loop_up),C(muluadd_loop_up),C(mulusub_loop_up)
  75 #endif
  76
  77 #define LOOP_TYPE  1    // 1: standard loops
  78                         // 2: loops without pointers, using only a counter
  79 #define STANDARD_LOOPS  (LOOP_TYPE==1)
  80 #define COUNTER_LOOPS  (LOOP_TYPE==2)
  81
  82 // extern uint32 mulu16_ (uint16 arg1, uint16 arg2);
  83 // result := arg1*arg2.
  84         DECLARE_FUNCTION(mulu16_)
  85 C(mulu16_:) // Input in %o0,%o1, Output in %o0
  86         umul %o0,%o1,%o2        // %o2 := arg1*arg2
  87         retl
  88        _ srl %o2,0,%o0          // delay slot: zero-extend the result into %o0
  89
  90 // extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
  91 // 2^32*hi+lo := arg1*arg2.  lo is returned in %o0, hi in %g1.
  92         DECLARE_FUNCTION(mulu32_)
  93 C(mulu32_:) // Input in %o0,%o1, Output in %o0,%g1
  94         umul %o0,%o1,%o2        // %o2 := arg1*arg2
  95         rd %y,%g1               // %g1 := hi, read from the %y register
  96         retl
  97        _ srl %o2,0,%o0          // delay slot: %o0 := lo, zero-extended
  98
  99 // extern uint32 mulu32_unchecked (uint32 x, uint32 y);
 100 // result := arg1*arg2 < 2^32 (only the low 32 bits are returned).
 101         DECLARE_FUNCTION(mulu32_unchecked)
 102 C(mulu32_unchecked:) // Input in %o0,%o1, Output in %o0
 103         umul %o0,%o1,%o2        // %o2 := x*y
 104         retl
 105        _ srl %o2,0,%o0          // delay slot: zero-extend the low 32 bits into %o0
 106
 107 // extern struct { uint64 lo; uint64 hi; } mulu64_ (uint64 arg1, uint64 arg2);
 108 // 2^64*hi+lo := arg1*arg2.  Built from four 32x32-bit partial products.
 109         DECLARE_FUNCTION(mulu64_)
 110 C(mulu64_:) // Input in %o0,%o1, Output in %o0,%g2
 111         srlx %o0,32,%o2         // %o2 = high32(arg1)
 112         srl %o0,0,%o0           // %o0 = low32(arg1)
 113         srlx %o1,32,%o3         // %o3 = high32(arg2)
 114         srl %o1,0,%o1           // %o1 = low32(arg2)
 115         mulx %o2,%o3,%g2        // high part
 116         mulx %o2,%o1,%o2        // first mid part
 117         mulx %o0,%o3,%o3        // second mid part
 118         addcc %o2,%o3,%o2       // sum of mid parts
 119         mov 0,%o3               // %o3 := 0, replaced by 1 below if the sum carried
 120         movcs %xcc,1,%o3        // carry from sum of mid parts
 121         sllx %o3,32,%o3         // move that carry to bit 32
 122         add %g2,%o3,%g2         // add to high part
 123         srlx %o2,32,%o3         // %o3 = high32(midparts)
 124         add %g2,%o3,%g2         // add high32(midparts) to high part
 125         mulx %o0,%o1,%o0        // low part
 126         sllx %o2,32,%o2         // %o2 = low32(midparts)*2^32
 127         addcc %o0,%o2,%o0       // add low32(midparts)*2^32 to low part
 128         add %g2,1,%o3           // %o3 = high part + 1, used only if the addcc carried
 129         retl
 130        _ movcs %xcc,%o3,%g2     // add carry to high part
 131
 132 // extern struct { uint32 q; uint32 r; } divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y);
 133 // Write x = 2^32*xhi+xlo = q*y+r. It is known that 0 <= x < 2^32*y .
 134         DECLARE_FUNCTION(divu_6432_3232_)
 135 C(divu_6432_3232_:) // Input in %o0,%o1,%o2, Output in %o0,%g1
 136         wr %o0,%g0,%y           // %y := xhi
 137         udiv %o1,%o2,%o0        // divide x by y, %o0 := q
 138         umul %o0,%o2,%g1        // %g1 := (q*y) mod 2^32
 139         sub %o1,%g1,%g1         // %g1 := (xlo-q*y) mod 2^32 = r
 140         retl
 141        _ srl %o0,0,%o0          // delay slot: zero-extend q
 142
 143 // extern struct { uint16 q; uint16 r; } divu_3216_1616_ (uint32 x, uint16 y);
 144 // Write x = q*y+r. It is known that 0 <= x < 2^16*y .
 145         DECLARE_FUNCTION(divu_3216_1616_)
 146 C(divu_3216_1616_:) // Input in %o0,%o1, Output in %o0 (remainder and quotient).
 147         wr %g0,%g0,%y           // %y := 0
 148         udiv %o0,%o1,%o2        // divide, quotient goes to %o2
 149 #if 0 // Who says that %y has some meaningful contents after `udiv' ??
 150         rd %y,%g1               // remainder from %y
 151 #else
 152         umul %o2,%o1,%g1        // %g1 := (q*y) mod 2^32
 153         sub %o0,%g1,%g1         // %g1 := (x-q*y) mod 2^32 = r
 154 #endif
 155         sll %g1,16,%g1          // shift it into the upper 16 bits
 156         or %o2,%g1,%o0          // %o0 := 2^16*r + q
 157         retl
 158        _ srl %o0,0,%o0          // delay slot: zero-extend the packed result
 159
 160 #if !defined(__GNUC__)
 161         .global C(_get_g1)
 162 // extern uint32 _get_g1 (void);  -- returns the current contents of %g1
 163         DECLARE_FUNCTION(_get_g1)
 164 C(_get_g1:) // Output in %o0
 165         retl
 166        _ srl %g1,0,%o0          // delay slot: %o0 := %g1, zero-extended
 167 #endif
 168
 169 #if !defined(__GNUC__)
 170         .global C(_get_g2)
 171 // extern uint64 _get_g2 (void);  -- returns the current contents of %g2
 172         DECLARE_FUNCTION(_get_g2)
 173 C(_get_g2:) // Output in %o0
 174         retl
 175        _ mov %g2,%o0            // delay slot: %o0 := %g2
 176 #endif
 177
 178 // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
 179         DECLARE_FUNCTION(copy_loop_up)
 180 C(copy_loop_up:) // Input in %o0,%o1,%o2, Output in %o0 (destptr after the last digit written)
 181 #if STANDARD_LOOPS
 182 //      srl %o2,0,%o2           // zero-extend %o2 = count
 183         brz,pn %o2,2f           // count = 0: nothing to copy
 184        _ nop
 185 1:        ldx [%o0],%o3         // fetch next digit
 186           add %o0,8,%o0         // sourceptr++
 187           stx %o3,[%o1]         // store digit
 188           subcc %o2,1,%o2       // count--
 189           bne,pt %xcc,1b
 190          _ add %o1,8,%o1        // destptr++
 191 2:      retl
 192        _ mov %o1,%o0            // return the advanced destptr
 193 #endif
 194 #if COUNTER_LOOPS
 195 //      srl %o2,0,%o2           // zero-extend %o2 = count
 196         brz,pn %o2,2f           // count = 0: nothing to copy
 197        _ sub %o1,8,%o1          // bias destptr by one digit (undone at label 2)
 198         sub %g0,%o2,%o2         // %o2 = -count
 199         sllx %o2,3,%o2          // %o2 = -8*count
 200         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
 201         sub %o1,%o2,%o1         // %o1 = &destptr[count-1]
 202 1:        ldx [%o0+%o2],%o3     // fetch next digit
 203           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 204           bne,pt %xcc,1b
 205          _ stx %o3,[%o1+%o2]    // store digit
 206 2:      retl
 207        _ add %o1,8,%o0          // return &destptr[count]
 208 #endif
 209
 210 // extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
 211         DECLARE_FUNCTION(copy_loop_down)
 212 C(copy_loop_down:) // Input in %o0,%o1,%o2, Output in %o0 (destptr at the last digit written)
 213 #if STANDARD_LOOPS
 214 //      srl %o2,0,%o2           // zero-extend %o2 = count
 215         brz,pn %o2,2f           // count = 0: nothing to copy
 216        _ sub %o0,8,%o0          // --sourceptr
 217 1:        ldx [%o0],%o3         // fetch next digit
 218           sub %o1,8,%o1         // --destptr
 219           stx %o3,[%o1]         // store digit
 220           subcc %o2,1,%o2       // count--
 221           bne,pt %xcc,1b
 222          _ sub %o0,8,%o0        // --sourceptr
 223 2:      retl
 224        _ mov %o1,%o0            // return the decremented destptr
 225 #endif
 226 #if COUNTER_LOOPS
 227 //      srl %o2,0,%o2           // zero-extend %o2 = count
 228         brz,pn %o2,2f           // count = 0: nothing to copy
 229        _ sub %o0,8,%o0
 230         sllx %o2,3,%o2          // %o2 = 8*count
 231         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
 232         sub %o1,%o2,%o1         // %o1 = &destptr[-count]
 233 1:        ldx [%o0+%o2],%o3     // fetch next digit
 234           subcc %o2,8,%o2       // decrement counter, decrement pointer
 235           bne,pt %xcc,1b
 236          _ stx %o3,[%o1+%o2]    // store digit
 237 2:      retl
 238        _ mov %o1,%o0            // return &destptr[-count]
 239 #endif
 240
 241 // extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler);
 242         DECLARE_FUNCTION(fill_loop_up)
 243 C(fill_loop_up:) // Input in %o0,%o1,%o2, Output in %o0 (destptr after the last digit written)
 244 #if STANDARD_LOOPS
 245 //      srl %o1,0,%o1           // zero-extend %o1 = count
 246         brz,pn %o1,2f           // count = 0: nothing to fill
 247        _ nop
 248 1:        stx %o2,[%o0]         // store filler
 249           subcc %o1,1,%o1       // count--
 250           bne,pt %xcc,1b
 251          _ add %o0,8,%o0        // destptr++
 252 2:      retl
 253        _ nop
 254 #endif
 255 #if COUNTER_LOOPS
 256 //      srl %o1,0,%o1           // zero-extend %o1 = count
 257         brz,pn %o1,2f           // count = 0: nothing to fill
 258        _ sub %o0,8,%o0          // bias destptr by one digit (undone at label 2)
 259         sub %g0,%o1,%o1         // %o1 = -count
 260         sllx %o1,3,%o1          // %o1 = -8*count
 261         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
 262 1:        addcc %o1,8,%o1       // "decrement" counter, advance pointer
 263           bne,pt %xcc,1b
 264          _ stx %o2,[%o0+%o1]    // store digit
 265 2:      retl
 266        _ add %o0,8,%o0          // return &destptr[count]
 267 #endif
 268
 269 // extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler);
 270         DECLARE_FUNCTION(fill_loop_down)
 271 C(fill_loop_down:) // Input in %o0,%o1,%o2, Output in %o0 (destptr at the last digit written)
 272 #if STANDARD_LOOPS
 273 //      srl %o1,0,%o1           // zero-extend %o1 = count
 274         brz,pn %o1,2f           // count = 0: nothing to fill
 275        _ sub %o0,8,%o0          // --destptr (undone at label 2)
 276 1:        stx %o2,[%o0]         // store filler
 277           subcc %o1,1,%o1       // count--
 278           bne,pt %xcc,1b
 279          _ sub %o0,8,%o0        // --destptr
 280 2:      retl
 281        _ add %o0,8,%o0          // undo the extra decrement
 282 #endif
 283 #if COUNTER_LOOPS
 284 //      srl %o1,0,%o1           // zero-extend %o1 = count
 285         brz,pn %o1,2f           // count = 0: nothing to fill
 286        _ sllx %o1,3,%o1         // %o1 = 8*count
 287         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
 288 1:        subcc %o1,8,%o1       // decrement counter, decrement pointer
 289           bne,pt %xcc,1b
 290          _ stx %o2,[%o0+%o1]    // store digit
 291 2:      retl
 292        _ nop
 293 #endif
 294
 295 // extern uintD* clear_loop_up (uintD* destptr, uintC count);
 296         DECLARE_FUNCTION(clear_loop_up)
 297 C(clear_loop_up:) // Input in %o0,%o1, Output in %o0 (destptr after the last digit cleared)
 298 #if STANDARD_LOOPS
 299 //      srl %o1,0,%o1           // zero-extend %o1 = count
 300         brz,pn %o1,2f           // count = 0: nothing to clear
 301        _ nop
 302 1:        stx %g0,[%o0]         // store digit 0
 303           subcc %o1,1,%o1       // count--
 304           bne,pt %xcc,1b
 305          _ add %o0,8,%o0        // destptr++
 306 2:      retl
 307        _ nop
 308 #endif
 309 #if COUNTER_LOOPS
 310 //      srl %o1,0,%o1           // zero-extend %o1 = count
 311         brz,pn %o1,2f           // count = 0: nothing to clear
 312        _ sub %o0,8,%o0          // bias destptr by one digit (undone at label 2)
 313         sub %g0,%o1,%o1         // %o1 = -count
 314         sllx %o1,3,%o1          // %o1 = -8*count
 315         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
 316 1:        addcc %o1,8,%o1       // "decrement" counter, advance pointer
 317           bne,pt %xcc,1b
 318          _ stx %g0,[%o0+%o1]    // store digit 0
 319 2:      retl
 320        _ add %o0,8,%o0          // return &destptr[count]
 321 #endif
 322
 323 // extern uintD* clear_loop_down (uintD* destptr, uintC count);
 324         DECLARE_FUNCTION(clear_loop_down)
 325 C(clear_loop_down:) // Input in %o0,%o1, Output in %o0 (destptr at the last digit cleared)
 326 #if STANDARD_LOOPS
 327 //      srl %o1,0,%o1           // zero-extend %o1 = count
 328         brz,pn %o1,2f           // count = 0: nothing to clear
 329        _ sub %o0,8,%o0          // --destptr (undone at label 2)
 330 1:        stx %g0,[%o0]         // store digit 0
 331           subcc %o1,1,%o1       // count--
 332           bne,pt %xcc,1b
 333          _ sub %o0,8,%o0        // --destptr
 334 2:      retl
 335        _ add %o0,8,%o0          // undo the extra decrement
 336 #endif
 337 #if COUNTER_LOOPS
 338 //      srl %o1,0,%o1           // zero-extend %o1 = count
 339         brz,pn %o1,2f           // count = 0: nothing to clear
 340        _ sllx %o1,3,%o1         // %o1 = 8*count
 341         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
 342 1:        subcc %o1,8,%o1       // decrement counter, decrement pointer
 343           bne,pt %xcc,1b
 344          _ stx %g0,[%o0+%o1]    // store digit 0
 345 2:      retl
 346        _ nop
 347 #endif
 348
 349 // extern boolean test_loop_up (uintD* ptr, uintC count);
 350         DECLARE_FUNCTION(test_loop_up)
 351 C(test_loop_up:) // Input in %o0,%o1, Output in %o0 (1 if some digit is nonzero, else 0)
 352 #if STANDARD_LOOPS
 353 //      srl %o1,0,%o1           // zero-extend %o1 = count
 354         brz,pn %o1,2f           // count = 0: result 0
 355        _ nop
 356           ldx [%o0],%o2         // fetch first digit
 357 1:        add %o0,8,%o0         // ptr++
 358           brnz,pn %o2,3f        // nonzero digit found
 359          _ subcc %o1,1,%o1      // count--
 360           bne,a,pt %xcc,1b
 361          __ ldx [%o0],%o2       // fetch next digit (only when looping)
 362 2:      retl
 363        _ mov 0,%o0              // all digits were zero
 364 3:      retl
 365        _ mov 1,%o0              // a nonzero digit was found
 366 #endif
 367 #if COUNTER_LOOPS
 368 //      srl %o1,0,%o1           // zero-extend %o1 = count
 369         brz,pn %o1,2f           // count = 0: result 0
 370        _ sub %g0,%o1,%o1        // %o1 = -count
 371         sllx %o1,3,%o1          // %o1 = -8*count
 372         sub %o0,%o1,%o0         // %o0 = &ptr[count]
 373           ldx [%o0+%o1],%o2     // fetch next digit
 374 1:        brnz,pn %o2,3f        // test it
 375          _ addcc %o1,8,%o1      // "decrement" counter, advance pointer
 376           bne,a,pt %xcc,1b
 377          __ ldx [%o0+%o1],%o2   // fetch next digit
 378 2:      retl
 379        _ mov 0,%o0              // all digits were zero
 380 3:      retl
 381        _ mov 1,%o0              // a nonzero digit was found
 382 #endif
 383
 384 // extern boolean test_loop_down (uintD* ptr, uintC count);
 385         DECLARE_FUNCTION(test_loop_down)
 386 C(test_loop_down:) // Input in %o0,%o1, Output in %o0 (1 if some digit is nonzero, else 0)
 387 #if STANDARD_LOOPS
 388 //      srl %o1,0,%o1           // zero-extend %o1 = count
 389         brz,pn %o1,2f           // count = 0: result 0
 390        _ sub %o0,8,%o0          // --ptr
 391           ldx [%o0],%o2         // fetch first digit
 392 1:        sub %o0,8,%o0         // --ptr
 393           brnz,pn %o2,3f        // nonzero digit found
 394          _ subcc %o1,1,%o1      // count--
 395           bne,a,pt %xcc,1b
 396          __ ldx [%o0],%o2       // fetch next digit (only when looping)
 397 2:      retl
 398        _ mov 0,%o0              // all digits were zero
 399 3:      retl
 400        _ mov 1,%o0              // a nonzero digit was found
 401 #endif
 402 #if COUNTER_LOOPS
 403 //      srl %o1,0,%o1           // zero-extend %o1 = count
 404         brz,pn %o1,2f           // count = 0: result 0
 405        _ sllx %o1,3,%o1         // %o1 = 8*count
 406         sub %o0,%o1,%o0         // %o0 = &ptr[-count]
 407         sub %o1,8,%o1           // %o1 = 8*(count-1)
 408           ldx [%o0+%o1],%o2     // fetch next digit
 409 1:        brnz,pn %o2,3f        // test it
 410          _ subcc %o1,8,%o1      // decrement counter, decrement pointer
 411           bcc,a,pt %xcc,1b      // continue while the subtraction did not borrow
 412          __ ldx [%o0+%o1],%o2   // fetch next digit
 413 2:      retl
 414        _ mov 0,%o0              // all digits were zero
 415 3:      retl
 416        _ mov 1,%o0              // a nonzero digit was found
 417 #endif
 418
 419 #if CL_DS_BIG_ENDIAN_P
 420
 421 // extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count);
 422         DECLARE_FUNCTION(or_loop_up)
 423 C(or_loop_up:) // Input in %o0,%o1,%o2; xptr[i] |= yptr[i] for i = 0..count-1
 424 #if STANDARD_LOOPS
 425 //      srl %o2,0,%o2           // zero-extend %o2 = count
 426         brz,pn %o2,2f           // count = 0: nothing to do
 427        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 428 1:        ldx [%o0],%o3         // *xptr
 429           ldx [%o0+%o1],%o4     // *yptr
 430           subcc %o2,1,%o2       // count--
 431           or %o3,%o4,%o3        // combine
 432           stx %o3,[%o0]         // =: *xptr
 433           bne,pt %xcc,1b
 434          _ add %o0,8,%o0        // xptr++, yptr++
 435 2:      retl
 436        _ nop
 437 #endif
 438 #if COUNTER_LOOPS
 439 //      srl %o2,0,%o2           // zero-extend %o2 = count
 440         brz,pn %o2,2f           // count = 0: nothing to do
 441        _ sub %o0,8,%o0
 442         sub %g0,%o2,%o2         // %o2 = -count
 443         sllx %o2,3,%o2          // %o2 = -8*count
 444         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 445         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 446 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 447           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 448           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 449           or %o4,%o3,%o3        // combine both
 450           bne,pt %xcc,1b
 451          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 452 2:      retl
 453        _ nop
 454 #endif
 455
 456 #endif
 457
 458 // extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count);
 459         DECLARE_FUNCTION(xor_loop_up)
 460 C(xor_loop_up:) // Input in %o0,%o1,%o2; xptr[i] ^= yptr[i] for i = 0..count-1
 461 #if STANDARD_LOOPS
 462 //      srl %o2,0,%o2           // zero-extend %o2 = count
 463         brz,pn %o2,2f           // count = 0: nothing to do
 464        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 465 1:        ldx [%o0],%o3         // *xptr
 466           ldx [%o0+%o1],%o4     // *yptr
 467           subcc %o2,1,%o2       // count--
 468           xor %o3,%o4,%o3       // combine
 469           stx %o3,[%o0]         // =: *xptr
 470           bne,pt %xcc,1b
 471          _ add %o0,8,%o0        // xptr++, yptr++
 472 2:      retl
 473        _ nop
 474 #endif
 475 #if COUNTER_LOOPS
 476 //      srl %o2,0,%o2           // zero-extend %o2 = count
 477         brz,pn %o2,2f           // count = 0: nothing to do
 478        _ sub %o0,8,%o0
 479         sub %g0,%o2,%o2         // %o2 = -count
 480         sllx %o2,3,%o2          // %o2 = -8*count
 481         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 482         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 483 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 484           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 485           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 486           xor %o4,%o3,%o3       // combine both
 487           bne,pt %xcc,1b
 488          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 489 2:      retl
 490        _ nop
 491 #endif
 492
 493 #if CL_DS_BIG_ENDIAN_P
 494
 495 // extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count);
 496         DECLARE_FUNCTION(and_loop_up)
 497 C(and_loop_up:) // Input in %o0,%o1,%o2; xptr[i] &= yptr[i] for i = 0..count-1
 498 #if STANDARD_LOOPS
 499 //      srl %o2,0,%o2           // zero-extend %o2 = count
 500         brz,pn %o2,2f           // count = 0: nothing to do
 501        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 502 1:        ldx [%o0],%o3         // *xptr
 503           ldx [%o0+%o1],%o4     // *yptr
 504           subcc %o2,1,%o2       // count--
 505           and %o3,%o4,%o3       // combine
 506           stx %o3,[%o0]         // =: *xptr
 507           bne,pt %xcc,1b
 508          _ add %o0,8,%o0        // xptr++, yptr++
 509 2:      retl
 510        _ nop
 511 #endif
 512 #if COUNTER_LOOPS
 513 //      srl %o2,0,%o2           // zero-extend %o2 = count
 514         brz,pn %o2,2f           // count = 0: nothing to do
 515        _ sub %o0,8,%o0
 516         sub %g0,%o2,%o2         // %o2 = -count
 517         sllx %o2,3,%o2          // %o2 = -8*count
 518         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 519         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 520 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 521           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 522           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 523           and %o4,%o3,%o3       // combine both
 524           bne,pt %xcc,1b
 525          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 526 2:      retl
 527        _ nop
 528 #endif
 529
 530 // extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count);
 531         DECLARE_FUNCTION(eqv_loop_up)
 532 C(eqv_loop_up:) // Input in %o0,%o1,%o2; xptr[i] = ~(xptr[i] ^ yptr[i]) for i = 0..count-1
 533 #if STANDARD_LOOPS
 534 //      srl %o2,0,%o2           // zero-extend %o2 = count
 535         brz,pn %o2,2f           // count = 0: nothing to do
 536        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 537 1:        ldx [%o0],%o3         // *xptr
 538           ldx [%o0+%o1],%o4     // *yptr
 539           subcc %o2,1,%o2       // count--
 540           xnor %o3,%o4,%o3      // combine
 541           stx %o3,[%o0]         // =: *xptr
 542           bne,pt %xcc,1b
 543          _ add %o0,8,%o0        // xptr++, yptr++
 544 2:      retl
 545        _ nop
 546 #endif
 547 #if COUNTER_LOOPS
 548 //      srl %o2,0,%o2           // zero-extend %o2 = count
 549         brz,pn %o2,2f           // count = 0: nothing to do
 550        _ sub %o0,8,%o0
 551         sub %g0,%o2,%o2         // %o2 = -count
 552         sllx %o2,3,%o2          // %o2 = -8*count
 553         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 554         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 555 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 556           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 557           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 558           xnor %o4,%o3,%o3      // combine both
 559           bne,pt %xcc,1b
 560          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 561 2:      retl
 562        _ nop
 563 #endif
 564
 565 // extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count);
 566         DECLARE_FUNCTION(nand_loop_up)
 567 C(nand_loop_up:) // Input in %o0,%o1,%o2; xptr[i] = ~(xptr[i] & yptr[i]) for i = 0..count-1
 568 #if STANDARD_LOOPS
 569 //      srl %o2,0,%o2           // zero-extend %o2 = count
 570         brz,pn %o2,2f           // count = 0: nothing to do
 571        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 572 1:        ldx [%o0],%o3         // *xptr
 573           ldx [%o0+%o1],%o4     // *yptr
 574           subcc %o2,1,%o2       // count--
 575           and %o3,%o4,%o3       // combine
 576           xnor %g0,%o3,%o3      // complement the result
 577           stx %o3,[%o0]         // =: *xptr
 578           bne,pt %xcc,1b
 579          _ add %o0,8,%o0        // xptr++, yptr++
 580 2:      retl
 581        _ nop
 582 #endif
 583 #if COUNTER_LOOPS
 584 //      srl %o2,0,%o2           // zero-extend %o2 = count
 585         brz,pn %o2,2f           // count = 0: nothing to do
 586        _ sub %o0,8,%o0
 587         sub %g0,%o2,%o2         // %o2 = -count
 588         sllx %o2,3,%o2          // %o2 = -8*count
 589         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 590         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 591 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 592           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 593           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 594           and %o4,%o3,%o3       // combine both
 595           xnor %g0,%o3,%o3      // complement the result
 596           bne,pt %xcc,1b
 597          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 598 2:      retl
 599        _ nop
 600 #endif
 601
 602 // extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count);
 603         DECLARE_FUNCTION(nor_loop_up)
 604 C(nor_loop_up:) // Input in %o0,%o1,%o2; xptr[i] = ~(xptr[i] | yptr[i]) for i = 0..count-1
 605 #if STANDARD_LOOPS
 606 //      srl %o2,0,%o2           // zero-extend %o2 = count
 607         brz,pn %o2,2f           // count = 0: nothing to do
 608        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 609 1:        ldx [%o0],%o3         // *xptr
 610           ldx [%o0+%o1],%o4     // *yptr
 611           subcc %o2,1,%o2       // count--
 612           or %o3,%o4,%o3        // combine
 613           xnor %g0,%o3,%o3      // complement the result
 614           stx %o3,[%o0]         // =: *xptr
 615           bne,pt %xcc,1b
 616          _ add %o0,8,%o0        // xptr++, yptr++
 617 2:      retl
 618        _ nop
 619 #endif
 620 #if COUNTER_LOOPS
 621 //      srl %o2,0,%o2           // zero-extend %o2 = count
 622         brz,pn %o2,2f           // count = 0: nothing to do
 623        _ sub %o0,8,%o0
 624         sub %g0,%o2,%o2         // %o2 = -count
 625         sllx %o2,3,%o2          // %o2 = -8*count
 626         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 627         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 628 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 629           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 630           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 631           or %o4,%o3,%o3        // combine both
 632           xnor %g0,%o3,%o3      // complement the result
 633           bne,pt %xcc,1b
 634          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 635 2:      retl
 636        _ nop
 637 #endif
 638
 639 // extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
 640         DECLARE_FUNCTION(andc2_loop_up)
 641 C(andc2_loop_up:) // Input in %o0,%o1,%o2; xptr[i] &= ~yptr[i] for i = 0..count-1
 642 #if STANDARD_LOOPS
 643 //      srl %o2,0,%o2           // zero-extend %o2 = count
 644         brz,pn %o2,2f           // count = 0: nothing to do
 645        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 646 1:        ldx [%o0],%o3         // *xptr
 647           ldx [%o0+%o1],%o4     // *yptr
 648           subcc %o2,1,%o2       // count--
 649           andn %o3,%o4,%o3      // combine
 650           stx %o3,[%o0]         // =: *xptr
 651           bne,pt %xcc,1b
 652          _ add %o0,8,%o0        // xptr++, yptr++
 653 2:      retl
 654        _ nop
 655 #endif
 656 #if COUNTER_LOOPS
 657 //      srl %o2,0,%o2           // zero-extend %o2 = count
 658         brz,pn %o2,2f           // count = 0: nothing to do
 659        _ sub %o0,8,%o0
 660         sub %g0,%o2,%o2         // %o2 = -count
 661         sllx %o2,3,%o2          // %o2 = -8*count
 662         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 663         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 664 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 665           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 666           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 667           andn %o4,%o3,%o3      // combine both
 668           bne,pt %xcc,1b
 669          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 670 2:      retl
 671        _ nop
 672 #endif
 673
 674 // extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
 675         DECLARE_FUNCTION(orc2_loop_up)
 676 C(orc2_loop_up:) // Input in %o0,%o1,%o2; xptr[i] |= ~yptr[i] for i = 0..count-1
 677 #if STANDARD_LOOPS
 678 //      srl %o2,0,%o2           // zero-extend %o2 = count
 679         brz,pn %o2,2f           // count = 0: nothing to do
 680        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 681 1:        ldx [%o0],%o3         // *xptr
 682           ldx [%o0+%o1],%o4     // *yptr
 683           subcc %o2,1,%o2       // count--
 684           orn %o3,%o4,%o3       // combine
 685           stx %o3,[%o0]         // =: *xptr
 686           bne,pt %xcc,1b
 687          _ add %o0,8,%o0        // xptr++, yptr++
 688 2:      retl
 689        _ nop
 690 #endif
 691 #if COUNTER_LOOPS
 692 //      srl %o2,0,%o2           // zero-extend %o2 = count
 693         brz,pn %o2,2f           // count = 0: nothing to do
 694        _ sub %o0,8,%o0
 695         sub %g0,%o2,%o2         // %o2 = -count
 696         sllx %o2,3,%o2          // %o2 = -8*count
 697         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 698         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 699 1:        ldx [%o1+%o2],%o3     // fetch next digit of y
 700           addcc %o2,8,%o2       // "decrement" counter, advance pointer
 701           ldx [%o0+%o2],%o4     // fetch the matching digit of x
 702           orn %o4,%o3,%o3       // combine both
 703           bne,pt %xcc,1b
 704          _ stx %o3,[%o0+%o2]    // store digit back into xptr[] (the result belongs in x, not y)
 705 2:      retl
 706        _ nop
 707 #endif
 708
 709 // extern void not_loop_up (uintD* xptr, uintC count);
 710         DECLARE_FUNCTION(not_loop_up)
 711 C(not_loop_up:) // Input in %o0,%o1; complements count digits in place, ascending
 712 #if STANDARD_LOOPS
 713 //      srl %o1,0,%o1           // zero-extend %o1 = count
 714         brz,pn %o1,2f           // count = 0: nothing to do
 715        _ nop
 716 1:        ldx [%o0],%o2         // fetch digit
 717           subcc %o1,1,%o1       // count--
 718           xnor %g0,%o2,%o2      // complement it
 719           stx %o2,[%o0]         // store it back
 720           bne,pt %xcc,1b
 721          _ add %o0,8,%o0        // xptr++
 722 2:      retl
 723        _ nop
 724 #endif
 725 #if COUNTER_LOOPS
 726 //      srl %o1,0,%o1           // zero-extend %o1 = count
 727         brz,pn %o1,2f           // count = 0: nothing to do
 728        _ sub %o0,8,%o0
 729         sub %g0,%o1,%o1         // %o1 = -count
 730         sllx %o1,3,%o1          // %o1 = -8*count
 731         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
 732 1:        addcc %o1,8,%o1       // "decrement" counter, advance pointer
 733           ldx [%o0+%o1],%o2     // fetch next digit
 734           xnor %g0,%o2,%o2      // complement it
 735           bne,pt %xcc,1b
 736          _ stx %o2,[%o0+%o1]    // store digit
 737 2:      retl
 738        _ nop
 739 #endif
 740
 741 // extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count);
 742         DECLARE_FUNCTION(and_test_loop_up)
 743 C(and_test_loop_up:) // Input in %o0,%o1,%o2, Output in %o0 (1 if some xptr[i] & yptr[i] is nonzero, else 0)
 744 #if STANDARD_LOOPS
 745 //      srl %o2,0,%o2           // zero-extend %o2 = count
 746         brz,pn %o2,2f           // count = 0: result 0
 747        _ nop
 748 1:        ldx [%o0],%o3         // *xptr
 749           ldx [%o1],%o4         // *yptr
 750           add %o0,8,%o0         // xptr++
 751           andcc %o3,%o4,%g0     // combine both, only the flags are kept
 752           bne,pn %xcc,3f        // common bit found
 753          _ subcc %o2,1,%o2      // count--
 754           bne,pt %xcc,1b
 755          _ add %o1,8,%o1        // yptr++
 756 2:      retl
 757        _ mov 0,%o0              // no common bit in any digit
 758 3:      retl
 759        _ mov 1,%o0              // a common bit was found
 760 #endif
 761 #if COUNTER_LOOPS
 762 //      srl %o2,0,%o2           // zero-extend %o2 = count
 763         brz,pn %o2,2f           // count = 0: result 0
 764        _ sub %g0,%o2,%o2        // %o2 = -count
 765         sllx %o2,3,%o2          // %o2 = -8*count
 766         sub %o0,%o2,%o0         // %o0 = &xptr[count]
 767         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 768           ldx [%o0+%o2],%o3     // fetch next digit
 769 1:        ldx [%o1+%o2],%o4     // fetch another digit
 770           andcc %o3,%o4,%g0     // combine both
 771           bne,pn %xcc,3f        // common bit found
 772          _ addcc %o2,8,%o2      // "decrement" counter, advance pointer
 773           bne,a,pt %xcc,1b
 774          __ ldx [%o0+%o2],%o3   // fetch next digit
 775 2:      retl
 776        _ mov 0,%o0              // no common bit in any digit
 777 3:      retl
 778        _ mov 1,%o0              // a common bit was found
 779 #endif
 780
 781 #endif
 782
 783 // extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count);
 784         DECLARE_FUNCTION(compare_loop_up)
 785 C(compare_loop_up:) // Input in %o0,%o1,%o2, Output in %o0 (0 if equal, else +1 or -1 from the first differing digit)
 786 #if STANDARD_LOOPS
 787 //      srl %o2,0,%o2           // zero-extend %o2 = count
 788         brz,pn %o2,2f           // count = 0: result 0
 789        _ nop
 790           ldx [%o0],%o3         // fetch first digit of x
 791 1:        ldx [%o1],%o4         // fetch digit of y
 792           add %o0,8,%o0         // xptr++
 793           subcc %o3,%o4,%g0     // compare
 794           bne,pn %xcc,3f        // digits differ
 795          _ add %o1,8,%o1        // yptr++
 796           subcc %o2,1,%o2       // count--
 797           bne,a,pt %xcc,1b
 798          __ ldx [%o0],%o3       // fetch next digit of x (only when looping)
 799 2:      retl
 800        _ mov 0,%o0              // all digits equal
 801 3:      mov 1,%o0               // assume x digit > y digit
 802         movlu %xcc,-1,%o0       // x digit < y digit (unsigned): result -1
 803         retl
 804        _ sra %o0,0,%o0          // sign-extend %o0
 805 #endif
 806 #if COUNTER_LOOPS
 807 //      srl %o2,0,%o2           // zero-extend %o2 = count
 808         brz,pn %o2,2f           // count = 0: result 0
 809        _ sub %g0,%o2,%o2        // %o2 = -count
 810         sllx %o2,3,%o2          // %o2 = -8*count
 811         sub %o0,%o2,%o0         // %o0 = &xptr[count]
 812         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 813           ldx [%o0+%o2],%o3     // fetch next digit
 814 1:        ldx [%o1+%o2],%o4     // fetch another digit
 815           subcc %o3,%o4,%g0     // compare
 816           bne,pn %xcc,3f        // digits differ
 817          _ addcc %o2,8,%o2      // "decrement" counter, advance pointer
 818           bne,a,pt %xcc,1b
 819          __ ldx [%o0+%o2],%o3   // fetch next digit
 820 2:      retl
 821        _ mov 0,%o0              // all digits equal
 822 3:      subcc %o3,%o4,%g0       // compare again (the delay-slot addcc overwrote the flags)
 823         mov 1,%o0               // assume x digit > y digit
 824         movlu %xcc,-1,%o0       // x digit < y digit (unsigned): result -1
 825         retl
 826        _ sra %o0,0,%o0          // sign-extend %o0
 827 #endif
 828
 829 #if CL_DS_BIG_ENDIAN_P
 830
 831 // extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
 832         DECLARE_FUNCTION(add_loop_down)
 833 C(add_loop_down:) // in: %o0 = sourceptr1, %o1 = sourceptr2, %o2 = destptr, %o3 = count; clobbers %g1; out: %o0 = final carry (0 or 1) of the digit-wise sum stored below destptr
 834 #if STANDARD_LOOPS
 835 //      srl %o3,0,%o3           // zero-extend %o3 = count
 836         brz,pn %o3,2f           // count = 0 -> return carry 0
 837        _ mov %g0,%g1            // Carry := 0
 838         sub %o0,8,%o0           // pre-decrement sourceptr1
 839 1:        ldx [%o0],%o4         // source1-digit
 840           sub %o1,8,%o1         // pre-decrement sourceptr2
 841           ldx [%o1],%o5         // source2-digit
 842           addcc %o4,%g1,%o4
 843           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
 844           addcc %o4,%o5,%o4
 845           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
 846           sub %o2,8,%o2         // pre-decrement destptr
 847           stx %o4,[%o2]         // store digit
 848           subcc %o3,1,%o3       // decrement count
 849           bne,pt %xcc,1b        // loop while count /= 0
 850          _ sub %o0,8,%o0        // pre-decrement sourceptr1 for the next round
 851 2:      retl
 852        _ mov %g1,%o0            // return the carry
 853 #endif
 854 #if COUNTER_LOOPS
 855 //      srl %o3,0,%o3           // zero-extend %o3 = count
 856         brz,pn %o3,2f           // count = 0 -> return carry 0
 857        _ mov %g0,%g1            // Carry := 0
 858         sub %o0,8,%o0
 859         sub %o1,8,%o1
 860         sllx %o3,3,%o3          // %o3 = 8*count
 861         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
 862         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
 863         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
 864 1:        ldx [%o0+%o3],%o4     // source1-digit
 865           ldx [%o1+%o3],%o5     // source2-digit
 866           addcc %o4,%g1,%o4
 867           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
 868           addcc %o4,%o5,%o4
 869           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
 870           subcc %o3,8,%o3       // decrement the byte offset
 871           bne,pt %xcc,1b        // loop while offset /= 0
 872          _ stx %o4,[%o2+%o3]    // store digit
 873 2:      retl
 874        _ mov %g1,%o0            // return the carry
 875 #endif
 876
 877 // extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
 878         DECLARE_FUNCTION(addto_loop_down)
 879 C(addto_loop_down:) // in: %o0 = sourceptr, %o1 = destptr, %o2 = count; out: %o0 = final carry (0 or 1) after adding the source digits into the dest digits in place
 880 #if STANDARD_LOOPS
 881 //      srl %o2,0,%o2           // zero-extend %o2 = count
 882         brz,pn %o2,2f           // count = 0 -> return carry 0
 883        _ mov %g0,%o5            // Carry := 0
 884         sub %o0,8,%o0           // pre-decrement sourceptr
 885 1:        ldx [%o0],%o3         // source-digit
 886           sub %o1,8,%o1         // pre-decrement destptr
 887           ldx [%o1],%o4         // dest-digit
 888           addcc %o3,%o5,%o3
 889           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
 890           addcc %o3,%o4,%o4
 891           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
 892           stx %o4,[%o1]         // store digit
 893           subcc %o2,1,%o2       // decrement count
 894           bne,pt %xcc,1b        // loop while count /= 0
 895          _ sub %o0,8,%o0        // pre-decrement sourceptr for the next round
 896 2:      retl
 897        _ mov %o5,%o0            // return the carry
 898 #endif
 899 #if COUNTER_LOOPS
 900 //      srl %o2,0,%o2           // zero-extend %o2 = count
 901         brz,pn %o2,2f           // count = 0 -> return carry 0
 902        _ mov %g0,%o5            // Carry := 0
 903         sub %o0,8,%o0
 904         sub %o1,8,%o1
 905         sllx %o2,3,%o2          // %o2 = 8*count
 906         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
 907         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
 908           ldx [%o0+%o2],%o3     // source-digit
 909 1:        ldx [%o1+%o2],%o4     // dest-digit
 910           addcc %o3,%o5,%o3
 911           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
 912           addcc %o3,%o4,%o4
 913           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
 914           stx %o4,[%o1+%o2]     // store digit
 915           subcc %o2,8,%o2       // decrement the byte offset
 916           bne,a,pt %xcc,1b      // loop while offset /= 0
 917          __ ldx [%o0+%o2],%o3   // source-digit (only when the branch is taken)
 918 2:      retl
 919        _ mov %o5,%o0            // return the carry
 920 #endif
 921
 922 // extern uintD inc_loop_down (uintD* ptr, uintC count);
 923         DECLARE_FUNCTION(inc_loop_down)
 924 C(inc_loop_down:) // in: %o0 = ptr, %o1 = count; out: %o0 = 1 if the increment carried out of all count digits (also when count = 0), else 0
 925 #if STANDARD_LOOPS
 926 //      srl %o1,0,%o1           // zero-extend %o1 = count
 927         brz,pn %o1,2f           // count = 0 -> return 1
 928        _ sub %o0,8,%o0          // pre-decrement ptr
 929 1:        ldx [%o0],%o2         // fetch digit
 930           addcc %o2,1,%o2       // increment
 931           bne,pn %xcc,3f        // result /= 0: no carry, done
 932          _ stx %o2,[%o0]        // store (always executed)
 933           subcc %o1,1,%o1       // decrement count
 934           bne,pt %xcc,1b        // loop while count /= 0
 935          _ sub %o0,8,%o0        // pre-decrement ptr for the next round
 936 2:      retl
 937        _ mov 1,%o0              // carry out of the last digit
 938 3:      retl
 939        _ mov 0,%o0              // no carry
 940 #endif
 941 #if COUNTER_LOOPS
 942 //      srl %o1,0,%o1           // zero-extend %o1 = count
 943         brz,pn %o1,2f           // count = 0 -> return 1
 944        _ sub %o0,8,%o0
 945         sllx %o1,3,%o1          // %o1 = 8*count
 946         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
 947           ldx [%o0+%o1],%o2     // fetch digit
 948 1:        addcc %o2,1,%o2       // increment
 949           bne,pn %xcc,3f        // result /= 0: no carry, done
 950          _ stx %o2,[%o0+%o1]    // store
 951           subcc %o1,8,%o1       // decrement counter, decrement pointer
 952           bne,a,pt %xcc,1b      // loop while offset /= 0
 953          __ ldx [%o0+%o1],%o2   // fetch digit (only when the branch is taken)
 954 2:      retl
 955        _ mov 1,%o0              // carry out of the last digit
 956 3:      retl
 957        _ mov 0,%o0              // no carry
 958 #endif
 959
 960 // extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
 961         DECLARE_FUNCTION(sub_loop_down)
 962 C(sub_loop_down:) // in: %o0 = sourceptr1, %o1 = sourceptr2, %o2 = destptr, %o3 = count; clobbers %g1; out: %o0 = final borrow (0 or 1) of the digit-wise difference source1 - source2 stored below destptr
 963 #if STANDARD_LOOPS
 964 //      srl %o3,0,%o3           // zero-extend %o3 = count
 965         brz,pn %o3,2f           // count = 0 -> return carry 0
 966        _ mov %g0,%g1            // Carry := 0
 967         sub %o1,8,%o1           // pre-decrement sourceptr2
 968 1:        ldx [%o1],%o5         // source2-digit
 969           sub %o0,8,%o0         // pre-decrement sourceptr1
 970           ldx [%o0],%o4         // source1-digit
 971           addcc %o5,%g1,%o5
 972           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
 973           subcc %o4,%o5,%o4
 974           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
 975           sub %o2,8,%o2         // pre-decrement destptr
 976           stx %o4,[%o2]         // store digit
 977           subcc %o3,1,%o3       // decrement count
 978           bne,pt %xcc,1b        // loop while count /= 0
 979          _ sub %o1,8,%o1        // pre-decrement sourceptr2 for the next round
 980 2:      retl
 981        _ mov %g1,%o0            // return the carry
 982 #endif
 983 #if COUNTER_LOOPS
 984 //      srl %o3,0,%o3           // zero-extend %o3 = count
 985         brz,pn %o3,2f           // count = 0 -> return carry 0
 986        _ mov %g0,%g1            // Carry := 0
 987         sub %o0,8,%o0
 988         sub %o1,8,%o1
 989         sllx %o3,3,%o3          // %o3 = 8*count
 990         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
 991         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
 992         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
 993 1:        ldx [%o0+%o3],%o4     // source1-digit
 994           ldx [%o1+%o3],%o5     // source2-digit
 995           addcc %o5,%g1,%o5
 996           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
 997           subcc %o4,%o5,%o4
 998           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
 999           subcc %o3,8,%o3       // decrement the byte offset
1000           bne,pt %xcc,1b        // loop while offset /= 0
1001          _ stx %o4,[%o2+%o3]    // store digit
1002 2:      retl
1003        _ mov %g1,%o0            // return the carry
1004 #endif
1005
1006 // extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
1007         DECLARE_FUNCTION(subx_loop_down)
1008 C(subx_loop_down:) // in: %o0 = sourceptr1, %o1 = sourceptr2, %o2 = destptr, %o3 = count, %o4 = incoming carry (0 or -1); clobbers %g1; out: %o0 = final carry (0 or -1) of source1 - source2 - borrow stored below destptr
1009 #if STANDARD_LOOPS
1010 //      srl %o3,0,%o3           // zero-extend %o3 = count
1011         brz,pn %o3,2f           // count = 0 -> return the incoming carry unchanged
1012        _ mov %o4,%g1            // carry (0 or -1)
1013         sub %o1,8,%o1           // pre-decrement sourceptr2
1014 1:        ldx [%o1],%o5         // source2-digit
1015           sub %o0,8,%o0         // pre-decrement sourceptr1
1016           ldx [%o0],%o4         // source1-digit
1017           subcc %o5,%g1,%o5     // %o5 - old carry, i.e. %o5 + 1 when %g1 = -1
1018           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1; subcc sets C on borrow, which here means the +1 did not wrap, so the carry is absorbed
1019           subcc %o4,%o5,%o4
1020           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
1021           sub %o2,8,%o2         // pre-decrement destptr
1022           stx %o4,[%o2]         // store digit
1023           subcc %o3,1,%o3       // decrement count
1024           bne,pt %xcc,1b        // loop while count /= 0
1025          _ sub %o1,8,%o1        // pre-decrement sourceptr2 for the next round
1026 2:      retl
1027        _ mov %g1,%o0            // return the carry
1028 #endif
1029 #if COUNTER_LOOPS
1030 //      srl %o3,0,%o3           // zero-extend %o3 = count
1031         brz,pn %o3,2f           // count = 0 -> return the incoming carry unchanged
1032        _ mov %o4,%g1            // carry (0 or -1)
1033         sub %o0,8,%o0
1034         sub %o1,8,%o1
1035         sllx %o3,3,%o3          // %o3 = 8*count
1036         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
1037         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
1038         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
1039 1:        ldx [%o1+%o3],%o5     // source2-digit
1040           ldx [%o0+%o3],%o4     // source1-digit
1041           subcc %o5,%g1,%o5     // %o5 - old carry, i.e. %o5 + 1 when %g1 = -1
1042           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1; subcc sets C on borrow, which here means the +1 did not wrap, so the carry is absorbed
1043           subcc %o4,%o5,%o4
1044           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
1045           subcc %o3,8,%o3       // decrement the byte offset
1046           bne,pt %xcc,1b        // loop while offset /= 0
1047          _ stx %o4,[%o2+%o3]    // store digit
1048 2:      retl
1049        _ mov %g1,%o0            // return the carry
1050 #endif
1051
1052 // extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
1053         DECLARE_FUNCTION(subfrom_loop_down)
1054 C(subfrom_loop_down:) // in: %o0 = sourceptr, %o1 = destptr, %o2 = count; out: %o0 = final borrow (0 or 1) after subtracting the source digits from the dest digits in place
1055 #if STANDARD_LOOPS
1056 //      srl %o2,0,%o2           // zero-extend %o2 = count
1057         brz,pn %o2,2f           // count = 0 -> return carry 0
1058        _ mov %g0,%o5            // Carry := 0
1059         sub %o0,8,%o0           // pre-decrement sourceptr
1060 1:        ldx [%o0],%o3         // source-digit
1061           sub %o1,8,%o1         // pre-decrement destptr
1062           ldx [%o1],%o4         // dest-digit
1063           addcc %o3,%o5,%o3
1064           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1065           subcc %o4,%o3,%o4
1066           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
1067           stx %o4,[%o1]         // store digit
1068           subcc %o2,1,%o2       // decrement count
1069           bne,pt %xcc,1b        // loop while count /= 0
1070          _ sub %o0,8,%o0        // pre-decrement sourceptr for the next round
1071 2:      retl
1072        _ mov %o5,%o0            // return the carry
1073 #endif
1074 #if COUNTER_LOOPS
1075 //      srl %o2,0,%o2           // zero-extend %o2 = count
1076         brz,pn %o2,2f           // count = 0 -> return carry 0
1077        _ mov %g0,%o5            // Carry := 0
1078         sub %o0,8,%o0
1079         sub %o1,8,%o1
1080         sllx %o2,3,%o2          // %o2 = 8*count
1081         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
1082         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
1083           ldx [%o0+%o2],%o3     // source-digit
1084 1:        ldx [%o1+%o2],%o4     // dest-digit
1085           addcc %o3,%o5,%o3
1086           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1087           subcc %o4,%o3,%o4
1088           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
1089           stx %o4,[%o1+%o2]     // store digit
1090           subcc %o2,8,%o2       // decrement the byte offset
1091           bne,a,pt %xcc,1b      // loop while offset /= 0
1092          __ ldx [%o0+%o2],%o3   // source-digit (only when the branch is taken)
1093 2:      retl
1094        _ mov %o5,%o0            // return the carry
1095 #endif
1096
1097 // extern uintD dec_loop_down (uintD* ptr, uintC count);
1098         DECLARE_FUNCTION(dec_loop_down)
1099 C(dec_loop_down:) // in: %o0 = ptr, %o1 = count; out: %o0 = -1 if the decrement borrowed out of all count digits (also when count = 0), else 0
1100 #if STANDARD_LOOPS
1101 //      srl %o1,0,%o1           // zero-extend %o1 = count
1102         brz,pn %o1,2f           // count = 0 -> return -1
1103        _ sub %o0,8,%o0          // pre-decrement ptr
1104 1:        ldx [%o0],%o2         // fetch digit
1105           subcc %o2,1,%o2       // decrement
1106           bcc,pn %xcc,3f        // no borrow: done
1107          _ stx %o2,[%o0]        // store (always executed)
1108           subcc %o1,1,%o1       // decrement count
1109           bne,pt %xcc,1b        // loop while count /= 0
1110          _ sub %o0,8,%o0        // pre-decrement ptr for the next round
1111 2:      retl
1112        _ mov -1,%o0             // borrow out of the last digit
1113 3:      retl
1114        _ mov 0,%o0              // no borrow
1115 #endif
1116 #if COUNTER_LOOPS
1117 //      srl %o1,0,%o1           // zero-extend %o1 = count
1118         brz,pn %o1,2f           // count = 0 -> return -1
1119        _ sub %o0,8,%o0
1120         sllx %o1,3,%o1          // %o1 = 8*count
1121         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
1122           ldx [%o0+%o1],%o2     // fetch digit
1123 1:        subcc %o2,1,%o2       // decrement
1124           bcc,pn %xcc,3f        // no borrow: done
1125          _ stx %o2,[%o0+%o1]    // store
1126           subcc %o1,8,%o1       // decrement counter, decrement pointer
1127           bne,a,pt %xcc,1b      // loop while offset /= 0
1128          __ ldx [%o0+%o1],%o2   // fetch digit (only when the branch is taken)
1129 2:      retl
1130        _ mov -1,%o0             // borrow out of the last digit
1131 3:      retl
1132        _ mov 0,%o0              // no borrow
1133 #endif
1134
1135 // extern uintD neg_loop_down (uintD* ptr, uintC count);
1136         DECLARE_FUNCTION(neg_loop_down)
1137 C(neg_loop_down:) // in: %o0 = ptr, %o1 = count; negates the count digits below ptr in place; out: %o0 = 0 if all digits were 0 (or count = 0), else -1
1138 #if STANDARD_LOOPS
1139 //      srl %o1,0,%o1           // zero-extend %o1 = count
1140         // search for the first digit /= 0:
1141         brz,pn %o1,2f           // count = 0 -> return 0
1142        _ sub %o0,8,%o0          // pre-decrement ptr
1143 1:        ldx [%o0],%o2         // fetch digit
1144           subcc %g0,%o2,%o2     // negate, test
1145           bne,pn %xcc,3f        // digit /= 0 found
1146          _ subcc %o1,1,%o1      // decrement count (delay slot, sets the flags used after either branch outcome)
1147           bne,pt %xcc,1b        // loop while count /= 0
1148          _ sub %o0,8,%o0        // pre-decrement ptr for the next round
1149 2:      retl
1150        _ mov 0,%o0              // every digit was 0
1151 3:      // first digit /= 0 found, from now on there are carries
1152         stx %o2,[%o0]           // negate 1 digit
1153         // invert all the other digits:
1154         be,pn %xcc,5f           // no digits left (flags still come from the subcc on the count)
1155        _ sub %o0,8,%o0          // pre-decrement ptr
1156 4:        ldx [%o0],%o2         // fetch digit
1157           subcc %o1,1,%o1       // decrement count
1158           xnor %g0,%o2,%o2      // bitwise complement
1159           stx %o2,[%o0]         // store
1160           bne,pt %xcc,4b        // loop while count /= 0
1161          _ sub %o0,8,%o0        // pre-decrement ptr for the next round
1162 5:      retl
1163        _ mov -1,%o0             // a non-zero digit was seen
1164 #endif
1165 #if COUNTER_LOOPS
1166 //      srl %o1,0,%o1           // zero-extend %o1 = count
1167         // search for the first digit /= 0:
1168         brz,pn %o1,2f           // count = 0 -> return 0
1169        _ sub %o0,8,%o0
1170         sllx %o1,3,%o1          // %o1 = 8*count
1171         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
1172           ldx [%o0+%o1],%o2     // fetch digit
1173 1:        subcc %g0,%o2,%o2     // negate, test
1174           bne,pn %xcc,3f        // digit /= 0 found
1175          _ subcc %o1,8,%o1      // decrement counter, decrement pointer
1176           bne,a,pt %xcc,1b      // loop while offset /= 0
1177          __ ldx [%o0+%o1],%o2   // fetch digit (only when the branch is taken)
1178 2:      retl
1179        _ mov 0,%o0              // every digit was 0
1180 3:      // first digit /= 0 found, from now on there are carries
1181         // invert all the other digits:
1182         add %o1,8,%o1           // undo the delay-slot decrement to address the digit just negated
1183         stx %o2,[%o0+%o1]       // store
1184         subcc %o1,8,%o1         // step to the next digit, test for the end
1185         be,pn %xcc,5f           // no digits left
1186        _ nop
1187           ldx [%o0+%o1],%o2     // fetch digit
1188 4:        xnor %g0,%o2,%o2      // bitwise complement
1189           stx %o2,[%o0+%o1]     // store
1190           subcc %o1,8,%o1       // decrement counter, decrement pointer
1191           bne,a,pt %xcc,4b      // loop while offset /= 0
1192          __ ldx [%o0+%o1],%o2   // fetch digit (only when the branch is taken)
1193 5:      retl
1194        _ mov -1,%o0             // a non-zero digit was seen
1195 #endif
1196
1197 // extern uintD shift1left_loop_down (uintD* ptr, uintC count);
1198         DECLARE_FUNCTION(shift1left_loop_down)
1199 C(shift1left_loop_down:) // in: %o0 = ptr, %o1 = count; shifts the count digits below ptr left by 1 bit in place; out: %o0 = bit shifted out of the last digit (0 when count = 0)
1200 //      srl %o1,0,%o1           // zero-extend %o1 = count
1201         brz,pn %o1,2f           // count = 0 -> return 0
1202        _ mov 0,%o3              // Carry := 0
1203         sub %o0,8,%o0           // pre-decrement ptr
1204 1:        ldx [%o0],%o2         // digit
1205           addcc %o2,%o2,%o4     // shift
1206           add %o4,%o3,%o4       // and carry
1207           srlx %o2,63,%o3       // new carry
1208           stx %o4,[%o0]         // store digit
1209           subcc %o1,1,%o1       // decrement count
1210           bne,pt %xcc,1b        // loop while count /= 0
1211          _ sub %o0,8,%o0        // pre-decrement ptr for the next round
1212 2:      retl
1213        _ mov %o3,%o0            // return the carry
1214
1215 // extern uintD shiftleft_loop_down (uintD* ptr, uintC count, uintC i, uintD carry);
1216         DECLARE_FUNCTION(shiftleft_loop_down)
1217 C(shiftleft_loop_down:) // in: %o0 = ptr, %o1 = count, %o2 = i, %o3 = carry; clobbers %g1; shifts the count digits below ptr left by i bits in place; out: %o0 = bits shifted out of the last digit (the incoming carry when count = 0)
1218 //      srl %o1,0,%o1           // zero-extend %o1 = count
1219         brz,pn %o1,2f           // count = 0 -> return the incoming carry
1220        _ sub %g0,%o2,%g1        // 64-i (mod 64)
1221         sub %o0,8,%o0           // pre-decrement ptr
1222 1:        ldx [%o0],%o4         // digit
1223           subcc %o1,1,%o1       // decrement count (flags are consumed by the bne below)
1224           sllx %o4,%o2,%o5      // its low (64-i) bits
1225           or %o3,%o5,%o5        // combine with the old carry
1226           stx %o5,[%o0]         // store digit
1227           srlx %o4,%g1,%o3      // its top i bits yield the new carry
1228           bne,pt %xcc,1b        // loop while count /= 0
1229          _ sub %o0,8,%o0        // pre-decrement ptr for the next round
1230 2:      retl
1231        _ mov %o3,%o0            // return the carry
1232
1233 // extern uintD shiftleftcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
1234         DECLARE_FUNCTION(shiftleftcopy_loop_down)
1235 C(shiftleftcopy_loop_down:) // in: %o0 = sourceptr, %o1 = destptr, %o2 = count, %o3 = i; clobbers %g1,%g2; stores the source digits shifted left by i bits below destptr; out: %o0 = bits shifted out of the last digit (0 when count = 0)
1236 //      srl %o2,0,%o2           // zero-extend %o2 = count
1237         brz,pn %o2,2f           // count = 0 -> return 0
1238        _ mov 0,%o4              // Carry := 0
1239         sub %g0,%o3,%g1         // 64-i (mod 64)
1240         sub %o0,8,%o0           // pre-decrement sourceptr
1241 1:        ldx [%o0],%o5         // digit
1242           subcc %o2,1,%o2       // decrement count (flags are consumed by the bne below)
1243           sllx %o5,%o3,%g2      // its low (64-i) bits
1244           or %o4,%g2,%g2        // combine with the old carry
1245           sub %o1,8,%o1         // pre-decrement destptr
1246           stx %g2,[%o1]         // store digit
1247           srlx %o5,%g1,%o4      // its top i bits yield the new carry
1248           bne,pt %xcc,1b        // loop while count /= 0
1249          _ sub %o0,8,%o0        // pre-decrement sourceptr for the next round
1250 2:      retl
1251        _ mov %o4,%o0            // return the carry
1252
1253 // extern uintD shift1right_loop_up (uintD* ptr, uintC count, uintD carry);
1254         DECLARE_FUNCTION(shift1right_loop_up)
1255 C(shift1right_loop_up:) // in: %o0 = ptr, %o1 = count, %o2 = carry; shifts the count digits from ptr upwards right by 1 bit in place; out: %o0 = carry with the shifted-out bit in bit 63
1256 //      srl %o1,0,%o1           // zero-extend %o1 = count
1257         brz,pn %o1,2f           // count = 0 -> return the shifted incoming carry
1258        _ sllx %o2,63,%o2        // carry, moved into bit 63
1259 1:        ldx [%o0],%o3         // digit
1260           subcc %o1,1,%o1       // decrement count (flags are consumed by the bne below)
1261           srlx %o3,1,%o4        // shift
1262           or %o2,%o4,%o4        // and combine with old carry
1263           stx %o4,[%o0]         // and store
1264           sllx %o3,63,%o2       // new carry
1265           bne,pt %xcc,1b        // loop while count /= 0
1266          _ add %o0,8,%o0        // advance ptr
1267 2:      retl
1268        _ mov %o2,%o0            // return the carry
1269
1270 // extern uintD shiftright_loop_up (uintD* ptr, uintC count, uintC i);
1271         DECLARE_FUNCTION(shiftright_loop_up)
1272 C(shiftright_loop_up:) // in: %o0 = ptr, %o1 = count, %o2 = i; clobbers %g1; shifts the count digits from ptr upwards right (logically) by i bits in place; out: %o0 = bits shifted out of the last digit, left-aligned (0 when count = 0)
1273 //      srl %o1,0,%o1           // zero-extend %o1 = count
1274         sub %g0,%o2,%g1         // 64-i (mod 64)
1275         brz,pn %o1,2f           // count = 0 -> return 0
1276        _ or %g0,%g0,%o3         // Carry := 0
1277 1:        ldx [%o0],%o4         // digit
1278           subcc %o1,1,%o1       // decrement count (flags are consumed by the bne below)
1279           srlx %o4,%o2,%o5      // shift
1280           or %o3,%o5,%o5        // and combine with old carry
1281           stx %o5,[%o0]         // and store
1282           sllx %o4,%g1,%o3      // new carry
1283           bne,pt %xcc,1b        // loop while count /= 0
1284          _ add %o0,8,%o0        // advance ptr
1285 2:      retl
1286        _ mov %o3,%o0            // return the carry
1287
1288 // extern uintD shiftrightsigned_loop_up (uintD* ptr, uintC count, uintC i);
1289         DECLARE_FUNCTION(shiftrightsigned_loop_up)
1290 C(shiftrightsigned_loop_up:) // in: %o0 = ptr, %o1 = count, %o2 = i; clobbers %g1; like a right shift by i bits, but the first digit is shifted arithmetically; the first digit is processed before count is tested, so there is no count = 0 check; out: %o0 = bits shifted out of the last digit, left-aligned
1291 //      srl %o1,0,%o1           // zero-extend %o1 = count
1292         ldx [%o0],%o4           // first digit
1293         sub %g0,%o2,%g1         // 64-i (mod 64)
1294         srax %o4,%o2,%o5        // shift (arithmetic: replicates the sign bit)
1295         stx %o5,[%o0]           // and store
1296         sllx %o4,%g1,%o3        // new carry
1297         subcc %o1,1,%o1         // decrement count
1298         be,pn %xcc,2f           // only one digit -> done
1299        _ add %o0,8,%o0          // advance ptr
1300 1:        ldx [%o0],%o4         // digit
1301           subcc %o1,1,%o1       // decrement count (flags are consumed by the bne below)
1302           srlx %o4,%o2,%o5      // shift
1303           or %o3,%o5,%o5        // and combine with old carry
1304           stx %o5,[%o0]         // and store
1305           sllx %o4,%g1,%o3      // new carry
1306           bne,pt %xcc,1b        // loop while count /= 0
1307          _ add %o0,8,%o0        // advance ptr
1308 2:      retl
1309        _ mov %o3,%o0            // return the carry
1310
1311 // extern uintD shiftrightcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
1312         DECLARE_FUNCTION(shiftrightcopy_loop_up)
1313 C(shiftrightcopy_loop_up:) // in: %o0 = sourceptr, %o1 = destptr, %o2 = count, %o3 = i, %o4 = carry; clobbers %g1,%g2; stores the source digits shifted right by i bits from destptr upwards; out: %o0 = bits shifted out of the last digit, left-aligned
1314 //      srl %o2,0,%o2           // zero-extend %o2 = count
1315         sub %g0,%o3,%g1         // 64-i (mod 64)
1316         brz,pn %o2,2f           // count = 0 -> return the shifted incoming carry
1317        _ sllx %o4,%g1,%g2       // first carry
1318 1:        ldx [%o0],%o4         // digit
1319           add %o0,8,%o0         // advance sourceptr
1320           srlx %o4,%o3,%o5      // shift
1321           or %g2,%o5,%o5        // and combine with old carry
1322           stx %o5,[%o1]         // and store
1323           sllx %o4,%g1,%g2      // new carry
1324           subcc %o2,1,%o2       // decrement count
1325           bne,pt %xcc,1b        // loop while count /= 0
1326          _ add %o1,8,%o1        // advance destptr
1327 2:      retl
1328        _ mov %g2,%o0            // return the carry
1329
1330 // extern uintD mulusmall_loop_down (uintD digit, uintD* ptr, uintC len, uintD newdigit);
1331         DECLARE_FUNCTION(mulusmall_loop_down)
1332 C(mulusmall_loop_down:) // in: %o0 = digit, %o1 = ptr, %o2 = len, %o3 = newdigit (initial carry); clobbers %g1; multiplies the len digits below ptr by digit in place; out: %o0 = final carry
1333 //      srl %o2,0,%o2           // zero-extend %o2 = len
1334         brz,pn %o2,2f           // len = 0 -> return newdigit
1335        _ sub %o1,8,%o1          // pre-decrement ptr
1336 1:        // multiply the next digit [%o1] by the 6-bit number %o0
1337           // and add the small carry %o3:
1338           ldx [%o1],%o4         // x = next digit
1339           sub %o2,1,%o2         // decrement len (tested by the brnz below)
1340           srlx %o4,32,%o5       // high32(x)
1341           srl %o4,0,%o4         // low32(x)
1342           mulx %o4,%o0,%o4      // low32(x)*digit
1343           mulx %o5,%o0,%o5      // high32(x)*digit
1344           sllx %o5,32,%g1       // low32(high32(x)*digit)*2^32
1345           add %g1,%o3,%g1       // plus carry
1346           addcc %o4,%g1,%o4     // plus low32(x)*digit
1347           srlx %o5,32,%o3       // high32(high32(x)*digit)
1348           add %o3,1,%g1         // candidate carry if the addcc overflowed
1349           movcs %xcc,%g1,%o3    // new carry
1350           stx %o4,[%o1]         // store new digit
1351           brnz,pt %o2,1b        // loop while len /= 0
1352          _ sub %o1,8,%o1        // pre-decrement ptr for the next round
1353 2:      retl
1354        _ mov %o3,%o0            // return the carry
1355
1356 // extern void mulu_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
1357         DECLARE_FUNCTION(mulu_loop_down)
1358 C(mulu_loop_down:) // in: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len; stores digit * source digits below destptr, followed by the final carry as one more digit; len is only tested after the first round, so there is no len = 0 check
1359         save %sp,-192,%sp       // new register window
1360         mov 0,%l0               // carry
1361         srlx %i0,32,%l1         // %l1 = high32(digit)
1362         srl %i0,0,%l2           // %l2 = low32(digit)
1363         mov 1,%l3
1364         sllx %l3,32,%l3         // %l3 = 2^32
1365         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1366 1:        sub %i2,8,%i2         // pre-decrement destptr (and with it the source address %i1+%i2)
1367           ldx [%i1+%i2],%o0     // next digit
1368           subcc %i3,1,%i3       // decrement len (tested by the brnz below)
1369           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1370           srlx %o0,32,%o1       // high32 of the source digit
1371           srl %o0,0,%o2         // low32 of the source digit
1372           mulx %l1,%o1,%o3      // high part
1373           mulx %l1,%o2,%o4      // first mid part
1374           mulx %l2,%o1,%o1      // second mid part
1375           mulx %l2,%o2,%o2      // low part
1376           srlx %o2,32,%o5       // low part's upper half
1377           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1378           addcc %o4,%o1,%o4     // add other mid part
1379           add %o3,%l3,%o5
1380           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1381           srlx %o4,32,%o5       // high32(midparts)
1382           sllx %o4,32,%o4       // low32(midparts) * 2^32
1383           srl %o2,0,%o2         // low32(lowpart)
1384           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1385           addcc %o0,%l0,%o0     // add old carry
1386           add %o3,%o5,%l0       // add high32(midparts) to high part
1387           add %l0,1,%o5
1388           movcs %xcc,%o5,%l0    // new carry
1389           // multiplication done
1390           brnz,pt %i3,1b        // loop while len /= 0
1391          _ stx %o0,[%i2]        // store low digit
1392         stx %l0,[%i2-8]         // store last carry
1393         ret
1394        _ restore                // back to the caller's register window
1395
1396 // extern uintD muluadd_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
1397         DECLARE_FUNCTION(muluadd_loop_down)
1398 C(muluadd_loop_down:) // in: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len; adds digit * source digits into the digits below destptr; out: %i0 = final carry; len is only tested after the first round, so there is no len = 0 check
1399         save %sp,-192,%sp       // new register window
1400         mov 0,%l0               // carry
1401         srlx %i0,32,%l1         // %l1 = high32(digit)
1402         srl %i0,0,%l2           // %l2 = low32(digit)
1403         mov 1,%l3
1404         sllx %l3,32,%l3         // %l3 = 2^32
1405         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1406 1:        sub %i2,8,%i2         // pre-decrement destptr (and with it the source address %i1+%i2)
1407           ldx [%i1+%i2],%o0     // next digit
1408           ldx [%i2],%i4         // *destptr
1409           subcc %i3,1,%i3       // decrement len (tested by the brnz below)
1410           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1411           srlx %o0,32,%o1       // high32 of the source digit
1412           srl %o0,0,%o2         // low32 of the source digit
1413           mulx %l1,%o1,%o3      // high part
1414           mulx %l1,%o2,%o4      // first mid part
1415           mulx %l2,%o1,%o1      // second mid part
1416           mulx %l2,%o2,%o2      // low part
1417           srlx %o2,32,%o5       // low part's upper half
1418           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1419           addcc %o4,%o1,%o4     // add other mid part
1420           add %o3,%l3,%o5
1421           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1422           srlx %o4,32,%o5       // high32(midparts)
1423           sllx %o4,32,%o4       // low32(midparts) * 2^32
1424           srl %o2,0,%o2         // low32(lowpart)
1425           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1426           addcc %o0,%l0,%o0     // add old carry
1427           add %o3,%o5,%l0       // add high32(midparts) to high part
1428           add %l0,1,%o5
1429           movcs %xcc,%o5,%l0    // new carry
1430           // multiplication done
1431           addcc %i4,%o0,%o0     // add old *destptr
1432           add %l0,1,%o2
1433           movcs %xcc,%o2,%l0    // new carry
1434           brnz,pt %i3,1b        // loop while len /= 0
1435          _ stx %o0,[%i2]        // store low digit
1436         mov %l0,%i0             // last carry
1437         ret
1438        _ restore                // back to the caller's register window
1439
1440 // extern uintD mulusub_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
1441         DECLARE_FUNCTION(mulusub_loop_down)
1442 C(mulusub_loop_down:) // in: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len; subtracts digit * source digits from the digits below destptr; out: %i0 = final carry; len is only tested after the first round, so there is no len = 0 check
1443         save %sp,-192,%sp       // new register window
1444         mov 0,%l0               // carry
1445         srlx %i0,32,%l1         // %l1 = high32(digit)
1446         srl %i0,0,%l2           // %l2 = low32(digit)
1447         mov 1,%l3
1448         sllx %l3,32,%l3         // %l3 = 2^32
1449         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1450 1:        sub %i2,8,%i2         // pre-decrement destptr (and with it the source address %i1+%i2)
1451           ldx [%i1+%i2],%o0     // next digit
1452           ldx [%i2],%i4         // *destptr
1453           subcc %i3,1,%i3       // decrement len (tested by the brnz below)
1454           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1455           srlx %o0,32,%o1       // high32 of the source digit
1456           srl %o0,0,%o2         // low32 of the source digit
1457           mulx %l1,%o1,%o3      // high part
1458           mulx %l1,%o2,%o4      // first mid part
1459           mulx %l2,%o1,%o1      // second mid part
1460           mulx %l2,%o2,%o2      // low part
1461           srlx %o2,32,%o5       // low part's upper half
1462           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1463           addcc %o4,%o1,%o4     // add other mid part
1464           add %o3,%l3,%o5
1465           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1466           srlx %o4,32,%o5       // high32(midparts)
1467           sllx %o4,32,%o4       // low32(midparts) * 2^32
1468           srl %o2,0,%o2         // low32(lowpart)
1469           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1470           addcc %o0,%l0,%o0     // add old carry
1471           add %o3,%o5,%l0       // add high32(midparts) to high part
1472           add %l0,1,%o5
1473           movcs %xcc,%o5,%l0    // new carry
1474           // multiplication done
1475           subcc %i4,%o0,%o0     // subtract from old *destptr
1476           add %l0,1,%o2
1477           movcs %xcc,%o2,%l0    // new carry
1478           brnz,pt %i3,1b        // loop while len /= 0
1479          _ stx %o0,[%i2]        // store low digit
1480         mov %l0,%i0             // last carry
1481         ret
1482        _ restore                // back to the caller's register window
1483
1484 #endif
1485
1486 #if !CL_DS_BIG_ENDIAN_P
1487
1488 // extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count);
1489         DECLARE_FUNCTION(or_loop_down)
1490 C(or_loop_down:) // in: %o0 = xptr, %o1 = yptr, %o2 = count; ORs the count digits below yptr into the count digits below xptr (result stored in x)
1491 #if STANDARD_LOOPS
1492 //      srl %o2,0,%o2           // zero-extend %o2 = count
1493         brz,pn %o2,2f           // count = 0 -> nothing to do
1494        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1495         sub %o0,8,%o0           // pre-decrement xptr
1496 1:        ldx [%o0],%o3         // *xptr
1497           ldx [%o0+%o1],%o4     // *yptr
1498           subcc %o2,1,%o2       // decrement count (flags are consumed by the bne below)
1499           or %o3,%o4,%o3        // combine
1500           stx %o3,[%o0]         // =: *xptr
1501           bne,pt %xcc,1b        // loop while count /= 0
1502          _ sub %o0,8,%o0        // xptr--, yptr--
1503 2:      retl
1504        _ nop
1505 #endif
1506 #if COUNTER_LOOPS
1507 //      srl %o2,0,%o2           // zero-extend %o2 = count
1508         brz,pn %o2,2f           // count = 0 -> nothing to do
1509        _ sllx %o2,3,%o2         // %o2 = 8*count
1510         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1511         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1512 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1513           ldx [%o1+%o2],%o3     // fetch next digit of y
1514           ldx [%o0+%o2],%o4     // fetch the matching digit of x
1515           or %o4,%o3,%o3        // combine both
1516           bne,pt %xcc,1b        // loop while offset /= 0
1517          _ stx %o3,[%o0+%o2]    // store digit into x, as the STANDARD_LOOPS variant does (not into y)
1518 2:      retl
1519        _ nop
1520 #endif
1521
1522 // extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count);
1523         DECLARE_FUNCTION(xor_loop_down)
1524 C(xor_loop_down:) // in: %o0 = xptr, %o1 = yptr, %o2 = count; XORs the count digits below yptr into the count digits below xptr (result stored in x)
1525 #if STANDARD_LOOPS
1526 //      srl %o2,0,%o2           // zero-extend %o2 = count
1527         brz,pn %o2,2f           // count = 0 -> nothing to do
1528        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1529         sub %o0,8,%o0           // pre-decrement xptr
1530 1:        ldx [%o0],%o3         // *xptr
1531           ldx [%o0+%o1],%o4     // *yptr
1532           subcc %o2,1,%o2       // decrement count (flags are consumed by the bne below)
1533           xor %o3,%o4,%o3       // combine
1534           stx %o3,[%o0]         // =: *xptr
1535           bne,pt %xcc,1b        // loop while count /= 0
1536          _ sub %o0,8,%o0        // xptr--, yptr--
1537 2:      retl
1538        _ nop
1539 #endif
1540 #if COUNTER_LOOPS
1541 //      srl %o2,0,%o2           // zero-extend %o2 = count
1542         brz,pn %o2,2f           // count = 0 -> nothing to do
1543        _ sllx %o2,3,%o2         // %o2 = 8*count
1544         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1545         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1546 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1547           ldx [%o1+%o2],%o3     // fetch next digit of y
1548           ldx [%o0+%o2],%o4     // fetch the matching digit of x
1549           xor %o4,%o3,%o3       // combine both
1550           bne,pt %xcc,1b        // loop while offset /= 0
1551          _ stx %o3,[%o0+%o2]    // store digit into x, as the STANDARD_LOOPS variant does (not into y)
1552 2:      retl
1553        _ nop
1554 #endif
1555
1556 // extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes *--xptr &= *--yptr for count 64-bit digits, walking both
     // arrays downward. yptr[] is only read; the result goes to xptr[].
     // Returns immediately when count == 0. Leaf routine, no return value.
     // Exactly one of the two loop variants below is expected to be enabled
     // (STANDARD_LOOPS / COUNTER_LOOPS are defined outside this section).
1557         DECLARE_FUNCTION(and_loop_down)
1558 C(and_loop_down:) // Input in %o0,%o1,%o2
1559 #if STANDARD_LOOPS
1560 //      srl %o2,0,%o2           // zero-extend %o2 = count
1561         brz,pn %o2,2f           // nothing to do if count == 0
1562        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1563         sub %o0,8,%o0           // %o0 = &xptr[-1]
1564 1:        ldx [%o0],%o3         // *xptr
1565           ldx [%o0+%o1],%o4     // *yptr
1566           subcc %o2,1,%o2       // count--, sets the flags for the bne below
1567           and %o3,%o4,%o3       // combine
1568           stx %o3,[%o0]         // =: *xptr
1569           bne,pt %xcc,1b
1570          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1571 2:      retl
1572        _ nop
1573 #endif
1574 #if COUNTER_LOOPS
1575 //      srl %o2,0,%o2           // zero-extend %o2 = count
1576         brz,pn %o2,2f           // nothing to do if count == 0
1577        _ sllx %o2,3,%o2         // %o2 = 8*count
1578         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1579         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1580 1:        subcc %o2,8,%o2       // decrement counter, which also moves both pointers down
1581           ldx [%o1+%o2],%o3     // fetch next digit of yptr[]
1582           ldx [%o0+%o2],%o4     // fetch next digit of xptr[]
1583           and %o4,%o3,%o3       // combine both
1584           bne,pt %xcc,1b
1585          _ stx %o3,[%o0+%o2]    // store into xptr[], as the STANDARD_LOOPS variant does (not into yptr[])
1586 2:      retl
1587        _ nop
1588 #endif
1589
1590 // extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes *--xptr = ~(*xptr ^ *--yptr) for count 64-bit digits, walking
     // both arrays downward. yptr[] is only read; the result goes to xptr[].
     // Returns immediately when count == 0. Leaf routine, no return value.
     // Exactly one of the two loop variants below is expected to be enabled
     // (STANDARD_LOOPS / COUNTER_LOOPS are defined outside this section).
1591         DECLARE_FUNCTION(eqv_loop_down)
1592 C(eqv_loop_down:) // Input in %o0,%o1,%o2
1593 #if STANDARD_LOOPS
1594 //      srl %o2,0,%o2           // zero-extend %o2 = count
1595         brz,pn %o2,2f           // nothing to do if count == 0
1596        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1597         sub %o0,8,%o0           // %o0 = &xptr[-1]
1598 1:        ldx [%o0],%o3         // *xptr
1599           ldx [%o0+%o1],%o4     // *yptr
1600           subcc %o2,1,%o2       // count--, sets the flags for the bne below
1601           xnor %o3,%o4,%o3      // combine
1602           stx %o3,[%o0]         // =: *xptr
1603           bne,pt %xcc,1b
1604          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1605 2:      retl
1606        _ nop
1607 #endif
1608 #if COUNTER_LOOPS
1609 //      srl %o2,0,%o2           // zero-extend %o2 = count
1610         brz,pn %o2,2f           // nothing to do if count == 0
1611        _ sllx %o2,3,%o2         // %o2 = 8*count
1612         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1613         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1614 1:        subcc %o2,8,%o2       // decrement counter, which also moves both pointers down
1615           ldx [%o1+%o2],%o3     // fetch next digit of yptr[]
1616           ldx [%o0+%o2],%o4     // fetch next digit of xptr[]
1617           xnor %o4,%o3,%o3      // combine both
1618           bne,pt %xcc,1b
1619          _ stx %o3,[%o0+%o2]    // store into xptr[], as the STANDARD_LOOPS variant does (not into yptr[])
1620 2:      retl
1621        _ nop
1622 #endif
1623
1624 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes *--xptr = ~(*xptr & *--yptr) for count 64-bit digits, walking
     // both arrays downward. yptr[] is only read; the result goes to xptr[].
     // Returns immediately when count == 0. Leaf routine, no return value.
     // Exactly one of the two loop variants below is expected to be enabled
     // (STANDARD_LOOPS / COUNTER_LOOPS are defined outside this section).
1625         DECLARE_FUNCTION(nand_loop_down)
1626 C(nand_loop_down:) // Input in %o0,%o1,%o2
1627 #if STANDARD_LOOPS
1628 //      srl %o2,0,%o2           // zero-extend %o2 = count
1629         brz,pn %o2,2f           // nothing to do if count == 0
1630        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1631         sub %o0,8,%o0           // %o0 = &xptr[-1]
1632 1:        ldx [%o0],%o3         // *xptr
1633           ldx [%o0+%o1],%o4     // *yptr
1634           subcc %o2,1,%o2       // count--, sets the flags for the bne below
1635           and %o3,%o4,%o3       // combine
1636           xnor %g0,%o3,%o3      // complement the result
1637           stx %o3,[%o0]         // =: *xptr
1638           bne,pt %xcc,1b
1639          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1640 2:      retl
1641        _ nop
1642 #endif
1643 #if COUNTER_LOOPS
1644 //      srl %o2,0,%o2           // zero-extend %o2 = count
1645         brz,pn %o2,2f           // nothing to do if count == 0
1646        _ sllx %o2,3,%o2         // %o2 = 8*count
1647         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1648         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1649 1:        subcc %o2,8,%o2       // decrement counter, which also moves both pointers down
1650           ldx [%o1+%o2],%o3     // fetch next digit of yptr[]
1651           ldx [%o0+%o2],%o4     // fetch next digit of xptr[]
1652           and %o4,%o3,%o3       // combine both
1653           xnor %g0,%o3,%o3      // complement the result
1654           bne,pt %xcc,1b
1655          _ stx %o3,[%o0+%o2]    // store into xptr[], as the STANDARD_LOOPS variant does (not into yptr[])
1656 2:      retl
1657        _ nop
1658 #endif
1659
1660 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes *--xptr = ~(*xptr | *--yptr) for count 64-bit digits, walking
     // both arrays downward. yptr[] is only read; the result goes to xptr[].
     // Returns immediately when count == 0. Leaf routine, no return value.
     // Exactly one of the two loop variants below is expected to be enabled
     // (STANDARD_LOOPS / COUNTER_LOOPS are defined outside this section).
1661         DECLARE_FUNCTION(nor_loop_down)
1662 C(nor_loop_down:) // Input in %o0,%o1,%o2
1663 #if STANDARD_LOOPS
1664 //      srl %o2,0,%o2           // zero-extend %o2 = count
1665         brz,pn %o2,2f           // nothing to do if count == 0
1666        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1667         sub %o0,8,%o0           // %o0 = &xptr[-1]
1668 1:        ldx [%o0],%o3         // *xptr
1669           ldx [%o0+%o1],%o4     // *yptr
1670           subcc %o2,1,%o2       // count--, sets the flags for the bne below
1671           or %o3,%o4,%o3        // combine
1672           xnor %g0,%o3,%o3      // complement the result
1673           stx %o3,[%o0]         // =: *xptr
1674           bne,pt %xcc,1b
1675          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1676 2:      retl
1677        _ nop
1678 #endif
1679 #if COUNTER_LOOPS
1680 //      srl %o2,0,%o2           // zero-extend %o2 = count
1681         brz,pn %o2,2f           // nothing to do if count == 0
1682        _ sllx %o2,3,%o2         // %o2 = 8*count
1683         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1684         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1685 1:        subcc %o2,8,%o2       // decrement counter, which also moves both pointers down
1686           ldx [%o1+%o2],%o3     // fetch next digit of yptr[]
1687           ldx [%o0+%o2],%o4     // fetch next digit of xptr[]
1688           or %o4,%o3,%o3        // combine both
1689           xnor %g0,%o3,%o3      // complement the result
1690           bne,pt %xcc,1b
1691          _ stx %o3,[%o0+%o2]    // store into xptr[], as the STANDARD_LOOPS variant does (not into yptr[])
1692 2:      retl
1693        _ nop
1694 #endif
1695
1696 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes *--xptr &= ~*--yptr for count 64-bit digits, walking both
     // arrays downward. yptr[] is only read; the result goes to xptr[].
     // Returns immediately when count == 0. Leaf routine, no return value.
     // Exactly one of the two loop variants below is expected to be enabled
     // (STANDARD_LOOPS / COUNTER_LOOPS are defined outside this section).
1697         DECLARE_FUNCTION(andc2_loop_down)
1698 C(andc2_loop_down:) // Input in %o0,%o1,%o2
1699 #if STANDARD_LOOPS
1700 //      srl %o2,0,%o2           // zero-extend %o2 = count
1701         brz,pn %o2,2f           // nothing to do if count == 0
1702        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1703         sub %o0,8,%o0           // %o0 = &xptr[-1]
1704 1:        ldx [%o0],%o3         // *xptr
1705           ldx [%o0+%o1],%o4     // *yptr
1706           subcc %o2,1,%o2       // count--, sets the flags for the bne below
1707           andn %o3,%o4,%o3      // combine: x & ~y
1708           stx %o3,[%o0]         // =: *xptr
1709           bne,pt %xcc,1b
1710          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1711 2:      retl
1712        _ nop
1713 #endif
1714 #if COUNTER_LOOPS
1715 //      srl %o2,0,%o2           // zero-extend %o2 = count
1716         brz,pn %o2,2f           // nothing to do if count == 0
1717        _ sllx %o2,3,%o2         // %o2 = 8*count
1718         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1719         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1720 1:        subcc %o2,8,%o2       // decrement counter, which also moves both pointers down
1721           ldx [%o1+%o2],%o3     // fetch next digit of yptr[]
1722           ldx [%o0+%o2],%o4     // fetch next digit of xptr[]
1723           andn %o4,%o3,%o3      // combine both: x & ~y
1724           bne,pt %xcc,1b
1725          _ stx %o3,[%o0+%o2]    // store into xptr[], as the STANDARD_LOOPS variant does (not into yptr[])
1726 2:      retl
1727        _ nop
1728 #endif
1729
1730 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes *--xptr |= ~*--yptr for count 64-bit digits, walking both
     // arrays downward. yptr[] is only read; the result goes to xptr[].
     // Returns immediately when count == 0. Leaf routine, no return value.
     // Exactly one of the two loop variants below is expected to be enabled
     // (STANDARD_LOOPS / COUNTER_LOOPS are defined outside this section).
1731         DECLARE_FUNCTION(orc2_loop_down)
1732 C(orc2_loop_down:) // Input in %o0,%o1,%o2
1733 #if STANDARD_LOOPS
1734 //      srl %o2,0,%o2           // zero-extend %o2 = count
1735         brz,pn %o2,2f           // nothing to do if count == 0
1736        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1737         sub %o0,8,%o0           // %o0 = &xptr[-1]
1738 1:        ldx [%o0],%o3         // *xptr
1739           ldx [%o0+%o1],%o4     // *yptr
1740           subcc %o2,1,%o2       // count--, sets the flags for the bne below
1741           orn %o3,%o4,%o3       // combine: x | ~y
1742           stx %o3,[%o0]         // =: *xptr
1743           bne,pt %xcc,1b
1744          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1745 2:      retl
1746        _ nop
1747 #endif
1748 #if COUNTER_LOOPS
1749 //      srl %o2,0,%o2           // zero-extend %o2 = count
1750         brz,pn %o2,2f           // nothing to do if count == 0
1751        _ sllx %o2,3,%o2         // %o2 = 8*count
1752         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1753         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1754 1:        subcc %o2,8,%o2       // decrement counter, which also moves both pointers down
1755           ldx [%o1+%o2],%o3     // fetch next digit of yptr[]
1756           ldx [%o0+%o2],%o4     // fetch next digit of xptr[]
1757           orn %o4,%o3,%o3       // combine both: x | ~y
1758           bne,pt %xcc,1b
1759          _ stx %o3,[%o0+%o2]    // store into xptr[], as the STANDARD_LOOPS variant does (not into yptr[])
1760 2:      retl
1761        _ nop
1762 #endif
1763
1764 // extern void not_loop_down (uintD* xptr, uintC count);
     // Complements count 64-bit digits in place, walking downward from xptr
     // (*--xptr = ~*xptr). Returns immediately when count == 0.
     // Leaf routine, no return value.
1765         DECLARE_FUNCTION(not_loop_down)
1766 C(not_loop_down:) // Input in %o0,%o1
1767 #if STANDARD_LOOPS
1768 //      srl %o1,0,%o1           // zero-extend %o1 = count
1769         brz,pn %o1,2f           // nothing to do if count == 0
1770        _ sub %o0,8,%o0          // %o0 = &xptr[-1]
1771 1:        ldx [%o0],%o2         // fetch digit
1772           subcc %o1,1,%o1       // count--, sets the flags for the bne below
1773           xnor %g0,%o2,%o2      // ~digit
1774           stx %o2,[%o0]         // store digit
1775           bne,pt %xcc,1b
1776          _ sub %o0,8,%o0        // step down to the next digit
1777 2:      retl
1778        _ nop
1779 #endif
1780 #if COUNTER_LOOPS
1781 //      srl %o1,0,%o1           // zero-extend %o1 = count
1782         brz,pn %o1,2f           // nothing to do if count == 0
1783        _ sllx %o1,3,%o1         // %o1 = 8*count
1784         sub %o0,%o1,%o0         // %o0 = &xptr[-count]
1785 1:        subcc %o1,8,%o1       // decrement counter, which also moves the pointer down
1786           ldx [%o0+%o1],%o2     // fetch next digit
1787           xnor %g0,%o2,%o2      // ~digit
1788           bne,pt %xcc,1b
1789          _ stx %o2,[%o0+%o1]    // store digit
1790 2:      retl
1791        _ nop
1792 #endif
1793
1794 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward from xptr and yptr and returns 1 as
     // soon as some pair of digits has a nonzero bitwise AND, else 0
     // (also 0 when count == 0). Neither array is written.
     // All digit tests use %xcc, so that bits 32..63 are taken into account.
1795         DECLARE_FUNCTION(and_test_loop_down)
1796 C(and_test_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1797 #if STANDARD_LOOPS
1798 //      srl %o2,0,%o2           // zero-extend %o2 = count
1799         brz,pn %o2,4f           // count == 0: result 0
1800        _ sub %o0,8,%o0
1801 1:        ldx [%o0],%o3         // digit of xptr[]
1802           sub %o1,8,%o1
1803           ldx [%o1],%o4         // digit of yptr[]
1804           subcc %o2,1,%o2       // count--
1805           be,pn %xcc,3f         // that was the last digit
1806          _ andcc %o3,%o4,%g0    // test x & y (sets flags only)
1807           be,pt %xcc,1b         // AND is zero: continue with the next digit
1808          _ sub %o0,8,%o0
1809 2:      retl
1810        _ mov 1,%o0              // a common bit was found
1811 3:      bne,pn %xcc,2b          // last digit: test all 64 bits (a plain bne tests %icc, i.e. only the low 32 bits)
1812        _ nop
1813 4:      retl
1814        _ mov 0,%o0
1815 #endif
1816 #if COUNTER_LOOPS
1817 //      srl %o2,0,%o2           // zero-extend %o2 = count
1818         sllx %o2,3,%o2          // %o2 = 8*count
1819         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1820         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1821         subcc %o2,8,%o2
1822         bcs,pn %xcc,2f          // count was 0: result 0
1823        _ nop
1824           ldx [%o0+%o2],%o3     // fetch next digit of xptr[]
1825 1:        ldx [%o1+%o2],%o4     // fetch the matching digit of yptr[]
1826           andcc %o3,%o4,%g0     // combine both (sets flags only)
1827           bne,pn %xcc,3f        // common bit found
1828          _ subcc %o2,8,%o2      // decrement counter, which also moves both pointers down
1829           bcc,a,pt %xcc,1b      // more digits left
1830          __ ldx [%o0+%o2],%o3   // fetch next digit of xptr[] (only when looping)
1831 2:      retl
1832        _ mov 0,%o0
1833 3:      retl
1834        _ mov 1,%o0
1835 #endif
1836
1837 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Compares count 64-bit digits, walking downward from xptr and yptr.
     // At the first pair of digits that differ, returns +1 if the xptr digit
     // is larger (unsigned) and -1 if it is smaller. Returns 0 if all digits
     // are equal or count == 0. Neither array is written.
     // All digit tests use %xcc, so that bits 32..63 are taken into account.
1838         DECLARE_FUNCTION(compare_loop_down)
1839 C(compare_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1840 #if STANDARD_LOOPS
1841 //      srl %o2,0,%o2           // zero-extend %o2 = count
1842         brz,pn %o2,2f           // count == 0: equal
1843        _ nop
1844 1:        ldx [%o0-8],%o3       // digit of xptr[]
1845           ldx [%o1-8],%o4       // digit of yptr[]
1846           subcc %o3,%o4,%g0     // compare (sets flags only)
1847           bne,pn %xcc,3f        // digits differ
1848          _ sub %o0,8,%o0
1849           subcc %o2,1,%o2       // count--
1850           bne,pn %xcc,1b
1851          _ sub %o1,8,%o1
1852 2:      retl
1853        _ mov 0,%o0
1854 3:      mov 1,%o0               // flags of the digit compare are still valid here
1855         movlu %xcc,-1,%o0       // x < y (unsigned): result -1
1856         retl
1857        _ sra %o0,0,%o0          // sign-extend %o0
1858 #endif
1859 #if COUNTER_LOOPS
1860 //      srl %o2,0,%o2           // zero-extend %o2 = count
1861         sllx %o2,3,%o2          // %o2 = 8*count
1862         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1863         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1864         subcc %o2,8,%o2
1865         bcs,pn %xcc,4f          // count was 0: equal
1866        _ nop
1867           ldx [%o0+%o2],%o3     // fetch next digit of xptr[]
1868 1:        ldx [%o1+%o2],%o4     // fetch the matching digit of yptr[]
1869           subcc %o2,8,%o2       // decrement counter, which also moves both pointers down
1870           bcs,pn %xcc,3f        // that was the last digit
1871          _ subcc %o3,%o4,%g0    // compare (sets flags only)
1872           be,a,pt %xcc,1b       // equal: continue with the next digit
1873          __ ldx [%o0+%o2],%o3   // fetch next digit of xptr[] (only when looping)
1874 2:      mov 1,%o0
1875         movlu %xcc,-1,%o0       // x < y (unsigned): result -1
1876         retl
1877        _ sra %o0,0,%o0          // sign-extend %o0
1878 3:      bne,pn %xcc,2b          // last digit: compare all 64 bits (a plain bne tests %icc, i.e. only the low 32 bits)
1879        _ nop
1880 4:      retl
1881        _ mov 0,%o0
1882 #endif
1883
1884 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Adds two sequences of count 64-bit digits, walking upward:
     // destptr[i] = sourceptr1[i] + sourceptr2[i] + carry from digit i-1.
     // Returns the final carry (0 or 1) in %o0; 0 when count == 0.
     // The carry is kept in %g1 and rebuilt with conditional moves after
     // each of the two additions.
1885         DECLARE_FUNCTION(add_loop_up)
1886 C(add_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
1887 #if STANDARD_LOOPS
1888 //      srl %o3,0,%o3           // zero-extend %o3 = count
1889         brz,pn %o3,2f
1890        _ mov %g0,%g1            // Carry := 0
1891 1:        ldx [%o0],%o4         // source1-digit
1892           add %o0,8,%o0
1893           ldx [%o1],%o5         // source2-digit
1894           add %o1,8,%o1
1895           addcc %o4,%g1,%o4
1896           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
1897           addcc %o4,%o5,%o4
1898           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
1899           stx %o4,[%o2]         // store digit
1900           subcc %o3,1,%o3       // count--
1901           bne,pt %xcc,1b
1902          _ add %o2,8,%o2
1903 2:      retl
1904        _ mov %g1,%o0            // return the carry
1905 #endif
1906 #if COUNTER_LOOPS
1907 //      srl %o3,0,%o3           // zero-extend %o3 = count
1908         brz,pn %o3,2f
1909        _ mov %g0,%g1            // Carry := 0
1910         sub %g0,%o3,%o3         // %o3 = -count
1911         sllx %o3,3,%o3          // %o3 = -8*count
1912         sub %o2,8,%o2           // bias destptr: the store below runs after the index is advanced
1913         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
1914         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
1915         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
1916 1:        ldx [%o0+%o3],%o4     // source1-digit
1917           ldx [%o1+%o3],%o5     // source2-digit
1918           addcc %o4,%g1,%o4
1919           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
1920           addcc %o4,%o5,%o4
1921           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
1922           addcc %o3,8,%o3       // decrement counter, increment pointers
1923           bne,pt %xcc,1b
1924          _ stx %o4,[%o2+%o3]    // store digit
1925 2:      retl
1926        _ mov %g1,%o0            // return the carry
1927 #endif
1928
1929 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Adds count 64-bit digits of sourceptr[] into destptr[], walking upward:
     // destptr[i] += sourceptr[i] + carry from digit i-1.
     // Returns the final carry (0 or 1) in %o0; 0 when count == 0.
1930         DECLARE_FUNCTION(addto_loop_up)
1931 C(addto_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
1932 #if STANDARD_LOOPS
1933 //      srl %o2,0,%o2           // zero-extend %o2 = count
1934         brz,pn %o2,2f
1935        _ mov %g0,%o5            // Carry := 0
1936 1:        ldx [%o0],%o3         // source-digit
1937           add %o0,8,%o0
1938           ldx [%o1],%o4         // dest-digit
1939           addcc %o3,%o5,%o3
1940           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1941           addcc %o3,%o4,%o4
1942           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
1943           stx %o4,[%o1]         // store digit
1944           subcc %o2,1,%o2       // count--
1945           bne,pt %xcc,1b
1946          _ add %o1,8,%o1
1947 2:      retl
1948        _ mov %o5,%o0            // return the carry
1949 #endif
1950 #if COUNTER_LOOPS
1951 //      srl %o2,0,%o2           // zero-extend %o2 = count
1952         brz,pn %o2,2f
1953        _ mov %g0,%o5            // Carry := 0
1954         sub %g0,%o2,%o2         // %o2 = -count
1955         sllx %o2,3,%o2          // %o2 = -8*count
1956         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
1957         sub %o1,%o2,%o1         // %o1 = &destptr[count]
1958           ldx [%o0+%o2],%o3     // source-digit
1959 1:        ldx [%o1+%o2],%o4     // dest-digit
1960           addcc %o3,%o5,%o3
1961           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1962           addcc %o3,%o4,%o4
1963           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
1964           stx %o4,[%o1+%o2]     // store digit
1965           addcc %o2,8,%o2       // decrement counter, increment pointers
1966           bne,a,pt %xcc,1b
1967          __ ldx [%o0+%o2],%o3   // source-digit (only when looping)
1968 2:      retl
1969        _ mov %o5,%o0            // return the carry
1970 #endif
1971
1972 // extern uintD inc_loop_up (uintD* ptr, uintC count);
     // Adds 1 to the number formed by count 64-bit digits starting at ptr,
     // walking upward. Stops at the first digit that does not wrap to zero
     // and returns 0. Returns 1 if every digit wrapped (carry out of the top
     // digit), which includes the case count == 0.
1973         DECLARE_FUNCTION(inc_loop_up)
1974 C(inc_loop_up:) // Input in %o0,%o1, Output in %o0
1975 #if STANDARD_LOOPS
1976 //      srl %o1,0,%o1           // zero-extend %o1 = count
1977         brz,pn %o1,2f
1978        _ nop
1979           ldx [%o0],%o2         // fetch digit
1980 1:        add %o0,8,%o0
1981           addcc %o2,1,%o2       // increment
1982           bne,pn %xcc,3f        // result nonzero: no carry, done
1983          _ stx %o2,[%o0-8]      // store (executed in either case)
1984           subcc %o1,1,%o1       // count--
1985           bne,a,pt %xcc,1b
1986          __ ldx [%o0],%o2       // fetch next digit (only when looping)
1987 2:      retl
1988        _ mov 1,%o0              // carry out
1989 3:      retl
1990        _ mov 0,%o0              // no carry
1991 #endif
1992 #if COUNTER_LOOPS
1993 //      srl %o1,0,%o1           // zero-extend %o1 = count
1994         brz,pn %o1,2f
1995        _ sub %g0,%o1,%o1        // %o1 = -count
1996         sllx %o1,3,%o1          // %o1 = -8*count
1997         sub %o0,%o1,%o0         // %o0 = &ptr[count]
1998           ldx [%o0+%o1],%o2     // fetch digit
1999 1:        addcc %o2,1,%o2       // increment
2000           bne,pn %xcc,3f        // result nonzero: no carry, done
2001          _ stx %o2,[%o0+%o1]    // store (executed in either case)
2002           addcc %o1,8,%o1       // decrement counter, increment pointer
2003           bne,a,pt %xcc,1b
2004          __ ldx [%o0+%o1],%o2   // fetch next digit (only when looping)
2005 2:      retl
2006        _ mov 1,%o0              // carry out
2007 3:      retl
2008        _ mov 0,%o0              // no carry
2009 #endif
2010
2011 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Subtracts two sequences of count 64-bit digits, walking upward:
     // destptr[i] = sourceptr1[i] - sourceptr2[i] - borrow from digit i-1.
     // Returns the final borrow (0 or 1) in %o0; 0 when count == 0.
     // The borrow is first added to the source2 digit; the two partial
     // carries can never both be set for the same digit.
2012         DECLARE_FUNCTION(sub_loop_up)
2013 C(sub_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
2014 #if STANDARD_LOOPS
2015 //      srl %o3,0,%o3           // zero-extend %o3 = count
2016         brz,pn %o3,2f
2017        _ mov %g0,%g1            // Carry := 0
2018 1:        ldx [%o0],%o4         // source1-digit
2019           add %o0,8,%o0
2020           ldx [%o1],%o5         // source2-digit
2021           add %o1,8,%o1
2022           addcc %o5,%g1,%o5
2023           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
2024           subcc %o4,%o5,%o4
2025           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
2026           stx %o4,[%o2]         // store digit
2027           subcc %o3,1,%o3       // count--
2028           bne,pt %xcc,1b
2029          _ add %o2,8,%o2
2030 2:      retl
2031        _ mov %g1,%o0            // return the borrow
2032 #endif
2033 #if COUNTER_LOOPS
2034 //      srl %o3,0,%o3           // zero-extend %o3 = count
2035         brz,pn %o3,2f
2036        _ mov %g0,%g1            // Carry := 0
2037         sub %g0,%o3,%o3         // %o3 = -count
2038         sllx %o3,3,%o3          // %o3 = -8*count
2039         sub %o2,8,%o2           // bias destptr: the store below runs after the index is advanced
2040         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
2041         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
2042         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
2043 1:        ldx [%o1+%o3],%o5     // source2-digit
2044           ldx [%o0+%o3],%o4     // source1-digit
2045           addcc %o5,%g1,%o5
2046           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
2047           subcc %o4,%o5,%o4
2048           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
2049           addcc %o3,8,%o3       // decrement counter, increment pointers
2050           bne,pt %xcc,1b
2051          _ stx %o4,[%o2+%o3]    // store digit
2052 2:      retl
2053        _ mov %g1,%o0            // return the borrow
2054 #endif
2055
2056 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
     // Subtracts two sequences of count 64-bit digits with an incoming borrow,
     // walking upward: destptr[i] = sourceptr1[i] - sourceptr2[i] + carry,
     // where carry is 0 (no borrow) or -1 (borrow). Returns the final carry
     // (0 or -1) in %o0; when count == 0 the incoming carry is returned.
     //
     // Carry handling: the borrow is first folded into the source2 digit with
     // "subcc %o5,%g1,%o5" (i.e. %o5 + 1 when %g1 = -1). That subtraction sets
     // the C flag exactly when %o5 + 1 did NOT wrap around, so the borrow has
     // to be cleared on carry-set (movcs) and kept on carry-clear. With
     // %g1 = 0 the flag is never set and %g1 stays 0.
2057         DECLARE_FUNCTION(subx_loop_up)
2058 C(subx_loop_up:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1, Output in %o0
2059 #if STANDARD_LOOPS
2060 //      srl %o3,0,%o3           // zero-extend %o3 = count
2061         brz,pn %o3,2f
2062        _ mov %o4,%g1            // Carry (0 or -1)
2063 1:        ldx [%o0],%o4         // source1-digit
2064           add %o0,8,%o0
2065           ldx [%o1],%o5         // source2-digit
2066           add %o1,8,%o1
2067           subcc %o5,%g1,%o5
2068           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1 (borrow survives only if %o5 wrapped to 0)
2069           subcc %o4,%o5,%o4
2070           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
2071           stx %o4,[%o2]         // store digit
2072           subcc %o3,1,%o3       // count--
2073           bne,pt %xcc,1b
2074          _ add %o2,8,%o2
2075 2:      retl
2076        _ mov %g1,%o0            // return the carry
2077 #endif
2078 #if COUNTER_LOOPS
2079 //      srl %o3,0,%o3           // zero-extend %o3 = count
2080         brz,pn %o3,2f
2081        _ mov %o4,%g1            // Carry (0 or -1)
2082         sub %g0,%o3,%o3         // %o3 = -count
2083         sllx %o3,3,%o3          // %o3 = -8*count
2084         sub %o2,8,%o2           // bias destptr: the store below runs after the index is advanced
2085         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
2086         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
2087         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
2088 1:        ldx [%o1+%o3],%o5     // source2-digit
2089           ldx [%o0+%o3],%o4     // source1-digit
2090           subcc %o5,%g1,%o5
2091           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1 (borrow survives only if %o5 wrapped to 0)
2092           subcc %o4,%o5,%o4
2093           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
2094           addcc %o3,8,%o3       // decrement counter, increment pointers
2095           bne,pt %xcc,1b
2096          _ stx %o4,[%o2+%o3]    // store digit
2097 2:      retl
2098        _ mov %g1,%o0            // return the carry
2099 #endif
2100
2101 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Subtracts count 64-bit digits of sourceptr[] from destptr[], walking
     // upward: destptr[i] -= sourceptr[i] + borrow from digit i-1.
     // Returns the final borrow (0 or 1) in %o0; 0 when count == 0.
2102         DECLARE_FUNCTION(subfrom_loop_up)
2103 C(subfrom_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
2104 #if STANDARD_LOOPS
2105 //      srl %o2,0,%o2           // zero-extend %o2 = count
2106         brz,pn %o2,2f
2107        _ mov %g0,%o5            // Carry := 0
2108 1:        ldx [%o0],%o3         // source-digit
2109           add %o0,8,%o0
2110           ldx [%o1],%o4         // dest-digit
2111           addcc %o3,%o5,%o3
2112           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
2113           subcc %o4,%o3,%o4
2114           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
2115           stx %o4,[%o1]         // store digit
2116           subcc %o2,1,%o2       // count--
2117           bne,pt %xcc,1b
2118          _ add %o1,8,%o1
2119 2:      retl
2120        _ mov %o5,%o0            // return the borrow
2121 #endif
2122 #if COUNTER_LOOPS
2123 //      srl %o2,0,%o2           // zero-extend %o2 = count
2124         brz,pn %o2,2f
2125        _ mov %g0,%o5            // Carry := 0
2126         sub %g0,%o2,%o2         // %o2 = -count
2127         sllx %o2,3,%o2          // %o2 = -8*count
2128         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
2129         sub %o1,%o2,%o1         // %o1 = &destptr[count]
2130           ldx [%o0+%o2],%o3     // source-digit
2131 1:        ldx [%o1+%o2],%o4     // dest-digit
2132           addcc %o3,%o5,%o3
2133           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
2134           subcc %o4,%o3,%o4
2135           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
2136           stx %o4,[%o1+%o2]     // store digit
2137           addcc %o2,8,%o2       // decrement counter, increment pointers
2138           bne,a,pt %xcc,1b
2139          __ ldx [%o0+%o2],%o3   // source-digit (only when looping)
2140 2:      retl
2141        _ mov %o5,%o0            // return the borrow
2142 #endif
2143
2144 // extern uintD dec_loop_up (uintD* ptr, uintC count);
     // Subtracts 1 from the number formed by count 64-bit digits starting at
     // ptr, walking upward. Stops at the first digit that does not borrow and
     // returns 0. Returns -1 if every digit borrowed (borrow out of the top
     // digit), which includes the case count == 0.
2145         DECLARE_FUNCTION(dec_loop_up)
2146 C(dec_loop_up:) // Input in %o0,%o1, Output in %o0
2147 #if STANDARD_LOOPS
2148 //      srl %o1,0,%o1           // zero-extend %o1 = count
2149         brz,pn %o1,2f
2150        _ nop
2151           ldx [%o0],%o2         // fetch digit
2152 1:        add %o0,8,%o0
2153           subcc %o2,1,%o2       // decrement
2154           bcc,pn %xcc,3f        // no borrow: done
2155          _ stx %o2,[%o0-8]      // store (executed in either case)
2156           subcc %o1,1,%o1       // count--
2157           bne,a,pt %xcc,1b
2158          __ ldx [%o0],%o2       // fetch next digit (only when looping)
2159 2:      retl
2160        _ mov -1,%o0             // borrow out
2161 3:      retl
2162        _ mov 0,%o0              // no borrow
2163 #endif
2164 #if COUNTER_LOOPS
2165 //      srl %o1,0,%o1           // zero-extend %o1 = count
2166         brz,pn %o1,2f
2167        _ sub %g0,%o1,%o1        // %o1 = -count
2168         sllx %o1,3,%o1          // %o1 = -8*count
2169         sub %o0,%o1,%o0         // %o0 = &ptr[count]
2170           ldx [%o0+%o1],%o2     // fetch digit
2171 1:        subcc %o2,1,%o2       // decrement
2172           bcc,pn %xcc,3f        // no borrow: done
2173          _ stx %o2,[%o0+%o1]    // store (executed in either case)
2174           addcc %o1,8,%o1       // decrement counter, increment pointer
2175           bne,a,pt %xcc,1b
2176          __ ldx [%o0+%o1],%o2   // fetch next digit (only when looping)
2177 2:      retl
2178        _ mov -1,%o0             // borrow out
2179 3:      retl
2180        _ mov 0,%o0              // no borrow
2181 #endif
2182
2183 // extern uintD neg_loop_up (uintD* ptr, uintC count);
     // Negates (two's complement) the number formed by count 64-bit digits
     // starting at ptr, in place, walking upward. Zero digits at the low end
     // are skipped, the first nonzero digit is negated, and every digit above
     // it is complemented. Returns 0 if all digits were zero (or count == 0),
     // otherwise -1.
2184         DECLARE_FUNCTION(neg_loop_up)
2185 C(neg_loop_up:) // Input in %o0,%o1, Output in %o0
2186 #if STANDARD_LOOPS
2187 //      srl %o1,0,%o1           // zero-extend %o1 = count
2188         // search for the first digit /=0:
2189         brz,pn %o1,2f
2190        _ add %o0,8,%o0
2191 1:        ldx [%o0-8],%o2       // fetch digit
2192           subcc %g0,%o2,%o2     // negate, test
2193           bne,pn %xcc,3f        // nonzero digit found
2194          _ subcc %o1,1,%o1      // count-- (its flags are used by the next branch, here or at 3:)
2195           bne,pt %xcc,1b
2196          _ add %o0,8,%o0
2197 2:      retl
2198        _ mov 0,%o0              // all digits were zero
2199 3:      // first digit /=0 found, from now on there are carries
2200         // negate 1 digit, invert all other digits:
2201         be,pn %xcc,5f           // that was the last digit
2202        _ stx %o2,[%o0-8]        // store the negated digit
2203 4:        ldx [%o0],%o2         // fetch digit
2204           subcc %o1,1,%o1       // count--
2205           xnor %g0,%o2,%o2      // ~digit
2206           stx %o2,[%o0]         // store digit
2207           bne,pt %xcc,4b
2208          _ add %o0,8,%o0
2209 5:      retl
2210        _ mov -1,%o0
2211 #endif
2212 #if COUNTER_LOOPS
2213 //      srl %o1,0,%o1           // zero-extend %o1 = count
2214         // search for the first digit /=0:
2215         brz,pn %o1,2f
2216        _ sub %g0,%o1,%o1        // %o1 = -count
2217         sllx %o1,3,%o1          // %o1 = -8*count
2218         sub %o0,%o1,%o0         // %o0 = &ptr[count]
2219           ldx [%o0+%o1],%o2     // fetch digit
2220 1:        subcc %g0,%o2,%o2     // negate, test
2221           bne,pn %xcc,3f        // nonzero digit found
2222          _ addcc %o1,8,%o1      // decrement counter, increment pointer
2223           bne,a,pt %xcc,1b
2224          __ ldx [%o0+%o1],%o2   // fetch next digit (only when looping)
2225 2:      retl
2226        _ mov 0,%o0              // all digits were zero
2227 3:      // first digit /=0 found, from now on there are carries
2228         // invert all other digits:
2229         sub %o1,8,%o1           // step back to the digit just negated
2230         stx %o2,[%o0+%o1]       // store
2231         addcc %o1,8,%o1
2232         be,pn %xcc,5f           // that was the last digit
2233        _ nop
2234           ldx [%o0+%o1],%o2     // fetch digit
2235 4:        xnor %g0,%o2,%o2      // ~digit
2236           stx %o2,[%o0+%o1]     // store digit
2237           addcc %o1,8,%o1       // decrement counter, increment pointer
2238           bne,a,pt %xcc,4b
2239          __ ldx [%o0+%o1],%o2   // fetch next digit (only when looping)
2240 5:      retl
2241        _ mov -1,%o0
2242 #endif
2243
2244 // extern uintD shift1left_loop_up (uintD* ptr, uintC count);
     // Shifts the number formed by count 64-bit digits starting at ptr left by
     // one bit, in place, walking upward. A 0 bit is shifted in at the bottom.
     // Returns the bit shifted out of the top digit (0 or 1); 0 when count == 0.
2245         DECLARE_FUNCTION(shift1left_loop_up)
2246 C(shift1left_loop_up:) // Input in %o0,%o1, Output in %o0
2247 //      srl %o1,0,%o1           // zero-extend %o1 = count
2248         brz,pn %o1,2f
2249        _ mov 0,%o3              // Carry := 0
2250 1:        ldx [%o0],%o2         // Digit
2251           addcc %o2,%o2,%o4     // shift
2252           add %o4,%o3,%o4       // and add the carry
2253           srlx %o2,63,%o3       // new carry = old top bit of the digit
2254           stx %o4,[%o0]         // store digit
2255           subcc %o1,1,%o1       // count--
2256           bne,pt %xcc,1b
2257          _ add %o0,8,%o0
2258 2:      retl
2259        _ mov %o3,%o0            // return the carry
2260
2261 // extern uintD shiftleft_loop_up (uintD* ptr, uintC count, uintC i, uintD carry);
     // Shifts the number formed by count 64-bit digits starting at ptr left by
     // i bits, in place, walking upward. The incoming carry is ORed into the
     // lowest digit. Returns the bits shifted out of the top digit (in the low
     // i bits); when count == 0 the incoming carry is returned unchanged.
     // NOTE(review): the carry is computed with a right shift by -i, relying on
     // the shift instructions using only the low 6 bits of the count; this
     // presumably requires 0 < i < 64 -- confirm against the callers.
2262         DECLARE_FUNCTION(shiftleft_loop_up)
2263 C(shiftleft_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
2264 //      srl %o1,0,%o1           // zero-extend %o1 = count
2265         brz,pn %o1,2f
2266        _ sub %g0,%o2,%g1        // 64-i (mod 64)
2267 1:        ldx [%o0],%o4         // Digit
2268           subcc %o1,1,%o1       // count--, sets the flags for the bne below
2269           sllx %o4,%o2,%o5      // its lower (64-i) bits
2270           or %o3,%o5,%o5        // combine with the old carry
2271           stx %o5,[%o0]         // store digit
2272           srlx %o4,%g1,%o3      // its highest i bits yield the new carry
2273           bne,pt %xcc,1b
2274          _ add %o0,8,%o0
2275 2:      retl
2276        _ mov %o3,%o0            // return the carry
2277
2278 #endif
2279
2280 // extern uintD shiftleftcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
     // Reads count 64-bit digits upward from sourceptr, shifts the number left
     // by i bits and writes the result upward to destptr. Zero bits are shifted
     // in at the bottom. Returns the bits shifted out of the top digit (in the
     // low i bits); 0 when count == 0.
     // NOTE(review): the carry is computed with a right shift by -i, relying on
     // the shift instructions using only the low 6 bits of the count; this
     // presumably requires 0 < i < 64 -- confirm against the callers.
2281         DECLARE_FUNCTION(shiftleftcopy_loop_up)
2282 C(shiftleftcopy_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1,%g2, Output in %o0
2283 //      srl %o2,0,%o2           // zero-extend %o2 = count
2284         brz,pn %o2,2f
2285        _ mov 0,%o4              // Carry := 0
2286         sub %g0,%o3,%g1         // 64-i (mod 64)
2287 1:        ldx [%o0],%o5         // Digit
2288           subcc %o2,1,%o2       // count--, sets the flags for the bne below
2289           sllx %o5,%o3,%g2      // its lower (64-i) bits
2290           or %o4,%g2,%g2        // combine with the old carry
2291           stx %g2,[%o1]         // store digit
2292           add %o1,8,%o1
2293           srlx %o5,%g1,%o4      // its highest i bits yield the new carry
2294           bne,pt %xcc,1b
2295          _ add %o0,8,%o0
2296 2:      retl
2297        _ mov %o4,%o0            // return the carry
2298
2299 #if !CL_DS_BIG_ENDIAN_P
2300
2301 // extern uintD shift1right_loop_down (uintD* ptr, uintC count, uintD carry);
     // Shifts the number formed by the count 64-bit digits below ptr right by
     // one bit, in place, walking downward. Bit 0 of carry is shifted into the
     // top bit of the first digit. Returns the bit shifted out of the last
     // digit, placed in bit 63 of the result; when count == 0 the result is
     // carry << 63.
2302         DECLARE_FUNCTION(shift1right_loop_down)
2303 C(shift1right_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
2304 //      srl %o1,0,%o1           // zero-extend %o1 = count
2305         brz,pn %o1,2f
2306        _ sllx %o2,63,%o2        // Carry
2307         sub %o0,8,%o0           // %o0 = &ptr[-1]
2308 1:        ldx [%o0],%o3         // Digit
2309           subcc %o1,1,%o1       // count--, sets the flags for the bne below
2310           srlx %o3,1,%o4        // shift
2311           or %o2,%o4,%o4        // and combine with the old carry
2312           stx %o4,[%o0]         // and store
2313           sllx %o3,63,%o2       // new carry
2314           bne,pt %xcc,1b
2315          _ sub %o0,8,%o0
2316 2:      retl
2317        _ mov %o2,%o0            // return the carry
2318
2319 // extern uintD shiftright_loop_down (uintD* ptr, uintC count, uintC i);
     // Shifts the number formed by the count 64-bit digits below ptr right by
     // i bits (logical shift), in place, walking downward. Zero bits are
     // shifted in at the top. Returns the bits shifted out of the last digit,
     // left-aligned in the result; 0 when count == 0.
     // NOTE(review): the carry is computed with a left shift by -i, relying on
     // the shift instructions using only the low 6 bits of the count; this
     // presumably requires 0 < i < 64 -- confirm against the callers.
2320         DECLARE_FUNCTION(shiftright_loop_down)
2321 C(shiftright_loop_down:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
2322 //      srl %o1,0,%o1           // zero-extend %o1 = count
2323         sub %g0,%o2,%g1         // 64-i (mod 64)
2324         brz,pn %o1,2f
2325        _ or %g0,%g0,%o3         // Carry := 0
2326         sub %o0,8,%o0           // %o0 = &ptr[-1]
2327 1:        ldx [%o0],%o4         // Digit
2328           subcc %o1,1,%o1       // count--, sets the flags for the bne below
2329           srlx %o4,%o2,%o5      // shift
2330           or %o3,%o5,%o5        // and combine with the old carry
2331           stx %o5,[%o0]         // and store
2332           sllx %o4,%g1,%o3      // new carry
2333           bne,pt %xcc,1b
2334          _ sub %o0,8,%o0
2335 2:      retl
2336        _ mov %o3,%o0            // return the carry
2337
2338 // extern uintD shiftrightsigned_loop_down (uintD* ptr, uintC count, uintC i);
     // Shifts the number formed by the count 64-bit digits below ptr right by
     // i bits, in place, walking downward. The first (topmost) digit is shifted
     // arithmetically, so its sign bit is replicated; all further digits are
     // shifted logically with the carry of the digit above. Returns the bits
     // shifted out of the last digit, left-aligned in the result.
     // The first digit is loaded and stored before count is examined, so the
     // code as written expects count >= 1.
     // NOTE(review): the carry is computed with a left shift by -i, relying on
     // the shift instructions using only the low 6 bits of the count; this
     // presumably requires 0 < i < 64 -- confirm against the callers.
2339         DECLARE_FUNCTION(shiftrightsigned_loop_down)
2340 C(shiftrightsigned_loop_down:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
2341 //      srl %o1,0,%o1           // zero-extend %o1 = count
2342         ldx [%o0-8],%o4         // first digit
2343         sub %g0,%o2,%g1         // 64-i (mod 64)
2344         srax %o4,%o2,%o5        // shift (arithmetic)
2345         stx %o5,[%o0-8]         // and store
2346         sllx %o4,%g1,%o3        // new carry
2347         subcc %o1,1,%o1         // count--
2348         be,pn %xcc,2f           // only one digit: done
2349        _ sub %o0,16,%o0         // %o0 = &ptr[-2]
2350 1:        ldx [%o0],%o4         // Digit
2351           subcc %o1,1,%o1       // count--, sets the flags for the bne below
2352           srlx %o4,%o2,%o5      // shift
2353           or %o3,%o5,%o5        // and combine with the old carry
2354           stx %o5,[%o0]         // and store
2355           sllx %o4,%g1,%o3      // new carry
2356           bne,pt %xcc,1b
2357          _ sub %o0,8,%o0
2358 2:      retl
2359        _ mov %o3,%o0            // return the carry
2360
// extern uintD shiftrightcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
// Reads count 64-bit digits downwards from sourceptr, shifts them right by
// i bits and stores the results downwards from destptr. Both pointers are
// decremented before their first access.
//   In:  %o0 = sourceptr, %o1 = destptr
//        %o2 = count (arrives zero-extended)
//        %o3 = i; only the low 6 bits matter to the shift instructions.
//              NOTE(review): presumably 0 < i < 64 is required - confirm.
//        %o4 = carry; shifted left by 64-i it supplies the top bits of the
//              first digit stored
//   Out: %o0 = the bits shifted out of the last digit, left-aligned
//              (for count == 0: carry << (64-i))
//   Modifies %g1, %g2, %o0-%o2, %o4, %o5 and the integer condition codes.
        DECLARE_FUNCTION(shiftrightcopy_loop_down)
C(shiftrightcopy_loop_down:)
        sub %g0,%o3,%g1                 // %g1 = 64-i (mod 64)
        brz,pn %o2,2f                   // count == 0: only the carry is returned
       _ sllx %o4,%g1,%g2               // delay slot: initial carry, left-aligned
        sub %o0,8,%o0                   // address of the first source digit
1:        ldx [%o0],%o4                 // fetch the source digit
          srlx %o4,%o3,%o5              // shift it right by i
          sub %o1,8,%o1                 // destination slot for this digit
          or %o5,%g2,%o5                // merge the previous carry into the top bits
          sllx %o4,%g1,%g2              // low i bits, left-aligned = next carry
          stx %o5,[%o1]                 // store the result
          subcc %o2,1,%o2               // one digit less to go
          bne,pt %xcc,1b
         _ sub %o0,8,%o0                // delay slot: next lower source digit
2:      retl
       _ mov %g2,%o0                    // delay slot: hand back the carry
2380
// extern uintD mulusmall_loop_up (uintD digit, uintD* ptr, uintC len, uintD newdigit);
// Multiplies len 64-bit digits by a small multiplier, in place, walking
// towards higher addresses: each digit x becomes the low 64 bits of
// x*digit + carry, and the part above 64 bits is the carry for the next digit.
//   In:  %o0 = digit (the original comment calls it a 6-bit number)
//        %o1 = ptr
//        %o2 = len (arrives zero-extended)
//        %o3 = newdigit, the initial carry
//        NOTE(review): the partial sums are formed without overflow checks,
//        so digit and newdigit are presumably both well below 2^32 - confirm.
//   Out: %o0 = final carry (newdigit itself for len == 0)
//   Modifies %g1, %o0-%o5 and the integer condition codes.
        DECLARE_FUNCTION(mulusmall_loop_up)
C(mulusmall_loop_up:)
        brz,pn %o2,2f                   // len == 0: return newdigit unchanged
       _ nop
1:        ldx [%o1],%o4                 // x = next digit
          srlx %o4,32,%o5               // high32(x)
          srl %o4,0,%o4                 // low32(x)
          mulx %o5,%o0,%o5              // high32(x)*digit
          mulx %o4,%o0,%o4              // low32(x)*digit
          sllx %o5,32,%g1               // low32(high32(x)*digit) * 2^32
          add %o3,%g1,%g1               // plus the incoming carry
          srlx %o5,32,%o3               // high32(high32(x)*digit) = carry candidate
          addcc %o4,%g1,%o4             // plus low32(x)*digit, may overflow 64 bits
          add %o3,1,%g1
          movcs %xcc,%g1,%o3            // on overflow the new carry is one larger
          stx %o4,[%o1]                 // store the new digit
          sub %o2,1,%o2                 // one digit less to go
          brnz,pt %o2,1b
         _ add %o1,8,%o1                // delay slot: next higher digit
2:      retl
       _ mov %o3,%o0                    // delay slot: hand back the carry
2406
// extern void mulu_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
// Multiplies len 64-bit source digits by digit, walking towards higher
// addresses. For each source digit x the low 64 bits of x*digit + carry are
// stored to the destination and the high 64 bits become the next carry.
// After the loop the final carry is stored as one more destination digit.
// The 64x64 product is assembled from four 32x32 mulx partial products.
//   In:  %i0 = digit, %i1 = sourceptr, %i2 = destptr
//        %i3 = len. The loop body runs before len is tested, so len >= 1
//              is assumed.
//   Uses a register window of its own (save/restore).
        DECLARE_FUNCTION(mulu_loop_up)
C(mulu_loop_up:)
        save %sp,-192,%sp
        sub %i1,%i2,%i1                 // %i1 = sourceptr - destptr, so that
                                        // [%i1+%i2] follows destptr
        srl %i0,0,%l2                   // %l2 = low32(digit)
        srlx %i0,32,%l1                 // %l1 = high32(digit)
        mov 1,%l3
        sllx %l3,32,%l3                 // %l3 = 2^32
        mov 0,%l0                       // %l0 = carry, initially 0
1:        ldx [%i1+%i2],%o0             // x = next source digit
          srl %o0,0,%o2                 // low32(x)
          srlx %o0,32,%o1               // high32(x)
          subcc %i3,1,%i3               // one digit less to go
          // (%l1*2^32+%l2) * x + %l0 -> %l0|%o0
          mulx %l1,%o2,%o4              // first mid part
          mulx %l2,%o2,%o2              // low part
          mulx %l1,%o1,%o3              // high part
          mulx %l2,%o1,%o1              // second mid part
          srlx %o2,32,%o5               // upper half of the low part
          add %o4,%o5,%o4               // goes into one mid part, cannot overflow
          add %o3,%l3,%o5               // high part + 2^32, in case it is needed
          addcc %o4,%o1,%o4             // sum of both mid parts
          movcs %xcc,%o5,%o3            // overflow there is worth 2^32 in the high part
          srl %o2,0,%o2                 // low32(low part)
          srlx %o4,32,%o5               // high32(mid parts)
          sllx %o4,32,%o4               // low32(mid parts) * 2^32
          add %o2,%o4,%o0               // low 64 bits of the product
          addcc %o0,%l0,%o0             // add the incoming carry
          add %o3,%o5,%l0               // high 64 bits of the product
          add %l0,1,%o5
          movcs %xcc,%o5,%l0            // new carry, one larger if the add overflowed
          // multiplication done
          stx %o0,[%i2]                 // store the low digit
          brnz,pt %i3,1b
         _ add %i2,8,%i2                // delay slot: advance destptr
        stx %l0,[%i2]                   // store the final carry
        ret
       _ restore
2446
// extern uintD muluadd_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
// Multiply-and-accumulate over len 64-bit digits, walking towards higher
// addresses. For each source digit x the low 64 bits of x*digit + carry are
// added to the destination digit; the high 64 bits, plus one if that
// addition overflowed, become the next carry.
// The 64x64 product is assembled from four 32x32 mulx partial products.
//   In:  %i0 = digit, %i1 = sourceptr, %i2 = destptr
//        %i3 = len. The loop body runs before len is tested, so len >= 1
//              is assumed.
//   Out: %i0 = final carry (seen by the caller in its %o0)
//   Uses a register window of its own (save/restore); %i4 serves as scratch.
        DECLARE_FUNCTION(muluadd_loop_up)
C(muluadd_loop_up:)
        save %sp,-192,%sp
        sub %i1,%i2,%i1                 // %i1 = sourceptr - destptr, so that
                                        // [%i1+%i2] follows destptr
        srl %i0,0,%l2                   // %l2 = low32(digit)
        srlx %i0,32,%l1                 // %l1 = high32(digit)
        mov 1,%l3
        sllx %l3,32,%l3                 // %l3 = 2^32
        mov 0,%l0                       // %l0 = carry, initially 0
1:        ldx [%i1+%i2],%o0             // x = next source digit
          ldx [%i2],%i4                 // current destination digit
          srl %o0,0,%o2                 // low32(x)
          srlx %o0,32,%o1               // high32(x)
          subcc %i3,1,%i3               // one digit less to go
          // (%l1*2^32+%l2) * x + %l0 -> %l0|%o0
          mulx %l1,%o2,%o4              // first mid part
          mulx %l2,%o2,%o2              // low part
          mulx %l1,%o1,%o3              // high part
          mulx %l2,%o1,%o1              // second mid part
          srlx %o2,32,%o5               // upper half of the low part
          add %o4,%o5,%o4               // goes into one mid part, cannot overflow
          add %o3,%l3,%o5               // high part + 2^32, in case it is needed
          addcc %o4,%o1,%o4             // sum of both mid parts
          movcs %xcc,%o5,%o3            // overflow there is worth 2^32 in the high part
          srl %o2,0,%o2                 // low32(low part)
          srlx %o4,32,%o5               // high32(mid parts)
          sllx %o4,32,%o4               // low32(mid parts) * 2^32
          add %o2,%o4,%o0               // low 64 bits of the product
          addcc %o0,%l0,%o0             // add the incoming carry
          add %o3,%o5,%l0               // high 64 bits of the product
          add %l0,1,%o5
          movcs %xcc,%o5,%l0            // new carry, one larger if the add overflowed
          // multiplication done
          add %l0,1,%o2                 // carry + 1, in case the next add overflows
          addcc %i4,%o0,%o0             // add the old destination digit
          movcs %xcc,%o2,%l0            // new carry
          stx %o0,[%i2]                 // store the low digit
          brnz,pt %i3,1b
         _ add %i2,8,%i2                // delay slot: advance destptr
        mov %l0,%i0                     // return the final carry
        ret
       _ restore
2490
// extern uintD mulusub_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
// Multiply-and-subtract over len 64-bit digits, walking towards higher
// addresses. For each source digit x the low 64 bits of x*digit + carry are
// subtracted from the destination digit; the high 64 bits, plus one if that
// subtraction borrowed, become the next carry.
// The 64x64 product is assembled from four 32x32 mulx partial products.
//   In:  %i0 = digit, %i1 = sourceptr, %i2 = destptr
//        %i3 = len. The loop body runs before len is tested, so len >= 1
//              is assumed.
//   Out: %i0 = final carry (seen by the caller in its %o0)
//   Uses a register window of its own (save/restore); %i4 serves as scratch.
        DECLARE_FUNCTION(mulusub_loop_up)
C(mulusub_loop_up:)
        save %sp,-192,%sp
        sub %i1,%i2,%i1                 // %i1 = sourceptr - destptr, so that
                                        // [%i1+%i2] follows destptr
        srl %i0,0,%l2                   // %l2 = low32(digit)
        srlx %i0,32,%l1                 // %l1 = high32(digit)
        mov 1,%l3
        sllx %l3,32,%l3                 // %l3 = 2^32
        mov 0,%l0                       // %l0 = carry, initially 0
1:        ldx [%i1+%i2],%o0             // x = next source digit
          ldx [%i2],%i4                 // current destination digit
          srl %o0,0,%o2                 // low32(x)
          srlx %o0,32,%o1               // high32(x)
          subcc %i3,1,%i3               // one digit less to go
          // (%l1*2^32+%l2) * x + %l0 -> %l0|%o0
          mulx %l1,%o2,%o4              // first mid part
          mulx %l2,%o2,%o2              // low part
          mulx %l1,%o1,%o3              // high part
          mulx %l2,%o1,%o1              // second mid part
          srlx %o2,32,%o5               // upper half of the low part
          add %o4,%o5,%o4               // goes into one mid part, cannot overflow
          add %o3,%l3,%o5               // high part + 2^32, in case it is needed
          addcc %o4,%o1,%o4             // sum of both mid parts
          movcs %xcc,%o5,%o3            // overflow there is worth 2^32 in the high part
          srl %o2,0,%o2                 // low32(low part)
          srlx %o4,32,%o5               // high32(mid parts)
          sllx %o4,32,%o4               // low32(mid parts) * 2^32
          add %o2,%o4,%o0               // low 64 bits of the product
          addcc %o0,%l0,%o0             // add the incoming carry
          add %o3,%o5,%l0               // high 64 bits of the product
          add %l0,1,%o5
          movcs %xcc,%o5,%l0            // new carry, one larger if the add overflowed
          // multiplication done
          add %l0,1,%o2                 // carry + 1, in case the subtraction borrows
          subcc %i4,%o0,%o0             // subtract from the old destination digit
          movcs %xcc,%o2,%l0            // new carry
          stx %o0,[%i2]                 // store the low digit
          brnz,pt %i3,1b
         _ add %i2,8,%i2                // delay slot: advance destptr
        mov %l0,%i0                     // return the final carry
        ret
       _ restore
2534
2535 #endif
2536
// extern void shiftxor_loop_up (uintD* xptr, const uintD* yptr, uintC count, uintC i);
// XORs the count 64-bit digits at yptr, shifted left by i bits, into the
// digits at xptr, walking towards higher addresses. Each y digit contributes
// (y << i) to the x digit at the same index and (y >> (64-i)) to the next
// higher x digit, so for count > 0 the x digit at index count is read and
// written as well.
//   In:  %o0 = xptr, %o1 = yptr
//        %o2 = count (arrives zero-extended)
//        %o3 = i; only the low 6 bits matter to the shift instructions.
//              NOTE(review): presumably 0 < i < 64 is required - confirm.
//   Modifies %g1 (even for count == 0), %g2, %o0-%o2, %o4, %o5 and the
//   integer condition codes.
        DECLARE_FUNCTION(shiftxor_loop_up)
C(shiftxor_loop_up:)
        brz,pn %o2,2f                   // count == 0: nothing to do
       _ sub %g0,%o3,%g1                // delay slot: %g1 = 64-i (mod 64)
        ldx [%o0],%o4                   // first x digit
        sub %o1,%o0,%o1                 // %o1 = yptr - xptr, so that
                                        // [%o0+%o1] follows xptr
1:        ldx [%o0+%o1],%o5             // y digit
          sllx %o5,%o3,%g2              // its low 64-i bits, moved up by i
          xor %o4,%g2,%o4               // fold them into the pending x digit
          stx %o4,[%o0]                 // and store that x digit
          srlx %o5,%g1,%g2              // the top i bits of the y digit
          add %o0,8,%o0                 // next higher x digit
          ldx [%o0],%o4                 // fetch it already
          subcc %o2,1,%o2               // one digit less to go
          bne,pt %xcc,1b
         _ xor %o4,%g2,%o4              // delay slot: fold the spilled bits into it
        stx %o4,[%o0]                   // store the last, partially updated x digit
2:      retl
       _ nop
2558