src/base/digitseq/cl_asm_sparc64_.cc

   1 // Externe Routinen zu ARILEV1.D
   2 // Prozessor: SPARC 64-bit
   3 // Compiler: GNU-C oder ...
   4 // Parameter passing: in registers %o0-%o5.
   6 //   Argumente vom Typ uint8, uint16, uint32 sind bereits vom Aufrufer zu
   7 //   uint64 umgewandelt worden (zero-extend, "srl reg,0,reg").
   8 //   Argumente vom Typ sint8, sint16, sint32 sind bereits vom Aufrufer zu
   9 //   sint64 umgewandelt worden (sign-extend, "sra reg,0,reg").
  10 //   Ergebnisse vom Typ uint8, uint16, uint32 müssen vor Rückgabe zu uint64
  11 //   umgewandelt werden (zero-extend, "srl reg,0,reg").
  12 //   Ergebnisse vom Typ sint8, sint16, sint32 müssen vor Rückgabe zu sint64
  13 //   umgewandelt werden (sign-extend, "sra reg,0,reg").
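  14 // Settings: intCsize=32, intDsize=32. NOTE(review): the digit loops in this file use ldx/stx with an 8-byte stride, i.e. 64-bit digits, so this line looks stale - confirm against the build configuration.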
  15
  16 #ifdef ASM_UNDERSCORE  // C(name) spells an entry point's assembler-level symbol: with ASM_UNDERSCORE a leading underscore is prepended
  17   #define C(entrypoint) _##entrypoint
  18 #else  // otherwise the name is used unchanged
  19   #define C(entrypoint) entrypoint
  20 #endif
  21
  22 // When this file is compiled into a shared library, ELF linkers need to
  23 // know which symbols are functions. DECLARE_FUNCTION(name) emits the matching .type directive.
  24 #if defined(__NetBSD__) || defined(__OpenBSD__)  // here the type is spelled @function
  25   #define DECLARE_FUNCTION(name) .type C(name),@function
  26 #elif defined(__svr4__) || defined(__ELF__)  // here the type is spelled #function
  27   #define DECLARE_FUNCTION(name) .type C(name),#function
  28 #else  // no .type directive at all: the macro expands to nothing
  29   #define DECLARE_FUNCTION(name)
  30 #endif
  31
  32   // Markers for instructions that sit in a branch delay slot
  33   // (such an instruction is executed BEFORE the preceding branch/jump takes effect):
  34   #define _             // instruction that is always executed
  35   #define __            // instruction that is executed only if the branch is taken (annulled branch)
  36   // Abbreviations for instructions:
  37   #define ret   jmp %i7+8    // return from subroutine
  38   #define retl  jmp %o7+8    // return from leaf subroutine (no save/restore)
  39
  40         .seg "text"             // everything below is code
  41
  42         .register %g2,#scratch  // tell the assembler that %g2 is used as a scratch register in this file
  43
  44         .global C(mulu16_),C(mulu32_),C(mulu32_unchecked),C(mulu64_)   // exported entry points, always present
  45         .global C(divu_6432_3232_),C(divu_3216_1616_)
  46         .global C(copy_loop_up),C(copy_loop_down),C(fill_loop_up),C(fill_loop_down)
  47         .global C(clear_loop_up),C(clear_loop_down)
  48         .global C(test_loop_up),C(test_loop_down)
  49         .global C(xor_loop_up),C(compare_loop_up),C(shiftleftcopy_loop_up),C(shiftxor_loop_up)
  50 #if CL_DS_BIG_ENDIAN_P  // this setting selects the "_up" logical loops and the "_down" arithmetic/shift/multiply loops
  51         .global C(or_loop_up),C(and_loop_up),C(eqv_loop_up)
  52         .global C(nand_loop_up),C(nor_loop_up),C(andc2_loop_up),C(orc2_loop_up)
  53         .global C(not_loop_up)
  54         .global C(and_test_loop_up)
  55         .global C(add_loop_down),C(addto_loop_down),C(inc_loop_down)
  56         .global C(sub_loop_down),C(subx_loop_down),C(subfrom_loop_down),C(dec_loop_down)
  57         .global C(neg_loop_down)
  58         .global C(shift1left_loop_down),C(shiftleft_loop_down),C(shiftleftcopy_loop_down)
  59         .global C(shift1right_loop_up),C(shiftright_loop_up),C(shiftrightsigned_loop_up),C(shiftrightcopy_loop_up)
  60         .global C(mulusmall_loop_down),C(mulu_loop_down),C(muluadd_loop_down),C(mulusub_loop_down)
  61 #else  // otherwise the mirrored set is exported: "_down" logical loops, "_up" arithmetic/shift/multiply loops
  62         .global C(or_loop_down),C(xor_loop_down),C(and_loop_down),C(eqv_loop_down)
  63         .global C(nand_loop_down),C(nor_loop_down),C(andc2_loop_down),C(orc2_loop_down)
  64         .global C(not_loop_down)
  65         .global C(and_test_loop_down),C(compare_loop_down)
  66         .global C(add_loop_up),C(addto_loop_up),C(inc_loop_up)
  67         .global C(sub_loop_up),C(subx_loop_up),C(subfrom_loop_up),C(dec_loop_up)
  68         .global C(neg_loop_up)
  69         .global C(shift1left_loop_up),C(shiftleft_loop_up)
  70         .global C(shift1right_loop_down),C(shiftright_loop_down),C(shiftrightsigned_loop_down),C(shiftrightcopy_loop_down)
  71         .global C(mulusmall_loop_up),C(mulu_loop_up),C(muluadd_loop_up),C(mulusub_loop_up)
  72 #endif
  73
  74 #define LOOP_TYPE  1    // 1: standard loops (pointers are advanced, a separate counter is decremented)
  75                         // 2: loops without pointer updates, driven only by a scaled index/counter
  76 #define STANDARD_LOOPS  (LOOP_TYPE==1)
  77 #define COUNTER_LOOPS  (LOOP_TYPE==2)
  78
  79 // extern uint32 mulu16_ (uint16 arg1, uint16 arg2);
  80 // Returns the product arg1*arg2.
  81         DECLARE_FUNCTION(mulu16_)
  82 C(mulu16_:) // Input in %o0,%o1, output in %o0
  83         umul %o0,%o1,%o2        // %o2 := arg1*arg2
  84         retl
  85        _ srl %o2,0,%o0          // delay slot: zero-extend the low 32 bits of the product into %o0
  86
  87 // extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
  88 // Computes 2^32*hi+lo := arg1*arg2; lo is returned in %o0, hi in %g1.
  89         DECLARE_FUNCTION(mulu32_)
  90 C(mulu32_:) // Input in %o0,%o1, output in %o0,%g1
  91         umul %o0,%o1,%o2        // %o2 := product
  92         rd %y,%g1               // %g1 := %y, which umul sets to the high 32 bits of the product
  93         retl
  94        _ srl %o2,0,%o0          // delay slot: %o0 := low 32 bits, zero-extended
  95
  96 // extern uint32 mulu32_unchecked (uint32 x, uint32 y);
  97 // Returns arg1*arg2; the caller guarantees that the product is < 2^32.
  98         DECLARE_FUNCTION(mulu32_unchecked)
  99 C(mulu32_unchecked:) // Input in %o0,%o1, output in %o0
 100         umul %o0,%o1,%o2        // %o2 := product
 101         retl
 102        _ srl %o2,0,%o0          // delay slot: zero-extend the low 32 bits into %o0
 103
 104 // extern struct { uint64 lo; uint64 hi; } mulu64_ (uint64 arg1, uint64 arg2);
 105 // Computes 2^64*hi+lo := arg1*arg2 from four 32x32-bit partial products; lo is returned in %o0, hi in %g2.
 106         DECLARE_FUNCTION(mulu64_)
 107 C(mulu64_:) // Input in %o0,%o1, output in %o0,%g2; also overwrites %o1,%o2,%o3
 108         srlx %o0,32,%o2         // %o2 = high32(arg1)
 109         srl %o0,0,%o0           // %o0 = low32(arg1)
 110         srlx %o1,32,%o3         // %o3 = high32(arg2)
 111         srl %o1,0,%o1           // %o1 = low32(arg2)
 112         mulx %o2,%o3,%g2        // high part
 113         mulx %o2,%o1,%o2        // first mid part
 114         mulx %o0,%o3,%o3        // second mid part
 115         addcc %o2,%o3,%o2       // sum of mid parts
 116         mov 0,%o3
 117         movcs %xcc,1,%o3        // carry from sum of mid parts
 118         sllx %o3,32,%o3         // that carry has weight 2^32 within the high word
 119         add %g2,%o3,%g2         // add to high part
 120         srlx %o2,32,%o3
 121         add %g2,%o3,%g2         // add high32(midparts) to high part
 122         mulx %o0,%o1,%o0        // low part
 123         sllx %o2,32,%o2
 124         addcc %o0,%o2,%o0       // add low32(midparts)*2^32 to low part
 125         add %g2,1,%o3           // %o3 = high part + 1, selected below only if the low addition carried
 126         retl
 127        _ movcs %xcc,%o3,%g2     // add carry to high part
 128
 129 // extern struct { uint32 q; uint32 r; } divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y);
 130 // Writes x = 2^32*xhi+xlo = q*y+r. Precondition (not checked): 0 <= x < 2^32*y. q is returned in %o0, r in %g1.
 131         DECLARE_FUNCTION(divu_6432_3232_)
 132 C(divu_6432_3232_:) // Input in %o0,%o1,%o2, output in %o0,%g1
 133         wr %o0,%g0,%y           // %y := xhi, the upper half of the dividend used by udiv
 134         udiv %o1,%o2,%o0        // divide x by y, %o0 := q
 135         umul %o0,%o2,%g1        // %g1 := (q*y) mod 2^32
 136         sub %o1,%g1,%g1         // low 32 bits of %g1 := (xlo-q*y) mod 2^32 = r. NOTE(review): the upper 32 bits of %g1 are not cleared here - confirm that callers read only the low word
 137         retl
 138        _ srl %o0,0,%o0          // delay slot: zero-extend q
 139
 140 // extern struct { uint16 q; uint16 r; } divu_3216_1616_ (uint32 x, uint16 y);
 141 // Writes x = q*y+r. Precondition (not checked): 0 <= x < 2^16*y. Both results are packed into %o0: r in bits 31..16, q in the low bits.
 142         DECLARE_FUNCTION(divu_3216_1616_)
 143 C(divu_3216_1616_:) // Input in %o0,%o1, output in %o0 (remainder and quotient).
 144         wr %g0,%g0,%y           // %y := 0, the upper half of the dividend used by udiv
 145         udiv %o0,%o1,%o2        // divide, quotient into %o2
 146 #if 0 // Who says that %y has some meaningful contents after `udiv' ??
 147         rd %y,%g1               // remainder from %y
 148 #else
 149         umul %o2,%o1,%g1        // %g1 := (q*y) mod 2^32
 150         sub %o0,%g1,%g1         // %g1 := (x-q*y) mod 2^32 = r
 151 #endif
 152         sll %g1,16,%g1          // shift the remainder into the upper 16 bits
 153         or %o2,%g1,%o0          // combine remainder and quotient
 154         retl
 155        _ srl %o0,0,%o0          // delay slot: zero-extend the packed 32-bit result
 156
 157 #if !defined(__GNUC__)  // only assembled for non-GNU compilers
 158         .global C(_get_g1)
 159 // extern uint32 _get_g1 (void);
 160         DECLARE_FUNCTION(_get_g1)
 161 C(_get_g1:) // Returns the low 32 bits of %g1 (the second result register of mulu32_ and divu_6432_3232_ above) in %o0
 162         retl
 163        _ srl %g1,0,%o0          // delay slot: %o0 := zero-extended low 32 bits of %g1
 164 #endif
 165
 166 #if !defined(__GNUC__)  // only assembled for non-GNU compilers
 167         .global C(_get_g2)
 168 // extern uint64 _get_g2 (void);
 169         DECLARE_FUNCTION(_get_g2)
 170 C(_get_g2:) // Returns the full 64-bit contents of %g2 (the second result register of mulu64_ above) in %o0
 171         retl
 172        _ mov %g2,%o0            // delay slot: %o0 := %g2
 173 #endif
 174
 175 // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
 176         DECLARE_FUNCTION(copy_loop_up)
 177 C(copy_loop_up:) // Input in %o0,%o1,%o2, output in %o0. Copies count 8-byte digits upwards and returns destptr advanced past the last digit written.
 178 #if STANDARD_LOOPS
 179 //      srl %o2,0,%o2           // zero-extend %o2 = count
 180         brz,pn %o2,2f           // count = 0: nothing to copy
 181        _ nop
 182 1:        ldx [%o0],%o3         // fetch digit from source
 183           add %o0,8,%o0         // sourceptr++
 184           stx %o3,[%o1]         // store digit to destination
 185           subcc %o2,1,%o2       // count--
 186           bne,pt %xcc,1b
 187          _ add %o1,8,%o1        // delay slot: destptr++
 188 2:      retl
 189        _ mov %o1,%o0            // delay slot: return the advanced destptr
 190 #endif
 191 #if COUNTER_LOOPS
 192 //      srl %o2,0,%o2           // zero-extend %o2 = count
 193         brz,pn %o2,2f
 194        _ sub %o1,8,%o1          // delay slot: bias destptr by -8 (undone by the final add)
 195         sub %g0,%o2,%o2         // %o2 = -count
 196         sllx %o2,3,%o2          // %o2 = -8*count
 197         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
 198         sub %o1,%o2,%o1         // %o1 = &destptr[count-1]
 199 1:        ldx [%o0+%o2],%o3     // fetch next digit
 200           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 201           bne,pt %xcc,1b
 202          _ stx %o3,[%o1+%o2]    // store digit
 203 2:      retl
 204        _ add %o1,8,%o0          // delay slot: remove the -8 bias and return
 205 #endif
 206
 207 // extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
 208         DECLARE_FUNCTION(copy_loop_down)
 209 C(copy_loop_down:) // Input in %o0,%o1,%o2, output in %o0. Copies count 8-byte digits downwards (pre-decrement) and returns destptr lowered to the last digit written.
 210 #if STANDARD_LOOPS
 211 //      srl %o2,0,%o2           // zero-extend %o2 = count
 212         brz,pn %o2,2f           // count = 0: nothing to copy
 213        _ sub %o0,8,%o0          // delay slot: --sourceptr
 214 1:        ldx [%o0],%o3         // fetch digit from source
 215           sub %o1,8,%o1         // --destptr
 216           stx %o3,[%o1]         // store digit to destination
 217           subcc %o2,1,%o2       // count--
 218           bne,pt %xcc,1b
 219          _ sub %o0,8,%o0        // delay slot: --sourceptr
 220 2:      retl
 221        _ mov %o1,%o0            // delay slot: return the lowered destptr
 222 #endif
 223 #if COUNTER_LOOPS
 224 //      srl %o2,0,%o2           // zero-extend %o2 = count
 225         brz,pn %o2,2f
 226        _ sub %o0,8,%o0
 227         sllx %o2,3,%o2          // %o2 = 8*count
 228         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
 229         sub %o1,%o2,%o1         // %o1 = &destptr[-count]
 230 1:        ldx [%o0+%o2],%o3     // fetch next digit
 231           subcc %o2,8,%o2       // decrement the index, i.e. move the pointers down
 232           bne,pt %xcc,1b
 233          _ stx %o3,[%o1+%o2]    // store digit
 234 2:      retl
 235        _ mov %o1,%o0            // delay slot: return &destptr[-count]
 236 #endif
 237
 238 // extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler);
 239         DECLARE_FUNCTION(fill_loop_up)
 240 C(fill_loop_up:) // Input in %o0,%o1,%o2, output in %o0. Stores filler into count 8-byte digits upwards and returns destptr advanced past them.
 241 #if STANDARD_LOOPS
 242 //      srl %o1,0,%o1           // zero-extend %o1 = count
 243         brz,pn %o1,2f           // count = 0: nothing to fill
 244        _ nop
 245 1:        stx %o2,[%o0]         // store filler
 246           subcc %o1,1,%o1       // count--
 247           bne,pt %xcc,1b
 248          _ add %o0,8,%o0        // delay slot: destptr++
 249 2:      retl
 250        _ nop
 251 #endif
 252 #if COUNTER_LOOPS
 253 //      srl %o1,0,%o1           // zero-extend %o1 = count
 254         brz,pn %o1,2f
 255        _ sub %o0,8,%o0          // delay slot: bias destptr by -8 (undone by the final add)
 256         sub %g0,%o1,%o1         // %o1 = -count
 257         sllx %o1,3,%o1          // %o1 = -8*count
 258         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
 259 1:        addcc %o1,8,%o1       // advance the negative index towards 0, i.e. move the pointer up
 260           bne,pt %xcc,1b
 261          _ stx %o2,[%o0+%o1]    // store digit
 262 2:      retl
 263        _ add %o0,8,%o0          // delay slot: remove the -8 bias and return
 264 #endif
 265
 266 // extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler);
 267         DECLARE_FUNCTION(fill_loop_down)
 268 C(fill_loop_down:) // Input in %o0,%o1,%o2, output in %o0. Stores filler into count 8-byte digits downwards (pre-decrement) and returns destptr lowered to the last digit written.
 269 #if STANDARD_LOOPS
 270 //      srl %o1,0,%o1           // zero-extend %o1 = count
 271         brz,pn %o1,2f           // count = 0: nothing to fill
 272        _ sub %o0,8,%o0          // delay slot: --destptr (undone at label 2 if count = 0)
 273 1:        stx %o2,[%o0]         // store filler
 274           subcc %o1,1,%o1       // count--
 275           bne,pt %xcc,1b
 276          _ sub %o0,8,%o0        // delay slot: --destptr
 277 2:      retl
 278        _ add %o0,8,%o0          // delay slot: undo the one decrement too many
 279 #endif
 280 #if COUNTER_LOOPS
 281 //      srl %o1,0,%o1           // zero-extend %o1 = count
 282         brz,pn %o1,2f
 283        _ sllx %o1,3,%o1         // %o1 = 8*count
 284         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
 285 1:        subcc %o1,8,%o1       // decrement the index, i.e. move the pointer down
 286           bne,pt %xcc,1b
 287          _ stx %o2,[%o0+%o1]    // store digit
 288 2:      retl
 289        _ nop
 290 #endif
 291
 292 // extern uintD* clear_loop_up (uintD* destptr, uintC count);
 293         DECLARE_FUNCTION(clear_loop_up)
 294 C(clear_loop_up:) // Input in %o0,%o1, output in %o0. Zeroes count 8-byte digits upwards and returns destptr advanced past them.
 295 #if STANDARD_LOOPS
 296 //      srl %o1,0,%o1           // zero-extend %o1 = count
 297         brz,pn %o1,2f           // count = 0: nothing to clear
 298        _ nop
 299 1:        stx %g0,[%o0]         // store 0
 300           subcc %o1,1,%o1       // count--
 301           bne,pt %xcc,1b
 302          _ add %o0,8,%o0        // delay slot: destptr++
 303 2:      retl
 304        _ nop
 305 #endif
 306 #if COUNTER_LOOPS
 307 //      srl %o1,0,%o1           // zero-extend %o1 = count
 308         brz,pn %o1,2f
 309        _ sub %o0,8,%o0          // delay slot: bias destptr by -8 (undone by the final add)
 310         sub %g0,%o1,%o1         // %o1 = -count
 311         sllx %o1,3,%o1          // %o1 = -8*count
 312         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
 313 1:        addcc %o1,8,%o1       // advance the negative index towards 0, i.e. move the pointer up
 314           bne,pt %xcc,1b
 315          _ stx %g0,[%o0+%o1]    // store digit 0
 316 2:      retl
 317        _ add %o0,8,%o0          // delay slot: remove the -8 bias and return
 318 #endif
 319
 320 // extern uintD* clear_loop_down (uintD* destptr, uintC count);
 321         DECLARE_FUNCTION(clear_loop_down)
 322 C(clear_loop_down:) // Input in %o0,%o1, output in %o0. Zeroes count 8-byte digits downwards (pre-decrement) and returns destptr lowered to the last digit written.
 323 #if STANDARD_LOOPS
 324 //      srl %o1,0,%o1           // zero-extend %o1 = count
 325         brz,pn %o1,2f           // count = 0: nothing to clear
 326        _ sub %o0,8,%o0          // delay slot: --destptr (undone at label 2 if count = 0)
 327 1:        stx %g0,[%o0]         // store 0
 328           subcc %o1,1,%o1       // count--
 329           bne,pt %xcc,1b
 330          _ sub %o0,8,%o0        // delay slot: --destptr
 331 2:      retl
 332        _ add %o0,8,%o0          // delay slot: undo the one decrement too many
 333 #endif
 334 #if COUNTER_LOOPS
 335 //      srl %o1,0,%o1           // zero-extend %o1 = count
 336         brz,pn %o1,2f
 337        _ sllx %o1,3,%o1         // %o1 = 8*count
 338         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
 339 1:        subcc %o1,8,%o1       // decrement the index, i.e. move the pointer down
 340           bne,pt %xcc,1b
 341          _ stx %g0,[%o0+%o1]    // store digit 0
 342 2:      retl
 343        _ nop
 344 #endif
 345
 346 // extern boolean test_loop_up (uintD* ptr, uintC count);
 347         DECLARE_FUNCTION(test_loop_up)
 348 C(test_loop_up:) // Input in %o0,%o1, output in %o0. Scans count 8-byte digits upwards; returns 1 at the first non-zero digit, 0 if all are zero (or count = 0).
 349 #if STANDARD_LOOPS
 350 //      srl %o1,0,%o1           // zero-extend %o1 = count
 351         brz,pn %o1,2f           // count = 0: result 0
 352        _ nop
 353           ldx [%o0],%o2         // fetch the first digit
 354 1:        add %o0,8,%o0         // ptr++
 355           brnz,pn %o2,3f        // non-zero digit found
 356          _ subcc %o1,1,%o1      // delay slot: count--
 357           bne,a,pt %xcc,1b
 358          __ ldx [%o0],%o2       // annulled delay slot: fetch the next digit only if looping again
 359 2:      retl
 360        _ mov 0,%o0
 361 3:      retl
 362        _ mov 1,%o0
 363 #endif
 364 #if COUNTER_LOOPS
 365 //      srl %o1,0,%o1           // zero-extend %o1 = count
 366         brz,pn %o1,2f
 367        _ sub %g0,%o1,%o1        // %o1 = -count
 368         sllx %o1,3,%o1          // %o1 = -8*count
 369         sub %o0,%o1,%o0         // %o0 = &ptr[count]
 370           ldx [%o0+%o1],%o2     // fetch next digit
 371 1:        brnz,pn %o2,3f        // test it
 372          _ addcc %o1,8,%o1      // advance the negative index towards 0, i.e. move the pointer up
 373           bne,a,pt %xcc,1b
 374          __ ldx [%o0+%o1],%o2   // fetch next digit
 375 2:      retl
 376        _ mov 0,%o0
 377 3:      retl
 378        _ mov 1,%o0
 379 #endif
 380
 381 // extern boolean test_loop_down (uintD* ptr, uintC count);
 382         DECLARE_FUNCTION(test_loop_down)
 383 C(test_loop_down:) // Input in %o0,%o1, output in %o0. Scans count 8-byte digits downwards starting at ptr[-1]; returns 1 at the first non-zero digit, 0 if all are zero (or count = 0).
 384 #if STANDARD_LOOPS
 385 //      srl %o1,0,%o1           // zero-extend %o1 = count
 386         brz,pn %o1,2f           // count = 0: result 0
 387        _ sub %o0,8,%o0          // delay slot: --ptr
 388           ldx [%o0],%o2         // fetch the first digit
 389 1:        sub %o0,8,%o0         // --ptr
 390           brnz,pn %o2,3f        // non-zero digit found
 391          _ subcc %o1,1,%o1      // delay slot: count--
 392           bne,a,pt %xcc,1b
 393          __ ldx [%o0],%o2       // annulled delay slot: fetch the next digit only if looping again
 394 2:      retl
 395        _ mov 0,%o0
 396 3:      retl
 397        _ mov 1,%o0
 398 #endif
 399 #if COUNTER_LOOPS
 400 //      srl %o1,0,%o1           // zero-extend %o1 = count
 401         brz,pn %o1,2f
 402        _ sllx %o1,3,%o1         // %o1 = 8*count
 403         sub %o0,%o1,%o0         // %o0 = &ptr[-count]
 404         sub %o1,8,%o1           // index of the first digit to test, ptr[-1]
 405           ldx [%o0+%o1],%o2     // fetch next digit
 406 1:        brnz,pn %o2,3f        // test it
 407          _ subcc %o1,8,%o1      // decrement the index, i.e. move the pointer down
 408           bcc,a,pt %xcc,1b      // continue while the subtraction did not borrow (index was still >= 8)
 409          __ ldx [%o0+%o1],%o2   // fetch next digit
 410 2:      retl
 411        _ mov 0,%o0
 412 3:      retl
 413        _ mov 1,%o0
 414 #endif
 415
 416 #if CL_DS_BIG_ENDIAN_P
 417
 418 // extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count);
 419         DECLARE_FUNCTION(or_loop_up)
 420 C(or_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := x[i] | y[i]. No result.
 421 #if STANDARD_LOOPS
 422 //      srl %o2,0,%o2           // zero-extend %o2 = count
 423         brz,pn %o2,2f           // count = 0: nothing to do
 424        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 425 1:        ldx [%o0],%o3         // *xptr
 426           ldx [%o0+%o1],%o4     // *yptr
 427           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 428           or %o3,%o4,%o3        // combine
 429           stx %o3,[%o0]         // =: *xptr
 430           bne,pt %xcc,1b
 431          _ add %o0,8,%o0        // xptr++, yptr++
 432 2:      retl
 433        _ nop
 434 #endif
 435 #if COUNTER_LOOPS
 436 //      srl %o2,0,%o2           // zero-extend %o2 = count
 437         brz,pn %o2,2f
 438        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 439         sub %g0,%o2,%o2         // %o2 = -count
 440         sllx %o2,3,%o2          // %o2 = -8*count
 441         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 442         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 443 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 444           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 445           ldx [%o0+%o2],%o4     // fetch the matching x digit
 446           or %o4,%o3,%o3        // combine both
 447           bne,pt %xcc,1b
 448          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 449 2:      retl
 450        _ nop
 451 #endif
 452
 453 #endif
 454
 455 // extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count);
 456         DECLARE_FUNCTION(xor_loop_up)
 457 C(xor_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := x[i] ^ y[i]. No result.
 458 #if STANDARD_LOOPS
 459 //      srl %o2,0,%o2           // zero-extend %o2 = count
 460         brz,pn %o2,2f           // count = 0: nothing to do
 461        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 462 1:        ldx [%o0],%o3         // *xptr
 463           ldx [%o0+%o1],%o4     // *yptr
 464           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 465           xor %o3,%o4,%o3       // combine
 466           stx %o3,[%o0]         // =: *xptr
 467           bne,pt %xcc,1b
 468          _ add %o0,8,%o0        // xptr++, yptr++
 469 2:      retl
 470        _ nop
 471 #endif
 472 #if COUNTER_LOOPS
 473 //      srl %o2,0,%o2           // zero-extend %o2 = count
 474         brz,pn %o2,2f
 475        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 476         sub %g0,%o2,%o2         // %o2 = -count
 477         sllx %o2,3,%o2          // %o2 = -8*count
 478         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 479         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 480 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 481           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 482           ldx [%o0+%o2],%o4     // fetch the matching x digit
 483           xor %o4,%o3,%o3       // combine both
 484           bne,pt %xcc,1b
 485          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 486 2:      retl
 487        _ nop
 488 #endif
 489
 490 #if CL_DS_BIG_ENDIAN_P
 491
 492 // extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count);
 493         DECLARE_FUNCTION(and_loop_up)
 494 C(and_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := x[i] & y[i]. No result.
 495 #if STANDARD_LOOPS
 496 //      srl %o2,0,%o2           // zero-extend %o2 = count
 497         brz,pn %o2,2f           // count = 0: nothing to do
 498        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 499 1:        ldx [%o0],%o3         // *xptr
 500           ldx [%o0+%o1],%o4     // *yptr
 501           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 502           and %o3,%o4,%o3       // combine
 503           stx %o3,[%o0]         // =: *xptr
 504           bne,pt %xcc,1b
 505          _ add %o0,8,%o0        // xptr++, yptr++
 506 2:      retl
 507        _ nop
 508 #endif
 509 #if COUNTER_LOOPS
 510 //      srl %o2,0,%o2           // zero-extend %o2 = count
 511         brz,pn %o2,2f
 512        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 513         sub %g0,%o2,%o2         // %o2 = -count
 514         sllx %o2,3,%o2          // %o2 = -8*count
 515         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 516         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 517 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 518           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 519           ldx [%o0+%o2],%o4     // fetch the matching x digit
 520           and %o4,%o3,%o3       // combine both
 521           bne,pt %xcc,1b
 522          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 523 2:      retl
 524        _ nop
 525 #endif
 526
 527 // extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count);
 528         DECLARE_FUNCTION(eqv_loop_up)
 529 C(eqv_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := ~(x[i] ^ y[i]). No result.
 530 #if STANDARD_LOOPS
 531 //      srl %o2,0,%o2           // zero-extend %o2 = count
 532         brz,pn %o2,2f           // count = 0: nothing to do
 533        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 534 1:        ldx [%o0],%o3         // *xptr
 535           ldx [%o0+%o1],%o4     // *yptr
 536           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 537           xnor %o3,%o4,%o3      // combine
 538           stx %o3,[%o0]         // =: *xptr
 539           bne,pt %xcc,1b
 540          _ add %o0,8,%o0        // xptr++, yptr++
 541 2:      retl
 542        _ nop
 543 #endif
 544 #if COUNTER_LOOPS
 545 //      srl %o2,0,%o2           // zero-extend %o2 = count
 546         brz,pn %o2,2f
 547        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 548         sub %g0,%o2,%o2         // %o2 = -count
 549         sllx %o2,3,%o2          // %o2 = -8*count
 550         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 551         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 552 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 553           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 554           ldx [%o0+%o2],%o4     // fetch the matching x digit
 555           xnor %o4,%o3,%o3      // combine both
 556           bne,pt %xcc,1b
 557          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 558 2:      retl
 559        _ nop
 560 #endif
 561
 562 // extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count);
 563         DECLARE_FUNCTION(nand_loop_up)
 564 C(nand_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := ~(x[i] & y[i]). No result.
 565 #if STANDARD_LOOPS
 566 //      srl %o2,0,%o2           // zero-extend %o2 = count
 567         brz,pn %o2,2f           // count = 0: nothing to do
 568        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 569 1:        ldx [%o0],%o3         // *xptr
 570           ldx [%o0+%o1],%o4     // *yptr
 571           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 572           and %o3,%o4,%o3       // combine
 573           xnor %g0,%o3,%o3      // complement the result
 574           stx %o3,[%o0]         // =: *xptr
 575           bne,pt %xcc,1b
 576          _ add %o0,8,%o0        // xptr++, yptr++
 577 2:      retl
 578        _ nop
 579 #endif
 580 #if COUNTER_LOOPS
 581 //      srl %o2,0,%o2           // zero-extend %o2 = count
 582         brz,pn %o2,2f
 583        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 584         sub %g0,%o2,%o2         // %o2 = -count
 585         sllx %o2,3,%o2          // %o2 = -8*count
 586         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 587         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 588 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 589           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 590           ldx [%o0+%o2],%o4     // fetch the matching x digit
 591           and %o4,%o3,%o3       // combine both
 592           xnor %g0,%o3,%o3      // complement the result
 593           bne,pt %xcc,1b
 594          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 595 2:      retl
 596        _ nop
 597 #endif
 598
 599 // extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count);
 600         DECLARE_FUNCTION(nor_loop_up)
 601 C(nor_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := ~(x[i] | y[i]). No result.
 602 #if STANDARD_LOOPS
 603 //      srl %o2,0,%o2           // zero-extend %o2 = count
 604         brz,pn %o2,2f           // count = 0: nothing to do
 605        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 606 1:        ldx [%o0],%o3         // *xptr
 607           ldx [%o0+%o1],%o4     // *yptr
 608           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 609           or %o3,%o4,%o3        // combine
 610           xnor %g0,%o3,%o3      // complement the result
 611           stx %o3,[%o0]         // =: *xptr
 612           bne,pt %xcc,1b
 613          _ add %o0,8,%o0        // xptr++, yptr++
 614 2:      retl
 615        _ nop
 616 #endif
 617 #if COUNTER_LOOPS
 618 //      srl %o2,0,%o2           // zero-extend %o2 = count
 619         brz,pn %o2,2f
 620        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 621         sub %g0,%o2,%o2         // %o2 = -count
 622         sllx %o2,3,%o2          // %o2 = -8*count
 623         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 624         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 625 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 626           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 627           ldx [%o0+%o2],%o4     // fetch the matching x digit
 628           or %o4,%o3,%o3        // combine both
 629           xnor %g0,%o3,%o3      // complement the result
 630           bne,pt %xcc,1b
 631          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 632 2:      retl
 633        _ nop
 634 #endif
 635
 636 // extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
 637         DECLARE_FUNCTION(andc2_loop_up)
 638 C(andc2_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := x[i] & ~y[i]. No result.
 639 #if STANDARD_LOOPS
 640 //      srl %o2,0,%o2           // zero-extend %o2 = count
 641         brz,pn %o2,2f           // count = 0: nothing to do
 642        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 643 1:        ldx [%o0],%o3         // *xptr
 644           ldx [%o0+%o1],%o4     // *yptr
 645           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 646           andn %o3,%o4,%o3      // combine
 647           stx %o3,[%o0]         // =: *xptr
 648           bne,pt %xcc,1b
 649          _ add %o0,8,%o0        // xptr++, yptr++
 650 2:      retl
 651        _ nop
 652 #endif
 653 #if COUNTER_LOOPS
 654 //      srl %o2,0,%o2           // zero-extend %o2 = count
 655         brz,pn %o2,2f
 656        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 657         sub %g0,%o2,%o2         // %o2 = -count
 658         sllx %o2,3,%o2          // %o2 = -8*count
 659         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 660         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 661 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 662           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 663           ldx [%o0+%o2],%o4     // fetch the matching x digit
 664           andn %o4,%o3,%o3      // combine both
 665           bne,pt %xcc,1b
 666          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 667 2:      retl
 668        _ nop
 669 #endif
 670
 671 // extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
 672         DECLARE_FUNCTION(orc2_loop_up)
 673 C(orc2_loop_up:) // Input in %o0,%o1,%o2. For count 8-byte digits, walking upwards: x[i] := x[i] | ~y[i]. No result.
 674 #if STANDARD_LOOPS
 675 //      srl %o2,0,%o2           // zero-extend %o2 = count
 676         brz,pn %o2,2f           // count = 0: nothing to do
 677        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 678 1:        ldx [%o0],%o3         // *xptr
 679           ldx [%o0+%o1],%o4     // *yptr
 680           subcc %o2,1,%o2       // count--, sets the flags for the bne below
 681           orn %o3,%o4,%o3       // combine
 682           stx %o3,[%o0]         // =: *xptr
 683           bne,pt %xcc,1b
 684          _ add %o0,8,%o0        // xptr++, yptr++
 685 2:      retl
 686        _ nop
 687 #endif
 688 #if COUNTER_LOOPS
 689 //      srl %o2,0,%o2           // zero-extend %o2 = count
 690         brz,pn %o2,2f
 691        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 692         sub %g0,%o2,%o2         // %o2 = -count
 693         sllx %o2,3,%o2          // %o2 = -8*count
 694         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 695         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 696 1:        ldx [%o1+%o2],%o3     // fetch the y digit
 697           addcc %o2,8,%o2       // advance the negative index towards 0, i.e. move the pointers up
 698           ldx [%o0+%o2],%o4     // fetch the matching x digit
 699           orn %o4,%o3,%o3       // combine both
 700           bne,pt %xcc,1b
 701          _ stx %o3,[%o0+%o2]    // store into the x digit just read; at this point [%o1+%o2] would address the NEXT y digit
 702 2:      retl
 703        _ nop
 704 #endif
 705
 706 // extern void not_loop_up (uintD* xptr, uintC count);
 707         DECLARE_FUNCTION(not_loop_up)
 708 C(not_loop_up:) // Input in %o0,%o1. For count 8-byte digits, walking upwards: x[i] := ~x[i]. No result.
 709 #if STANDARD_LOOPS
 710 //      srl %o1,0,%o1           // zero-extend %o1 = count
 711         brz,pn %o1,2f           // count = 0: nothing to do
 712        _ nop
 713 1:        ldx [%o0],%o2         // fetch digit
 714           subcc %o1,1,%o1       // count--, sets the flags for the bne below
 715           xnor %g0,%o2,%o2      // complement it
 716           stx %o2,[%o0]         // store it back
 717           bne,pt %xcc,1b
 718          _ add %o0,8,%o0        // delay slot: xptr++
 719 2:      retl
 720        _ nop
 721 #endif
 722 #if COUNTER_LOOPS
 723 //      srl %o1,0,%o1           // zero-extend %o1 = count
 724         brz,pn %o1,2f
 725        _ sub %o0,8,%o0          // delay slot: bias xptr by -8, because x is accessed after the index increment
 726         sub %g0,%o1,%o1         // %o1 = -count
 727         sllx %o1,3,%o1          // %o1 = -8*count
 728         sub %o0,%o1,%o0         // %o0 = &xptr[count-1]
 729 1:        addcc %o1,8,%o1       // advance the negative index towards 0, i.e. move the pointer up
 730           ldx [%o0+%o1],%o2     // fetch next digit
 731           xnor %g0,%o2,%o2      // complement it
 732           bne,pt %xcc,1b
 733          _ stx %o2,[%o0+%o1]    // store digit back to the same place
 734 2:      retl
 735        _ nop
 736 #endif
 737
 738 // extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count);
 739         DECLARE_FUNCTION(and_test_loop_up)
 740 C(and_test_loop_up:) // Input in %o0,%o1,%o2, output in %o0. Walks count 8-byte digits upwards; returns 1 at the first i with (x[i] & y[i]) != 0, else 0. Memory is not modified.
 741 #if STANDARD_LOOPS
 742 //      srl %o2,0,%o2           // zero-extend %o2 = count
 743         brz,pn %o2,2f           // count = 0: result 0
 744        _ nop
 745 1:        ldx [%o0],%o3         // *xptr
 746           ldx [%o1],%o4         // *yptr
 747           add %o0,8,%o0         // xptr++
 748           andcc %o3,%o4,%g0     // test x & y, result discarded
 749           bne,pn %xcc,3f        // common bit found
 750          _ subcc %o2,1,%o2      // delay slot: count--
 751           bne,pt %xcc,1b
 752          _ add %o1,8,%o1        // delay slot: yptr++
 753 2:      retl
 754        _ mov 0,%o0
 755 3:      retl
 756        _ mov 1,%o0
 757 #endif
 758 #if COUNTER_LOOPS
 759 //      srl %o2,0,%o2           // zero-extend %o2 = count
 760         brz,pn %o2,2f
 761        _ sub %g0,%o2,%o2        // %o2 = -count
 762         sllx %o2,3,%o2          // %o2 = -8*count
 763         sub %o0,%o2,%o0         // %o0 = &xptr[count]
 764         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 765           ldx [%o0+%o2],%o3     // fetch next x digit
 766 1:        ldx [%o1+%o2],%o4     // fetch the matching y digit
 767           andcc %o3,%o4,%g0     // combine both
 768           bne,pn %xcc,3f
 769          _ addcc %o2,8,%o2      // advance the negative index towards 0, i.e. move the pointers up
 770           bne,a,pt %xcc,1b
 771          __ ldx [%o0+%o2],%o3   // fetch next x digit
 772 2:      retl
 773        _ mov 0,%o0
 774 3:      retl
 775        _ mov 1,%o0
 776 #endif
 777
 778 #endif
 779
 780 // extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count);
 781         DECLARE_FUNCTION(compare_loop_up)
 782 C(compare_loop_up:) // Input in %o0,%o1,%o2, output in %o0. Compares count 8-byte digits upwards; at the first differing digit returns +1 if x[i] > y[i] (unsigned), -1 if x[i] < y[i]; returns 0 if all digits are equal.
 783 #if STANDARD_LOOPS
 784 //      srl %o2,0,%o2           // zero-extend %o2 = count
 785         brz,pn %o2,2f           // count = 0: result 0
 786        _ nop
 787           ldx [%o0],%o3         // fetch the first x digit
 788 1:        ldx [%o1],%o4         // fetch the matching y digit
 789           add %o0,8,%o0         // xptr++
 790           subcc %o3,%o4,%g0     // compare
 791           bne,pn %xcc,3f        // digits differ
 792          _ add %o1,8,%o1        // delay slot: yptr++ (leaves the condition codes intact for label 3)
 793           subcc %o2,1,%o2       // count--
 794           bne,a,pt %xcc,1b
 795          __ ldx [%o0],%o3       // annulled delay slot: fetch the next x digit only if looping again
 796 2:      retl
 797        _ mov 0,%o0
 798 3:      mov 1,%o0               // assume x > y
 799         movlu %xcc,-1,%o0       // x < y (unsigned): result -1
 800         retl
 801        _ sra %o0,0,%o0          // sign-extend %o0
 802 #endif
 803 #if COUNTER_LOOPS
 804 //      srl %o2,0,%o2           // zero-extend %o2 = count
 805         brz,pn %o2,2f
 806        _ sub %g0,%o2,%o2        // %o2 = -count
 807         sllx %o2,3,%o2          // %o2 = -8*count
 808         sub %o0,%o2,%o0         // %o0 = &xptr[count]
 809         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 810           ldx [%o0+%o2],%o3     // fetch next x digit
 811 1:        ldx [%o1+%o2],%o4     // fetch the matching y digit
 812           subcc %o3,%o4,%g0     // compare
 813           bne,pn %xcc,3f
 814          _ addcc %o2,8,%o2      // advance the negative index towards 0, i.e. move the pointers up (overwrites the condition codes)
 815           bne,a,pt %xcc,1b
 816          __ ldx [%o0+%o2],%o3   // fetch next x digit
 817 2:      retl
 818        _ mov 0,%o0
 819 3:      subcc %o3,%o4,%g0       // compare again, since the addcc in the delay slot overwrote the flags
 820         mov 1,%o0
 821         movlu %xcc,-1,%o0
 822         retl
 823        _ sra %o0,0,%o0          // sign-extend %o0
 824 #endif
 825
 826 #if CL_DS_BIG_ENDIAN_P
 827
 828 // extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Adds count 64-bit digits of sourceptr1 and sourceptr2, walking downward
     // (each pointer is decremented before its digit is accessed), and stores
     // the sums below destptr. The carry is kept in %g1 as 0 or 1 and is
     // returned in %o0 (0 when count is zero).
 829         DECLARE_FUNCTION(add_loop_down)
 830 C(add_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
 831 #if STANDARD_LOOPS
 832 //      srl %o3,0,%o3           // zero-extend %o3 = count
 833         brz,pn %o3,2f
 834        _ mov %g0,%g1            // Carry := 0
 835         sub %o0,8,%o0
 836 1:        ldx [%o0],%o4         // source1-digit
 837           sub %o1,8,%o1
 838           ldx [%o1],%o5         // source2-digit
 839           addcc %o4,%g1,%o4
 840           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
 841           addcc %o4,%o5,%o4
 842           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
 843           sub %o2,8,%o2
 844           stx %o4,[%o2]         // store digit
 845           subcc %o3,1,%o3
 846           bne,pt %xcc,1b
 847          _ sub %o0,8,%o0
 848 2:      retl
 849        _ mov %g1,%o0
 850 #endif
 851 #if COUNTER_LOOPS
 852 //      srl %o3,0,%o3           // zero-extend %o3 = count
 853         brz,pn %o3,2f
 854        _ mov %g0,%g1            // Carry := 0
 855         sub %o0,8,%o0
 856         sub %o1,8,%o1
 857         sllx %o3,3,%o3          // %o3 = 8*count
 858         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
 859         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
 860         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
 861 1:        ldx [%o0+%o3],%o4     // source1-digit
 862           ldx [%o1+%o3],%o5     // source2-digit
 863           addcc %o4,%g1,%o4
 864           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
 865           addcc %o4,%o5,%o4
 866           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
 867           subcc %o3,8,%o3
 868           bne,pt %xcc,1b
 869          _ stx %o4,[%o2+%o3]    // store digit
 870 2:      retl
 871        _ mov %g1,%o0
 872 #endif
 873
 874 // extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
     // Adds count 64-bit digits of sourceptr into the digits of destptr in
     // place, walking downward. The carry is kept in %o5 as 0 or 1 and is
     // returned in %o0 (0 when count is zero).
 875         DECLARE_FUNCTION(addto_loop_down)
 876 C(addto_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
 877 #if STANDARD_LOOPS
 878 //      srl %o2,0,%o2           // zero-extend %o2 = count
 879         brz,pn %o2,2f
 880        _ mov %g0,%o5            // Carry := 0
 881         sub %o0,8,%o0
 882 1:        ldx [%o0],%o3         // source-digit
 883           sub %o1,8,%o1
 884           ldx [%o1],%o4         // dest-digit
 885           addcc %o3,%o5,%o3
 886           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
 887           addcc %o3,%o4,%o4
 888           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
 889           stx %o4,[%o1]         // store digit
 890           subcc %o2,1,%o2
 891           bne,pt %xcc,1b
 892          _ sub %o0,8,%o0
 893 2:      retl
 894        _ mov %o5,%o0
 895 #endif
 896 #if COUNTER_LOOPS
 897 //      srl %o2,0,%o2           // zero-extend %o2 = count
 898         brz,pn %o2,2f
 899        _ mov %g0,%o5            // Carry := 0
 900         sub %o0,8,%o0
 901         sub %o1,8,%o1
 902         sllx %o2,3,%o2          // %o2 = 8*count
 903         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
 904         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
 905           ldx [%o0+%o2],%o3     // source-digit
 906 1:        ldx [%o1+%o2],%o4     // dest-digit
 907           addcc %o3,%o5,%o3
 908           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
 909           addcc %o3,%o4,%o4
 910           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
 911           stx %o4,[%o1+%o2]     // store digit
 912           subcc %o2,8,%o2
 913           bne,a,pt %xcc,1b
 914          __ ldx [%o0+%o2],%o3   // source-digit
 915 2:      retl
 916        _ mov %o5,%o0
 917 #endif
 918
 919 // extern uintD inc_loop_down (uintD* ptr, uintC count);
     // Increments the count-digit number ending just below ptr by one, walking
     // downward. Stops as soon as an incremented digit is nonzero (no further
     // carry) and returns 0. Returns 1 if the carry ran out of all count
     // digits, or if count is zero.
 920         DECLARE_FUNCTION(inc_loop_down)
 921 C(inc_loop_down:) // Input in %o0,%o1, Output in %o0
 922 #if STANDARD_LOOPS
 923 //      srl %o1,0,%o1           // zero-extend %o1 = count
 924         brz,pn %o1,2f
 925        _ sub %o0,8,%o0
 926 1:        ldx [%o0],%o2
 927           addcc %o2,1,%o2
 928           bne,pn %xcc,3f        // result nonzero -> no carry, done
 929          _ stx %o2,[%o0]
 930           subcc %o1,1,%o1
 931           bne,pt %xcc,1b
 932          _ sub %o0,8,%o0
 933 2:      retl
 934        _ mov 1,%o0
 935 3:      retl
 936        _ mov 0,%o0
 937 #endif
 938 #if COUNTER_LOOPS
 939 //      srl %o1,0,%o1           // zero-extend %o1 = count
 940         brz,pn %o1,2f
 941        _ sub %o0,8,%o0
 942         sllx %o1,3,%o1          // %o1 = 8*count
 943         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
 944           ldx [%o0+%o1],%o2     // fetch digit
 945 1:        addcc %o2,1,%o2       // increment
 946           bne,pn %xcc,3f
 947          _ stx %o2,[%o0+%o1]    // store
 948           subcc %o1,8,%o1       // decrement counter, decrement pointer
 949           bne,a,pt %xcc,1b
 950          __ ldx [%o0+%o1],%o2
 951 2:      retl
 952        _ mov 1,%o0
 953 3:      retl
 954        _ mov 0,%o0
 955 #endif
 956
 957 // extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Subtracts count 64-bit digits of sourceptr2 from those of sourceptr1,
     // walking downward, and stores the differences below destptr. The borrow
     // is kept in %g1 as 0 or 1 and is returned in %o0 (0 when count is zero).
 958         DECLARE_FUNCTION(sub_loop_down)
 959 C(sub_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
 960 #if STANDARD_LOOPS
 961 //      srl %o3,0,%o3           // zero-extend %o3 = count
 962         brz,pn %o3,2f
 963        _ mov %g0,%g1            // Carry := 0
 964         sub %o1,8,%o1
 965 1:        ldx [%o1],%o5         // source2-digit
 966           sub %o0,8,%o0
 967           ldx [%o0],%o4         // source1-digit
 968           addcc %o5,%g1,%o5
 969           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
 970           subcc %o4,%o5,%o4
 971           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
 972           sub %o2,8,%o2
 973           stx %o4,[%o2]         // store digit
 974           subcc %o3,1,%o3
 975           bne,pt %xcc,1b
 976          _ sub %o1,8,%o1
 977 2:      retl
 978        _ mov %g1,%o0
 979 #endif
 980 #if COUNTER_LOOPS
 981 //      srl %o3,0,%o3           // zero-extend %o3 = count
 982         brz,pn %o3,2f
 983        _ mov %g0,%g1            // Carry := 0
 984         sub %o0,8,%o0
 985         sub %o1,8,%o1
 986         sllx %o3,3,%o3          // %o3 = 8*count
 987         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
 988         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
 989         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
 990 1:        ldx [%o0+%o3],%o4     // source1-digit
 991           ldx [%o1+%o3],%o5     // source2-digit
 992           addcc %o5,%g1,%o5
 993           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
 994           subcc %o4,%o5,%o4
 995           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
 996           subcc %o3,8,%o3
 997           bne,pt %xcc,1b
 998          _ stx %o4,[%o2+%o3]    // store digit
 999 2:      retl
1000        _ mov %g1,%o0
1001 #endif
1002
1003 // extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
     // Subtracts count 64-bit digits of sourceptr2 from those of sourceptr1
     // with an incoming borrow, walking downward, and stores the differences
     // below destptr. The borrow is passed in %o4 and kept in %g1 as 0 (none)
     // or -1 (borrow pending); the final borrow is returned in %o0 in the same
     // encoding. When count is zero the incoming borrow is returned unchanged.
     //
     // Borrow handling per digit:
     //   step 1: %o5 := source2 - %g1, i.e. source2 + 1 when a borrow is pending.
     //           Subtracting -1 (all ones) sets the C flag unless source2 was
     //           all ones. So C set means the +1 did not wrap and the borrow is
     //           consumed (%g1 := 0); C clear leaves %g1 untouched.
     //   step 2: %o4 := source1 - %o5; C set means a new borrow (%g1 := -1).
1004         DECLARE_FUNCTION(subx_loop_down)
1005 C(subx_loop_down:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1, Output in %o0
1006 #if STANDARD_LOOPS
1007 //      srl %o3,0,%o3           // zero-extend %o3 = count
1008         brz,pn %o3,2f
1009        _ mov %o4,%g1            // carry (0 or -1)
1010         sub %o1,8,%o1
1011 1:        ldx [%o1],%o5         // source2-digit
1012           sub %o0,8,%o0
1013           ldx [%o0],%o4         // source1-digit
1014           subcc %o5,%g1,%o5
1015           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1
1016           subcc %o4,%o5,%o4
1017           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
1018           sub %o2,8,%o2
1019           stx %o4,[%o2]         // store digit
1020           subcc %o3,1,%o3
1021           bne,pt %xcc,1b
1022          _ sub %o1,8,%o1
1023 2:      retl
1024        _ mov %g1,%o0
1025 #endif
1026 #if COUNTER_LOOPS
1027 //      srl %o3,0,%o3           // zero-extend %o3 = count
1028         brz,pn %o3,2f
1029        _ mov %o4,%g1            // carry (0 or -1)
1030         sub %o0,8,%o0
1031         sub %o1,8,%o1
1032         sllx %o3,3,%o3          // %o3 = 8*count
1033         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
1034         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
1035         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
1036 1:        ldx [%o1+%o3],%o5     // source2-digit
1037           ldx [%o0+%o3],%o4     // source1-digit
1038           subcc %o5,%g1,%o5
1039           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1
1040           subcc %o4,%o5,%o4
1041           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
1042           subcc %o3,8,%o3
1043           bne,pt %xcc,1b
1044          _ stx %o4,[%o2+%o3]    // store digit
1045 2:      retl
1046        _ mov %g1,%o0
1047 #endif
1048
1049 // extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
     // Subtracts count 64-bit digits of sourceptr from the digits of destptr
     // in place, walking downward. The borrow is kept in %o5 as 0 or 1 and is
     // returned in %o0 (0 when count is zero).
1050         DECLARE_FUNCTION(subfrom_loop_down)
1051 C(subfrom_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1052 #if STANDARD_LOOPS
1053 //      srl %o2,0,%o2           // zero-extend %o2 = count
1054         brz,pn %o2,2f
1055        _ mov %g0,%o5            // Carry := 0
1056         sub %o0,8,%o0
1057 1:        ldx [%o0],%o3         // source-digit
1058           sub %o1,8,%o1
1059           ldx [%o1],%o4         // dest-digit
1060           addcc %o3,%o5,%o3
1061           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1062           subcc %o4,%o3,%o4
1063           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
1064           stx %o4,[%o1]         // store digit
1065           subcc %o2,1,%o2
1066           bne,pt %xcc,1b
1067          _ sub %o0,8,%o0
1068 2:      retl
1069        _ mov %o5,%o0
1070 #endif
1071 #if COUNTER_LOOPS
1072 //      srl %o2,0,%o2           // zero-extend %o2 = count
1073         brz,pn %o2,2f
1074        _ mov %g0,%o5            // Carry := 0
1075         sub %o0,8,%o0
1076         sub %o1,8,%o1
1077         sllx %o2,3,%o2          // %o2 = 8*count
1078         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
1079         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
1080           ldx [%o0+%o2],%o3     // source-digit
1081 1:        ldx [%o1+%o2],%o4     // dest-digit
1082           addcc %o3,%o5,%o3
1083           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1084           subcc %o4,%o3,%o4
1085           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
1086           stx %o4,[%o1+%o2]     // store digit
1087           subcc %o2,8,%o2
1088           bne,a,pt %xcc,1b
1089          __ ldx [%o0+%o2],%o3   // source-digit
1090 2:      retl
1091        _ mov %o5,%o0
1092 #endif
1093
1094 // extern uintD dec_loop_down (uintD* ptr, uintC count);
     // Decrements the count-digit number ending just below ptr by one, walking
     // downward. Stops as soon as a digit can be decremented without borrow
     // and returns 0. Returns -1 if the borrow ran out of all count digits,
     // or if count is zero.
1095         DECLARE_FUNCTION(dec_loop_down)
1096 C(dec_loop_down:) // Input in %o0,%o1, Output in %o0
1097 #if STANDARD_LOOPS
1098 //      srl %o1,0,%o1           // zero-extend %o1 = count
1099         brz,pn %o1,2f
1100        _ sub %o0,8,%o0
1101 1:        ldx [%o0],%o2
1102           subcc %o2,1,%o2
1103           bcc,pn %xcc,3f        // no borrow -> done
1104          _ stx %o2,[%o0]
1105           subcc %o1,1,%o1
1106           bne,pt %xcc,1b
1107          _ sub %o0,8,%o0
1108 2:      retl
1109        _ mov -1,%o0
1110 3:      retl
1111        _ mov 0,%o0
1112 #endif
1113 #if COUNTER_LOOPS
1114 //      srl %o1,0,%o1           // zero-extend %o1 = count
1115         brz,pn %o1,2f
1116        _ sub %o0,8,%o0
1117         sllx %o1,3,%o1          // %o1 = 8*count
1118         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
1119           ldx [%o0+%o1],%o2     // fetch digit
1120 1:        subcc %o2,1,%o2       // decrement
1121           bcc,pn %xcc,3f
1122          _ stx %o2,[%o0+%o1]    // store
1123           subcc %o1,8,%o1       // decrement counter, decrement pointer
1124           bne,a,pt %xcc,1b
1125          __ ldx [%o0+%o1],%o2
1126 2:      retl
1127        _ mov -1,%o0
1128 3:      retl
1129        _ mov 0,%o0
1130 #endif
1131
1132 // extern uintD neg_loop_down (uintD* ptr, uintC count);
     // Negates (two's complement) the count-digit number ending just below
     // ptr, in place, walking downward. Zero digits are skipped unchanged,
     // the first nonzero digit is negated, and every digit after it is
     // bitwise inverted.
     // Returns 0 if all digits were zero (or count is zero), otherwise -1.
1133         DECLARE_FUNCTION(neg_loop_down)
1134 C(neg_loop_down:) // Input in %o0,%o1, Output in %o0
1135 #if STANDARD_LOOPS
1136 //      srl %o1,0,%o1           // zero-extend %o1 = count
1137         // search for the first digit /= 0:
1138         brz,pn %o1,2f
1139        _ sub %o0,8,%o0
1140 1:        ldx [%o0],%o2
1141           subcc %g0,%o2,%o2
1142           bne,pn %xcc,3f
1143          _ subcc %o1,1,%o1      // flags from this count update are tested at label 3
1144           bne,pt %xcc,1b
1145          _ sub %o0,8,%o0
1146 2:      retl
1147        _ mov 0,%o0
1148 3:      // first digit /= 0 found, from now on there are carries
1149         stx %o2,[%o0]           // negate 1 digit
1150         // invert all other digits:
1151         be,pn %xcc,5f           // no digits left
1152        _ sub %o0,8,%o0
1153 4:        ldx [%o0],%o2
1154           subcc %o1,1,%o1
1155           xnor %g0,%o2,%o2      // bitwise NOT
1156           stx %o2,[%o0]
1157           bne,pt %xcc,4b
1158          _ sub %o0,8,%o0
1159 5:      retl
1160        _ mov -1,%o0
1161 #endif
1162 #if COUNTER_LOOPS
1163 //      srl %o1,0,%o1           // zero-extend %o1 = count
1164         // search for the first digit /= 0:
1165         brz,pn %o1,2f
1166        _ sub %o0,8,%o0
1167         sllx %o1,3,%o1          // %o1 = 8*count
1168         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
1169           ldx [%o0+%o1],%o2     // fetch digit
1170 1:        subcc %g0,%o2,%o2     // negate, test
1171           bne,pn %xcc,3f
1172          _ subcc %o1,8,%o1      // decrement counter, decrement pointer
1173           bne,a,pt %xcc,1b
1174          __ ldx [%o0+%o1],%o2
1175 2:      retl
1176        _ mov 0,%o0
1177 3:      // first digit /= 0 found, from now on there are carries
1178         // invert all other digits:
1179         add %o1,8,%o1           // undo the delay-slot decrement to address the found digit
1180         stx %o2,[%o0+%o1]       // store
1181         subcc %o1,8,%o1
1182         be,pn %xcc,5f
1183        _ nop
1184           ldx [%o0+%o1],%o2
1185 4:        xnor %g0,%o2,%o2
1186           stx %o2,[%o0+%o1]
1187           subcc %o1,8,%o1
1188           bne,a,pt %xcc,4b
1189          __ ldx [%o0+%o1],%o2
1190 5:      retl
1191        _ mov -1,%o0
1192 #endif
1193
1194 // extern uintD shift1left_loop_down (uintD* ptr, uintC count);
     // Shifts the count-digit number ending just below ptr left by one bit,
     // in place, walking downward. The top bit of each digit becomes the
     // low bit of the next one. Returns the last bit shifted out (0 or 1)
     // in %o0; 0 when count is zero.
1195         DECLARE_FUNCTION(shift1left_loop_down)
1196 C(shift1left_loop_down:) // Input in %o0,%o1, Output in %o0
1197 //      srl %o1,0,%o1           // zero-extend %o1 = count
1198         brz,pn %o1,2f
1199        _ mov 0,%o3              // Carry := 0
1200         sub %o0,8,%o0
1201 1:        ldx [%o0],%o2         // digit
1202           addcc %o2,%o2,%o4     // shift
1203           add %o4,%o3,%o4       // and carry
1204           srlx %o2,63,%o3       // new carry
1205           stx %o4,[%o0]         // store digit
1206           subcc %o1,1,%o1
1207           bne,pt %xcc,1b
1208          _ sub %o0,8,%o0
1209 2:      retl
1210        _ mov %o3,%o0
1211
1212 // extern uintD shiftleft_loop_down (uintD* ptr, uintC count, uintC i, uintD carry);
     // Shifts the count-digit number ending just below ptr left by i bits,
     // in place, walking downward. The incoming carry (%o3) is ORed into the
     // first digit processed; the top i bits of each digit are carried into
     // the next. Returns the final carry in %o0 (the incoming carry when
     // count is zero).
     // NOTE(review): the carry is computed with a right shift by (64-i) mod 64,
     // so i is presumably expected to be in 1..63 -- confirm against callers.
1213         DECLARE_FUNCTION(shiftleft_loop_down)
1214 C(shiftleft_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
1215 //      srl %o1,0,%o1           // zero-extend %o1 = count
1216         brz,pn %o1,2f
1217        _ sub %g0,%o2,%g1        // 64-i (mod 64)
1218         sub %o0,8,%o0
1219 1:        ldx [%o0],%o4         // digit
1220           subcc %o1,1,%o1
1221           sllx %o4,%o2,%o5      // its low (64-i) bits
1222           or %o3,%o5,%o5        // combine with the old carry
1223           stx %o5,[%o0]         // store digit
1224           srlx %o4,%g1,%o3      // its top i bits yield the new carry
1225           bne,pt %xcc,1b
1226          _ sub %o0,8,%o0
1227 2:      retl
1228        _ mov %o3,%o0
1229
1230 // extern uintD shiftleftcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
     // Reads count 64-bit digits downward from sourceptr, shifts the number
     // left by i bits and writes the result downward from destptr. The
     // initial carry is 0; the top i bits of each source digit are carried
     // into the next digit. Returns the final carry in %o0 (0 when count is
     // zero).
     // NOTE(review): the carry is computed with a right shift by (64-i) mod 64,
     // so i is presumably expected to be in 1..63 -- confirm against callers.
1231         DECLARE_FUNCTION(shiftleftcopy_loop_down)
1232 C(shiftleftcopy_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1,%g2, Output in %o0
1233 //      srl %o2,0,%o2           // zero-extend %o2 = count
1234         brz,pn %o2,2f
1235        _ mov 0,%o4              // Carry := 0
1236         sub %g0,%o3,%g1         // 64-i (mod 64)
1237         sub %o0,8,%o0
1238 1:        ldx [%o0],%o5         // digit
1239           subcc %o2,1,%o2
1240           sllx %o5,%o3,%g2      // its low (64-i) bits
1241           or %o4,%g2,%g2        // combine with the old carry
1242           sub %o1,8,%o1
1243           stx %g2,[%o1]         // store digit
1244           srlx %o5,%g1,%o4      // its top i bits yield the new carry
1245           bne,pt %xcc,1b
1246          _ sub %o0,8,%o0
1247 2:      retl
1248        _ mov %o4,%o0
1249
1250 // extern uintD shift1right_loop_up (uintD* ptr, uintC count, uintD carry);
     // Shifts the count-digit number starting at ptr right by one bit, in
     // place, walking upward. Bit 0 of the incoming carry (%o2) becomes the
     // top bit of the first digit; the low bit of each digit is carried into
     // the top bit of the next. Returns the final carry in %o0, positioned at
     // bit 63 (for count zero this is the incoming carry shifted left by 63).
1251         DECLARE_FUNCTION(shift1right_loop_up)
1252 C(shift1right_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
1253 //      srl %o1,0,%o1           // zero-extend %o1 = count
1254         brz,pn %o1,2f
1255        _ sllx %o2,63,%o2        // carry
1256 1:        ldx [%o0],%o3         // digit
1257           subcc %o1,1,%o1
1258           srlx %o3,1,%o4        // shift
1259           or %o2,%o4,%o4        // and combine with the old carry
1260           stx %o4,[%o0]         // and store
1261           sllx %o3,63,%o2       // new carry
1262           bne,pt %xcc,1b
1263          _ add %o0,8,%o0
1264 2:      retl
1265        _ mov %o2,%o0
1266
1267 // extern uintD shiftright_loop_up (uintD* ptr, uintC count, uintC i);
     // Shifts the count-digit number starting at ptr right by i bits
     // (logical shift), in place, walking upward. The initial carry is 0;
     // the low i bits of each digit are carried into the top of the next.
     // Returns the final carry, left-aligned, in %o0 (0 when count is zero).
     // NOTE(review): the carry is computed with a left shift by (64-i) mod 64,
     // so i is presumably expected to be in 1..63 -- confirm against callers.
1268         DECLARE_FUNCTION(shiftright_loop_up)
1269 C(shiftright_loop_up:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
1270 //      srl %o1,0,%o1           // zero-extend %o1 = count
1271         sub %g0,%o2,%g1         // 64-i (mod 64)
1272         brz,pn %o1,2f
1273        _ or %g0,%g0,%o3         // Carry := 0
1274 1:        ldx [%o0],%o4         // digit
1275           subcc %o1,1,%o1
1276           srlx %o4,%o2,%o5      // shift
1277           or %o3,%o5,%o5        // and combine with the old carry
1278           stx %o5,[%o0]         // and store
1279           sllx %o4,%g1,%o3      // new carry
1280           bne,pt %xcc,1b
1281          _ add %o0,8,%o0
1282 2:      retl
1283        _ mov %o3,%o0
1284
1285 // extern uintD shiftrightsigned_loop_up (uintD* ptr, uintC count, uintC i);
     // Shifts the count-digit number starting at ptr right by i bits, in
     // place, walking upward. The first digit is shifted arithmetically
     // (srax, sign bits shifted in); all following digits are shifted
     // logically with the carry from the previous digit. Returns the final
     // carry, left-aligned, in %o0.
     // NOTE(review): the first digit is loaded and stored before count is
     // tested, so count is presumably required to be >= 1 -- confirm against
     // callers.
1286         DECLARE_FUNCTION(shiftrightsigned_loop_up)
1287 C(shiftrightsigned_loop_up:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
1288 //      srl %o1,0,%o1           // zero-extend %o1 = count
1289         ldx [%o0],%o4           // first digit
1290         sub %g0,%o2,%g1         // 64-i (mod 64)
1291         srax %o4,%o2,%o5        // shift
1292         stx %o5,[%o0]           // and store
1293         sllx %o4,%g1,%o3        // new carry
1294         subcc %o1,1,%o1
1295         be,pn %xcc,2f
1296        _ add %o0,8,%o0
1297 1:        ldx [%o0],%o4         // digit
1298           subcc %o1,1,%o1
1299           srlx %o4,%o2,%o5      // shift
1300           or %o3,%o5,%o5        // and combine with the old carry
1301           stx %o5,[%o0]         // and store
1302           sllx %o4,%g1,%o3      // new carry
1303           bne,pt %xcc,1b
1304          _ add %o0,8,%o0
1305 2:      retl
1306        _ mov %o3,%o0
1307
1308 // extern uintD shiftrightcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
     // Reads count 64-bit digits upward from sourceptr, shifts the number
     // right by i bits (logical shift) and writes the result upward from
     // destptr. The incoming carry (%o4) is shifted left by (64-i) mod 64 and
     // ORed into the first digit. Returns the final carry, left-aligned, in
     // %o0 (for count zero this is the shifted incoming carry).
1309         DECLARE_FUNCTION(shiftrightcopy_loop_up)
1310 C(shiftrightcopy_loop_up:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1,%g2, Output in %o0
1311 //      srl %o2,0,%o2           // zero-extend %o2 = count
1312         sub %g0,%o3,%g1         // 64-i (mod 64)
1313         brz,pn %o2,2f
1314        _ sllx %o4,%g1,%g2       // first carry
1315 1:        ldx [%o0],%o4         // digit
1316           add %o0,8,%o0
1317           srlx %o4,%o3,%o5      // shift
1318           or %g2,%o5,%o5        // and combine with the old carry
1319           stx %o5,[%o1]         // and store
1320           sllx %o4,%g1,%g2      // new carry
1321           subcc %o2,1,%o2
1322           bne,pt %xcc,1b
1323          _ add %o1,8,%o1
1324 2:      retl
1325        _ mov %g2,%o0
1326
1327 // extern uintD mulusmall_loop_down (uintD digit, uintD* ptr, uintC len, uintD newdigit);
     // Multiplies the len-digit number ending just below ptr by the small
     // factor `digit`, in place, walking downward. `newdigit` (%o3) is added
     // as the initial carry. Each 64-bit digit is split into 32-bit halves so
     // the two partial products fit in 64 bits. Returns the final carry in
     // %o0 (newdigit when len is zero).
     // NOTE(review): the existing comment calls digit a 6-bit number; the
     // carry arithmetic presumably relies on digit and newdigit being small
     // -- confirm against callers.
1328         DECLARE_FUNCTION(mulusmall_loop_down)
1329 C(mulusmall_loop_down:) // Input in %o0,%o1,%o2,%o3, Output in %o0, clobbers %g1
1330 //      srl %o2,0,%o2           // zero-extend %o2 = len
1331         brz,pn %o2,2f
1332        _ sub %o1,8,%o1
1333 1:        // multiply the next digit [%o1] by the 6-bit number %o0
1334           // and add the small carry %o3:
1335           ldx [%o1],%o4
1336           sub %o2,1,%o2
1337           srlx %o4,32,%o5       // high32(x)
1338           srl %o4,0,%o4         // low32(x)
1339           mulx %o4,%o0,%o4      // low32(x)*digit
1340           mulx %o5,%o0,%o5      // high32(x)*digit
1341           sllx %o5,32,%g1       // low32(high32(x)*digit)*2^32
1342           add %g1,%o3,%g1       // plus carry
1343           addcc %o4,%g1,%o4     // plus low32(x)*digit
1344           srlx %o5,32,%o3       // high32(high32(x)*digit)
1345           add %o3,1,%g1
1346           movcs %xcc,%g1,%o3    // new carry
1347           stx %o4,[%o1]         // store new digit
1348           brnz,pt %o2,1b
1349          _ sub %o1,8,%o1
1350 2:      retl
1351        _ mov %o3,%o0
1352
1353 // extern void mulu_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len-digit number ending just below sourceptr by `digit`
     // and writes the len low result digits downward from destptr, followed by
     // one more digit holding the final carry. Each 64x64-bit product is
     // assembled from four 32x32-bit partial products. Runs in its own
     // register window (save/restore).
     // NOTE(review): the loop body runs before len is tested, so len is
     // presumably required to be >= 1 -- confirm against callers.
1354         DECLARE_FUNCTION(mulu_loop_down)
1355 C(mulu_loop_down:) // Input in %i0,%i1,%i2,%i3
1356         save %sp,-192,%sp
1357         mov 0,%l0               // carry
1358         srlx %i0,32,%l1         // %l1 = high32(digit)
1359         srl %i0,0,%l2           // %l2 = low32(digit)
1360         mov 1,%l3
1361         sllx %l3,32,%l3         // %l3 = 2^32
1362         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1363 1:        sub %i2,8,%i2
1364           ldx [%i1+%i2],%o0     // next digit
1365           subcc %i3,1,%i3
1366           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1367           srlx %o0,32,%o1
1368           srl %o0,0,%o2
1369           mulx %l1,%o1,%o3      // high part
1370           mulx %l1,%o2,%o4      // first mid part
1371           mulx %l2,%o1,%o1      // second mid part
1372           mulx %l2,%o2,%o2      // low part
1373           srlx %o2,32,%o5       // low part's upper half
1374           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1375           addcc %o4,%o1,%o4     // add other mid part
1376           add %o3,%l3,%o5
1377           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1378           srlx %o4,32,%o5
1379           sllx %o4,32,%o4
1380           srl %o2,0,%o2
1381           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1382           addcc %o0,%l0,%o0     // add old carry
1383           add %o3,%o5,%l0       // add high32(midparts) to high part
1384           add %l0,1,%o5
1385           movcs %xcc,%o5,%l0    // new carry
1386           // multiplication done
1387           brnz,pt %i3,1b
1388          _ stx %o0,[%i2]        // store low digit
1389         stx %l0,[%i2-8]         // store last carry
1390         ret
1391        _ restore
1392
1393 // extern uintD muluadd_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len-digit number ending just below sourceptr by `digit`
     // and adds the product into the len digits ending just below destptr,
     // in place, walking downward. Returns the final carry digit in %i0
     // (the caller's %o0). Each 64x64-bit product is assembled from four
     // 32x32-bit partial products. Runs in its own register window.
     // NOTE(review): the loop body runs before len is tested, so len is
     // presumably required to be >= 1 -- confirm against callers.
1394         DECLARE_FUNCTION(muluadd_loop_down)
1395 C(muluadd_loop_down:) // Input in %i0,%i1,%i2,%i3, Output in %i0
1396         save %sp,-192,%sp
1397         mov 0,%l0               // carry
1398         srlx %i0,32,%l1         // %l1 = high32(digit)
1399         srl %i0,0,%l2           // %l2 = low32(digit)
1400         mov 1,%l3
1401         sllx %l3,32,%l3         // %l3 = 2^32
1402         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1403 1:        sub %i2,8,%i2
1404           ldx [%i1+%i2],%o0     // next digit
1405           ldx [%i2],%i4         // *destptr
1406           subcc %i3,1,%i3
1407           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1408           srlx %o0,32,%o1
1409           srl %o0,0,%o2
1410           mulx %l1,%o1,%o3      // high part
1411           mulx %l1,%o2,%o4      // first mid part
1412           mulx %l2,%o1,%o1      // second mid part
1413           mulx %l2,%o2,%o2      // low part
1414           srlx %o2,32,%o5       // low part's upper half
1415           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1416           addcc %o4,%o1,%o4     // add other mid part
1417           add %o3,%l3,%o5
1418           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1419           srlx %o4,32,%o5
1420           sllx %o4,32,%o4
1421           srl %o2,0,%o2
1422           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1423           addcc %o0,%l0,%o0     // add old carry
1424           add %o3,%o5,%l0       // add high32(midparts) to high part
1425           add %l0,1,%o5
1426           movcs %xcc,%o5,%l0    // new carry
1427           // multiplication done
1428           addcc %i4,%o0,%o0     // add old *destptr
1429           add %l0,1,%o2
1430           movcs %xcc,%o2,%l0    // new carry
1431           brnz,pt %i3,1b
1432          _ stx %o0,[%i2]        // store low digit
1433         mov %l0,%i0             // last carry
1434         ret
1435        _ restore
1436
1437 // extern uintD mulusub_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len-digit number ending just below sourceptr by `digit`
     // and subtracts the product from the len digits ending just below
     // destptr, in place, walking downward. Returns the final carry (the
     // amount still to be subtracted from the next higher digit) in %i0
     // (the caller's %o0). Each 64x64-bit product is assembled from four
     // 32x32-bit partial products. Runs in its own register window.
     // NOTE(review): the loop body runs before len is tested, so len is
     // presumably required to be >= 1 -- confirm against callers.
1438         DECLARE_FUNCTION(mulusub_loop_down)
1439 C(mulusub_loop_down:) // Input in %i0,%i1,%i2,%i3, Output in %i0
1440         save %sp,-192,%sp
1441         mov 0,%l0               // carry
1442         srlx %i0,32,%l1         // %l1 = high32(digit)
1443         srl %i0,0,%l2           // %l2 = low32(digit)
1444         mov 1,%l3
1445         sllx %l3,32,%l3         // %l3 = 2^32
1446         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1447 1:        sub %i2,8,%i2
1448           ldx [%i1+%i2],%o0     // next digit
1449           ldx [%i2],%i4         // *destptr
1450           subcc %i3,1,%i3
1451           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1452           srlx %o0,32,%o1
1453           srl %o0,0,%o2
1454           mulx %l1,%o1,%o3      // high part
1455           mulx %l1,%o2,%o4      // first mid part
1456           mulx %l2,%o1,%o1      // second mid part
1457           mulx %l2,%o2,%o2      // low part
1458           srlx %o2,32,%o5       // low part's upper half
1459           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1460           addcc %o4,%o1,%o4     // add other mid part
1461           add %o3,%l3,%o5
1462           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1463           srlx %o4,32,%o5
1464           sllx %o4,32,%o4
1465           srl %o2,0,%o2
1466           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1467           addcc %o0,%l0,%o0     // add old carry
1468           add %o3,%o5,%l0       // add high32(midparts) to high part
1469           add %l0,1,%o5
1470           movcs %xcc,%o5,%l0    // new carry
1471           // multiplication done
1472           subcc %i4,%o0,%o0     // subtract from old *destptr
1473           add %l0,1,%o2
1474           movcs %xcc,%o2,%l0    // new carry
1475           brnz,pt %i3,1b
1476          _ stx %o0,[%i2]        // store low digit
1477         mov %l0,%i0             // last carry
1478         ret
1479        _ restore
1480
1481 #endif
1482
1483 #if !CL_DS_BIG_ENDIAN_P
1484
1485 // extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // ORs count 64-bit digits of yptr into the digits of xptr, walking
     // downward (each digit is addressed below the current pointer). The
     // result is written to the x array; the y array is only read.
     // Does nothing when count is zero.
1486         DECLARE_FUNCTION(or_loop_down)
1487 C(or_loop_down:) // Input in %o0,%o1,%o2
1488 #if STANDARD_LOOPS
1489 //      srl %o2,0,%o2           // zero-extend %o2 = count
1490         brz,pn %o2,2f
1491        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1492         sub %o0,8,%o0
1493 1:        ldx [%o0],%o3         // *xptr
1494           ldx [%o0+%o1],%o4     // *yptr
1495           subcc %o2,1,%o2
1496           or %o3,%o4,%o3        // combine
1497           stx %o3,[%o0]         // =: *xptr
1498           bne,pt %xcc,1b
1499          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed as xptr+%o1)
1500 2:      retl
1501        _ nop
1502 #endif
1503 #if COUNTER_LOOPS
1504 //      srl %o2,0,%o2           // zero-extend %o2 = count
1505         brz,pn %o2,2f
1506        _ sllx %o2,3,%o2         // %o2 = 8*count
1507         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1508         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1509 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1510           ldx [%o1+%o2],%o3     // fetch y digit
1511           ldx [%o0+%o2],%o4     // fetch x digit
1512           or %o4,%o3,%o3        // combine both
1513           bne,pt %xcc,1b
1514          _ stx %o3,[%o0+%o2]    // store digit into the x array, as the STANDARD_LOOPS variant does
1515 2:      retl
1516        _ nop
1517 #endif
1518
1519 // extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // XORs count 64-bit digits of yptr into the digits of xptr, walking
     // downward (each digit is addressed below the current pointer). The
     // result is written to the x array; the y array is only read.
     // Does nothing when count is zero.
1520         DECLARE_FUNCTION(xor_loop_down)
1521 C(xor_loop_down:) // Input in %o0,%o1,%o2
1522 #if STANDARD_LOOPS
1523 //      srl %o2,0,%o2           // zero-extend %o2 = count
1524         brz,pn %o2,2f
1525        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1526         sub %o0,8,%o0
1527 1:        ldx [%o0],%o3         // *xptr
1528           ldx [%o0+%o1],%o4     // *yptr
1529           subcc %o2,1,%o2
1530           xor %o3,%o4,%o3       // combine
1531           stx %o3,[%o0]         // =: *xptr
1532           bne,pt %xcc,1b
1533          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed as xptr+%o1)
1534 2:      retl
1535        _ nop
1536 #endif
1537 #if COUNTER_LOOPS
1538 //      srl %o2,0,%o2           // zero-extend %o2 = count
1539         brz,pn %o2,2f
1540        _ sllx %o2,3,%o2         // %o2 = 8*count
1541         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1542         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1543 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1544           ldx [%o1+%o2],%o3     // fetch y digit
1545           ldx [%o0+%o2],%o4     // fetch x digit
1546           xor %o4,%o3,%o3       // combine both
1547           bne,pt %xcc,1b
1548          _ stx %o3,[%o0+%o2]    // store digit into the x array, as the STANDARD_LOOPS variant does
1549 2:      retl
1550        _ nop
1551 #endif
1552
1553 // extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count);  // xptr[-i] &= yptr[-i] for i = 1..count
1554         DECLARE_FUNCTION(and_loop_down)
1555 C(and_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; no result
1556 #if STANDARD_LOOPS
1557 //      srl %o2,0,%o2           // zero-extend %o2 = count
1558         brz,pn %o2,2f           // count = 0 -> nothing to do
1559        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1560         sub %o0,8,%o0           // step down to the first digit to process
1561 1:        ldx [%o0],%o3         // *xptr
1562           ldx [%o0+%o1],%o4     // *yptr
1563           subcc %o2,1,%o2       // count--; sets the flags tested by bne below
1564           and %o3,%o4,%o3       // combine
1565           stx %o3,[%o0]         // =: *xptr
1566           bne,pt %xcc,1b
1567          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1568 2:      retl
1569        _ nop
1570 #endif
1571 #if COUNTER_LOOPS
1572 //      srl %o2,0,%o2           // zero-extend %o2 = count
1573         brz,pn %o2,2f           // count = 0 -> nothing to do
1574        _ sllx %o2,3,%o2         // %o2 = 8*count
1575         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1576         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1577 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1578           ldx [%o1+%o2],%o3     // fetch next digit of y
1579           ldx [%o0+%o2],%o4     // fetch matching digit of x
1580           and %o4,%o3,%o3       // combine both
1581           bne,pt %xcc,1b
1582          _ stx %o3,[%o0+%o2]    // store digit into x (the destination, as in STANDARD_LOOPS), not into y
1583 2:      retl
1584        _ nop
1585 #endif
1586
1587 // extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count);  // xptr[-i] = ~(xptr[-i] ^ yptr[-i]) for i = 1..count
1588         DECLARE_FUNCTION(eqv_loop_down)
1589 C(eqv_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; no result
1590 #if STANDARD_LOOPS
1591 //      srl %o2,0,%o2           // zero-extend %o2 = count
1592         brz,pn %o2,2f           // count = 0 -> nothing to do
1593        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1594         sub %o0,8,%o0           // step down to the first digit to process
1595 1:        ldx [%o0],%o3         // *xptr
1596           ldx [%o0+%o1],%o4     // *yptr
1597           subcc %o2,1,%o2       // count--; sets the flags tested by bne below
1598           xnor %o3,%o4,%o3      // combine
1599           stx %o3,[%o0]         // =: *xptr
1600           bne,pt %xcc,1b
1601          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1602 2:      retl
1603        _ nop
1604 #endif
1605 #if COUNTER_LOOPS
1606 //      srl %o2,0,%o2           // zero-extend %o2 = count
1607         brz,pn %o2,2f           // count = 0 -> nothing to do
1608        _ sllx %o2,3,%o2         // %o2 = 8*count
1609         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1610         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1611 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1612           ldx [%o1+%o2],%o3     // fetch next digit of y
1613           ldx [%o0+%o2],%o4     // fetch matching digit of x
1614           xnor %o4,%o3,%o3      // combine both
1615           bne,pt %xcc,1b
1616          _ stx %o3,[%o0+%o2]    // store digit into x (the destination, as in STANDARD_LOOPS), not into y
1617 2:      retl
1618        _ nop
1619 #endif
1620
1621 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);  // xptr[-i] = ~(xptr[-i] & yptr[-i]) for i = 1..count
1622         DECLARE_FUNCTION(nand_loop_down)
1623 C(nand_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; no result
1624 #if STANDARD_LOOPS
1625 //      srl %o2,0,%o2           // zero-extend %o2 = count
1626         brz,pn %o2,2f           // count = 0 -> nothing to do
1627        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1628         sub %o0,8,%o0           // step down to the first digit to process
1629 1:        ldx [%o0],%o3         // *xptr
1630           ldx [%o0+%o1],%o4     // *yptr
1631           subcc %o2,1,%o2       // count--; sets the flags tested by bne below
1632           and %o3,%o4,%o3       // combine
1633           xnor %g0,%o3,%o3      // invert all bits
1634           stx %o3,[%o0]         // =: *xptr
1635           bne,pt %xcc,1b
1636          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1637 2:      retl
1638        _ nop
1639 #endif
1640 #if COUNTER_LOOPS
1641 //      srl %o2,0,%o2           // zero-extend %o2 = count
1642         brz,pn %o2,2f           // count = 0 -> nothing to do
1643        _ sllx %o2,3,%o2         // %o2 = 8*count
1644         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1645         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1646 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1647           ldx [%o1+%o2],%o3     // fetch next digit of y
1648           ldx [%o0+%o2],%o4     // fetch matching digit of x
1649           and %o4,%o3,%o3       // combine both
1650           xnor %g0,%o3,%o3      // invert all bits
1651           bne,pt %xcc,1b
1652          _ stx %o3,[%o0+%o2]    // store digit into x (the destination, as in STANDARD_LOOPS), not into y
1653 2:      retl
1654        _ nop
1655 #endif
1656
1657 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);  // xptr[-i] = ~(xptr[-i] | yptr[-i]) for i = 1..count
1658         DECLARE_FUNCTION(nor_loop_down)
1659 C(nor_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; no result
1660 #if STANDARD_LOOPS
1661 //      srl %o2,0,%o2           // zero-extend %o2 = count
1662         brz,pn %o2,2f           // count = 0 -> nothing to do
1663        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1664         sub %o0,8,%o0           // step down to the first digit to process
1665 1:        ldx [%o0],%o3         // *xptr
1666           ldx [%o0+%o1],%o4     // *yptr
1667           subcc %o2,1,%o2       // count--; sets the flags tested by bne below
1668           or %o3,%o4,%o3        // combine
1669           xnor %g0,%o3,%o3      // invert all bits
1670           stx %o3,[%o0]         // =: *xptr
1671           bne,pt %xcc,1b
1672          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1673 2:      retl
1674        _ nop
1675 #endif
1676 #if COUNTER_LOOPS
1677 //      srl %o2,0,%o2           // zero-extend %o2 = count
1678         brz,pn %o2,2f           // count = 0 -> nothing to do
1679        _ sllx %o2,3,%o2         // %o2 = 8*count
1680         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1681         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1682 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1683           ldx [%o1+%o2],%o3     // fetch next digit of y
1684           ldx [%o0+%o2],%o4     // fetch matching digit of x
1685           or %o4,%o3,%o3        // combine both
1686           xnor %g0,%o3,%o3      // invert all bits
1687           bne,pt %xcc,1b
1688          _ stx %o3,[%o0+%o2]    // store digit into x (the destination, as in STANDARD_LOOPS), not into y
1689 2:      retl
1690        _ nop
1691 #endif
1692
1693 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);  // xptr[-i] &= ~yptr[-i] for i = 1..count
1694         DECLARE_FUNCTION(andc2_loop_down)
1695 C(andc2_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; no result
1696 #if STANDARD_LOOPS
1697 //      srl %o2,0,%o2           // zero-extend %o2 = count
1698         brz,pn %o2,2f           // count = 0 -> nothing to do
1699        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1700         sub %o0,8,%o0           // step down to the first digit to process
1701 1:        ldx [%o0],%o3         // *xptr
1702           ldx [%o0+%o1],%o4     // *yptr
1703           subcc %o2,1,%o2       // count--; sets the flags tested by bne below
1704           andn %o3,%o4,%o3      // combine: x & ~y
1705           stx %o3,[%o0]         // =: *xptr
1706           bne,pt %xcc,1b
1707          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1708 2:      retl
1709        _ nop
1710 #endif
1711 #if COUNTER_LOOPS
1712 //      srl %o2,0,%o2           // zero-extend %o2 = count
1713         brz,pn %o2,2f           // count = 0 -> nothing to do
1714        _ sllx %o2,3,%o2         // %o2 = 8*count
1715         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1716         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1717 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1718           ldx [%o1+%o2],%o3     // fetch next digit of y
1719           ldx [%o0+%o2],%o4     // fetch matching digit of x
1720           andn %o4,%o3,%o3      // combine both: x & ~y
1721           bne,pt %xcc,1b
1722          _ stx %o3,[%o0+%o2]    // store digit into x (the destination, as in STANDARD_LOOPS), not into y
1723 2:      retl
1724        _ nop
1725 #endif
1726
1727 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);  // xptr[-i] |= ~yptr[-i] for i = 1..count
1728         DECLARE_FUNCTION(orc2_loop_down)
1729 C(orc2_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; no result
1730 #if STANDARD_LOOPS
1731 //      srl %o2,0,%o2           // zero-extend %o2 = count
1732         brz,pn %o2,2f           // count = 0 -> nothing to do
1733        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1734         sub %o0,8,%o0           // step down to the first digit to process
1735 1:        ldx [%o0],%o3         // *xptr
1736           ldx [%o0+%o1],%o4     // *yptr
1737           subcc %o2,1,%o2       // count--; sets the flags tested by bne below
1738           orn %o3,%o4,%o3       // combine: x | ~y
1739           stx %o3,[%o0]         // =: *xptr
1740           bne,pt %xcc,1b
1741          _ sub %o0,8,%o0        // xptr--, yptr-- (yptr is addressed relative to xptr)
1742 2:      retl
1743        _ nop
1744 #endif
1745 #if COUNTER_LOOPS
1746 //      srl %o2,0,%o2           // zero-extend %o2 = count
1747         brz,pn %o2,2f           // count = 0 -> nothing to do
1748        _ sllx %o2,3,%o2         // %o2 = 8*count
1749         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1750         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1751 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1752           ldx [%o1+%o2],%o3     // fetch next digit of y
1753           ldx [%o0+%o2],%o4     // fetch matching digit of x
1754           orn %o4,%o3,%o3       // combine both: x | ~y
1755           bne,pt %xcc,1b
1756          _ stx %o3,[%o0+%o2]    // store digit into x (the destination, as in STANDARD_LOOPS), not into y
1757 2:      retl
1758        _ nop
1759 #endif
1760
1761 // extern void not_loop_down (uintD* xptr, uintC count);  // xptr[-i] = ~xptr[-i] for i = 1..count
1762         DECLARE_FUNCTION(not_loop_down)
1763 C(not_loop_down:) // Input in %o0 = xptr, %o1 = count; no result
1764 #if STANDARD_LOOPS
1765 //      srl %o1,0,%o1           // zero-extend %o1 = count
1766         brz,pn %o1,2f           // count = 0 -> nothing to do
1767        _ sub %o0,8,%o0          // step down to the first digit to process
1768 1:        ldx [%o0],%o2         // fetch digit
1769           subcc %o1,1,%o1       // count--; sets the flags tested by bne below
1770           xnor %g0,%o2,%o2      // invert all bits
1771           stx %o2,[%o0]         // store digit back
1772           bne,pt %xcc,1b
1773          _ sub %o0,8,%o0        // xptr--
1774 2:      retl
1775        _ nop
1776 #endif
1777 #if COUNTER_LOOPS
1778 //      srl %o1,0,%o1           // zero-extend %o1 = count
1779         brz,pn %o1,2f           // count = 0 -> nothing to do
1780        _ sllx %o1,3,%o1         // %o1 = 8*count
1781         sub %o0,%o1,%o0         // %o0 = &xptr[-count]
1782 1:        subcc %o1,8,%o1       // decrement counter, decrement pointer
1783           ldx [%o0+%o1],%o2     // fetch next digit
1784           xnor %g0,%o2,%o2      // invert all bits
1785           bne,pt %xcc,1b
1786          _ stx %o2,[%o0+%o1]    // store digit
1787 2:      retl
1788        _ nop
1789 #endif
1790
1791 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);  // returns 1 if (xptr[-i] & yptr[-i]) != 0 for some i in 1..count, else 0
1792         DECLARE_FUNCTION(and_test_loop_down)
1793 C(and_test_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; output in %o0
1794 #if STANDARD_LOOPS
1795 //      srl %o2,0,%o2           // zero-extend %o2 = count
1796         brz,pn %o2,4f           // count = 0 -> result 0
1797        _ sub %o0,8,%o0          // step xptr down to the first digit
1798 1:        ldx [%o0],%o3         // digit of x
1799           sub %o1,8,%o1         // yptr--
1800           ldx [%o1],%o4         // digit of y
1801           subcc %o2,1,%o2       // count--
1802           be,pn %xcc,3f         // that was the last digit -> final test at 3
1803          _ andcc %o3,%o4,%g0    // test x & y (delay slot: flags now describe the AND)
1804           be,pt %xcc,1b         // AND is zero -> keep looking
1805          _ sub %o0,8,%o0        // xptr--
1806 2:      retl                    // a common 1 bit was found
1807        _ mov 1,%o0
1808 3:      bne,pn %xcc,2b          // last digit: test all 64 bits (%xcc); a plain bne would only see the low 32 (%icc)
1809        _ nop
1810 4:      retl                    // no common 1 bit
1811        _ mov 0,%o0
1812 #endif
1813 #if COUNTER_LOOPS
1814 //      srl %o2,0,%o2           // zero-extend %o2 = count
1815         sllx %o2,3,%o2          // %o2 = 8*count
1816         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1817         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1818         subcc %o2,8,%o2         // offset of the first digit; borrow means count = 0
1819         bcs,pn %xcc,2f
1820        _ nop
1821           ldx [%o0+%o2],%o3     // fetch next digit
1822 1:        ldx [%o1+%o2],%o4     // fetch another digit
1823           andcc %o3,%o4,%g0     // combine both
1824           bne,pn %xcc,3f        // common 1 bit found -> result 1
1825          _ subcc %o2,8,%o2      // decrement counter, decrement pointer
1826           bcc,a,pt %xcc,1b      // no borrow -> more digits remain
1827          __ ldx [%o0+%o2],%o3   // fetch next digit (annulled when the loop ends)
1828 2:      retl
1829        _ mov 0,%o0
1830 3:      retl
1831        _ mov 1,%o0
1832 #endif
1833
1834 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);  // compares xptr[-i] with yptr[-i] for i = 1..count (unsigned); at the first difference returns +1 if x > y, -1 if x < y; returns 0 if all are equal
1835         DECLARE_FUNCTION(compare_loop_down)
1836 C(compare_loop_down:) // Input in %o0 = xptr, %o1 = yptr, %o2 = count; output in %o0
1837 #if STANDARD_LOOPS
1838 //      srl %o2,0,%o2           // zero-extend %o2 = count
1839         brz,pn %o2,2f           // count = 0 -> equal
1840        _ nop
1841 1:        ldx [%o0-8],%o3       // digit of x
1842           ldx [%o1-8],%o4       // digit of y
1843           subcc %o3,%o4,%g0     // compare
1844           bne,pn %xcc,3f        // digits differ -> decide the sign at 3
1845          _ sub %o0,8,%o0        // xptr--
1846           subcc %o2,1,%o2       // count--
1847           bne,pn %xcc,1b
1848          _ sub %o1,8,%o1        // yptr--
1849 2:      retl                    // all digits equal
1850        _ mov 0,%o0
1851 3:      mov 1,%o0               // assume x > y; flags still come from the compare above
1852         movlu %xcc,-1,%o0       // x digit < y digit (unsigned) -> -1
1853         retl
1854        _ sra %o0,0,%o0          // sign-extend %o0
1855 #endif
1856 #if COUNTER_LOOPS
1857 //      srl %o2,0,%o2           // zero-extend %o2 = count
1858         sllx %o2,3,%o2          // %o2 = 8*count
1859         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1860         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1861         subcc %o2,8,%o2         // offset of the first digit; borrow means count = 0
1862         bcs,pn %xcc,4f
1863        _ nop
1864           ldx [%o0+%o2],%o3     // fetch next digit
1865 1:        ldx [%o1+%o2],%o4     // fetch another digit
1866           subcc %o2,8,%o2       // decrement counter, decrement pointer
1867           bcs,pn %xcc,3f        // borrow -> that was the last digit
1868          _ subcc %o3,%o4,%g0    // compare (delay slot: flags now describe the compare)
1869           be,a,pt %xcc,1b       // equal -> continue
1870          __ ldx [%o0+%o2],%o3   // fetch next digit (annulled when the digits differ)
1871 2:      mov 1,%o0               // assume x > y; flags still come from the compare
1872         movlu %xcc,-1,%o0       // x digit < y digit (unsigned) -> -1
1873         retl
1874        _ sra %o0,0,%o0          // sign-extend %o0
1875 3:      bne,pn %xcc,2b          // last digit: test all 64 bits (%xcc); a plain bne would only see the low 32 (%icc)
1876        _ nop
1877 4:      retl                    // all digits equal
1878        _ mov 0,%o0
1879 #endif
1880
1881 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);  // destptr[i] = sourceptr1[i] + sourceptr2[i] + carry for i = 0..count-1; returns the final carry (0 or 1)
1882         DECLARE_FUNCTION(add_loop_up)
1883 C(add_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, output in %o0
1884 #if STANDARD_LOOPS
1885 //      srl %o3,0,%o3           // zero-extend %o3 = count
1886         brz,pn %o3,2f
1887        _ mov %g0,%g1            // carry := 0
1888 1:        ldx [%o0],%o4         // source1-digit
1889           add %o0,8,%o0
1890           ldx [%o1],%o5         // source2-digit
1891           add %o1,8,%o1
1892           addcc %o4,%g1,%o4
1893           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
1894           addcc %o4,%o5,%o4
1895           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
1896           stx %o4,[%o2]         // store digit
1897           subcc %o3,1,%o3
1898           bne,pt %xcc,1b
1899          _ add %o2,8,%o2
1900 2:      retl
1901        _ mov %g1,%o0
1902 #endif
1903 #if COUNTER_LOOPS
1904 //      srl %o3,0,%o3           // zero-extend %o3 = count
1905         brz,pn %o3,2f
1906        _ mov %g0,%g1            // carry := 0
1907         sub %g0,%o3,%o3         // %o3 = -count
1908         sllx %o3,3,%o3          // %o3 = -8*count
1909         sub %o2,8,%o2
1910         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
1911         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
1912         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
1913 1:        ldx [%o0+%o3],%o4     // source1-digit
1914           ldx [%o1+%o3],%o5     // source2-digit
1915           addcc %o4,%g1,%o4
1916           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
1917           addcc %o4,%o5,%o4
1918           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
1919           addcc %o3,8,%o3       // step the (negative) counter towards 0, advance pointer
1920           bne,pt %xcc,1b
1921          _ stx %o4,[%o2+%o3]    // store digit (%o2 is biased by -8 because %o3 was already advanced)
1922 2:      retl
1923        _ mov %g1,%o0
1924 #endif
1925
1926 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);  // destptr[i] += sourceptr[i] + carry for i = 0..count-1; returns the final carry (0 or 1)
1927         DECLARE_FUNCTION(addto_loop_up)
1928 C(addto_loop_up:) // Input in %o0,%o1,%o2, output in %o0
1929 #if STANDARD_LOOPS
1930 //      srl %o2,0,%o2           // zero-extend %o2 = count
1931         brz,pn %o2,2f
1932        _ mov %g0,%o5            // carry := 0
1933 1:        ldx [%o0],%o3         // source-digit
1934           add %o0,8,%o0
1935           ldx [%o1],%o4         // dest-digit
1936           addcc %o3,%o5,%o3
1937           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1938           addcc %o3,%o4,%o4
1939           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
1940           stx %o4,[%o1]         // store digit
1941           subcc %o2,1,%o2
1942           bne,pt %xcc,1b
1943          _ add %o1,8,%o1
1944 2:      retl
1945        _ mov %o5,%o0
1946 #endif
1947 #if COUNTER_LOOPS
1948 //      srl %o2,0,%o2           // zero-extend %o2 = count
1949         brz,pn %o2,2f
1950        _ mov %g0,%o5            // carry := 0
1951         sub %g0,%o2,%o2         // %o2 = -count
1952         sllx %o2,3,%o2          // %o2 = -8*count
1953         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
1954         sub %o1,%o2,%o1         // %o1 = &destptr[count]
1955           ldx [%o0+%o2],%o3     // source-digit
1956 1:        ldx [%o1+%o2],%o4     // dest-digit
1957           addcc %o3,%o5,%o3
1958           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1959           addcc %o3,%o4,%o4
1960           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
1961           stx %o4,[%o1+%o2]     // store digit
1962           addcc %o2,8,%o2       // step the (negative) counter towards 0, advance pointer
1963           bne,a,pt %xcc,1b
1964          __ ldx [%o0+%o2],%o3   // source-digit (annulled when the loop ends)
1965 2:      retl
1966        _ mov %o5,%o0
1967 #endif
1968
1969 // extern uintD inc_loop_up (uintD* ptr, uintC count);  // adds 1 to the count digits at ptr[0..count-1]; returns 0 as soon as a digit does not wrap to 0, returns 1 if every digit wrapped (or count = 0)
1970         DECLARE_FUNCTION(inc_loop_up)
1971 C(inc_loop_up:) // Input in %o0,%o1, output in %o0
1972 #if STANDARD_LOOPS
1973 //      srl %o1,0,%o1           // zero-extend %o1 = count
1974         brz,pn %o1,2f
1975        _ nop
1976           ldx [%o0],%o2         // fetch digit
1977 1:        add %o0,8,%o0
1978           addcc %o2,1,%o2       // increment
1979           bne,pn %xcc,3f        // result nonzero -> no carry into the next digit, done
1980          _ stx %o2,[%o0-8]      // store (executed in either case)
1981           subcc %o1,1,%o1
1982           bne,a,pt %xcc,1b
1983          __ ldx [%o0],%o2       // fetch next digit (annulled when the loop ends)
1984 2:      retl                    // carry ran out of the top
1985        _ mov 1,%o0
1986 3:      retl                    // carry absorbed
1987        _ mov 0,%o0
1988 #endif
1989 #if COUNTER_LOOPS
1990 //      srl %o1,0,%o1           // zero-extend %o1 = count
1991         brz,pn %o1,2f
1992        _ sub %g0,%o1,%o1        // %o1 = -count
1993         sllx %o1,3,%o1          // %o1 = -8*count
1994         sub %o0,%o1,%o0         // %o0 = &ptr[count]
1995           ldx [%o0+%o1],%o2     // fetch digit
1996 1:        addcc %o2,1,%o2       // increment
1997           bne,pn %xcc,3f        // result nonzero -> no carry into the next digit, done
1998          _ stx %o2,[%o0+%o1]    // store
1999           addcc %o1,8,%o1       // step the (negative) counter towards 0, advance pointer
2000           bne,a,pt %xcc,1b
2001          __ ldx [%o0+%o1],%o2   // fetch next digit (annulled when the loop ends)
2002 2:      retl                    // carry ran out of the top
2003        _ mov 1,%o0
2004 3:      retl                    // carry absorbed
2005        _ mov 0,%o0
2006 #endif
2007
2008 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);  // destptr[i] = sourceptr1[i] - sourceptr2[i] - borrow for i = 0..count-1; returns the final borrow (0 or 1)
2009         DECLARE_FUNCTION(sub_loop_up)
2010 C(sub_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, output in %o0
2011 #if STANDARD_LOOPS
2012 //      srl %o3,0,%o3           // zero-extend %o3 = count
2013         brz,pn %o3,2f
2014        _ mov %g0,%g1            // carry := 0
2015 1:        ldx [%o0],%o4         // source1-digit
2016           add %o0,8,%o0
2017           ldx [%o1],%o5         // source2-digit
2018           add %o1,8,%o1
2019           addcc %o5,%g1,%o5
2020           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
2021           subcc %o4,%o5,%o4
2022           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
2023           stx %o4,[%o2]         // store digit
2024           subcc %o3,1,%o3
2025           bne,pt %xcc,1b
2026          _ add %o2,8,%o2
2027 2:      retl
2028        _ mov %g1,%o0
2029 #endif
2030 #if COUNTER_LOOPS
2031 //      srl %o3,0,%o3           // zero-extend %o3 = count
2032         brz,pn %o3,2f
2033        _ mov %g0,%g1            // carry := 0
2034         sub %g0,%o3,%o3         // %o3 = -count
2035         sllx %o3,3,%o3          // %o3 = -8*count
2036         sub %o2,8,%o2
2037         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
2038         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
2039         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
2040 1:        ldx [%o1+%o3],%o5     // source2-digit
2041           ldx [%o0+%o3],%o4     // source1-digit
2042           addcc %o5,%g1,%o5
2043           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
2044           subcc %o4,%o5,%o4
2045           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
2046           addcc %o3,8,%o3       // step the (negative) counter towards 0, advance pointer
2047           bne,pt %xcc,1b
2048          _ stx %o4,[%o2+%o3]    // store digit (%o2 is biased by -8 because %o3 was already advanced)
2049 2:      retl
2050        _ mov %g1,%o0
2051 #endif
2052
2053 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);  // destptr[i] = sourceptr1[i] - sourceptr2[i] + carry for i = 0..count-1, carry being 0 or -1; returns the final carry (0 or -1)
2054         DECLARE_FUNCTION(subx_loop_up)
2055 C(subx_loop_up:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1, output in %o0
2056 #if STANDARD_LOOPS
2057 //      srl %o3,0,%o3           // zero-extend %o3 = count
2058         brz,pn %o3,2f
2059        _ mov %o4,%g1            // carry (0 or -1)
2060 1:        ldx [%o0],%o4         // source1-digit
2061           add %o0,8,%o0
2062           ldx [%o1],%o5         // source2-digit
2063           add %o1,8,%o1
2064           subcc %o5,%g1,%o5     // %o5 := %o5 - carry; C is clear only if carry = 0 or %o5+1 wrapped to 0
2065           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1 (C set: carry absorbed into %o5 without wrap)
2066           subcc %o4,%o5,%o4
2067           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
2068           stx %o4,[%o2]         // store digit
2069           subcc %o3,1,%o3
2070           bne,pt %xcc,1b
2071          _ add %o2,8,%o2
2072 2:      retl
2073        _ mov %g1,%o0
2074 #endif
2075 #if COUNTER_LOOPS
2076 //      srl %o3,0,%o3           // zero-extend %o3 = count
2077         brz,pn %o3,2f
2078        _ mov %o4,%g1            // carry (0 or -1)
2079         sub %g0,%o3,%o3         // %o3 = -count
2080         sllx %o3,3,%o3          // %o3 = -8*count
2081         sub %o2,8,%o2
2082         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
2083         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
2084         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
2085 1:        ldx [%o1+%o3],%o5     // source2-digit
2086           ldx [%o0+%o3],%o4     // source1-digit
2087           subcc %o5,%g1,%o5     // %o5 := %o5 - carry; C is clear only if carry = 0 or %o5+1 wrapped to 0
2088           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1 (C set: carry absorbed into %o5 without wrap)
2089           subcc %o4,%o5,%o4
2090           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
2091           addcc %o3,8,%o3       // step the (negative) counter towards 0, advance pointer
2092           bne,pt %xcc,1b
2093          _ stx %o4,[%o2+%o3]    // store digit (%o2 is biased by -8 because %o3 was already advanced)
2094 2:      retl
2095        _ mov %g1,%o0
2096 #endif
2097
2098 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);  // destptr[i] -= sourceptr[i] + borrow for i = 0..count-1; returns the final borrow (0 or 1)
2099         DECLARE_FUNCTION(subfrom_loop_up)
2100 C(subfrom_loop_up:) // Input in %o0,%o1,%o2, output in %o0
2101 #if STANDARD_LOOPS
2102 //      srl %o2,0,%o2           // zero-extend %o2 = count
2103         brz,pn %o2,2f
2104        _ mov %g0,%o5            // carry := 0
2105 1:        ldx [%o0],%o3         // source-digit
2106           add %o0,8,%o0
2107           ldx [%o1],%o4         // dest-digit
2108           addcc %o3,%o5,%o3
2109           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
2110           subcc %o4,%o3,%o4
2111           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
2112           stx %o4,[%o1]         // store digit
2113           subcc %o2,1,%o2
2114           bne,pt %xcc,1b
2115          _ add %o1,8,%o1
2116 2:      retl
2117        _ mov %o5,%o0
2118 #endif
2119 #if COUNTER_LOOPS
2120 //      srl %o2,0,%o2           // zero-extend %o2 = count
2121         brz,pn %o2,2f
2122        _ mov %g0,%o5            // carry := 0
2123         sub %g0,%o2,%o2         // %o2 = -count
2124         sllx %o2,3,%o2          // %o2 = -8*count
2125         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
2126         sub %o1,%o2,%o1         // %o1 = &destptr[count]
2127           ldx [%o0+%o2],%o3     // source-digit
2128 1:        ldx [%o1+%o2],%o4     // dest-digit
2129           addcc %o3,%o5,%o3
2130           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
2131           subcc %o4,%o3,%o4
2132           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
2133           stx %o4,[%o1+%o2]     // store digit
2134           addcc %o2,8,%o2       // step the (negative) counter towards 0, advance pointer
2135           bne,a,pt %xcc,1b
2136          __ ldx [%o0+%o2],%o3   // source-digit (annulled when the loop ends)
2137 2:      retl
2138        _ mov %o5,%o0
2139 #endif
2140
2141 // extern uintD dec_loop_up (uintD* ptr, uintC count);  // subtracts 1 from the count digits at ptr[0..count-1]; returns 0 as soon as a digit does not borrow, returns -1 if every digit borrowed (or count = 0)
2142         DECLARE_FUNCTION(dec_loop_up)
2143 C(dec_loop_up:) // Input in %o0,%o1, output in %o0
2144 #if STANDARD_LOOPS
2145 //      srl %o1,0,%o1           // zero-extend %o1 = count
2146         brz,pn %o1,2f
2147        _ nop
2148           ldx [%o0],%o2         // fetch digit
2149 1:        add %o0,8,%o0
2150           subcc %o2,1,%o2       // decrement
2151           bcc,pn %xcc,3f        // no borrow -> done
2152          _ stx %o2,[%o0-8]      // store (executed in either case)
2153           subcc %o1,1,%o1
2154           bne,a,pt %xcc,1b
2155          __ ldx [%o0],%o2       // fetch next digit (annulled when the loop ends)
2156 2:      retl                    // borrow ran out of the top
2157        _ mov -1,%o0
2158 3:      retl                    // borrow absorbed
2159        _ mov 0,%o0
2160 #endif
2161 #if COUNTER_LOOPS
2162 //      srl %o1,0,%o1           // zero-extend %o1 = count
2163         brz,pn %o1,2f
2164        _ sub %g0,%o1,%o1        // %o1 = -count
2165         sllx %o1,3,%o1          // %o1 = -8*count
2166         sub %o0,%o1,%o0         // %o0 = &ptr[count]
2167           ldx [%o0+%o1],%o2     // fetch digit
2168 1:        subcc %o2,1,%o2       // decrement
2169           bcc,pn %xcc,3f        // no borrow -> done
2170          _ stx %o2,[%o0+%o1]    // store
2171           addcc %o1,8,%o1       // step the (negative) counter towards 0, advance pointer
2172           bne,a,pt %xcc,1b
2173          __ ldx [%o0+%o1],%o2   // fetch next digit (annulled when the loop ends)
2174 2:      retl                    // borrow ran out of the top
2175        _ mov -1,%o0
2176 3:      retl                    // borrow absorbed
2177        _ mov 0,%o0
2178 #endif
2179
2180 // extern uintD neg_loop_up (uintD* ptr, uintC count);  // negates (two's complement) the count digits at ptr[0..count-1] in place; returns 0 if all digits were zero (or count = 0), else -1
2181         DECLARE_FUNCTION(neg_loop_up)
2182 C(neg_loop_up:) // Input in %o0,%o1, output in %o0
2183 #if STANDARD_LOOPS
2184 //      srl %o1,0,%o1           // zero-extend %o1 = count
2185         // search for the first digit /= 0:
2186         brz,pn %o1,2f
2187        _ add %o0,8,%o0
2188 1:        ldx [%o0-8],%o2
2189           subcc %g0,%o2,%o2     // negate, test
2190           bne,pn %xcc,3f
2191          _ subcc %o1,1,%o1      // count-- (delay slot: flags now describe the counter)
2192           bne,pt %xcc,1b
2193          _ add %o0,8,%o0
2194 2:      retl                    // every digit was zero
2195        _ mov 0,%o0
2196 3:      // first digit /= 0 found; from here on there is always a carry
2197         // negate 1 digit, invert all other digits:
2198         be,pn %xcc,5f           // counter reached 0 -> that was the last digit
2199        _ stx %o2,[%o0-8]        // store the negated digit
2200 4:        ldx [%o0],%o2
2201           subcc %o1,1,%o1
2202           xnor %g0,%o2,%o2      // invert all bits
2203           stx %o2,[%o0]
2204           bne,pt %xcc,4b
2205          _ add %o0,8,%o0
2206 5:      retl
2207        _ mov -1,%o0
2208 #endif
2209 #if COUNTER_LOOPS
2210 //      srl %o1,0,%o1           // zero-extend %o1 = count
2211         // search for the first digit /= 0:
2212         brz,pn %o1,2f
2213        _ sub %g0,%o1,%o1        // %o1 = -count
2214         sllx %o1,3,%o1          // %o1 = -8*count
2215         sub %o0,%o1,%o0         // %o0 = &ptr[count]
2216           ldx [%o0+%o1],%o2     // fetch digit
2217 1:        subcc %g0,%o2,%o2     // negate, test
2218           bne,pn %xcc,3f
2219          _ addcc %o1,8,%o1      // step the (negative) counter towards 0, advance pointer
2220           bne,a,pt %xcc,1b
2221          __ ldx [%o0+%o1],%o2   // fetch next digit (annulled when the loop ends)
2222 2:      retl                    // every digit was zero
2223        _ mov 0,%o0
2224 3:      // first digit /= 0 found; from here on there is always a carry
2225         // invert all other digits:
2226         sub %o1,8,%o1           // step back to the digit just negated
2227         stx %o2,[%o0+%o1]       // store
2228         addcc %o1,8,%o1
2229         be,pn %xcc,5f           // counter reached 0 -> that was the last digit
2230        _ nop
2231           ldx [%o0+%o1],%o2
2232 4:        xnor %g0,%o2,%o2      // invert all bits
2233           stx %o2,[%o0+%o1]
2234           addcc %o1,8,%o1
2235           bne,a,pt %xcc,4b
2236          __ ldx [%o0+%o1],%o2   // fetch next digit (annulled when the loop ends)
2237 5:      retl
2238        _ mov -1,%o0
2239 #endif
2240
2241 // extern uintD shift1left_loop_up (uintD* ptr, uintC count);  // shifts the count digits at ptr[0..count-1] left by 1 bit, carry moving upwards; returns the bit shifted out of the last digit (0 or 1)
2242         DECLARE_FUNCTION(shift1left_loop_up)
2243 C(shift1left_loop_up:) // Input in %o0,%o1, output in %o0
2244 //      srl %o1,0,%o1           // zero-extend %o1 = count
2245         brz,pn %o1,2f
2246        _ mov 0,%o3              // carry := 0
2247 1:        ldx [%o0],%o2         // digit
2248           addcc %o2,%o2,%o4     // shift
2249           add %o4,%o3,%o4       // and add the carry
2250           srlx %o2,63,%o3       // new carry
2251           stx %o4,[%o0]         // store digit
2252           subcc %o1,1,%o1
2253           bne,pt %xcc,1b
2254          _ add %o0,8,%o0
2255 2:      retl
2256        _ mov %o3,%o0
2257
2258 // extern uintD shiftleft_loop_up (uintD* ptr, uintC count, uintC i, uintD carry);  // shifts the count digits at ptr[0..count-1] left by i bits, OR-ing carry into the first digit; returns the bits shifted out of the last digit (carry itself if count = 0)
2259         DECLARE_FUNCTION(shiftleft_loop_up)
2260 C(shiftleft_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, output in %o0
2261 //      srl %o1,0,%o1           // zero-extend %o1 = count
2262         brz,pn %o1,2f
2263        _ sub %g0,%o2,%g1        // 64-i (mod 64)
2264 1:        ldx [%o0],%o4         // digit
2265           subcc %o1,1,%o1
2266           sllx %o4,%o2,%o5      // its low (64-i) bits
2267           or %o3,%o5,%o5        // combine with the old carry
2268           stx %o5,[%o0]         // store digit
2269           srlx %o4,%g1,%o3      // its highest i bits yield the new carry
2270           bne,pt %xcc,1b
2271          _ add %o0,8,%o0
2272 2:      retl
2273        _ mov %o3,%o0
2274
2275 #endif
2276
2277 // extern uintD shiftleftcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i);  // destptr[0..count-1] = sourceptr[0..count-1] shifted left by i bits (initial carry 0); returns the bits shifted out of the last digit
2278         DECLARE_FUNCTION(shiftleftcopy_loop_up)
2279 C(shiftleftcopy_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1,%g2, output in %o0
2280 //      srl %o2,0,%o2           // zero-extend %o2 = count
2281         brz,pn %o2,2f
2282        _ mov 0,%o4              // carry := 0
2283         sub %g0,%o3,%g1         // 64-i (mod 64)
2284 1:        ldx [%o0],%o5         // digit
2285           subcc %o2,1,%o2
2286           sllx %o5,%o3,%g2      // its low (64-i) bits
2287           or %o4,%g2,%g2        // combine with the old carry
2288           stx %g2,[%o1]         // store digit
2289           add %o1,8,%o1
2290           srlx %o5,%g1,%o4      // its highest i bits yield the new carry
2291           bne,pt %xcc,1b
2292          _ add %o0,8,%o0
2293 2:      retl
2294        _ mov %o4,%o0
2295
2296 #if !CL_DS_BIG_ENDIAN_P
2297
2298 // extern uintD shift1right_loop_down (uintD* ptr, uintC count, uintD carry);  // shifts the count digits ptr[-1..-count] right by 1 bit, carry moving downwards; bit 0 of carry enters at bit 63 of the first digit; returns the last bit shifted out, placed in bit 63
2299         DECLARE_FUNCTION(shift1right_loop_down)
2300 C(shift1right_loop_down:) // Input in %o0,%o1,%o2, output in %o0
2301 //      srl %o1,0,%o1           // zero-extend %o1 = count
2302         brz,pn %o1,2f
2303        _ sllx %o2,63,%o2        // carry, moved to bit 63
2304         sub %o0,8,%o0
2305 1:        ldx [%o0],%o3         // digit
2306           subcc %o1,1,%o1
2307           srlx %o3,1,%o4        // shift
2308           or %o2,%o4,%o4        // and combine with the old carry
2309           stx %o4,[%o0]         // and store
2310           sllx %o3,63,%o2       // new carry
2311           bne,pt %xcc,1b
2312          _ sub %o0,8,%o0
2313 2:      retl
2314        _ mov %o2,%o0
2315
2316 // extern uintD shiftright_loop_down (uintD* ptr, uintC count, uintC i);  // shifts the count digits ptr[-1..-count] right by i bits (logical, initial carry 0); returns the bits shifted out of the last digit, left-aligned
2317         DECLARE_FUNCTION(shiftright_loop_down)
2318 C(shiftright_loop_down:) // Input in %o0,%o1,%o2, clobbers %g1, output in %o0
2319 //      srl %o1,0,%o1           // zero-extend %o1 = count
2320         sub %g0,%o2,%g1         // 64-i (mod 64)
2321         brz,pn %o1,2f
2322        _ or %g0,%g0,%o3         // carry := 0
2323         sub %o0,8,%o0
2324 1:        ldx [%o0],%o4         // digit
2325           subcc %o1,1,%o1
2326           srlx %o4,%o2,%o5      // shift
2327           or %o3,%o5,%o5        // and combine with the old carry
2328           stx %o5,[%o0]         // and store
2329           sllx %o4,%g1,%o3      // new carry
2330           bne,pt %xcc,1b
2331          _ sub %o0,8,%o0
2332 2:      retl
2333        _ mov %o3,%o0
2334
2335 // extern uintD shiftrightsigned_loop_down (uintD* ptr, uintC count, uintC i);  // like shiftright_loop_down, but the first digit ptr[-1] is shifted arithmetically (sign bits shifted in); the first digit is processed before count is tested, so count must be >= 1
2336         DECLARE_FUNCTION(shiftrightsigned_loop_down)
2337 C(shiftrightsigned_loop_down:) // Input in %o0,%o1,%o2, clobbers %g1, output in %o0
2338 //      srl %o1,0,%o1           // zero-extend %o1 = count
2339         ldx [%o0-8],%o4         // first digit
2340         sub %g0,%o2,%g1         // 64-i (mod 64)
2341         srax %o4,%o2,%o5        // shift (arithmetic)
2342         stx %o5,[%o0-8]         // and store
2343         sllx %o4,%g1,%o3        // new carry
2344         subcc %o1,1,%o1
2345         be,pn %xcc,2f           // count was 1 -> done
2346        _ sub %o0,16,%o0         // point at the second digit
2347 1:        ldx [%o0],%o4         // digit
2348           subcc %o1,1,%o1
2349           srlx %o4,%o2,%o5      // shift
2350           or %o3,%o5,%o5        // and combine with the old carry
2351           stx %o5,[%o0]         // and store
2352           sllx %o4,%g1,%o3      // new carry
2353           bne,pt %xcc,1b
2354          _ sub %o0,8,%o0
2355 2:      retl
2356        _ mov %o3,%o0
2357
2358 // extern uintD shiftrightcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);  // dest := source shifted right by i bits, both walked downward
2359         DECLARE_FUNCTION(shiftrightcopy_loop_down)
2360 C(shiftrightcopy_loop_down:) // in: %o0 = sourceptr, %o1 = destptr, %o2 = count, %o3 = i, %o4 = carry; clobbers %g1,%g2; out: %o0 = last carry
2361 //      srl %o2,0,%o2           // (disabled) zero-extend %o2 = count; the file header says the caller has already done this
2362         sub %g0,%o3,%g1         // %g1 = -i, used as shift amount 64-i (mod 64)
2363         brz,pn %o2,2f           // count == 0: just return the initial carry
2364        _ sllx %o4,%g1,%g2       // delay slot (always executed): initial carry = carry << (64-i)
2365           sub %o0,8,%o0         // step down to the first source digit at sourceptr-8
2366 1:        ldx [%o0],%o4         // load source digit
2367           sub %o1,8,%o1         // step the destination pointer down
2368           srlx %o4,%o3,%o5      // shift right by i
2369           or %g2,%o5,%o5        // and combine with the old carry
2370           stx %o5,[%o1]         // and store it to the destination
2371           sllx %o4,%g1,%g2      // new carry = shifted-out low bits, moved to the top
2372           subcc %o2,1,%o2       // count--
2373           bne,pt %xcc,1b        // loop while count != 0
2374          _ sub %o0,8,%o0        // delay slot: step down to the next source digit
2375 2:      retl
2376        _ mov %g2,%o0            // delay slot: return the carry
2377
2378 // extern uintD mulusmall_loop_up (uintD digit, uintD* ptr, uintC len, uintD newdigit);  // multiply len digits at ptr in place by a small digit, newdigit is the initial carry
2379         DECLARE_FUNCTION(mulusmall_loop_up)
2380 C(mulusmall_loop_up:) // in: %o0 = digit, %o1 = ptr, %o2 = len, %o3 = newdigit; out: %o0 = final carry; clobbers %g1
2381 //      srl %o2,0,%o2           // (disabled) zero-extend %o2 = len; the file header says the caller has already done this
2382         brz,pn %o2,2f           // len == 0: return newdigit unchanged
2383        _ nop
2384 1:        // multiply the next digit [%o1] by the 6-bit number %o0
2385           // and add the small carry %o3:
2386           ldx [%o1],%o4         // x = next digit
2387           sub %o2,1,%o2         // len--
2388           srlx %o4,32,%o5       // high32(x)
2389           srl %o4,0,%o4         // low32(x)
2390           mulx %o4,%o0,%o4      // low32(x)*digit
2391           mulx %o5,%o0,%o5      // high32(x)*digit
2392           sllx %o5,32,%g1       // low32(high32(x)*digit)*2^32
2393           add %g1,%o3,%g1       // plus carry
2394           addcc %o4,%g1,%o4     // plus low32(x)*digit; carry-out is tested by movcs below
2395           srlx %o5,32,%o3       // high32(high32(x)*digit)
2396           add %o3,1,%g1         // candidate carry if the addcc above overflowed
2397           movcs %xcc,%g1,%o3    // new carry
2398           stx %o4,[%o1]         // store the new digit
2399           brnz,pt %o2,1b        // loop while len != 0
2400          _ add %o1,8,%o1        // delay slot: advance to the next digit
2401 2:      retl
2402        _ mov %o3,%o0            // delay slot: return the carry
2403
2404 // extern void mulu_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);  // dest[0..len-1] := low digits of digit*source[k]+carry, dest[len] := final carry
2405         DECLARE_FUNCTION(mulu_loop_up)
2406 C(mulu_loop_up:) // in (after save): %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len; NOTE(review): no len == 0 check, the body runs before len is tested
2407         save %sp,-192,%sp       // new register window with a 192-byte stack frame
2408         mov 0,%l0               // %l0 = carry, initially 0
2409         srlx %i0,32,%l1         // %l1 = high32(digit)
2410         srl %i0,0,%l2           // %l2 = low32(digit)
2411         mov 1,%l3
2412         sllx %l3,32,%l3         // %l3 = 2^32
2413         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
2414 1:        ldx [%i1+%i2],%o0     // next source digit
2415           subcc %i3,1,%i3       // len-- (the loop test below uses brnz on the register, not these condition codes)
2416           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
2417           srlx %o0,32,%o1       // %o1 = high32(source digit)
2418           srl %o0,0,%o2         // %o2 = low32(source digit)
2419           mulx %l1,%o1,%o3      // high part
2420           mulx %l1,%o2,%o4      // first mid part
2421           mulx %l2,%o1,%o1      // second mid part
2422           mulx %l2,%o2,%o2      // low part
2423           srlx %o2,32,%o5       // low part's upper half
2424           add %o4,%o5,%o4       // add to one of the mid parts, no carry
2425           addcc %o4,%o1,%o4     // add other mid part
2426           add %o3,%l3,%o5
2427           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
2428           srlx %o4,32,%o5       // %o5 = high32(midparts)
2429           sllx %o4,32,%o4       // %o4 = low32(midparts) * 2^32
2430           srl %o2,0,%o2         // %o2 = low32(lowpart)
2431           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
2432           addcc %o0,%l0,%o0     // add the old carry
2433           add %o3,%o5,%l0       // add high32(midparts) to high part
2434           add %l0,1,%o5
2435           movcs %xcc,%o5,%l0    // new carry (+1 if adding the old carry overflowed)
2436           // multiplication done
2437           stx %o0,[%i2]         // store the low digit
2438           brnz,pt %i3,1b        // loop while len != 0
2439          _ add %i2,8,%i2        // delay slot: advance destptr (the source address follows via %i1+%i2)
2440         stx %l0,[%i2]           // store the last carry
2441         ret
2442        _ restore
2443
2444 // extern uintD muluadd_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);  // dest[k] += digit*source[k] with carry propagation, returns the final carry
2445         DECLARE_FUNCTION(muluadd_loop_up)
2446 C(muluadd_loop_up:) // in (after save): %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len; out: %i0 = final carry; NOTE(review): no len == 0 check
2447         save %sp,-192,%sp       // new register window with a 192-byte stack frame
2448         mov 0,%l0               // %l0 = carry, initially 0
2449         srlx %i0,32,%l1         // %l1 = high32(digit)
2450         srl %i0,0,%l2           // %l2 = low32(digit)
2451         mov 1,%l3
2452         sllx %l3,32,%l3         // %l3 = 2^32
2453         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
2454 1:        ldx [%i1+%i2],%o0     // next source digit
2455           ldx [%i2],%i4         // *destptr
2456           subcc %i3,1,%i3       // len-- (the loop test below uses brnz on the register, not these condition codes)
2457           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
2458           srlx %o0,32,%o1       // %o1 = high32(source digit)
2459           srl %o0,0,%o2         // %o2 = low32(source digit)
2460           mulx %l1,%o1,%o3      // high part
2461           mulx %l1,%o2,%o4      // first mid part
2462           mulx %l2,%o1,%o1      // second mid part
2463           mulx %l2,%o2,%o2      // low part
2464           srlx %o2,32,%o5       // low part's upper half
2465           add %o4,%o5,%o4       // add to one of the mid parts, no carry
2466           addcc %o4,%o1,%o4     // add other mid part
2467           add %o3,%l3,%o5
2468           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
2469           srlx %o4,32,%o5       // %o5 = high32(midparts)
2470           sllx %o4,32,%o4       // %o4 = low32(midparts) * 2^32
2471           srl %o2,0,%o2         // %o2 = low32(lowpart)
2472           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
2473           addcc %o0,%l0,%o0     // add the old carry
2474           add %o3,%o5,%l0       // add high32(midparts) to high part
2475           add %l0,1,%o5
2476           movcs %xcc,%o5,%l0    // new carry (+1 if adding the old carry overflowed)
2477           // multiplication done
2478           addcc %i4,%o0,%o0     // add the old *destptr
2479           add %l0,1,%o2
2480           movcs %xcc,%o2,%l0    // new carry (+1 if that addition overflowed)
2481           stx %o0,[%i2]         // store the low digit
2482           brnz,pt %i3,1b        // loop while len != 0
2483          _ add %i2,8,%i2        // delay slot: advance destptr (the source address follows via %i1+%i2)
2484         mov %l0,%i0             // last carry is the return value
2485         ret
2486        _ restore
2487
2488 // extern uintD mulusub_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);  // dest[k] -= digit*source[k] with borrow propagation, returns the final carry
2489         DECLARE_FUNCTION(mulusub_loop_up)
2490 C(mulusub_loop_up:) // in (after save): %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len; out: %i0 = final carry; NOTE(review): no len == 0 check
2491         save %sp,-192,%sp       // new register window with a 192-byte stack frame
2492         mov 0,%l0               // %l0 = carry, initially 0
2493         srlx %i0,32,%l1         // %l1 = high32(digit)
2494         srl %i0,0,%l2           // %l2 = low32(digit)
2495         mov 1,%l3
2496         sllx %l3,32,%l3         // %l3 = 2^32
2497         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
2498 1:        ldx [%i1+%i2],%o0     // next source digit
2499           ldx [%i2],%i4         // *destptr
2500           subcc %i3,1,%i3       // len-- (the loop test below uses brnz on the register, not these condition codes)
2501           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
2502           srlx %o0,32,%o1       // %o1 = high32(source digit)
2503           srl %o0,0,%o2         // %o2 = low32(source digit)
2504           mulx %l1,%o1,%o3      // high part
2505           mulx %l1,%o2,%o4      // first mid part
2506           mulx %l2,%o1,%o1      // second mid part
2507           mulx %l2,%o2,%o2      // low part
2508           srlx %o2,32,%o5       // low part's upper half
2509           add %o4,%o5,%o4       // add to one of the mid parts, no carry
2510           addcc %o4,%o1,%o4     // add other mid part
2511           add %o3,%l3,%o5
2512           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
2513           srlx %o4,32,%o5       // %o5 = high32(midparts)
2514           sllx %o4,32,%o4       // %o4 = low32(midparts) * 2^32
2515           srl %o2,0,%o2         // %o2 = low32(lowpart)
2516           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
2517           addcc %o0,%l0,%o0     // add the old carry
2518           add %o3,%o5,%l0       // add high32(midparts) to high part
2519           add %l0,1,%o5
2520           movcs %xcc,%o5,%l0    // new carry (+1 if adding the old carry overflowed)
2521           // multiplication done
2522           subcc %i4,%o0,%o0     // subtract from the old *destptr
2523           add %l0,1,%o2
2524           movcs %xcc,%o2,%l0    // new carry (+1 if the subtraction borrowed)
2525           stx %o0,[%i2]         // store the low digit
2526           brnz,pt %i3,1b        // loop while len != 0
2527          _ add %i2,8,%i2        // delay slot: advance destptr (the source address follows via %i1+%i2)
2528         mov %l0,%i0             // last carry is the return value
2529         ret
2530        _ restore
2531
2532 #endif
2533
2534 // extern void shiftxor_loop_up (uintD* xptr, const uintD* yptr, uintC count, uintC i);  // xor the count digits at yptr, shifted left by i bits, into the digits at xptr
2535         DECLARE_FUNCTION(shiftxor_loop_up)
2536 C(shiftxor_loop_up:) // in: %o0 = xptr, %o1 = yptr, %o2 = count, %o3 = i; clobbers %g1,%g2; reads and writes count+1 digits at xptr when count > 0
2537 //      srl %o2,0,%o2           // (disabled) zero-extend %o2 = count; the file header says the caller has already done this
2538         brz,pn %o2,2f           // count == 0: nothing to do
2539        _ sub %g0,%o3,%g1        // delay slot (always executed): %g1 = -i, used as shift amount 64-i (mod 64); presumably 0 < i < 64 - TODO confirm
2540         sub %o1,%o0,%o1         // %o1 = yptr - xptr, so [%o0+%o1] addresses the current y digit
2541         ldx [%o0],%o4           // fetch *xptr
2542 1:        ldx [%o0+%o1],%o5     // fetch *yptr
2543           subcc %o2,1,%o2       // count--, sets the condition codes tested by bne below
2544           sllx %o5,%o3,%g2      // its lower (64-i) bits, shifted up by i
2545           xor %o4,%g2,%o4       // combine with the (already modified) *xptr
2546           stx %o4,[%o0]         // and store it
2547           add %o0,8,%o0         // advance xptr (and with it the y address)
2548           srlx %o5,%g1,%g2      // highest i bits of *yptr
2549           ldx [%o0],%o4         // already fetch the next *xptr
2550           bne,pt %xcc,1b        // loop while count != 0
2551          _ xor %o4,%g2,%o4      // delay slot: xor the carried-over high bits into it
2552         stx %o4,[%o0]           // and store the final, (count+1)-th x digit
2553 2:      retl
2554        _ nop
2555