src/base/digitseq/cl_asm_sparc64_.cc

   1 // Externe Routinen zu ARILEV1.D
   2 // Prozessor: SPARC 64-bit
   3 // Compiler: GNU-C oder ...
    4 // Parameter-Übergabe: in Registern %o0-%o5.
   6 //   Argumente vom Typ uint8, uint16, uint32 sind bereits vom Aufrufer zu
   7 //   uint64 umgewandelt worden (zero-extend, "srl reg,0,reg").
   8 //   Argumente vom Typ sint8, sint16, sint32 sind bereits vom Aufrufer zu
   9 //   sint64 umgewandelt worden (sign-extend, "sra reg,0,reg").
  10 //   Ergebnisse vom Typ uint8, uint16, uint32 müssen vor Rückgabe zu uint64
  11 //   umgewandelt werden (zero-extend, "srl reg,0,reg").
  12 //   Ergebnisse vom Typ sint8, sint16, sint32 müssen vor Rückgabe zu sint64
  13 //   umgewandelt werden (sign-extend, "sra reg,0,reg").
   14 // Einstellungen: intCsize=32, intDsize=64.
  15
   16 #ifdef ASM_UNDERSCORE  // C(name) maps a C-level entry point name to its assembler symbol
   17   #define C(entrypoint) _##entrypoint
   18 #else  // no leading underscore: the symbol is the plain name
   19   #define C(entrypoint) entrypoint
   20 #endif
  21
   22 // When this file is compiled into a shared library, ELF linkers need to
   23 // know which symbols are functions. DECLARE_FUNCTION(name) emits the
   24 #if defined(__NetBSD__) || defined(__OpenBSD__)  // matching .type directive, or nothing.
   25   #define DECLARE_FUNCTION(name) .type C(name),@function
   26 #elif defined(__svr4__) || defined(__ELF__)
   27   // Some preprocessors keep the backslash in place, some don't.
   28   // Some complain about the # being not in front of an ANSI C macro.
   29   // Therefore we use a dollar, which will be sed-converted to # later.
   30   #define DECLARE_FUNCTION(name) .type C(name),$function
   31 #else  // all other targets: DECLARE_FUNCTION expands to nothing
   32   #define DECLARE_FUNCTION(name)
   33 #endif
  34
   35   // Markers for instructions that sit in delay slots
   36   // (these are executed BEFORE the preceding instruction takes effect):
   37   #define _             // instruction that is always executed
   38   #define __            // instruction that is executed only if the branch is taken
   39   // Abbreviations for instructions:
   40   #define ret   jmp %i7+8    // return from subroutine
   41   #define retl  jmp %o7+8    // return from leaf subroutine (no save/restore)
  42
   43         .seg "text"  // everything below goes into the text segment
   44
   45         .global C(mulu16_),C(mulu32_),C(mulu32_unchecked),C(mulu64_)
   46         .global C(divu_6432_3232_),C(divu_3216_1616_)
   47         .global C(copy_loop_up),C(copy_loop_down),C(fill_loop_up),C(fill_loop_down)
   48         .global C(clear_loop_up),C(clear_loop_down)
   49         .global C(test_loop_up),C(test_loop_down)
   50         .global C(xor_loop_up),C(compare_loop_up),C(shiftleftcopy_loop_up),C(shiftxor_loop_up)
   51 #if CL_DS_BIG_ENDIAN_P  // entry points exported only when CL_DS_BIG_ENDIAN_P is nonzero
   52         .global C(or_loop_up),C(and_loop_up),C(eqv_loop_up)
   53         .global C(nand_loop_up),C(nor_loop_up),C(andc2_loop_up),C(orc2_loop_up)
   54         .global C(not_loop_up)
   55         .global C(and_test_loop_up)
   56         .global C(add_loop_down),C(addto_loop_down),C(inc_loop_down)
   57         .global C(sub_loop_down),C(subx_loop_down),C(subfrom_loop_down),C(dec_loop_down)
   58         .global C(neg_loop_down)
   59         .global C(shift1left_loop_down),C(shiftleft_loop_down),C(shiftleftcopy_loop_down)
   60         .global C(shift1right_loop_up),C(shiftright_loop_up),C(shiftrightsigned_loop_up),C(shiftrightcopy_loop_up)
   61         .global C(mulusmall_loop_down),C(mulu_loop_down),C(muluadd_loop_down),C(mulusub_loop_down)
   62 #else  // entry points exported when CL_DS_BIG_ENDIAN_P is zero (opposite loop directions)
   63         .global C(or_loop_down),C(xor_loop_down),C(and_loop_down),C(eqv_loop_down)
   64         .global C(nand_loop_down),C(nor_loop_down),C(andc2_loop_down),C(orc2_loop_down)
   65         .global C(not_loop_down)
   66         .global C(and_test_loop_down),C(compare_loop_down)
   67         .global C(add_loop_up),C(addto_loop_up),C(inc_loop_up)
   68         .global C(sub_loop_up),C(subx_loop_up),C(subfrom_loop_up),C(dec_loop_up)
   69         .global C(neg_loop_up)
   70         .global C(shift1left_loop_up),C(shiftleft_loop_up)
   71         .global C(shift1right_loop_down),C(shiftright_loop_down),C(shiftrightsigned_loop_down),C(shiftrightcopy_loop_down)
   72         .global C(mulusmall_loop_up),C(mulu_loop_up),C(muluadd_loop_up),C(mulusub_loop_up)
   73 #endif
  74
   75 #define LOOP_TYPE  1    // 1: standard loops (pointers are advanced each iteration)
   76                         // 2: loops without pointer updates, only a counter used as index
   77 #define STANDARD_LOOPS  (LOOP_TYPE==1)
   78 #define COUNTER_LOOPS  (LOOP_TYPE==2)
  79
   80 // extern uint32 mulu16_ (uint16 arg1, uint16 arg2);
   81 // result := arg1*arg2.
   82         DECLARE_FUNCTION(mulu16_)
   83 C(mulu16_:) // Input in %o0,%o1, Output in %o0
   84         umul %o0,%o1,%o2  // %o2 := unsigned product of the low 32 bits of both arguments
   85         retl  // leaf return; the next instruction is its delay slot
   86        _ srl %o2,0,%o0  // zero-extend the 32-bit result into %o0
  87
   88 // extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
   89 // 2^32*hi+lo := arg1*arg2.
   90         DECLARE_FUNCTION(mulu32_)
   91 C(mulu32_:) // Input in %o0,%o1, Output in %o0 (lo), %g1 (hi)
   92         umul %o0,%o1,%o2  // %o2 := 64-bit product; the upper 32 bits also go to %y
   93         rd %y,%g1  // hi := upper 32 bits of the product, read back from %y
   94         retl
   95        _ srl %o2,0,%o0  // lo := lower 32 bits of the product, zero-extended
  96
   97 // extern uint32 mulu32_unchecked (uint32 x, uint32 y);
   98 // result := arg1*arg2 < 2^32 (the caller guarantees that the product fits in 32 bits).
   99         DECLARE_FUNCTION(mulu32_unchecked)
  100 C(mulu32_unchecked:) // Input in %o0,%o1, Output in %o0
  101         umul %o0,%o1,%o2  // %o2 := unsigned product of the low 32 bits
  102         retl
  103        _ srl %o2,0,%o0  // return the low 32 bits, zero-extended
 104
  105 // extern struct { uint64 lo; uint64 hi; } mulu64_ (uint64 arg1, uint64 arg2);
  106 // 2^64*hi+lo := arg1*arg2. Built from four 32x32->64 partial products.
  107         DECLARE_FUNCTION(mulu64_)
  108 C(mulu64_:) // Input in %o0,%o1, Output in %o0 (lo), %g2 (hi)
  109         srlx %o0,32,%o2         // %o2 = high32(arg1)
  110         srl %o0,0,%o0           // %o0 = low32(arg1)
  111         srlx %o1,32,%o3         // %o3 = high32(arg2)
  112         srl %o1,0,%o1           // %o1 = low32(arg2)
  113         mulx %o2,%o3,%g2        // high part
  114         mulx %o2,%o1,%o2        // first mid part
  115         mulx %o0,%o3,%o3        // second mid part
  116         addcc %o2,%o3,%o2       // sum of mid parts
  117         mov 0,%o3               // %o3 := 0, replaced by 1 below if the sum carried
  118         movcs %xcc,1,%o3        // carry from sum of mid parts
  119         sllx %o3,32,%o3         // that carry has weight 2^96, i.e. 2^32 within the high word
  120         add %g2,%o3,%g2         // add to high part
  121         srlx %o2,32,%o3         // %o3 = high32(midparts)
  122         add %g2,%o3,%g2         // add high32(midparts) to high part
  123         mulx %o0,%o1,%o0        // low part
  124         sllx %o2,32,%o2         // %o2 = low32(midparts)*2^32
  125         addcc %o0,%o2,%o0       // add low32(midparts)*2^32 to low part
  126         add %g2,1,%o3           // %o3 = high part + 1, used only if the low part carried
  127         retl
  128        _ movcs %xcc,%o3,%g2     // add carry to high part
 129
  130 // extern struct { uint32 q; uint32 r; } divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y);
  131 // Write x = 2^32*xhi+xlo = q*y+r. It is known that 0 <= x < 2^32*y .
  132         DECLARE_FUNCTION(divu_6432_3232_)
  133 C(divu_6432_3232_:) // Input in %o0,%o1,%o2, Output in %o0 (q), %g1 (r)
  134         wr %o0,%g0,%y  // %y := xhi, the upper 32 bits of the dividend
  135         udiv %o1,%o2,%o0        // divide x by y, %o0 := q
  136         umul %o0,%o2,%g1        // %g1 := (q*y) mod 2^32
  137         sub %o1,%g1,%g1         // %g1 := (xlo-q*y) mod 2^32 = r
  138         retl
  139        _ srl %o0,0,%o0  // zero-extend q
 140
  141 // extern struct { uint16 q; uint16 r; } divu_3216_1616_ (uint32 x, uint16 y);
  142 // Write x = q*y+r. It is known that 0 <= x < 2^16*y .
  143         DECLARE_FUNCTION(divu_3216_1616_)
  144 C(divu_3216_1616_:) // Input in %o0,%o1, Output in %o0 (remainder in bits 31..16, quotient in bits 15..0).
  145         wr %g0,%g0,%y  // %y := 0, the upper 32 bits of the dividend
  146         udiv %o0,%o1,%o2        // divide, quotient into %o2
  147 #if 0 // Who says that %y has some meaningful contents after `udiv' ??
  148         rd %y,%g1               // remainder from %y
  149 #else
  150         umul %o2,%o1,%g1        // %g1 := (q*y) mod 2^32
  151         sub %o0,%g1,%g1         // %g1 := (x-q*y) mod 2^32 = r
  152 #endif
  153         sll %g1,16,%g1          // shift into the upper 16 bits
  154         or %o2,%g1,%o0  // combine remainder (high half) and quotient (low half)
  155         retl
  156        _ srl %o0,0,%o0  // zero-extend the packed 32-bit result
 157
  158 #if !defined(__GNUC__)  // helper is assembled only for compilers other than GNU C
  159         .global C(_get_g1)
  160 // extern uint32 _get_g1 (void);  -- returns the current contents of %g1 as a uint32
  161         DECLARE_FUNCTION(_get_g1)
  162 C(_get_g1:)
  163         retl
  164        _ srl %g1,0,%o0  // %o0 := low 32 bits of %g1, zero-extended
  165 #endif
 166
  167 #if !defined(__GNUC__)  // helper is assembled only for compilers other than GNU C
  168         .global C(_get_g2)
  169 // extern uint64 _get_g2 (void);  -- returns the current contents of %g2
  170         DECLARE_FUNCTION(_get_g2)
  171 C(_get_g2:)
  172         retl
  173        _ mov %g2,%o0  // %o0 := full 64-bit %g2
  174 #endif
 175
  176 // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
  177         DECLARE_FUNCTION(copy_loop_up)
  178 C(copy_loop_up:) // Input in %o0,%o1,%o2, Output in %o0. Copies count 8-byte digits in ascending address order and returns destptr advanced past the last digit written.
  179 #if STANDARD_LOOPS
  180 //      srl %o2,0,%o2           // zero-extend %o2 = count
  181         brz,pn %o2,2f  // count == 0: nothing to copy
  182        _ nop
  183 1:        ldx [%o0],%o3  // fetch digit from source
  184           add %o0,8,%o0  // sourceptr++
  185           stx %o3,[%o1]  // store digit to destination
  186           subcc %o2,1,%o2  // count--
  187           bne,pt %xcc,1b  // loop while count != 0
  188          _ add %o1,8,%o1  // destptr++ (delay slot)
  189 2:      retl
  190        _ mov %o1,%o0  // return the advanced destptr
  191 #endif
  192 #if COUNTER_LOOPS
  193 //      srl %o2,0,%o2           // zero-extend %o2 = count
  194         brz,pn %o2,2f
  195        _ sub %o1,8,%o1  // bias destptr by one digit; undone at label 2
  196         sub %g0,%o2,%o2         // %o2 = -count
  197         sllx %o2,3,%o2          // %o2 = -8*count
  198         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
  199         sub %o1,%o2,%o1         // %o1 = &destptr[count-1]
  200 1:        ldx [%o0+%o2],%o3     // fetch next digit
  201           addcc %o2,8,%o2       // "decrement" counter, advance pointer
  202           bne,pt %xcc,1b
  203          _ stx %o3,[%o1+%o2]    // store digit
  204 2:      retl
  205        _ add %o1,8,%o0  // remove the bias: return &destptr[count]
  206 #endif
 207
  208 // extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
  209         DECLARE_FUNCTION(copy_loop_down)
  210 C(copy_loop_down:) // Input in %o0,%o1,%o2, Output in %o0. Copies count 8-byte digits in descending address order (pre-decrement) and returns the lowered destptr.
  211 #if STANDARD_LOOPS
  212 //      srl %o2,0,%o2           // zero-extend %o2 = count
  213         brz,pn %o2,2f  // count == 0: nothing to copy
  214        _ sub %o0,8,%o0  // --sourceptr (delay slot, executed in both cases)
  215 1:        ldx [%o0],%o3  // fetch digit from source
  216           sub %o1,8,%o1  // --destptr
  217           stx %o3,[%o1]  // store digit to destination
  218           subcc %o2,1,%o2  // count--
  219           bne,pt %xcc,1b  // loop while count != 0
  220          _ sub %o0,8,%o0  // --sourceptr for the next iteration
  221 2:      retl
  222        _ mov %o1,%o0  // return the lowered destptr
  223 #endif
  224 #if COUNTER_LOOPS
  225 //      srl %o2,0,%o2           // zero-extend %o2 = count
  226         brz,pn %o2,2f
  227        _ sub %o0,8,%o0  // bias sourceptr by one digit
  228         sllx %o2,3,%o2          // %o2 = 8*count
  229         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
  230         sub %o1,%o2,%o1         // %o1 = &destptr[-count]
  231 1:        ldx [%o0+%o2],%o3     // fetch next digit
  232           subcc %o2,8,%o2       // decrement counter, move pointer down
  233           bne,pt %xcc,1b
  234          _ stx %o3,[%o1+%o2]    // store digit
  235 2:      retl
  236        _ mov %o1,%o0  // return &destptr[-count] (or destptr itself if count was 0)
  237 #endif
 238
  239 // extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler);
  240         DECLARE_FUNCTION(fill_loop_up)
  241 C(fill_loop_up:) // Input in %o0,%o1,%o2, Output in %o0. Stores filler into count 8-byte digits in ascending order and returns the advanced destptr.
  242 #if STANDARD_LOOPS
  243 //      srl %o1,0,%o1           // zero-extend %o1 = count
  244         brz,pn %o1,2f  // count == 0: nothing to fill
  245        _ nop
  246 1:        stx %o2,[%o0]  // *destptr := filler
  247           subcc %o1,1,%o1  // count--
  248           bne,pt %xcc,1b  // loop while count != 0
  249          _ add %o0,8,%o0  // destptr++ (delay slot)
  250 2:      retl
  251        _ nop  // %o0 already holds the advanced destptr
  252 #endif
  253 #if COUNTER_LOOPS
  254 //      srl %o1,0,%o1           // zero-extend %o1 = count
  255         brz,pn %o1,2f
  256        _ sub %o0,8,%o0  // bias destptr by one digit; undone at label 2
  257         sub %g0,%o1,%o1         // %o1 = -count
  258         sllx %o1,3,%o1          // %o1 = -8*count
  259         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
  260 1:        addcc %o1,8,%o1       // "decrement" counter, advance pointer
  261           bne,pt %xcc,1b
  262          _ stx %o2,[%o0+%o1]    // store digit
  263 2:      retl
  264        _ add %o0,8,%o0  // remove the bias: return &destptr[count]
  265 #endif
 266
  267 // extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler);
  268         DECLARE_FUNCTION(fill_loop_down)
  269 C(fill_loop_down:) // Input in %o0,%o1,%o2, Output in %o0. Stores filler into count 8-byte digits in descending order (pre-decrement) and returns the lowered destptr.
  270 #if STANDARD_LOOPS
  271 //      srl %o1,0,%o1           // zero-extend %o1 = count
  272         brz,pn %o1,2f  // count == 0: nothing to fill
  273        _ sub %o0,8,%o0  // --destptr (delay slot, executed in both cases)
  274 1:        stx %o2,[%o0]  // *destptr := filler
  275           subcc %o1,1,%o1  // count--
  276           bne,pt %xcc,1b  // loop while count != 0
  277          _ sub %o0,8,%o0  // --destptr for the next iteration
  278 2:      retl
  279        _ add %o0,8,%o0  // undo the one extra decrement before returning
  280 #endif
  281 #if COUNTER_LOOPS
  282 //      srl %o1,0,%o1           // zero-extend %o1 = count
  283         brz,pn %o1,2f
  284        _ sllx %o1,3,%o1         // %o1 = 8*count
  285         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
  286 1:        subcc %o1,8,%o1       // decrement counter, move pointer down
  287           bne,pt %xcc,1b
  288          _ stx %o2,[%o0+%o1]    // store digit
  289 2:      retl
  290        _ nop  // %o0 already holds the result pointer
  291 #endif
 292
  293 // extern uintD* clear_loop_up (uintD* destptr, uintC count);
  294         DECLARE_FUNCTION(clear_loop_up)
  295 C(clear_loop_up:) // Input in %o0,%o1, Output in %o0. Zeroes count 8-byte digits in ascending order and returns the advanced destptr.
  296 #if STANDARD_LOOPS
  297 //      srl %o1,0,%o1           // zero-extend %o1 = count
  298         brz,pn %o1,2f  // count == 0: nothing to clear
  299        _ nop
  300 1:        stx %g0,[%o0]  // *destptr := 0
  301           subcc %o1,1,%o1  // count--
  302           bne,pt %xcc,1b  // loop while count != 0
  303          _ add %o0,8,%o0  // destptr++ (delay slot)
  304 2:      retl
  305        _ nop  // %o0 already holds the advanced destptr
  306 #endif
  307 #if COUNTER_LOOPS
  308 //      srl %o1,0,%o1           // zero-extend %o1 = count
  309         brz,pn %o1,2f
  310        _ sub %o0,8,%o0  // bias destptr by one digit; undone at label 2
  311         sub %g0,%o1,%o1         // %o1 = -count
  312         sllx %o1,3,%o1          // %o1 = -8*count
  313         sub %o0,%o1,%o0         // %o0 = &destptr[count-1]
  314 1:        addcc %o1,8,%o1       // "decrement" counter, advance pointer
  315           bne,pt %xcc,1b
  316          _ stx %g0,[%o0+%o1]    // store zero digit
  317 2:      retl
  318        _ add %o0,8,%o0  // remove the bias: return &destptr[count]
  319 #endif
 320
  321 // extern uintD* clear_loop_down (uintD* destptr, uintC count);
  322         DECLARE_FUNCTION(clear_loop_down)
  323 C(clear_loop_down:) // Input in %o0,%o1, Output in %o0. Zeroes count 8-byte digits in descending order (pre-decrement) and returns the lowered destptr.
  324 #if STANDARD_LOOPS
  325 //      srl %o1,0,%o1           // zero-extend %o1 = count
  326         brz,pn %o1,2f  // count == 0: nothing to clear
  327        _ sub %o0,8,%o0  // --destptr (delay slot, executed in both cases)
  328 1:        stx %g0,[%o0]  // *destptr := 0
  329           subcc %o1,1,%o1  // count--
  330           bne,pt %xcc,1b  // loop while count != 0
  331          _ sub %o0,8,%o0  // --destptr for the next iteration
  332 2:      retl
  333        _ add %o0,8,%o0  // undo the one extra decrement before returning
  334 #endif
  335 #if COUNTER_LOOPS
  336 //      srl %o1,0,%o1           // zero-extend %o1 = count
  337         brz,pn %o1,2f
  338        _ sllx %o1,3,%o1         // %o1 = 8*count
  339         sub %o0,%o1,%o0         // %o0 = &destptr[-count]
  340 1:        subcc %o1,8,%o1       // decrement counter, move pointer down
  341           bne,pt %xcc,1b
  342          _ stx %g0,[%o0+%o1]    // store zero digit
  343 2:      retl
  344        _ nop  // %o0 already holds the result pointer
  345 #endif
 346
  347 // extern boolean test_loop_up (uintD* ptr, uintC count);
  348         DECLARE_FUNCTION(test_loop_up)
  349 C(test_loop_up:) // Input in %o0,%o1, Output in %o0. Scans count 8-byte digits upward; returns 1 at the first nonzero digit, 0 if all are zero.
  350 #if STANDARD_LOOPS
  351 //      srl %o1,0,%o1           // zero-extend %o1 = count
  352         brz,pn %o1,2f  // count == 0: result is 0
  353        _ nop
  354           ldx [%o0],%o2  // preload the first digit
  355 1:        add %o0,8,%o0  // ptr++
  356           brnz,pn %o2,3f  // nonzero digit found: return 1
  357          _ subcc %o1,1,%o1  // count-- (delay slot)
  358           bne,a,pt %xcc,1b  // more digits left: continue
  359          __ ldx [%o0],%o2  // load the next digit only when the branch is taken
  360 2:      retl
  361        _ mov 0,%o0  // all digits were zero
  362 3:      retl
  363        _ mov 1,%o0  // a nonzero digit was found
  364 #endif
  365 #if COUNTER_LOOPS
  366 //      srl %o1,0,%o1           // zero-extend %o1 = count
  367         brz,pn %o1,2f
  368        _ sub %g0,%o1,%o1        // %o1 = -count
  369         sllx %o1,3,%o1          // %o1 = -8*count
  370         sub %o0,%o1,%o0         // %o0 = &ptr[count]
  371           ldx [%o0+%o1],%o2     // fetch next digit
  372 1:        brnz,pn %o2,3f        // test
  373          _ addcc %o1,8,%o1      // "decrement" counter, advance pointer
  374           bne,a,pt %xcc,1b
  375          __ ldx [%o0+%o1],%o2   // fetch next digit
  376 2:      retl
  377        _ mov 0,%o0  // all digits were zero
  378 3:      retl
  379        _ mov 1,%o0  // a nonzero digit was found
  380 #endif
 381
  382 // extern boolean test_loop_down (uintD* ptr, uintC count);
  383         DECLARE_FUNCTION(test_loop_down)
  384 C(test_loop_down:) // Input in %o0,%o1, Output in %o0. Scans count 8-byte digits downward (pre-decrement); returns 1 at the first nonzero digit, 0 if all are zero.
  385 #if STANDARD_LOOPS
  386 //      srl %o1,0,%o1           // zero-extend %o1 = count
  387         brz,pn %o1,2f  // count == 0: result is 0
  388        _ sub %o0,8,%o0  // --ptr (delay slot, executed in both cases)
  389           ldx [%o0],%o2  // preload the first digit
  390 1:        sub %o0,8,%o0  // --ptr
  391           brnz,pn %o2,3f  // nonzero digit found: return 1
  392          _ subcc %o1,1,%o1  // count-- (delay slot)
  393           bne,a,pt %xcc,1b  // more digits left: continue
  394          __ ldx [%o0],%o2  // load the next digit only when the branch is taken
  395 2:      retl
  396        _ mov 0,%o0  // all digits were zero
  397 3:      retl
  398        _ mov 1,%o0  // a nonzero digit was found
  399 #endif
  400 #if COUNTER_LOOPS
  401 //      srl %o1,0,%o1           // zero-extend %o1 = count
  402         brz,pn %o1,2f
  403        _ sllx %o1,3,%o1         // %o1 = 8*count
  404         sub %o0,%o1,%o0         // %o0 = &ptr[-count]
  405         sub %o1,8,%o1  // %o1 = 8*(count-1), byte offset of the first digit to test
  406           ldx [%o0+%o1],%o2     // fetch next digit
  407 1:        brnz,pn %o2,3f        // test
  408          _ subcc %o1,8,%o1      // decrement counter, move pointer down
  409           bcc,a,pt %xcc,1b  // continue while the subtraction did not borrow
  410          __ ldx [%o0+%o1],%o2   // fetch next digit
  411 2:      retl
  412        _ mov 0,%o0  // all digits were zero
  413 3:      retl
  414        _ mov 1,%o0  // a nonzero digit was found
  415 #endif
 416
 417 #if CL_DS_BIG_ENDIAN_P
 418
 419 // extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count);
 420         DECLARE_FUNCTION(or_loop_up)
 421 C(or_loop_up:) // Input in %o0,%o1,%o2
 422 #if STANDARD_LOOPS
 423 //      srl %o2,0,%o2           // zero-extend %o2 = count
 424         brz,pn %o2,2f
 425        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 426 1:        ldx [%o0],%o3         // *xptr
 427           ldx [%o0+%o1],%o4     // *yptr
 428           subcc %o2,1,%o2
 429           or %o3,%o4,%o3        // verknüpfen
 430           stx %o3,[%o0]         // =: *xptr
 431           bne,pt %xcc,1b
 432          _ add %o0,8,%o0        // xptr++, yptr++
 433 2:      retl
 434        _ nop
 435 #endif
 436 #if COUNTER_LOOPS
 437 //      srl %o2,0,%o2           // zero-extend %o2 = count
 438         brz,pn %o2,2f
 439        _ sub %o0,8,%o0
 440         sub %g0,%o2,%o2         // %o2 = -count
 441         sllx %o2,3,%o2          // %o2 = -8*count
 442         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 443         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 444 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 445           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 446           ldx [%o0+%o2],%o4     // noch ein Digit holen
 447           or %o4,%o3,%o3        // beide verknüpfen
 448           bne,pt %xcc,1b
 449          _ stx %o3,[%o1+%o2]    // Digit ablegen
 450 2:      retl
 451        _ nop
 452 #endif
 453
 454 #endif
 455
 456 // extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count);
 457         DECLARE_FUNCTION(xor_loop_up)
 458 C(xor_loop_up:) // Input in %o0,%o1,%o2
 459 #if STANDARD_LOOPS
 460 //      srl %o2,0,%o2           // zero-extend %o2 = count
 461         brz,pn %o2,2f
 462        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 463 1:        ldx [%o0],%o3         // *xptr
 464           ldx [%o0+%o1],%o4     // *yptr
 465           subcc %o2,1,%o2
 466           xor %o3,%o4,%o3       // verknüpfen
 467           stx %o3,[%o0]         // =: *xptr
 468           bne,pt %xcc,1b
 469          _ add %o0,8,%o0        // xptr++, yptr++
 470 2:      retl
 471        _ nop
 472 #endif
 473 #if COUNTER_LOOPS
 474 //      srl %o2,0,%o2           // zero-extend %o2 = count
 475         brz,pn %o2,2f
 476        _ sub %o0,8,%o0
 477         sub %g0,%o2,%o2         // %o2 = -count
 478         sllx %o2,3,%o2          // %o2 = -8*count
 479         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 480         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 481 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 482           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 483           ldx [%o0+%o2],%o4     // noch ein Digit holen
 484           xor %o4,%o3,%o3       // beide verknüpfen
 485           bne,pt %xcc,1b
 486          _ stx %o3,[%o1+%o2]    // Digit ablegen
 487 2:      retl
 488        _ nop
 489 #endif
 490
 491 #if CL_DS_BIG_ENDIAN_P
 492
 493 // extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count);
 494         DECLARE_FUNCTION(and_loop_up)
 495 C(and_loop_up:) // Input in %o0,%o1,%o2
 496 #if STANDARD_LOOPS
 497 //      srl %o2,0,%o2           // zero-extend %o2 = count
 498         brz,pn %o2,2f
 499        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 500 1:        ldx [%o0],%o3         // *xptr
 501           ldx [%o0+%o1],%o4     // *yptr
 502           subcc %o2,1,%o2
 503           and %o3,%o4,%o3       // verknüpfen
 504           stx %o3,[%o0]         // =: *xptr
 505           bne,pt %xcc,1b
 506          _ add %o0,8,%o0        // xptr++, yptr++
 507 2:      retl
 508        _ nop
 509 #endif
 510 #if COUNTER_LOOPS
 511 //      srl %o2,0,%o2           // zero-extend %o2 = count
 512         brz,pn %o2,2f
 513        _ sub %o0,8,%o0
 514         sub %g0,%o2,%o2         // %o2 = -count
 515         sllx %o2,3,%o2          // %o2 = -8*count
 516         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 517         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 518 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 519           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 520           ldx [%o0+%o2],%o4     // noch ein Digit holen
 521           and %o4,%o3,%o3       // beide verknüpfen
 522           bne,pt %xcc,1b
 523          _ stx %o3,[%o1+%o2]    // Digit ablegen
 524 2:      retl
 525        _ nop
 526 #endif
 527
 528 // extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count);
 529         DECLARE_FUNCTION(eqv_loop_up)
 530 C(eqv_loop_up:) // Input in %o0,%o1,%o2
 531 #if STANDARD_LOOPS
 532 //      srl %o2,0,%o2           // zero-extend %o2 = count
 533         brz,pn %o2,2f
 534        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 535 1:        ldx [%o0],%o3         // *xptr
 536           ldx [%o0+%o1],%o4     // *yptr
 537           subcc %o2,1,%o2
 538           xnor %o3,%o4,%o3      // verknüpfen
 539           stx %o3,[%o0]         // =: *xptr
 540           bne,pt %xcc,1b
 541          _ add %o0,8,%o0        // xptr++, yptr++
 542 2:      retl
 543        _ nop
 544 #endif
 545 #if COUNTER_LOOPS
 546 //      srl %o2,0,%o2           // zero-extend %o2 = count
 547         brz,pn %o2,2f
 548        _ sub %o0,8,%o0
 549         sub %g0,%o2,%o2         // %o2 = -count
 550         sllx %o2,3,%o2          // %o2 = -8*count
 551         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 552         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 553 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 554           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 555           ldx [%o0+%o2],%o4     // noch ein Digit holen
 556           xnor %o4,%o3,%o3      // beide verknüpfen
 557           bne,pt %xcc,1b
 558          _ stx %o3,[%o1+%o2]    // Digit ablegen
 559 2:      retl
 560        _ nop
 561 #endif
 562
 563 // extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count);
 564         DECLARE_FUNCTION(nand_loop_up)
 565 C(nand_loop_up:) // Input in %o0,%o1,%o2
 566 #if STANDARD_LOOPS
 567 //      srl %o2,0,%o2           // zero-extend %o2 = count
 568         brz,pn %o2,2f
 569        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 570 1:        ldx [%o0],%o3         // *xptr
 571           ldx [%o0+%o1],%o4     // *yptr
 572           subcc %o2,1,%o2
 573           and %o3,%o4,%o3       // verknüpfen
 574           xnor %g0,%o3,%o3
 575           stx %o3,[%o0]         // =: *xptr
 576           bne,pt %xcc,1b
 577          _ add %o0,8,%o0        // xptr++, yptr++
 578 2:      retl
 579        _ nop
 580 #endif
 581 #if COUNTER_LOOPS
 582 //      srl %o2,0,%o2           // zero-extend %o2 = count
 583         brz,pn %o2,2f
 584        _ sub %o0,8,%o0
 585         sub %g0,%o2,%o2         // %o2 = -count
 586         sllx %o2,3,%o2          // %o2 = -8*count
 587         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 588         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 589 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 590           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 591           ldx [%o0+%o2],%o4     // noch ein Digit holen
 592           and %o4,%o3,%o3       // beide verknüpfen
 593           xnor %g0,%o3,%o3
 594           bne,pt %xcc,1b
 595          _ stx %o3,[%o1+%o2]    // Digit ablegen
 596 2:      retl
 597        _ nop
 598 #endif
 599
 600 // extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count);
 601         DECLARE_FUNCTION(nor_loop_up)
 602 C(nor_loop_up:) // Input in %o0,%o1,%o2
 603 #if STANDARD_LOOPS
 604 //      srl %o2,0,%o2           // zero-extend %o2 = count
 605         brz,pn %o2,2f
 606        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 607 1:        ldx [%o0],%o3         // *xptr
 608           ldx [%o0+%o1],%o4     // *yptr
 609           subcc %o2,1,%o2
 610           or %o3,%o4,%o3        // verknüpfen
 611           xnor %g0,%o3,%o3
 612           stx %o3,[%o0]         // =: *xptr
 613           bne,pt %xcc,1b
 614          _ add %o0,8,%o0        // xptr++, yptr++
 615 2:      retl
 616        _ nop
 617 #endif
 618 #if COUNTER_LOOPS
 619 //      srl %o2,0,%o2           // zero-extend %o2 = count
 620         brz,pn %o2,2f
 621        _ sub %o0,8,%o0
 622         sub %g0,%o2,%o2         // %o2 = -count
 623         sllx %o2,3,%o2          // %o2 = -8*count
 624         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 625         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 626 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 627           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 628           ldx [%o0+%o2],%o4     // noch ein Digit holen
 629           or %o4,%o3,%o3        // beide verknüpfen
 630           xnor %g0,%o3,%o3
 631           bne,pt %xcc,1b
 632          _ stx %o3,[%o1+%o2]    // Digit ablegen
 633 2:      retl
 634        _ nop
 635 #endif
 636
 637 // extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
 638         DECLARE_FUNCTION(andc2_loop_up)
 639 C(andc2_loop_up:) // Input in %o0,%o1,%o2
 640 #if STANDARD_LOOPS
 641 //      srl %o2,0,%o2           // zero-extend %o2 = count
 642         brz,pn %o2,2f
 643        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 644 1:        ldx [%o0],%o3         // *xptr
 645           ldx [%o0+%o1],%o4     // *yptr
 646           subcc %o2,1,%o2
 647           andn %o3,%o4,%o3      // verknüpfen
 648           stx %o3,[%o0]         // =: *xptr
 649           bne,pt %xcc,1b
 650          _ add %o0,8,%o0        // xptr++, yptr++
 651 2:      retl
 652        _ nop
 653 #endif
 654 #if COUNTER_LOOPS
 655 //      srl %o2,0,%o2           // zero-extend %o2 = count
 656         brz,pn %o2,2f
 657        _ sub %o0,8,%o0
 658         sub %g0,%o2,%o2         // %o2 = -count
 659         sllx %o2,3,%o2          // %o2 = -8*count
 660         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 661         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 662 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 663           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 664           ldx [%o0+%o2],%o4     // noch ein Digit holen
 665           andn %o4,%o3,%o3      // beide verknüpfen
 666           bne,pt %xcc,1b
 667          _ stx %o3,[%o1+%o2]    // Digit ablegen
 668 2:      retl
 669        _ nop
 670 #endif
 671
 672 // extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
 673         DECLARE_FUNCTION(orc2_loop_up)
 674 C(orc2_loop_up:) // Input in %o0,%o1,%o2
 675 #if STANDARD_LOOPS
 676 //      srl %o2,0,%o2           // zero-extend %o2 = count
 677         brz,pn %o2,2f
 678        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
 679 1:        ldx [%o0],%o3         // *xptr
 680           ldx [%o0+%o1],%o4     // *yptr
 681           subcc %o2,1,%o2
 682           orn %o3,%o4,%o3       // verknüpfen
 683           stx %o3,[%o0]         // =: *xptr
 684           bne,pt %xcc,1b
 685          _ add %o0,8,%o0        // xptr++, yptr++
 686 2:      retl
 687        _ nop
 688 #endif
 689 #if COUNTER_LOOPS
 690 //      srl %o2,0,%o2           // zero-extend %o2 = count
 691         brz,pn %o2,2f
 692        _ sub %o0,8,%o0
 693         sub %g0,%o2,%o2         // %o2 = -count
 694         sllx %o2,3,%o2          // %o2 = -8*count
 695         sub %o0,%o2,%o0         // %o0 = &xptr[count-1]
 696         sub %o1,%o2,%o1         // %o1 = &yptr[count]
 697 1:        ldx [%o1+%o2],%o3     // nächstes Digit holen
 698           addcc %o2,8,%o2       // Zähler "erniedrigen", Pointer erhöhen
 699           ldx [%o0+%o2],%o4     // noch ein Digit holen
 700           orn %o4,%o3,%o3       // beide verknüpfen
 701           bne,pt %xcc,1b
 702          _ stx %o3,[%o1+%o2]    // Digit ablegen
 703 2:      retl
 704        _ nop
 705 #endif
 706
  707 // extern void not_loop_up (uintD* xptr, uintC count);
  708         DECLARE_FUNCTION(not_loop_up)
  709 C(not_loop_up:) // Input in %o0,%o1. For count 8-byte digits, ascending: *xptr := ~*xptr.
  710 #if STANDARD_LOOPS
  711 //      srl %o1,0,%o1           // zero-extend %o1 = count
  712         brz,pn %o1,2f  // count == 0: nothing to do
  713        _ nop
  714 1:        ldx [%o0],%o2  // fetch digit
  715           subcc %o1,1,%o1  // count--
  716           xnor %g0,%o2,%o2  // bitwise complement
  717           stx %o2,[%o0]  // store it back in place
  718           bne,pt %xcc,1b  // loop while count != 0
  719          _ add %o0,8,%o0  // xptr++ (delay slot)
  720 2:      retl
  721        _ nop
  722 #endif
  723 #if COUNTER_LOOPS
  724 //      srl %o1,0,%o1           // zero-extend %o1 = count
  725         brz,pn %o1,2f
  726        _ sub %o0,8,%o0  // bias xptr by one digit, because the index is bumped before the access
  727         sub %g0,%o1,%o1         // %o1 = -count
  728         sllx %o1,3,%o1          // %o1 = -8*count
  729         sub %o0,%o1,%o0         // %o0 = &xptr[count-1]
  730 1:        addcc %o1,8,%o1       // "decrement" counter, advance pointer
  731           ldx [%o0+%o1],%o2     // fetch next digit
  732           xnor %g0,%o2,%o2  // bitwise complement
  733           bne,pt %xcc,1b
  734          _ stx %o2,[%o0+%o1]    // store digit
  735 2:      retl
  736        _ nop
  737 #endif
 738
  739 // extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count);
  740         DECLARE_FUNCTION(and_test_loop_up)
  741 C(and_test_loop_up:) // Input in %o0,%o1,%o2, Output in %o0. Returns 1 as soon as (*xptr & *yptr) is nonzero for some digit pair, else 0. Memory is only read.
  742 #if STANDARD_LOOPS
  743 //      srl %o2,0,%o2           // zero-extend %o2 = count
  744         brz,pn %o2,2f  // count == 0: result is 0
  745        _ nop
  746 1:        ldx [%o0],%o3  // *xptr
  747           ldx [%o1],%o4  // *yptr
  748           add %o0,8,%o0  // xptr++
  749           andcc %o3,%o4,%g0  // set condition codes from x & y, discard the value
  750           bne,pn %xcc,3f  // common bit found: return 1
  751          _ subcc %o2,1,%o2  // count-- (delay slot)
  752           bne,pt %xcc,1b  // loop while count != 0
  753          _ add %o1,8,%o1  // yptr++ (delay slot)
  754 2:      retl
  755        _ mov 0,%o0  // no digit pair had a common bit
  756 3:      retl
  757        _ mov 1,%o0  // some digit pair had a common bit
  758 #endif
  759 #if COUNTER_LOOPS
  760 //      srl %o2,0,%o2           // zero-extend %o2 = count
  761         brz,pn %o2,2f
  762        _ sub %g0,%o2,%o2        // %o2 = -count
  763         sllx %o2,3,%o2          // %o2 = -8*count
  764         sub %o0,%o2,%o0         // %o0 = &xptr[count]
  765         sub %o1,%o2,%o1         // %o1 = &yptr[count]
  766           ldx [%o0+%o2],%o3     // fetch next digit
  767 1:        ldx [%o1+%o2],%o4     // fetch another digit
  768           andcc %o3,%o4,%g0     // combine both
  769           bne,pn %xcc,3f
  770          _ addcc %o2,8,%o2      // "decrement" counter, advance pointer
  771           bne,a,pt %xcc,1b
  772          __ ldx [%o0+%o2],%o3   // fetch next digit
  773 2:      retl
  774        _ mov 0,%o0  // no digit pair had a common bit
  775 3:      retl
  776        _ mov 1,%o0  // some digit pair had a common bit
  777 #endif
 778
 779 #endif
 780
  781 // extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count);
  782         DECLARE_FUNCTION(compare_loop_up)
  783 C(compare_loop_up:) // Input in %o0,%o1,%o2, Output in %o0. Compares digit pairs upward; at the first difference returns +1 if x digit > y digit (unsigned), -1 if less; 0 if all are equal.
  784 #if STANDARD_LOOPS
  785 //      srl %o2,0,%o2           // zero-extend %o2 = count
  786         brz,pn %o2,2f  // count == 0: sequences are equal
  787        _ nop
  788           ldx [%o0],%o3  // preload the first x digit
  789 1:        ldx [%o1],%o4  // y digit
  790           add %o0,8,%o0  // xptr++
  791           subcc %o3,%o4,%g0  // compare x digit with y digit
  792           bne,pn %xcc,3f  // digits differ: decide the sign at label 3
  793          _ add %o1,8,%o1  // yptr++ (delay slot; leaves the condition codes intact)
  794           subcc %o2,1,%o2  // count--
  795           bne,a,pt %xcc,1b  // more digits left: continue
  796          __ ldx [%o0],%o3  // load the next x digit only when the branch is taken
  797 2:      retl
  798        _ mov 0,%o0  // all digits were equal
  799 3:      mov 1,%o0  // assume x > y
  800         movlu %xcc,-1,%o0  // x < y (unsigned, from the compare above): result is -1
  801         retl
  802        _ sra %o0,0,%o0          // sign-extend %o0
  803 #endif
  804 #if COUNTER_LOOPS
  805 //      srl %o2,0,%o2           // zero-extend %o2 = count
  806         brz,pn %o2,2f
  807        _ sub %g0,%o2,%o2        // %o2 = -count
  808         sllx %o2,3,%o2          // %o2 = -8*count
  809         sub %o0,%o2,%o0         // %o0 = &xptr[count]
  810         sub %o1,%o2,%o1         // %o1 = &yptr[count]
  811           ldx [%o0+%o2],%o3     // fetch next digit
  812 1:        ldx [%o1+%o2],%o4     // fetch another digit
  813           subcc %o3,%o4,%g0     // compare
  814           bne,pn %xcc,3f
  815          _ addcc %o2,8,%o2      // "decrement" counter, advance pointer
  816           bne,a,pt %xcc,1b
  817          __ ldx [%o0+%o2],%o3   // fetch next digit
  818 2:      retl
  819        _ mov 0,%o0  // all digits were equal
  820 3:      subcc %o3,%o4,%g0       // compare again (the addcc in the delay slot overwrote the condition codes)
  821         mov 1,%o0  // assume x > y
  822         movlu %xcc,-1,%o0  // x < y (unsigned): result is -1
  823         retl
  824        _ sra %o0,0,%o0          // sign-extend %o0
  825 #endif
 826
 827 #if CL_DS_BIG_ENDIAN_P
 828
 829 // extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Adds count 64-bit digits, walking downward: every pointer is decremented
     // before it is used, so the first digits touched are sourceptr1[-1],
     // sourceptr2[-1] and destptr[-1]. Returns the final carry (0 or 1);
     // returns 0 when count = 0.
 830         DECLARE_FUNCTION(add_loop_down)
 831 C(add_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
 832 #if STANDARD_LOOPS
 833 //      srl %o3,0,%o3           // zero-extend %o3 = count
 834         brz,pn %o3,2f
 835        _ mov %g0,%g1            // Carry := 0
 836         sub %o0,8,%o0
 837 1:        ldx [%o0],%o4         // source1-digit
 838           sub %o1,8,%o1
 839           ldx [%o1],%o5         // source2-digit
 840           addcc %o4,%g1,%o4
 841           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
 842           addcc %o4,%o5,%o4
 843           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
 844           sub %o2,8,%o2
 845           stx %o4,[%o2]         // store digit
 846           subcc %o3,1,%o3
 847           bne,pt %xcc,1b
 848          _ sub %o0,8,%o0
 849 2:      retl
 850        _ mov %g1,%o0
 851 #endif
 852 #if COUNTER_LOOPS
 853 //      srl %o3,0,%o3           // zero-extend %o3 = count
 854         brz,pn %o3,2f
 855        _ mov %g0,%g1            // Carry := 0
 856         sub %o0,8,%o0
 857         sub %o1,8,%o1
 858         sllx %o3,3,%o3          // %o3 = 8*count
 859         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
 860         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
 861         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
 862 1:        ldx [%o0+%o3],%o4     // source1-digit
 863           ldx [%o1+%o3],%o5     // source2-digit
 864           addcc %o4,%g1,%o4
 865           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
 866           addcc %o4,%o5,%o4
 867           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
 868           subcc %o3,8,%o3
 869           bne,pt %xcc,1b
 870          _ stx %o4,[%o2+%o3]    // store digit (index already decremented)
 871 2:      retl
 872        _ mov %g1,%o0
 873 #endif
 874
 875 // extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
     // Adds count 64-bit source digits into the destination digits in place,
     // walking downward (both pointers are decremented before use). Returns the
     // final carry (0 or 1); returns 0 when count = 0.
 876         DECLARE_FUNCTION(addto_loop_down)
 877 C(addto_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
 878 #if STANDARD_LOOPS
 879 //      srl %o2,0,%o2           // zero-extend %o2 = count
 880         brz,pn %o2,2f
 881        _ mov %g0,%o5            // Carry := 0
 882         sub %o0,8,%o0
 883 1:        ldx [%o0],%o3         // source-digit
 884           sub %o1,8,%o1
 885           ldx [%o1],%o4         // dest-digit
 886           addcc %o3,%o5,%o3
 887           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
 888           addcc %o3,%o4,%o4
 889           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
 890           stx %o4,[%o1]         // store digit
 891           subcc %o2,1,%o2
 892           bne,pt %xcc,1b
 893          _ sub %o0,8,%o0
 894 2:      retl
 895        _ mov %o5,%o0
 896 #endif
 897 #if COUNTER_LOOPS
 898 //      srl %o2,0,%o2           // zero-extend %o2 = count
 899         brz,pn %o2,2f
 900        _ mov %g0,%o5            // Carry := 0
 901         sub %o0,8,%o0
 902         sub %o1,8,%o1
 903         sllx %o2,3,%o2          // %o2 = 8*count
 904         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
 905         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
 906           ldx [%o0+%o2],%o3     // source-digit
 907 1:        ldx [%o1+%o2],%o4     // dest-digit
 908           addcc %o3,%o5,%o3
 909           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
 910           addcc %o3,%o4,%o4
 911           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
 912           stx %o4,[%o1+%o2]     // store digit
 913           subcc %o2,8,%o2
 914           bne,a,pt %xcc,1b
 915          __ ldx [%o0+%o2],%o3   // source-digit (only if the branch is taken)
 916 2:      retl
 917        _ mov %o5,%o0
 918 #endif
 919
 920 // extern uintD inc_loop_down (uintD* ptr, uintC count);
     // Increments the count-digit number ending just below ptr by one, in
     // place, walking downward. Stops at the first digit that does not wrap to
     // zero and returns 0; returns 1 if every digit wrapped (also for count = 0).
 921         DECLARE_FUNCTION(inc_loop_down)
 922 C(inc_loop_down:) // Input in %o0,%o1, Output in %o0
 923 #if STANDARD_LOOPS
 924 //      srl %o1,0,%o1           // zero-extend %o1 = count
 925         brz,pn %o1,2f
 926        _ sub %o0,8,%o0
 927 1:        ldx [%o0],%o2
 928           addcc %o2,1,%o2
 929           bne,pn %xcc,3f       // result nonzero: no carry into the next digit
 930          _ stx %o2,[%o0]       // delay slot: the digit is stored either way
 931           subcc %o1,1,%o1
 932           bne,pt %xcc,1b
 933          _ sub %o0,8,%o0
 934 2:      retl
 935        _ mov 1,%o0
 936 3:      retl
 937        _ mov 0,%o0
 938 #endif
 939 #if COUNTER_LOOPS
 940 //      srl %o1,0,%o1           // zero-extend %o1 = count
 941         brz,pn %o1,2f
 942        _ sub %o0,8,%o0
 943         sllx %o1,3,%o1          // %o1 = 8*count
 944         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
 945           ldx [%o0+%o1],%o2     // fetch digit
 946 1:        addcc %o2,1,%o2       // increment
 947           bne,pn %xcc,3f
 948          _ stx %o2,[%o0+%o1]    // store
 949           subcc %o1,8,%o1       // decrement counter, decrement pointer
 950           bne,a,pt %xcc,1b
 951          __ ldx [%o0+%o1],%o2
 952 2:      retl
 953        _ mov 1,%o0
 954 3:      retl
 955        _ mov 0,%o0
 956 #endif
 957
 958 // extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Subtracts count 64-bit digits (source1 - source2), walking downward, and
     // stores the differences below destptr. All pointers are decremented
     // before use. Returns the final borrow as 0 or 1; returns 0 when count = 0.
 959         DECLARE_FUNCTION(sub_loop_down)
 960 C(sub_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
 961 #if STANDARD_LOOPS
 962 //      srl %o3,0,%o3           // zero-extend %o3 = count
 963         brz,pn %o3,2f
 964        _ mov %g0,%g1            // Carry := 0
 965         sub %o1,8,%o1
 966 1:        ldx [%o1],%o5         // source2-digit
 967           sub %o0,8,%o0
 968           ldx [%o0],%o4         // source1-digit
 969           addcc %o5,%g1,%o5
 970           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
 971           subcc %o4,%o5,%o4
 972           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
 973           sub %o2,8,%o2
 974           stx %o4,[%o2]         // store digit
 975           subcc %o3,1,%o3
 976           bne,pt %xcc,1b
 977          _ sub %o1,8,%o1
 978 2:      retl
 979        _ mov %g1,%o0
 980 #endif
 981 #if COUNTER_LOOPS
 982 //      srl %o3,0,%o3           // zero-extend %o3 = count
 983         brz,pn %o3,2f
 984        _ mov %g0,%g1            // Carry := 0
 985         sub %o0,8,%o0
 986         sub %o1,8,%o1
 987         sllx %o3,3,%o3          // %o3 = 8*count
 988         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
 989         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
 990         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
 991 1:        ldx [%o0+%o3],%o4     // source1-digit
 992           ldx [%o1+%o3],%o5     // source2-digit
 993           addcc %o5,%g1,%o5
 994           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
 995           subcc %o4,%o5,%o4
 996           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
 997           subcc %o3,8,%o3
 998           bne,pt %xcc,1b
 999          _ stx %o4,[%o2+%o3]    // store digit
1000 2:      retl
1001        _ mov %g1,%o0
1002 #endif
1003
1004 // extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
     // Subtracts count 64-bit digits (source1 - source2) with an incoming
     // borrow, walking downward, and stores the differences below destptr.
     // The borrow is represented as 0 (none) or -1 (borrow one), both on input
     // and on return. For count = 0 the incoming carry is returned unchanged.
     //
     // Per digit: first fold the borrow into the source2 digit
     // (%o5 - (-1) = %o5 + 1), then subtract that from the source1 digit.
     // subcc sets C on an unsigned borrow. In the first step with %g1 = -1,
     // C is set exactly when %o5 + 1 did NOT wrap, so the borrow has to be
     // cleared on C set (movcs) and kept on C clear, the case %o5 = 2^64-1.
     // The code previously used movcc here, which inverted that decision and
     // returned a stale borrow of -1 for e.g. 5 - 3 - 1.
1005         DECLARE_FUNCTION(subx_loop_down)
1006 C(subx_loop_down:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1, Output in %o0
1007 #if STANDARD_LOOPS
1008 //      srl %o3,0,%o3           // zero-extend %o3 = count
1009         brz,pn %o3,2f
1010        _ mov %o4,%g1            // carry (0 or -1)
1011         sub %o1,8,%o1
1012 1:        ldx [%o1],%o5         // source2-digit
1013           sub %o0,8,%o0
1014           ldx [%o0],%o4         // source1-digit
1015           subcc %o5,%g1,%o5
1016           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1
1017           subcc %o4,%o5,%o4
1018           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
1019           sub %o2,8,%o2
1020           stx %o4,[%o2]         // store digit
1021           subcc %o3,1,%o3
1022           bne,pt %xcc,1b
1023          _ sub %o1,8,%o1
1024 2:      retl
1025        _ mov %g1,%o0
1026 #endif
1027 #if COUNTER_LOOPS
1028 //      srl %o3,0,%o3           // zero-extend %o3 = count
1029         brz,pn %o3,2f
1030        _ mov %o4,%g1            // carry (0 or -1)
1031         sub %o0,8,%o0
1032         sub %o1,8,%o1
1033         sllx %o3,3,%o3          // %o3 = 8*count
1034         sub %o0,%o3,%o0         // %o0 = &sourceptr1[-count-1]
1035         sub %o1,%o3,%o1         // %o1 = &sourceptr2[-count-1]
1036         sub %o2,%o3,%o2         // %o2 = &destptr[-count]
1037 1:        ldx [%o1+%o3],%o5     // source2-digit
1038           ldx [%o0+%o3],%o4     // source1-digit
1039           subcc %o5,%g1,%o5
1040           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1
1041           subcc %o4,%o5,%o4
1042           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
1043           subcc %o3,8,%o3
1044           bne,pt %xcc,1b
1045          _ stx %o4,[%o2+%o3]    // store digit
1046 2:      retl
1047        _ mov %g1,%o0
1048 #endif
1049
1050 // extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
     // Subtracts count 64-bit source digits from the destination digits in
     // place (dest := dest - source), walking downward; both pointers are
     // decremented before use. Returns the final borrow as 0 or 1; returns 0
     // when count = 0.
1051         DECLARE_FUNCTION(subfrom_loop_down)
1052 C(subfrom_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1053 #if STANDARD_LOOPS
1054 //      srl %o2,0,%o2           // zero-extend %o2 = count
1055         brz,pn %o2,2f
1056        _ mov %g0,%o5            // Carry := 0
1057         sub %o0,8,%o0
1058 1:        ldx [%o0],%o3         // source-digit
1059           sub %o1,8,%o1
1060           ldx [%o1],%o4         // dest-digit
1061           addcc %o3,%o5,%o3
1062           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1063           subcc %o4,%o3,%o4
1064           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
1065           stx %o4,[%o1]         // store digit
1066           subcc %o2,1,%o2
1067           bne,pt %xcc,1b
1068          _ sub %o0,8,%o0
1069 2:      retl
1070        _ mov %o5,%o0
1071 #endif
1072 #if COUNTER_LOOPS
1073 //      srl %o2,0,%o2           // zero-extend %o2 = count
1074         brz,pn %o2,2f
1075        _ mov %g0,%o5            // Carry := 0
1076         sub %o0,8,%o0
1077         sub %o1,8,%o1
1078         sllx %o2,3,%o2          // %o2 = 8*count
1079         sub %o0,%o2,%o0         // %o0 = &sourceptr[-count-1]
1080         sub %o1,%o2,%o1         // %o1 = &destptr[-count-1]
1081           ldx [%o0+%o2],%o3     // source-digit
1082 1:        ldx [%o1+%o2],%o4     // dest-digit
1083           addcc %o3,%o5,%o3
1084           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1085           subcc %o4,%o3,%o4
1086           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
1087           stx %o4,[%o1+%o2]     // store digit
1088           subcc %o2,8,%o2
1089           bne,a,pt %xcc,1b
1090          __ ldx [%o0+%o2],%o3   // source-digit (only if the branch is taken)
1091 2:      retl
1092        _ mov %o5,%o0
1093 #endif
1094
1095 // extern uintD dec_loop_down (uintD* ptr, uintC count);
     // Decrements the count-digit number ending just below ptr by one, in
     // place, walking downward. Stops at the first digit that does not borrow
     // and returns 0; returns -1 if every digit borrowed (also for count = 0).
1096         DECLARE_FUNCTION(dec_loop_down)
1097 C(dec_loop_down:) // Input in %o0,%o1, Output in %o0
1098 #if STANDARD_LOOPS
1099 //      srl %o1,0,%o1           // zero-extend %o1 = count
1100         brz,pn %o1,2f
1101        _ sub %o0,8,%o0
1102 1:        ldx [%o0],%o2
1103           subcc %o2,1,%o2
1104           bcc,pn %xcc,3f        // carry clear: no borrow, done
1105          _ stx %o2,[%o0]        // delay slot: the digit is stored either way
1106           subcc %o1,1,%o1
1107           bne,pt %xcc,1b
1108          _ sub %o0,8,%o0
1109 2:      retl
1110        _ mov -1,%o0
1111 3:      retl
1112        _ mov 0,%o0
1113 #endif
1114 #if COUNTER_LOOPS
1115 //      srl %o1,0,%o1           // zero-extend %o1 = count
1116         brz,pn %o1,2f
1117        _ sub %o0,8,%o0
1118         sllx %o1,3,%o1          // %o1 = 8*count
1119         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
1120           ldx [%o0+%o1],%o2     // fetch digit
1121 1:        subcc %o2,1,%o2       // decrement
1122           bcc,pn %xcc,3f
1123          _ stx %o2,[%o0+%o1]    // store
1124           subcc %o1,8,%o1       // decrement counter, decrement pointer
1125           bne,a,pt %xcc,1b
1126          __ ldx [%o0+%o1],%o2
1127 2:      retl
1128        _ mov -1,%o0
1129 3:      retl
1130        _ mov 0,%o0
1131 #endif
1132
1133 // extern uintD neg_loop_down (uintD* ptr, uintC count);
     // Negates (two's complement) the count-digit number ending just below ptr,
     // in place, walking downward. Zero digits are skipped unchanged; the first
     // nonzero digit is negated and every remaining digit is bitwise inverted.
     // Returns 0 if all digits were zero (or count = 0), otherwise -1.
1134         DECLARE_FUNCTION(neg_loop_down)
1135 C(neg_loop_down:) // Input in %o0,%o1, Output in %o0
1136 #if STANDARD_LOOPS
1137 //      srl %o1,0,%o1           // zero-extend %o1 = count
1138         // search for the first digit /= 0:
1139         brz,pn %o1,2f
1140        _ sub %o0,8,%o0
1141 1:        ldx [%o0],%o2
1142           subcc %g0,%o2,%o2
1143           bne,pn %xcc,3f
1144          _ subcc %o1,1,%o1      // count down; these flags are tested again at label 3
1145           bne,pt %xcc,1b
1146          _ sub %o0,8,%o0
1147 2:      retl
1148        _ mov 0,%o0
1149 3:      // first digit /= 0 found, from now on there are carries
1150         stx %o2,[%o0]           // negate 1 digit
1151         // invert all other digits:
1152         be,pn %xcc,5f           // no digits left (flags from the subcc in the delay slot above)
1153        _ sub %o0,8,%o0
1154 4:        ldx [%o0],%o2
1155           subcc %o1,1,%o1
1156           xnor %g0,%o2,%o2      // bitwise NOT
1157           stx %o2,[%o0]
1158           bne,pt %xcc,4b
1159          _ sub %o0,8,%o0
1160 5:      retl
1161        _ mov -1,%o0
1162 #endif
1163 #if COUNTER_LOOPS
1164 //      srl %o1,0,%o1           // zero-extend %o1 = count
1165         // search for the first digit /= 0:
1166         brz,pn %o1,2f
1167        _ sub %o0,8,%o0
1168         sllx %o1,3,%o1          // %o1 = 8*count
1169         sub %o0,%o1,%o0         // %o0 = &ptr[-count-1]
1170           ldx [%o0+%o1],%o2     // fetch digit
1171 1:        subcc %g0,%o2,%o2     // negate, test
1172           bne,pn %xcc,3f
1173          _ subcc %o1,8,%o1      // decrement counter, decrement pointer
1174           bne,a,pt %xcc,1b
1175          __ ldx [%o0+%o1],%o2
1176 2:      retl
1177        _ mov 0,%o0
1178 3:      // first digit /= 0 found, from now on there are carries
1179         // invert all other digits:
1180         add %o1,8,%o1           // undo the decrement from the delay slot
1181         stx %o2,[%o0+%o1]       // store
1182         subcc %o1,8,%o1
1183         be,pn %xcc,5f
1184        _ nop
1185           ldx [%o0+%o1],%o2
1186 4:        xnor %g0,%o2,%o2      // bitwise NOT
1187           stx %o2,[%o0+%o1]
1188           subcc %o1,8,%o1
1189           bne,a,pt %xcc,4b
1190          __ ldx [%o0+%o1],%o2
1191 5:      retl
1192        _ mov -1,%o0
1193 #endif
1194
1195 // extern uintD shift1left_loop_down (uintD* ptr, uintC count);
     // Shifts the count-digit number ending just below ptr left by one bit, in
     // place, walking downward. Bit 63 of each digit becomes bit 0 of the next
     // digit processed. Returns the bit shifted out of the last digit (0 or 1);
     // returns 0 when count = 0.
1196         DECLARE_FUNCTION(shift1left_loop_down)
1197 C(shift1left_loop_down:) // Input in %o0,%o1, Output in %o0
1198 //      srl %o1,0,%o1           // zero-extend %o1 = count
1199         brz,pn %o1,2f
1200        _ mov 0,%o3              // Carry := 0
1201         sub %o0,8,%o0
1202 1:        ldx [%o0],%o2         // digit
1203           addcc %o2,%o2,%o4     // shift
1204           add %o4,%o3,%o4       // and carry
1205           srlx %o2,63,%o3       // new carry
1206           stx %o4,[%o0]         // store digit
1207           subcc %o1,1,%o1
1208           bne,pt %xcc,1b
1209          _ sub %o0,8,%o0
1210 2:      retl
1211        _ mov %o3,%o0
1212
1213 // extern uintD shiftleft_loop_down (uintD* ptr, uintC count, uintC i, uintD carry);
     // Shifts the count-digit number ending just below ptr left by i bits, in
     // place, walking downward. The incoming carry is OR-ed into the low bits
     // of the first digit; the top i bits of each digit carry into the next.
     // Returns the bits shifted out of the last digit (the incoming carry when
     // count = 0).
     // NOTE(review): the 64-i shift count relies on the hardware using only the
     // low 6 bits, so this presumably expects 0 < i < 64 - confirm with callers.
1214         DECLARE_FUNCTION(shiftleft_loop_down)
1215 C(shiftleft_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
1216 //      srl %o1,0,%o1           // zero-extend %o1 = count
1217         brz,pn %o1,2f
1218        _ sub %g0,%o2,%g1        // 64-i (mod 64)
1219         sub %o0,8,%o0
1220 1:        ldx [%o0],%o4         // digit
1221           subcc %o1,1,%o1
1222           sllx %o4,%o2,%o5      // its lower (64-i) bits
1223           or %o3,%o5,%o5        // combine with the old carry
1224           stx %o5,[%o0]         // store digit
1225           srlx %o4,%g1,%o3      // its highest i bits yield the new carry
1226           bne,pt %xcc,1b
1227          _ sub %o0,8,%o0
1228 2:      retl
1229        _ mov %o3,%o0
1230
1231 // extern uintD shiftleftcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
     // Copies count digits from below sourceptr to below destptr, shifted left
     // by i bits, walking downward (both pointers are decremented before use).
     // The carry starts at 0. Returns the bits shifted out of the last digit;
     // returns 0 when count = 0.
     // NOTE(review): presumably expects 0 < i < 64 (64-i is taken mod 64) - confirm.
1232         DECLARE_FUNCTION(shiftleftcopy_loop_down)
1233 C(shiftleftcopy_loop_down:) // Input in %o0,%o1,%o2,%o3, clobbers %g1,%g2, Output in %o0
1234 //      srl %o2,0,%o2           // zero-extend %o2 = count
1235         brz,pn %o2,2f
1236        _ mov 0,%o4              // Carry := 0
1237         sub %g0,%o3,%g1         // 64-i (mod 64)
1238         sub %o0,8,%o0
1239 1:        ldx [%o0],%o5         // digit
1240           subcc %o2,1,%o2
1241           sllx %o5,%o3,%g2      // its lower (64-i) bits
1242           or %o4,%g2,%g2        // combine with the old carry
1243           sub %o1,8,%o1
1244           stx %g2,[%o1]         // store digit
1245           srlx %o5,%g1,%o4      // its highest i bits yield the new carry
1246           bne,pt %xcc,1b
1247          _ sub %o0,8,%o0
1248 2:      retl
1249        _ mov %o4,%o0
1250
1251 // extern uintD shift1right_loop_up (uintD* ptr, uintC count, uintD carry);
     // Shifts count digits starting at ptr right by one bit, in place, walking
     // upward. The incoming carry is moved to bit 63 and OR-ed into the first
     // digit; bit 0 of each digit becomes bit 63 of the next. Returns the
     // shifted-out bit positioned at bit 63 (carry<<63 when count = 0).
1252         DECLARE_FUNCTION(shift1right_loop_up)
1253 C(shift1right_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
1254 //      srl %o1,0,%o1           // zero-extend %o1 = count
1255         brz,pn %o1,2f
1256        _ sllx %o2,63,%o2        // carry
1257 1:        ldx [%o0],%o3         // digit
1258           subcc %o1,1,%o1
1259           srlx %o3,1,%o4        // shift
1260           or %o2,%o4,%o4        // and combine with old carry
1261           stx %o4,[%o0]         // and store
1262           sllx %o3,63,%o2       // new carry
1263           bne,pt %xcc,1b
1264          _ add %o0,8,%o0
1265 2:      retl
1266        _ mov %o2,%o0
1267
1268 // extern uintD shiftright_loop_up (uintD* ptr, uintC count, uintC i);
     // Shifts count digits starting at ptr right (logically) by i bits, in
     // place, walking upward. The carry starts at 0; the low i bits of each
     // digit move into the top of the next. Returns the bits shifted out of the
     // last digit, left-aligned; returns 0 when count = 0.
     // NOTE(review): presumably expects 0 < i < 64 (64-i is taken mod 64) - confirm.
1269         DECLARE_FUNCTION(shiftright_loop_up)
1270 C(shiftright_loop_up:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
1271 //      srl %o1,0,%o1           // zero-extend %o1 = count
1272         sub %g0,%o2,%g1         // 64-i (mod 64)
1273         brz,pn %o1,2f
1274        _ or %g0,%g0,%o3         // Carry := 0
1275 1:        ldx [%o0],%o4         // digit
1276           subcc %o1,1,%o1
1277           srlx %o4,%o2,%o5      // shift
1278           or %o3,%o5,%o5        // and combine with old carry
1279           stx %o5,[%o0]         // and store
1280           sllx %o4,%g1,%o3      // new carry
1281           bne,pt %xcc,1b
1282          _ add %o0,8,%o0
1283 2:      retl
1284        _ mov %o3,%o0
1285
1286 // extern uintD shiftrightsigned_loop_up (uintD* ptr, uintC count, uintC i);
     // Like shiftright_loop_up, but the first digit is shifted arithmetically
     // (srax), so its sign bit is replicated into the vacated top bits.
     // The first digit is loaded and stored before count is tested, so the
     // code as written requires count >= 1.
     // NOTE(review): presumably expects 0 < i < 64 (64-i is taken mod 64) - confirm.
1287         DECLARE_FUNCTION(shiftrightsigned_loop_up)
1288 C(shiftrightsigned_loop_up:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
1289 //      srl %o1,0,%o1           // zero-extend %o1 = count
1290         ldx [%o0],%o4           // first digit
1291         sub %g0,%o2,%g1         // 64-i (mod 64)
1292         srax %o4,%o2,%o5        // shift
1293         stx %o5,[%o0]           // and store
1294         sllx %o4,%g1,%o3        // new carry
1295         subcc %o1,1,%o1
1296         be,pn %xcc,2f           // only one digit: done
1297        _ add %o0,8,%o0
1298 1:        ldx [%o0],%o4         // digit
1299           subcc %o1,1,%o1
1300           srlx %o4,%o2,%o5      // shift
1301           or %o3,%o5,%o5        // and combine with old carry
1302           stx %o5,[%o0]         // and store
1303           sllx %o4,%g1,%o3      // new carry
1304           bne,pt %xcc,1b
1305          _ add %o0,8,%o0
1306 2:      retl
1307        _ mov %o3,%o0
1308
1309 // extern uintD shiftrightcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
     // Copies count digits from sourceptr to destptr, shifted right (logically)
     // by i bits, walking upward. The incoming carry digit is shifted left by
     // 64-i and OR-ed into the first result digit. Returns the bits shifted out
     // of the last digit, left-aligned (the shifted incoming carry when count = 0).
     // NOTE(review): presumably expects 0 < i < 64 (64-i is taken mod 64) - confirm.
1310         DECLARE_FUNCTION(shiftrightcopy_loop_up)
1311 C(shiftrightcopy_loop_up:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1,%g2, Output in %o0
1312 //      srl %o2,0,%o2           // zero-extend %o2 = count
1313         sub %g0,%o3,%g1         // 64-i (mod 64)
1314         brz,pn %o2,2f
1315        _ sllx %o4,%g1,%g2       // first carry
1316 1:        ldx [%o0],%o4         // digit
1317           add %o0,8,%o0
1318           srlx %o4,%o3,%o5      // shift
1319           or %g2,%o5,%o5        // and combine with old carry
1320           stx %o5,[%o1]         // and store
1321           sllx %o4,%g1,%g2      // new carry
1322           subcc %o2,1,%o2
1323           bne,pt %xcc,1b
1324          _ add %o1,8,%o1
1325 2:      retl
1326        _ mov %g2,%o0
1327
1328 // extern uintD mulusmall_loop_down (uintD digit, uintD* ptr, uintC len, uintD newdigit);
     // Multiplies the len-digit number ending just below ptr by the small
     // factor digit, in place, walking downward, and adds newdigit as the
     // initial carry. Each 64-bit digit is split into 32-bit halves, so every
     // partial product fits in 64 bits as long as digit is small. Returns the
     // final carry (newdigit when len = 0).
1329         DECLARE_FUNCTION(mulusmall_loop_down)
1330 C(mulusmall_loop_down:) // Input in %o0,%o1,%o2,%o3, Output in %o0, clobbers %g1
1331 //      srl %o2,0,%o2           // zero-extend %o2 = len
1332         brz,pn %o2,2f
1333        _ sub %o1,8,%o1
1334 1:        // multiply the next digit [%o1] by the 6-bit number %o0
1335           // and add the small carry %o3:
1336           ldx [%o1],%o4
1337           sub %o2,1,%o2
1338           srlx %o4,32,%o5       // high32(x)
1339           srl %o4,0,%o4         // low32(x)
1340           mulx %o4,%o0,%o4      // low32(x)*digit
1341           mulx %o5,%o0,%o5      // high32(x)*digit
1342           sllx %o5,32,%g1       // low32(high32(x)*digit)*2^32
1343           add %g1,%o3,%g1       // plus carry
1344           addcc %o4,%g1,%o4     // plus low32(x)*digit
1345           srlx %o5,32,%o3       // high32(high32(x)*digit)
1346           add %o3,1,%g1
1347           movcs %xcc,%g1,%o3    // new carry (+1 if the addcc above overflowed)
1348           stx %o4,[%o1]         // store new digit
1349           brnz,pt %o2,1b
1350          _ sub %o1,8,%o1
1351 2:      retl
1352        _ mov %o3,%o0
1353
1354 // extern void mulu_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len-digit number ending just below sourceptr by digit and
     // writes the len low result digits below destptr, walking downward; the
     // final carry is then stored as one further digit below those.
     // The 64x64->128 product is assembled from four 32x32->64 mulx products.
     // There is no test for len = 0 before the loop, so the body always runs at
     // least once. Uses a register window (save/restore).
1355         DECLARE_FUNCTION(mulu_loop_down)
1356 C(mulu_loop_down:) // Input in %i0,%i1,%i2,%i3
1357         save %sp,-192,%sp
1358         mov 0,%l0               // carry
1359         srlx %i0,32,%l1         // %l1 = high32(digit)
1360         srl %i0,0,%l2           // %l2 = low32(digit)
1361         mov 1,%l3
1362         sllx %l3,32,%l3         // %l3 = 2^32
1363         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1364 1:        sub %i2,8,%i2
1365           ldx [%i1+%i2],%o0     // next digit
1366           subcc %i3,1,%i3
1367           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1368           srlx %o0,32,%o1
1369           srl %o0,0,%o2
1370           mulx %l1,%o1,%o3      // high part
1371           mulx %l1,%o2,%o4      // first mid part
1372           mulx %l2,%o1,%o1      // second mid part
1373           mulx %l2,%o2,%o2      // low part
1374           srlx %o2,32,%o5       // low part's upper half
1375           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1376           addcc %o4,%o1,%o4     // add other mid part
1377           add %o3,%l3,%o5
1378           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1379           srlx %o4,32,%o5
1380           sllx %o4,32,%o4
1381           srl %o2,0,%o2
1382           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1383           addcc %o0,%l0,%o0     // add old carry
1384           add %o3,%o5,%l0       // add high32(midparts) to high part
1385           add %l0,1,%o5
1386           movcs %xcc,%o5,%l0    // new carry
1387           // multiplication done
1388           brnz,pt %i3,1b
1389          _ stx %o0,[%i2]        // store low digit
1390         stx %l0,[%i2-8]         // store last carry
1391         ret
1392        _ restore
1393
1394 // extern uintD muluadd_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiply-accumulate: for each of len digits, walking downward, computes
     // dest := dest + digit*source + carry and keeps the low 64 bits in dest.
     // Returns the final carry digit. The 64x64->128 product is assembled from
     // four 32x32->64 mulx products. There is no test for len = 0 before the
     // loop, so the body always runs at least once. Uses a register window.
1395         DECLARE_FUNCTION(muluadd_loop_down)
1396 C(muluadd_loop_down:) // Input in %i0,%i1,%i2,%i3, Output in %i0
1397         save %sp,-192,%sp
1398         mov 0,%l0               // carry
1399         srlx %i0,32,%l1         // %l1 = high32(digit)
1400         srl %i0,0,%l2           // %l2 = low32(digit)
1401         mov 1,%l3
1402         sllx %l3,32,%l3         // %l3 = 2^32
1403         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1404 1:        sub %i2,8,%i2
1405           ldx [%i1+%i2],%o0     // next digit
1406           ldx [%i2],%i4         // *destptr
1407           subcc %i3,1,%i3
1408           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1409           srlx %o0,32,%o1
1410           srl %o0,0,%o2
1411           mulx %l1,%o1,%o3      // high part
1412           mulx %l1,%o2,%o4      // first mid part
1413           mulx %l2,%o1,%o1      // second mid part
1414           mulx %l2,%o2,%o2      // low part
1415           srlx %o2,32,%o5       // low part's upper half
1416           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1417           addcc %o4,%o1,%o4     // add other mid part
1418           add %o3,%l3,%o5
1419           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1420           srlx %o4,32,%o5
1421           sllx %o4,32,%o4
1422           srl %o2,0,%o2
1423           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1424           addcc %o0,%l0,%o0     // add old carry
1425           add %o3,%o5,%l0       // add high32(midparts) to high part
1426           add %l0,1,%o5
1427           movcs %xcc,%o5,%l0    // new carry
1428           // multiplication done
1429           addcc %i4,%o0,%o0     // add old *destptr
1430           add %l0,1,%o2
1431           movcs %xcc,%o2,%l0    // new carry
1432           brnz,pt %i3,1b
1433          _ stx %o0,[%i2]        // store low digit
1434         mov %l0,%i0             // last carry
1435         ret
1436        _ restore
1437
1438 // extern uintD mulusub_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiply-subtract: for each of len digits, walking downward, computes
     // dest := dest - (digit*source + carry) and keeps the low 64 bits in dest;
     // a borrow from that subtraction is added to the carry. Returns the final
     // carry digit. The 64x64->128 product is assembled from four 32x32->64
     // mulx products. There is no test for len = 0 before the loop, so the body
     // always runs at least once. Uses a register window.
1439         DECLARE_FUNCTION(mulusub_loop_down)
1440 C(mulusub_loop_down:) // Input in %i0,%i1,%i2,%i3, Output in %i0
1441         save %sp,-192,%sp
1442         mov 0,%l0               // carry
1443         srlx %i0,32,%l1         // %l1 = high32(digit)
1444         srl %i0,0,%l2           // %l2 = low32(digit)
1445         mov 1,%l3
1446         sllx %l3,32,%l3         // %l3 = 2^32
1447         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr
1448 1:        sub %i2,8,%i2
1449           ldx [%i1+%i2],%o0     // next digit
1450           ldx [%i2],%i4         // *destptr
1451           subcc %i3,1,%i3
1452           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
1453           srlx %o0,32,%o1
1454           srl %o0,0,%o2
1455           mulx %l1,%o1,%o3      // high part
1456           mulx %l1,%o2,%o4      // first mid part
1457           mulx %l2,%o1,%o1      // second mid part
1458           mulx %l2,%o2,%o2      // low part
1459           srlx %o2,32,%o5       // low part's upper half
1460           add %o4,%o5,%o4       // add to one of the mid parts, no carry
1461           addcc %o4,%o1,%o4     // add other mid part
1462           add %o3,%l3,%o5
1463           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
1464           srlx %o4,32,%o5
1465           sllx %o4,32,%o4
1466           srl %o2,0,%o2
1467           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
1468           addcc %o0,%l0,%o0     // add old carry
1469           add %o3,%o5,%l0       // add high32(midparts) to high part
1470           add %l0,1,%o5
1471           movcs %xcc,%o5,%l0    // new carry
1472           // multiplication done
1473           subcc %i4,%o0,%o0     // subtract from old *destptr
1474           add %l0,1,%o2
1475           movcs %xcc,%o2,%l0    // new carry
1476           brnz,pt %i3,1b
1477          _ stx %o0,[%i2]        // store low digit
1478         mov %l0,%i0             // last carry
1479         ret
1480        _ restore
1481
1482 #endif
1483
1484 #if !CL_DS_BIG_ENDIAN_P
1485
1486 // extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes x := x OR y over count 64-bit digits, walking downward: the
     // first digits touched are xptr[-1] and yptr[-1]. The y digits are only
     // read. Does nothing when count = 0.
     // Both loop variants must write the result to x; the COUNTER_LOOPS variant
     // used to store through the y pointer, which is fixed here.
1487         DECLARE_FUNCTION(or_loop_down)
1488 C(or_loop_down:) // Input in %o0,%o1,%o2
1489 #if STANDARD_LOOPS
1490 //      srl %o2,0,%o2           // zero-extend %o2 = count
1491         brz,pn %o2,2f
1492        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1493         sub %o0,8,%o0
1494 1:        ldx [%o0],%o3         // *xptr
1495           ldx [%o0+%o1],%o4     // *yptr
1496           subcc %o2,1,%o2
1497           or %o3,%o4,%o3        // combine
1498           stx %o3,[%o0]         // =: *xptr
1499           bne,pt %xcc,1b
1500          _ sub %o0,8,%o0        // xptr--, yptr--
1501 2:      retl
1502        _ nop
1503 #endif
1504 #if COUNTER_LOOPS
1505 //      srl %o2,0,%o2           // zero-extend %o2 = count
1506         brz,pn %o2,2f
1507        _ sllx %o2,3,%o2         // %o2 = 8*count
1508         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1509         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1510 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1511           ldx [%o1+%o2],%o3     // fetch y digit
1512           ldx [%o0+%o2],%o4     // fetch x digit
1513           or %o4,%o3,%o3        // combine both
1514           bne,pt %xcc,1b
1515          _ stx %o3,[%o0+%o2]    // store digit into x
1516 2:      retl
1517        _ nop
1518 #endif
1519
1520 // extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Computes x := x XOR y over count 64-bit digits, walking downward: the
     // first digits touched are xptr[-1] and yptr[-1]. The y digits are only
     // read. Does nothing when count = 0.
     // Both loop variants must write the result to x; the COUNTER_LOOPS variant
     // used to store through the y pointer, which is fixed here.
1521         DECLARE_FUNCTION(xor_loop_down)
1522 C(xor_loop_down:) // Input in %o0,%o1,%o2
1523 #if STANDARD_LOOPS
1524 //      srl %o2,0,%o2           // zero-extend %o2 = count
1525         brz,pn %o2,2f
1526        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1527         sub %o0,8,%o0
1528 1:        ldx [%o0],%o3         // *xptr
1529           ldx [%o0+%o1],%o4     // *yptr
1530           subcc %o2,1,%o2
1531           xor %o3,%o4,%o3       // combine
1532           stx %o3,[%o0]         // =: *xptr
1533           bne,pt %xcc,1b
1534          _ sub %o0,8,%o0        // xptr--, yptr--
1535 2:      retl
1536        _ nop
1537 #endif
1538 #if COUNTER_LOOPS
1539 //      srl %o2,0,%o2           // zero-extend %o2 = count
1540         brz,pn %o2,2f
1541        _ sllx %o2,3,%o2         // %o2 = 8*count
1542         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1543         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1544 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1545           ldx [%o1+%o2],%o3     // fetch y digit
1546           ldx [%o0+%o2],%o4     // fetch x digit
1547           xor %o4,%o3,%o3       // combine both
1548           bne,pt %xcc,1b
1549          _ stx %o3,[%o0+%o2]    // store digit into x
1550 2:      retl
1551        _ nop
1552 #endif
1553
1554 // extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward, starting at xptr[-1] / yptr[-1],
     // and replaces every x digit by (x AND y).  The y digits are only read.
     // count == 0 returns immediately.  No result register is set.
1555         DECLARE_FUNCTION(and_loop_down)
1556 C(and_loop_down:) // Input in %o0,%o1,%o2
1557 #if STANDARD_LOOPS
1558 //      srl %o2,0,%o2           // zero-extend %o2 = count
1559         brz,pn %o2,2f
1560        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1561         sub %o0,8,%o0
1562 1:        ldx [%o0],%o3         // *xptr
1563           ldx [%o0+%o1],%o4     // *yptr
1564           subcc %o2,1,%o2
1565           and %o3,%o4,%o3       // combine
1566           stx %o3,[%o0]         // =: *xptr
1567           bne,pt %xcc,1b
1568          _ sub %o0,8,%o0        // xptr--, yptr-- (%o1 holds the distance)
1569 2:      retl
1570        _ nop
1571 #endif
1572 #if COUNTER_LOOPS
1573 //      srl %o2,0,%o2           // zero-extend %o2 = count
1574         brz,pn %o2,2f
1575        _ sllx %o2,3,%o2         // %o2 = 8*count
1576         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1577         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1578 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1579           ldx [%o1+%o2],%o3     // fetch y digit
1580           ldx [%o0+%o2],%o4     // fetch x digit
1581           and %o4,%o3,%o3       // combine both
1582           bne,pt %xcc,1b
1583          _ stx %o3,[%o0+%o2]    // store into x (was y), as in STANDARD_LOOPS
1584 2:      retl
1585        _ nop
1586 #endif
1587
1588 // extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward, starting at xptr[-1] / yptr[-1],
     // and replaces every x digit by NOT (x XOR y) (xnor).  The y digits are
     // only read.  count == 0 returns immediately.  No result register is set.
1589         DECLARE_FUNCTION(eqv_loop_down)
1590 C(eqv_loop_down:) // Input in %o0,%o1,%o2
1591 #if STANDARD_LOOPS
1592 //      srl %o2,0,%o2           // zero-extend %o2 = count
1593         brz,pn %o2,2f
1594        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1595         sub %o0,8,%o0
1596 1:        ldx [%o0],%o3         // *xptr
1597           ldx [%o0+%o1],%o4     // *yptr
1598           subcc %o2,1,%o2
1599           xnor %o3,%o4,%o3      // combine
1600           stx %o3,[%o0]         // =: *xptr
1601           bne,pt %xcc,1b
1602          _ sub %o0,8,%o0        // xptr--, yptr-- (%o1 holds the distance)
1603 2:      retl
1604        _ nop
1605 #endif
1606 #if COUNTER_LOOPS
1607 //      srl %o2,0,%o2           // zero-extend %o2 = count
1608         brz,pn %o2,2f
1609        _ sllx %o2,3,%o2         // %o2 = 8*count
1610         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1611         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1612 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1613           ldx [%o1+%o2],%o3     // fetch y digit
1614           ldx [%o0+%o2],%o4     // fetch x digit
1615           xnor %o4,%o3,%o3      // combine both
1616           bne,pt %xcc,1b
1617          _ stx %o3,[%o0+%o2]    // store into x (was y), as in STANDARD_LOOPS
1618 2:      retl
1619        _ nop
1620 #endif
1621
1622 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward, starting at xptr[-1] / yptr[-1],
     // and replaces every x digit by NOT (x AND y).  The y digits are only
     // read.  count == 0 returns immediately.  No result register is set.
1623         DECLARE_FUNCTION(nand_loop_down)
1624 C(nand_loop_down:) // Input in %o0,%o1,%o2
1625 #if STANDARD_LOOPS
1626 //      srl %o2,0,%o2           // zero-extend %o2 = count
1627         brz,pn %o2,2f
1628        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1629         sub %o0,8,%o0
1630 1:        ldx [%o0],%o3         // *xptr
1631           ldx [%o0+%o1],%o4     // *yptr
1632           subcc %o2,1,%o2
1633           and %o3,%o4,%o3       // combine
1634           xnor %g0,%o3,%o3      // complement (xnor with zero)
1635           stx %o3,[%o0]         // =: *xptr
1636           bne,pt %xcc,1b
1637          _ sub %o0,8,%o0        // xptr--, yptr-- (%o1 holds the distance)
1638 2:      retl
1639        _ nop
1640 #endif
1641 #if COUNTER_LOOPS
1642 //      srl %o2,0,%o2           // zero-extend %o2 = count
1643         brz,pn %o2,2f
1644        _ sllx %o2,3,%o2         // %o2 = 8*count
1645         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1646         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1647 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1648           ldx [%o1+%o2],%o3     // fetch y digit
1649           ldx [%o0+%o2],%o4     // fetch x digit
1650           and %o4,%o3,%o3       // combine both
1651           xnor %g0,%o3,%o3      // complement (xnor with zero)
1652           bne,pt %xcc,1b
1653          _ stx %o3,[%o0+%o2]    // store into x (was y), as in STANDARD_LOOPS
1654 2:      retl
1655        _ nop
1656 #endif
1657
1658 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward, starting at xptr[-1] / yptr[-1],
     // and replaces every x digit by NOT (x OR y).  The y digits are only
     // read.  count == 0 returns immediately.  No result register is set.
1659         DECLARE_FUNCTION(nor_loop_down)
1660 C(nor_loop_down:) // Input in %o0,%o1,%o2
1661 #if STANDARD_LOOPS
1662 //      srl %o2,0,%o2           // zero-extend %o2 = count
1663         brz,pn %o2,2f
1664        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1665         sub %o0,8,%o0
1666 1:        ldx [%o0],%o3         // *xptr
1667           ldx [%o0+%o1],%o4     // *yptr
1668           subcc %o2,1,%o2
1669           or %o3,%o4,%o3        // combine
1670           xnor %g0,%o3,%o3      // complement (xnor with zero)
1671           stx %o3,[%o0]         // =: *xptr
1672           bne,pt %xcc,1b
1673          _ sub %o0,8,%o0        // xptr--, yptr-- (%o1 holds the distance)
1674 2:      retl
1675        _ nop
1676 #endif
1677 #if COUNTER_LOOPS
1678 //      srl %o2,0,%o2           // zero-extend %o2 = count
1679         brz,pn %o2,2f
1680        _ sllx %o2,3,%o2         // %o2 = 8*count
1681         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1682         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1683 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1684           ldx [%o1+%o2],%o3     // fetch y digit
1685           ldx [%o0+%o2],%o4     // fetch x digit
1686           or %o4,%o3,%o3        // combine both
1687           xnor %g0,%o3,%o3      // complement (xnor with zero)
1688           bne,pt %xcc,1b
1689          _ stx %o3,[%o0+%o2]    // store into x (was y), as in STANDARD_LOOPS
1690 2:      retl
1691        _ nop
1692 #endif
1693
1694 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward, starting at xptr[-1] / yptr[-1],
     // and replaces every x digit by (x AND NOT y) (andn).  The y digits are
     // only read.  count == 0 returns immediately.  No result register is set.
1695         DECLARE_FUNCTION(andc2_loop_down)
1696 C(andc2_loop_down:) // Input in %o0,%o1,%o2
1697 #if STANDARD_LOOPS
1698 //      srl %o2,0,%o2           // zero-extend %o2 = count
1699         brz,pn %o2,2f
1700        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1701         sub %o0,8,%o0
1702 1:        ldx [%o0],%o3         // *xptr
1703           ldx [%o0+%o1],%o4     // *yptr
1704           subcc %o2,1,%o2
1705           andn %o3,%o4,%o3      // combine
1706           stx %o3,[%o0]         // =: *xptr
1707           bne,pt %xcc,1b
1708          _ sub %o0,8,%o0        // xptr--, yptr-- (%o1 holds the distance)
1709 2:      retl
1710        _ nop
1711 #endif
1712 #if COUNTER_LOOPS
1713 //      srl %o2,0,%o2           // zero-extend %o2 = count
1714         brz,pn %o2,2f
1715        _ sllx %o2,3,%o2         // %o2 = 8*count
1716         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1717         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1718 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1719           ldx [%o1+%o2],%o3     // fetch y digit
1720           ldx [%o0+%o2],%o4     // fetch x digit
1721           andn %o4,%o3,%o3      // combine both: x AND NOT y
1722           bne,pt %xcc,1b
1723          _ stx %o3,[%o0+%o2]    // store into x (was y), as in STANDARD_LOOPS
1724 2:      retl
1725        _ nop
1726 #endif
1727
1728 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward, starting at xptr[-1] / yptr[-1],
     // and replaces every x digit by (x OR NOT y) (orn).  The y digits are
     // only read.  count == 0 returns immediately.  No result register is set.
1729         DECLARE_FUNCTION(orc2_loop_down)
1730 C(orc2_loop_down:) // Input in %o0,%o1,%o2
1731 #if STANDARD_LOOPS
1732 //      srl %o2,0,%o2           // zero-extend %o2 = count
1733         brz,pn %o2,2f
1734        _ sub %o1,%o0,%o1        // %o1 = yptr-xptr
1735         sub %o0,8,%o0
1736 1:        ldx [%o0],%o3         // *xptr
1737           ldx [%o0+%o1],%o4     // *yptr
1738           subcc %o2,1,%o2
1739           orn %o3,%o4,%o3       // combine
1740           stx %o3,[%o0]         // =: *xptr
1741           bne,pt %xcc,1b
1742          _ sub %o0,8,%o0        // xptr--, yptr-- (%o1 holds the distance)
1743 2:      retl
1744        _ nop
1745 #endif
1746 #if COUNTER_LOOPS
1747 //      srl %o2,0,%o2           // zero-extend %o2 = count
1748         brz,pn %o2,2f
1749        _ sllx %o2,3,%o2         // %o2 = 8*count
1750         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1751         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1752 1:        subcc %o2,8,%o2       // decrement counter, decrement pointer
1753           ldx [%o1+%o2],%o3     // fetch y digit
1754           ldx [%o0+%o2],%o4     // fetch x digit
1755           orn %o4,%o3,%o3       // combine both: x OR NOT y
1756           bne,pt %xcc,1b
1757          _ stx %o3,[%o0+%o2]    // store into x (was y), as in STANDARD_LOOPS
1758 2:      retl
1759        _ nop
1760 #endif
1761
1762 // extern void not_loop_down (uintD* xptr, uintC count);
     // Complements count 64-bit digits in place, walking downward from
     // xptr[-1].  count == 0 returns immediately.  No result register is set.
1763         DECLARE_FUNCTION(not_loop_down)
1764 C(not_loop_down:) // Input in %o0,%o1
1765 #if STANDARD_LOOPS
1766 //      srl %o1,0,%o1           // zero-extend %o1 = count
1767         brz,pn %o1,2f
1768        _ sub %o0,8,%o0
1769 1:        ldx [%o0],%o2
1770           subcc %o1,1,%o1
1771           xnor %g0,%o2,%o2
1772           stx %o2,[%o0]
1773           bne,pt %xcc,1b
1774          _ sub %o0,8,%o0
1775 2:      retl
1776        _ nop
1777 #endif
1778 #if COUNTER_LOOPS
1779 //      srl %o1,0,%o1           // zero-extend %o1 = count
1780         brz,pn %o1,2f
1781        _ sllx %o1,3,%o1         // %o1 = 8*count
1782         sub %o0,%o1,%o0         // %o0 = &xptr[-count]
1783 1:        subcc %o1,8,%o1       // decrement counter, decrement pointer
1784           ldx [%o0+%o1],%o2     // fetch next digit
1785           xnor %g0,%o2,%o2      // complement (xnor with zero)
1786           bne,pt %xcc,1b
1787          _ stx %o2,[%o0+%o1]    // store digit
1788 2:      retl
1789        _ nop
1790 #endif
1791
1792 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Walks count 64-bit digits downward from xptr[-1] / yptr[-1] and returns
     // 1 in %o0 as soon as some (x AND y) digit is nonzero, otherwise 0.
     // count == 0 returns 0.  Memory is only read.
1793         DECLARE_FUNCTION(and_test_loop_down)
1794 C(and_test_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1795 #if STANDARD_LOOPS
1796 //      srl %o2,0,%o2           // zero-extend %o2 = count
1797         brz,pn %o2,4f
1798        _ sub %o0,8,%o0
1799 1:        ldx [%o0],%o3
1800           sub %o1,8,%o1
1801           ldx [%o1],%o4
1802           subcc %o2,1,%o2
1803           be,pn %xcc,3f         // last digit: finish the test at 3:
1804          _ andcc %o3,%o4,%g0    // sets the flags that are tested next
1805           be,pt %xcc,1b
1806          _ sub %o0,8,%o0
1807 2:      retl
1808        _ mov 1,%o0
     // The digits are 64 bits wide, so the last test must use %xcc as well;
     // a plain "bne" looks at %icc and misses bits 32..63.
1809 3:      bne,pn %xcc,2b
1810        _ nop
1811 4:      retl
1812        _ mov 0,%o0
1813 #endif
1814 #if COUNTER_LOOPS
1815 //      srl %o2,0,%o2           // zero-extend %o2 = count
1816         sllx %o2,3,%o2          // %o2 = 8*count
1817         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1818         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1819         subcc %o2,8,%o2
1820         bcs,pn %xcc,2f          // count was 0
1821        _ nop
1822           ldx [%o0+%o2],%o3     // fetch next digit
1823 1:        ldx [%o1+%o2],%o4     // fetch another digit
1824           andcc %o3,%o4,%g0     // combine both
1825           bne,pn %xcc,3f
1826          _ subcc %o2,8,%o2      // decrement counter, decrement pointer
1827           bcc,a,pt %xcc,1b
1828          __ ldx [%o0+%o2],%o3   // fetch next digit
1829 2:      retl
1830        _ mov 0,%o0
1831 3:      retl
1832        _ mov 1,%o0
1833 #endif
1834
1835 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Compares count 64-bit digits downward from xptr[-1] / yptr[-1].
     // Returns in %o0: 0 if all digits are equal (or count == 0), otherwise
     // +1 or -1 depending on whether the first differing x digit is above or
     // below the y digit as an unsigned value.  Memory is only read.
1836         DECLARE_FUNCTION(compare_loop_down)
1837 C(compare_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
1838 #if STANDARD_LOOPS
1839 //      srl %o2,0,%o2           // zero-extend %o2 = count
1840         brz,pn %o2,2f
1841        _ nop
1842 1:        ldx [%o0-8],%o3
1843           ldx [%o1-8],%o4
1844           subcc %o3,%o4,%g0
1845           bne,pn %xcc,3f
1846          _ sub %o0,8,%o0
1847           subcc %o2,1,%o2
1848           bne,pn %xcc,1b
1849          _ sub %o1,8,%o1
1850 2:      retl
1851        _ mov 0,%o0
1852 3:      mov 1,%o0
1853         movlu %xcc,-1,%o0       // x digit < y digit (unsigned) => -1
1854         retl
1855        _ sra %o0,0,%o0          // sign-extend %o0
1856 #endif
1857 #if COUNTER_LOOPS
1858 //      srl %o2,0,%o2           // zero-extend %o2 = count
1859         sllx %o2,3,%o2          // %o2 = 8*count
1860         sub %o0,%o2,%o0         // %o0 = &xptr[-count]
1861         sub %o1,%o2,%o1         // %o1 = &yptr[-count]
1862         subcc %o2,8,%o2
1863         bcs,pn %xcc,4f          // count was 0
1864        _ nop
1865           ldx [%o0+%o2],%o3     // fetch next digit
1866 1:        ldx [%o1+%o2],%o4     // fetch another digit
1867           subcc %o2,8,%o2       // decrement counter, decrement pointer
1868           bcs,pn %xcc,3f        // that was the last digit
1869          _ subcc %o3,%o4,%g0    // compare
1870           be,a,pt %xcc,1b
1871          __ ldx [%o0+%o2],%o3   // fetch next digit
1872 2:      mov 1,%o0
1873         movlu %xcc,-1,%o0       // x digit < y digit (unsigned) => -1
1874         retl
1875        _ sra %o0,0,%o0          // sign-extend %o0
     // The compare above is 64 bits wide, so test %xcc; a plain "bne" looks
     // at %icc and would treat digits differing only in bits 32..63 as equal.
1876 3:      bne,pn %xcc,2b
1877        _ nop
1878 4:      retl
1879        _ mov 0,%o0
1880 #endif
1881
1882 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Adds count 64-bit digits of sourceptr1[] and sourceptr2[] going upward,
     // propagating the carry, and stores the sums to destptr[].
     // Returns the final carry (0 or 1) in %o0; count == 0 returns 0.
1883         DECLARE_FUNCTION(add_loop_up)
1884 C(add_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
1885 #if STANDARD_LOOPS
1886 //      srl %o3,0,%o3           // zero-extend %o3 = count
1887         brz,pn %o3,2f
1888        _ mov %g0,%g1            // Carry := 0
1889 1:        ldx [%o0],%o4         // source1-digit
1890           add %o0,8,%o0
1891           ldx [%o1],%o5         // source2-digit
1892           add %o1,8,%o1
1893           addcc %o4,%g1,%o4
1894           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
1895           addcc %o4,%o5,%o4
1896           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
1897           stx %o4,[%o2]         // store digit
1898           subcc %o3,1,%o3
1899           bne,pt %xcc,1b
1900          _ add %o2,8,%o2
1901 2:      retl
1902        _ mov %g1,%o0
1903 #endif
1904 #if COUNTER_LOOPS
1905 //      srl %o3,0,%o3           // zero-extend %o3 = count
1906         brz,pn %o3,2f
1907        _ mov %g0,%g1            // Carry := 0
1908         sub %g0,%o3,%o3         // %o3 = -count
1909         sllx %o3,3,%o3          // %o3 = -8*count
1910         sub %o2,8,%o2
1911         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
1912         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
1913         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
1914 1:        ldx [%o0+%o3],%o4     // source1-digit
1915           ldx [%o1+%o3],%o5     // source2-digit
1916           addcc %o4,%g1,%o4
1917           movcc %xcc,0,%g1      // %g1|%o4 := %o4 + old carry %g1
1918           addcc %o4,%o5,%o4
1919           movcs %xcc,1,%g1      // %g1|%o4 := %o4 + old carry %g1 + %o5
1920           addcc %o3,8,%o3       // decrement counter, advance pointer
1921           bne,pt %xcc,1b
1922          _ stx %o4,[%o2+%o3]    // store digit (%o2 is biased by -8 to match)
1923 2:      retl
1924        _ mov %g1,%o0
1925 #endif
1926
1927 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Adds count 64-bit digits of sourceptr[] into destptr[] in place, going
     // upward and propagating the carry.
     // Returns the final carry (0 or 1) in %o0; count == 0 returns 0.
1928         DECLARE_FUNCTION(addto_loop_up)
1929 C(addto_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
1930 #if STANDARD_LOOPS
1931 //      srl %o2,0,%o2           // zero-extend %o2 = count
1932         brz,pn %o2,2f
1933        _ mov %g0,%o5            // Carry := 0
1934 1:        ldx [%o0],%o3         // source-digit
1935           add %o0,8,%o0
1936           ldx [%o1],%o4         // dest-digit
1937           addcc %o3,%o5,%o3
1938           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1939           addcc %o3,%o4,%o4
1940           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
1941           stx %o4,[%o1]         // store digit
1942           subcc %o2,1,%o2
1943           bne,pt %xcc,1b
1944          _ add %o1,8,%o1
1945 2:      retl
1946        _ mov %o5,%o0
1947 #endif
1948 #if COUNTER_LOOPS
1949 //      srl %o2,0,%o2           // zero-extend %o2 = count
1950         brz,pn %o2,2f
1951        _ mov %g0,%o5            // Carry := 0
1952         sub %g0,%o2,%o2         // %o2 = -count
1953         sllx %o2,3,%o2          // %o2 = -8*count
1954         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
1955         sub %o1,%o2,%o1         // %o1 = &destptr[count]
1956           ldx [%o0+%o2],%o3     // source-digit
1957 1:        ldx [%o1+%o2],%o4     // dest-digit
1958           addcc %o3,%o5,%o3
1959           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
1960           addcc %o3,%o4,%o4
1961           movcs %xcc,1,%o5      // %o5|%o4 := %o3 + old carry %o5 + %o4
1962           stx %o4,[%o1+%o2]     // store digit
1963           addcc %o2,8,%o2       // decrement counter, advance pointer
1964           bne,a,pt %xcc,1b
1965          __ ldx [%o0+%o2],%o3   // source-digit (only when the loop continues)
1966 2:      retl
1967        _ mov %o5,%o0
1968 #endif
1969
1970 // extern uintD inc_loop_up (uintD* ptr, uintC count);
     // Increments the number made of count 64-bit digits at ptr[], going
     // upward, and stops at the first digit that does not wrap to zero.
     // Returns 0 in %o0 in that case, 1 if every digit wrapped (or count == 0).
1971         DECLARE_FUNCTION(inc_loop_up)
1972 C(inc_loop_up:) // Input in %o0,%o1, Output in %o0
1973 #if STANDARD_LOOPS
1974 //      srl %o1,0,%o1           // zero-extend %o1 = count
1975         brz,pn %o1,2f
1976        _ nop
1977           ldx [%o0],%o2
1978 1:        add %o0,8,%o0
1979           addcc %o2,1,%o2
1980           bne,pn %xcc,3f        // digit did not wrap to 0: done, no carry
1981          _ stx %o2,[%o0-8]
1982           subcc %o1,1,%o1
1983           bne,a,pt %xcc,1b
1984          __ ldx [%o0],%o2
1985 2:      retl
1986        _ mov 1,%o0
1987 3:      retl
1988        _ mov 0,%o0
1989 #endif
1990 #if COUNTER_LOOPS
1991 //      srl %o1,0,%o1           // zero-extend %o1 = count
1992         brz,pn %o1,2f
1993        _ sub %g0,%o1,%o1        // %o1 = -count
1994         sllx %o1,3,%o1          // %o1 = -8*count
1995         sub %o0,%o1,%o0         // %o0 = &ptr[count]
1996           ldx [%o0+%o1],%o2     // fetch digit
1997 1:        addcc %o2,1,%o2       // increment
1998           bne,pn %xcc,3f        // digit did not wrap to 0: done, no carry
1999          _ stx %o2,[%o0+%o1]    // store
2000           addcc %o1,8,%o1       // decrement counter, advance pointer
2001           bne,a,pt %xcc,1b
2002          __ ldx [%o0+%o1],%o2
2003 2:      retl
2004        _ mov 1,%o0
2005 3:      retl
2006        _ mov 0,%o0
2007 #endif
2008
2009 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Subtracts count 64-bit digits of sourceptr2[] from sourceptr1[] going
     // upward, propagating the borrow, and stores the differences to destptr[].
     // Returns the final borrow (0 or 1) in %o0; count == 0 returns 0.
2010         DECLARE_FUNCTION(sub_loop_up)
2011 C(sub_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
2012 #if STANDARD_LOOPS
2013 //      srl %o3,0,%o3           // zero-extend %o3 = count
2014         brz,pn %o3,2f
2015        _ mov %g0,%g1            // Carry := 0
2016 1:        ldx [%o0],%o4         // source1-digit
2017           add %o0,8,%o0
2018           ldx [%o1],%o5         // source2-digit
2019           add %o1,8,%o1
2020           addcc %o5,%g1,%o5
2021           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
2022           subcc %o4,%o5,%o4
2023           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
2024           stx %o4,[%o2]         // store digit
2025           subcc %o3,1,%o3
2026           bne,pt %xcc,1b
2027          _ add %o2,8,%o2
2028 2:      retl
2029        _ mov %g1,%o0
2030 #endif
2031 #if COUNTER_LOOPS
2032 //      srl %o3,0,%o3           // zero-extend %o3 = count
2033         brz,pn %o3,2f
2034        _ mov %g0,%g1            // Carry := 0
2035         sub %g0,%o3,%o3         // %o3 = -count
2036         sllx %o3,3,%o3          // %o3 = -8*count
2037         sub %o2,8,%o2
2038         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
2039         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
2040         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
2041 1:        ldx [%o1+%o3],%o5     // source2-digit
2042           ldx [%o0+%o3],%o4     // source1-digit
2043           addcc %o5,%g1,%o5
2044           movcc %xcc,0,%g1      // %g1|%o5 := %o5 + old carry %g1
2045           subcc %o4,%o5,%o4
2046           movcs %xcc,1,%g1      // %o4-2^64*%g1 := %o4 - %o5 - old carry %g1
2047           addcc %o3,8,%o3       // decrement counter, advance pointer
2048           bne,pt %xcc,1b
2049          _ stx %o4,[%o2+%o3]    // store digit (%o2 is biased by -8 to match)
2050 2:      retl
2051        _ mov %g1,%o0
2052 #endif
2053
2054 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
     // Subtracts count 64-bit digits of sourceptr2[] from sourceptr1[] going
     // upward with an incoming borrow, and stores the differences to destptr[].
     // The borrow is kept as 0 or -1 in %g1; the final borrow is returned in
     // %o0.  count == 0 returns the incoming carry unchanged.
     // Borrow handling: "subcc %o5,%g1,%o5" adds the old borrow to the source2
     // digit.  With %g1 = -1 its carry flag is set exactly when the digit did
     // NOT wrap, so the borrow has to be cleared on carry SET (movcs); with
     // %g1 = 0 the flag is clear and %g1 stays 0.
2055         DECLARE_FUNCTION(subx_loop_up)
2056 C(subx_loop_up:) // Input in %o0,%o1,%o2,%o3,%o4, clobbers %g1, Output in %o0
2057 #if STANDARD_LOOPS
2058 //      srl %o3,0,%o3           // zero-extend %o3 = count
2059         brz,pn %o3,2f
2060        _ mov %o4,%g1            // carry (0 or -1)
2061 1:        ldx [%o0],%o4         // source1-digit
2062           add %o0,8,%o0
2063           ldx [%o1],%o5         // source2-digit
2064           add %o1,8,%o1
2065           subcc %o5,%g1,%o5
2066           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1
2067           subcc %o4,%o5,%o4
2068           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
2069           stx %o4,[%o2]         // store digit
2070           subcc %o3,1,%o3
2071           bne,pt %xcc,1b
2072          _ add %o2,8,%o2
2073 2:      retl
2074        _ mov %g1,%o0
2075 #endif
2076 #if COUNTER_LOOPS
2077 //      srl %o3,0,%o3           // zero-extend %o3 = count
2078         brz,pn %o3,2f
2079        _ mov %o4,%g1            // carry (0 or -1)
2080         sub %g0,%o3,%o3         // %o3 = -count
2081         sllx %o3,3,%o3          // %o3 = -8*count
2082         sub %o2,8,%o2
2083         sub %o0,%o3,%o0         // %o0 = &sourceptr1[count]
2084         sub %o1,%o3,%o1         // %o1 = &sourceptr2[count]
2085         sub %o2,%o3,%o2         // %o2 = &destptr[count-1]
2086 1:        ldx [%o1+%o3],%o5     // source2-digit
2087           ldx [%o0+%o3],%o4     // source1-digit
2088           subcc %o5,%g1,%o5
2089           movcs %xcc,0,%g1      // %o5-2^64*%g1 := %o5 - old carry %g1
2090           subcc %o4,%o5,%o4
2091           movcs %xcc,-1,%g1     // %o4+2^64*%g1 := %o4 - %o5 + old carry %g1
2092           addcc %o3,8,%o3       // decrement counter, advance pointer
2093           bne,pt %xcc,1b
2094          _ stx %o4,[%o2+%o3]    // store digit (%o2 is biased by -8 to match)
2095 2:      retl
2096        _ mov %g1,%o0
2097 #endif
2098
2099 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Subtracts count 64-bit digits of sourceptr[] from destptr[] in place,
     // going upward and propagating the borrow.
     // Returns the final borrow (0 or 1) in %o0; count == 0 returns 0.
2100         DECLARE_FUNCTION(subfrom_loop_up)
2101 C(subfrom_loop_up:) // Input in %o0,%o1,%o2, Output in %o0
2102 #if STANDARD_LOOPS
2103 //      srl %o2,0,%o2           // zero-extend %o2 = count
2104         brz,pn %o2,2f
2105        _ mov %g0,%o5            // Carry := 0
2106 1:        ldx [%o0],%o3         // source-digit
2107           add %o0,8,%o0
2108           ldx [%o1],%o4         // dest-digit
2109           addcc %o3,%o5,%o3
2110           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
2111           subcc %o4,%o3,%o4
2112           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
2113           stx %o4,[%o1]         // store digit
2114           subcc %o2,1,%o2
2115           bne,pt %xcc,1b
2116          _ add %o1,8,%o1
2117 2:      retl
2118        _ mov %o5,%o0
2119 #endif
2120 #if COUNTER_LOOPS
2121 //      srl %o2,0,%o2           // zero-extend %o2 = count
2122         brz,pn %o2,2f
2123        _ mov %g0,%o5            // Carry := 0
2124         sub %g0,%o2,%o2         // %o2 = -count
2125         sllx %o2,3,%o2          // %o2 = -8*count
2126         sub %o0,%o2,%o0         // %o0 = &sourceptr[count]
2127         sub %o1,%o2,%o1         // %o1 = &destptr[count]
2128           ldx [%o0+%o2],%o3     // source-digit
2129 1:        ldx [%o1+%o2],%o4     // dest-digit
2130           addcc %o3,%o5,%o3
2131           movcc %xcc,0,%o5      // %o5|%o3 := %o3 + old carry %o5
2132           subcc %o4,%o3,%o4
2133           movcs %xcc,1,%o5      // %o4-2^64*%o5 := %o4 - %o3 - old carry %o5
2134           stx %o4,[%o1+%o2]     // store digit
2135           addcc %o2,8,%o2       // decrement counter, advance pointer
2136           bne,a,pt %xcc,1b
2137          __ ldx [%o0+%o2],%o3   // source-digit (only when the loop continues)
2138 2:      retl
2139        _ mov %o5,%o0
2140 #endif
2141
2142 // extern uintD dec_loop_up (uintD* ptr, uintC count);
     // Decrements the number made of count 64-bit digits at ptr[], going
     // upward, and stops at the first digit that does not borrow.
     // Returns 0 in %o0 in that case, -1 if every digit borrowed (or count == 0).
2143         DECLARE_FUNCTION(dec_loop_up)
2144 C(dec_loop_up:) // Input in %o0,%o1, Output in %o0
2145 #if STANDARD_LOOPS
2146 //      srl %o1,0,%o1           // zero-extend %o1 = count
2147         brz,pn %o1,2f
2148        _ nop
2149           ldx [%o0],%o2
2150 1:        add %o0,8,%o0
2151           subcc %o2,1,%o2
2152           bcc,pn %xcc,3f        // no borrow out of this digit: done
2153          _ stx %o2,[%o0-8]
2154           subcc %o1,1,%o1
2155           bne,a,pt %xcc,1b
2156          __ ldx [%o0],%o2
2157 2:      retl
2158        _ mov -1,%o0
2159 3:      retl
2160        _ mov 0,%o0
2161 #endif
2162 #if COUNTER_LOOPS
2163 //      srl %o1,0,%o1           // zero-extend %o1 = count
2164         brz,pn %o1,2f
2165        _ sub %g0,%o1,%o1        // %o1 = -count
2166         sllx %o1,3,%o1          // %o1 = -8*count
2167         sub %o0,%o1,%o0         // %o0 = &ptr[count]
2168           ldx [%o0+%o1],%o2     // fetch digit
2169 1:        subcc %o2,1,%o2       // decrement
2170           bcc,pn %xcc,3f        // no borrow out of this digit: done
2171          _ stx %o2,[%o0+%o1]    // store
2172           addcc %o1,8,%o1       // decrement counter, advance pointer
2173           bne,a,pt %xcc,1b
2174          __ ldx [%o0+%o1],%o2
2175 2:      retl
2176        _ mov -1,%o0
2177 3:      retl
2178        _ mov 0,%o0
2179 #endif
2180
2181 // extern uintD neg_loop_up (uintD* ptr, uintC count);
     // Negates (two's complement) the number made of count 64-bit digits at
     // ptr[], going upward: leading zero digits are skipped, the first nonzero
     // digit is negated, every digit after it is complemented.
     // Returns 0 in %o0 if all digits were zero (or count == 0), else -1.
2182         DECLARE_FUNCTION(neg_loop_up)
2183 C(neg_loop_up:) // Input in %o0,%o1, Output in %o0
2184 #if STANDARD_LOOPS
2185 //      srl %o1,0,%o1           // zero-extend %o1 = count
2186         // search for the first digit /= 0:
2187         brz,pn %o1,2f
2188        _ add %o0,8,%o0
2189 1:        ldx [%o0-8],%o2
2190           subcc %g0,%o2,%o2
2191           bne,pn %xcc,3f
2192          _ subcc %o1,1,%o1      // runs on both paths; 3: tests these flags
2193           bne,pt %xcc,1b
2194          _ add %o0,8,%o0
2195 2:      retl
2196        _ mov 0,%o0
2197 3:      // first digit /= 0 found, from now on there are carries
2198         // negate 1 digit, invert all other digits:
2199         be,pn %xcc,5f
2200        _ stx %o2,[%o0-8]
2201 4:        ldx [%o0],%o2
2202           subcc %o1,1,%o1
2203           xnor %g0,%o2,%o2
2204           stx %o2,[%o0]
2205           bne,pt %xcc,4b
2206          _ add %o0,8,%o0
2207 5:      retl
2208        _ mov -1,%o0
2209 #endif
2210 #if COUNTER_LOOPS
2211 //      srl %o1,0,%o1           // zero-extend %o1 = count
2212         // search for the first digit /= 0:
2213         brz,pn %o1,2f
2214        _ sub %g0,%o1,%o1        // %o1 = -count
2215         sllx %o1,3,%o1          // %o1 = -8*count
2216         sub %o0,%o1,%o0         // %o0 = &ptr[count]
2217           ldx [%o0+%o1],%o2     // fetch digit
2218 1:        subcc %g0,%o2,%o2     // negate, test
2219           bne,pn %xcc,3f
2220          _ addcc %o1,8,%o1      // decrement counter, advance pointer
2221           bne,a,pt %xcc,1b
2222          __ ldx [%o0+%o1],%o2
2223 2:      retl
2224        _ mov 0,%o0
2225 3:      // first digit /= 0 found, from now on there are carries
2226         // invert all other digits:
2227         sub %o1,8,%o1           // step back to the digit just negated
2228         stx %o2,[%o0+%o1]       // store
2229         addcc %o1,8,%o1
2230         be,pn %xcc,5f
2231        _ nop
2232           ldx [%o0+%o1],%o2
2233 4:        xnor %g0,%o2,%o2
2234           stx %o2,[%o0+%o1]
2235           addcc %o1,8,%o1
2236           bne,a,pt %xcc,4b
2237          __ ldx [%o0+%o1],%o2
2238 5:      retl
2239        _ mov -1,%o0
2240 #endif
2241
2242 // extern uintD shift1left_loop_up (uintD* ptr, uintC count);
     // Shifts count 64-bit digits at ptr[] left by one bit in place, going
     // upward; the top bit of each digit becomes the low bit of the next.
     // Returns the bit shifted out of the last digit (0 or 1) in %o0;
     // count == 0 returns 0.
2243         DECLARE_FUNCTION(shift1left_loop_up)
2244 C(shift1left_loop_up:) // Input in %o0,%o1, Output in %o0
2245 //      srl %o1,0,%o1           // zero-extend %o1 = count
2246         brz,pn %o1,2f
2247        _ mov 0,%o3              // Carry := 0
2248 1:        ldx [%o0],%o2         // digit
2249           addcc %o2,%o2,%o4     // shift
2250           add %o4,%o3,%o4       // and add the carry
2251           srlx %o2,63,%o3       // new carry
2252           stx %o4,[%o0]         // store digit
2253           subcc %o1,1,%o1
2254           bne,pt %xcc,1b
2255          _ add %o0,8,%o0
2256 2:      retl
2257        _ mov %o3,%o0
2258
2259 // extern uintD shiftleft_loop_up (uintD* ptr, uintC count, uintC i, uintD carry);
     // Shifts count 64-bit digits at ptr[] left by i bits in place, going
     // upward.  The incoming carry (%o3) is ORed into the first digit; the bits
     // shifted out of each digit are ORed into the next one.
     // Returns the bits shifted out of the last digit in %o0; count == 0
     // returns the incoming carry unchanged.
2260         DECLARE_FUNCTION(shiftleft_loop_up)
2261 C(shiftleft_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1, Output in %o0
2262 //      srl %o1,0,%o1           // zero-extend %o1 = count
2263         brz,pn %o1,2f
2264        _ sub %g0,%o2,%g1        // 64-i (mod 64)
2265 1:        ldx [%o0],%o4         // digit
2266           subcc %o1,1,%o1
2267           sllx %o4,%o2,%o5      // its low (64-i) bits
2268           or %o3,%o5,%o5        // combine with the old carry
2269           stx %o5,[%o0]         // store digit
2270           srlx %o4,%g1,%o3      // its top i bits yield the new carry
2271           bne,pt %xcc,1b
2272          _ add %o0,8,%o0
2273 2:      retl
2274        _ mov %o3,%o0
2275
2276 #endif
2277
2278 // extern uintD shiftleftcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
     // Reads count 64-bit digits from sourceptr[] going upward, shifts them
     // left by i bits (carrying the shifted-out bits into the next digit) and
     // writes the results to destptr[].  The initial carry is 0.
     // Returns the bits shifted out of the last digit in %o0; count == 0
     // returns 0.
2279         DECLARE_FUNCTION(shiftleftcopy_loop_up)
2280 C(shiftleftcopy_loop_up:) // Input in %o0,%o1,%o2,%o3, clobbers %g1,%g2, Output in %o0
2281 //      srl %o2,0,%o2           // zero-extend %o2 = count
2282         brz,pn %o2,2f
2283        _ mov 0,%o4              // Carry := 0
2284         sub %g0,%o3,%g1         // 64-i (mod 64)
2285 1:        ldx [%o0],%o5         // digit
2286           subcc %o2,1,%o2
2287           sllx %o5,%o3,%g2      // its low (64-i) bits
2288           or %o4,%g2,%g2        // combine with the old carry
2289           stx %g2,[%o1]         // store digit
2290           add %o1,8,%o1
2291           srlx %o5,%g1,%o4      // its top i bits yield the new carry
2292           bne,pt %xcc,1b
2293          _ add %o0,8,%o0
2294 2:      retl
2295        _ mov %o4,%o0
2296
2297 #if !CL_DS_BIG_ENDIAN_P
2298
2299 // extern uintD shift1right_loop_down (uintD* ptr, uintC count, uintD carry);
     // Shifts count 64-bit digits right by one bit in place, walking downward
     // from ptr[-1].  Bit 0 of the incoming carry (%o2) is moved to bit 63 and
     // ORed into the first digit; bit 0 of each digit becomes bit 63 of the next.
     // Returns the last shifted-out bit, positioned at bit 63, in %o0; for
     // count == 0 that is the incoming carry shifted left by 63.
2300         DECLARE_FUNCTION(shift1right_loop_down)
2301 C(shift1right_loop_down:) // Input in %o0,%o1,%o2, Output in %o0
2302 //      srl %o1,0,%o1           // zero-extend %o1 = count
2303         brz,pn %o1,2f
2304        _ sllx %o2,63,%o2        // carry
2305         sub %o0,8,%o0
2306 1:        ldx [%o0],%o3         // digit
2307           subcc %o1,1,%o1
2308           srlx %o3,1,%o4        // shift
2309           or %o2,%o4,%o4        // and combine with the old carry
2310           stx %o4,[%o0]         // and store
2311           sllx %o3,63,%o2       // new carry
2312           bne,pt %xcc,1b
2313          _ sub %o0,8,%o0
2314 2:      retl
2315        _ mov %o2,%o0
2316
2317 // extern uintD shiftright_loop_down (uintD* ptr, uintC count, uintC i);
     // Shifts count 64-bit digits right by i bits in place (logical shift),
     // walking downward from ptr[-1].  The initial carry is 0; the bits shifted
     // out of each digit are ORed into the top of the next one.
     // Returns the bits shifted out of the last digit, left-aligned, in %o0;
     // count == 0 returns 0.
2318         DECLARE_FUNCTION(shiftright_loop_down)
2319 C(shiftright_loop_down:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
2320 //      srl %o1,0,%o1           // zero-extend %o1 = count
2321         sub %g0,%o2,%g1         // 64-i (mod 64)
2322         brz,pn %o1,2f
2323        _ or %g0,%g0,%o3         // Carry := 0
2324         sub %o0,8,%o0
2325 1:        ldx [%o0],%o4         // digit
2326           subcc %o1,1,%o1
2327           srlx %o4,%o2,%o5      // shift
2328           or %o3,%o5,%o5        // and combine with the old carry
2329           stx %o5,[%o0]         // and store
2330           sllx %o4,%g1,%o3      // new carry
2331           bne,pt %xcc,1b
2332          _ sub %o0,8,%o0
2333 2:      retl
2334        _ mov %o3,%o0
2335
2336 // extern uintD shiftrightsigned_loop_down (uintD* ptr, uintC count, uintC i);
     // Shifts count 64-bit digits right by i bits in place, walking downward
     // from ptr[-1].  The first digit is shifted arithmetically (srax), all
     // following digits logically with the carry ORed in.
     // Returns the bits shifted out of the last digit, left-aligned, in %o0.
     // NOTE: the first digit is processed before count is tested, so this code
     // presumably relies on callers passing count >= 1 -- confirm with callers.
2337         DECLARE_FUNCTION(shiftrightsigned_loop_down)
2338 C(shiftrightsigned_loop_down:) // Input in %o0,%o1,%o2, clobbers %g1, Output in %o0
2339 //      srl %o1,0,%o1           // zero-extend %o1 = count
2340         ldx [%o0-8],%o4         // first digit
2341         sub %g0,%o2,%g1         // 64-i (mod 64)
2342         srax %o4,%o2,%o5        // shift
2343         stx %o5,[%o0-8]         // and store
2344         sllx %o4,%g1,%o3        // new carry
2345         subcc %o1,1,%o1
2346         be,pn %xcc,2f
2347        _ sub %o0,16,%o0
2348 1:        ldx [%o0],%o4         // digit
2349           subcc %o1,1,%o1
2350           srlx %o4,%o2,%o5      // shift
2351           or %o3,%o5,%o5        // and combine with the old carry
2352           stx %o5,[%o0]         // and store
2353           sllx %o4,%g1,%o3      // new carry
2354           bne,pt %xcc,1b
2355          _ sub %o0,8,%o0
2356 2:      retl
2357        _ mov %o3,%o0
2358
2359 // extern uintD shiftrightcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
     // Reads the count 64-bit digits sourceptr[-1] .. sourceptr[-count],
     // shifts them right by i bits as one long number and writes the result to
     // destptr[-1] .. destptr[-count], both walking towards lower addresses.
     // The low i bits of carry enter at the top of the first digit written.
     // Returns the bits shifted out of the last digit, left-aligned in the
     // result (for count = 0 this is carry << (64-i)).
     // NOTE(review): presumably 0 < i < 64 is required - confirm against the
     // callers.
2360         DECLARE_FUNCTION(shiftrightcopy_loop_down)
2361 C(shiftrightcopy_loop_down:) // In: %o0 = sourceptr, %o1 = destptr, %o2 = count, %o3 = i, %o4 = carry. Clobbers %g1,%g2. Out: %o0
2362 //      srl %o2,0,%o2           // zero-extend %o2 = count (disabled; the file header says the caller already did this)
2363         sub %g0,%o3,%g1         // %g1 = 64-i (mod 64), the left-shift amount for the carry
2364         brz,pn %o2,2f           // count = 0: nothing to copy
2365        _ sllx %o4,%g1,%g2       // delay slot: first carry = carry << (64-i)
2366           sub %o0,8,%o0         // step down to the first source digit, sourceptr[-1]
2367 1:        ldx [%o0],%o4         // load the source digit
2368           sub %o1,8,%o1         // step down to the matching destination digit
2369           srlx %o4,%o3,%o5      // shift it right by i
2370           or %g2,%o5,%o5        // and merge in the old carry
2371           stx %o5,[%o1]         // and store it to the destination
2372           sllx %o4,%g1,%g2      // new carry: the low i bits of the source digit, left-aligned
2373           subcc %o2,1,%o2       // count down; sets the flags tested by bne below
2374           bne,pt %xcc,1b        // loop while count is not yet 0
2375          _ sub %o0,8,%o0        // delay slot: step down to the next source digit
2376 2:      retl
2377        _ mov %g2,%o0            // delay slot: return the carry
2378
2379 // extern uintD mulusmall_loop_up (uintD digit, uintD* ptr, uintC len, uintD newdigit);
     // For each of the len 64-bit digits ptr[0] .. ptr[len-1], in ascending
     // order: replaces the digit x by the low 64 bits of x*digit + carry and
     // keeps the overflow as the next carry. The carry starts as newdigit.
     // Returns the final carry (newdigit itself for len = 0).
     // The comments inherited from the original describe digit as a 6-bit
     // number and the carry as small; the split into two 32-bit halves below
     // relies on each half times digit fitting into 64 bits.
2380         DECLARE_FUNCTION(mulusmall_loop_up)
2381 C(mulusmall_loop_up:) // In: %o0 = digit, %o1 = ptr, %o2 = len, %o3 = newdigit. Out: %o0. Clobbers %g1
2382 //      srl %o2,0,%o2           // zero-extend %o2 = len (disabled; the file header says the caller already did this)
2383         brz,pn %o2,2f           // len = 0: return newdigit unchanged
2384        _ nop
2385 1:        // multiply the next digit [%o1] by the 6-bit number %o0
2386           // and add the small carry %o3:
2387           ldx [%o1],%o4         // x = next digit
2388           sub %o2,1,%o2         // count down (tested by brnz at the bottom)
2389           srlx %o4,32,%o5       // high32(x)
2390           srl %o4,0,%o4         // low32(x)
2391           mulx %o4,%o0,%o4      // low32(x)*digit
2392           mulx %o5,%o0,%o5      // high32(x)*digit
2393           sllx %o5,32,%g1       // low32(high32(x)*digit)*2^32
2394           add %g1,%o3,%g1       // plus carry
2395           addcc %o4,%g1,%o4     // plus low32(x)*digit; the carry flag records a 64-bit overflow
2396           srlx %o5,32,%o3       // high32(high32(x)*digit)
2397           add %o3,1,%g1         // candidate carry if the addcc above overflowed
2398           movcs %xcc,%g1,%o3    // new carry
2399           stx %o4,[%o1]         // store the new digit
2400           brnz,pt %o2,1b        // loop while len is not yet 0
2401          _ add %o1,8,%o1        // delay slot: step up to the next digit
2402 2:      retl
2403        _ mov %o3,%o0            // delay slot: return the carry
2404
2405 // extern void mulu_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len 64-bit digits sourceptr[0] .. sourceptr[len-1] by
     // digit, in ascending order, and stores the low 64 bits of each
     // product-plus-carry into destptr[0] .. destptr[len-1]. The final carry
     // is stored into destptr[len], so len+1 digits are written.
     // Each 64x64-bit product is assembled from four 32x32-bit mulx products.
     // The loop body runs once before len is tested, so len = 0 is not
     // handled here.
2406         DECLARE_FUNCTION(mulu_loop_up)
2407 C(mulu_loop_up:) // In: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len
2408         save %sp,-192,%sp       // new register window with a 192-byte stack frame
2409         mov 0,%l0               // carry := 0
2410         srlx %i0,32,%l1         // %l1 = high32(digit)
2411         srl %i0,0,%l2           // %l2 = low32(digit)
2412         mov 1,%l3
2413         sllx %l3,32,%l3         // %l3 = 2^32
2414         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr, so only %i2 needs to advance
2415 1:        ldx [%i1+%i2],%o0     // next source digit
2416           subcc %i3,1,%i3       // count down; the loop test below uses brnz on %i3, not these flags
2417           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
2418           srlx %o0,32,%o1       // %o1 = high32(source digit)
2419           srl %o0,0,%o2         // %o2 = low32(source digit)
2420           mulx %l1,%o1,%o3      // high part
2421           mulx %l1,%o2,%o4      // first mid part
2422           mulx %l2,%o1,%o1      // second mid part
2423           mulx %l2,%o2,%o2      // low part
2424           srlx %o2,32,%o5       // upper half of the low part
2425           add %o4,%o5,%o4       // add to one of the mid parts, no carry
2426           addcc %o4,%o1,%o4     // add other mid part
2427           add %o3,%l3,%o5       // candidate high part if the addcc above overflowed
2428           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
2429           srlx %o4,32,%o5       // high32(midparts)
2430           sllx %o4,32,%o4       // low32(midparts) * 2^32
2431           srl %o2,0,%o2         // low32(lowpart)
2432           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
2433           addcc %o0,%l0,%o0     // add the old carry
2434           add %o3,%o5,%l0       // add high32(midparts) to high part
2435           add %l0,1,%o5         // candidate carry if the addcc above overflowed
2436           movcs %xcc,%o5,%l0    // new carry
2437           // multiplication done
2438           stx %o0,[%i2]         // store the low digit
2439           brnz,pt %i3,1b        // loop while len is not yet 0
2440          _ add %i2,8,%i2        // delay slot: step up to the next digit
2441         stx %l0,[%i2]           // store the final carry at destptr[len]
2442         ret
2443        _ restore                // delay slot: pop the register window
2444
2445 // extern uintD muluadd_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // For k = 0 .. len-1, in ascending order: adds digit*sourceptr[k] plus
     // the running carry to destptr[k], stores the low 64 bits back into
     // destptr[k] and keeps the overflow as the next carry (initially 0).
     // Returns the final carry in %i0, which is the caller-side %o0 once the
     // register window has been restored.
     // Each 64x64-bit product is assembled from four 32x32-bit mulx products.
     // The loop body runs once before len is tested, so len = 0 is not
     // handled here.
2446         DECLARE_FUNCTION(muluadd_loop_up)
2447 C(muluadd_loop_up:) // In: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len. Out: %i0
2448         save %sp,-192,%sp       // new register window with a 192-byte stack frame
2449         mov 0,%l0               // carry := 0
2450         srlx %i0,32,%l1         // %l1 = high32(digit)
2451         srl %i0,0,%l2           // %l2 = low32(digit)
2452         mov 1,%l3
2453         sllx %l3,32,%l3         // %l3 = 2^32
2454         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr, so only %i2 needs to advance
2455 1:        ldx [%i1+%i2],%o0     // next source digit
2456           ldx [%i2],%i4         // *destptr
2457           subcc %i3,1,%i3       // count down; the loop test below uses brnz on %i3, not these flags
2458           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
2459           srlx %o0,32,%o1       // %o1 = high32(source digit)
2460           srl %o0,0,%o2         // %o2 = low32(source digit)
2461           mulx %l1,%o1,%o3      // high part
2462           mulx %l1,%o2,%o4      // first mid part
2463           mulx %l2,%o1,%o1      // second mid part
2464           mulx %l2,%o2,%o2      // low part
2465           srlx %o2,32,%o5       // upper half of the low part
2466           add %o4,%o5,%o4       // add to one of the mid parts, no carry
2467           addcc %o4,%o1,%o4     // add other mid part
2468           add %o3,%l3,%o5       // candidate high part if the addcc above overflowed
2469           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
2470           srlx %o4,32,%o5       // high32(midparts)
2471           sllx %o4,32,%o4       // low32(midparts) * 2^32
2472           srl %o2,0,%o2         // low32(lowpart)
2473           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
2474           addcc %o0,%l0,%o0     // add the old carry
2475           add %o3,%o5,%l0       // add high32(midparts) to high part
2476           add %l0,1,%o5         // candidate carry if the addcc above overflowed
2477           movcs %xcc,%o5,%l0    // new carry
2478           // multiplication done
2479           addcc %i4,%o0,%o0     // add the old *destptr
2480           add %l0,1,%o2         // candidate carry if that addition overflowed
2481           movcs %xcc,%o2,%l0    // new carry
2482           stx %o0,[%i2]         // store the low digit
2483           brnz,pt %i3,1b        // loop while len is not yet 0
2484          _ add %i2,8,%i2        // delay slot: step up to the next digit
2485         mov %l0,%i0             // return the final carry
2486         ret
2487        _ restore                // delay slot: pop the register window
2488
2489 // extern uintD mulusub_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // For k = 0 .. len-1, in ascending order: subtracts the low 64 bits of
     // digit*sourceptr[k] plus the running carry from destptr[k], stores the
     // difference back into destptr[k], and carries the high 64 bits of the
     // product plus any borrow into the next step (the carry starts at 0).
     // Returns the final carry in %i0, which is the caller-side %o0 once the
     // register window has been restored.
     // Each 64x64-bit product is assembled from four 32x32-bit mulx products.
     // The loop body runs once before len is tested, so len = 0 is not
     // handled here.
2490         DECLARE_FUNCTION(mulusub_loop_up)
2491 C(mulusub_loop_up:) // In: %i0 = digit, %i1 = sourceptr, %i2 = destptr, %i3 = len. Out: %i0
2492         save %sp,-192,%sp       // new register window with a 192-byte stack frame
2493         mov 0,%l0               // carry := 0
2494         srlx %i0,32,%l1         // %l1 = high32(digit)
2495         srl %i0,0,%l2           // %l2 = low32(digit)
2496         mov 1,%l3
2497         sllx %l3,32,%l3         // %l3 = 2^32
2498         sub %i1,%i2,%i1         // %i1 = sourceptr - destptr, so only %i2 needs to advance
2499 1:        ldx [%i1+%i2],%o0     // next source digit
2500           ldx [%i2],%i4         // *destptr
2501           subcc %i3,1,%i3       // count down; the loop test below uses brnz on %i3, not these flags
2502           // multiply by digit: (%l1*2^32+%l2) * %o0 + %l0 -> %l0|%o0
2503           srlx %o0,32,%o1       // %o1 = high32(source digit)
2504           srl %o0,0,%o2         // %o2 = low32(source digit)
2505           mulx %l1,%o1,%o3      // high part
2506           mulx %l1,%o2,%o4      // first mid part
2507           mulx %l2,%o1,%o1      // second mid part
2508           mulx %l2,%o2,%o2      // low part
2509           srlx %o2,32,%o5       // upper half of the low part
2510           add %o4,%o5,%o4       // add to one of the mid parts, no carry
2511           addcc %o4,%o1,%o4     // add other mid part
2512           add %o3,%l3,%o5       // candidate high part if the addcc above overflowed
2513           movcs %xcc,%o5,%o3    // if carry, add 2^32 to the high part
2514           srlx %o4,32,%o5       // high32(midparts)
2515           sllx %o4,32,%o4       // low32(midparts) * 2^32
2516           srl %o2,0,%o2         // low32(lowpart)
2517           add %o2,%o4,%o0       // combine low32(midparts) and low32(lowpart)
2518           addcc %o0,%l0,%o0     // add the old carry
2519           add %o3,%o5,%l0       // add high32(midparts) to high part
2520           add %l0,1,%o5         // candidate carry if the addcc above overflowed
2521           movcs %xcc,%o5,%l0    // new carry
2522           // multiplication done
2523           subcc %i4,%o0,%o0     // subtract from the old *destptr
2524           add %l0,1,%o2         // candidate carry if that subtraction borrowed
2525           movcs %xcc,%o2,%l0    // new carry
2526           stx %o0,[%i2]         // store the low digit
2527           brnz,pt %i3,1b        // loop while len is not yet 0
2528          _ add %i2,8,%i2        // delay slot: step up to the next digit
2529         mov %l0,%i0             // return the final carry
2530         ret
2531        _ restore                // delay slot: pop the register window
2532
2533 #endif
2534
2535 // extern void shiftxor_loop_up (uintD* xptr, const uintD* yptr, uintC count, uintC i);
     // XORs the count 64-bit digits yptr[0] .. yptr[count-1], shifted left by
     // i bits as one long number, into the digits starting at xptr[0], in
     // ascending order. The top i bits of each yptr digit spill into the next
     // higher xptr digit, so for count > 0 the digit xptr[count] is also read
     // and written. For count = 0 nothing is touched.
     // NOTE(review): presumably 0 < i < 64 is required - confirm against the
     // callers.
2536         DECLARE_FUNCTION(shiftxor_loop_up)
2537 C(shiftxor_loop_up:) // In: %o0 = xptr, %o1 = yptr, %o2 = count, %o3 = i. Clobbers %g1,%g2
2538 //      srl %o2,0,%o2           // zero-extend %o2 = count (disabled; the file header says the caller already did this)
2539         brz,pn %o2,2f           // count = 0: nothing to do
2540        _ sub %g0,%o3,%g1        // delay slot: %g1 = 64-i (mod 64)
2541         sub %o1,%o0,%o1         // %o1 = yptr - xptr, so only %o0 needs to advance
2542         ldx [%o0],%o4           // fetch *xptr
2543 1:        ldx [%o0+%o1],%o5     // fetch *yptr
2544           subcc %o2,1,%o2       // count down; sets the flags tested by bne below
2545           sllx %o5,%o3,%g2      // its low (64-i) bits, shifted up by i
2546           xor %o4,%g2,%o4       // combine them with the modified *xptr
2547           stx %o4,[%o0]         // and store it back
2548           add %o0,8,%o0         // step up to the next digit
2549           srlx %o5,%g1,%g2      // top i bits of *yptr
2550           ldx [%o0],%o4         // fetch the next *xptr ahead of time
2551           bne,pt %xcc,1b        // loop while count is not yet 0
2552          _ xor %o4,%g2,%o4      // delay slot: and combine the spilled bits with it
2553         stx %o4,[%o0]           // store the last modified digit, xptr[count]
2554 2:      retl
2555        _ nop
2556