src/base/digitseq/cl_asm_i386_.cc

   1 // External routines for ARILEV1.D
   2 // Processor: 80386 in native mode
   3 // Assembler syntax: GNU or SUN, moves go from left to right
   4 // Compiler: GNU C or SUN C
   5 // Parameter passing: on the stack, 4(%esp),8(%esp),...
   6 // Registers: %eax,%edx,%ecx may always be clobbered, everything else must be saved.
   7 // Result passing: in %eax
   8 // Settings: intCsize=32, intDsize=32.
   9
  10 // Bruno Haible 14.8.1992
  11 // Partly copied from Bernhard Degel's "v-i386.s"
  12
  13   #ifdef ASM_UNDERSCORE
  14     #if defined(__STDC__) || defined (__cplusplus)
  15       #define C(entrypoint) _##entrypoint
  16     #else
  17       #define C(entrypoint) _/**/entrypoint
  18     #endif
  19   #else
  20     #define C(entrypoint) entrypoint
  21   #endif
  22   #ifdef ASM_UNDERSCORE
  23     #if defined(__STDC__) || defined (__cplusplus)
  24       #define L(label) L##label
  25     #else
  26       #define L(label) L/**/label
  27     #endif
  28   #else
  29     #if defined(__STDC__) || defined (__cplusplus)
  30       #define L(label) .L##label
  31     #else
  32       #define L(label) .L/**/label
  33     #endif
  34   #endif
  35   #if defined(ASM_UNDERSCORE) || defined(COHERENT) /* defined(__EMX__) || defined(__GO32__) || defined(linux) || defined(__386BSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(COHERENT) || ... */
  36     // GNU assembler or MWC assembler: repz is spelled repe, shcl expands to an explicit "%cl," operand
  37     #define repz     repe
  38     #define shcl     %cl,
  39   #else /* defined(sun) || ... */
  40     // SUN assembler or Consensys assembler: jecxz is emulated by "orl %ecx,%ecx ; jz", shcl expands to nothing
  41     #define jecxz    orl %ecx,%ecx ; jz
  42     #define shcl
  43   #endif
  44   #if defined(__EMX__)
  45     // The direction flag is cleared by default
  46     #define dir0start
  47     #define dir0end
  48     #define dir1start  std
  49     #define dir1end    cld
  50   #elif 1
  51     // We play it safe: clear the direction flag before upward string operations and again after downward ones.
  52     #define dir0start  cld
  53     #define dir0end
  54     #define dir1start  std
  55     #define dir1end    cld
  56   #else
  57     // The direction flag may be modified at will (branch is disabled by the "#elif 1" above)
  58     #define dir0start  cld
  59     #define dir0end
  60     #define dir1start  std
  61     #define dir1end
  62   #endif
  63   // Alignment. Note that some assemblers need ".align 3,0x90" whereas other
  64   // assemblers don't like this syntax. So we put in the "nop"s by hand.
  65   #if defined(ASM_UNDERSCORE) && !(defined(__CYGWIN32__) || defined(__MINGW32__))
  66     // BSD syntax assembler (presumably the .align operand is a power-of-two exponent here -- TODO confirm)
  67     #define ALIGN  .align 3
  68   #else
  69     // ELF syntax assembler (presumably the .align operand is a byte count here -- TODO confirm)
  70     #define ALIGN  .align 8
  71   #endif
  72   // When this file is compiled into a shared library, ELF linkers need to
  73   // know which symbols are functions.
  74   #if defined(__svr4__) || defined(__ELF__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__ROSE__) || defined(_SEQUENT_) || defined(DGUX) || defined(_SCO_COFF) || defined(_SCO_ELF)
  75     #define DECLARE_FUNCTION(name) .type C(name),@function
  76   #else
  77     #define DECLARE_FUNCTION(name)
  78   #endif
  79
  80             .text                   // everything below is assembled into the code section
  81
  82             .globl C(copy_loop_up)  // these symbols are exported regardless of CL_DS_BIG_ENDIAN_P
  83             .globl C(copy_loop_down)
  84             .globl C(fill_loop_up)
  85             .globl C(fill_loop_down)
  86             .globl C(clear_loop_up)
  87             .globl C(clear_loop_down)
  88             .globl C(test_loop_up)
  89             .globl C(test_loop_down)
  90             .globl C(xor_loop_up)
  91             .globl C(compare_loop_up)
  92             .globl C(shiftleftcopy_loop_up)
  93             .globl C(shiftxor_loop_up)
  94 #if CL_DS_BIG_ENDIAN_P /* symbols exported only when CL_DS_BIG_ENDIAN_P is nonzero */
  95             .globl C(or_loop_up)
  96             .globl C(and_loop_up)
  97             .globl C(eqv_loop_up)
  98             .globl C(nand_loop_up)
  99             .globl C(nor_loop_up)
 100             .globl C(andc2_loop_up)
 101             .globl C(orc2_loop_up)
 102             .globl C(not_loop_up)
 103             .globl C(and_test_loop_up)
 104             .globl C(add_loop_down)
 105             .globl C(addto_loop_down)
 106             .globl C(inc_loop_down)
 107             .globl C(sub_loop_down)
 108             .globl C(subx_loop_down)
 109             .globl C(subfrom_loop_down)
 110             .globl C(dec_loop_down)
 111             .globl C(neg_loop_down)
 112             .globl C(shift1left_loop_down)
 113             .globl C(shiftleft_loop_down)
 114             .globl C(shiftleftcopy_loop_down)
 115             .globl C(shift1right_loop_up)
 116             .globl C(shiftright_loop_up)
 117             .globl C(shiftrightsigned_loop_up)
 118             .globl C(shiftrightcopy_loop_up)
 119             .globl C(mulusmall_loop_down)
 120             .globl C(mulu_loop_down)
 121             .globl C(muluadd_loop_down)
 122             .globl C(mulusub_loop_down)
 123             .globl C(divu_loop_up)
 124             .globl C(divucopy_loop_up)
 125 #else /* !CL_DS_BIG_ENDIAN_P: the routines walking in the opposite direction are exported instead */
 126             .globl C(or_loop_down)
 127             .globl C(xor_loop_down)
 128             .globl C(and_loop_down)
 129             .globl C(eqv_loop_down)
 130             .globl C(nand_loop_down)
 131             .globl C(nor_loop_down)
 132             .globl C(andc2_loop_down)
 133             .globl C(orc2_loop_down)
 134             .globl C(not_loop_down)
 135             .globl C(and_test_loop_down)
 136             .globl C(compare_loop_down)
 137             .globl C(add_loop_up)
 138             .globl C(addto_loop_up)
 139             .globl C(inc_loop_up)
 140             .globl C(sub_loop_up)
 141             .globl C(subx_loop_up)
 142             .globl C(subfrom_loop_up)
 143             .globl C(dec_loop_up)
 144             .globl C(neg_loop_up)
 145             .globl C(shift1left_loop_up)
 146             .globl C(shiftleft_loop_up)
 147             .globl C(shift1right_loop_down)
 148             .globl C(shiftright_loop_down)
 149             .globl C(shiftrightsigned_loop_down)
 150             .globl C(shiftrightcopy_loop_down)
 151             .globl C(mulusmall_loop_up)
 152             .globl C(mulu_loop_up)
 153             .globl C(muluadd_loop_up)
 154             .globl C(mulusub_loop_up)
 155             .globl C(divu_loop_down)
 156             .globl C(divucopy_loop_down)
 157 #endif /* CL_DS_BIG_ENDIAN_P */
 158
 159 #ifndef __GNUC__ /* with GNU C, mulu32() is a macro that multiplies inline */
 160
 161 // extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
 162 // 2^32*hi+lo := arg1*arg2.  lo is returned in %eax; hi is stored to the symbol mulu32_high.
 163             .globl C(mulu32_)
 164             ALIGN
 165             DECLARE_FUNCTION(mulu32_)
 166 C(mulu32_:)
 167             movl    4(%esp),%eax    // arg1
 168             mull    8(%esp)         // %edx|%eax := arg1 * arg2
 169             movl    %edx,C(mulu32_high) // store hi = %edx
 170             ret                     // %eax = lo is the result
 171
 172 #endif
 173
 174 #ifndef __GNUC__ /* with GNU C, divu_6432_3232() is a macro that divides inline */
 175
 176 // extern struct { uint32 q; uint32 r; } divu_6432_3232_ (uint32 xhi, uint32 xlo, uint32 y);
 177 // Writes x = 2^32*xhi+xlo = q*y+r. It is assumed that 0 <= x < 2^32*y. q is returned in %eax; r is stored to the symbol divu_32_rest.
 178             .globl C(divu_6432_3232_)
 179             ALIGN
 180             DECLARE_FUNCTION(divu_6432_3232_)
 181 C(divu_6432_3232_:)
 182             movl    4(%esp),%edx
 183             movl    8(%esp),%eax
 184             divl    12(%esp)       // divide x = %edx|%eax by y
 185             movl    %edx,C(divu_32_rest) // store the remainder r = %edx
 186             ret                    // quotient %eax = q is the result
 187
 188 #endif
 189
 190 // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count);  Copies count 32-bit digits upwards from sourceptr to destptr; returns destptr advanced past the last digit written.
 191             ALIGN
 192             DECLARE_FUNCTION(copy_loop_up)
 193 C(copy_loop_up:)
 194             movl    %edi,%edx       // save %edi
 195             movl    %esi,%eax       // save %esi
 196             movl    4(%esp),%esi    // %esi = sourceptr
 197             movl    8(%esp),%edi    // %edi = destptr
 198             movl    12(%esp),%ecx   // %ecx = count
 199             dir0start
 200             rep
 201               movsl                 // %ecx times, upwards: (%edi) := (%esi)
 202             dir0end
 203             movl    %eax,%esi       // restore %esi
 204             movl    %edi,%eax       // result = final %edi
 205             movl    %edx,%edi       // restore %edi
 206             ret
 207
 208 // extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count);  Copies the count 32-bit digits below sourceptr to below destptr, walking downwards; returns destptr moved down to the last digit written.
 209             ALIGN
 210             DECLARE_FUNCTION(copy_loop_down)
 211 C(copy_loop_down:)
 212             movl    %edi,%edx       // save %edi
 213             movl    %esi,%eax       // save %esi
 214             movl    4(%esp),%esi    // %esi = sourceptr
 215             movl    8(%esp),%edi    // %edi = destptr
 216             movl    12(%esp),%ecx   // %ecx = count
 217             leal    -4(%esi),%esi   // point at the first digit to read (the pointers are one past it)
 218             leal    -4(%edi),%edi   // point at the first digit to write
 219             dir1start
 220             rep
 221               movsl                 // %ecx times, downwards: (%edi) := (%esi)
 222             dir1end
 223             movl    %eax,%esi       // restore %esi
 224             leal    4(%edi),%eax    // result = final %edi, undoing the -4 bias
 225             movl    %edx,%edi       // restore %edi
 226             ret
 227
 228 // extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler);  Stores filler into count 32-bit digits upwards from destptr; returns destptr advanced past the last digit written.
 229             ALIGN
 230             DECLARE_FUNCTION(fill_loop_up)
 231 C(fill_loop_up:)
 232             movl    %edi,%edx       // save %edi
 233             movl    4(%esp),%edi    // %edi = destptr
 234             movl    8(%esp),%ecx    // %ecx = count
 235             movl    12(%esp),%eax   // %eax = filler
 236             dir0start
 237             rep
 238               stosl                 // %ecx times, upwards: (%edi) := %eax
 239             dir0end
 240             movl    %edi,%eax       // result = final %edi
 241             movl    %edx,%edi       // restore %edi
 242             ret
 243
 244 // extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler);  Stores filler into the count 32-bit digits below destptr, walking downwards; returns destptr moved down to the last digit written.
 245             ALIGN
 246             DECLARE_FUNCTION(fill_loop_down)
 247 C(fill_loop_down:)
 248             movl    %edi,%edx       // save %edi
 249             movl    4(%esp),%edi    // %edi = destptr
 250             movl    8(%esp),%ecx    // %ecx = count
 251             movl    12(%esp),%eax   // %eax = filler
 252             leal    -4(%edi),%edi   // point at the first digit to write (destptr is one past it)
 253             dir1start
 254             rep
 255               stosl                 // %ecx times, downwards: (%edi) := %eax
 256             dir1end
 257             leal    4(%edi),%eax    // result = final %edi, undoing the -4 bias
 258             movl    %edx,%edi       // restore %edi
 259             ret
 260
 261 // extern uintD* clear_loop_up (uintD* destptr, uintC count);  Stores 0 into count 32-bit digits upwards from destptr; returns destptr advanced past the last digit written.
 262             ALIGN
 263             DECLARE_FUNCTION(clear_loop_up)
 264 C(clear_loop_up:)
 265             movl    %edi,%edx       // save %edi
 266             movl    4(%esp),%edi    // %edi = destptr
 267             movl    8(%esp),%ecx    // %ecx = count
 268             xorl    %eax,%eax       // %eax = 0
 269             dir0start
 270             rep
 271               stosl                 // %ecx times, upwards: (%edi) := %eax
 272             dir0end
 273             movl    %edi,%eax       // result = final %edi
 274             movl    %edx,%edi       // restore %edi
 275             ret
 276
 277 // extern uintD* clear_loop_down (uintD* destptr, uintC count);  Stores 0 into the count 32-bit digits below destptr, walking downwards; returns destptr moved down to the last digit written.
 278             ALIGN
 279             DECLARE_FUNCTION(clear_loop_down)
 280 C(clear_loop_down:)
 281             movl    %edi,%edx       // save %edi
 282             movl    4(%esp),%edi    // %edi = destptr
 283             movl    8(%esp),%ecx    // %ecx = count
 284             leal    -4(%edi),%edi   // point at the first digit to write (destptr is one past it)
 285             xorl    %eax,%eax       // %eax = 0
 286             dir1start
 287             rep
 288               stosl                 // %ecx times, downwards: (%edi) := %eax
 289             dir1end
 290             leal    4(%edi),%eax    // result = final %edi, undoing the -4 bias
 291             movl    %edx,%edi       // restore %edi
 292             ret
 293
 294 // extern boolean test_loop_up (uintD* ptr, uintC count);  Returns 1 if any of the count 32-bit digits upwards from ptr is nonzero, else 0 (also 0 when count = 0).
 295             ALIGN
 296             DECLARE_FUNCTION(test_loop_up)
 297 C(test_loop_up:)
 298             movl    %edi,%edx       // save %edi
 299             movl    4(%esp),%edi    // %edi = ptr
 300             movl    8(%esp),%ecx    // %ecx = count
 301             xorl    %eax,%eax       // %eax = 0; this also sets Z, which is what survives when count = 0
 302             dir0start
 303             repz                    // if %ecx > 0:
 304               scasl                 // %ecx times, upwards: test (%edi) against %eax = 0
 305                                     // and keep looping while Z, i.e. while (%edi)=0.
 306             dir0end
 307             // %eax is still 0 here.
 308             jz      L(tlu1)         // all digits =0 -> result 0
 309             incl    %eax            // result 1
 310 L(tlu1:)    movl    %edx,%edi       // restore %edi
 311             ret
 312
 313 // extern boolean test_loop_down (uintD* ptr, uintC count);  Returns 1 if any of the count 32-bit digits below ptr (walking downwards) is nonzero, else 0 (also 0 when count = 0).
 314             ALIGN
 315             DECLARE_FUNCTION(test_loop_down)
 316 C(test_loop_down:)
 317             movl    %edi,%edx       // save %edi
 318             movl    4(%esp),%edi    // %edi = ptr
 319             movl    8(%esp),%ecx    // %ecx = count
 320             xorl    %eax,%eax       // %eax = 0; this also sets Z, which is what survives when count = 0
 321             leal    -4(%edi),%edi   // point at the first digit to test (ptr is one past it); leal leaves the flags alone
 322             dir1start
 323             repz                    // if %ecx > 0:
 324               scasl                 // %ecx times, downwards: test (%edi) against %eax = 0
 325                                     // and keep looping while Z, i.e. while (%edi)=0.
 326             dir1end
 327             // %eax is still 0 here.
 328             jz      L(tld1)         // all digits =0 -> result 0
 329             incl    %eax            // result 1
 330 L(tld1:)    movl    %edx,%edi       // restore %edi
 331             ret
 332
 333 #if CL_DS_BIG_ENDIAN_P
 334
 335 // extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr |= *yptr.
 336             ALIGN
 337             DECLARE_FUNCTION(or_loop_up)
 338 C(or_loop_up:)
 339             pushl   %esi            // save %esi
 340             movl    8(%esp),%edx    // %edx = xptr
 341             movl    12(%esp),%esi   // %esi = yptr
 342             movl    16(%esp),%ecx   // %ecx = count
 343             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 344             jecxz   L(olu2)         // %ecx = 0 ? then nothing to do
 345 L(olu1:)      movl    (%edx,%esi),%eax // *yptr
 346               orl     %eax,(%edx)      // *xptr |= ...
 347               leal    4(%edx),%edx     // xptr++, yptr++
 348               decl    %ecx
 349               jnz     L(olu1)
 350 L(olu2:)    popl    %esi            // restore %esi
 351             ret
 352
 353 #endif
 354
 355 // extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr ^= *yptr.
 356             ALIGN
 357             DECLARE_FUNCTION(xor_loop_up)
 358 C(xor_loop_up:)
 359             pushl   %esi            // save %esi
 360             movl    8(%esp),%edx    // %edx = xptr
 361             movl    12(%esp),%esi   // %esi = yptr
 362             movl    16(%esp),%ecx   // %ecx = count
 363             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 364             jecxz   L(xlu2)         // %ecx = 0 ? then nothing to do
 365 L(xlu1:)      movl    (%edx,%esi),%eax // *yptr
 366               xorl    %eax,(%edx)      // *xptr ^= ...
 367               leal    4(%edx),%edx     // xptr++, yptr++
 368               decl    %ecx
 369               jnz     L(xlu1)
 370 L(xlu2:)    popl    %esi            // restore %esi
 371             ret
 372
 373 #if CL_DS_BIG_ENDIAN_P
 374
 375 // extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr &= *yptr.
 376             ALIGN
 377             DECLARE_FUNCTION(and_loop_up)
 378 C(and_loop_up:)
 379             pushl   %esi            // save %esi
 380             movl    8(%esp),%edx    // %edx = xptr
 381             movl    12(%esp),%esi   // %esi = yptr
 382             movl    16(%esp),%ecx   // %ecx = count
 383             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 384             jecxz   L(alu2)         // %ecx = 0 ? then nothing to do
 385 L(alu1:)      movl    (%edx,%esi),%eax // *yptr
 386               andl    %eax,(%edx)      // *xptr &= ...
 387               leal    4(%edx),%edx     // xptr++, yptr++
 388               decl    %ecx
 389               jnz     L(alu1)
 390 L(alu2:)    popl    %esi            // restore %esi
 391             ret
 392
 393 // extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr = ~(*xptr ^ *yptr).
 394             ALIGN
 395             DECLARE_FUNCTION(eqv_loop_up)
 396 C(eqv_loop_up:)
 397             pushl   %esi            // save %esi
 398             movl    8(%esp),%edx    // %edx = xptr
 399             movl    12(%esp),%esi   // %esi = yptr
 400             movl    16(%esp),%ecx   // %ecx = count
 401             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 402             jecxz   L(elu2)         // %ecx = 0 ? then nothing to do
 403 L(elu1:)      movl    (%edx),%eax      // *xptr
 404               xorl    (%edx,%esi),%eax // ^ *yptr
 405               notl    %eax             // ~(...)
 406               movl    %eax,(%edx)      // =: *xptr
 407               leal    4(%edx),%edx     // xptr++, yptr++
 408               decl    %ecx
 409               jnz     L(elu1)
 410 L(elu2:)    popl    %esi            // restore %esi
 411             ret
 412
 413 // extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr = ~(*xptr & *yptr).
 414             ALIGN
 415             DECLARE_FUNCTION(nand_loop_up)
 416 C(nand_loop_up:)
 417             pushl   %esi            // save %esi
 418             movl    8(%esp),%edx    // %edx = xptr
 419             movl    12(%esp),%esi   // %esi = yptr
 420             movl    16(%esp),%ecx   // %ecx = count
 421             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 422             jecxz   L(nalu2)        // %ecx = 0 ? then nothing to do
 423 L(nalu1:)     movl    (%edx),%eax      // *xptr
 424               andl    (%edx,%esi),%eax // & *yptr
 425               notl    %eax             // ~(...)
 426               movl    %eax,(%edx)      // =: *xptr
 427               leal    4(%edx),%edx     // xptr++, yptr++
 428               decl    %ecx
 429               jnz     L(nalu1)
 430 L(nalu2:)   popl    %esi            // restore %esi
 431             ret
 432
 433 // extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr = ~(*xptr | *yptr).
 434             ALIGN
 435             DECLARE_FUNCTION(nor_loop_up)
 436 C(nor_loop_up:)
 437             pushl   %esi            // save %esi
 438             movl    8(%esp),%edx    // %edx = xptr
 439             movl    12(%esp),%esi   // %esi = yptr
 440             movl    16(%esp),%ecx   // %ecx = count
 441             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 442             jecxz   L(nolu2)        // %ecx = 0 ? then nothing to do
 443 L(nolu1:)     movl    (%edx),%eax      // *xptr
 444               orl     (%edx,%esi),%eax // | *yptr
 445               notl    %eax             // ~(...)
 446               movl    %eax,(%edx)      // =: *xptr
 447               leal    4(%edx),%edx     // xptr++, yptr++
 448               decl    %ecx
 449               jnz     L(nolu1)
 450 L(nolu2:)   popl    %esi            // restore %esi
 451             ret
 452
 453 // extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr &= ~*yptr.
 454             ALIGN
 455             DECLARE_FUNCTION(andc2_loop_up)
 456 C(andc2_loop_up:)
 457             pushl   %esi            // save %esi
 458             movl    8(%esp),%edx    // %edx = xptr
 459             movl    12(%esp),%esi   // %esi = yptr
 460             movl    16(%esp),%ecx   // %ecx = count
 461             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 462             jecxz   L(aclu2)        // %ecx = 0 ? then nothing to do
 463 L(aclu1:)     movl    (%edx,%esi),%eax // *yptr
 464               notl    %eax             // ~ *yptr
 465               andl    %eax,(%edx)      // *xptr &= ...
 466               leal    4(%edx),%edx     // xptr++, yptr++
 467               decl    %ecx
 468               jnz     L(aclu1)
 469 L(aclu2:)   popl    %esi            // restore %esi
 470             ret
 471
 472 // extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count);  For count digits walking upwards: *xptr |= ~*yptr.
 473             ALIGN
 474             DECLARE_FUNCTION(orc2_loop_up)
 475 C(orc2_loop_up:)
 476             pushl   %esi            // save %esi
 477             movl    8(%esp),%edx    // %edx = xptr
 478             movl    12(%esp),%esi   // %esi = yptr
 479             movl    16(%esp),%ecx   // %ecx = count
 480             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 481             jecxz   L(oclu2)        // %ecx = 0 ? then nothing to do
 482 L(oclu1:)     movl    (%edx,%esi),%eax // *yptr
 483               notl    %eax             // ~ *yptr
 484               orl     %eax,(%edx)      // *xptr |= ...
 485               leal    4(%edx),%edx     // xptr++, yptr++
 486               decl    %ecx
 487               jnz     L(oclu1)
 488 L(oclu2:)   popl    %esi            // restore %esi
 489             ret
 490
 491 // extern void not_loop_up (uintD* xptr, uintC count);  For count digits walking upwards: *xptr = ~*xptr.
 492             ALIGN
 493             DECLARE_FUNCTION(not_loop_up)
 494 C(not_loop_up:)
 495             movl    4(%esp),%edx    // %edx = xptr
 496             movl    8(%esp),%ecx    // %ecx = count
 497             jecxz   L(nlu2)         // %ecx = 0 ? then nothing to do
 498             nop ; nop ; nop ; nop ; nop ; nop // hand-placed padding (presumably to align the loop entry -- see the ALIGN note at the top of the file)
 499 L(nlu1:)      notl    (%edx)           // ~= *xptr
 500               leal    4(%edx),%edx     // xptr++
 501               decl    %ecx
 502               jnz     L(nlu1)
 503 L(nlu2:)    ret
 504
 505 // extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count);  Returns 1 if (*xptr & *yptr) is nonzero for any of count digits walking upwards, else 0 (also 0 when count = 0).
 506             ALIGN
 507             DECLARE_FUNCTION(and_test_loop_up)
 508 C(and_test_loop_up:)
 509             pushl   %esi            // save %esi
 510             movl    8(%esp),%edx    // %edx = xptr
 511             movl    12(%esp),%esi   // %esi = yptr
 512             movl    16(%esp),%ecx   // %ecx = count
 513             jecxz   L(atlu2)        // %ecx = 0 ? then the result is 0
 514             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses *yptr
 515 L(atlu1:)     movl    (%edx,%esi),%eax // *yptr
 516               andl    (%edx),%eax      // *xptr & ...
 517               jnz     L(atlu3)         // a common bit was found -> stop early
 518               leal    4(%edx),%edx     // xptr++, yptr++
 519               decl    %ecx
 520               jnz     L(atlu1)
 521 L(atlu2:)   xorl    %eax,%eax       // result 0
 522             popl    %esi            // restore %esi
 523             ret
 524 L(atlu3:)   movl    $1,%eax         // result 1 (not just any value /=0 !)
 525             popl    %esi            // restore %esi
 526             ret
 527
 528 #endif
 529
 530 // extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count);  Compares count 32-bit digits walking upwards from xptr and yptr; the first differing pair decides (unsigned). Returns +1 if x>y, -1 if x<y, 0 if all digits are equal or count = 0.
 531             ALIGN
 532             DECLARE_FUNCTION(compare_loop_up)
 533 C(compare_loop_up:)
 534             movl    %esi,%edx       // save %esi
 535             movl    %edi,%eax       // save %edi
 536             movl    4(%esp),%esi    // %esi = xptr
 537             movl    8(%esp),%edi    // %edi = yptr
 538             movl    12(%esp),%ecx   // %ecx = count
                 cmpl    %ecx,%ecx       // preset Z,NC: with count = 0 the repz below executes nothing and leaves the flags untouched
 539             dir0start
 540             repz                    // if %ecx > 0:
 541               cmpsl                 // %ecx times, upwards: compare (%edi) and (%esi)
 542                                     // and keep looping while Z, i.e. while (%edi)=(%esi).
 543             dir0end
 544             // Flags -> result:
 545             // Z,NC -> (%esi)-(%edi) = 0 up to the very end (or count = 0) -> x=y -> result 0
 546             // NZ,C -> finally (%esi)-(%edi) < 0 -> x<y -> result -1
 547             // NZ,NC -> finally (%esi)-(%edi) > 0 -> x>y -> result +1
 548             movl    %eax,%edi       // restore %edi (movl leaves the flags alone)
 549             movl    %edx,%esi       // restore %esi
 550             jbe     L(cmlu1)        // "be" = Z or C
 551             movl    $1,%eax         // result +1
 552             ret
 553 L(cmlu1:)   sbbl    %eax,%eax       // result -1 (if C) or 0 (if NC)
 554             ret
 555
 556 #if CL_DS_BIG_ENDIAN_P
 557
 558 // extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);  Adds the count digits below sourceptr1 and sourceptr2 with carry propagation, walking downwards, storing the sums below destptr; returns 0 or -1 (all ones) according to the final carry.
 559             ALIGN
 560             DECLARE_FUNCTION(add_loop_down)
 561 C(add_loop_down:)
 562             pushl   %esi            // save %esi
 563             pushl   %edi            // save %edi
 564             movl    12(%esp),%edx   // %edx = sourceptr1
 565             movl    16(%esp),%esi   // %esi = sourceptr2
 566             movl    20(%esp),%edi   // %edi = destptr
 567             movl    24(%esp),%ecx   // %ecx = count
 568             subl    %edi,%edx       // %edx = sourceptr1 - destptr, so (%edx,%edi) addresses the sourceptr1 digit
 569             subl    %edi,%esi       // %esi = sourceptr2 - destptr, so (%esi,%edi) addresses the sourceptr2 digit
 570             orl     %ecx,%ecx       // %ecx = 0 ?, and clear the carry
 571             jz      L(ald2)
 572 L(ald1:)      leal    -4(%edi),%edi   // sourceptr1--, sourceptr2--, destptr-- (leal keeps the carry)
 573               movl    (%edx,%edi),%eax // *sourceptr1
 574               adcl    (%esi,%edi),%eax // + *sourceptr2 + carry
 575               movl    %eax,(%edi)     // =: *destptr, keep the new carry
 576               decl    %ecx
 577               jnz     L(ald1)
 578 L(ald2:)    sbbl    %eax,%eax      // result := - carry
 579             popl    %edi           // restore %edi
 580             popl    %esi           // restore %esi
 581             ret
 582
 583 // extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count);  Adds the count digits below sourceptr into the count digits below destptr with carry propagation, walking downwards; returns 0 or -1 (all ones) according to the final carry.
 584             ALIGN
 585             DECLARE_FUNCTION(addto_loop_down)
 586 C(addto_loop_down:)
 587             pushl   %edi            // save %edi
 588             movl    8(%esp),%edx    // %edx = sourceptr
 589             movl    12(%esp),%edi   // %edi = destptr
 590             movl    16(%esp),%ecx   // %ecx = count
 591             subl    %edi,%edx       // %edx = sourceptr - destptr, so (%edx,%edi) addresses the source digit
 592             orl     %ecx,%ecx       // %ecx = 0 ?, and clear the carry
 593             jz      L(atld2)
 594 L(atld1:)     leal    -4(%edi),%edi   // sourceptr--, destptr-- (leal keeps the carry)
 595               movl    (%edx,%edi),%eax // *sourceptr
 596               adcl    %eax,(%edi)     // + *destptr + carry =: *destptr, new carry
 597               decl    %ecx
 598               jnz     L(atld1)
 599 L(atld2:)   sbbl    %eax,%eax       // result := - carry
 600             popl    %edi            // restore %edi
 601             ret
 602
 603 // extern uintD inc_loop_down (uintD* ptr, uintC count);  Increments the digits below ptr, walking downwards and stopping at the first digit that does not wrap; returns 0 in that case, or 1 if all count digits wrapped (or count = 0).
 604             ALIGN
 605             DECLARE_FUNCTION(inc_loop_down)
 606 C(inc_loop_down:)
 607             movl    4(%esp),%edx    // %edx = ptr
 608             movl    8(%esp),%ecx    // %ecx = count
 609             jecxz   L(ild2)         // %ecx = 0 ? then the result is 1
 610 L(ild1:)      leal    -4(%edx),%edx   // ptr--
 611               addl    $1,(%edx)       // (*ptr)++
 612               jnc     L(ild3)         // no carry -> done
 613               decl    %ecx
 614               jnz     L(ild1)
 615 L(ild2:)    movl    $1,%eax         // result := 1
 616             ret
 617 L(ild3:)    xorl    %eax,%eax       // result := 0
 618             ret
 619
 620 // extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);  Subtracts the count digits below sourceptr2 from those below sourceptr1 with borrow propagation, walking downwards, storing the differences below destptr; returns 0 or -1 (all ones) according to the final borrow.
 621             ALIGN
 622             DECLARE_FUNCTION(sub_loop_down)
 623 C(sub_loop_down:)
 624             pushl   %esi            // save %esi
 625             pushl   %edi            // save %edi
 626             movl    12(%esp),%edx   // %edx = sourceptr1
 627             movl    16(%esp),%esi   // %esi = sourceptr2
 628             movl    20(%esp),%edi   // %edi = destptr
 629             movl    24(%esp),%ecx   // %ecx = count
 630             subl    %edi,%edx       // %edx = sourceptr1 - destptr, so (%edx,%edi) addresses the sourceptr1 digit
 631             subl    %edi,%esi       // %esi = sourceptr2 - destptr, so (%esi,%edi) addresses the sourceptr2 digit
 632             orl     %ecx,%ecx       // %ecx = 0 ?, and clear the carry
 633             jz      L(sld2)
 634 L(sld1:)      leal    -4(%edi),%edi   // sourceptr1--, sourceptr2--, destptr-- (leal keeps the carry)
 635               movl    (%edx,%edi),%eax // *sourceptr1
 636               sbbl    (%esi,%edi),%eax // - *sourceptr2 - carry
 637               movl    %eax,(%edi)     // =: *destptr, keep the new carry
 638               decl    %ecx
 639               jnz     L(sld1)
 640 L(sld2:)    sbbl    %eax,%eax      // result := - carry
 641             popl    %edi           // restore %edi
 642             popl    %esi           // restore %esi
 643             ret
 644
 645 // extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);  Like sub_loop_down, but starts with an incoming borrow given by carry (0 or -1); returns 0 or -1 according to the final borrow, or carry itself when count = 0.
 646             ALIGN
 647             DECLARE_FUNCTION(subx_loop_down)
 648 C(subx_loop_down:)
 649             pushl   %esi            // save %esi
 650             pushl   %edi            // save %edi
 651             movl    12(%esp),%edx   // %edx = sourceptr1
 652             movl    16(%esp),%esi   // %esi = sourceptr2
 653             movl    20(%esp),%edi   // %edi = destptr
 654             movl    24(%esp),%ecx   // %ecx = count
 655             jecxz   L(sxld2)        // %ecx = 0 ? then just hand back carry
 656             subl    %edi,%edx       // %edx = sourceptr1 - destptr, so (%edx,%edi) addresses the sourceptr1 digit
 657             subl    %edi,%esi       // %esi = sourceptr2 - destptr, so (%esi,%edi) addresses the sourceptr2 digit
 658             movl    28(%esp),%eax   // carry, 0 or -1
 659             addl    %eax,%eax       // shift its bit 31 into the carry flag
 660             nop ; nop               // hand-placed padding (presumably to align the loop entry -- see the ALIGN note at the top of the file)
 661 L(sxld1:)     leal    -4(%edi),%edi   // sourceptr1--, sourceptr2--, destptr-- (leal keeps the carry)
 662               movl    (%edx,%edi),%eax // *sourceptr1
 663               sbbl    (%esi,%edi),%eax // - *sourceptr2 - carry
 664               movl    %eax,(%edi)     // =: *destptr, keep the new carry
 665               decl    %ecx
 666               jnz     L(sxld1)
 667             sbbl    %eax,%eax      // result := - carry
 668             popl    %edi           // restore %edi
 669             popl    %esi           // restore %esi
 670             ret
 671 L(sxld2:)   movl    28(%esp),%eax  // result := carry
 672             popl    %edi           // restore %edi
 673             popl    %esi           // restore %esi
 674             ret
 675
 676 // extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count);  Subtracts the count digits below sourceptr from the count digits below destptr with borrow propagation, walking downwards; returns 0 or -1 (all ones) according to the final borrow.
 677             ALIGN
 678             DECLARE_FUNCTION(subfrom_loop_down)
 679 C(subfrom_loop_down:)
 680             pushl   %edi            // save %edi
 681             movl    8(%esp),%edx    // %edx = sourceptr
 682             movl    12(%esp),%edi   // %edi = destptr
 683             movl    16(%esp),%ecx   // %ecx = count
 684             subl    %edi,%edx       // %edx = sourceptr - destptr, so (%edx,%edi) addresses the source digit
 685             orl     %ecx,%ecx       // %ecx = 0 ?, and clear the carry
 686             jz      L(sfld2)
 687 L(sfld1:)     leal    -4(%edi),%edi   // sourceptr--, destptr-- (leal keeps the carry)
 688               movl    (%edx,%edi),%eax // *sourceptr
 689               sbbl    %eax,(%edi)     // *destptr - *sourceptr - carry =: *destptr, new carry
 690               decl    %ecx
 691               jnz     L(sfld1)
 692 L(sfld2:)   sbbl    %eax,%eax       // result := - carry
 693             popl    %edi            // restore %edi
 694             ret
 695
 696 // extern uintD dec_loop_down (uintD* ptr, uintC count);  Decrements the digits below ptr, walking downwards and stopping at the first digit that does not borrow; returns 0 in that case, or -1 if all count digits borrowed (or count = 0).
 697             ALIGN
 698             DECLARE_FUNCTION(dec_loop_down)
 699 C(dec_loop_down:)
 700             movl    4(%esp),%edx    // %edx = ptr
 701             movl    8(%esp),%ecx    // %ecx = count
 702             jecxz   L(dld2)         // %ecx = 0 ? then the result is -1
 703 L(dld1:)      leal    -4(%edx),%edx   // ptr--
 704               subl    $1,(%edx)       // (*ptr)--
 705               jnc     L(dld3)         // no carry -> done
 706               decl    %ecx
 707               jnz     L(dld1)
 708 L(dld2:)    movl    $-1,%eax        // result := -1
 709             ret
 710 L(dld3:)    xorl    %eax,%eax       // result := 0
 711             ret
 712
 713 // extern uintD neg_loop_down (uintD* ptr, uintC count);  Negates in place the multi-digit number formed by the count digits below ptr, walking downwards; returns 0 if all digits were zero (or count = 0), else -1.
 714             ALIGN
 715             DECLARE_FUNCTION(neg_loop_down)
 716 C(neg_loop_down:)
 717             movl    4(%esp),%edx    // %edx = ptr
 718             movl    8(%esp),%ecx    // %ecx = count
 719             // search for the first digit /=0:
 720             jecxz   L(nld2)         // %ecx = 0 ? then the result is 0
 721 L(nld1:)      leal    -4(%edx),%edx   // ptr--
 722               negl    (%edx)          // negate the digit; Z stays set while the digit is 0
 723               jnz     L(nld3)
 724               decl    %ecx
 725               jnz     L(nld1)
 726 L(nld2:)    xorl    %eax,%eax       // result := 0
 727             ret
 728             nop ; nop ; nop ; nop ; nop ; nop // hand-placed padding (presumably to align the code below -- see the ALIGN note at the top of the file)
 729 L(nld3:)    // first digit /=0 found, from now on there are carries
 730             // invert all remaining digits:
 731             decl    %ecx
 732             jz      L(nld5)
 733 L(nld4:)      leal    -4(%edx),%edx   // ptr--
 734               notl    (%edx)
 735               decl    %ecx
 736               jnz     L(nld4)
 737 L(nld5:)    movl    $-1,%eax        // result := -1
 738             ret
 739
 740 // extern uintD shift1left_loop_down (uintD* ptr, uintC count);
     // Shifts the count digits below ptr left by 1 bit in place, walking
     // downwards; a 0 bit enters the first digit and each digit's top bit
     // is carried into the next one.
     // Result: 0 or -1, the negated bit shifted out of the last digit
     // (0 for count = 0).
 741             ALIGN
 742             DECLARE_FUNCTION(shift1left_loop_down)
 743 C(shift1left_loop_down:)
 744             movl    4(%esp),%edx    // %edx = ptr
 745             movl    8(%esp),%ecx    // %ecx = count
 746             orl     %ecx,%ecx       // %ecx = 0 ?, also clears the carry flag
 747             jz      L(s1lld2)
 748             nop ; nop ; nop ; nop   // NOTE(review): presumably padding in front of the loop head
 749 L(s1lld1:)    leal    -4(%edx),%edx   // ptr--
 750               rcll    $1,(%edx)       // rotate *ptr and the carry flag left by 1 bit
 751               decl    %ecx            // count-- (decl does not modify the carry flag)
 752               jnz     L(s1lld1)
 753 L(s1lld2:)  sbbl    %eax,%eax       // result := - carry (0 or -1)
 754             ret
 755
 756 // extern uintD shiftleft_loop_down (uintD* ptr, uintC count, uintC i, uintD carry);
     // Shifts the count digits below ptr left by i bits in place, walking
     // downwards. carry is OR-ed into the first shifted digit; every further
     // digit receives the top i bits of the previous digit's old value.
     // Result: the top i bits of the last digit's old value (right-aligned),
     // or carry itself if count = 0.
     // The loop is unrolled twice; %eax and %ebx alternate as the holder of
     // the previous digit's old value.
 757             ALIGN
 758             DECLARE_FUNCTION(shiftleft_loop_down)
 759 C(shiftleft_loop_down:)
 760             pushl   %edi            // save %edi
 761             pushl   %ebx            // save %ebx
 762             movl    12(%esp),%edi   // %edi = ptr
 763             movl    16(%esp),%edx   // %edx = count
 764             movb    20(%esp),%cl    // %cl = i
 765             orl     %edx,%edx       // count = 0 ?
 766             jz      L(slld4)
 767             // shift the first digit:
 768             leal    -4(%edi),%edi
 769             movl    (%edi),%eax     // keep the digit in %eax
 770             movl    %eax,%ebx       // and compute in %ebx:
 771             shll    %cl,%ebx        // shift left by i bits
 772             orl     24(%esp),%ebx   // and insert carry as the low i bits
 773             movl    %ebx,(%edi)     // and store it back
 774             // Previous digit (old value) is in %eax.
 775             decl    %edx
 776             jz      L(slld2)
 777             nop ; nop ; nop ; nop   // NOTE(review): presumably padding in front of the loop head
 778 L(slld1:)     // shift a further digit:
 779               leal    -4(%edi),%edi
 780               movl    (%edi),%ebx
 781               shldl   shcl %eax,(%edi) // shift (%edi) left by %cl=i bits, shifting in %eax from the right
 782               // Previous digit (old value) is in %ebx.
 783               decl    %edx
 784               jz      L(slld3)
 785               // shift a further digit:
 786               leal    -4(%edi),%edi
 787               movl    (%edi),%eax
 788               shldl   shcl %ebx,(%edi) // shift (%edi) left by %cl=i bits, shifting in %ebx from the right
 789               // Previous digit (old value) is in %eax.
 790               decl    %edx
 791               jnz     L(slld1)
 792 L(slld2:)   movl    %eax,%ebx
 793 L(slld3:)   xorl    %eax,%eax       // %eax := 0
 794             shldl   shcl %ebx,%eax  // %eax := top %cl=i bits of %ebx
 795             popl    %ebx            // restore %ebx
 796             popl    %edi            // restore %edi
 797             ret
 798 L(slld4:)   movl    24(%esp),%eax   // %eax := carry
 799             popl    %ebx            // restore %ebx
 800             popl    %edi            // restore %edi
 801             ret
 802
 803 // extern uintD shiftleftcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
     // Reads count digits below sourceptr, shifts them left by i bits and
     // stores the result below destptr, walking both pointers downwards.
     // Zero bits enter the first digit; every further digit receives the top
     // i bits of the previous source digit.
     // Result: the previous source digit shifted right by %cl = -i (the
     // "32-i" of the comments), or 0 if count = 0.
     // The loop is unrolled twice; %ebx and %eax alternate as the holder of
     // the previous source digit.
 804             ALIGN
 805             DECLARE_FUNCTION(shiftleftcopy_loop_down)
 806 C(shiftleftcopy_loop_down:)
 807             pushl   %esi            // save %esi
 808             pushl   %edi            // save %edi
 809             pushl   %ebx            // save %ebx
 810             movl    16(%esp),%esi   // %esi = sourceptr
 811             movl    20(%esp),%edi   // %edi = destptr
 812             movl    24(%esp),%edx   // count
 813             movb    28(%esp),%cl    // i
 814             orl     %edx,%edx       // count = 0 ?
 815             jz      L(slcld4)
 816             subl    %edi,%esi       // %esi = sourceptr - destptr, so (%edi,%esi) addresses the source digit
 817             // shift the first digit:
 818             leal    -4(%edi),%edi   // sourceptr--, destptr--
 819             movl    (%edi,%esi),%ebx // keep *sourceptr in %ebx
 820             movl    %ebx,%eax       // and compute in %eax:
 821             shll    %cl,%eax        // shift left by i bits, zeros enter from the right
 822             movl    %eax,(%edi)     // and store as *destptr
 823             // Previous digit is in %ebx.
 824             negb    %cl             // 32-i
 825             decl    %edx
 826             jz      L(slcld2)
 827 L(slcld1:)    // shift a further digit:
 828               leal    -4(%edi),%edi   // sourceptr--, destptr--
 829               movl    (%edi,%esi),%eax // next digit into %eax
 830               shrdl   shcl %eax,%ebx  // shift %ebx right by %cl=32-i bits, shifting in %eax from the left
 831               movl    %ebx,(%edi)     // store %ebx as *destptr
 832               // Previous digit is in %eax.
 833               decl    %edx
 834               jz      L(slcld3)
 835               // shift a further digit:
 836               leal    -4(%edi),%edi   // sourceptr--, destptr--
 837               movl    (%edi,%esi),%ebx // next digit into %ebx
 838               shrdl   shcl %ebx,%eax  // shift %eax right by %cl=32-i bits, shifting in %ebx from the left
 839               movl    %eax,(%edi)     // store %eax as *destptr
 840               // Previous digit is in %ebx.
 841               decl    %edx
 842               jnz     L(slcld1)
 843 L(slcld2:)  movl    %ebx,%eax
 844 L(slcld3:)  shrl    %cl,%eax        // shift %eax right by 32-i bits
 845             popl    %ebx            // restore %ebx
 846             popl    %edi            // restore %edi
 847             popl    %esi            // restore %esi
 848             ret
 849 L(slcld4:)  xorl    %eax,%eax       // %eax := 0
 850             popl    %ebx            // restore %ebx
 851             popl    %edi            // restore %edi
 852             popl    %esi            // restore %esi
 853             ret
 854
 855 // extern uintD shift1right_loop_up (uintD* ptr, uintC count, uintD carry);
     // Shifts the count digits starting at ptr right by 1 bit in place,
     // walking upwards. Bit 31 of carry enters the first digit from the left
     // and each digit's lowest bit is carried into the next one.
     // Result: 0 or -1, the negated bit shifted out of the last digit;
     // for count = 0 the carry argument is returned unchanged.
 856             ALIGN
 857             DECLARE_FUNCTION(shift1right_loop_up)
 858 C(shift1right_loop_up:)
 859             movl    4(%esp),%edx    // %edx = ptr
 860             movl    8(%esp),%ecx    // %ecx = count
 861             movl    12(%esp),%eax   // %eax = carry (0 or -1)
 862             jecxz   L(s1rld3)       // %ecx = 0 ?
 863             addl    %eax,%eax       // carry flag := bit 31 of carry
 864 L(s1rld1:)    rcrl    $1,(%edx)       // rotate *ptr and the carry flag right by 1 bit
 865               leal    4(%edx),%edx    // ptr++ (leal leaves the flags alone)
 866               decl    %ecx            // count-- (decl does not modify the carry flag)
 867               jnz     L(s1rld1)
 868 L(s1rld2:)  sbbl    %eax,%eax       // result := - carry (0 or -1)
 869 L(s1rld3:)  ret
 870
 871 // extern uintD shiftright_loop_up (uintD* ptr, uintC count, uintC i);
     // Shifts the count digits starting at ptr right by i bits in place,
     // walking upwards. Zero bits enter the first digit; every further digit
     // receives the low i bits of the previous digit's old value.
     // Result: the low i bits of the last digit's old value, placed in
     // bits 31..32-i, or 0 if count = 0.
     // The loop is unrolled twice; %eax and %ebx alternate as the holder of
     // the previous digit's old value.
 872             ALIGN
 873             DECLARE_FUNCTION(shiftright_loop_up)
 874 C(shiftright_loop_up:)
 875             pushl   %edi            // save %edi
 876             pushl   %ebx            // save %ebx
 877             movl    12(%esp),%edi   // %edi = ptr
 878             movl    16(%esp),%edx   // %edx = count
 879             movb    20(%esp),%cl    // %cl = i
 880             orl     %edx,%edx       // count = 0 ?
 881             jz      L(srlu4)
 882             // shift the first digit:
 883             movl    (%edi),%eax     // keep the digit in %eax
 884             movl    %eax,%ebx       // and compute in %ebx:
 885             shrl    %cl,%ebx        // shift right by i bits
 886             movl    %ebx,(%edi)     // and store it back
 887             // Previous digit (old value) is in %eax.
 888             decl    %edx
 889             jz      L(srlu2)
 890             nop ; nop ; nop         // NOTE(review): presumably padding in front of the loop head
 891 L(srlu1:)     // shift a further digit:
 892               leal    4(%edi),%edi
 893               movl    (%edi),%ebx
 894               shrdl   shcl %eax,(%edi) // shift (%edi) right by %cl=i bits, shifting in %eax from the left
 895               // Previous digit (old value) is in %ebx.
 896               decl    %edx
 897               jz      L(srlu3)
 898               // shift a further digit:
 899               leal    4(%edi),%edi
 900               movl    (%edi),%eax
 901               shrdl   shcl %ebx,(%edi) // shift (%edi) right by %cl=i bits, shifting in %ebx from the left
 902               // Previous digit (old value) is in %eax.
 903               decl    %edx
 904               jnz     L(srlu1)
 905 L(srlu2:)   movl    %eax,%ebx
 906 L(srlu3:)   xorl    %eax,%eax       // %eax := 0
 907             shrdl   shcl %ebx,%eax  // %eax := lowest %cl=i bits of %ebx, as bits 31..32-i
 908             popl    %ebx            // restore %ebx
 909             popl    %edi            // restore %edi
 910             ret
 911 L(srlu4:)   xorl    %eax,%eax       // %eax := 0
 912             popl    %ebx            // restore %ebx
 913             popl    %edi            // restore %edi
 914             ret
 915
 916 // extern uintD shiftrightsigned_loop_up (uintD* ptr, uintC count, uintC i);
     // Shifts the count digits starting at ptr right by i bits in place,
     // walking upwards. The first digit is shifted arithmetically (its sign
     // bit is replicated); every further digit receives the low i bits of
     // the previous digit's old value.
     // Result: the low i bits of the last digit's old value, placed in
     // bits 31..32-i.
     // NOTE(review): there is no count = 0 check here, the first digit is
     // always processed - presumably callers guarantee count > 0; confirm.
     // The loop is unrolled twice; %eax and %ebx alternate as the holder of
     // the previous digit's old value.
 917             ALIGN
 918             DECLARE_FUNCTION(shiftrightsigned_loop_up)
 919 C(shiftrightsigned_loop_up:)
 920             pushl   %edi            // save %edi
 921             pushl   %ebx            // save %ebx
 922             movl    12(%esp),%edi   // %edi = ptr
 923             movl    16(%esp),%edx   // %edx = count
 924             movb    20(%esp),%cl    // %cl = i
 925             // shift the first digit:
 926             movl    (%edi),%eax     // keep the digit in %eax
 927             movl    %eax,%ebx       // and compute in %ebx:
 928             sarl    %cl,%ebx        // shift right by i bits, replicating the sign bit
 929             movl    %ebx,(%edi)     // and store it back
 930             // Previous digit (old value) is in %eax.
 931             decl    %edx
 932             jz      L(srslu2)
 933 L(srslu1:)    // shift a further digit:
 934               leal    4(%edi),%edi
 935               movl    (%edi),%ebx
 936               shrdl   shcl %eax,(%edi) // shift (%edi) right by %cl=i bits, shifting in %eax from the left
 937               // Previous digit (old value) is in %ebx.
 938               decl    %edx
 939               jz      L(srslu3)
 940               // shift a further digit:
 941               leal    4(%edi),%edi
 942               movl    (%edi),%eax
 943               shrdl   shcl %ebx,(%edi) // shift (%edi) right by %cl=i bits, shifting in %ebx from the left
 944               // Previous digit (old value) is in %eax.
 945               decl    %edx
 946               jnz     L(srslu1)
 947 L(srslu2:)  movl    %eax,%ebx
 948 L(srslu3:)  xorl    %eax,%eax       // %eax := 0
 949             shrdl   shcl %ebx,%eax  // %eax := lowest %cl=i bits of %ebx, as bits 31..32-i
 950             popl    %ebx            // restore %ebx
 951             popl    %edi            // restore %edi
 952             ret
 953
 954 // extern uintD shiftrightcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
     // Reads count digits starting at sourceptr, shifts them right by i bits
     // and stores the result starting at destptr, walking both pointers
     // upwards. The low bits of carry enter the first digit from the left;
     // every further digit receives the low i bits of the previous source
     // digit.
     // Result: the last source digit shifted left by %cl = -i (the "32-i"
     // of the comments); for count = 0 it is carry shifted the same way.
     // The loop is unrolled twice; %ebx and %eax alternate as the holder of
     // the previous source digit.
 955             ALIGN
 956             DECLARE_FUNCTION(shiftrightcopy_loop_up)
 957 C(shiftrightcopy_loop_up:)
 958             pushl   %esi            // save %esi
 959             pushl   %edi            // save %edi
 960             pushl   %ebx            // save %ebx
 961             movl    16(%esp),%esi   // %esi = sourceptr
 962             movl    20(%esp),%edi   // %edi = destptr
 963             movl    24(%esp),%edx   // count
 964             movb    28(%esp),%cl    // i
 965             negb    %cl             // 32-i
 966             movl    32(%esp),%eax   // %eax = carry
 967             orl     %edx,%edx       // count = 0 ?
 968             jz      L(srcld3)
 969             subl    %edi,%esi       // %esi = sourceptr - destptr, so (%edi,%esi) addresses the source digit
 970             // shift the first digit:
 971             movl    (%edi,%esi),%ebx // keep *sourceptr in %ebx
 972             shldl   shcl %ebx,%eax  // shift carry left by %cl=32-i bits, shifting in *sourceptr
 973             movl    %eax,(%edi)     // and store as *destptr
 974             // Previous digit is in %ebx.
 975             decl    %edx
 976             jz      L(srcld2)
 977 L(srcld1:)    // shift a further digit:
 978               leal    4(%edi),%edi    // sourceptr++, destptr++
 979               movl    (%edi,%esi),%eax // next digit into %eax
 980               shldl   shcl %eax,%ebx  // shift %ebx left by %cl=32-i bits, shifting in %eax from the right
 981               movl    %ebx,(%edi)     // store %ebx as *destptr
 982               // Previous digit is in %eax.
 983               decl    %edx
 984               jz      L(srcld3)
 985               // shift a further digit:
 986               leal    4(%edi),%edi    // sourceptr++, destptr++
 987               movl    (%edi,%esi),%ebx // next digit into %ebx
 988               shldl   shcl %ebx,%eax  // shift %eax left by %cl=32-i bits, shifting in %ebx from the right
 989               movl    %eax,(%edi)     // store %eax as *destptr
 990               // Previous digit is in %ebx.
 991               decl    %edx
 992               jnz     L(srcld1)
 993 L(srcld2:)  movl    %ebx,%eax
 994 L(srcld3:)  shll    %cl,%eax        // shift %eax left by 32-i bits
 995             popl    %ebx            // restore %ebx
 996             popl    %edi            // restore %edi
 997             popl    %esi            // restore %esi
 998             ret
 999
1000 // extern uintD mulusmall_loop_down (uintD digit, uintD* ptr, uintC len, uintD newdigit);
     // Multiplies in place the len digits below ptr by digit, walking
     // downwards, with newdigit as the initial carry added to the first
     // product.
     // Result: the final carry (newdigit itself if len = 0).
1001             ALIGN
1002             DECLARE_FUNCTION(mulusmall_loop_down)
1003 C(mulusmall_loop_down:)
1004             pushl   %ebp            // save %ebp
1005             pushl   %edi            // save %edi
1006             pushl   %ebx            // save %ebx
1007             movl    16(%esp),%ebx   // %ebx = digit
1008             movl    20(%esp),%edi   // %edi = ptr
1009             movl    24(%esp),%ecx   // %ecx = len
1010             movl    28(%esp),%ebp   // %ebp = carry := newdigit
1011             movl    %ecx,%eax
1012             negl    %eax            // %eax = -len
1013             jz      L(msld2)        // len = 0 ?
1014             leal    -4(%edi,%eax,4),%edi // %edi = &ptr[-1-len]
1015             nop ; nop ; nop         // NOTE(review): presumably padding in front of the loop head
1016 L(msld1:)     movl    (%edi,%ecx,4),%eax // *ptr
1017               mull    %ebx               // %edx|%eax := digit * *ptr
1018               addl    %ebp,%eax          // add carry to the low part of the product
1019               movl    $0,%ebp            // %ebp := 0 (movl keeps the carry flag for the adcl)
1020               adcl    %edx,%ebp          // add the overflow to the high part %edx, giving the new carry
1021               movl    %eax,(%edi,%ecx,4) // store the low part as *ptr
1022               decl    %ecx               // count--, ptr--
1023               jnz     L(msld1)
1024 L(msld2:)   movl    %ebp,%eax       // result := last carry
1025             popl    %ebx            // restore %ebx
1026             popl    %edi            // restore %edi
1027             popl    %ebp            // restore %ebp
1028             ret
1029
1030 // extern void mulu_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len digits below sourceptr by digit and stores the
     // len low parts below destptr, walking downwards; the final carry is
     // stored as one more digit below those, at destptr[-1-len].
     // NOTE(review): len = 0 is not checked and the loop body runs before
     // the counter is tested - presumably callers guarantee len > 0; confirm.
1031             ALIGN
1032             DECLARE_FUNCTION(mulu_loop_down)
1033 C(mulu_loop_down:)
1034             pushl   %ebp            // save %ebp
1035             pushl   %edi            // save %edi
1036             pushl   %esi            // save %esi
1037             pushl   %ebx            // save %ebx
1038             movl    20(%esp),%ebx   // %ebx = digit
1039             movl    24(%esp),%esi   // %esi = sourceptr
1040             movl    28(%esp),%edi   // %edi = destptr
1041             movl    32(%esp),%ecx   // %ecx = len
1042             movl    %ecx,%eax
1043             notl    %eax            // %eax = -1-len
1044             leal    (%esi,%eax,4),%esi // %esi = &sourceptr[-1-len]
1045             leal    (%edi,%eax,4),%edi // %edi = &destptr[-1-len]
1046             xorl    %ebp,%ebp       // %ebp = carry := 0
1047 L(muld1:)     movl    (%esi,%ecx,4),%eax // *sourceptr
1048               mull    %ebx               // %edx|%eax := digit * *sourceptr
1049               addl    %ebp,%eax          // add carry to the low part of the product
1050               movl    $0,%ebp            // %ebp := 0 (movl keeps the carry flag for the adcl)
1051               adcl    %edx,%ebp          // add the overflow to the high part %edx, giving the new carry
1052               movl    %eax,(%edi,%ecx,4) // store the low part as *destptr
1053               decl    %ecx               // count--, sourceptr--, destptr--
1054               jnz     L(muld1)
1055             movl    %ebp,(%edi)     // store the last carry
1056             popl    %ebx            // restore %ebx
1057             popl    %esi            // restore %esi
1058             popl    %edi            // restore %edi
1059             popl    %ebp            // restore %ebp
1060             ret
1061
1062 // extern uintD muluadd_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len digits below sourceptr by digit and adds the
     // products to the len digits below destptr, in place in the
     // destination, walking downwards.
     // Result: the final carry.
     // NOTE(review): len = 0 is not checked and the loop body runs before
     // the counter is tested - presumably callers guarantee len > 0; confirm.
1063             ALIGN
1064             DECLARE_FUNCTION(muluadd_loop_down)
1065 C(muluadd_loop_down:)
1066             pushl   %ebp            // save %ebp
1067             pushl   %edi            // save %edi
1068             pushl   %esi            // save %esi
1069             pushl   %ebx            // save %ebx
1070             movl    20(%esp),%ebx   // %ebx = digit
1071             movl    24(%esp),%esi   // %esi = sourceptr
1072             movl    28(%esp),%edi   // %edi = destptr
1073             movl    32(%esp),%ecx   // %ecx = len
1074             movl    %ecx,%eax
1075             notl    %eax            // %eax = -1-len
1076             leal    (%esi,%eax,4),%esi // %esi = &sourceptr[-1-len]
1077             leal    (%edi,%eax,4),%edi // %edi = &destptr[-1-len]
1078             xorl    %ebp,%ebp       // %ebp = carry := 0
1079 L(muald1:)    movl    (%esi,%ecx,4),%eax // *sourceptr
1080               mull    %ebx               // %edx|%eax := digit * *sourceptr
1081               addl    %ebp,%eax          // add carry to the low part of the product
1082               movl    $0,%ebp            // %ebp := 0 (movl keeps the carry flag for the adcl)
1083               adcl    %ebp,%edx          // add the overflow to the high part %edx
1084               addl    %eax,(%edi,%ecx,4) // add the low part to *destptr
1085               adcl    %edx,%ebp          // %ebp := %edx plus the second overflow, giving the new carry
1086               decl    %ecx               // count--, sourceptr--, destptr--
1087               jnz     L(muald1)
1088             movl    %ebp,%eax       // result := last carry
1089             popl    %ebx            // restore %ebx
1090             popl    %esi            // restore %esi
1091             popl    %edi            // restore %edi
1092             popl    %ebp            // restore %ebp
1093             ret
1094
1095 // extern uintD mulusub_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies the len digits below sourceptr by digit and subtracts the
     // products from the len digits below destptr, in place in the
     // destination, walking downwards.
     // Result: the final carry (the amount still to be subtracted from the
     // next higher digit).
     // NOTE(review): len = 0 is not checked and the loop body runs before
     // the counter is tested - presumably callers guarantee len > 0; confirm.
1096             ALIGN
1097             DECLARE_FUNCTION(mulusub_loop_down)
1098 C(mulusub_loop_down:)
1099             pushl   %ebp            // save %ebp
1100             pushl   %edi            // save %edi
1101             pushl   %esi            // save %esi
1102             pushl   %ebx            // save %ebx
1103             movl    20(%esp),%ebx   // %ebx = digit
1104             movl    24(%esp),%esi   // %esi = sourceptr
1105             movl    28(%esp),%edi   // %edi = destptr
1106             movl    32(%esp),%ecx   // %ecx = len
1107             movl    %ecx,%eax
1108             notl    %eax            // %eax = -1-len
1109             leal    (%esi,%eax,4),%esi // %esi = &sourceptr[-1-len]
1110             leal    (%edi,%eax,4),%edi // %edi = &destptr[-1-len]
1111             xorl    %ebp,%ebp       // %ebp = carry := 0
1112 L(musld1:)    movl    (%esi,%ecx,4),%eax // *sourceptr
1113               mull    %ebx               // %edx|%eax := digit * *sourceptr
1114               addl    %ebp,%eax          // add carry to the low part of the product
1115               movl    $0,%ebp            // %ebp := 0 (movl keeps the carry flag for the adcl)
1116               adcl    %ebp,%edx          // add the overflow to the high part %edx
1117               subl    %eax,(%edi,%ecx,4) // subtract the low part from *destptr
1118               adcl    %edx,%ebp          // %ebp := %edx plus the borrow of the subtraction, giving the new carry
1119               decl    %ecx               // count--, sourceptr--, destptr--
1120               jnz     L(musld1)
1121             movl    %ebp,%eax       // result := last carry
1122             popl    %ebx            // restore %ebx
1123             popl    %esi            // restore %esi
1124             popl    %edi            // restore %edi
1125             popl    %ebp            // restore %ebp
1126             ret
1127
1128 // extern uintD divu_loop_up (uintD digit, uintD* ptr, uintC len);
     // Divides in place the len digits starting at ptr by digit, walking
     // upwards: each step divides (remainder|*ptr) by digit, stores the
     // quotient and carries the remainder to the next digit.
     // Result: the final remainder (0 if len = 0).
1129             ALIGN
1130             DECLARE_FUNCTION(divu_loop_up)
1131 C(divu_loop_up:)
1132             pushl   %edi            // save %edi
1133             pushl   %ebx            // save %ebx
1134             movl    12(%esp),%ebx   // %ebx = digit
1135             movl    16(%esp),%edi   // %edi = ptr
1136             movl    20(%esp),%ecx   // %ecx = len
1137             xorl    %edx,%edx       // %edx = remainder := 0
1138             jecxz   L(dlu2)         // %ecx = 0 ?
1139 L(dlu1:)      movl    (%edi),%eax     // next digit *ptr
1140               divl    %ebx            // divide %edx|%eax by %ebx
1141               movl    %eax,(%edi)     // store the quotient %eax, keep the remainder in %edx
1142               leal    4(%edi),%edi    // ptr++
1143               decl    %ecx
1144               jnz     L(dlu1)
1145 L(dlu2:)    movl    %edx,%eax       // result := last remainder
1146             popl    %ebx            // restore %ebx
1147             popl    %edi            // restore %edi
1148             ret
1149
1150 // extern uintD divucopy_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Divides the len digits starting at sourceptr by digit and stores the
     // quotient digits starting at destptr, walking both pointers upwards:
     // each step divides (remainder|*sourceptr) by digit and carries the
     // remainder to the next digit.
     // Result: the final remainder (0 if len = 0).
1151             ALIGN
1152             DECLARE_FUNCTION(divucopy_loop_up)
1153 C(divucopy_loop_up:)
1154             pushl   %edi            // save %edi
1155             pushl   %esi            // save %esi
1156             pushl   %ebx            // save %ebx
1157             movl    16(%esp),%ebx   // %ebx = digit
1158             movl    20(%esp),%esi   // %esi = sourceptr
1159             movl    24(%esp),%edi   // %edi = destptr
1160             movl    28(%esp),%ecx   // %ecx = len
1161             xorl    %edx,%edx       // %edx = remainder := 0
1162             jecxz   L(dclu2)        // %ecx = 0 ?
1163             subl    %edi,%esi       // %esi = sourceptr - destptr, so (%esi,%edi) addresses the source digit
1164 L(dclu1:)     movl    (%esi,%edi),%eax // next digit *sourceptr
1165               divl    %ebx            // divide %edx|%eax by %ebx
1166               movl    %eax,(%edi)     // store the quotient %eax as *destptr, keep the remainder in %edx
1167               leal    4(%edi),%edi    // sourceptr++, destptr++
1168               decl    %ecx
1169               jnz     L(dclu1)
1170 L(dclu2:)   movl    %edx,%eax       // result := last remainder
1171             popl    %ebx            // restore %ebx
1172             popl    %esi            // restore %esi
1173             popl    %edi            // restore %edi
1174             ret
1175
1176 #endif
1177
1178 #if !CL_DS_BIG_ENDIAN_P
1179
1180 // extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := *xptr | *yptr. Does nothing if count = 0.
1181             ALIGN
1182             DECLARE_FUNCTION(or_loop_down)
1183 C(or_loop_down:)
1184             pushl   %esi            // save %esi
1185             movl    8(%esp),%edx    // %edx = xptr
1186             movl    12(%esp),%esi   // %esi = yptr
1187             movl    16(%esp),%ecx   // %ecx = count
1188             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1189             jecxz   L(old2)         // %ecx = 0 ?
1190 L(old1:)      leal    -4(%edx),%edx    // xptr--, yptr--
1191               movl    (%edx,%esi),%eax // *yptr
1192               orl     %eax,(%edx)      // *xptr |= ...
1193               decl    %ecx
1194               jnz     L(old1)
1195 L(old2:)    popl    %esi            // restore %esi
1196             ret
1197
1198 // extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := *xptr ^ *yptr. Does nothing if count = 0.
1199             ALIGN
1200             DECLARE_FUNCTION(xor_loop_down)
1201 C(xor_loop_down:)
1202             pushl   %esi            // save %esi
1203             movl    8(%esp),%edx    // %edx = xptr
1204             movl    12(%esp),%esi   // %esi = yptr
1205             movl    16(%esp),%ecx   // %ecx = count
1206             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1207             jecxz   L(xld2)         // %ecx = 0 ?
1208 L(xld1:)      leal    -4(%edx),%edx    // xptr--, yptr--
1209               movl    (%edx,%esi),%eax // *yptr
1210               xorl    %eax,(%edx)      // *xptr ^= ...
1211               decl    %ecx
1212               jnz     L(xld1)
1213 L(xld2:)    popl    %esi            // restore %esi
1214             ret
1215
1216 // extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := *xptr & *yptr. Does nothing if count = 0.
1217             ALIGN
1218             DECLARE_FUNCTION(and_loop_down)
1219 C(and_loop_down:)
1220             pushl   %esi            // save %esi
1221             movl    8(%esp),%edx    // %edx = xptr
1222             movl    12(%esp),%esi   // %esi = yptr
1223             movl    16(%esp),%ecx   // %ecx = count
1224             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1225             jecxz   L(ald2)         // %ecx = 0 ?
1226 L(ald1:)      leal    -4(%edx),%edx    // xptr--, yptr--
1227               movl    (%edx,%esi),%eax // *yptr
1228               andl    %eax,(%edx)      // *xptr &= ...
1229               decl    %ecx
1230               jnz     L(ald1)
1231 L(ald2:)    popl    %esi            // restore %esi
1232             ret
1233
1234 // extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := ~(*xptr ^ *yptr). Does nothing if count = 0.
1235             ALIGN
1236             DECLARE_FUNCTION(eqv_loop_down)
1237 C(eqv_loop_down:)
1238             pushl   %esi            // save %esi
1239             movl    8(%esp),%edx    // %edx = xptr
1240             movl    12(%esp),%esi   // %esi = yptr
1241             movl    16(%esp),%ecx   // %ecx = count
1242             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1243             jecxz   L(eld2)         // %ecx = 0 ?
1244 L(eld1:)      leal    -4(%edx),%edx    // xptr--, yptr--
1245               movl    (%edx),%eax      // *xptr
1246               xorl    (%edx,%esi),%eax // ^ *yptr
1247               notl    %eax             // ~(...)
1248               movl    %eax,(%edx)      // =: *xptr
1249               decl    %ecx
1250               jnz     L(eld1)
1251 L(eld2:)    popl    %esi            // restore %esi
1252             ret
1253
1254 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := ~(*xptr & *yptr). Does nothing if count = 0.
1255             ALIGN
1256             DECLARE_FUNCTION(nand_loop_down)
1257 C(nand_loop_down:)
1258             pushl   %esi            // save %esi
1259             movl    8(%esp),%edx    // %edx = xptr
1260             movl    12(%esp),%esi   // %esi = yptr
1261             movl    16(%esp),%ecx   // %ecx = count
1262             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1263             jecxz   L(nald2)        // %ecx = 0 ?
1264 L(nald1:)     leal    -4(%edx),%edx    // xptr--, yptr--
1265               movl    (%edx),%eax      // *xptr
1266               andl    (%edx,%esi),%eax // & *yptr
1267               notl    %eax             // ~(...)
1268               movl    %eax,(%edx)      // =: *xptr
1269               decl    %ecx
1270               jnz     L(nald1)
1271 L(nald2:)   popl    %esi            // restore %esi
1272             ret
1273
1274 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := ~(*xptr | *yptr). Does nothing if count = 0.
1275             ALIGN
1276             DECLARE_FUNCTION(nor_loop_down)
1277 C(nor_loop_down:)
1278             pushl   %esi            // save %esi
1279             movl    8(%esp),%edx    // %edx = xptr
1280             movl    12(%esp),%esi   // %esi = yptr
1281             movl    16(%esp),%ecx   // %ecx = count
1282             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1283             jecxz   L(nold2)        // %ecx = 0 ?
1284 L(nold1:)     leal    -4(%edx),%edx    // xptr--, yptr--
1285               movl    (%edx),%eax      // *xptr
1286               orl     (%edx,%esi),%eax // | *yptr
1287               notl    %eax             // ~(...)
1288               movl    %eax,(%edx)      // =: *xptr
1289               decl    %ecx
1290               jnz     L(nold1)
1291 L(nold2:)   popl    %esi            // restore %esi
1292             ret
1293
1294 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := *xptr & ~*yptr. Does nothing if count = 0.
1295             ALIGN
1296             DECLARE_FUNCTION(andc2_loop_down)
1297 C(andc2_loop_down:)
1298             pushl   %esi            // save %esi
1299             movl    8(%esp),%edx    // %edx = xptr
1300             movl    12(%esp),%esi   // %esi = yptr
1301             movl    16(%esp),%ecx   // %ecx = count
1302             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1303             jecxz   L(acld2)        // %ecx = 0 ?
1304 L(acld1:)     leal    -4(%edx),%edx    // xptr--, yptr--
1305               movl    (%edx,%esi),%eax // *yptr
1306               notl    %eax             // ~ *yptr
1307               andl    %eax,(%edx)      // *xptr &= ...
1308               decl    %ecx
1309               jnz     L(acld1)
1310 L(acld2:)   popl    %esi            // restore %esi
1311             ret
1312
1313 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // For each of the count digits below xptr / yptr, walking downwards:
     // *xptr := *xptr | ~*yptr. Does nothing if count = 0.
1314             ALIGN
1315             DECLARE_FUNCTION(orc2_loop_down)
1316 C(orc2_loop_down:)
1317             pushl   %esi            // save %esi
1318             movl    8(%esp),%edx    // %edx = xptr
1319             movl    12(%esp),%esi   // %esi = yptr
1320             movl    16(%esp),%ecx   // %ecx = count
1321             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1322             jecxz   L(ocld2)        // %ecx = 0 ?
1323 L(ocld1:)     leal    -4(%edx),%edx    // xptr--, yptr--
1324               movl    (%edx,%esi),%eax // *yptr
1325               notl    %eax             // ~ *yptr
1326               orl     %eax,(%edx)      // *xptr |= ...
1327               decl    %ecx
1328               jnz     L(ocld1)
1329 L(ocld2:)   popl    %esi            // restore %esi
1330             ret
1331
1332 // extern void not_loop_down (uintD* xptr, uintC count);
     // Inverts in place each of the count digits below xptr, walking
     // downwards: *xptr := ~*xptr. Does nothing if count = 0.
     // The local labels are named notld1/notld2 so that they cannot clash
     // with the nld1/nld2 labels of neg_loop_down, should both routines
     // ever be assembled into the same object.
1333             ALIGN
1334             DECLARE_FUNCTION(not_loop_down)
1335 C(not_loop_down:)
1336             movl    4(%esp),%edx    // %edx = xptr
1337             movl    8(%esp),%ecx    // %ecx = count
1338             jecxz   L(notld2)       // %ecx = 0 ?
1339             nop ; nop ; nop ; nop ; nop ; nop // NOTE(review): presumably padding in front of the loop head
1340 L(notld1:)    leal    -4(%edx),%edx    // xptr--
1341               notl    (%edx)           // ~= *xptr
1342               decl    %ecx
1343               jnz     L(notld1)
1344 L(notld2:)  ret
1345
1346 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Tests, walking downwards over the count digits below xptr / yptr,
     // whether some pair of digits has a bit in common (*xptr & *yptr /= 0).
     // Stops at the first such pair. Nothing is written to memory.
     // Result: 1 if such a pair exists, else 0 (also for count = 0).
1347             ALIGN
1348             DECLARE_FUNCTION(and_test_loop_down)
1349 C(and_test_loop_down:)
1350             pushl   %esi            // save %esi
1351             movl    8(%esp),%edx    // %edx = xptr
1352             movl    12(%esp),%esi   // %esi = yptr
1353             movl    16(%esp),%ecx   // %ecx = count
1354             jecxz   L(atld2)        // %ecx = 0 ?
1355             subl    %edx,%esi       // %esi = yptr - xptr, so (%edx,%esi) addresses the y digit
1356 L(atld1:)     leal    -4(%edx),%edx    // xptr--, yptr--
1357               movl    (%edx,%esi),%eax // *yptr
1358               andl    (%edx),%eax      // *xptr & ...
1359               jnz     L(atld3)
1360               decl    %ecx
1361               jnz     L(atld1)
1362 L(atld2:)   xorl    %eax,%eax       // result 0
1363             popl    %esi            // restore %esi
1364             ret
1365 L(atld3:)   movl    $1,%eax         // result 1 (exactly 1, not just any value /= 0 !)
1366             popl    %esi            // restore %esi
1367             ret
1368
1369 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);
     // Compares the count 32-bit words below xptr with the count words below
     // yptr, walking downwards from xptr[-1] / yptr[-1], and stops at the
     // first pair that differs.
     // Result in %eax: 0 if all pairs are equal (in particular for count = 0),
     // +1 if the first differing x word is larger (unsigned), -1 if it is smaller.
1370             ALIGN
1371             DECLARE_FUNCTION(compare_loop_down)
1372 C(compare_loop_down:)
1373             movl    %esi,%edx       // save %esi in %edx
1374             movl    %edi,%eax       // save %edi in %eax
1375             movl    4(%esp),%esi    // %esi = xptr
1376             movl    8(%esp),%edi    // %edi = yptr
1377             movl    12(%esp),%ecx   // %ecx = count
1378             leal    -4(%esi),%esi   // %esi = &xptr[-1], first word to compare
1379             leal    -4(%edi),%edi   // %edi = &yptr[-1]
1380             dir1start
                 cmpl    %ecx,%ecx       // preset flags to Z,NC: with count = 0 the
                                         // repz below executes nothing and would leave
                                         // the caller's stale flags to decide the result
1381             repz                    // if %ecx > 0:
1382               cmpsl                 // compare (%esi) with (%edi) up to %ecx times, moving downwards,
1383                                     // and keep looping while Z, i.e. while (%edi)=(%esi).
1384             dir1end
1385             // Flags -> result:
1386             // Z,NC -> (%esi)-(%edi) = 0 up to the end -> x=y -> result 0
1387             // NZ,C -> finally (%esi)-(%edi) < 0 -> x < y -> result -1
1388             // NZ,NC -> finally (%esi)-(%edi) > 0 -> x > y -> result +1
1389             movl    %eax,%edi       // restore %edi (movl leaves the flags alone)
1390             movl    %edx,%esi       // restore %esi
1391             jbe     L(cmld1)        // "be" = Z or C
1392             movl    $1,%eax         // result +1
1393             ret
1394 L(cmld1:)   sbbl    %eax,%eax       // result -1 (if C) or 0 (if NC)
1395             ret
1396
1397 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Walking upwards, stores *sourceptr1 + *sourceptr2 + carry into *destptr
     // for count 32-bit words, starting with carry = 0.
     // Returns the final carry as 0 or -1 (0 for count = 0).
1398             ALIGN
1399             DECLARE_FUNCTION(add_loop_up)
1400 C(add_loop_up:)
1401             pushl   %esi            // save %esi
1402             pushl   %edi            // save %edi
1403             movl    12(%esp),%edx   // %edx = sourceptr1
1404             movl    16(%esp),%esi   // %esi = sourceptr2
1405             movl    20(%esp),%edi   // %edi = destptr
1406             movl    24(%esp),%ecx   // %ecx = count
1407             subl    %edi,%edx       // %edx = sourceptr1 - destptr
1408             subl    %edi,%esi       // %esi = sourceptr2 - destptr
1409             orl     %ecx,%ecx       // %ecx = 0 ?, also clears the carry
1410             jz      L(alu2)
1411 L(alu1:)      movl    (%edx,%edi),%eax // *sourceptr1
1412               adcl    (%esi,%edi),%eax // + *sourceptr2 + carry
1413               movl    %eax,(%edi)     // =: *destptr, keeping the new carry
1414               leal    4(%edi),%edi    // sourceptr1++, sourceptr2++, destptr++
1415               decl    %ecx            // decl leaves the carry flag untouched
1416               jnz     L(alu1)
1417 L(alu2:)    sbbl    %eax,%eax      // result := - carry
1418             popl    %edi           // restore %edi
1419             popl    %esi           // restore %esi
1420             ret
1421
1422 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Walking upwards, performs *destptr += *sourceptr + carry for count
     // 32-bit words, starting with carry = 0.
     // Returns the final carry as 0 or -1 (0 for count = 0).
1423             ALIGN
1424             DECLARE_FUNCTION(addto_loop_up)
1425 C(addto_loop_up:)
1426             pushl   %edi            // save %edi
1427             movl    8(%esp),%edx    // %edx = sourceptr
1428             movl    12(%esp),%edi   // %edi = destptr
1429             movl    16(%esp),%ecx   // %ecx = count
1430             subl    %edi,%edx       // %edx = sourceptr - destptr
1431             orl     %ecx,%ecx       // %ecx = 0 ?, also clears the carry
1432             jz      L(atlu2)
1433 L(atlu1:)     movl    (%edx,%edi),%eax // *sourceptr
1434               adcl    %eax,(%edi)     // + *destptr + carry =: *destptr, new carry
1435               leal    4(%edi),%edi    // sourceptr++, destptr++
1436               decl    %ecx            // decl leaves the carry flag untouched
1437               jnz     L(atlu1)
1438 L(atlu2:)   sbbl    %eax,%eax       // result := - carry
1439             popl    %edi            // restore %edi
1440             ret
1441
1442 // extern uintD inc_loop_up (uintD* ptr, uintC count);
     // Walking upwards from ptr, increments 32-bit words until one of them
     // does not overflow. Returns 0 if the carry was absorbed within count
     // words, 1 otherwise (also for count = 0).
1443             ALIGN
1444             DECLARE_FUNCTION(inc_loop_up)
1445 C(inc_loop_up:)
1446             movl    4(%esp),%edx    // %edx = ptr
1447             movl    8(%esp),%ecx    // %ecx = count
1448             jecxz   L(ilu2)         // %ecx = 0 ?
1449 L(ilu1:)      addl    $1,(%edx)       // (*ptr)++
1450               jnc     L(ilu3)         // no carry -> done
1451               leal    4(%edx),%edx    // ptr++
1452               decl    %ecx
1453               jnz     L(ilu1)
1454 L(ilu2:)    movl    $1,%eax         // result := 1
1455             ret
1456 L(ilu3:)    xorl    %eax,%eax       // result := 0
1457             ret
1458
1459 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
     // Walking upwards, stores *sourceptr1 - *sourceptr2 - carry into *destptr
     // for count 32-bit words, starting with carry = 0.
     // Returns the final borrow as 0 or -1 (0 for count = 0).
1460             ALIGN
1461             DECLARE_FUNCTION(sub_loop_up)
1462 C(sub_loop_up:)
1463             pushl   %esi            // save %esi
1464             pushl   %edi            // save %edi
1465             movl    12(%esp),%edx   // %edx = sourceptr1
1466             movl    16(%esp),%esi   // %esi = sourceptr2
1467             movl    20(%esp),%edi   // %edi = destptr
1468             movl    24(%esp),%ecx   // %ecx = count
1469             subl    %edi,%edx       // %edx = sourceptr1 - destptr
1470             subl    %edi,%esi       // %esi = sourceptr2 - destptr
1471             orl     %ecx,%ecx       // %ecx = 0 ?, also clears the carry
1472             jz      L(slu2)
1473 L(slu1:)      movl    (%edx,%edi),%eax // *sourceptr1
1474               sbbl    (%esi,%edi),%eax // - *sourceptr2 - carry
1475               movl    %eax,(%edi)     // =: *destptr, keeping the new carry
1476               leal    4(%edi),%edi    // sourceptr1++, sourceptr2++, destptr++
1477               decl    %ecx            // decl leaves the carry flag untouched
1478               jnz     L(slu1)
1479 L(slu2:)    sbbl    %eax,%eax      // result := - carry
1480             popl    %edi           // restore %edi
1481             popl    %esi           // restore %esi
1482             ret
1483
1484 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
     // Like sub_loop_up, but the initial borrow is taken from bit 31 of the
     // carry argument (expected to be 0 or -1).
     // Returns the final borrow as 0 or -1; for count = 0 returns carry unchanged.
1485             ALIGN
1486             DECLARE_FUNCTION(subx_loop_up)
1487 C(subx_loop_up:)
1488             pushl   %esi            // save %esi
1489             pushl   %edi            // save %edi
1490             movl    12(%esp),%edx   // %edx = sourceptr1
1491             movl    16(%esp),%esi   // %esi = sourceptr2
1492             movl    20(%esp),%edi   // %edi = destptr
1493             movl    24(%esp),%ecx   // %ecx = count
1494             jecxz   L(sxlu2)        // %ecx = 0 ?
1495             subl    %edi,%edx       // %edx = sourceptr1 - destptr
1496             subl    %edi,%esi       // %esi = sourceptr2 - destptr
1497             movl    28(%esp),%eax   // carry, 0 or -1
1498             addl    %eax,%eax       // move its bit 31 into the carry flag
1499             nop ; nop               // padding before the loop label (presumably for alignment)
1500 L(sxlu1:)     movl    (%edx,%edi),%eax // *sourceptr1
1501               sbbl    (%esi,%edi),%eax // - *sourceptr2 - carry
1502               movl    %eax,(%edi)     // =: *destptr, keeping the new carry
1503               leal    4(%edi),%edi    // sourceptr1++, sourceptr2++, destptr++
1504               decl    %ecx            // decl leaves the carry flag untouched
1505               jnz     L(sxlu1)
1506             sbbl    %eax,%eax      // result := - carry
1507             popl    %edi           // restore %edi
1508             popl    %esi           // restore %esi
1509             ret
1510 L(sxlu2:)   movl    28(%esp),%eax  // result := carry
1511             popl    %edi           // restore %edi
1512             popl    %esi           // restore %esi
1513             ret
1514
1515 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Walking upwards, performs *destptr -= *sourceptr + carry for count
     // 32-bit words, starting with carry = 0.
     // Returns the final borrow as 0 or -1 (0 for count = 0).
1516             ALIGN
1517             DECLARE_FUNCTION(subfrom_loop_up)
1518 C(subfrom_loop_up:)
1519             pushl   %edi            // save %edi
1520             movl    8(%esp),%edx    // %edx = sourceptr
1521             movl    12(%esp),%edi   // %edi = destptr
1522             movl    16(%esp),%ecx   // %ecx = count
1523             subl    %edi,%edx       // %edx = sourceptr - destptr
1524             orl     %ecx,%ecx       // %ecx = 0 ?, also clears the carry
1525             jz      L(sflu2)
1526 L(sflu1:)     movl    (%edx,%edi),%eax // *sourceptr
1527               sbbl    %eax,(%edi)     // *destptr - *sourceptr - carry =: *destptr, new carry
1528               leal    4(%edi),%edi    // sourceptr++, destptr++
1529               decl    %ecx            // decl leaves the carry flag untouched
1530               jnz     L(sflu1)
1531 L(sflu2:)   sbbl    %eax,%eax       // result := - carry
1532             popl    %edi            // restore %edi
1533             ret
1534
1535 // extern uintD dec_loop_up (uintD* ptr, uintC count);
     // Walking upwards from ptr, decrements 32-bit words until one of them
     // does not underflow. Returns 0 if the borrow was absorbed within count
     // words, -1 otherwise (also for count = 0).
1536             ALIGN
1537             DECLARE_FUNCTION(dec_loop_up)
1538 C(dec_loop_up:)
1539             movl    4(%esp),%edx    // %edx = ptr
1540             movl    8(%esp),%ecx    // %ecx = count
1541             jecxz   L(dlu2)         // %ecx = 0 ?
1542 L(dlu1:)      subl    $1,(%edx)       // (*ptr)--
1543               jnc     L(dlu3)         // no carry -> done
1544               leal    4(%edx),%edx    // ptr++
1545               decl    %ecx
1546               jnz     L(dlu1)
1547 L(dlu2:)    movl    $-1,%eax        // result := -1
1548             ret
1549 L(dlu3:)    xorl    %eax,%eax       // result := 0
1550             ret
1551
1552 // extern uintD neg_loop_up (uintD* ptr, uintC count);
     // Negates count 32-bit words in place, walking upwards from ptr: words
     // are negated up to and including the first nonzero one (zero words stay
     // zero), all words after it are inverted.
     // Returns 0 if every word was zero (or count = 0), -1 otherwise.
1553             ALIGN
1554             DECLARE_FUNCTION(neg_loop_up)
1555 C(neg_loop_up:)
1556             movl    4(%esp),%edx    // %edx = ptr
1557             movl    8(%esp),%ecx    // %ecx = count
1558             // search for the first nonzero digit:
1559             jecxz   L(nlu2)         // %ecx = 0 ?
1560 L(nlu1:)      negl    (%edx)          // *ptr = - *ptr, sets Z iff the word is zero
1561               jnz     L(nlu3)
1562               leal    4(%edx),%edx    // ptr++
1563               decl    %ecx
1564               jnz     L(nlu1)
1565 L(nlu2:)    xorl    %eax,%eax       // result := 0
1566             ret
1567             nop ; nop ; nop ; nop ; nop ; nop // padding (presumably for alignment)
1568 L(nlu3:)    // first nonzero digit found, from now on there is always a carry
1569             // invert all remaining digits:
1570             decl    %ecx
1571             jz      L(nlu5)
1572 L(nlu4:)      leal    4(%edx),%edx    // ptr++
1573               notl    (%edx)          // *ptr = ~ *ptr
1574               decl    %ecx
1575               jnz     L(nlu4)
1576 L(nlu5:)    movl    $-1,%eax        // result := -1
1577             ret
1578
1579 // extern uintD shift1left_loop_up (uintD* ptr, uintC count);
     // Shifts count 32-bit words left by one bit in place, walking upwards
     // from ptr; the bit shifted out of each word enters the next one, and a
     // 0 bit enters the first word.
     // Returns the bit shifted out of the last word as 0 or -1 (0 for count = 0).
1580             ALIGN
1581             DECLARE_FUNCTION(shift1left_loop_up)
1582 C(shift1left_loop_up:)
1583             movl    4(%esp),%edx    // %edx = ptr
1584             movl    8(%esp),%ecx    // %ecx = count
1585             orl     %ecx,%ecx       // %ecx = 0 ?, also clears the carry
1586             jz      L(s1llu2)
1587             nop ; nop ; nop ; nop   // padding before the loop label (presumably for alignment)
1588 L(s1llu1:)    rcll    $1,(%edx)       // rotate *ptr and the carry left by 1 bit
1589               leal    4(%edx),%edx    // ptr++
1590               decl    %ecx            // decl leaves the carry flag untouched
1591               jnz     L(s1llu1)
1592 L(s1llu2:)  sbbl    %eax,%eax       // result := - carry
1593             ret
1594
1595 // extern uintD shiftleft_loop_up (uintD* ptr, uintC count, uintC i, uintD carry);
     // Shifts count 32-bit words left by i bits in place, walking upwards from
     // ptr. The carry argument is OR-ed into the first shifted word; the top
     // i bits of each original word enter the bottom of the next word.
     // Returns the top i bits of the last original word (in the low bits of
     // %eax); for count = 0 returns carry.
     // The loop is unrolled twice, alternating %eax and %ebx as the holder of
     // the previous original word.
1596             ALIGN
1597             DECLARE_FUNCTION(shiftleft_loop_up)
1598 C(shiftleft_loop_up:)
1599             pushl   %edi            // save %edi
1600             pushl   %ebx            // save %ebx
1601             movl    12(%esp),%edi   // %edi = ptr
1602             movl    16(%esp),%edx   // %edx = count
1603             movb    20(%esp),%cl    // %cl = i
1604             orl     %edx,%edx       // count = 0 ?
1605             jz      L(sllu4)
1606             // shift the first digit:
1607             movl    (%edi),%eax     // keep the digit in %eax
1608             movl    %eax,%ebx       // and compute in %ebx:
1609             shll    %cl,%ebx        // shift left by i bits
1610             orl     24(%esp),%ebx   // and insert carry as the lower i bits
1611             movl    %ebx,(%edi)     // and store it back
1612             leal    4(%edi),%edi    // ptr++
1613             // Previous digit is in %eax.
1614             decl    %edx
1615             jz      L(sllu2)
1616             nop ; nop ; nop ; nop   // padding before the loop label (presumably for alignment)
1617 L(sllu1:)     // shift another digit:
1618               movl    (%edi),%ebx
1619               shldl   shcl %eax,(%edi) // shift (%edi) left by %cl=i bits, shifting in %eax from the right
1620               leal    4(%edi),%edi    // ptr++
1621               // Previous digit is in %ebx.
1622               decl    %edx
1623               jz      L(sllu3)
1624               // shift another digit:
1625               movl    (%edi),%eax
1626               shldl   shcl %ebx,(%edi) // shift (%edi) left by %cl=i bits, shifting in %ebx from the right
1627               leal    4(%edi),%edi    // ptr++
1628               // Previous digit is in %eax.
1629               decl    %edx
1630               jnz     L(sllu1)
1631 L(sllu2:)   movl    %eax,%ebx       // last original digit -> %ebx
1632 L(sllu3:)   xorl    %eax,%eax       // %eax := 0
1633             shldl   shcl %ebx,%eax  // %eax := highest %cl=i bits of %ebx
1634             popl    %ebx            // restore %ebx
1635             popl    %edi            // restore %edi
1636             ret
1637 L(sllu4:)   movl    24(%esp),%eax   // %eax := carry
1638             popl    %ebx            // restore %ebx
1639             popl    %edi            // restore %edi
1640             ret
1641
1642 #endif
1643
1644 // extern uintD shiftleftcopy_loop_up (uintD* sourceptr, uintD* destptr, uintC count, uintC i);
     // Copies count 32-bit words from sourceptr to destptr, walking upwards
     // and shifting the whole sequence left by i bits; zeros enter the bottom
     // of the first word.
     // Returns the last source word shifted right by 32-i bits, i.e. the bits
     // shifted out at the top; returns 0 for count = 0.
     // The loop is unrolled twice, alternating %ebx and %eax as the holder of
     // the previous source word.
1645             ALIGN
1646             DECLARE_FUNCTION(shiftleftcopy_loop_up)
1647 C(shiftleftcopy_loop_up:)
1648             pushl   %esi            // save %esi
1649             pushl   %edi            // save %edi
1650             pushl   %ebx            // save %ebx
1651             movl    16(%esp),%esi   // %esi = sourceptr
1652             movl    20(%esp),%edi   // %edi = destptr
1653             movl    24(%esp),%edx   // count
1654             movb    28(%esp),%cl    // i
1655             orl     %edx,%edx       // count = 0 ?
1656             jz      L(slclu4)
1657             subl    %edi,%esi       // %esi = sourceptr - destptr, so (%edi,%esi) addresses the source word
1658             // shift the first digit:
1659             movl    (%edi,%esi),%ebx // keep *sourceptr in %ebx
1660             movl    %ebx,%eax       // and compute in %eax:
1661             shll    %cl,%eax        // shift left by i bits, zeros enter from the right
1662             movl    %eax,(%edi)     // and store it as *destptr
1663             leal    4(%edi),%edi    // sourceptr++, destptr++
1664             // Previous digit is in %ebx.
1665             negb    %cl             // 32-i
1666             decl    %edx
1667             jz      L(slclu2)
1668 L(slclu1:)    // shift another digit:
1669               movl    (%edi,%esi),%eax // next digit into %eax
1670               shrdl   shcl %eax,%ebx  // shift %ebx right by %cl=32-i bits, shifting in %eax from the left
1671               movl    %ebx,(%edi)     // store %ebx as *destptr
1672               leal    4(%edi),%edi    // sourceptr++, destptr++
1673               // Previous digit is in %eax.
1674               decl    %edx
1675               jz      L(slclu3)
1676               // shift another digit:
1677               movl    (%edi,%esi),%ebx // next digit into %ebx
1678               shrdl   shcl %ebx,%eax  // shift %eax right by %cl=32-i bits, shifting in %ebx from the left
1679               movl    %eax,(%edi)     // store %eax as *destptr
1680               leal    4(%edi),%edi    // sourceptr++, destptr++
1681               // Previous digit is in %ebx.
1682               decl    %edx
1683               jnz     L(slclu1)
1684 L(slclu2:)  movl    %ebx,%eax       // last source digit -> %eax
1685 L(slclu3:)  shrl    %cl,%eax        // shift %eax right by 32-i bits
1686             popl    %ebx            // restore %ebx
1687             popl    %edi            // restore %edi
1688             popl    %esi            // restore %esi
1689             ret
1690 L(slclu4:)  xorl    %eax,%eax       // %eax := 0
1691             popl    %ebx            // restore %ebx
1692             popl    %edi            // restore %edi
1693             popl    %esi            // restore %esi
1694             ret
1695
1696 #if !CL_DS_BIG_ENDIAN_P
1697
1698 // extern uintD shift1right_loop_down (uintD* ptr, uintC count, uintD carry);
     // Shifts count 32-bit words right by one bit in place, walking downwards
     // from ptr[-1]; bit 31 of the carry argument enters the top of the first
     // word, and the bit shifted out of each word enters the next one.
     // Returns the bit shifted out of the last word as 0 or -1; for count = 0
     // returns carry unchanged.
1699             ALIGN
1700             DECLARE_FUNCTION(shift1right_loop_down)
1701 C(shift1right_loop_down:)
1702             movl    4(%esp),%edx    // %edx = ptr
1703             movl    8(%esp),%ecx    // %ecx = count
1704             movl    12(%esp),%eax   // %eax = carry (0 or -1)
1705             jecxz   L(s1rlu3)       // %ecx = 0 ?
1706             addl    %eax,%eax       // carry flag := bit 31 of carry
1707 L(s1rlu1:)    leal    -4(%edx),%edx   // ptr--
1708               rcrl    $1,(%edx)       // rotate *ptr and the carry right by 1 bit
1709               decl    %ecx            // decl leaves the carry flag untouched
1710               jnz     L(s1rlu1)
1711 L(s1rlu2:)  sbbl    %eax,%eax       // result := - carry
1712 L(s1rlu3:)  ret
1713
1714 // extern uintD shiftright_loop_down (uintD* ptr, uintC count, uintC i);
     // Shifts count 32-bit words right by i bits in place, walking downwards
     // from ptr[-1]; zeros enter the top of the first word, and the low i
     // bits of each original word enter the top of the next word.
     // Returns the low i bits of the last original word, placed in bits
     // 31..32-i of %eax; returns 0 for count = 0.
     // The loop is unrolled twice, alternating %eax and %ebx as the holder of
     // the previous original word.
1715             ALIGN
1716             DECLARE_FUNCTION(shiftright_loop_down)
1717 C(shiftright_loop_down:)
1718             pushl   %edi            // save %edi
1719             pushl   %ebx            // save %ebx
1720             movl    12(%esp),%edi   // %edi = ptr
1721             movl    16(%esp),%edx   // %edx = count
1722             movb    20(%esp),%cl    // %cl = i
1723             orl     %edx,%edx       // count = 0 ?
1724             jz      L(srld4)
1725             // shift the first digit:
1726             leal    -4(%edi),%edi   // ptr--
1727             movl    (%edi),%eax     // keep the digit in %eax
1728             movl    %eax,%ebx       // and compute in %ebx:
1729             shrl    %cl,%ebx        // shift right by i bits
1730             movl    %ebx,(%edi)     // and store it back
1731             // Previous digit is in %eax.
1732             decl    %edx
1733             jz      L(srld2)
1734 L(srld1:)     // shift another digit:
1735               leal    -4(%edi),%edi   // ptr--
1736               movl    (%edi),%ebx
1737               shrdl   shcl %eax,(%edi) // shift (%edi) right by %cl=i bits, shifting in %eax from the left
1738               // Previous digit is in %ebx.
1739               decl    %edx
1740               jz      L(srld3)
1741               // shift another digit:
1742               leal    -4(%edi),%edi   // ptr--
1743               movl    (%edi),%eax
1744               shrdl   shcl %ebx,(%edi) // shift (%edi) right by %cl=i bits, shifting in %ebx from the left
1745               // Previous digit is in %eax.
1746               decl    %edx
1747               jnz     L(srld1)
1748 L(srld2:)   movl    %eax,%ebx       // last original digit -> %ebx
1749 L(srld3:)   xorl    %eax,%eax       // %eax := 0
1750             shrdl   shcl %ebx,%eax  // %eax := lowest %cl=i bits of %ebx, as bits 31..32-i
1751             popl    %ebx            // restore %ebx
1752             popl    %edi            // restore %edi
1753             ret
1754 L(srld4:)   xorl    %eax,%eax       // %eax := 0
1755             popl    %ebx            // restore %ebx
1756             popl    %edi            // restore %edi
1757             ret
1758
1759 // extern uintD shiftrightsigned_loop_down (uintD* ptr, uintC count, uintC i);
     // Like shiftright_loop_down, but the first word (ptr[-1]) is shifted
     // arithmetically, so its sign bit is replicated into the vacated bits.
     // Returns the low i bits of the last original word, placed in bits
     // 31..32-i of %eax.
     // There is no check for count = 0: the first word is always processed.
1760             ALIGN
1761             DECLARE_FUNCTION(shiftrightsigned_loop_down)
1762 C(shiftrightsigned_loop_down:)
1763             pushl   %edi            // save %edi
1764             pushl   %ebx            // save %ebx
1765             movl    12(%esp),%edi   // %edi = ptr
1766             movl    16(%esp),%edx   // %edx = count
1767             movb    20(%esp),%cl    // %cl = i
1768             // shift the first digit:
1769             leal    -4(%edi),%edi   // ptr--
1770             movl    (%edi),%eax     // keep the digit in %eax
1771             movl    %eax,%ebx       // and compute in %ebx:
1772             sarl    %cl,%ebx        // shift right by i bits, replicating the sign bit
1773             movl    %ebx,(%edi)     // and store it back
1774             // Previous digit is in %eax.
1775             decl    %edx
1776             jz      L(srsld2)
1777             nop ; nop ; nop ; nop   // padding before the loop label (presumably for alignment)
1778 L(srsld1:)    // shift another digit:
1779               leal    -4(%edi),%edi   // ptr--
1780               movl    (%edi),%ebx
1781               shrdl   shcl %eax,(%edi) // shift (%edi) right by %cl=i bits, shifting in %eax from the left
1782               // Previous digit is in %ebx.
1783               decl    %edx
1784               jz      L(srsld3)
1785               // shift another digit:
1786               leal    -4(%edi),%edi   // ptr--
1787               movl    (%edi),%eax
1788               shrdl   shcl %ebx,(%edi) // shift (%edi) right by %cl=i bits, shifting in %ebx from the left
1789               // Previous digit is in %eax.
1790               decl    %edx
1791               jnz     L(srsld1)
1792 L(srsld2:)  movl    %eax,%ebx       // last original digit -> %ebx
1793 L(srsld3:)  xorl    %eax,%eax       // %eax := 0
1794             shrdl   shcl %ebx,%eax  // %eax := lowest %cl=i bits of %ebx, as bits 31..32-i
1795             popl    %ebx            // restore %ebx
1796             popl    %edi            // restore %edi
1797             ret
1798
1799 // extern uintD shiftrightcopy_loop_down (uintD* sourceptr, uintD* destptr, uintC count, uintC i, uintD carry);
     // Copies count 32-bit words from below sourceptr to below destptr,
     // walking downwards and shifting the whole sequence right by i bits.
     // The carry argument, shifted left by 32-i bits, supplies the bits that
     // enter the top of the first word.
     // Returns the last source word shifted left by 32-i bits; for count = 0
     // returns carry shifted left by 32-i bits.
     // The loop is unrolled twice, alternating %ebx and %eax as the holder of
     // the previous source word.
1800             ALIGN
1801             DECLARE_FUNCTION(shiftrightcopy_loop_down)
1802 C(shiftrightcopy_loop_down:)
1803             pushl   %esi            // save %esi
1804             pushl   %edi            // save %edi
1805             pushl   %ebx            // save %ebx
1806             movl    16(%esp),%esi   // %esi = sourceptr
1807             movl    20(%esp),%edi   // %edi = destptr
1808             movl    24(%esp),%edx   // count
1809             movb    28(%esp),%cl    // i
1810             negb    %cl             // 32-i
1811             movl    32(%esp),%eax   // %eax = carry
1812             orl     %edx,%edx       // count = 0 ?
1813             jz      L(srclu3)
1814             subl    %edi,%esi       // %esi = sourceptr - destptr, so (%edi,%esi) addresses the source word
1815             // shift the first digit:
1816             leal    -4(%edi),%edi   // sourceptr--, destptr--
1817             movl    (%edi,%esi),%ebx // keep *sourceptr in %ebx
1818             shldl   shcl %ebx,%eax  // shift carry left by %cl=32-i bits, shifting in *sourceptr
1819             movl    %eax,(%edi)     // and store it as *destptr
1820             // Previous digit is in %ebx.
1821             decl    %edx
1822             jz      L(srclu2)
1823             nop ; nop ; nop         // padding before the loop label (presumably for alignment)
1824 L(srclu1:)    // shift another digit:
1825               leal    -4(%edi),%edi   // sourceptr--, destptr--
1826               movl    (%edi,%esi),%eax // next digit into %eax
1827               shldl   shcl %eax,%ebx  // shift %ebx left by %cl=32-i bits, shifting in %eax from the right
1828               movl    %ebx,(%edi)     // store %ebx as *destptr
1829               // Previous digit is in %eax.
1830               decl    %edx
1831               jz      L(srclu3)
1832               // shift another digit:
1833               leal    -4(%edi),%edi   // sourceptr--, destptr--
1834               movl    (%edi,%esi),%ebx // next digit into %ebx
1835               shldl   shcl %ebx,%eax  // shift %eax left by %cl=32-i bits, shifting in %ebx from the right
1836               movl    %eax,(%edi)     // store %eax as *destptr
1837               // Previous digit is in %ebx.
1838               decl    %edx
1839               jnz     L(srclu1)
1840 L(srclu2:)  movl    %ebx,%eax       // last source digit -> %eax
1841 L(srclu3:)  shll    %cl,%eax        // shift %eax left by 32-i bits
1842             popl    %ebx            // restore %ebx
1843             popl    %edi            // restore %edi
1844             popl    %esi            // restore %esi
1845             ret
1846
1847 // extern uintD mulusmall_loop_up (uintD digit, uintD* ptr, uintC len, uintD newdigit);
     // Multiplies len 32-bit words in place by digit, walking upwards from
     // ptr: each word becomes the low half of digit * word + carry, the high
     // half (plus overflow) becomes the next carry. The initial carry is
     // newdigit. Returns the final carry (newdigit for len = 0).
1848             ALIGN
1849             DECLARE_FUNCTION(mulusmall_loop_up)
1850 C(mulusmall_loop_up:)
1851             pushl   %ebp            // save %ebp
1852             pushl   %edi            // save %edi
1853             pushl   %ebx            // save %ebx
1854             movl    16(%esp),%ebx   // %ebx = digit
1855             movl    20(%esp),%edi   // %edi = ptr
1856             movl    24(%esp),%ecx   // %ecx = len
1857             movl    28(%esp),%ebp   // %ebp = carry := newdigit
1858             leal    (%edi,%ecx,4),%edi // %edi = &ptr[len]
1859             negl    %ecx            // %ecx = -count, counts up to 0; sets Z iff len = 0
1860             jz      L(mslu2)
1861 L(mslu1:)     movl    (%edi,%ecx,4),%eax // *ptr
1862               mull    %ebx               // %edx|%eax := digit * *ptr
1863               addl    %ebp,%eax          // add carry to the low part of the product
1864               movl    $0,%ebp            // movl (unlike xorl) leaves the carry flag untouched
1865               adcl    %edx,%ebp          // add the overflow to the high part %edx, gives the new carry
1866               movl    %eax,(%edi,%ecx,4) // store the low part as *ptr
1867               incl    %ecx               // count--, ptr++
1868               jnz     L(mslu1)
1869 L(mslu2:)   movl    %ebp,%eax       // result := last carry
1870             popl    %ebx            // restore %ebx
1871             popl    %edi            // restore %edi
1872             popl    %ebp            // restore %ebp
1873             ret
1874
1875 // extern void mulu_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Multiplies len 32-bit words starting at sourceptr by digit, walking
     // upwards, and stores the len low result words at destptr; the final
     // carry is stored as an additional word at destptr[len].
     // There is no check for len = 0: the loop body always runs at least once.
1876             ALIGN
1877             DECLARE_FUNCTION(mulu_loop_up)
1878 C(mulu_loop_up:)
1879             pushl   %ebp            // save %ebp
1880             pushl   %edi            // save %edi
1881             pushl   %esi            // save %esi
1882             pushl   %ebx            // save %ebx
1883             movl    20(%esp),%ebx   // %ebx = digit
1884             movl    24(%esp),%esi   // %esi = sourceptr
1885             movl    28(%esp),%edi   // %edi = destptr
1886             movl    32(%esp),%ecx   // %ecx = len
1887             leal    (%esi,%ecx,4),%esi // %esi = &sourceptr[len]
1888             leal    (%edi,%ecx,4),%edi // %edi = &destptr[len]
1889             negl    %ecx            // %ecx = -count, counts up to 0
1890             xorl    %ebp,%ebp       // %ebp = carry := 0
1891             nop ; nop               // padding before the loop label (presumably for alignment)
1892 L(mulu1:)     movl    (%esi,%ecx,4),%eax // *sourceptr
1893               mull    %ebx               // %edx|%eax := digit * *sourceptr
1894               addl    %ebp,%eax          // add carry to the low part of the product
1895               movl    $0,%ebp            // movl (unlike xorl) leaves the carry flag untouched
1896               adcl    %edx,%ebp          // add the overflow to the high part %edx, gives the new carry
1897               movl    %eax,(%edi,%ecx,4) // store the low part as *destptr
1898               incl    %ecx               // count--, sourceptr++, destptr++
1899               jnz     L(mulu1)
1900             movl    %ebp,(%edi)     // store the last carry at destptr[len]
1901             popl    %ebx            // restore %ebx
1902             popl    %esi            // restore %esi
1903             popl    %edi            // restore %edi
1904             popl    %ebp            // restore %ebp
1905             ret
1906
1907 // extern uintD muluadd_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Walking upwards over len 32-bit words, adds digit * *sourceptr + carry
     // to *destptr, starting with carry = 0. Returns the final carry.
     // There is no check for len = 0: the loop body always runs at least once.
1908             ALIGN
1909             DECLARE_FUNCTION(muluadd_loop_up)
1910 C(muluadd_loop_up:)
1911             pushl   %ebp            // save %ebp
1912             pushl   %edi            // save %edi
1913             pushl   %esi            // save %esi
1914             pushl   %ebx            // save %ebx
1915             movl    20(%esp),%ebx   // %ebx = digit
1916             movl    24(%esp),%esi   // %esi = sourceptr
1917             movl    28(%esp),%edi   // %edi = destptr
1918             movl    32(%esp),%ecx   // %ecx = len
1919             leal    (%esi,%ecx,4),%esi // %esi = &sourceptr[len]
1920             leal    (%edi,%ecx,4),%edi // %edi = &destptr[len]
1921             negl    %ecx            // %ecx = -count, counts up to 0
1922             xorl    %ebp,%ebp       // %ebp = carry := 0
1923             nop ; nop               // padding before the loop label (presumably for alignment)
1924 L(mualu1:)    movl    (%esi,%ecx,4),%eax // *sourceptr
1925               mull    %ebx               // %edx|%eax := digit * *sourceptr
1926               addl    %ebp,%eax          // add carry to the low part of the product
1927               movl    $0,%ebp            // movl (unlike xorl) leaves the carry flag untouched
1928               adcl    %ebp,%edx          // add the overflow to the high part %edx
1929               addl    %eax,(%edi,%ecx,4) // add the low part to *destptr
1930               adcl    %edx,%ebp          // add the second overflow to %edx, gives the new carry
1931               incl    %ecx               // count--, sourceptr++, destptr++
1932               jnz     L(mualu1)
1933             movl    %ebp,%eax       // result := last carry
1934             popl    %ebx            // restore %ebx
1935             popl    %esi            // restore %esi
1936             popl    %edi            // restore %edi
1937             popl    %ebp            // restore %ebp
1938             ret
1939
1940 // extern uintD mulusub_loop_up (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Walking upwards over len 32-bit words, subtracts digit * *sourceptr +
     // carry from *destptr, starting with carry = 0. Returns the final carry.
     // There is no check for len = 0: the loop body always runs at least once.
1941             ALIGN
1942             DECLARE_FUNCTION(mulusub_loop_up)
1943 C(mulusub_loop_up:)
1944             pushl   %ebp            // save %ebp
1945             pushl   %edi            // save %edi
1946             pushl   %esi            // save %esi
1947             pushl   %ebx            // save %ebx
1948             movl    20(%esp),%ebx   // %ebx = digit
1949             movl    24(%esp),%esi   // %esi = sourceptr
1950             movl    28(%esp),%edi   // %edi = destptr
1951             movl    32(%esp),%ecx   // %ecx = len
1952             leal    (%esi,%ecx,4),%esi // %esi = &sourceptr[len]
1953             leal    (%edi,%ecx,4),%edi // %edi = &destptr[len]
1954             negl    %ecx            // %ecx = -count, counts up to 0
1955             xorl    %ebp,%ebp       // %ebp = carry := 0
1956             nop ; nop               // padding before the loop label (presumably for alignment)
1957 L(muslu1:)    movl    (%esi,%ecx,4),%eax // *sourceptr
1958               mull    %ebx               // %edx|%eax := digit * *sourceptr
1959               addl    %ebp,%eax          // add carry to the low part of the product
1960               movl    $0,%ebp            // movl (unlike xorl) leaves the carry flag untouched
1961               adcl    %ebp,%edx          // add the overflow to the high part %edx
1962               subl    %eax,(%edi,%ecx,4) // subtract the low part from *destptr
1963               adcl    %edx,%ebp          // add the borrow to %edx, gives the new carry
1964               incl    %ecx               // count--, sourceptr++, destptr++
1965               jnz     L(muslu1)
1966             movl    %ebp,%eax       // result := last carry
1967             popl    %ebx            // restore %ebx
1968             popl    %esi            // restore %esi
1969             popl    %edi            // restore %edi
1970             popl    %ebp            // restore %ebp
1971             ret
1972
1973 // extern uintD divu_loop_down (uintD digit, uintD* ptr, uintC len);
     // Divides len 32-bit words in place by digit, walking downwards from
     // ptr[-1]: each step divides remainder|word by digit, stores the quotient
     // and carries the remainder on. The initial remainder is 0.
     // Returns the final remainder (0 for len = 0).
1974             ALIGN
1975             DECLARE_FUNCTION(divu_loop_down)
1976 C(divu_loop_down:)
1977             pushl   %edi            // save %edi
1978             pushl   %ebx            // save %ebx
1979             movl    12(%esp),%ebx   // %ebx = digit
1980             movl    16(%esp),%edi   // %edi = ptr
1981             movl    20(%esp),%ecx   // %ecx = len
1982             xorl    %edx,%edx       // %edx = remainder := 0
1983             jecxz   L(dld2)         // %ecx = 0 ?
1984 L(dld1:)      leal    -4(%edi),%edi   // ptr--
1985               movl    (%edi),%eax     // next digit *ptr
1986               divl    %ebx            // divide %edx|%eax by %ebx
1987               movl    %eax,(%edi)     // store the quotient %eax, keep the remainder in %edx
1988               decl    %ecx
1989               jnz     L(dld1)
1990 L(dld2:)    movl    %edx,%eax       // result := last remainder
1991             popl    %ebx            // restore %ebx
1992             popl    %edi            // restore %edi
1993             ret
1994
1995 // extern uintD divucopy_loop_down (uintD digit, uintD* sourceptr, uintD* destptr, uintC len);
     // Divides a digit sequence by a single 32-bit digit and writes the
     // quotient to a second digit sequence.
     // Starting with remainder 0, step k (k = 1..len) divides the 64-bit value
     // remainder|sourceptr[-k] by digit, stores the 32-bit quotient into
     // destptr[-k] and carries the remainder over into the next step.
     // This routine only stores through destptr.
     // Returns in %eax the last remainder, or 0 when len = 0.
     // Arguments are read from the stack. %edi, %esi and %ebx are saved and
     // restored, %eax, %ecx and %edx are overwritten.
     // NOTE(review): there is no check for digit = 0 here, in which case divl
     // would trap - presumably callers guarantee a nonzero digit.
1996             ALIGN
1997             DECLARE_FUNCTION(divucopy_loop_down)
1998 C(divucopy_loop_down:)
1999             pushl   %edi            // save %edi
2000             pushl   %esi            // save %esi
2001             pushl   %ebx            // save %ebx
2002             movl    16(%esp),%ebx   // %ebx = digit (offsets account for the three pushes)
2003             movl    20(%esp),%esi   // %esi = sourceptr
2004             movl    24(%esp),%edi   // %edi = destptr
2005             movl    28(%esp),%ecx   // %ecx = len
2006             xorl    %edx,%edx       // %edx = remainder := 0
2007             jecxz   L(dcld2)        // len = 0 ? then skip the loop, result stays 0
2008             subl    %edi,%esi       // %esi = sourceptr - destptr, so (%esi,%edi) addresses the source digit
2009 L(dcld1:)     leal    -4(%edi),%edi   // destptr--, which also moves the source address down
2010               movl    (%esi,%edi),%eax // next digit *sourceptr
2011               divl    %ebx            // divide %edx|%eax by %ebx
2012               movl    %eax,(%edi)     // store quotient %eax at *destptr, keep remainder in %edx
2013               decl    %ecx            // one digit done
2014               jnz     L(dcld1)        // repeat while digits remain
2015 L(dcld2:)   movl    %edx,%eax       // result := last remainder
2016             popl    %ebx            // restore %ebx
2017             popl    %esi            // restore %esi
2018             popl    %edi            // restore %edi
2019             ret
2020
2021 #endif
2022
2023 // extern void shiftxor_loop_up (uintD* xptr, const uintD* yptr, uintC count, uintC i);
     // XORs the digit sequence yptr[0..count-1], shifted left by i bits, into
     // the digit sequence starting at xptr:
     //   xptr[0]     ^= yptr[0] << i
     //   xptr[k]     ^= (yptr[k] << i) | (yptr[k-1] >> (32-i))   for 0 < k < count
     //   xptr[count] ^= yptr[count-1] >> (32-i)
     // so count+1 digits of xptr are modified. Nothing is done when count = 0.
     // Only the low byte of i is loaded (into %cl).
     // The main loop handles two digits per round and swaps the roles of %eax
     // and %ebx, so the previously read digit never has to be copied.
     // Arguments are read from the stack. %esi, %edi and %ebx are saved and
     // restored, %eax, %ecx and %edx are overwritten. No result is returned.
     // NOTE(review): presumably 0 < i < 32 is required. With i = 0 the negated
     // count in %cl is 0 and the right shifts would not shift at all - confirm
     // against callers.
2024             ALIGN
2025             DECLARE_FUNCTION(shiftxor_loop_up)
2026 C(shiftxor_loop_up:)
2027             pushl   %esi            // save %esi
2028             pushl   %edi            // save %edi
2029             pushl   %ebx            // save %ebx
2030             movl    16(%esp),%esi   // %esi = xptr
2031             movl    20(%esp),%edi   // %edi = yptr
2032             movl    24(%esp),%edx   // %edx = count
2033             movb    28(%esp),%cl    // %cl = i (low byte only)
2034             orl     %edx,%edx       // count = 0 ?
2035             jz      L(shxlu4)       // then nothing to do
2036             subl    %esi,%edi       // %edi = yptr - xptr, so (%esi,%edi) addresses *yptr
2037             // shift the first digit:
2038             movl    (%esi,%edi),%ebx // keep *yptr in %ebx
2039             movl    %ebx,%eax       // and compute in %eax:
2040             shll    %cl,%eax        // shift left by i bits, zeros come in from the right
2041             xorl    %eax,(%esi)     // combine with *xptr and store
2042             leal    4(%esi),%esi    // xptr++ (the yptr address follows, it is relative to %esi)
2043             // Previous digit is in %ebx.
2044             negb    %cl             // %cl = 32-i (modulo the shift count width)
2045             decl    %edx            // first digit done
2046             jz      L(shxlu2)       // count was 1: only the carry-out of %ebx remains
2047 L(shxlu1:)    // shift a further digit:
2048               movl    (%esi,%edi),%eax // next digit into %eax
2049               shrdl   shcl %eax,%ebx  // shift %ebx right by %cl=32-i bits, shifting %eax in from the left
2050               xorl    %ebx,(%esi)     // combine %ebx with *xptr and store
2051               leal    4(%esi),%esi    // xptr++, yptr++
2052               // Previous digit is in %eax.
2053               decl    %edx
2054               jz      L(shxlu3)       // done: carry-out comes from %eax
2055               // shift a further digit:
2056               movl    (%esi,%edi),%ebx // next digit into %ebx
2057               shrdl   shcl %ebx,%eax  // shift %eax right by %cl=32-i bits, shifting %ebx in from the left
2058               xorl    %eax,(%esi)     // combine %eax with *xptr and store
2059               leal    4(%esi),%esi    // xptr++, yptr++
2060               // Previous digit is in %ebx.
2061               decl    %edx
2062               jnz     L(shxlu1)       // more digits: next round, else fall through with %ebx
2063 L(shxlu2:)  movl    %ebx,%eax       // previous digit was in %ebx, move it to %eax
2064 L(shxlu3:)  shrl    %cl,%eax        // shift %eax right by 32-i bits
2065             xorl    %eax,(%esi)     // combine with *xptr (this is xptr[count]) and store
2066 L(shxlu4:)  popl    %ebx            // restore %ebx
2067             popl    %edi            // restore %edi
2068             popl    %esi            // restore %esi
2069             ret
2070