]> www.ginac.de Git - cln.git/blob - src/base/digitseq/cl_asm_mips_.cc
Finalize CLN 1.3.7 release.
[cln.git] / src / base / digitseq / cl_asm_mips_.cc
// External routines for ARILEV1.D
// Processor: MIPS
// Endianness: irrelevant
// Compiler: GNU C or ...
// Parameter passing:
//   o32: in registers $4,$5,$6,$7, and on the stack at 16($sp),...
//   n32: in registers $4,$5,$6,$7,$8,$9,$10,$11, and on the stack at 4($sp),...
// Return value: in register $2
// Settings: intCsize=32, intDsize=32.
// Special considerations: after every load instruction one wait cycle is
//   required before the loaded value may be used.
12
// The calling convention (o32 versus n32/n64) is a property of the ABI, not
// of the CPU level. Testing only __mips is misleading: compilers that report
// MIPS32 CPUs as __mips == 32 would select the n32 convention although such
// targets normally use o32. Therefore ask the compiler for the ABI through
// _MIPS_SIM when it is available, and fall back to the CPU level heuristic
// (-n32 is commonly used together with -mips3) only otherwise.
//   ABI_N32: arguments 5..8 arrive in registers $8..$11 (n32, and also n64).
//   ABI_O32: arguments 5.. arrive on the stack, starting at 16($sp).
#if defined(_MIPS_SIM) && defined(_ABIO32) && (_MIPS_SIM == _ABIO32)
  #define ABI_O32 1
#elif defined(_MIPS_SIM) && defined(_ABIN32) && (_MIPS_SIM == _ABIN32)
  #define ABI_N32 1
#elif defined(_MIPS_SIM) && defined(_ABI64) && (_MIPS_SIM == _ABI64)
  #define ABI_N32 1
#elif __mips >= 3
  #define ABI_N32 1
#else
  #define ABI_O32 1
#endif
21
// DECLARE_FUNCTION(name) marks a symbol as a function. ELF linkers need this
// information when this file ends up in a shared library. The directive is
// emitted only when __GNU__ or __NetBSD__ is defined; everywhere else the
// macro expands to nothing.
#if !defined(__GNU__) && !defined(__NetBSD__)
  #define DECLARE_FUNCTION(name)
#else
  #define DECLARE_FUNCTION(name) .type name,@function
#endif
29
// Everything below is code.
        .text

// Entry points exported regardless of CL_DS_BIG_ENDIAN_P.
        .globl copy_loop_up
        .globl copy_loop_down
        .globl fill_loop_up
        .globl fill_loop_down
        .globl clear_loop_up
        .globl clear_loop_down
        .globl test_loop_up
        .globl test_loop_down
        .globl xor_loop_up
        .globl compare_loop_up
#if CL_DS_BIG_ENDIAN_P
// Exported only when CL_DS_BIG_ENDIAN_P is nonzero:
// bitwise loops walk upwards, arithmetic loops walk downwards.
        .globl or_loop_up
        .globl and_loop_up
        .globl eqv_loop_up
        .globl nand_loop_up
        .globl nor_loop_up
        .globl andc2_loop_up
        .globl orc2_loop_up
        .globl not_loop_up
        .globl and_test_loop_up
        .globl add_loop_down
        .globl addto_loop_down
        .globl inc_loop_down
        .globl sub_loop_down
        .globl subx_loop_down
        .globl subfrom_loop_down
        .globl dec_loop_down
        .globl neg_loop_down
#else
// Exported only when CL_DS_BIG_ENDIAN_P is zero:
// bitwise loops walk downwards, arithmetic loops walk upwards.
        .globl or_loop_down
        .globl xor_loop_down
        .globl and_loop_down
        .globl eqv_loop_down
        .globl nand_loop_down
        .globl nor_loop_down
        .globl andc2_loop_down
        .globl orc2_loop_down
        .globl not_loop_down
        .globl and_test_loop_down
        .globl compare_loop_down
        .globl add_loop_up
        .globl addto_loop_up
        .globl inc_loop_up
        .globl sub_loop_up
        .globl subx_loop_up
        .globl subfrom_loop_up
        .globl dec_loop_up
        .globl neg_loop_up
#endif
81
#ifndef __GNUC__ /* with GNU C, mulu32() is a macro that multiplies inline */

// extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
// Computes 2^32*hi+lo := arg1*arg2.
// In:  $4 = arg1, $5 = arg2
// Out: $2 = lo; hi is stored into mulu32_high
// Scratch: $6
        .globl mulu32_
        .align 2
        DECLARE_FUNCTION(mulu32_)
        .ent mulu32_
mulu32_:
#if __mips_isa_rev >= 6
        // Release 6 variant: two separate instructions deliver the two halves.
        muhu $6,$4,$5           // hi = upper half of arg1 * arg2
        mulu $2,$4,$5           // lo = lower half of arg1 * arg2
#else
        multu $4,$5             // arg1 * arg2
        mflo $2                 // lo
        mfhi $6                 // hi
#endif
        // NOTE(review): the original author questioned both the addressing
        // mode and the declaration of mulu32_high here; it is defined elsewhere.
        sw $6,mulu32_high       // store hi
        jr $31                  // return, lo in $2
        .end mulu32_

#endif
104
// extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
// Copies count digits from sourceptr to destptr, advancing both pointers.
// In:  $4 = sourceptr, $5 = destptr, $6 = count
// Out: $2 = destptr after the last store
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(copy_loop_up)
        .ent copy_loop_up
copy_loop_up_next:
        lw $12,0($4)            // d = *sourceptr
        addu $4,$4,4            // sourceptr++
        sw $12,0($5)            // *destptr = d
        addu $5,$5,4            // destptr++
        subu $6,$6,1            // count--
copy_loop_up:                   // entry: the count is tested before the first copy
        bne $6,$0,copy_loop_up_next // repeat until count == 0
        move $2,$5              // result = destptr
        jr $31                  // return
        .end copy_loop_up
119
// extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
// Copies count digits, pre-decrementing both pointers before each access.
// In:  $4 = sourceptr, $5 = destptr, $6 = count
// Out: $2 = destptr after the last store
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(copy_loop_down)
        .ent copy_loop_down
copy_loop_down_next:
        subu $4,$4,4            // --sourceptr
        lw $12,0($4)            // d = *sourceptr
        subu $5,$5,4            // --destptr
        sw $12,0($5)            // *destptr = d
        subu $6,$6,1            // count--
copy_loop_down:                 // entry: the count is tested before the first copy
        bne $6,$0,copy_loop_down_next // repeat until count == 0
        move $2,$5              // result = destptr
        jr $31                  // return
        .end copy_loop_down
134
// extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler);
// Stores filler into count consecutive digits, advancing destptr.
// In:  $4 = destptr, $5 = count, $6 = filler
// Out: $2 = destptr after the last store
        .align 2
        DECLARE_FUNCTION(fill_loop_up)
        .ent fill_loop_up
fill_loop_up_next:
        sw $6,0($4)             // *destptr = filler
        addu $4,$4,4            // destptr++
        subu $5,$5,1            // count--
fill_loop_up:                   // entry: the count is tested before the first store
        bne $5,$0,fill_loop_up_next // repeat until count == 0
        move $2,$4              // result = destptr
        jr $31                  // return
        .end fill_loop_up
147
// extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler);
// Stores filler into count digits, pre-decrementing destptr before each store.
// In:  $4 = destptr, $5 = count, $6 = filler
// Out: $2 = destptr after the last store
        .align 2
        DECLARE_FUNCTION(fill_loop_down)
        .ent fill_loop_down
fill_loop_down_next:
        subu $4,$4,4            // --destptr
        sw $6,0($4)             // *destptr = filler
        subu $5,$5,1            // count--
fill_loop_down:                 // entry: the count is tested before the first store
        bne $5,$0,fill_loop_down_next // repeat until count == 0
        move $2,$4              // result = destptr
        jr $31                  // return
        .end fill_loop_down
160
// extern uintD* clear_loop_up (uintD* destptr, uintC count);
// Zeroes count consecutive digits, advancing destptr.
// In:  $4 = destptr, $5 = count
// Out: $2 = destptr after the last store
        .align 2
        DECLARE_FUNCTION(clear_loop_up)
        .ent clear_loop_up
clear_loop_up_next:
        sw $0,0($4)             // *destptr = 0
        addu $4,$4,4            // destptr++
        subu $5,$5,1            // count--
clear_loop_up:                  // entry: the count is tested before the first store
        bne $5,$0,clear_loop_up_next // repeat until count == 0
        move $2,$4              // result = destptr
        jr $31                  // return
        .end clear_loop_up
173
// extern uintD* clear_loop_down (uintD* destptr, uintC count);
// Zeroes count digits, pre-decrementing destptr before each store.
// In:  $4 = destptr, $5 = count
// Out: $2 = destptr after the last store
        .align 2
        DECLARE_FUNCTION(clear_loop_down)
        .ent clear_loop_down
clear_loop_down_next:
        subu $4,$4,4            // --destptr
        sw $0,0($4)             // *destptr = 0
        subu $5,$5,1            // count--
clear_loop_down:                // entry: the count is tested before the first store
        bne $5,$0,clear_loop_down_next // repeat until count == 0
        move $2,$4              // result = destptr
        jr $31                  // return
        .end clear_loop_down
186
// extern boolean test_loop_up (uintD* ptr, uintC count);
// Scans count digits upwards and reports whether any of them is nonzero.
// In:  $4 = ptr, $5 = count
// Out: $2 = 1 if a nonzero digit was found, 0 otherwise
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(test_loop_up)
        .ent test_loop_up
test_loop_up_next:
        lw $12,0($4)            // x = *ptr
        addu $4,$4,4            // ptr++
        bne $12,$0,test_loop_up_found // nonzero digit: stop scanning
        subu $5,$5,1            // count--
test_loop_up:                   // entry: the count is tested before the first load
        bne $5,$0,test_loop_up_next // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
test_loop_up_found:
        li $2,1                 // result = 1
        jr $31                  // return
        .end test_loop_up
202
// extern boolean test_loop_down (uintD* ptr, uintC count);
// Scans count digits downwards (ptr is pre-decremented) and reports whether
// any of them is nonzero.
// In:  $4 = ptr, $5 = count
// Out: $2 = 1 if a nonzero digit was found, 0 otherwise
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(test_loop_down)
        .ent test_loop_down
test_loop_down_next:
        subu $4,$4,4            // --ptr
        lw $12,0($4)            // x = *ptr
        subu $5,$5,1            // count--
        bne $12,$0,test_loop_down_found // nonzero digit: stop scanning
test_loop_down:                 // entry: the count is tested before the first load
        bne $5,$0,test_loop_down_next // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
test_loop_down_found:
        li $2,1                 // result = 1
        jr $31                  // return
        .end test_loop_down
218
219 #if CL_DS_BIG_ENDIAN_P
220
// extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr |= *yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(or_loop_up)
        .ent or_loop_up
or_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        or $12,$12,$13          // x = x | y
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
or_loop_up:                     // entry: the count is tested first
        bne $6,$0,or_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end or_loop_up
236
237 #endif
238
// extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr ^= *yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(xor_loop_up)
        .ent xor_loop_up
xor_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        xor $12,$12,$13         // x = x ^ y
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
xor_loop_up:                    // entry: the count is tested first
        bne $6,$0,xor_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end xor_loop_up
254
255 #if CL_DS_BIG_ENDIAN_P
256
// extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr &= *yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(and_loop_up)
        .ent and_loop_up
and_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        and $12,$12,$13         // x = x & y
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
and_loop_up:                    // entry: the count is tested first
        bne $6,$0,and_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end and_loop_up
272
// extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr = ~(*xptr ^ *yptr).
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(eqv_loop_up)
        .ent eqv_loop_up
eqv_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        xor $12,$12,$13         // x = x ^ y
        nor $12,$12,$0          // x = ~x
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
eqv_loop_up:                    // entry: the count is tested first
        bne $6,$0,eqv_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end eqv_loop_up
289
// extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr = ~(*xptr & *yptr).
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(nand_loop_up)
        .ent nand_loop_up
nand_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        and $12,$12,$13         // x = x & y (done in two steps: and, then invert)
        nor $12,$12,$0          // x = ~x
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
nand_loop_up:                   // entry: the count is tested first
        bne $6,$0,nand_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end nand_loop_up
306
// extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr = ~(*xptr | *yptr).
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(nor_loop_up)
        .ent nor_loop_up
nor_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        nor $12,$12,$13         // x = ~(x | y)
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
nor_loop_up:                    // entry: the count is tested first
        bne $6,$0,nor_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end nor_loop_up
322
// extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr &= ~*yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(andc2_loop_up)
        .ent andc2_loop_up
andc2_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        nor $13,$13,$0          // y = ~y
        and $12,$12,$13         // x = x & y
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
andc2_loop_up:                  // entry: the count is tested first
        bne $6,$0,andc2_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end andc2_loop_up
339
// extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking upwards: *xptr |= ~*yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(orc2_loop_up)
        .ent orc2_loop_up
orc2_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        nor $13,$13,$0          // y = ~y
        or $12,$12,$13          // x = x | y
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
orc2_loop_up:                   // entry: the count is tested first
        bne $6,$0,orc2_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end orc2_loop_up
356
// extern void not_loop_up (uintD* xptr, uintC count);
// Inverts count digits in place, walking upwards.
// In:  $4 = xptr, $5 = count
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(not_loop_up)
        .ent not_loop_up
not_loop_up_next:
        lw $12,0($4)            // x = *xptr
        subu $5,$5,1            // count--
        nor $12,$12,$0          // x = ~x
        sw $12,0($4)            // *xptr = x
        addu $4,$4,4            // xptr++
not_loop_up:                    // entry: the count is tested first
        bne $5,$0,not_loop_up_next // repeat until count == 0
        jr $31                  // return
        .end not_loop_up
370
// extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count);
// Walks count digit pairs upwards and reports whether any pair has a
// nonzero bitwise AND. Nothing is stored.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Out: $2 = 1 if some (*xptr & *yptr) was nonzero, 0 otherwise
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(and_test_loop_up)
        .ent and_test_loop_up
and_test_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        and $12,$12,$13         // x = x & y
        bne $12,$0,and_test_loop_up_found // common bit found: stop
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
and_test_loop_up:               // entry: the count is tested first
        bne $6,$0,and_test_loop_up_next // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
and_test_loop_up_found:
        li $2,1                 // result = 1
        jr $31                  // return
        .end and_test_loop_up
389
390 #endif
391
// extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count);
// Compares count digit pairs, walking upwards; the first unequal pair
// decides the result (unsigned comparison).
// In:  $4 = xptr, $5 = yptr, $6 = count
// Out: $2 = 0 if all pairs are equal, 1 if x > y, -1 if x < y at the first
//      difference
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(compare_loop_up)
        .ent compare_loop_up
compare_loop_up_next:
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        addu $5,$5,4            // yptr++
        bne $12,$13,compare_loop_up_differ // x != y: decide now
        addu $4,$4,4            // xptr++
        subu $6,$6,1            // count--
compare_loop_up:                // entry: the count is tested first
        bne $6,$0,compare_loop_up_next // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
compare_loop_up_differ:
        bltu $12,$13,compare_loop_up_less // x < y ?
        li $2,1                 // result = 1
        jr $31                  // return
compare_loop_up_less:
        li $2,-1                // result = -1
        jr $31                  // return
        .end compare_loop_up
412
413 #if CL_DS_BIG_ENDIAN_P
414
// extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Adds count digit pairs, walking downwards (all three pointers are
// pre-decremented), and propagates the carry from one digit to the next.
// The code is split in two copies of the loop: one entered while no carry
// is pending and one entered while a carry is pending.
// In:  $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count
// Out: $2 = final carry (0 or 1)
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(add_loop_down)
        .ent add_loop_down
add_loop_down_nocarry:          // no carry pending
        subu $4,$4,4            // --sourceptr1
        subu $5,$5,4            // --sourceptr2
        lw $12,0($4)            // source1 = *sourceptr1
        lw $13,0($5)            // source2 = *sourceptr2
        subu $6,$6,4            // --destptr
        addu $12,$12,$13        // dest = source1 + source2
        sw $12,0($6)            // *destptr = dest
        bltu $12,$13,add_loop_down_carry_count // dest < source2 means carry
add_loop_down_nocarry_count:
        subu $7,$7,1            // count--
add_loop_down:                  // entry: starts without carry
        bne $7,$0,add_loop_down_nocarry // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
add_loop_down_carry:            // carry pending
        subu $4,$4,4            // --sourceptr1
        subu $5,$5,4            // --sourceptr2
        lw $12,0($4)            // source1 = *sourceptr1
        lw $13,0($5)            // source2 = *sourceptr2
        subu $6,$6,4            // --destptr
        addu $12,$12,$13        // dest = source1 + source2
        addu $12,$12,1          //        + 1
        sw $12,0($6)            // *destptr = dest
        bgtu $12,$13,add_loop_down_nocarry_count // dest > source2 means no carry
add_loop_down_carry_count:
        subu $7,$7,1            // count--
        bne $7,$0,add_loop_down_carry // repeat until count == 0
        li $2,1                 // result = 1
        jr $31                  // return
        .end add_loop_down
449
// extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
// Adds count source digits onto the destination digits in place, walking
// downwards (both pointers are pre-decremented), with carry propagation.
// Two copies of the loop exist: without and with a pending carry.
// In:  $4 = sourceptr, $5 = destptr, $6 = count
// Out: $2 = final carry (0 or 1)
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(addto_loop_down)
        .ent addto_loop_down
addto_loop_down_nocarry:        // no carry pending
        subu $4,$4,4            // --sourceptr
        subu $5,$5,4            // --destptr
        lw $12,0($4)            // source1 = *sourceptr
        lw $13,0($5)            // source2 = *destptr
        subu $6,$6,1            // count--
        addu $12,$12,$13        // dest = source1 + source2
        sw $12,0($5)            // *destptr = dest
        bltu $12,$13,addto_loop_down_carry_test // dest < source2 means carry
addto_loop_down:                // entry: starts without carry
addto_loop_down_nocarry_test:
        bne $6,$0,addto_loop_down_nocarry // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
addto_loop_down_carry:          // carry pending
        subu $4,$4,4            // --sourceptr
        subu $5,$5,4            // --destptr
        lw $12,0($4)            // source1 = *sourceptr
        lw $13,0($5)            // source2 = *destptr
        subu $6,$6,1            // count--
        addu $12,$12,$13        // dest = source1 + source2
        addu $12,$12,1          //        + 1
        sw $12,0($5)            // *destptr = dest
        bgtu $12,$13,addto_loop_down_nocarry_test // dest > source2 means no carry
addto_loop_down_carry_test:
        bne $6,$0,addto_loop_down_carry // repeat until count == 0
        li $2,1                 // result = 1
        jr $31                  // return
        .end addto_loop_down
481
// extern uintD inc_loop_down (uintD* ptr, uintC count);
// Increments a digit sequence by one, walking downwards (ptr is
// pre-decremented). Stops at the first digit that does not wrap to zero.
// In:  $4 = ptr, $5 = count
// Out: $2 = 0 if some digit became nonzero, 1 if all count digits wrapped
//      (also when count is 0)
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(inc_loop_down)
        .ent inc_loop_down
inc_loop_down_next:
        subu $4,$4,4            // --ptr
        lw $12,0($4)            // x = *ptr
        subu $5,$5,1            // count--
        addu $12,$12,1          // x++
        sw $12,0($4)            // *ptr = x
        bne $12,$0,inc_loop_down_done // x != 0: the carry is absorbed
inc_loop_down:                  // entry: the count is tested first
        bne $5,$0,inc_loop_down_next // repeat until count == 0
        li $2,1                 // result = 1
        jr $31                  // return
inc_loop_down_done:
        move $2,$0              // result = 0
        jr $31                  // return
        .end inc_loop_down
499
// extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Subtracts count digit pairs (source1 - source2), walking downwards (all
// three pointers are pre-decremented), and propagates the borrow.
// Two copies of the loop exist: without and with a pending borrow; the
// borrow decision is taken before the subtraction overwrites source1.
// In:  $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count
// Out: $2 = 0 if no borrow remains, -1 if a borrow remains
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(sub_loop_down)
        .ent sub_loop_down
sub_loop_down_noborrow:         // no borrow pending
        subu $4,$4,4            // --sourceptr1
        subu $5,$5,4            // --sourceptr2
        lw $12,0($4)            // source1 = *sourceptr1
        lw $13,0($5)            // source2 = *sourceptr2
        subu $6,$6,4            // --destptr
        bltu $12,$13,sub_loop_down_borrow_starts // source1 < source2 means borrow
        subu $12,$12,$13        // dest = source1 - source2
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
sub_loop_down:                  // entry: starts without borrow
        bne $7,$0,sub_loop_down_noborrow // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
sub_loop_down_borrow_starts:    // finish this digit, then continue with borrow
        subu $12,$12,$13        // dest = source1 - source2
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
        bne $7,$0,sub_loop_down_borrow // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
sub_loop_down_borrow:           // borrow pending
        subu $4,$4,4            // --sourceptr1
        subu $5,$5,4            // --sourceptr2
        lw $12,0($4)            // source1 = *sourceptr1
        lw $13,0($5)            // source2 = *sourceptr2
        subu $6,$6,4            // --destptr
        bgtu $12,$13,sub_loop_down_borrow_ends // source1 > source2 means no borrow
        subu $12,$12,$13        // dest = source1 - source2
        subu $12,$12,1          //        - 1
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
        bne $7,$0,sub_loop_down_borrow // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
sub_loop_down_borrow_ends:      // finish this digit, then continue without borrow
        subu $12,$12,$13        // dest = source1 - source2
        subu $12,$12,1          //        - 1
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
        bne $7,$0,sub_loop_down_noborrow // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
        .end sub_loop_down
546
// extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
// Like a downward subtraction loop over count digit pairs
// (source1 - source2, all three pointers pre-decremented), but with an
// incoming borrow: a nonzero carry argument starts in the borrow state.
// In:  $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count,
//      carry in $8 (ABI_N32) or at 16($sp) (otherwise)
// Out: $2 = 0 if no borrow remains, -1 if a borrow remains
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(subx_loop_down)
        .ent subx_loop_down
subx_loop_down:
#if ABI_N32
        move $12,$8             // carry was passed in a register
#else
        lw $12,16($sp)          // carry was passed on the stack
#endif
        bne $12,$0,subx_loop_down_borrow_test // carry != 0: start with borrow
        b subx_loop_down_noborrow_test
subx_loop_down_noborrow:        // no borrow pending
        subu $4,$4,4            // --sourceptr1
        subu $5,$5,4            // --sourceptr2
        lw $12,0($4)            // source1 = *sourceptr1
        lw $13,0($5)            // source2 = *sourceptr2
        subu $6,$6,4            // --destptr
        bltu $12,$13,subx_loop_down_borrow_starts // source1 < source2 means borrow
        subu $12,$12,$13        // dest = source1 - source2
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
subx_loop_down_noborrow_test:
        bne $7,$0,subx_loop_down_noborrow // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
subx_loop_down_borrow_starts:   // finish this digit, then continue with borrow
        subu $12,$12,$13        // dest = source1 - source2
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
        bne $7,$0,subx_loop_down_borrow // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
subx_loop_down_borrow:          // borrow pending
        subu $4,$4,4            // --sourceptr1
        subu $5,$5,4            // --sourceptr2
        lw $12,0($4)            // source1 = *sourceptr1
        lw $13,0($5)            // source2 = *sourceptr2
        subu $6,$6,4            // --destptr
        bgtu $12,$13,subx_loop_down_borrow_ends // source1 > source2 means no borrow
        subu $12,$12,$13        // dest = source1 - source2
        subu $12,$12,1          //        - 1
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
subx_loop_down_borrow_test:
        bne $7,$0,subx_loop_down_borrow // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
subx_loop_down_borrow_ends:     // finish this digit, then continue without borrow
        subu $12,$12,$13        // dest = source1 - source2
        subu $12,$12,1          //        - 1
        sw $12,0($6)            // *destptr = dest
        subu $7,$7,1            // count--
        bne $7,$0,subx_loop_down_noborrow // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
        .end subx_loop_down
600
// extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
// Subtracts count source digits from the destination digits in place
// (dest = dest - source), walking downwards (both pointers are
// pre-decremented), with borrow propagation. Two copies of the loop exist:
// without and with a pending borrow.
// In:  $4 = sourceptr, $5 = destptr, $6 = count
// Out: $2 = 0 if no borrow remains, -1 if a borrow remains
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(subfrom_loop_down)
        .ent subfrom_loop_down
subfrom_loop_down_noborrow:     // no borrow pending
        subu $4,$4,4            // --sourceptr
        subu $5,$5,4            // --destptr
        lw $12,0($5)            // source1 = *destptr
        lw $13,0($4)            // source2 = *sourceptr
        subu $6,$6,1            // count--
        bltu $12,$13,subfrom_loop_down_borrow_starts // source1 < source2 means borrow
        subu $12,$12,$13        // dest = source1 - source2
        sw $12,0($5)            // *destptr = dest
subfrom_loop_down:              // entry: starts without borrow
        bne $6,$0,subfrom_loop_down_noborrow // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
subfrom_loop_down_borrow_starts: // finish this digit, then continue with borrow
        subu $12,$12,$13        // dest = source1 - source2
        sw $12,0($5)            // *destptr = dest
        bne $6,$0,subfrom_loop_down_borrow // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
subfrom_loop_down_borrow:       // borrow pending
        subu $4,$4,4            // --sourceptr
        subu $5,$5,4            // --destptr
        lw $12,0($5)            // source1 = *destptr
        lw $13,0($4)            // source2 = *sourceptr
        subu $6,$6,1            // count--
        bgtu $12,$13,subfrom_loop_down_borrow_ends // source1 > source2 means no borrow
        subu $12,$12,$13        // dest = source1 - source2
        subu $12,$12,1          //        - 1
        sw $12,0($5)            // *destptr = dest
        bne $6,$0,subfrom_loop_down_borrow // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
subfrom_loop_down_borrow_ends:  // finish this digit, then continue without borrow
        subu $12,$12,$13        // dest = source1 - source2
        subu $12,$12,1          //        - 1
        sw $12,0($5)            // *destptr = dest
        bne $6,$0,subfrom_loop_down_noborrow // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
        .end subfrom_loop_down
643
// extern uintD dec_loop_down (uintD* ptr, uintC count);
// Decrements a digit sequence by one, walking downwards (ptr is
// pre-decremented). Stops at the first digit that was nonzero before the
// decrement.
// In:  $4 = ptr, $5 = count
// Out: $2 = 0 if some digit absorbed the borrow, -1 if all count digits
//      were zero (also when count is 0)
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(dec_loop_down)
        .ent dec_loop_down
dec_loop_down_next:
        subu $4,$4,4            // --ptr
        lw $12,0($4)            // x = *ptr
        subu $5,$5,1            // count--
        bne $12,$0,dec_loop_down_last // x != 0: this digit absorbs the borrow
        subu $12,$12,1          // x--
        sw $12,0($4)            // *ptr = x
dec_loop_down:                  // entry: the count is tested first
        bne $5,$0,dec_loop_down_next // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
dec_loop_down_last:
        subu $12,$12,1          // x--
        sw $12,0($4)            // *ptr = x
        move $2,$0              // result = 0
        jr $31                  // return
        .end dec_loop_down
663
// extern uintD neg_loop_down (uintD* ptr, uintC count);
// Negates a digit sequence in place, walking downwards (ptr is
// pre-decremented): zero digits are skipped, the first nonzero digit is
// negated, and every digit after it is inverted.
// In:  $4 = ptr, $5 = count
// Out: $2 = 0 if all count digits were zero, -1 otherwise
// Scratch: $12
        .align 2
        DECLARE_FUNCTION(neg_loop_down)
        .ent neg_loop_down
        // Phase 1: search for the first nonzero digit.
neg_loop_down_search:
        subu $4,$4,4            // --ptr
        lw $12,0($4)            // x = *ptr
        subu $5,$5,1            // count--
        bne $12,$0,neg_loop_down_found // x != 0: go negate it
neg_loop_down:                  // entry: the count is tested first
        bne $5,$0,neg_loop_down_search // repeat until count == 0
        move $2,$0              // result = 0
        jr $31                  // return
neg_loop_down_found:
        // Phase 2: negate this one digit; from here on there are carries.
        negu $12,$12            // x = -x
        sw $12,0($4)            // *ptr = x
        // Phase 3: invert all remaining digits.
        b neg_loop_down_invert_test
neg_loop_down_invert:
        subu $4,$4,4            // --ptr
        lw $12,0($4)            // x = *ptr
        subu $5,$5,1            // count--
        nor $12,$12,$0          // x = ~x
        sw $12,0($4)            // *ptr = x
neg_loop_down_invert_test:
        bne $5,$0,neg_loop_down_invert // repeat until count == 0
        li $2,-1                // result = -1
        jr $31                  // return
        .end neg_loop_down
692
693 #endif
694
695 #if !CL_DS_BIG_ENDIAN_P
696
// extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking downwards (both pointers are pre-decremented):
// *xptr |= *yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(or_loop_down)
        .ent or_loop_down
or_loop_down_next:
        subu $4,$4,4            // --xptr
        subu $5,$5,4            // --yptr
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        subu $6,$6,1            // count--
        or $12,$12,$13          // x = x | y
        sw $12,0($4)            // *xptr = x
or_loop_down:                   // entry: the count is tested first
        bne $6,$0,or_loop_down_next // repeat until count == 0
        jr $31                  // return
        .end or_loop_down
712
// extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking downwards (both pointers are pre-decremented):
// *xptr ^= *yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(xor_loop_down)
        .ent xor_loop_down
xor_loop_down_next:
        subu $4,$4,4            // --xptr
        subu $5,$5,4            // --yptr
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        subu $6,$6,1            // count--
        xor $12,$12,$13         // x = x ^ y
        sw $12,0($4)            // *xptr = x
xor_loop_down:                  // entry: the count is tested first
        bne $6,$0,xor_loop_down_next // repeat until count == 0
        jr $31                  // return
        .end xor_loop_down
728
// extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking downwards (both pointers are pre-decremented):
// *xptr &= *yptr.
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(and_loop_down)
        .ent and_loop_down
and_loop_down_next:
        subu $4,$4,4            // --xptr
        subu $5,$5,4            // --yptr
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        subu $6,$6,1            // count--
        and $12,$12,$13         // x = x & y
        sw $12,0($4)            // *xptr = x
and_loop_down:                  // entry: the count is tested first
        bne $6,$0,and_loop_down_next // repeat until count == 0
        jr $31                  // return
        .end and_loop_down
744
// extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count digits, walking downwards (both pointers are pre-decremented):
// *xptr = ~(*xptr ^ *yptr).
// In:  $4 = xptr, $5 = yptr, $6 = count
// Scratch: $12, $13
        .align 2
        DECLARE_FUNCTION(eqv_loop_down)
        .ent eqv_loop_down
eqv_loop_down_next:
        subu $4,$4,4            // --xptr
        subu $5,$5,4            // --yptr
        lw $12,0($4)            // x = *xptr
        lw $13,0($5)            // y = *yptr
        subu $6,$6,1            // count--
        xor $12,$12,$13         // x = x ^ y
        nor $12,$12,$0          // x = ~x
        sw $12,0($4)            // *xptr = x
eqv_loop_down:                  // entry: the count is tested first
        bne $6,$0,eqv_loop_down_next // repeat until count == 0
        jr $31                  // return
        .end eqv_loop_down
761
762 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);
    // Replaces `count` 32-bit words of x by ~(x & y) with the corresponding
    // words of y.
    // Both pointers walk downward: each step pre-decrements xptr and yptr
    // by 4 bytes before the word is processed.
    // $12 and $13 are used as scratch registers; $2 is not written.
763         .align 2
764         DECLARE_FUNCTION(nand_loop_down)
765         .ent nand_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
766 nald1:    subu $4,4             // xptr--
767           subu $5,4             // yptr--
768           lw $12,($4)           // x = *xptr
769           lw $13,($5)           // y = *yptr
770           subu $6,1             // count-- (placed between the loads and their first use)
771           and $12,$13           // x &= y        // (open question from the author: is there a 'nand $12,$13'?)
772           nor $12,$0            // x = ~x  (NOR with the zero register is a bitwise NOT)
773           sw $12,($4)           // *xptr = x
    // Entry point: the count test runs first, so count == 0 touches no memory.
774 nand_loop_down:
775           bnez $6,nald1         // repeat until count == 0
776         j $31                   // return
777         .end nand_loop_down
778
779 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);
    // Replaces `count` 32-bit words of x by ~(x | y) with the corresponding
    // words of y.
    // Both pointers walk downward: each step pre-decrements xptr and yptr
    // by 4 bytes before the word is processed.
    // $12 and $13 are used as scratch registers; $2 is not written.
780         .align 2
781         DECLARE_FUNCTION(nor_loop_down)
782         .ent nor_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
783 nold1:    subu $4,4             // xptr--
784           subu $5,4             // yptr--
785           lw $12,($4)           // x = *xptr
786           lw $13,($5)           // y = *yptr
787           subu $6,1             // count-- (placed between the loads and their first use)
788           nor $12,$13           // x = ~(x|y)
789           sw $12,($4)           // *xptr = x
    // Entry point: the count test runs first, so count == 0 touches no memory.
790 nor_loop_down:
791           bnez $6,nold1         // repeat until count == 0
792         j $31                   // return
793         .end nor_loop_down
794
795 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
    // "AND with complemented second operand": replaces `count` 32-bit words
    // of x by x & ~y with the corresponding words of y.
    // Both pointers walk downward: each step pre-decrements xptr and yptr
    // by 4 bytes before the word is processed.
    // $12 and $13 are used as scratch registers; $2 is not written.
796         .align 2
797         DECLARE_FUNCTION(andc2_loop_down)
798         .ent andc2_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
799 acld1:    subu $4,4             // xptr--
800           subu $5,4             // yptr--
801           lw $12,($4)           // x = *xptr
802           lw $13,($5)           // y = *yptr
803           subu $6,1             // count-- (placed between the loads and their first use)
804           nor $13,$0            // y = ~y  (NOR with the zero register is a bitwise NOT)
805           and $12,$13           // x &= y
806           sw $12,($4)           // *xptr = x
    // Entry point: the count test runs first, so count == 0 touches no memory.
807 andc2_loop_down:
808           bnez $6,acld1         // repeat until count == 0
809         j $31                   // return
810         .end andc2_loop_down
811
812 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
    // "OR with complemented second operand": replaces `count` 32-bit words
    // of x by x | ~y with the corresponding words of y.
    // Both pointers walk downward: each step pre-decrements xptr and yptr
    // by 4 bytes before the word is processed.
    // $12 and $13 are used as scratch registers; $2 is not written.
813         .align 2
814         DECLARE_FUNCTION(orc2_loop_down)
815         .ent orc2_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
816 ocld1:    subu $4,4             // xptr--
817           subu $5,4             // yptr--
818           lw $12,($4)           // x = *xptr
819           lw $13,($5)           // y = *yptr
820           subu $6,1             // count-- (placed between the loads and their first use)
821           nor $13,$0            // y = ~y  (NOR with the zero register is a bitwise NOT)
822           or $12,$13            // x |= y
823           sw $12,($4)           // *xptr = x
    // Entry point: the count test runs first, so count == 0 touches no memory.
824 orc2_loop_down:
825           bnez $6,ocld1         // repeat until count == 0
826         j $31                   // return
827         .end orc2_loop_down
828
829 // extern void not_loop_down (uintD* xptr, uintC count);
    // Bitwise-complements `count` 32-bit words in place.
    // The pointer walks downward: each step pre-decrements xptr by 4 bytes
    // and then performs *xptr = ~*xptr.
    // $12 is used as a scratch register; $2 is not written.
830         .align 2
831         DECLARE_FUNCTION(not_loop_down)
832         .ent not_loop_down // Input: $4 = xptr, $5 = count
833 nld1:     subu $4,4             // xptr--
834           lw $12,($4)           // x = *xptr
835           subu $5,1             // count-- (placed between the load and its first use)
836           nor $12,$0            // x = ~x  (NOR with the zero register is a bitwise NOT)
837           sw $12,($4)           // *xptr = x
    // Entry point: the count test runs first, so count == 0 touches no memory.
838 not_loop_down:
839           bnez $5,nld1          // repeat until count == 0
840         j $31                   // return
841         .end not_loop_down
842
843 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);
    // Tests whether x and y have any common set bit within `count` 32-bit
    // words. Nothing is stored; the operands are only read.
    // Both pointers walk downward (pre-decrement by 4 bytes per step).
    // Returns in $2: 1 as soon as some word pair has (x & y) != 0,
    // otherwise 0 after all `count` pairs were examined (also for count == 0).
    // $12 and $13 are used as scratch registers.
844         .align 2
845         DECLARE_FUNCTION(and_test_loop_down)
846         .ent and_test_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count; Output in $2
847 atld1:    subu $4,4             // xptr--
848           subu $5,4             // yptr--
849           lw $12,($4)           // x = *xptr
850           lw $13,($5)           // y = *yptr
851           and $12,$13           // x &= y
852           bnez $12,atld3        // if (x != 0) return 1
853           subu $6,1             // count--
    // Entry point: the count test runs first, so count == 0 touches no memory.
854 and_test_loop_down:
855           bnez $6,atld1         // repeat until count == 0
856         move $2,$0              // result = 0: no common bit found
857         j $31                   // return
858 atld3:  li $2,1                 // result = 1: a common bit was found
859         j $31                   // return
860         .end and_test_loop_down
861
862 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);
    // Compares `count` 32-bit words of x and y, walking both pointers
    // downward (pre-decrement by 4 bytes per step). The first word pair that
    // differs decides the result, using an unsigned comparison.
    // Returns in $2: 0 if all pairs are equal (also for count == 0),
    // 1 if the first differing pair has x > y, -1 if it has x < y.
    // $12 and $13 are used as scratch registers.
863         .align 2
864         DECLARE_FUNCTION(compare_loop_down)
865         .ent compare_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count; Output in $2
866 cmld1:    subu $4,4             // xptr--
867           subu $5,4             // yptr--
868           lw $12,($4)           // x = *xptr
869           lw $13,($5)           // y = *yptr
870           subu $6,1             // count-- (placed between the loads and their first use)
871           bne $12,$13,cmld3     // if (x != y) decide the result
    // Entry point: the count test runs first, so count == 0 touches no memory.
872 compare_loop_down:
873           bnez $6,cmld1         // repeat until count == 0
874         move $2,$0              // result = 0: all words equal
875         j $31                   // return
876 cmld3:  bltu $12,$13,cmld4      // if (x < y), unsigned, return -1
877         li $2,1                 // result = 1: x > y
878         j $31                   // return
879 cmld4:  li $2,-1                // result = -1: x < y
880         j $31                   // return
881         .end compare_loop_down
882
883 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
    // Multi-word addition: for `count` 32-bit words, stores
    // *destptr = *sourceptr1 + *sourceptr2 + carry, with all three pointers
    // walking upward (post-increment by 4 bytes per step). The carry starts
    // at 0 and is propagated from one word to the next.
    // Returns in $2 the final carry: 0 or 1 (0 for count == 0).
    // The carry is not kept in a register; it is encoded in which of two
    // loop bodies is running: alu1 (no carry pending) or alu3 (carry pending).
    // $12 and $13 are used as scratch registers.
884         .align 2
885         DECLARE_FUNCTION(add_loop_up)
886         .ent add_loop_up // Input: $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count; Output in $2
887 alu1:     // no carry pending
888           lw $12,($4)           // source1 = *sourceptr1
889           lw $13,($5)           // source2 = *sourceptr2
890           addu $4,4             // sourceptr1++
891           addu $5,4             // sourceptr2++
892           addu $12,$13          // dest = source1 + source2
893           sw $12,($6)           // *destptr = dest
894           addu $6,4             // destptr++
895           bltu $12,$13,alu4     // if (dest < source2) the sum wrapped: switch to the carry state
896 alu2:
897           subu $7,1             // count--
    // Entry point: starts in the no-carry state with the count test first.
898 add_loop_up:
899           bnez $7,alu1          // repeat until count == 0
900         move $2,$0              // result = 0: no carry out
901         j $31                   // return
902 alu3:   // carry pending
903           lw $12,($4)           // source1 = *sourceptr1
904           lw $13,($5)           // source2 = *sourceptr2
905           addu $4,4             // sourceptr1++
906           addu $5,4             // sourceptr2++
907           addu $12,$13          // dest = source1 + source2
908           addu $12,1            //        + 1
909           sw $12,($6)           // *destptr = dest
910           addu $6,4             // destptr++
911           bgtu $12,$13,alu2     // if (dest > source2) nothing wrapped: back to the no-carry state
912 alu4:     subu $7,1             // count--
913           bnez $7,alu3          // repeat until count == 0
914         li $2,1                 // result = 1: carry out
915         j $31                   // return
916         .end add_loop_up
917
918 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
    // Multi-word in-place addition: for `count` 32-bit words, performs
    // *destptr += *sourceptr + carry, with both pointers walking upward
    // (post-increment by 4 bytes per step). The carry starts at 0 and is
    // propagated from one word to the next.
    // Returns in $2 the final carry: 0 or 1 (0 for count == 0).
    // The carry is encoded in which of two loop bodies is running:
    // atlu1 (no carry pending) or atlu3 (carry pending).
    // $12 and $13 are used as scratch registers.
919         .align 2
920         DECLARE_FUNCTION(addto_loop_up)
921         .ent addto_loop_up // Input: $4 = sourceptr, $5 = destptr, $6 = count; Output in $2
922 atlu1:    // no carry pending
923           lw $12,($4)           // source1 = *sourceptr
924           lw $13,($5)           // source2 = *destptr
925           addu $4,4             // sourceptr++
926           subu $6,1             // count--
927           addu $12,$13          // dest = source1 + source2
928           sw $12,($5)           // *destptr = dest
929           addu $5,4             // destptr++
930           bltu $12,$13,atlu4    // if (dest < source2) the sum wrapped: switch to the carry state
    // Entry point: starts in the no-carry state with the count test first.
931 addto_loop_up:
932 atlu2:    bnez $6,atlu1         // repeat until count == 0
933         move $2,$0              // result = 0: no carry out
934         j $31                   // return
935 atlu3:  // carry pending
936           lw $12,($4)           // source1 = *sourceptr
937           lw $13,($5)           // source2 = *destptr
938           addu $4,4             // sourceptr++
939           subu $6,1             // count--
940           addu $12,$13          // dest = source1 + source2
941           addu $12,1            //        + 1
942           sw $12,($5)           // *destptr = dest
943           addu $5,4             // destptr++
944           bgtu $12,$13,atlu2    // if (dest > source2) nothing wrapped: back to the no-carry state
945 atlu4:    bnez $6,atlu3         // repeat until count == 0
946         li $2,1                 // result = 1: carry out
947         j $31                   // return
948         .end addto_loop_up
949
950 // extern uintD inc_loop_up (uintD* ptr, uintC count);
    // Increments a multi-word number by 1 in place, walking upward from ptr
    // (post-increment by 4 bytes per step) over at most `count` 32-bit words.
    // Each word is incremented and stored; the loop stops at the first word
    // that does not wrap around to 0.
    // Returns in $2: 0 if some word absorbed the carry, 1 if every one of
    // the `count` words wrapped to 0 (also 1 for count == 0).
    // $12 is used as a scratch register.
951         .align 2
952         DECLARE_FUNCTION(inc_loop_up)
953         .ent inc_loop_up // Input: $4 = ptr, $5 = count; Output in $2
954 ilu1:     lw $12,($4)           // x = *ptr
955           subu $5,1             // count-- (placed between the load and its first use)
956           addu $12,1            // x++;
957           sw $12,($4)           // *ptr = x
958           addu $4,4             // ptr++
959           bnez $12,ilu3         // if (x != 0) the carry is absorbed: return 0
    // Entry point: the count test runs first, so count == 0 touches no memory.
960 inc_loop_up:
961           bnez $5,ilu1          // repeat until count == 0
962         li $2,1                 // result = 1: carry out
963         j $31                   // return
964 ilu3:   move $2,$0              // result = 0: no carry out
965         j $31                   // return
966         .end inc_loop_up
967
968 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
    // Multi-word subtraction: for `count` 32-bit words, stores
    // *destptr = *sourceptr1 - *sourceptr2 - borrow, with all three pointers
    // walking upward (post-increment by 4 bytes per step). The borrow (called
    // "carry" in the comments below) starts at 0 and is propagated from one
    // word to the next.
    // Returns in $2 the final borrow: 0 or -1 (0 for count == 0).
    // The borrow is encoded in which of two loop bodies is running:
    // slu1 (no carry pending) or slu3 (carry pending). slu2 and slu4 finish
    // the current word while switching from one state to the other.
    // $12 and $13 are used as scratch registers.
969         .align 2
970         DECLARE_FUNCTION(sub_loop_up)
971         .ent sub_loop_up // Input: $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count; Output in $2
972 slu1:     // no carry pending
973           lw $12,($4)           // source1 = *sourceptr1
974           lw $13,($5)           // source2 = *sourceptr2
975           addu $4,4             // sourceptr1++
976           addu $5,4             // sourceptr2++
977           subu $7,1             // count--
978           bltu $12,$13,slu2     // if (source1 < source2) this word produces a carry
979           subu $12,$13          // dest = source1 - source2
980           sw $12,($6)           // *destptr = dest
981           addu $6,4             // destptr++
    // Entry point: starts in the no-carry state with the count test first.
982 sub_loop_up:
983           bnez $7,slu1          // repeat until count == 0
984         move $2,$0              // result = 0: no carry out
985         j $31                   // return
    // Finish a word that produced a carry, then continue in the carry state.
986 slu2:     subu $12,$13          // dest = source1 - source2
987           sw $12,($6)           // *destptr = dest
988           addu $6,4             // destptr++
989           bnez $7,slu3          // repeat until count == 0
990         li $2,-1                // result = -1: carry out
991         j $31                   // return
992 slu3:   // carry pending
993           lw $12,($4)           // source1 = *sourceptr1
994           lw $13,($5)           // source2 = *sourceptr2
995           addu $4,4             // sourceptr1++
996           addu $5,4             // sourceptr2++
997           subu $7,1             // count--
998           bgtu $12,$13,slu4     // if (source1 > source2) this word absorbs the carry
999           subu $12,$13          // dest = source1 - source2
1000           subu $12,1            //        - 1
1001           sw $12,($6)           // *destptr = dest
1002           addu $6,4             // destptr++
1003           bnez $7,slu3          // repeat until count == 0
1004         li $2,-1                // result = -1: carry out
1005         j $31                   // return
    // Finish a word that absorbed the carry, then continue in the no-carry state.
1006 slu4:     subu $12,$13          // dest = source1 - source2
1007           subu $12,1            //        - 1
1008           sw $12,($6)           // *destptr = dest
1009           addu $6,4             // destptr++
1010           bnez $7,slu1          // repeat until count == 0
1011         move $2,$0              // result = 0: no carry out
1012         j $31                   // return
1013         .end sub_loop_up
1014
1015 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
     // Multi-word subtraction with an incoming borrow: for `count` 32-bit
     // words, stores *destptr = *sourceptr1 - *sourceptr2 - borrow, with all
     // three pointers walking upward (post-increment by 4 bytes per step).
     // The initial borrow (called "carry" in the comments below) is the
     // `carry` argument: zero means no borrow, any nonzero value means borrow.
     // Returns in $2 the final borrow: 0 or -1. For count == 0 the result is
     // 0 if carry was zero and -1 otherwise.
     // The borrow is encoded in which of two loop bodies is running:
     // sxlu1 (no carry pending) or sxlu4 (carry pending). sxlu3 and sxlu6
     // finish the current word while switching from one state to the other.
     // $12 and $13 are used as scratch registers.
1016         .align 2
1017         DECLARE_FUNCTION(subx_loop_up)
1018         .ent subx_loop_up // Input: $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count, carry in $8 (n32) or 16($sp) (o32); Output in $2
1019 subx_loop_up:
1020 #if ABI_N32
1021         move $12,$8             // carry: fifth argument arrives in a register
1022 #else
1023         lw $12,16($sp)          // carry: fifth argument arrives on the stack
1024 #endif
1025         bnez $12,sxlu5          // carry != 0: enter at the count test of the carry state
1026         b sxlu2                 // carry == 0: enter at the count test of the no-carry state
1027 sxlu1:    // no carry pending
1028           lw $12,($4)           // source1 = *sourceptr1
1029           lw $13,($5)           // source2 = *sourceptr2
1030           addu $4,4             // sourceptr1++
1031           addu $5,4             // sourceptr2++
1032           subu $7,1             // count--
1033           bltu $12,$13,sxlu3    // if (source1 < source2) this word produces a carry
1034           subu $12,$13          // dest = source1 - source2
1035           sw $12,($6)           // *destptr = dest
1036           addu $6,4             // destptr++
1037 sxlu2:    bnez $7,sxlu1         // repeat until count == 0
1038         move $2,$0              // result = 0: no carry out
1039         j $31                   // return
     // Finish a word that produced a carry, then continue in the carry state.
1040 sxlu3:    subu $12,$13          // dest = source1 - source2
1041           sw $12,($6)           // *destptr = dest
1042           addu $6,4             // destptr++
1043           bnez $7,sxlu4         // repeat until count == 0
1044         li $2,-1                // result = -1: carry out
1045         j $31                   // return
1046 sxlu4:  // carry pending
1047           lw $12,($4)           // source1 = *sourceptr1
1048           lw $13,($5)           // source2 = *sourceptr2
1049           addu $4,4             // sourceptr1++
1050           addu $5,4             // sourceptr2++
1051           subu $7,1             // count--
1052           bgtu $12,$13,sxlu6    // if (source1 > source2) this word absorbs the carry
1053           subu $12,$13          // dest = source1 - source2
1054           subu $12,1            //        - 1
1055           sw $12,($6)           // *destptr = dest
1056           addu $6,4             // destptr++
1057 sxlu5:    bnez $7,sxlu4         // repeat until count == 0
1058         li $2,-1                // result = -1: carry out
1059         j $31                   // return
     // Finish a word that absorbed the carry, then continue in the no-carry state.
1060 sxlu6:    subu $12,$13          // dest = source1 - source2
1061           subu $12,1            //        - 1
1062           sw $12,($6)           // *destptr = dest
1063           addu $6,4             // destptr++
1064           bnez $7,sxlu1         // repeat until count == 0
1065         move $2,$0              // result = 0: no carry out
1066         j $31                   // return
1067         .end subx_loop_up
1068
1069 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
     // Multi-word in-place subtraction: for `count` 32-bit words, performs
     // *destptr -= *sourceptr + borrow, with both pointers walking upward
     // (post-increment by 4 bytes per step). The borrow (called "carry" in
     // the comments below) starts at 0 and is propagated from one word to
     // the next.
     // Returns in $2 the final borrow: 0 or -1 (0 for count == 0).
     // The borrow is encoded in which of two loop bodies is running:
     // sflu1 (no carry pending) or sflu3 (carry pending). sflu2 and sflu4
     // finish the current word while switching from one state to the other.
     // $12 and $13 are used as scratch registers.
1070         .align 2
1071         DECLARE_FUNCTION(subfrom_loop_up)
1072         .ent subfrom_loop_up // Input: $4 = sourceptr, $5 = destptr, $6 = count; Output in $2
1073 sflu1:    // no carry pending
1074           lw $12,($5)           // source1 = *destptr
1075           lw $13,($4)           // source2 = *sourceptr
1076           addu $4,4             // sourceptr++
1077           subu $6,1             // count--
1078           bltu $12,$13,sflu2    // if (source1 < source2) this word produces a carry
1079           subu $12,$13          // dest = source1 - source2
1080           sw $12,($5)           // *destptr = dest
1081           addu $5,4             // destptr++
     // Entry point: starts in the no-carry state with the count test first.
1082 subfrom_loop_up:
1083           bnez $6,sflu1         // repeat until count == 0
1084         move $2,$0              // result = 0: no carry out
1085         j $31                   // return
     // Finish a word that produced a carry, then continue in the carry state.
1086 sflu2:    subu $12,$13          // dest = source1 - source2
1087           sw $12,($5)           // *destptr = dest
1088           addu $5,4             // destptr++
1089           bnez $6,sflu3         // repeat until count == 0
1090         li $2,-1                // result = -1: carry out
1091         j $31                   // return
1092 sflu3:  // carry pending
1093           lw $12,($5)           // source1 = *destptr
1094           lw $13,($4)           // source2 = *sourceptr
1095           addu $4,4             // sourceptr++
1096           subu $6,1             // count--
1097           bgtu $12,$13,sflu4    // if (source1 > source2) this word absorbs the carry
1098           subu $12,$13          // dest = source1 - source2
1099           subu $12,1            //        - 1
1100           sw $12,($5)           // *destptr = dest
1101           addu $5,4             // destptr++
1102           bnez $6,sflu3         // repeat until count == 0
1103         li $2,-1                // result = -1: carry out
1104         j $31                   // return
     // Finish a word that absorbed the carry, then continue in the no-carry state.
1105 sflu4:    subu $12,$13          // dest = source1 - source2
1106           subu $12,1            //        - 1
1107           sw $12,($5)           // *destptr = dest
1108           addu $5,4             // destptr++
1109           bnez $6,sflu1         // repeat until count == 0
1110         move $2,$0              // result = 0: no carry out
1111         j $31                   // return
1112         .end subfrom_loop_up
1113
1114 // extern uintD dec_loop_up (uintD* ptr, uintC count);
     // Decrements a multi-word number by 1 in place, walking upward from ptr
     // (post-increment by 4 bytes per step) over at most `count` 32-bit words.
     // Zero words are decremented (wrapping around) and the loop continues;
     // the first nonzero word is decremented and ends the loop.
     // Returns in $2: 0 if a nonzero word absorbed the borrow, -1 if all
     // `count` words were zero (also -1 for count == 0).
     // $12 is used as a scratch register.
1115         .align 2
1116         DECLARE_FUNCTION(dec_loop_up)
1117         .ent dec_loop_up // Input: $4 = ptr, $5 = count; Output in $2
1118 dlu1:     lw $12,($4)           // x = *ptr
1119           subu $5,1             // count-- (placed between the load and its first use)
1120           bnez $12,dlu3         // if (x != 0) this word absorbs the borrow
1121           subu $12,1            // x--;
1122           sw $12,($4)           // *ptr = x
1123           addu $4,4             // ptr++
     // Entry point: the count test runs first, so count == 0 touches no memory.
1124 dec_loop_up:
1125           bnez $5,dlu1          // repeat until count == 0
1126         li $2,-1                // result = -1: borrow out
1127         j $31                   // return
1128 dlu3:   subu $12,1              // x--;
1129         sw $12,($4)             // *ptr = x
1130         move $2,$0              // result = 0: no borrow out
1131         j $31                   // return
1132         .end dec_loop_up
1133
1134 // extern uintD neg_loop_up (uintD* ptr, uintC count);
     // Negates (two's complement) a multi-word number of `count` 32-bit
     // words in place, walking upward from ptr (4 bytes per step).
     // Leading zero words are skipped unchanged, the first nonzero word is
     // replaced by its negation, and every remaining word is replaced by
     // its bitwise complement.
     // Returns in $2: 0 if all `count` words were zero (nothing is written,
     // also for count == 0), otherwise -1.
     // $12 is used as a scratch register.
1135         .align 2
1136         DECLARE_FUNCTION(neg_loop_up)
1137         .ent neg_loop_up // Input: $4 = ptr, $5 = count; Output in $2
1138         // search for the first nonzero digit:
1139 nlu1:     lw $12,($4)           // x = *ptr
1140           subu $5,1             // count-- (placed between the load and its first use)
1141           bnez $12,nlu3         // if (x != 0) the first nonzero digit is found
1142           addu $4,4             // ptr++
     // Entry point: the count test runs first, so count == 0 touches no memory.
1143 neg_loop_up:
1144           bnez $5,nlu1          // repeat until count == 0
1145         move $2,$0              // result = 0: the number was zero
1146         j $31                   // return
1147 nlu3:   // first nonzero digit found; from here on there is always a carry
1148         // negate this one digit:
1149         subu $12,$0,$12         // x = -x
1150         sw $12,($4)             // *ptr = x
1151         // invert all remaining digits:
1152         b nlu5                  // ptr still addresses the negated digit; step past it first
1153 nlu4:     lw $12,($4)           // x = *ptr
1154           subu $5,1             // count--
1155           nor $12,$0            // x = ~x  (NOR with the zero register is a bitwise NOT)
1156           sw $12,($4)           // *ptr = x
1157 nlu5:     addu $4,4             // ptr++
1158           bnez $5,nlu4          // repeat until count == 0
1159         li $2,-1                // result = -1: the number was nonzero
1160         j $31                   // return
1161         .end neg_loop_up
1162
1163 #endif
1164