]> www.ginac.de Git - cln.git/blob - src/base/digitseq/cl_asm_mips_.cc
- src/base/digitseq/cl_asm_mips_.cc: Starting at argument 5 the
[cln.git] / src / base / digitseq / cl_asm_mips_.cc
1 // External routines for ARILEV1.D
2 // Processor: MIPS
3 // Endianness: irrelevant
4 // Compiler: GNU-C or ...
5 // Parameter passing:
6 //   o32: in registers $4,$5,$6,$7, and on the stack at 16($sp),...
7 //   n32: in registers $4,$5,$6,$7,$8,$9,$10,$11, and on the stack at 4($sp),...
8 // Return value: in register $2
9 // Settings: intCsize=32, intDsize=32.
10 // Peculiarities: after every load instruction one wait cycle is needed before
11 //   the loaded value may be used.
12
13 // Strictly speaking, the MIPS ABI (-32 or -n32) is independent of the CPU
14 // identification (-mips[12] or -mips[34]). But -n32 is commonly used together
15 // with -mips3, and it is easier to test the CPU identification. Exactly one of ABI_N32 / ABI_O32 ends up defined.
16 #if __mips >= 3 // heuristic: a MIPS III or later target is assumed to use the n32 ABI
17   #define ABI_N32 1
18 #else // any older CPU level is treated as the o32 ABI
19   #define ABI_O32 1
20 #endif
21
22 // When this file is compiled into a shared library, ELF linkers need to
23 // know which symbols are functions. DECLARE_FUNCTION(name) emits that marker where it is enabled.
24 #if defined(__GNU__) || defined(__NetBSD__) // only when one of these two macros is predefined does the macro emit a .type directive
25   #define DECLARE_FUNCTION(name) .type name,@function
26 #else // everywhere else the macro expands to nothing
27   #define DECLARE_FUNCTION(name)
28 #endif
29
30         .text // everything below is assembled into the code section
31
32         .globl copy_loop_up // the following symbols are exported regardless of CL_DS_BIG_ENDIAN_P
33         .globl copy_loop_down
34         .globl fill_loop_up
35         .globl fill_loop_down
36         .globl clear_loop_up
37         .globl clear_loop_down
38         .globl test_loop_up
39         .globl test_loop_down
40         .globl xor_loop_up
41         .globl compare_loop_up
42 #if CL_DS_BIG_ENDIAN_P // nonzero: export the bitwise *_loop_up and the arithmetic *_loop_down variants
43         .globl or_loop_up
44         .globl and_loop_up
45         .globl eqv_loop_up
46         .globl nand_loop_up
47         .globl nor_loop_up
48         .globl andc2_loop_up
49         .globl orc2_loop_up
50         .globl not_loop_up
51         .globl and_test_loop_up
52         .globl add_loop_down
53         .globl addto_loop_down
54         .globl inc_loop_down
55         .globl sub_loop_down
56         .globl subx_loop_down
57         .globl subfrom_loop_down
58         .globl dec_loop_down
59         .globl neg_loop_down
60 #else // zero: export the bitwise *_loop_down and the arithmetic *_loop_up variants instead
61         .globl or_loop_down
62         .globl xor_loop_down
63         .globl and_loop_down
64         .globl eqv_loop_down
65         .globl nand_loop_down
66         .globl nor_loop_down
67         .globl andc2_loop_down
68         .globl orc2_loop_down
69         .globl not_loop_down
70         .globl and_test_loop_down
71         .globl compare_loop_down
72         .globl add_loop_up
73         .globl addto_loop_up
74         .globl inc_loop_up
75         .globl sub_loop_up
76         .globl subx_loop_up
77         .globl subfrom_loop_up
78         .globl dec_loop_up
79         .globl neg_loop_up
80 #endif // end of the direction-dependent export list
81
82 #ifndef __GNUC__ /* with GNU C, mulu32() is provided as a macro that multiplies inline */
83
84 // extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
85 // 2^32*hi+lo := arg1*arg2. Unsigned multiply: the low word comes back in $2, the high word is stored to mulu32_high.
86         .globl mulu32_
87         .align 2
88         DECLARE_FUNCTION(mulu32_)
89         .ent mulu32_ // Input: arg1 in $4, arg2 in $5. Output: lo in $2, hi in memory at mulu32_high
90 mulu32_:
91         multu $5,$4             // unsigned multiply arg2 * arg1
92         mfhi $6                 // $6 = hi (upper half of the product)
93         mflo $2                 // $2 = lo (lower half of the product, the register result)
94         sw $6,mulu32_high       // store hi to mulu32_high; that symbol is not declared in this file (the original author left the addressing and declaration as open questions)
95         j $31                   // return
96         .end mulu32_
97
98 #endif
99
100 // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count); -- copies count 32-bit words, stepping both pointers upward; returns the advanced destptr.
101         .align 2
102         DECLARE_FUNCTION(copy_loop_up)
103         .ent copy_loop_up // Input: sourceptr in $4, destptr in $5, count in $6. Output: final destptr in $2. Scratch: $12
104 colu1:    lw $12,($4)           // d = *sourceptr
105           addu $4,4             // sourceptr++ (also separates the load from the use of $12)
106           sw $12,($5)           // *destptr = d
107           addu $5,4             // destptr++
108           subu $6,1             // count--
109 copy_loop_up: // entry point: the count test runs first, so count==0 copies nothing
110           bnez $6,colu1         // until (count==0)
111         move $2,$5              // return value = destptr after the last store
112         j $31                   // return
113         .end copy_loop_up
114
115 // extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count); -- copies count 32-bit words, pre-decrementing both pointers; returns the lowered destptr.
116         .align 2
117         DECLARE_FUNCTION(copy_loop_down)
118         .ent copy_loop_down // Input: sourceptr in $4, destptr in $5, count in $6. Output: final destptr in $2. Scratch: $12
119 cold1:    subu $4,4             // --sourceptr
120           lw $12,($4)           // d = *sourceptr
121           subu $5,4             // --destptr (also separates the load from the use of $12)
122           sw $12,($5)           // *destptr = d
123           subu $6,1             // count--
124 copy_loop_down: // entry point: the count test runs first, so count==0 copies nothing
125           bnez $6,cold1         // until (count==0)
126         move $2,$5              // return value = destptr after the last store
127         j $31                   // return
128         .end copy_loop_down
129
130 // extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler); -- stores filler into count consecutive 32-bit words, moving upward; returns the advanced destptr.
131         .align 2
132         DECLARE_FUNCTION(fill_loop_up)
133         .ent fill_loop_up // Input: destptr in $4, count in $5, filler in $6. Output: final destptr in $2
134 flu1:     sw $6,($4)            // *destptr = filler
135           addu $4,4             // destptr++
136           subu $5,1             // count--
137 fill_loop_up: // entry point: the count test runs first, so count==0 stores nothing
138           bnez $5,flu1          // until (count==0)
139         move $2,$4              // return value = destptr after the last store
140         j $31                   // return
141         .end fill_loop_up
142
143 // extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler); -- stores filler into count 32-bit words, pre-decrementing destptr; returns the lowered destptr.
144         .align 2
145         DECLARE_FUNCTION(fill_loop_down)
146         .ent fill_loop_down // Input: destptr in $4, count in $5, filler in $6. Output: final destptr in $2
147 fld1:     subu $4,4             // --destptr
148           sw $6,($4)            // *destptr = filler
149           subu $5,1             // count--
150 fill_loop_down: // entry point: the count test runs first, so count==0 stores nothing
151           bnez $5,fld1          // until (count==0)
152         move $2,$4              // return value = destptr after the last store
153         j $31                   // return
154         .end fill_loop_down
155
156 // extern uintD* clear_loop_up (uintD* destptr, uintC count); -- zeroes count consecutive 32-bit words, moving upward; returns the advanced destptr.
157         .align 2
158         DECLARE_FUNCTION(clear_loop_up)
159         .ent clear_loop_up // Input: destptr in $4, count in $5. Output: final destptr in $2
160 cllu1:    sw $0,($4)            // *destptr = 0 ($0 is the hardwired zero register)
161           addu $4,4             // destptr++
162           subu $5,1             // count--
163 clear_loop_up: // entry point: the count test runs first, so count==0 stores nothing
164           bnez $5,cllu1         // until (count==0)
165         move $2,$4              // return value = destptr after the last store
166         j $31                   // return
167         .end clear_loop_up
168
169 // extern uintD* clear_loop_down (uintD* destptr, uintC count); -- zeroes count 32-bit words, pre-decrementing destptr; returns the lowered destptr.
170         .align 2
171         DECLARE_FUNCTION(clear_loop_down)
172         .ent clear_loop_down // Input: destptr in $4, count in $5. Output: final destptr in $2
173 clld1:    subu $4,4             // --destptr
174           sw $0,($4)            // *destptr = 0 ($0 is the hardwired zero register)
175           subu $5,1             // count--
176 clear_loop_down: // entry point: the count test runs first, so count==0 stores nothing
177           bnez $5,clld1         // until (count==0)
178         move $2,$4              // return value = destptr after the last store
179         j $31                   // return
180         .end clear_loop_down
181
182 // extern boolean test_loop_up (uintD* ptr, uintC count); -- scans count 32-bit words upward; returns 1 as soon as a nonzero word is seen, 0 if all are zero.
183         .align 2
184         DECLARE_FUNCTION(test_loop_up)
185         .ent test_loop_up // Input: ptr in $4, count in $5. Output: 0 or 1 in $2. Scratch: $12
186 tlu1:     lw $12,($4)           // x = *ptr
187           addu $4,4             // ptr++ (also separates the load from the use of $12)
188           bnez $12,tlu3         // nonzero word found -> return 1
189           subu $5,1             // count--
190 test_loop_up: // entry point: the count test runs first, so count==0 returns 0
191           bnez $5,tlu1          // until (count==0)
192         move $2,$0              // every word was zero: result 0
193         j $31                   // return
194 tlu3:   li $2,1                 // a nonzero word exists: result 1
195         j $31                   // return
196         .end test_loop_up
197
198 // extern boolean test_loop_down (uintD* ptr, uintC count); -- scans count 32-bit words downward (pre-decrement); returns 1 as soon as a nonzero word is seen, 0 if all are zero.
199         .align 2
200         DECLARE_FUNCTION(test_loop_down)
201         .ent test_loop_down // Input: ptr in $4, count in $5. Output: 0 or 1 in $2. Scratch: $12
202 tld1:     subu $4,4             // --ptr
203           lw $12,($4)           // x = *ptr
204           subu $5,1             // count-- (also separates the load from the use of $12)
205           bnez $12,tld3         // nonzero word found -> return 1
206 test_loop_down: // entry point: the count test runs first, so count==0 returns 0
207           bnez $5,tld1          // until (count==0)
208         move $2,$0              // every word was zero: result 0
209         j $31                   // return
210 tld3:   li $2,1                 // a nonzero word exists: result 1
211         j $31                   // return
212         .end test_loop_down
213
214 #if CL_DS_BIG_ENDIAN_P
215
216 // extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr |= *yptr. Only the x array is written.
217         .align 2
218         DECLARE_FUNCTION(or_loop_up)
219         .ent or_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
220 olu1:     lw $12,($4)           // x = *xptr
221           lw $13,($5)           // y = *yptr
222           addu $5,4             // yptr++ (also separates the load of y from its use)
223           or $12,$13            // x |= y
224           sw $12,($4)           // *xptr = x
225           addu $4,4             // xptr++
226           subu $6,1             // count--
227 or_loop_up: // entry point: the count test runs first, so count==0 touches nothing
228           bnez $6,olu1          // until (count==0)
229         j $31                   // return
230         .end or_loop_up
231
232 #endif
233
234 // extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr ^= *yptr. Only the x array is written.
235         .align 2
236         DECLARE_FUNCTION(xor_loop_up)
237         .ent xor_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
238 xlu1:     lw $12,($4)           // x = *xptr
239           lw $13,($5)           // y = *yptr
240           addu $5,4             // yptr++ (also separates the load of y from its use)
241           xor $12,$13           // x ^= y
242           sw $12,($4)           // *xptr = x
243           addu $4,4             // xptr++
244           subu $6,1             // count--
245 xor_loop_up: // entry point: the count test runs first, so count==0 touches nothing
246           bnez $6,xlu1          // until (count==0)
247         j $31                   // return
248         .end xor_loop_up
249
250 #if CL_DS_BIG_ENDIAN_P
251
252 // extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr &= *yptr. Only the x array is written.
253         .align 2
254         DECLARE_FUNCTION(and_loop_up)
255         .ent and_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
256 alu1:     lw $12,($4)           // x = *xptr
257           lw $13,($5)           // y = *yptr
258           addu $5,4             // yptr++ (also separates the load of y from its use)
259           and $12,$13           // x &= y
260           sw $12,($4)           // *xptr = x
261           addu $4,4             // xptr++
262           subu $6,1             // count--
263 and_loop_up: // entry point: the count test runs first, so count==0 touches nothing
264           bnez $6,alu1          // until (count==0)
265         j $31                   // return
266         .end and_loop_up
267
268 // extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr = ~(*xptr ^ *yptr). Only the x array is written.
269         .align 2
270         DECLARE_FUNCTION(eqv_loop_up)
271         .ent eqv_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
272 nxlu1:    lw $12,($4)           // x = *xptr
273           lw $13,($5)           // y = *yptr
274           addu $5,4             // yptr++ (also separates the load of y from its use)
275           xor $12,$13           // x ^= y
276           nor $12,$0            // x = ~x (nor with the zero register is a plain complement)
277           sw $12,($4)           // *xptr = x
278           addu $4,4             // xptr++
279           subu $6,1             // count--
280 eqv_loop_up: // entry point: the count test runs first, so count==0 touches nothing
281           bnez $6,nxlu1         // until (count==0)
282         j $31                   // return
283         .end eqv_loop_up
284
285 // extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr = ~(*xptr & *yptr). Only the x array is written.
286         .align 2
287         DECLARE_FUNCTION(nand_loop_up)
288         .ent nand_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
289 nalu1:    lw $12,($4)           // x = *xptr
290           lw $13,($5)           // y = *yptr
291           addu $5,4             // yptr++ (also separates the load of y from its use)
292           and $12,$13           // x &= y        // open question from the original author: is there a single 'nand $12,$13' instruction??
293           nor $12,$0            // x = ~x (nor with the zero register is a plain complement)
294           sw $12,($4)           // *xptr = x
295           addu $4,4             // xptr++
296           subu $6,1             // count--
297 nand_loop_up: // entry point: the count test runs first, so count==0 touches nothing
298           bnez $6,nalu1         // until (count==0)
299         j $31                   // return
300         .end nand_loop_up
301
302 // extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr = ~(*xptr | *yptr). Only the x array is written.
303         .align 2
304         DECLARE_FUNCTION(nor_loop_up)
305         .ent nor_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
306 nolu1:    lw $12,($4)           // x = *xptr
307           lw $13,($5)           // y = *yptr
308           addu $5,4             // yptr++ (also separates the load of y from its use)
309           nor $12,$13           // x = ~(x|y)
310           sw $12,($4)           // *xptr = x
311           addu $4,4             // xptr++
312           subu $6,1             // count--
313 nor_loop_up: // entry point: the count test runs first, so count==0 touches nothing
314           bnez $6,nolu1         // until (count==0)
315         j $31                   // return
316         .end nor_loop_up
317
318 // extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr &= ~*yptr. Only the x array is written.
319         .align 2
320         DECLARE_FUNCTION(andc2_loop_up)
321         .ent andc2_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
322 aclu1:    lw $12,($4)           // x = *xptr
323           lw $13,($5)           // y = *yptr
324           addu $5,4             // yptr++ (also separates the load of y from its use)
325           nor $13,$0            // y = ~y (complement the second operand first)
326           and $12,$13           // x &= y
327           sw $12,($4)           // *xptr = x
328           addu $4,4             // xptr++
329           subu $6,1             // count--
330 andc2_loop_up: // entry point: the count test runs first, so count==0 touches nothing
331           bnez $6,aclu1         // until (count==0)
332         j $31                   // return
333         .end andc2_loop_up
334
335 // extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count); -- for count words moving upward: *xptr |= ~*yptr. Only the x array is written.
336         .align 2
337         DECLARE_FUNCTION(orc2_loop_up)
338         .ent orc2_loop_up // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
339 oclu1:    lw $12,($4)           // x = *xptr
340           lw $13,($5)           // y = *yptr
341           addu $5,4             // yptr++ (also separates the load of y from its use)
342           nor $13,$0            // y = ~y (complement the second operand first)
343           or $12,$13            // x |= y
344           sw $12,($4)           // *xptr = x
345           addu $4,4             // xptr++
346           subu $6,1             // count--
347 orc2_loop_up: // entry point: the count test runs first, so count==0 touches nothing
348           bnez $6,oclu1         // until (count==0)
349         j $31                   // return
350         .end orc2_loop_up
351
352 // extern void not_loop_up (uintD* xptr, uintC count); -- complements count 32-bit words in place, moving upward.
353         .align 2
354         DECLARE_FUNCTION(not_loop_up)
355         .ent not_loop_up // Input: xptr in $4, count in $5. No result register is set. Scratch: $12
356 nlu1:     lw $12,($4)           // x = *xptr
357           subu $5,1             // count-- (placed here so the load is separated from the use of $12)
358           nor $12,$0            // x = ~x (nor with the zero register is a plain complement)
359           sw $12,($4)           // *xptr = x
360           addu $4,4             // xptr++
361 not_loop_up: // entry point: the count test runs first, so count==0 touches nothing
362           bnez $5,nlu1          // until (count==0)
363         j $31                   // return
364         .end not_loop_up
365
366 // extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count); -- scans count word pairs upward; returns 1 as soon as (*xptr & *yptr) is nonzero, else 0. Nothing is written to memory.
367         .align 2
368         DECLARE_FUNCTION(and_test_loop_up)
369         .ent and_test_loop_up // Input: xptr in $4, yptr in $5, count in $6. Output: 0 or 1 in $2. Scratch: $12,$13
370 atlu1:    lw $12,($4)           // x = *xptr
371           lw $13,($5)           // y = *yptr
372           addu $5,4             // yptr++ (also separates the load of y from its use)
373           and $12,$13           // x &= y
374           bnez $12,atlu3        // a common set bit was found -> return 1
375           addu $4,4             // xptr++
376           subu $6,1             // count--
377 and_test_loop_up: // entry point: the count test runs first, so count==0 returns 0
378           bnez $6,atlu1         // until (count==0)
379         move $2,$0              // no pair had a common bit: result 0
380         j $31                   // return
381 atlu3:  li $2,1                 // some pair had a common bit: result 1
382         j $31                   // return
383         .end and_test_loop_up
384
385 #endif
386
387 // extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count); -- compares count word pairs upward; at the first differing pair returns 1 if x>y or -1 if x<y (unsigned), and 0 if all pairs are equal.
388         .align 2
389         DECLARE_FUNCTION(compare_loop_up)
390         .ent compare_loop_up // Input: xptr in $4, yptr in $5, count in $6. Output: -1, 0 or 1 in $2. Scratch: $12,$13
391 cmlu1:    lw $12,($4)           // x = *xptr
392           lw $13,($5)           // y = *yptr
393           addu $5,4             // yptr++ (also separates the load of y from its use)
394           bne $12,$13,cmlu3     // first differing pair decides the result
395           addu $4,4             // xptr++
396           subu $6,1             // count--
397 compare_loop_up: // entry point: the count test runs first, so count==0 returns 0
398           bnez $6,cmlu1         // until (count==0)
399         move $2,$0              // all pairs equal: result 0
400         j $31                   // return
401 cmlu3:  bltu $12,$13,cmlu4      // unsigned comparison: x<y goes to the -1 exit
402         li $2,1                 // x>y: result 1
403         j $31                   // return
404 cmlu4:  li $2,-1                // x<y: result -1
405         j $31                   // return
406         .end compare_loop_up
407
408 #if CL_DS_BIG_ENDIAN_P
409
410 // extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count); -- multiword add, walking all three pointers downward: dest = source1 + source2 with carry propagation; returns the final carry (0 or 1).
411         .align 2
412         DECLARE_FUNCTION(add_loop_down)
413         .ent add_loop_down // Input: sourceptr1 in $4, sourceptr2 in $5, destptr in $6, count in $7. Output: carry in $2. Scratch: $12,$13
414 ald1:     // loop body used while no carry is pending
415           subu $4,4             // --sourceptr1
416           subu $5,4             // --sourceptr2
417           lw $12,($4)           // source1 = *sourceptr1
418           lw $13,($5)           // source2 = *sourceptr2
419           subu $6,4             // --destptr (also separates the load of source2 from its use)
420           addu $12,$13          // dest = source1 + source2
421           sw $12,($6)           // *destptr = dest
422           bltu $12,$13,ald4     // if (dest < source2) the sum wrapped, i.e. a carry: switch to the carry loop
423 ald2: // re-entry from the carry loop once the carry has been absorbed
424           subu $7,1             // count--
425 add_loop_down: // entry point: starts with no carry; the count test runs first
426           bnez $7,ald1          // until (count==0)
427         move $2,$0              // finished without a pending carry: result 0
428         j $31                   // return
429 ald3:   // loop body used while a carry is pending
430           subu $4,4             // --sourceptr1
431           subu $5,4             // --sourceptr2
432           lw $12,($4)           // source1 = *sourceptr1
433           lw $13,($5)           // source2 = *sourceptr2
434           subu $6,4             // --destptr (also separates the load of source2 from its use)
435           addu $12,$13          // dest = source1 + source2
436           addu $12,1            //        + 1 (the incoming carry)
437           sw $12,($6)           // *destptr = dest
438           bgtu $12,$13,ald2     // if (dest > source2) there was no wrap, i.e. no carry: back to the no-carry loop
439 ald4:     subu $7,1             // count--
440           bnez $7,ald3          // until (count==0)
441         li $2,1                 // finished with a pending carry: result 1
442         j $31                   // return
443         .end add_loop_down
444
445 // extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count); -- multiword add in place, walking both pointers downward: *destptr += *sourceptr with carry propagation; returns the final carry (0 or 1).
446         .align 2
447         DECLARE_FUNCTION(addto_loop_down)
448         .ent addto_loop_down // Input: sourceptr in $4, destptr in $5, count in $6. Output: carry in $2. Scratch: $12,$13
449 atld1:    // loop body used while no carry is pending
450           subu $4,4             // --sourceptr
451           subu $5,4             // --destptr
452           lw $12,($4)           // source1 = *sourceptr
453           lw $13,($5)           // source2 = *destptr
454           subu $6,1             // count-- (also separates the load of source2 from its use)
455           addu $12,$13          // dest = source1 + source2
456           sw $12,($5)           // *destptr = dest
457           bltu $12,$13,atld4    // if (dest < source2) the sum wrapped, i.e. a carry: switch to the carry loop
458 addto_loop_down: // entry point: starts with no carry; the count test runs first
459 atld2:    bnez $6,atld1         // until (count==0)
460         move $2,$0              // finished without a pending carry: result 0
461         j $31                   // return
462 atld3:  // loop body used while a carry is pending
463           subu $4,4             // --sourceptr
464           subu $5,4             // --destptr
465           lw $12,($4)           // source1 = *sourceptr
466           lw $13,($5)           // source2 = *destptr
467           subu $6,1             // count-- (also separates the load of source2 from its use)
468           addu $12,$13          // dest = source1 + source2
469           addu $12,1            //        + 1 (the incoming carry)
470           sw $12,($5)           // *destptr = dest
471           bgtu $12,$13,atld2    // if (dest > source2) there was no wrap, i.e. no carry: back to the no-carry loop
472 atld4:    bnez $6,atld3         // until (count==0)
473         li $2,1                 // finished with a pending carry: result 1
474         j $31                   // return
475         .end addto_loop_down
476
477 // extern uintD inc_loop_down (uintD* ptr, uintC count); -- adds 1 to a multiword number, walking downward; stops at the first word that does not wrap. Returns 0 then, or 1 if all count words wrapped to zero.
478         .align 2
479         DECLARE_FUNCTION(inc_loop_down)
480         .ent inc_loop_down // Input: ptr in $4, count in $5. Output: carry (0 or 1) in $2. Scratch: $12
481 ild1:     subu $4,4             // --ptr
482           lw $12,($4)           // x = *ptr
483           subu $5,1             // count-- (also separates the load from the use of $12)
484           addu $12,1            // x++;
485           sw $12,($4)           // *ptr = x
486           bnez $12,ild3         // x != 0 means the increment did not wrap: no carry, finished
487 inc_loop_down: // entry point: the count test runs first, so count==0 returns 1
488           bnez $5,ild1          // until (count==0)
489         li $2,1                 // the carry ran off the end: result 1
490         j $31                   // return
491 ild3:   move $2,$0              // the carry was absorbed: result 0
492         j $31                   // return
493         .end inc_loop_down
494
495 // extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count); -- multiword subtract, walking all three pointers downward: dest = source1 - source2 with borrow propagation; returns 0 if no borrow remains, -1 otherwise.
496         .align 2
497         DECLARE_FUNCTION(sub_loop_down)
498         .ent sub_loop_down // Input: sourceptr1 in $4, sourceptr2 in $5, destptr in $6, count in $7. Output: 0 or -1 in $2. Scratch: $12,$13
499 sld1:     // loop body used while no borrow is pending
500           subu $4,4             // --sourceptr1
501           subu $5,4             // --sourceptr2
502           lw $12,($4)           // source1 = *sourceptr1
503           lw $13,($5)           // source2 = *sourceptr2
504           subu $6,4             // --destptr (also separates the load of source2 from its use)
505           bltu $12,$13,sld2     // if (source1 < source2) this word borrows: finish it on the borrow path
506           subu $12,$13          // dest = source1 - source2
507           sw $12,($6)           // *destptr = dest
508           subu $7,1             // count--
509 sub_loop_down: // entry point: starts with no borrow; the count test runs first
510           bnez $7,sld1          // until (count==0)
511         move $2,$0              // finished without a pending borrow: result 0
512         j $31                   // return
513 sld2:     subu $12,$13          // dest = source1 - source2 (wraps; a borrow is now pending)
514           sw $12,($6)           // *destptr = dest
515           subu $7,1             // count--
516           bnez $7,sld3          // more words left: continue in the borrow loop
517         li $2,-1                // finished with a pending borrow: result -1
518         j $31                   // return
519 sld3:   // loop body used while a borrow is pending
520           subu $4,4             // --sourceptr1
521           subu $5,4             // --sourceptr2
522           lw $12,($4)           // source1 = *sourceptr1
523           lw $13,($5)           // source2 = *sourceptr2
524           subu $6,4             // --destptr (also separates the load of source2 from its use)
525           bgtu $12,$13,sld4     // if (source1 > source2) the borrow is absorbed here: finish on the no-borrow path
526           subu $12,$13          // dest = source1 - source2
527           subu $12,1            //        - 1 (the incoming borrow)
528           sw $12,($6)           // *destptr = dest
529           subu $7,1             // count--
530           bnez $7,sld3          // until (count==0)
531         li $2,-1                // finished with a pending borrow: result -1
532         j $31                   // return
533 sld4:     subu $12,$13          // dest = source1 - source2
534           subu $12,1            //        - 1 (the incoming borrow; no new borrow results)
535           sw $12,($6)           // *destptr = dest
536           subu $7,1             // count--
537           bnez $7,sld1          // more words left: continue in the no-borrow loop
538         move $2,$0              // finished without a pending borrow: result 0
539         j $31                   // return
540         .end sub_loop_down
541
542 // extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry); -- like a multiword downward subtract dest = source1 - source2, but starting with an incoming borrow when carry is nonzero; returns 0 if no borrow remains, -1 otherwise.
543         .align 2
544         DECLARE_FUNCTION(subx_loop_down)
545         .ent subx_loop_down // Input: sourceptr1 in $4, sourceptr2 in $5, destptr in $6, count in $7, carry in $8 (ABI_N32) or at 16($sp) (otherwise). Output: 0 or -1 in $2. Scratch: $12,$13
546 subx_loop_down: // entry point: unlike the other loops, execution starts at the top to fetch the carry argument
547 #if ABI_N32
548         move $12,$8             // carry: fifth argument arrives in a register
549 #else
550         lw $12,16($sp)          // carry: fifth argument is read from the stack
551 #endif
552         bnez $12,sxld5          // carry != 0: enter at the count test of the borrow loop
553         b sxld2                 // carry == 0: enter at the count test of the no-borrow loop
554 sxld1:    // loop body used while no borrow is pending
555           subu $4,4             // --sourceptr1
556           subu $5,4             // --sourceptr2
557           lw $12,($4)           // source1 = *sourceptr1
558           lw $13,($5)           // source2 = *sourceptr2
559           subu $6,4             // --destptr (also separates the load of source2 from its use)
560           bltu $12,$13,sxld3    // if (source1 < source2) this word borrows: finish it on the borrow path
561           subu $12,$13          // dest = source1 - source2
562           sw $12,($6)           // *destptr = dest
563           subu $7,1             // count--
564 sxld2:    bnez $7,sxld1         // until (count==0)
565         move $2,$0              // finished without a pending borrow: result 0
566         j $31                   // return
567 sxld3:    subu $12,$13          // dest = source1 - source2 (wraps; a borrow is now pending)
568           sw $12,($6)           // *destptr = dest
569           subu $7,1             // count--
570           bnez $7,sxld4         // more words left: continue in the borrow loop
571         li $2,-1                // finished with a pending borrow: result -1
572         j $31                   // return
573 sxld4:  // loop body used while a borrow is pending
574           subu $4,4             // --sourceptr1
575           subu $5,4             // --sourceptr2
576           lw $12,($4)           // source1 = *sourceptr1
577           lw $13,($5)           // source2 = *sourceptr2
578           subu $6,4             // --destptr (also separates the load of source2 from its use)
579           bgtu $12,$13,sxld6    // if (source1 > source2) the borrow is absorbed here: finish on the no-borrow path
580           subu $12,$13          // dest = source1 - source2
581           subu $12,1            //        - 1 (the incoming borrow)
582           sw $12,($6)           // *destptr = dest
583           subu $7,1             // count--
584 sxld5:    bnez $7,sxld4         // until (count==0)
585         li $2,-1                // finished with a pending borrow: result -1
586         j $31                   // return
587 sxld6:    subu $12,$13          // dest = source1 - source2
588           subu $12,1            //        - 1 (the incoming borrow; no new borrow results)
589           sw $12,($6)           // *destptr = dest
590           subu $7,1             // count--
591           bnez $7,sxld1         // more words left: continue in the no-borrow loop
592         move $2,$0              // finished without a pending borrow: result 0
593         j $31                   // return
594         .end subx_loop_down
595
596 // extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count); -- multiword subtract in place, walking both pointers downward: *destptr -= *sourceptr with borrow propagation; returns 0 if no borrow remains, -1 otherwise.
597         .align 2
598         DECLARE_FUNCTION(subfrom_loop_down)
599         .ent subfrom_loop_down // Input: sourceptr in $4, destptr in $5, count in $6. Output: 0 or -1 in $2. Scratch: $12,$13
600 sfld1:    // loop body used while no borrow is pending
601           subu $4,4             // --sourceptr
602           subu $5,4             // --destptr
603           lw $12,($5)           // source1 = *destptr (the minuend)
604           lw $13,($4)           // source2 = *sourceptr (the subtrahend)
605           subu $6,1             // count-- (also separates the load of source2 from its use)
606           bltu $12,$13,sfld2    // if (source1 < source2) this word borrows: finish it on the borrow path
607           subu $12,$13          // dest = source1 - source2
608           sw $12,($5)           // *destptr = dest
609 subfrom_loop_down: // entry point: starts with no borrow; the count test runs first
610           bnez $6,sfld1         // until (count==0)
611         move $2,$0              // finished without a pending borrow: result 0
612         j $31                   // return
613 sfld2:    subu $12,$13          // dest = source1 - source2 (wraps; a borrow is now pending)
614           sw $12,($5)           // *destptr = dest
615           bnez $6,sfld3         // more words left: continue in the borrow loop
616         li $2,-1                // finished with a pending borrow: result -1
617         j $31                   // return
618 sfld3:  // loop body used while a borrow is pending
619           subu $4,4             // --sourceptr
620           subu $5,4             // --destptr
621           lw $12,($5)           // source1 = *destptr (the minuend)
622           lw $13,($4)           // source2 = *sourceptr (the subtrahend)
623           subu $6,1             // count-- (also separates the load of source2 from its use)
624           bgtu $12,$13,sfld4    // if (source1 > source2) the borrow is absorbed here: finish on the no-borrow path
625           subu $12,$13          // dest = source1 - source2
626           subu $12,1            //        - 1 (the incoming borrow)
627           sw $12,($5)           // *destptr = dest
628           bnez $6,sfld3         // until (count==0)
629         li $2,-1                // finished with a pending borrow: result -1
630         j $31                   // return
631 sfld4:    subu $12,$13          // dest = source1 - source2
632           subu $12,1            //        - 1 (the incoming borrow; no new borrow results)
633           sw $12,($5)           // *destptr = dest
634           bnez $6,sfld1         // more words left: continue in the no-borrow loop
635         move $2,$0              // finished without a pending borrow: result 0
636         j $31                   // return
637         .end subfrom_loop_down
638
639 // extern uintD dec_loop_down (uintD* ptr, uintC count); -- subtracts 1 from a multiword number, walking downward; stops at the first nonzero word. Returns 0 then, or -1 if all count words were zero (the borrow ran off the end).
640         .align 2
641         DECLARE_FUNCTION(dec_loop_down)
642         .ent dec_loop_down // Input: ptr in $4, count in $5. Output: 0 or -1 in $2. Scratch: $12
643 dld1:     subu $4,4             // --ptr
644           lw $12,($4)           // x = *ptr
645           subu $5,1             // count-- (also separates the load from the use of $12)
646           bnez $12,dld3         // x != 0: decrementing it will not borrow, take the final path
647           subu $12,1            // x--; (x was 0, so it wraps and the borrow continues)
648           sw $12,($4)           // *ptr = x
649 dec_loop_down: // entry point: the count test runs first, so count==0 returns -1
650           bnez $5,dld1          // until (count==0)
651         li $2,-1                // the borrow ran off the end: result -1
652         j $31                   // return
653 dld3:   subu $12,1              // x--; (last word touched; no borrow results)
654         sw $12,($4)             // *ptr = x
655         move $2,$0              // the borrow was absorbed: result 0
656         j $31                   // return
657         .end dec_loop_down
658
659 // extern uintD neg_loop_down (uintD* ptr, uintC count); -- negates a multiword number in place, walking downward. Returns 0 if every word was zero (nothing is changed), else -1.
660         .align 2
661         DECLARE_FUNCTION(neg_loop_down)
662         .ent neg_loop_down // Input: ptr in $4, count in $5. Output: 0 or -1 in $2. Scratch: $12
663         // phase 1: search for the first nonzero digit (zero digits stay unchanged):
664 nld1:     subu $4,4             // --ptr
665           lw $12,($4)           // x = *ptr
666           subu $5,1             // count-- (also separates the load from the use of $12)
667           bnez $12,nld3         // first nonzero digit found
668 neg_loop_down: // entry point: the count test runs first, so count==0 returns 0
669           bnez $5,nld1          // until (count==0)
670         move $2,$0              // all digits were zero: result 0
671         j $31                   // return
672 nld3:   // first nonzero digit found; from here on a carry is always pending
673         // phase 2: negate this one digit:
674         subu $12,$0,$12         // x = -x
675         sw $12,($4)             // *ptr = x
676         // phase 3: complement all remaining digits:
677         b nld5                  // jump to the count test of the complement loop
678 nld4:     subu $4,4             // --xptr
679           lw $12,($4)           // x = *xptr
680           subu $5,1             // count-- (also separates the load from the use of $12)
681           nor $12,$0            // x = ~x
682           sw $12,($4)           // *xptr = x
683 nld5:     bnez $5,nld4          // until (count==0)
684         li $2,-1                // the number was nonzero: result -1
685         j $31                   // return
686         .end neg_loop_down
687
688 #endif
689
690 #if !CL_DS_BIG_ENDIAN_P
691
692 // extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count); -- for count words moving downward (pre-decrement): *xptr |= *yptr. Only the x array is written.
693         .align 2
694         DECLARE_FUNCTION(or_loop_down)
695         .ent or_loop_down // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
696 old1:     subu $4,4             // --xptr
697           subu $5,4             // --yptr
698           lw $12,($4)           // x = *xptr
699           lw $13,($5)           // y = *yptr
700           subu $6,1             // count-- (also separates the load of y from its use)
701           or $12,$13            // x |= y
702           sw $12,($4)           // *xptr = x
703 or_loop_down: // entry point: the count test runs first, so count==0 touches nothing
704           bnez $6,old1          // until (count==0)
705         j $31                   // return
706         .end or_loop_down
707
708 // extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count); -- for count words moving downward (pre-decrement): *xptr ^= *yptr. Only the x array is written.
709         .align 2
710         DECLARE_FUNCTION(xor_loop_down)
711         .ent xor_loop_down // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
712 xld1:     subu $4,4             // --xptr
713           subu $5,4             // --yptr
714           lw $12,($4)           // x = *xptr
715           lw $13,($5)           // y = *yptr
716           subu $6,1             // count-- (also separates the load of y from its use)
717           xor $12,$13           // x ^= y
718           sw $12,($4)           // *xptr = x
719 xor_loop_down: // entry point: the count test runs first, so count==0 touches nothing
720           bnez $6,xld1          // until (count==0)
721         j $31                   // return
722         .end xor_loop_down
723
724 // extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count); -- for count words moving downward (pre-decrement): *xptr &= *yptr. Only the x array is written.
725         .align 2
726         DECLARE_FUNCTION(and_loop_down)
727         .ent and_loop_down // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
728 ald1:     subu $4,4             // --xptr
729           subu $5,4             // --yptr
730           lw $12,($4)           // x = *xptr
731           lw $13,($5)           // y = *yptr
732           subu $6,1             // count-- (also separates the load of y from its use)
733           and $12,$13           // x &= y
734           sw $12,($4)           // *xptr = x
735 and_loop_down: // entry point: the count test runs first, so count==0 touches nothing
736           bnez $6,ald1          // until (count==0)
737         j $31                   // return
738         .end and_loop_down
739
740 // extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count); -- for count words moving downward (pre-decrement): *xptr = ~(*xptr ^ *yptr). Only the x array is written.
741         .align 2
742         DECLARE_FUNCTION(eqv_loop_down)
743         .ent eqv_loop_down // Input: xptr in $4, yptr in $5, count in $6. No result register is set. Scratch: $12,$13
744 nxld1:    subu $4,4             // --xptr
745           subu $5,4             // --yptr
746           lw $12,($4)           // x = *xptr
747           lw $13,($5)           // y = *yptr
748           subu $6,1             // count-- (also separates the load of y from its use)
749           xor $12,$13           // x ^= y
750           nor $12,$0            // x = ~x (nor with the zero register is a plain complement)
751           sw $12,($4)           // *xptr = x
752 eqv_loop_down: // entry point: the count test runs first, so count==0 touches nothing
753           bnez $6,nxld1         // until (count==0)
754         j $31                   // return
755         .end eqv_loop_down
756
757 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Walks both arrays downwards and stores ~(x & y) back into x, count times.
// The complement is built as and followed by nor with the zero register.
// The entry label sits on the loop test, so count == 0 returns immediately.
// $12 and $13 are used as scratch registers.
758         .align 2
759         DECLARE_FUNCTION(nand_loop_down)
760         .ent nand_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
761 nald1:    subu $4,4             // xptr--
762           subu $5,4             // yptr--
763           lw $12,($4)           // x = *xptr
764           lw $13,($5)           // y = *yptr
765           subu $6,1             // count-- (separates the load of y from its use)
766           and $12,$13           // x &= y        // Is there a 'nand $12,$13' ??
767           nor $12,$0            // x = ~x  (nor with $0)
768           sw $12,($4)           // *xptr = x
769 nand_loop_down:                 // function entry point
770           bnez $6,nald1         // until (count==0)
771         j $31                   // return
772         .end nand_loop_down
773
774 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Walks both arrays downwards and stores ~(x | y) back into x, count times,
// using a single nor instruction per word.
// The entry label sits on the loop test, so count == 0 returns immediately.
// $12 and $13 are used as scratch registers.
775         .align 2
776         DECLARE_FUNCTION(nor_loop_down)
777         .ent nor_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
778 nold1:    subu $4,4             // xptr--
779           subu $5,4             // yptr--
780           lw $12,($4)           // x = *xptr
781           lw $13,($5)           // y = *yptr
782           subu $6,1             // count-- (separates the load of y from its use)
783           nor $12,$13           // x = ~(x|y)
784           sw $12,($4)           // *xptr = x
785 nor_loop_down:                  // function entry point
786           bnez $6,nold1         // until (count==0)
787         j $31                   // return
788         .end nor_loop_down
789
790 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Walks both arrays downwards and performs x &= ~y on each word pair
// ("and with complemented 2nd operand"), count times; y is only changed in
// the scratch register, not in memory.
// The entry label sits on the loop test, so count == 0 returns immediately.
// $12 and $13 are used as scratch registers.
791         .align 2
792         DECLARE_FUNCTION(andc2_loop_down)
793         .ent andc2_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
794 acld1:    subu $4,4             // xptr--
795           subu $5,4             // yptr--
796           lw $12,($4)           // x = *xptr
797           lw $13,($5)           // y = *yptr
798           subu $6,1             // count-- (separates the load of y from its use)
799           nor $13,$0            // y = ~y  (nor with $0)
800           and $12,$13           // x &= y
801           sw $12,($4)           // *xptr = x
802 andc2_loop_down:                // function entry point
803           bnez $6,acld1         // until (count==0)
804         j $31                   // return
805         .end andc2_loop_down
806
807 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Walks both arrays downwards and performs x |= ~y on each word pair
// ("or with complemented 2nd operand"), count times; y is only changed in
// the scratch register, not in memory.
// The entry label sits on the loop test, so count == 0 returns immediately.
// $12 and $13 are used as scratch registers.
808         .align 2
809         DECLARE_FUNCTION(orc2_loop_down)
810         .ent orc2_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
811 ocld1:    subu $4,4             // xptr--
812           subu $5,4             // yptr--
813           lw $12,($4)           // x = *xptr
814           lw $13,($5)           // y = *yptr
815           subu $6,1             // count-- (separates the load of y from its use)
816           nor $13,$0            // y = ~y  (nor with $0)
817           or $12,$13            // x |= y
818           sw $12,($4)           // *xptr = x
819 orc2_loop_down:                 // function entry point
820           bnez $6,ocld1         // until (count==0)
821         j $31                   // return
822         .end orc2_loop_down
823
824 // extern void not_loop_down (uintD* xptr, uintC count);
// Walks the array downwards and complements each word in place:
// *--xptr = ~*xptr, count times.
// The entry label sits on the loop test, so count == 0 returns immediately
// without any load or store. $12 is used as scratch register.
825         .align 2
826         DECLARE_FUNCTION(not_loop_down)
827         .ent not_loop_down // Input: $4 = xptr, $5 = count
828 nld1:     subu $4,4             // xptr--
829           lw $12,($4)           // x = *xptr
830           subu $5,1             // count-- (separates the load of x from its use)
831           nor $12,$0            // x = ~x  (nor with $0)
832           sw $12,($4)           // *xptr = x
833 not_loop_down:                  // function entry point
834           bnez $5,nld1          // until (count==0)
835         j $31                   // return
836         .end not_loop_down
837
838 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Tests whether any of the count word pairs, walking both arrays downwards,
// has a common set bit: returns 1 at the first pair with (*--xptr & *--yptr) != 0,
// and 0 if no such pair is found (including count == 0). Nothing is stored.
// count-- is issued right after the load of y, so that y is not consumed by
// the instruction immediately following its lw (see the load-delay note in
// the file header). On the early-exit path the decremented count is unused.
// $12 and $13 are used as scratch registers.
839         .align 2
840         DECLARE_FUNCTION(and_test_loop_down)
841         .ent and_test_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count; Output in $2
842 atld1:    subu $4,4             // xptr--
843           subu $5,4             // yptr--
844           lw $12,($4)           // x = *xptr
845           lw $13,($5)           // y = *yptr
846           subu $6,1             // count-- (separates the load of y from its use)
847           and $12,$13           // x &= y
848           bnez $12,atld3        // if (x) ... common bit found, return 1
849 and_test_loop_down:             // function entry point
850           bnez $6,atld1         // until (count==0)
851         move $2,$0              // 0
852         j $31                   // return
853 atld3:  li $2,1                 // 1
854         j $31                   // return
855         .end and_test_loop_down
856
857 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Compares up to count word pairs, walking both arrays downwards, and stops
// at the first pair that differs. Returns 0 if all pairs are equal (or
// count == 0), 1 if at the first difference x > y, -1 if x < y; the words
// are compared as unsigned values (bltu). Nothing is stored.
// $12 and $13 are used as scratch registers.
858         .align 2
859         DECLARE_FUNCTION(compare_loop_down)
860         .ent compare_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count; Output in $2
861 cmld1:    subu $4,4             // xptr--
862           subu $5,4             // yptr--
863           lw $12,($4)           // x = *xptr
864           lw $13,($5)           // y = *yptr
865           subu $6,1             // count-- (separates the load of y from its use)
866           bne $12,$13,cmld3     // if (!(x==y)) ... decide the sign below
867 compare_loop_down:              // function entry point
868           bnez $6,cmld1         // until (count==0)
869         move $2,$0              // 0
870         j $31                   // return
871 cmld3:  bltu $12,$13,cmld4      // if (x<y) ... (unsigned)
872         li $2,1                 // 1
873         j $31                   // return
874 cmld4:  li $2,-1                // -1
875         j $31                   // return
876         .end compare_loop_down
877
878 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Multi-word addition, walking all three arrays upwards:
// *destptr++ = *sourceptr1++ + *sourceptr2++ (+ carry), count times.
// Returns the final carry: 0 or 1 (0 for count == 0).
// The carry is not kept in a register; instead there are two copies of the
// loop body, alu1 (no carry pending) and alu3 (carry pending), and the code
// jumps between them depending on the unsigned comparison of the sum with
// source2. $12 and $13 are used as scratch registers.
879         .align 2
880         DECLARE_FUNCTION(add_loop_up)
881         .ent add_loop_up // Input in $4,$5,$6,$7, Output in $2
882 alu1:     // no carry pending
883           lw $12,($4)           // source1 = *sourceptr1
884           lw $13,($5)           // source2 = *sourceptr2
885           addu $4,4             // sourceptr1++
886           addu $5,4             // sourceptr2++
887           addu $12,$13          // dest = source1 + source2
888           sw $12,($6)           // *destptr = dest
889           addu $6,4             // destptr++
890           bltu $12,$13,alu4     // if (dest < source2) [i.e. carry] ...
891 alu2:
892           subu $7,1             // count--
893 add_loop_up:                    // function entry point (no carry pending)
894           bnez $7,alu1          // until (count==0)
895         move $2,$0              // 0
896         j $31                   // return
897 alu3:   // carry pending here
898           lw $12,($4)           // source1 = *sourceptr1
899           lw $13,($5)           // source2 = *sourceptr2
900           addu $4,4             // sourceptr1++
901           addu $5,4             // sourceptr2++
902           addu $12,$13          // dest = source1 + source2
903           addu $12,1            //        + 1
904           sw $12,($6)           // *destptr = dest
905           addu $6,4             // destptr++
906           bgtu $12,$13,alu2     // if (dest > source2) [i.e. no carry] ...
907 alu4:     subu $7,1             // count--
908           bnez $7,alu3          // until (count==0)
909         li $2,1                 // 1
910         j $31                   // return
911         .end add_loop_up
912
913 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
// Multi-word in-place addition, walking both arrays upwards:
// *destptr++ += *sourceptr++ (+ carry), count times.
// Returns the final carry: 0 or 1 (0 for count == 0).
// The carry is not kept in a register; instead there are two copies of the
// loop body, atlu1 (no carry pending) and atlu3 (carry pending), and the code
// jumps between them depending on the unsigned comparison of the sum with
// the old destination word. $12 and $13 are used as scratch registers.
914         .align 2
915         DECLARE_FUNCTION(addto_loop_up)
916         .ent addto_loop_up // Input in $4,$5,$6, Output in $2
917 atlu1:    // no carry pending
918           lw $12,($4)           // source1 = *sourceptr
919           lw $13,($5)           // source2 = *destptr
920           addu $4,4             // sourceptr++
921           subu $6,1             // count--
922           addu $12,$13          // dest = source1 + source2
923           sw $12,($5)           // *destptr = dest
924           addu $5,4             // destptr++
925           bltu $12,$13,atlu4    // if (dest < source2) [i.e. carry] ...
926 addto_loop_up:                  // function entry point (no carry pending)
927 atlu2:    bnez $6,atlu1         // until (count==0)
928         move $2,$0              // 0
929         j $31                   // return
930 atlu3:  // carry pending here
931           lw $12,($4)           // source1 = *sourceptr
932           lw $13,($5)           // source2 = *destptr
933           addu $4,4             // sourceptr++
934           subu $6,1             // count--
935           addu $12,$13          // dest = source1 + source2
936           addu $12,1            //        + 1
937           sw $12,($5)           // *destptr = dest
938           addu $5,4             // destptr++
939           bgtu $12,$13,atlu2    // if (dest > source2) [i.e. no carry] ...
940 atlu4:    bnez $6,atlu3         // until (count==0)
941         li $2,1                 // 1
942         j $31                   // return
943         .end addto_loop_up
944
945 // extern uintD inc_loop_up (uintD* ptr, uintC count);
// Increments a multi-word number in place, walking the array upwards.
// Each word is incremented and stored; the loop stops at the first word that
// does not wrap around to 0 and returns 0 (no carry out). If all count words
// wrapped to 0, or count == 0, it returns 1 (carry out).
// $12 is used as scratch register.
946         .align 2
947         DECLARE_FUNCTION(inc_loop_up)
948         .ent inc_loop_up // Input in $4,$5, Output in $2
949 ilu1:     lw $12,($4)           // x = *ptr
950           subu $5,1             // count-- (separates the load of x from its use)
951           addu $12,1            // x++;
952           sw $12,($4)           // *ptr = x
953           addu $4,4             // ptr++
954           bnez $12,ilu3         // if (!(x==0)) ... no wrap-around, carry absorbed
955 inc_loop_up:                    // function entry point
956           bnez $5,ilu1          // until (count==0)
957         li $2,1                 // 1
958         j $31                   // return
959 ilu3:   move $2,$0              // 0
960         j $31                   // return
961         .end inc_loop_up
962
963 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Multi-word subtraction, walking all three arrays upwards:
// *destptr++ = *sourceptr1++ - *sourceptr2++ (- carry), count times.
// Returns the final carry (borrow): 0 or -1 (0 for count == 0).
// The carry is not kept in a register; instead there are two loops, slu1
// (no carry pending) and slu3 (carry pending). The unsigned comparison of the
// two source words decides which loop handles the next word; slu2 and slu4
// are the tails that finish the current word before switching loops.
// $12 and $13 are used as scratch registers.
964         .align 2
965         DECLARE_FUNCTION(sub_loop_up)
966         .ent sub_loop_up // Input in $4,$5,$6,$7, Output in $2
967 slu1:     // no carry pending
968           lw $12,($4)           // source1 = *sourceptr1
969           lw $13,($5)           // source2 = *sourceptr2
970           addu $4,4             // sourceptr1++
971           addu $5,4             // sourceptr2++
972           subu $7,1             // count--
973           bltu $12,$13,slu2     // if (source1 < source2) [i.e. carry] ...
974           subu $12,$13          // dest = source1 - source2
975           sw $12,($6)           // *destptr = dest
976           addu $6,4             // destptr++
977 sub_loop_up:                    // function entry point (no carry pending)
978           bnez $7,slu1          // until (count==0)
979         move $2,$0              // 0
980         j $31                   // return
981 slu2:     subu $12,$13          // dest = source1 - source2
982           sw $12,($6)           // *destptr = dest
983           addu $6,4             // destptr++
984           bnez $7,slu3          // until (count==0)
985         li $2,-1                // -1
986         j $31                   // return
987 slu3:   // carry pending here
988           lw $12,($4)           // source1 = *sourceptr1
989           lw $13,($5)           // source2 = *sourceptr2
990           addu $4,4             // sourceptr1++
991           addu $5,4             // sourceptr2++
992           subu $7,1             // count--
993           bgtu $12,$13,slu4     // if (source1 > source2) [i.e. no carry] ...
994           subu $12,$13          // dest = source1 - source2
995           subu $12,1            //        - 1
996           sw $12,($6)           // *destptr = dest
997           addu $6,4             // destptr++
998           bnez $7,slu3          // until (count==0)
999         li $2,-1                // -1
1000         j $31                   // return
1001 slu4:     subu $12,$13          // dest = source1 - source2
1002           subu $12,1            //        - 1
1003           sw $12,($6)           // *destptr = dest
1004           addu $6,4             // destptr++
1005           bnez $7,slu1          // until (count==0)
1006         move $2,$0              // 0
1007         j $31                   // return
1008         .end sub_loop_up
1009
1010 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
// Multi-word subtraction with an incoming carry (borrow), walking all three
// arrays upwards: *destptr++ = *sourceptr1++ - *sourceptr2++ (- carry),
// count times. Returns the final carry: 0 or -1. For count == 0 the result
// is 0 if the incoming carry is 0 and -1 otherwise.
// The 5th argument is fetched from $8 under ABI_N32 and from 16($sp)
// otherwise (o32). A nonzero incoming carry starts in the "carry pending"
// loop (sxlu4/sxlu5), a zero one in the "no carry" loop (sxlu1/sxlu2).
// sxlu3 and sxlu6 are the tails that finish the current word before
// switching loops. $12 and $13 are used as scratch registers.
1011         .align 2
1012         DECLARE_FUNCTION(subx_loop_up)
1013         .ent subx_loop_up // Input in $4,$5,$6,$7 and $8 (n32) or 16($sp) (o32), Output in $2
1014 subx_loop_up:
1015 #if ABI_N32
1016         move $12,$8             // carry
1017 #else
1018         lw $12,16($sp)          // carry
1019 #endif
1020         bnez $12,sxlu5          // !(carry==0) ?
1021         b sxlu2
1022 sxlu1:    // no carry pending
1023           lw $12,($4)           // source1 = *sourceptr1
1024           lw $13,($5)           // source2 = *sourceptr2
1025           addu $4,4             // sourceptr1++
1026           addu $5,4             // sourceptr2++
1027           subu $7,1             // count--
1028           bltu $12,$13,sxlu3    // if (source1 < source2) [i.e. carry] ...
1029           subu $12,$13          // dest = source1 - source2
1030           sw $12,($6)           // *destptr = dest
1031           addu $6,4             // destptr++
1032 sxlu2:    bnez $7,sxlu1         // until (count==0)
1033         move $2,$0              // 0
1034         j $31                   // return
1035 sxlu3:    subu $12,$13          // dest = source1 - source2
1036           sw $12,($6)           // *destptr = dest
1037           addu $6,4             // destptr++
1038           bnez $7,sxlu4         // until (count==0)
1039         li $2,-1                // -1
1040         j $31                   // return
1041 sxlu4:  // carry pending here
1042           lw $12,($4)           // source1 = *sourceptr1
1043           lw $13,($5)           // source2 = *sourceptr2
1044           addu $4,4             // sourceptr1++
1045           addu $5,4             // sourceptr2++
1046           subu $7,1             // count--
1047           bgtu $12,$13,sxlu6    // if (source1 > source2) [i.e. no carry] ...
1048           subu $12,$13          // dest = source1 - source2
1049           subu $12,1            //        - 1
1050           sw $12,($6)           // *destptr = dest
1051           addu $6,4             // destptr++
1052 sxlu5:    bnez $7,sxlu4         // until (count==0)
1053         li $2,-1                // -1
1054         j $31                   // return
1055 sxlu6:    subu $12,$13          // dest = source1 - source2
1056           subu $12,1            //        - 1
1057           sw $12,($6)           // *destptr = dest
1058           addu $6,4             // destptr++
1059           bnez $7,sxlu1         // until (count==0)
1060         move $2,$0              // 0
1061         j $31                   // return
1062         .end subx_loop_up
1063
1064 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
// Multi-word in-place subtraction, walking both arrays upwards:
// *destptr++ -= *sourceptr++ (- carry), count times.
// Returns the final carry (borrow): 0 or -1 (0 for count == 0).
// The carry is not kept in a register; instead there are two loops, sflu1
// (no carry pending) and sflu3 (carry pending). The unsigned comparison of
// the destination word with the source word decides which loop handles the
// next word; sflu2 and sflu4 are the tails that finish the current word
// before switching loops. $12 and $13 are used as scratch registers.
1065         .align 2
1066         DECLARE_FUNCTION(subfrom_loop_up)
1067         .ent subfrom_loop_up // Input in $4,$5,$6, Output in $2
1068 sflu1:    // no carry pending
1069           lw $12,($5)           // source1 = *destptr
1070           lw $13,($4)           // source2 = *sourceptr
1071           addu $4,4             // sourceptr++
1072           subu $6,1             // count--
1073           bltu $12,$13,sflu2    // if (source1 < source2) [i.e. carry] ...
1074           subu $12,$13          // dest = source1 - source2
1075           sw $12,($5)           // *destptr = dest
1076           addu $5,4             // destptr++
1077 subfrom_loop_up:                // function entry point (no carry pending)
1078           bnez $6,sflu1         // until (count==0)
1079         move $2,$0              // 0
1080         j $31                   // return
1081 sflu2:    subu $12,$13          // dest = source1 - source2
1082           sw $12,($5)           // *destptr = dest
1083           addu $5,4             // destptr++
1084           bnez $6,sflu3         // until (count==0)
1085         li $2,-1                // -1
1086         j $31                   // return
1087 sflu3:  // carry pending here
1088           lw $12,($5)           // source1 = *destptr
1089           lw $13,($4)           // source2 = *sourceptr
1090           addu $4,4             // sourceptr++
1091           subu $6,1             // count--
1092           bgtu $12,$13,sflu4    // if (source1 > source2) [i.e. no carry] ...
1093           subu $12,$13          // dest = source1 - source2
1094           subu $12,1            //        - 1
1095           sw $12,($5)           // *destptr = dest
1096           addu $5,4             // destptr++
1097           bnez $6,sflu3         // until (count==0)
1098         li $2,-1                // -1
1099         j $31                   // return
1100 sflu4:    subu $12,$13          // dest = source1 - source2
1101           subu $12,1            //        - 1
1102           sw $12,($5)           // *destptr = dest
1103           addu $5,4             // destptr++
1104           bnez $6,sflu1         // until (count==0)
1105         move $2,$0              // 0
1106         j $31                   // return
1107         .end subfrom_loop_up
1108
1109 // extern uintD dec_loop_up (uintD* ptr, uintC count);
// Decrements a multi-word number in place, walking the array upwards.
// Words equal to 0 are decremented (wrapping around) and stored, and the
// loop continues; the first nonzero word is decremented, stored, and the
// function returns 0 (no borrow out). If all count words were 0, or
// count == 0, it returns -1 (borrow out). $12 is used as scratch register.
1110         .align 2
1111         DECLARE_FUNCTION(dec_loop_up)
1112         .ent dec_loop_up // Input in $4,$5, Output in $2
1113 dlu1:     lw $12,($4)           // x = *ptr
1114           subu $5,1             // count-- (separates the load of x from its use)
1115           bnez $12,dlu3         // if (!(x==0)) ... borrow is absorbed by this word
1116           subu $12,1            // x--;
1117           sw $12,($4)           // *ptr = x
1118           addu $4,4             // ptr++
1119 dec_loop_up:                    // function entry point
1120           bnez $5,dlu1          // until (count==0)
1121         li $2,-1                // -1
1122         j $31                   // return
1123 dlu3:   subu $12,1              // x--;
1124         sw $12,($4)             // *ptr = x
1125         move $2,$0              // 0
1126         j $31                   // return
1127         .end dec_loop_up
1128
1129 // extern uintD neg_loop_up (uintD* ptr, uintC count);
// Negates a multi-word number in place, walking the array upwards.
// Leading zero words are skipped unchanged; the first nonzero word is
// replaced by its negation (0 - x), and every following word by its bitwise
// complement. Returns 0 if all count words were 0 (or count == 0), and -1
// otherwise. $12 is used as scratch register.
1130         .align 2
1131         DECLARE_FUNCTION(neg_loop_up)
1132         .ent neg_loop_up // Input in $4,$5, Output in $2
1133         // search for the first digit /= 0:
1134 nlu1:     lw $12,($4)           // x = *ptr
1135           subu $5,1             // count-- (separates the load of x from its use)
1136           bnez $12,nlu3         // if (!(x==0)) ...
1137           addu $4,4             // ptr++
1138 neg_loop_up:                    // function entry point
1139           bnez $5,nlu1          // until (count==0)
1140         move $2,$0              // 0
1141         j $31                   // return
1142 nlu3:   // first digit /= 0 found, from here on there are carries
1143         // negate one digit:
1144         subu $12,$0,$12         // x = -x
1145         sw $12,($4)             // *ptr = x
1146         // invert all remaining digits:
1147         b nlu5
1148 nlu4:     lw $12,($4)           // x = *xptr
1149           subu $5,1             // count-- (separates the load of x from its use)
1150           nor $12,$0            // x = ~x  (nor with $0)
1151           sw $12,($4)           // *xptr = x
1152 nlu5:     addu $4,4             // xptr++
1153           bnez $5,nlu4          // until (count==0)
1154         li $2,-1                // -1
1155         j $31                   // return
1156         .end neg_loop_up
1157
1158 #endif
1159