// www.ginac.de Git - cln.git/blob - src/base/digitseq/cl_asm_mips_.cc
// Initial revision
// [cln.git] / src / base / digitseq / cl_asm_mips_.cc
1 // External routines for ARILEV1.D
2 // Processor: MIPS
3 // Endianness: irrelevant
4 // Compiler: GNU C or ...
5 // Parameter passing: in registers $4,$5,$6,$7, and on the stack at 16($sp),...
6 // Return value: in register $2
7 // Settings: intCsize=32, intDsize=32.
8 // Special notes: after every load instruction one wait cycle is needed before
9 //   the loaded value may be used.
10
11 // When this file is compiled into a shared library, ELF linkers need to
12 // know which symbols are functions.
// DECLARE_FUNCTION(name) expands to `.type name,@function` when __GNU__ or
// __NetBSD__ is defined, and to nothing otherwise.
// NOTE(review): other ELF targets appear to take the empty branch - confirm
// that this is intended.
13 #if defined(__GNU__) || defined(__NetBSD__)
14   #define DECLARE_FUNCTION(name) .type name,@function
15 #else
16   #define DECLARE_FUNCTION(name)
17 #endif
18
19         .text
20
// Exported entry points. The copy/fill/clear/test loops, xor_loop_up and
// compare_loop_up are exported unconditionally; the remaining symbols are
// selected by CL_DS_BIG_ENDIAN_P (the *_up logic loops and *_down arithmetic
// loops when it is true, the opposite directions otherwise).
21         .globl copy_loop_up
22         .globl copy_loop_down
23         .globl fill_loop_up
24         .globl fill_loop_down
25         .globl clear_loop_up
26         .globl clear_loop_down
27         .globl test_loop_up
28         .globl test_loop_down
29         .globl xor_loop_up
30         .globl compare_loop_up
31 #if CL_DS_BIG_ENDIAN_P
32         .globl or_loop_up
33         .globl and_loop_up
34         .globl eqv_loop_up
35         .globl nand_loop_up
36         .globl nor_loop_up
37         .globl andc2_loop_up
38         .globl orc2_loop_up
39         .globl not_loop_up
40         .globl and_test_loop_up
41         .globl add_loop_down
42         .globl addto_loop_down
43         .globl inc_loop_down
44         .globl sub_loop_down
45         .globl subx_loop_down
46         .globl subfrom_loop_down
47         .globl dec_loop_down
48         .globl neg_loop_down
49 #else
50         .globl or_loop_down
51         .globl xor_loop_down
52         .globl and_loop_down
53         .globl eqv_loop_down
54         .globl nand_loop_down
55         .globl nor_loop_down
56         .globl andc2_loop_down
57         .globl orc2_loop_down
58         .globl not_loop_down
59         .globl and_test_loop_down
60         .globl compare_loop_down
61         .globl add_loop_up
62         .globl addto_loop_up
63         .globl inc_loop_up
64         .globl sub_loop_up
65         .globl subx_loop_up
66         .globl subfrom_loop_up
67         .globl dec_loop_up
68         .globl neg_loop_up
69 #endif
70
71 #ifndef __GNUC__ /* with GNU C, mulu32() is implemented as a macro that multiplies inline */
72
73 // extern struct { uint32 lo; uint32 hi; } mulu32_ (uint32 arg1, uint32 arg2);
74 // 2^32*hi+lo := arg1*arg2.
// Unsigned 32x32 -> 64 bit multiply: the low word is returned in $2, the
// high word is stored to the symbol mulu32_high.
// NOTE(review): mulu32_high is not declared anywhere in the visible part of
// this file; the original author already questioned its addressing and
// declaration - confirm where it is defined.
75         .globl mulu32_
76         .align 2
77         DECLARE_FUNCTION(mulu32_)
78         .ent mulu32_ // Input in $4,$5, Output in $2,mulu32_high
79 mulu32_:
80         multu $5,$4             // arg1 * arg2
81         mfhi $6                 // hi
82         mflo $2                 // lo
83         sw $6,mulu32_high       // store hi // addressing?? declaration??
84         j $31                   // return
85         .end mulu32_
86
87 #endif
88
89 // extern uintD* copy_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
// Copies count words from sourceptr to destptr, advancing both pointers
// upward by 4 bytes per word, and returns the advanced destptr in $2.
// Uses $12 as scratch.
90         .align 2
91         DECLARE_FUNCTION(copy_loop_up)
92         .ent copy_loop_up // Input in $4,$5,$6, Output in $2
93 colu1:    lw $12,($4)           // d = *sourceptr
94           addu $4,4             // sourceptr++
95           sw $12,($5)           // *destptr = d
96           addu $5,4             // destptr++
97           subu $6,1             // count--
98 copy_loop_up:                   // entry point: the count test runs first, so count==0 copies nothing
99           bnez $6,colu1         // until (count==0)
100         move $2,$5              // destptr
101         j $31                   // return
102         .end copy_loop_up
103
104 // extern uintD* copy_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
// Copies count words downward: both pointers are decremented by 4 bytes
// before each load/store. Returns the final destptr in $2.
// Uses $12 as scratch.
105         .align 2
106         DECLARE_FUNCTION(copy_loop_down)
107         .ent copy_loop_down // Input in $4,$5,$6, Output in $2
108 cold1:    subu $4,4             // sourceptr--
109           lw $12,($4)           // d = *sourceptr
110           subu $5,4             // destptr--
111           sw $12,($5)           // *destptr = d
112           subu $6,1             // count--
113 copy_loop_down:                 // entry point: the count test runs first
114           bnez $6,cold1         // until (count==0)
115         move $2,$5              // destptr
116         j $31                   // return
117         .end copy_loop_down
118
119 // extern uintD* fill_loop_up (uintD* destptr, uintC count, uintD filler);
// Stores filler into count consecutive words starting at destptr, moving
// upward, and returns the advanced destptr in $2.
120         .align 2
121         DECLARE_FUNCTION(fill_loop_up)
122         .ent fill_loop_up // Input in $4,$5,$6, Output in $2
123 flu1:     sw $6,($4)            // *destptr = filler
124           addu $4,4             // destptr++
125           subu $5,1             // count--
126 fill_loop_up:                   // entry point: the count test runs first
127           bnez $5,flu1          // until (count==0)
128         move $2,$4              // destptr
129         j $31                   // return
130         .end fill_loop_up
131
132 // extern uintD* fill_loop_down (uintD* destptr, uintC count, uintD filler);
// Stores filler into count words below destptr (pointer is decremented
// before each store) and returns the final destptr in $2.
133         .align 2
134         DECLARE_FUNCTION(fill_loop_down)
135         .ent fill_loop_down // Input in $4,$5,$6, Output in $2
136 fld1:     subu $4,4             // destptr--
137           sw $6,($4)            // *destptr = filler
138           subu $5,1             // count--
139 fill_loop_down:                 // entry point: the count test runs first
140           bnez $5,fld1          // until (count==0)
141         move $2,$4              // destptr
142         j $31                   // return
143         .end fill_loop_down
144
145 // extern uintD* clear_loop_up (uintD* destptr, uintC count);
// Stores zero (register $0) into count consecutive words starting at
// destptr, moving upward, and returns the advanced destptr in $2.
146         .align 2
147         DECLARE_FUNCTION(clear_loop_up)
148         .ent clear_loop_up // Input in $4,$5, Output in $2
149 cllu1:    sw $0,($4)            // *destptr = 0
150           addu $4,4             // destptr++
151           subu $5,1             // count--
152 clear_loop_up:                  // entry point: the count test runs first
153           bnez $5,cllu1         // until (count==0)
154         move $2,$4              // destptr
155         j $31                   // return
156         .end clear_loop_up
157
158 // extern uintD* clear_loop_down (uintD* destptr, uintC count);
// Stores zero (register $0) into count words below destptr (pointer is
// decremented before each store) and returns the final destptr in $2.
159         .align 2
160         DECLARE_FUNCTION(clear_loop_down)
161         .ent clear_loop_down // Input in $4,$5, Output in $2
162 clld1:    subu $4,4             // destptr--
163           sw $0,($4)            // *destptr = 0
164           subu $5,1             // count--
165 clear_loop_down:                // entry point: the count test runs first
166           bnez $5,clld1         // until (count==0)
167         move $2,$4              // destptr
168         j $31                   // return
169         .end clear_loop_down
170
171 // extern boolean test_loop_up (uintD* ptr, uintC count);
// Scans count words upward from ptr. Returns 1 in $2 as soon as a nonzero
// word is loaded, or 0 if all count words are zero. Uses $12 as scratch.
172         .align 2
173         DECLARE_FUNCTION(test_loop_up)
174         .ent test_loop_up // Input in $4,$5
175 tlu1:     lw $12,($4)           // x = *ptr
176           addu $4,4             // ptr++
177           bnez $12,tlu3         // if (x != 0) return 1
178           subu $5,1             // count-- (count is not read on the tlu3 path)
179 test_loop_up:                   // entry point: the count test runs first
180           bnez $5,tlu1          // until (count==0)
181         move $2,$0              // 0
182         j $31                   // return
183 tlu3:   li $2,1                 // 1
184         j $31                   // return
185         .end test_loop_up
186
187 // extern boolean test_loop_down (uintD* ptr, uintC count);
// Scans count words downward from ptr (pointer is decremented before each
// load). Returns 1 in $2 as soon as a nonzero word is loaded, or 0 if all
// count words are zero. Uses $12 as scratch.
188         .align 2
189         DECLARE_FUNCTION(test_loop_down)
190         .ent test_loop_down // Input in $4,$5
191 tld1:     subu $4,4             // ptr--
192           lw $12,($4)           // x = *ptr
193           subu $5,1             // count--
194           bnez $12,tld3         // if (x != 0) return 1
195 test_loop_down:                 // entry point: the count test runs first
196           bnez $5,tld1          // until (count==0)
197         move $2,$0              // 0
198         j $31                   // return
199 tld3:   li $2,1                 // 1
200         j $31                   // return
201         .end test_loop_down
202
203 #if CL_DS_BIG_ENDIAN_P
204
205 // extern void or_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr |= *yptr. The result is written
// back through xptr; nothing is returned. Uses $12,$13 as scratch.
206         .align 2
207         DECLARE_FUNCTION(or_loop_up)
208         .ent or_loop_up // Input in $4,$5,$6
209 olu1:     lw $12,($4)           // x = *xptr
210           lw $13,($5)           // y = *yptr
211           addu $5,4             // yptr++
212           or $12,$13            // x |= y
213           sw $12,($4)           // *xptr = x
214           addu $4,4             // xptr++
215           subu $6,1             // count--
216 or_loop_up:                     // entry point: the count test runs first
217           bnez $6,olu1          // until (count==0)
218         j $31                   // return
219         .end or_loop_up
220
221 #endif
222
223 // extern void xor_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr ^= *yptr. The result is written
// back through xptr; nothing is returned. Uses $12,$13 as scratch.
224         .align 2
225         DECLARE_FUNCTION(xor_loop_up)
226         .ent xor_loop_up // Input in $4,$5,$6
227 xlu1:     lw $12,($4)           // x = *xptr
228           lw $13,($5)           // y = *yptr
229           addu $5,4             // yptr++
230           xor $12,$13           // x ^= y
231           sw $12,($4)           // *xptr = x
232           addu $4,4             // xptr++
233           subu $6,1             // count--
234 xor_loop_up:                    // entry point: the count test runs first
235           bnez $6,xlu1          // until (count==0)
236         j $31                   // return
237         .end xor_loop_up
238
239 #if CL_DS_BIG_ENDIAN_P
240
241 // extern void and_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr &= *yptr. The result is written
// back through xptr; nothing is returned. Uses $12,$13 as scratch.
242         .align 2
243         DECLARE_FUNCTION(and_loop_up)
244         .ent and_loop_up // Input in $4,$5,$6
245 alu1:     lw $12,($4)           // x = *xptr
246           lw $13,($5)           // y = *yptr
247           addu $5,4             // yptr++
248           and $12,$13           // x &= y
249           sw $12,($4)           // *xptr = x
250           addu $4,4             // xptr++
251           subu $6,1             // count--
252 and_loop_up:                    // entry point: the count test runs first
253           bnez $6,alu1          // until (count==0)
254         j $31                   // return
255         .end and_loop_up
256
257 // extern void eqv_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr = ~(*xptr ^ *yptr). The complement
// is formed with nor against $0. Uses $12,$13 as scratch.
258         .align 2
259         DECLARE_FUNCTION(eqv_loop_up)
260         .ent eqv_loop_up // Input in $4,$5,$6
261 nxlu1:    lw $12,($4)           // x = *xptr
262           lw $13,($5)           // y = *yptr
263           addu $5,4             // yptr++
264           xor $12,$13           // x ^= y
265           nor $12,$0            // x = ~x
266           sw $12,($4)           // *xptr = x
267           addu $4,4             // xptr++
268           subu $6,1             // count--
269 eqv_loop_up:                    // entry point: the count test runs first
270           bnez $6,nxlu1         // until (count==0)
271         j $31                   // return
272         .end eqv_loop_up
273
274 // extern void nand_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr = ~(*xptr & *yptr), computed as an
// and followed by a nor against $0. Uses $12,$13 as scratch.
275         .align 2
276         DECLARE_FUNCTION(nand_loop_up)
277         .ent nand_loop_up // Input in $4,$5,$6
278 nalu1:    lw $12,($4)           // x = *xptr
279           lw $13,($5)           // y = *yptr
280           addu $5,4             // yptr++
281           and $12,$13           // x &= y        // Is there a 'nand $12,$13' ??
282           nor $12,$0            // x = ~x
283           sw $12,($4)           // *xptr = x
284           addu $4,4             // xptr++
285           subu $6,1             // count--
286 nand_loop_up:                   // entry point: the count test runs first
287           bnez $6,nalu1         // until (count==0)
288         j $31                   // return
289         .end nand_loop_up
290
291 // extern void nor_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr = ~(*xptr | *yptr).
// Uses $12,$13 as scratch.
292         .align 2
293         DECLARE_FUNCTION(nor_loop_up)
294         .ent nor_loop_up // Input in $4,$5,$6
295 nolu1:    lw $12,($4)           // x = *xptr
296           lw $13,($5)           // y = *yptr
297           addu $5,4             // yptr++
298           nor $12,$13           // x = ~(x|y)
299           sw $12,($4)           // *xptr = x
300           addu $4,4             // xptr++
301           subu $6,1             // count--
302 nor_loop_up:                    // entry point: the count test runs first
303           bnez $6,nolu1         // until (count==0)
304         j $31                   // return
305         .end nor_loop_up
306
307 // extern void andc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr &= ~*yptr (and with the complement
// of the second operand). Uses $12,$13 as scratch.
308         .align 2
309         DECLARE_FUNCTION(andc2_loop_up)
310         .ent andc2_loop_up // Input in $4,$5,$6
311 aclu1:    lw $12,($4)           // x = *xptr
312           lw $13,($5)           // y = *yptr
313           addu $5,4             // yptr++
314           nor $13,$0            // y = ~y
315           and $12,$13           // x &= y
316           sw $12,($4)           // *xptr = x
317           addu $4,4             // xptr++
318           subu $6,1             // count--
319 andc2_loop_up:                  // entry point: the count test runs first
320           bnez $6,aclu1         // until (count==0)
321         j $31                   // return
322         .end andc2_loop_up
323
324 // extern void orc2_loop_up (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving upward: *xptr |= ~*yptr (or with the complement
// of the second operand). Uses $12,$13 as scratch.
325         .align 2
326         DECLARE_FUNCTION(orc2_loop_up)
327         .ent orc2_loop_up // Input in $4,$5,$6
328 oclu1:    lw $12,($4)           // x = *xptr
329           lw $13,($5)           // y = *yptr
330           addu $5,4             // yptr++
331           nor $13,$0            // y = ~y
332           or $12,$13            // x |= y
333           sw $12,($4)           // *xptr = x
334           addu $4,4             // xptr++
335           subu $6,1             // count--
336 orc2_loop_up:                   // entry point: the count test runs first
337           bnez $6,oclu1         // until (count==0)
338         j $31                   // return
339         .end orc2_loop_up
340
341 // extern void not_loop_up (uintD* xptr, uintC count);
// For count words, moving upward: *xptr = ~*xptr, in place.
// Uses $12 as scratch.
342         .align 2
343         DECLARE_FUNCTION(not_loop_up)
344         .ent not_loop_up // Input in $4,$5
345 nlu1:     lw $12,($4)           // x = *xptr
346           subu $5,1             // count--
347           nor $12,$0            // x = ~x
348           sw $12,($4)           // *xptr = x
349           addu $4,4             // xptr++
350 not_loop_up:                    // entry point: the count test runs first
351           bnez $5,nlu1          // until (count==0)
352         j $31                   // return
353         .end not_loop_up
354
355 // extern boolean and_test_loop_up (uintD* xptr, uintD* yptr, uintC count);
// Scans count word pairs upward. Returns 1 in $2 as soon as (*xptr & *yptr)
// is nonzero, or 0 if no pair has a common bit. Memory is only read.
// Uses $12,$13 as scratch.
356         .align 2
357         DECLARE_FUNCTION(and_test_loop_up)
358         .ent and_test_loop_up // Input in $4,$5,$6
359 atlu1:    lw $12,($4)           // x = *xptr
360           lw $13,($5)           // y = *yptr
361           addu $5,4             // yptr++
362           and $12,$13           // x &= y
363           bnez $12,atlu3        // if (x) ...
364           addu $4,4             // xptr++
365           subu $6,1             // count--
366 and_test_loop_up:               // entry point: the count test runs first
367           bnez $6,atlu1         // until (count==0)
368         move $2,$0              // 0
369         j $31                   // return
370 atlu3:  li $2,1                 // 1
371         j $31                   // return
372         .end and_test_loop_up
373
374 #endif
375
376 // extern cl_signean compare_loop_up (uintD* xptr, uintD* yptr, uintC count);
// Compares count word pairs upward. At the first differing pair it returns
// -1 in $2 if x < y (unsigned comparison via bltu) and 1 otherwise; it
// returns 0 if all count pairs are equal. Uses $12,$13 as scratch.
377         .align 2
378         DECLARE_FUNCTION(compare_loop_up)
379         .ent compare_loop_up // Input in $4,$5,$6
380 cmlu1:    lw $12,($4)           // x = *xptr
381           lw $13,($5)           // y = *yptr
382           addu $5,4             // yptr++
383           bne $12,$13,cmlu3     // if (!(x==y)) ...
384           addu $4,4             // xptr++
385           subu $6,1             // count--
386 compare_loop_up:                // entry point: the count test runs first
387           bnez $6,cmlu1         // until (count==0)
388         move $2,$0              // 0
389         j $31                   // return
390 cmlu3:  bltu $12,$13,cmlu4      // if (x<y) ...
391         li $2,1                 // 1
392         j $31                   // return
393 cmlu4:  li $2,-1                // -1
394         j $31                   // return
395         .end compare_loop_up
396
397 #if CL_DS_BIG_ENDIAN_P
398
399 // extern uintD add_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Multi-word addition, moving downward: for count words,
// *--destptr = *--sourceptr1 + *--sourceptr2 + carry. Returns the final
// carry (0 or 1) in $2.
// The loop body exists twice: ald1 runs while no carry is pending, ald3
// while a carry is pending. Carry-out is detected by comparing the sum with
// source2, and control switches between the two copies accordingly.
// Uses $12,$13 as scratch.
400         .align 2
401         DECLARE_FUNCTION(add_loop_down)
402         .ent add_loop_down // Input in $4,$5,$6,$7, Output in $2
403 ald1:     // no carry
404           subu $4,4             // sourceptr1--
405           subu $5,4             // sourceptr2--
406           lw $12,($4)           // source1 = *sourceptr1
407           lw $13,($5)           // source2 = *sourceptr2
408           subu $6,4             // destptr--
409           addu $12,$13          // dest = source1 + source2
410           sw $12,($6)           // *destptr = dest
411           bltu $12,$13,ald4     // if (dest < source2) [i.e. carry] ...
412 ald2:
413           subu $7,1             // count--
414 add_loop_down:                  // entry point: starts with no carry pending
415           bnez $7,ald1          // until (count==0)
416         move $2,$0              // 0
417         j $31                   // return
418 ald3:   // carry pending here
419           subu $4,4             // sourceptr1--
420           subu $5,4             // sourceptr2--
421           lw $12,($4)           // source1 = *sourceptr1
422           lw $13,($5)           // source2 = *sourceptr2
423           subu $6,4             // destptr--
424           addu $12,$13          // dest = source1 + source2
425           addu $12,1            //        + 1
426           sw $12,($6)           // *destptr = dest
427           bgtu $12,$13,ald2     // if (dest > source2) [i.e. no carry] ...
428 ald4:     subu $7,1             // count--
429           bnez $7,ald3          // until (count==0)
430         li $2,1                 // 1
431         j $31                   // return
432         .end add_loop_down
433
434 // extern uintD addto_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
// In-place multi-word addition, moving downward: for count words,
// *--destptr += *--sourceptr + carry. Returns the final carry (0 or 1)
// in $2.
// atld1 is the loop body while no carry is pending, atld3 the body while a
// carry is pending. Uses $12,$13 as scratch.
435         .align 2
436         DECLARE_FUNCTION(addto_loop_down)
437         .ent addto_loop_down // Input in $4,$5,$6, Output in $2
438 atld1:    // no carry
439           subu $4,4             // sourceptr--
440           subu $5,4             // destptr--
441           lw $12,($4)           // source1 = *sourceptr
442           lw $13,($5)           // source2 = *destptr
443           subu $6,1             // count--
444           addu $12,$13          // dest = source1 + source2
445           sw $12,($5)           // *destptr = dest
446           bltu $12,$13,atld4    // if (dest < source2) [i.e. carry] ...
447 addto_loop_down:                // entry point: starts with no carry pending
448 atld2:    bnez $6,atld1         // until (count==0)
449         move $2,$0              // 0
450         j $31                   // return
451 atld3:  // carry pending here
452           subu $4,4             // sourceptr--
453           subu $5,4             // destptr--
454           lw $12,($4)           // source1 = *sourceptr
455           lw $13,($5)           // source2 = *destptr
456           subu $6,1             // count--
457           addu $12,$13          // dest = source1 + source2
458           addu $12,1            //        + 1
459           sw $12,($5)           // *destptr = dest
460           bgtu $12,$13,atld2    // if (dest > source2) [i.e. no carry] ...
461 atld4:    bnez $6,atld3         // until (count==0)
462         li $2,1                 // 1
463         j $31                   // return
464         .end addto_loop_down
465
466 // extern uintD inc_loop_down (uintD* ptr, uintC count);
// Increments a multi-word number in place, moving downward: each word is
// incremented and stored; the loop stops at the first word whose
// incremented value is nonzero. Returns 0 in $2 in that case, or 1 if all
// count words wrapped to zero (or count was 0). Uses $12 as scratch.
467         .align 2
468         DECLARE_FUNCTION(inc_loop_down)
469         .ent inc_loop_down // Input in $4,$5, Output in $2
470 ild1:     subu $4,4             // ptr--
471           lw $12,($4)           // x = *ptr
472           subu $5,1             // count--
473           addu $12,1            // x++;
474           sw $12,($4)           // *ptr = x
475           bnez $12,ild3         // if (!(x==0)) ...
476 inc_loop_down:                  // entry point: the count test runs first
477           bnez $5,ild1          // until (count==0)
478         li $2,1                 // 1
479         j $31                   // return
480 ild3:   move $2,$0              // 0
481         j $31                   // return
482         .end inc_loop_down
483
484 // extern uintD sub_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Multi-word subtraction, moving downward: for count words,
// *--destptr = *--sourceptr1 - *--sourceptr2 - borrow. Returns 0 in $2 if
// no borrow remains at the end, -1 otherwise.
// sld1 is the loop body while no borrow is pending, sld3 the body while a
// borrow is pending; sld2 and sld4 finish the current word and switch
// between the two states. Uses $12,$13 as scratch.
485         .align 2
486         DECLARE_FUNCTION(sub_loop_down)
487         .ent sub_loop_down // Input in $4,$5,$6,$7, Output in $2
488 sld1:     // no carry
489           subu $4,4             // sourceptr1--
490           subu $5,4             // sourceptr2--
491           lw $12,($4)           // source1 = *sourceptr1
492           lw $13,($5)           // source2 = *sourceptr2
493           subu $6,4             // destptr--
494           bltu $12,$13,sld2     // if (source1 < source2) [i.e. carry] ...
495           subu $12,$13          // dest = source1 - source2
496           sw $12,($6)           // *destptr = dest
497           subu $7,1             // count--
498 sub_loop_down:                  // entry point: starts with no borrow pending
499           bnez $7,sld1          // until (count==0)
500         move $2,$0              // 0
501         j $31                   // return
502 sld2:     subu $12,$13          // dest = source1 - source2
503           sw $12,($6)           // *destptr = dest
504           subu $7,1             // count--
505           bnez $7,sld3          // until (count==0)
506         li $2,-1                // -1
507         j $31                   // return
508 sld3:   // carry pending here
509           subu $4,4             // sourceptr1--
510           subu $5,4             // sourceptr2--
511           lw $12,($4)           // source1 = *sourceptr1
512           lw $13,($5)           // source2 = *sourceptr2
513           subu $6,4             // destptr--
514           bgtu $12,$13,sld4     // if (source1 > source2) [i.e. no carry] ...
515           subu $12,$13          // dest = source1 - source2
516           subu $12,1            //        - 1
517           sw $12,($6)           // *destptr = dest
518           subu $7,1             // count--
519           bnez $7,sld3          // until (count==0)
520         li $2,-1                // -1
521         j $31                   // return
522 sld4:     subu $12,$13          // dest = source1 - source2
523           subu $12,1            //        - 1
524           sw $12,($6)           // *destptr = dest
525           subu $7,1             // count--
526           bnez $7,sld1          // until (count==0)
527         move $2,$0              // 0
528         j $31                   // return
529         .end sub_loop_down
530
531 // extern uintD subx_loop_down (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
// Multi-word subtraction with an incoming borrow, moving downward: for
// count words, *--destptr = *--sourceptr1 - *--sourceptr2 - borrow.
// The fifth argument (carry) is read from the stack at 16($sp); a nonzero
// value enters the borrow-pending loop (sxld5), zero enters the no-borrow
// loop (sxld2). Returns 0 in $2 if no borrow remains at the end, -1
// otherwise. Uses $12,$13 as scratch.
532         .align 2
533         DECLARE_FUNCTION(subx_loop_down)
534         .ent subx_loop_down // Input in $4,$5,$6,$7 and 16($sp), Output in $2
535 subx_loop_down:
536         lw $12,16($sp)          // carry
537         bnez $12,sxld5          // !(carry==0) ?
538         b sxld2
539 sxld1:    // no carry
540           subu $4,4             // sourceptr1--
541           subu $5,4             // sourceptr2--
542           lw $12,($4)           // source1 = *sourceptr1
543           lw $13,($5)           // source2 = *sourceptr2
544           subu $6,4             // destptr--
545           bltu $12,$13,sxld3    // if (source1 < source2) [i.e. carry] ...
546           subu $12,$13          // dest = source1 - source2
547           sw $12,($6)           // *destptr = dest
548           subu $7,1             // count--
549 sxld2:    bnez $7,sxld1         // until (count==0)
550         move $2,$0              // 0
551         j $31                   // return
552 sxld3:    subu $12,$13          // dest = source1 - source2
553           sw $12,($6)           // *destptr = dest
554           subu $7,1             // count--
555           bnez $7,sxld4         // until (count==0)
556         li $2,-1                // -1
557         j $31                   // return
558 sxld4:  // carry pending here
559           subu $4,4             // sourceptr1--
560           subu $5,4             // sourceptr2--
561           lw $12,($4)           // source1 = *sourceptr1
562           lw $13,($5)           // source2 = *sourceptr2
563           subu $6,4             // destptr--
564           bgtu $12,$13,sxld6    // if (source1 > source2) [i.e. no carry] ...
565           subu $12,$13          // dest = source1 - source2
566           subu $12,1            //        - 1
567           sw $12,($6)           // *destptr = dest
568           subu $7,1             // count--
569 sxld5:    bnez $7,sxld4         // until (count==0)
570         li $2,-1                // -1
571         j $31                   // return
572 sxld6:    subu $12,$13          // dest = source1 - source2
573           subu $12,1            //        - 1
574           sw $12,($6)           // *destptr = dest
575           subu $7,1             // count--
576           bnez $7,sxld1         // until (count==0)
577         move $2,$0              // 0
578         j $31                   // return
579         .end subx_loop_down
580
581 // extern uintD subfrom_loop_down (uintD* sourceptr, uintD* destptr, uintC count);
// In-place multi-word subtraction, moving downward: for count words,
// *--destptr -= *--sourceptr + borrow. Returns 0 in $2 if no borrow
// remains at the end, -1 otherwise.
// sfld1 is the loop body while no borrow is pending, sfld3 the body while
// a borrow is pending. Only $4,$5,$6 are read as inputs.
// Uses $12,$13 as scratch.
582         .align 2
583         DECLARE_FUNCTION(subfrom_loop_down)
584         .ent subfrom_loop_down // Input in $4,$5,$6, Output in $2
585 sfld1:    // no carry
586           subu $4,4             // sourceptr--
587           subu $5,4             // destptr--
588           lw $12,($5)           // source1 = *destptr
589           lw $13,($4)           // source2 = *sourceptr
590           subu $6,1             // count--
591           bltu $12,$13,sfld2    // if (source1 < source2) [i.e. carry] ...
592           subu $12,$13          // dest = source1 - source2
593           sw $12,($5)           // *destptr = dest
594 subfrom_loop_down:              // entry point: starts with no borrow pending
595           bnez $6,sfld1         // until (count==0)
596         move $2,$0              // 0
597         j $31                   // return
598 sfld2:    subu $12,$13          // dest = source1 - source2
599           sw $12,($5)           // *destptr = dest
600           bnez $6,sfld3         // until (count==0)
601         li $2,-1                // -1
602         j $31                   // return
603 sfld3:  // carry pending here
604           subu $4,4             // sourceptr--
605           subu $5,4             // destptr--
606           lw $12,($5)           // source1 = *destptr
607           lw $13,($4)           // source2 = *sourceptr
608           subu $6,1             // count--
609           bgtu $12,$13,sfld4    // if (source1 > source2) [i.e. no carry] ...
610           subu $12,$13          // dest = source1 - source2
611           subu $12,1            //        - 1
612           sw $12,($5)           // *destptr = dest
613           bnez $6,sfld3         // until (count==0)
614         li $2,-1                // -1
615         j $31                   // return
616 sfld4:    subu $12,$13          // dest = source1 - source2
617           subu $12,1            //        - 1
618           sw $12,($5)           // *destptr = dest
619           bnez $6,sfld1         // until (count==0)
620         move $2,$0              // 0
621         j $31                   // return
622         .end subfrom_loop_down
623
624 // extern uintD dec_loop_down (uintD* ptr, uintC count);
// Decrements a multi-word number in place, moving downward: words that are
// zero are decremented (wrapping) and the loop continues; the first nonzero
// word is decremented at dld3 and the routine returns 0 in $2. If all count
// words were zero (or count was 0) it returns -1. Uses $12 as scratch.
625         .align 2
626         DECLARE_FUNCTION(dec_loop_down)
627         .ent dec_loop_down // Input in $4,$5, Output in $2
628 dld1:     subu $4,4             // ptr--
629           lw $12,($4)           // x = *ptr
630           subu $5,1             // count--
631           bnez $12,dld3         // if (!(x==0)) ...
632           subu $12,1            // x--;
633           sw $12,($4)           // *ptr = x
634 dec_loop_down:                  // entry point: the count test runs first
635           bnez $5,dld1          // until (count==0)
636         li $2,-1                // -1
637         j $31                   // return
638 dld3:   subu $12,1              // x--;
639         sw $12,($4)             // *ptr = x
640         move $2,$0              // 0
641         j $31                   // return
642         .end dec_loop_down
643
644 // extern uintD neg_loop_down (uintD* ptr, uintC count);
// Negates a multi-word number in place, moving downward. Zero words are
// skipped unchanged; the first nonzero word is replaced by its two's
// complement (0 - x); every remaining word is bitwise inverted.
// Returns 0 in $2 if all count words were zero, -1 otherwise.
// Uses $12 as scratch.
645         .align 2
646         DECLARE_FUNCTION(neg_loop_down)
647         .ent neg_loop_down // Input in $4,$5, Output in $2
648         // search for the first digit /= 0:
649 nld1:     subu $4,4             // ptr--
650           lw $12,($4)           // x = *ptr
651           subu $5,1             // count--
652           bnez $12,nld3         // if (!(x==0)) ...
653 neg_loop_down:                  // entry point: the count test runs first
654           bnez $5,nld1          // until (count==0)
655         move $2,$0              // 0
656         j $31                   // return
657 nld3:   // first digit /= 0 found; from here on there are carries
658         // negate 1 digit:
659         subu $12,$0,$12         // x = -x
660         sw $12,($4)             // *ptr = x
661         // invert all remaining digits:
662         b nld5
663 nld4:     subu $4,4             // xptr--
664           lw $12,($4)           // x = *xptr
665           subu $5,1             // count--
666           nor $12,$0            // x = ~x
667           sw $12,($4)           // *xptr = x
668 nld5:     bnez $5,nld4          // until (count==0)
669         li $2,-1                // -1
670         j $31                   // return
671         .end neg_loop_down
672
673 #endif
674
675 #if !CL_DS_BIG_ENDIAN_P
676
677 // extern void or_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving downward (both pointers are decremented before
// each access): *xptr |= *yptr. Uses $12,$13 as scratch.
678         .align 2
679         DECLARE_FUNCTION(or_loop_down)
680         .ent or_loop_down // Input in $4,$5,$6
681 old1:     subu $4,4             // xptr--
682           subu $5,4             // yptr--
683           lw $12,($4)           // x = *xptr
684           lw $13,($5)           // y = *yptr
685           subu $6,1             // count--
686           or $12,$13            // x |= y
687           sw $12,($4)           // *xptr = x
688 or_loop_down:                   // entry point: the count test runs first
689           bnez $6,old1          // until (count==0)
690         j $31                   // return
691         .end or_loop_down
692
693 // extern void xor_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving downward (both pointers are decremented before
// each access): *xptr ^= *yptr. Uses $12,$13 as scratch.
694         .align 2
695         DECLARE_FUNCTION(xor_loop_down)
696         .ent xor_loop_down // Input in $4,$5,$6
697 xld1:     subu $4,4             // xptr--
698           subu $5,4             // yptr--
699           lw $12,($4)           // x = *xptr
700           lw $13,($5)           // y = *yptr
701           subu $6,1             // count--
702           xor $12,$13           // x ^= y
703           sw $12,($4)           // *xptr = x
704 xor_loop_down:                  // entry point: the count test runs first
705           bnez $6,xld1          // until (count==0)
706         j $31                   // return
707         .end xor_loop_down
708
709 // extern void and_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving downward (both pointers are decremented before
// each access): *xptr &= *yptr. Uses $12,$13 as scratch.
// Note: the local label ald1 is also used by add_loop_down; the two
// definitions sit in mutually exclusive CL_DS_BIG_ENDIAN_P branches.
710         .align 2
711         DECLARE_FUNCTION(and_loop_down)
712         .ent and_loop_down // Input in $4,$5,$6
713 ald1:     subu $4,4             // xptr--
714           subu $5,4             // yptr--
715           lw $12,($4)           // x = *xptr
716           lw $13,($5)           // y = *yptr
717           subu $6,1             // count--
718           and $12,$13           // x &= y
719           sw $12,($4)           // *xptr = x
720 and_loop_down:                  // entry point: the count test runs first
721           bnez $6,ald1          // until (count==0)
722         j $31                   // return
723         .end and_loop_down
724
725 // extern void eqv_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving downward (both pointers are decremented before
// each access): *xptr = ~(*xptr ^ *yptr). Uses $12,$13 as scratch.
726         .align 2
727         DECLARE_FUNCTION(eqv_loop_down)
728         .ent eqv_loop_down // Input in $4,$5,$6
729 nxld1:    subu $4,4             // xptr--
730           subu $5,4             // yptr--
731           lw $12,($4)           // x = *xptr
732           lw $13,($5)           // y = *yptr
733           subu $6,1             // count--
734           xor $12,$13           // x ^= y
735           nor $12,$0            // x = ~x
736           sw $12,($4)           // *xptr = x
737 eqv_loop_down:                  // entry point: the count test runs first
738           bnez $6,nxld1         // until (count==0)
739         j $31                   // return
740         .end eqv_loop_down
741
742 // extern void nand_loop_down (uintD* xptr, uintD* yptr, uintC count);
// For count words, moving downward (both pointers are decremented before
// each access): *xptr = ~(*xptr & *yptr), computed as an and followed by a
// nor against $0. Uses $12,$13 as scratch.
743         .align 2
744         DECLARE_FUNCTION(nand_loop_down)
745         .ent nand_loop_down // Input in $4,$5,$6
746 nald1:    subu $4,4             // xptr--
747           subu $5,4             // yptr--
748           lw $12,($4)           // x = *xptr
749           lw $13,($5)           // y = *yptr
750           subu $6,1             // count--
751           and $12,$13           // x &= y        // Is there a 'nand $12,$13' ??
752           nor $12,$0            // x = ~x
753           sw $12,($4)           // *xptr = x
754 nand_loop_down:                 // entry point: the count test runs first
755           bnez $6,nald1         // until (count==0)
756         j $31                   // return
757         .end nand_loop_down
758
759 // extern void nor_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Walks both arrays downwards (pointer is decremented before each access)
// and stores NOT (x OR y) back into the x array, one 32-bit word per iteration.
// Registers: $4 = xptr, $5 = yptr, $6 = count; $12 and $13 are used as temporaries.
// The entry label sits on the loop test, so count == 0 touches no memory.
760         .align 2
761         DECLARE_FUNCTION(nor_loop_down)
762         .ent nor_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
763 nold1:    subu $4,4             // xptr-- (step back one 4-byte word)
764           subu $5,4             // yptr--
765           lw $12,($4)           // x = *xptr
766           lw $13,($5)           // y = *yptr
767           subu $6,1             // count--
768           nor $12,$13           // x = ~(x|y), done by the single nor instruction
769           sw $12,($4)           // *xptr = x
770 nor_loop_down:                  // function entry: test count first
771           bnez $6,nold1         // loop until (count==0)
772         j $31                   // return
773         .end nor_loop_down
774
775 // extern void andc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Walks both arrays downwards (pointer is decremented before each access)
// and stores (x AND NOT y) back into the x array, one 32-bit word per
// iteration ("c2" = the second operand is complemented).
// Registers: $4 = xptr, $5 = yptr, $6 = count; $12 and $13 are used as temporaries.
// The entry label sits on the loop test, so count == 0 touches no memory.
776         .align 2
777         DECLARE_FUNCTION(andc2_loop_down)
778         .ent andc2_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
779 acld1:    subu $4,4             // xptr-- (step back one 4-byte word)
780           subu $5,4             // yptr--
781           lw $12,($4)           // x = *xptr
782           lw $13,($5)           // y = *yptr
783           subu $6,1             // count--
784           nor $13,$0            // y = ~y  (nor with the zero register is a bitwise NOT)
785           and $12,$13           // x &= y  (y is already complemented here)
786           sw $12,($4)           // *xptr = x
787 andc2_loop_down:                // function entry: test count first
788           bnez $6,acld1         // loop until (count==0)
789         j $31                   // return
790         .end andc2_loop_down
791
792 // extern void orc2_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Walks both arrays downwards (pointer is decremented before each access)
// and stores (x OR NOT y) back into the x array, one 32-bit word per
// iteration ("c2" = the second operand is complemented).
// Registers: $4 = xptr, $5 = yptr, $6 = count; $12 and $13 are used as temporaries.
// The entry label sits on the loop test, so count == 0 touches no memory.
793         .align 2
794         DECLARE_FUNCTION(orc2_loop_down)
795         .ent orc2_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count
796 ocld1:    subu $4,4             // xptr-- (step back one 4-byte word)
797           subu $5,4             // yptr--
798           lw $12,($4)           // x = *xptr
799           lw $13,($5)           // y = *yptr
800           subu $6,1             // count--
801           nor $13,$0            // y = ~y  (nor with the zero register is a bitwise NOT)
802           or $12,$13            // x |= y  (y is already complemented here)
803           sw $12,($4)           // *xptr = x
804 orc2_loop_down:                 // function entry: test count first
805           bnez $6,ocld1         // loop until (count==0)
806         j $31                   // return
807         .end orc2_loop_down
808
809 // extern void not_loop_down (uintD* xptr, uintC count);
// Walks the array downwards (pointer is decremented before each access)
// and replaces every 32-bit word by its bitwise complement, count times.
// Registers: $4 = xptr, $5 = count; $12 is used as a temporary.
// The entry label sits on the loop test, so count == 0 touches no memory.
810         .align 2
811         DECLARE_FUNCTION(not_loop_down)
812         .ent not_loop_down // Input: $4 = xptr, $5 = count
813 nld1:     subu $4,4             // xptr-- (step back one 4-byte word)
814           lw $12,($4)           // x = *xptr
815           subu $5,1             // count--
816           nor $12,$0            // x = ~x  (nor with the zero register is a bitwise NOT)
817           sw $12,($4)           // *xptr = x
818 not_loop_down:                  // function entry: test count first
819           bnez $5,nld1          // loop until (count==0)
820         j $31                   // return
821         .end not_loop_down
822
823 // extern boolean and_test_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Read-only test: walks both arrays downwards (pointer is decremented before
// each access) and returns 1 in $2 as soon as some word pair has a common
// set bit (x AND y != 0); returns 0 if all count pairs have been checked
// without a hit. Nothing is stored to memory.
// Registers: $4 = xptr, $5 = yptr, $6 = count; $12 and $13 are used as temporaries.
// The entry label sits on the loop test, so count == 0 returns 0 immediately.
824         .align 2
825         DECLARE_FUNCTION(and_test_loop_down)
826         .ent and_test_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count; Output in $2
827 atld1:    subu $4,4             // xptr-- (step back one 4-byte word)
828           subu $5,4             // yptr--
829           lw $12,($4)           // x = *xptr
830           lw $13,($5)           // y = *yptr
831           and $12,$13           // x &= y
832           bnez $12,atld3        // if (x != 0): common bit found, return 1
833           subu $6,1             // count--
834 and_test_loop_down:             // function entry: test count first
835           bnez $6,atld1         // loop until (count==0)
836         move $2,$0              // result 0: no common bit found
837         j $31                   // return
838 atld3:  li $2,1                 // result 1: common bit found
839         j $31                   // return
840         .end and_test_loop_down
841
842 // extern cl_signean compare_loop_down (uintD* xptr, uintD* yptr, uintC count);
// Compares two word arrays, walking both downwards (pointer is decremented
// before each access). The first word pair that differs decides the result:
// -1 if x < y (unsigned compare), +1 otherwise. If all count pairs are
// equal (or count == 0) the result is 0. Nothing is stored to memory.
// Registers: $4 = xptr, $5 = yptr, $6 = count; $12 and $13 hold the current
// words; the result is returned in $2.
843         .align 2
844         DECLARE_FUNCTION(compare_loop_down)
845         .ent compare_loop_down // Input: $4 = xptr, $5 = yptr, $6 = count; Output in $2
846 cmld1:    subu $4,4             // xptr-- (step back one 4-byte word)
847           subu $5,4             // yptr--
848           lw $12,($4)           // x = *xptr
849           lw $13,($5)           // y = *yptr
850           subu $6,1             // count--
851           bne $12,$13,cmld3     // if (x != y): the result is decided by this pair
852 compare_loop_down:              // function entry: test count first
853           bnez $6,cmld1         // loop until (count==0)
854         move $2,$0              // result 0: all words equal
855         j $31                   // return
856 cmld3:  bltu $12,$13,cmld4      // unsigned compare: if (x<y) go return -1
857         li $2,1                 // result 1: x > y
858         j $31                   // return
859 cmld4:  li $2,-1                // result -1: x < y
860         j $31                   // return
861         .end compare_loop_down
862
863 // extern uintD add_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Multi-word addition: dest[i] = source1[i] + source2[i] + carry, walking all
// three arrays upwards (pointer is incremented after each access), count
// 32-bit words in total. Returns the carry out of the last word (0 or 1) in $2.
// Registers: $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count;
// $12 and $13 are used as temporaries.
// The carry is not kept in a register: the loop body exists twice and the
// code position encodes the state (alu1 = no carry pending, alu3 = carry
// pending). Each body detects its outgoing carry with an unsigned compare of
// the wrapped sum against source2 and jumps into the other body on a change.
// The entry label sits on the loop test, so count == 0 returns 0 immediately.
864         .align 2
865         DECLARE_FUNCTION(add_loop_up)
866         .ent add_loop_up // Input in $4,$5,$6,$7, Output in $2
867 alu1:     // state: no carry pending
868           lw $12,($4)           // source1 = *sourceptr1
869           lw $13,($5)           // source2 = *sourceptr2
870           addu $4,4             // sourceptr1++
871           addu $5,4             // sourceptr2++
872           addu $12,$13          // dest = source1 + source2 (wraps modulo 2^32)
873           sw $12,($6)           // *destptr = dest
874           addu $6,4             // destptr++
875           bltu $12,$13,alu4     // if (dest < source2) the sum wrapped, i.e. carry: switch state
876 alu2:                           // shared tail of the no-carry state
877           subu $7,1             // count--
878 add_loop_up:                    // function entry: test count first
879           bnez $7,alu1          // loop until (count==0)
880         move $2,$0              // result 0: no carry out
881         j $31                   // return
882 alu3:   // state: carry pending
883           lw $12,($4)           // source1 = *sourceptr1
884           lw $13,($5)           // source2 = *sourceptr2
885           addu $4,4             // sourceptr1++
886           addu $5,4             // sourceptr2++
887           addu $12,$13          // dest = source1 + source2
888           addu $12,1            //        + 1 (the pending carry)
889           sw $12,($6)           // *destptr = dest
890           addu $6,4             // destptr++
891           bgtu $12,$13,alu2     // if (dest > source2) the sum did not wrap, i.e. no carry: switch state
892 alu4:     subu $7,1             // count--
893           bnez $7,alu3          // loop until (count==0)
894         li $2,1                 // result 1: carry out
895         j $31                   // return
896         .end add_loop_up
897
898 // extern uintD addto_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
// In-place multi-word addition: dest[i] += source[i] + carry, walking both
// arrays upwards (pointer is incremented after each access), count 32-bit
// words in total. Returns the carry out of the last word (0 or 1) in $2.
// Registers: $4 = sourceptr, $5 = destptr, $6 = count; $12 and $13 are used
// as temporaries.
// The carry is not kept in a register: the loop body exists twice and the
// code position encodes the state (atlu1 = no carry pending, atlu3 = carry
// pending). Each body detects its outgoing carry with an unsigned compare of
// the wrapped sum against the old destination word.
// The entry label sits on the loop test, so count == 0 returns 0 immediately.
899         .align 2
900         DECLARE_FUNCTION(addto_loop_up)
901         .ent addto_loop_up // Input in $4,$5,$6, Output in $2
902 atlu1:    // state: no carry pending
903           lw $12,($4)           // source1 = *sourceptr
904           lw $13,($5)           // source2 = *destptr (old destination word)
905           addu $4,4             // sourceptr++
906           subu $6,1             // count--
907           addu $12,$13          // dest = source1 + source2 (wraps modulo 2^32)
908           sw $12,($5)           // *destptr = dest
909           addu $5,4             // destptr++
910           bltu $12,$13,atlu4    // if (dest < source2) the sum wrapped, i.e. carry: switch state
911 addto_loop_up:                  // function entry: test count first
912 atlu2:    bnez $6,atlu1         // loop until (count==0)
913         move $2,$0              // result 0: no carry out
914         j $31                   // return
915 atlu3:  // state: carry pending
916           lw $12,($4)           // source1 = *sourceptr
917           lw $13,($5)           // source2 = *destptr (old destination word)
918           addu $4,4             // sourceptr++
919           subu $6,1             // count--
920           addu $12,$13          // dest = source1 + source2
921           addu $12,1            //        + 1 (the pending carry)
922           sw $12,($5)           // *destptr = dest
923           addu $5,4             // destptr++
924           bgtu $12,$13,atlu2    // if (dest > source2) the sum did not wrap, i.e. no carry: switch state
925 atlu4:    bnez $6,atlu3         // loop until (count==0)
926         li $2,1                 // result 1: carry out
927         j $31                   // return
928         .end addto_loop_up
929
930 // extern uintD inc_loop_up (uintD* ptr, uintC count);
// Adds 1 to a multi-word number in place, walking upwards (pointer is
// incremented after each access). Each word is incremented and stored; the
// loop stops at the first word whose incremented value is nonzero (the carry
// is absorbed there) and returns 0 in $2. If all count words wrapped to 0,
// or count == 0, the carry leaves the number and 1 is returned.
// Registers: $4 = ptr, $5 = count; $12 is used as a temporary.
931         .align 2
932         DECLARE_FUNCTION(inc_loop_up)
933         .ent inc_loop_up // Input in $4,$5, Output in $2
934 ilu1:     lw $12,($4)           // x = *ptr
935           subu $5,1             // count--
936           addu $12,1            // x++ (wraps to 0 if x was all ones)
937           sw $12,($4)           // *ptr = x
938           addu $4,4             // ptr++
939           bnez $12,ilu3         // if (x != 0): no wrap, carry absorbed, done
940 inc_loop_up:                    // function entry: test count first
941           bnez $5,ilu1          // loop until (count==0)
942         li $2,1                 // result 1: carry out of the last word
943         j $31                   // return
944 ilu3:   move $2,$0              // result 0: no carry out
945         j $31                   // return
946         .end inc_loop_up
947
948 // extern uintD sub_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count);
// Multi-word subtraction: dest[i] = source1[i] - source2[i] - borrow, walking
// all three arrays upwards (pointer is incremented after each access), count
// 32-bit words in total. Returns in $2 the borrow out of the last word:
// 0 for no borrow, -1 (all bits set) for a borrow.
// Registers: $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count;
// $12 and $13 are used as temporaries.
// The borrow (called carry in the comments) is not kept in a register: the
// code position encodes the state. slu1 runs while no borrow is pending,
// slu3 while one is pending; slu2 and slu4 are the tails taken when the
// state changes. The borrow is predicted by an unsigned compare of the two
// source words before the subtraction is done.
// The entry label sits on the loop test, so count == 0 returns 0 immediately.
949         .align 2
950         DECLARE_FUNCTION(sub_loop_up)
951         .ent sub_loop_up // Input in $4,$5,$6,$7, Output in $2
952 slu1:     // state: no carry (borrow) pending
953           lw $12,($4)           // source1 = *sourceptr1
954           lw $13,($5)           // source2 = *sourceptr2
955           addu $4,4             // sourceptr1++
956           addu $5,4             // sourceptr2++
957           subu $7,1             // count--
958           bltu $12,$13,slu2     // if (source1 < source2) the subtraction borrows, i.e. carry: switch state
959           subu $12,$13          // dest = source1 - source2
960           sw $12,($6)           // *destptr = dest
961           addu $6,4             // destptr++
962 sub_loop_up:                    // function entry: test count first
963           bnez $7,slu1          // loop until (count==0)
964         move $2,$0              // result 0: no borrow out
965         j $31                   // return
966 slu2:     subu $12,$13          // dest = source1 - source2 (wraps; a borrow is now pending)
967           sw $12,($6)           // *destptr = dest
968           addu $6,4             // destptr++
969           bnez $7,slu3          // continue in the carry state until (count==0)
970         li $2,-1                // result -1: borrow out
971         j $31                   // return
972 slu3:   // state: carry (borrow) pending
973           lw $12,($4)           // source1 = *sourceptr1
974           lw $13,($5)           // source2 = *sourceptr2
975           addu $4,4             // sourceptr1++
976           addu $5,4             // sourceptr2++
977           subu $7,1             // count--
978           bgtu $12,$13,slu4     // if (source1 > source2) the borrow is absorbed, i.e. no carry: switch state
979           subu $12,$13          // dest = source1 - source2
980           subu $12,1            //        - 1 (the pending borrow)
981           sw $12,($6)           // *destptr = dest
982           addu $6,4             // destptr++
983           bnez $7,slu3          // loop until (count==0)
984         li $2,-1                // result -1: borrow out
985         j $31                   // return
986 slu4:     subu $12,$13          // dest = source1 - source2
987           subu $12,1            //        - 1 (the pending borrow, absorbed here)
988           sw $12,($6)           // *destptr = dest
989           addu $6,4             // destptr++
990           bnez $7,slu1          // continue in the no-carry state until (count==0)
991         move $2,$0              // result 0: no borrow out
992         j $31                   // return
993         .end sub_loop_up
994
995 // extern uintD subx_loop_up (uintD* sourceptr1, uintD* sourceptr2, uintD* destptr, uintC count, uintD carry);
// Multi-word subtraction with an incoming borrow:
// dest[i] = source1[i] - source2[i] - borrow, walking all three arrays
// upwards (pointer is incremented after each access), count 32-bit words in
// total. Returns in $2 the borrow out of the last word: 0 for no borrow,
// -1 (all bits set) for a borrow.
// Registers: $4 = sourceptr1, $5 = sourceptr2, $6 = destptr, $7 = count;
// the fifth argument (carry) is read from the stack at 16($sp); any nonzero
// value is treated as a pending borrow. $12 and $13 are used as temporaries.
// The borrow (called carry in the comments) is not kept in a register: the
// code position encodes the state. sxlu1 runs while no borrow is pending,
// sxlu4 while one is pending; sxlu3 and sxlu6 are the tails taken when the
// state changes. The entry code picks the initial state from the carry
// argument and jumps to the matching loop test, so with count == 0 the
// result reflects the incoming carry (0, or -1 if it was nonzero).
996         .align 2
997         DECLARE_FUNCTION(subx_loop_up)
998         .ent subx_loop_up // Input in $4,$5,$6,$7 and 16($sp) (carry), Output in $2
999 subx_loop_up:
1000         lw $12,16($sp)          // fetch the carry argument from the stack
1001         bnez $12,sxlu5          // carry != 0: start in the carry state
1002         b sxlu2                 // carry == 0: start in the no-carry state
1003 sxlu1:    // state: no carry (borrow) pending
1004           lw $12,($4)           // source1 = *sourceptr1
1005           lw $13,($5)           // source2 = *sourceptr2
1006           addu $4,4             // sourceptr1++
1007           addu $5,4             // sourceptr2++
1008           subu $7,1             // count--
1009           bltu $12,$13,sxlu3    // if (source1 < source2) the subtraction borrows, i.e. carry: switch state
1010           subu $12,$13          // dest = source1 - source2
1011           sw $12,($6)           // *destptr = dest
1012           addu $6,4             // destptr++
1013 sxlu2:    bnez $7,sxlu1         // loop until (count==0)
1014         move $2,$0              // result 0: no borrow out
1015         j $31                   // return
1016 sxlu3:    subu $12,$13          // dest = source1 - source2 (wraps; a borrow is now pending)
1017           sw $12,($6)           // *destptr = dest
1018           addu $6,4             // destptr++
1019           bnez $7,sxlu4         // continue in the carry state until (count==0)
1020         li $2,-1                // result -1: borrow out
1021         j $31                   // return
1022 sxlu4:  // state: carry (borrow) pending
1023           lw $12,($4)           // source1 = *sourceptr1
1024           lw $13,($5)           // source2 = *sourceptr2
1025           addu $4,4             // sourceptr1++
1026           addu $5,4             // sourceptr2++
1027           subu $7,1             // count--
1028           bgtu $12,$13,sxlu6    // if (source1 > source2) the borrow is absorbed, i.e. no carry: switch state
1029           subu $12,$13          // dest = source1 - source2
1030           subu $12,1            //        - 1 (the pending borrow)
1031           sw $12,($6)           // *destptr = dest
1032           addu $6,4             // destptr++
1033 sxlu5:    bnez $7,sxlu4         // loop until (count==0)
1034         li $2,-1                // result -1: borrow out
1035         j $31                   // return
1036 sxlu6:    subu $12,$13          // dest = source1 - source2
1037           subu $12,1            //        - 1 (the pending borrow, absorbed here)
1038           sw $12,($6)           // *destptr = dest
1039           addu $6,4             // destptr++
1040           bnez $7,sxlu1         // continue in the no-carry state until (count==0)
1041         move $2,$0              // result 0: no borrow out
1042         j $31                   // return
1043         .end subx_loop_up
1044
1045 // extern uintD subfrom_loop_up (uintD* sourceptr, uintD* destptr, uintC count);
// In-place multi-word subtraction: dest[i] = dest[i] - source[i] - borrow,
// walking both arrays upwards (pointer is incremented after each access),
// count 32-bit words in total. Returns in $2 the borrow out of the last
// word: 0 for no borrow, -1 (all bits set) for a borrow.
// Registers: $4 = sourceptr, $5 = destptr, $6 = count; $12 and $13 are used
// as temporaries. ($7 is not read by this routine.)
// The borrow (called carry in the comments) is not kept in a register: the
// code position encodes the state. sflu1 runs while no borrow is pending,
// sflu3 while one is pending; sflu2 and sflu4 are the tails taken when the
// state changes.
// The entry label sits on the loop test, so count == 0 returns 0 immediately.
1046         .align 2
1047         DECLARE_FUNCTION(subfrom_loop_up)
1048         .ent subfrom_loop_up // Input in $4,$5,$6, Output in $2
1049 sflu1:    // state: no carry (borrow) pending
1050           lw $12,($5)           // source1 = *destptr (old destination word, the minuend)
1051           lw $13,($4)           // source2 = *sourceptr (the subtrahend)
1052           addu $4,4             // sourceptr++
1053           subu $6,1             // count--
1054           bltu $12,$13,sflu2    // if (source1 < source2) the subtraction borrows, i.e. carry: switch state
1055           subu $12,$13          // dest = source1 - source2
1056           sw $12,($5)           // *destptr = dest
1057           addu $5,4             // destptr++
1058 subfrom_loop_up:                // function entry: test count first
1059           bnez $6,sflu1         // loop until (count==0)
1060         move $2,$0              // result 0: no borrow out
1061         j $31                   // return
1062 sflu2:    subu $12,$13          // dest = source1 - source2 (wraps; a borrow is now pending)
1063           sw $12,($5)           // *destptr = dest
1064           addu $5,4             // destptr++
1065           bnez $6,sflu3         // continue in the carry state until (count==0)
1066         li $2,-1                // result -1: borrow out
1067         j $31                   // return
1068 sflu3:  // state: carry (borrow) pending
1069           lw $12,($5)           // source1 = *destptr (old destination word, the minuend)
1070           lw $13,($4)           // source2 = *sourceptr (the subtrahend)
1071           addu $4,4             // sourceptr++
1072           subu $6,1             // count--
1073           bgtu $12,$13,sflu4    // if (source1 > source2) the borrow is absorbed, i.e. no carry: switch state
1074           subu $12,$13          // dest = source1 - source2
1075           subu $12,1            //        - 1 (the pending borrow)
1076           sw $12,($5)           // *destptr = dest
1077           addu $5,4             // destptr++
1078           bnez $6,sflu3         // loop until (count==0)
1079         li $2,-1                // result -1: borrow out
1080         j $31                   // return
1081 sflu4:    subu $12,$13          // dest = source1 - source2
1082           subu $12,1            //        - 1 (the pending borrow, absorbed here)
1083           sw $12,($5)           // *destptr = dest
1084           addu $5,4             // destptr++
1085           bnez $6,sflu1         // continue in the no-carry state until (count==0)
1086         move $2,$0              // result 0: no borrow out
1087         j $31                   // return
1088         .end subfrom_loop_up
1089
1090 // extern uintD dec_loop_up (uintD* ptr, uintC count);
// Subtracts 1 from a multi-word number in place, walking upwards. Every word
// that is 0 is decremented (it wraps to all ones) and the borrow moves on to
// the next word. The first nonzero word is decremented too, absorbs the
// borrow and ends the routine with result 0 in $2. If all count words were
// 0, or count == 0, the borrow leaves the number and -1 is returned.
// Registers: $4 = ptr, $5 = count; $12 is used as a temporary.
// The zero test is done on the value before the decrement, which is why the
// decrement and store appear twice (once in the loop, once at dlu3).
1091         .align 2
1092         DECLARE_FUNCTION(dec_loop_up)
1093         .ent dec_loop_up // Input in $4,$5, Output in $2
1094 dlu1:     lw $12,($4)           // x = *ptr
1095           subu $5,1             // count--
1096           bnez $12,dlu3         // if (x != 0): this word absorbs the borrow, finish at dlu3
1097           subu $12,1            // x-- (x was 0, so it wraps to all ones)
1098           sw $12,($4)           // *ptr = x
1099           addu $4,4             // ptr++
1100 dec_loop_up:                    // function entry: test count first
1101           bnez $5,dlu1          // loop until (count==0)
1102         li $2,-1                // result -1: borrow out of the last word
1103         j $31                   // return
1104 dlu3:   subu $12,1              // x-- (x was nonzero, no wrap)
1105         sw $12,($4)             // *ptr = x
1106         move $2,$0              // result 0: no borrow out
1107         j $31                   // return
1108         .end dec_loop_up
1109
1110 // extern uintD neg_loop_up (uintD* ptr, uintC count);
// Negates a multi-word number in place, walking upwards.
// Phase 1 skips leading words that are 0 and leaves them unchanged. If all
// count words are 0 (or count == 0) the result in $2 is 0.
// Phase 2 starts at the first nonzero word: that word is replaced by its
// arithmetic negation (0 - x), every remaining word by its bitwise
// complement, and -1 is returned in $2.
// Registers: $4 = ptr, $5 = count; $12 is used as a temporary.
1111         .align 2
1112         DECLARE_FUNCTION(neg_loop_up)
1113         .ent neg_loop_up // Input in $4,$5, Output in $2
1114         // phase 1: search for the first digit != 0
1115 nlu1:     lw $12,($4)           // x = *ptr
1116           subu $5,1             // count--
1117           bnez $12,nlu3         // if (x != 0): first nonzero digit found
1118           addu $4,4             // ptr++ (zero digit is left unchanged)
1119 neg_loop_up:                    // function entry: test count first
1120           bnez $5,nlu1          // loop until (count==0)
1121         move $2,$0              // result 0: every digit was zero
1122         j $31                   // return
1123 nlu3:   // first digit != 0 found; from here on there is always a carry
1124         // negate this one digit:
1125         subu $12,$0,$12         // x = -x
1126         sw $12,($4)             // *ptr = x
1127         // phase 2: invert all remaining digits
1128         b nlu5                  // enter the loop at the pointer advance and count test
1129 nlu4:     lw $12,($4)           // x = *ptr
1130           subu $5,1             // count--
1131           nor $12,$0            // x = ~x  (nor with the zero register is a bitwise NOT)
1132           sw $12,($4)           // *ptr = x
1133 nlu5:     addu $4,4             // ptr++
1134           bnez $5,nlu4          // loop until (count==0)
1135         li $2,-1                // result -1: the number was nonzero
1136         j $31                   // return
1137         .end neg_loop_up
1138
1139 #endif
1140