]>
Commit | Line | Data |
---|---|---|
90e7678c | 1 | /* -*- Mode: Asm -*- */ |
33faafca | 2 | /* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009 |
0a84fec6 | 3 | Free Software Foundation, Inc. |
92bffc14 | 4 | Contributed by Denis Chertykov <chertykov@gmail.com> |
90e7678c DC |
5 | |
6 | This file is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by the | |
748086b7 | 8 | Free Software Foundation; either version 3, or (at your option) any |
90e7678c DC |
9 | later version. |
10 | ||
90e7678c DC |
11 | This file is distributed in the hope that it will be useful, but |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | Under Section 7 of GPL version 3, you are granted additional |
17 | permissions described in the GCC Runtime Library Exception, version | |
18 | 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | You should have received a copy of the GNU General Public License and | |
21 | a copy of the GCC Runtime Library Exception along with this program; | |
22 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | <http://www.gnu.org/licenses/>. */ | |
90e7678c | 24 | |
90e7678c DC |
25 | #define __zero_reg__ r1 |
26 | #define __tmp_reg__ r0 | |
27 | #define __SREG__ 0x3f | |
28 | #define __SP_H__ 0x3e | |
29 | #define __SP_L__ 0x3d | |
34d02d17 | 30 | #define __RAMPZ__ 0x3B |
90e7678c | 31 | |
1d26ac96 MM |
32 | /* Most of the functions here are called directly from avr.md |
33 | patterns, instead of using the standard libcall mechanisms. | |
34 | This can make better code because GCC knows exactly which | |
35 | of the call-used registers (not all of them) are clobbered. */ | |
36 | ||
3f8a8c68 | 37 | .section .text.libgcc, "ax", @progbits |
bad3869a | 38 | |
1d26ac96 | 39 | .macro mov_l r_dest, r_src |
7ed9c001 | 40 | #if defined (__AVR_HAVE_MOVW__) |
1d26ac96 MM |
41 | movw \r_dest, \r_src |
42 | #else | |
43 | mov \r_dest, \r_src | |
44 | #endif | |
45 | .endm | |
46 | ||
47 | .macro mov_h r_dest, r_src | |
7ed9c001 | 48 | #if defined (__AVR_HAVE_MOVW__) |
1d26ac96 MM |
49 | ; empty |
50 | #else | |
51 | mov \r_dest, \r_src | |
52 | #endif | |
53 | .endm | |
54 | ||
0ad8bb3b GJL |
55 | #if defined (__AVR_HAVE_JMP_CALL__) |
56 | #define XCALL call | |
57 | #define XJMP jmp | |
58 | #else | |
59 | #define XCALL rcall | |
60 | #define XJMP rjmp | |
61 | #endif | |
62 | ||
63 | .macro DEFUN name | |
64 | .global \name | |
65 | .func \name | |
66 | \name: | |
67 | .endm | |
68 | ||
69 | .macro ENDF name | |
70 | .size \name, .-\name | |
71 | .endfunc | |
72 | .endm | |
73 | ||
74 | \f | |
bad3869a | 75 | /* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ |
dd6d1f8c | 76 | #if !defined (__AVR_HAVE_MUL__) |
90e7678c DC |
77 | /******************************************************* |
78 | Multiplication 8 x 8 | |
79 | *******************************************************/ | |
bad3869a | 80 | #if defined (L_mulqi3) |
90e7678c | 81 | |
c4984bad | 82 | #define r_arg2 r22 /* multiplicand */ |
90e7678c DC |
83 | #define r_arg1 r24 /* multiplier */ |
84 | #define r_res __tmp_reg__ /* result */ | |
85 | ||
bad3869a MM |
86 | .global __mulqi3 |
87 | .func __mulqi3 | |
88 | __mulqi3: | |
90e7678c DC |
89 | clr r_res ; clear result |
90 | __mulqi3_loop: | |
91 | sbrc r_arg1,0 | |
92 | add r_res,r_arg2 | |
93 | add r_arg2,r_arg2 ; shift multiplicand | |
94 | breq __mulqi3_exit ; while multiplicand != 0 | |
95 | lsr r_arg1 ; | |
96 | brne __mulqi3_loop ; exit if multiplier = 0 | |
97 | __mulqi3_exit: | |
98 | mov r_arg1,r_res ; result to return register | |
99 | ret | |
100 | ||
101 | #undef r_arg2 | |
102 | #undef r_arg1 | |
103 | #undef r_res | |
104 | ||
cdd9eb8f | 105 | .endfunc |
bad3869a | 106 | #endif /* defined (L_mulqi3) */ |
90e7678c | 107 | |
1d26ac96 MM |
108 | #if defined (L_mulqihi3) |
109 | .global __mulqihi3 | |
110 | .func __mulqihi3 | |
111 | __mulqihi3: | |
112 | clr r25 | |
113 | sbrc r24, 7 | |
114 | dec r25 | |
115 | clr r23 | |
116 | sbrc r22, 7 | |
117 | dec r22 | |
118 | rjmp __mulhi3 | |
119 | .endfunc | |
120 | #endif /* defined (L_mulqihi3) */ | |
121 | ||
122 | #if defined (L_umulqihi3) | |
123 | .global __umulqihi3 | |
124 | .func __umulqihi3 | |
125 | __umulqihi3: | |
126 | clr r25 | |
127 | clr r23 | |
128 | rjmp __mulhi3 | |
129 | .endfunc | |
130 | #endif /* defined (L_umulqihi3) */ | |
90e7678c DC |
131 | |
132 | /******************************************************* | |
133 | Multiplication 16 x 16 | |
134 | *******************************************************/ | |
bad3869a | 135 | #if defined (L_mulhi3) |
90e7678c DC |
136 | #define r_arg1L r24 /* multiplier Low */ |
137 | #define r_arg1H r25 /* multiplier High */ | |
138 | #define r_arg2L r22 /* multiplicand Low */ | |
139 | #define r_arg2H r23 /* multiplicand High */ | |
1d26ac96 | 140 | #define r_resL __tmp_reg__ /* result Low */ |
90e7678c DC |
141 | #define r_resH r21 /* result High */ |
142 | ||
bad3869a MM |
143 | .global __mulhi3 |
144 | .func __mulhi3 | |
145 | __mulhi3: | |
90e7678c DC |
146 | clr r_resH ; clear result |
147 | clr r_resL ; clear result | |
148 | __mulhi3_loop: | |
149 | sbrs r_arg1L,0 | |
150 | rjmp __mulhi3_skip1 | |
151 | add r_resL,r_arg2L ; result + multiplicand | |
152 | adc r_resH,r_arg2H | |
153 | __mulhi3_skip1: | |
154 | add r_arg2L,r_arg2L ; shift multiplicand | |
155 | adc r_arg2H,r_arg2H | |
156 | ||
500164d2 MM |
157 | cp r_arg2L,__zero_reg__ |
158 | cpc r_arg2H,__zero_reg__ | |
90e7678c DC |
159 | breq __mulhi3_exit ; while multiplicand != 0 |
160 | ||
161 | lsr r_arg1H ; gets LSB of multiplier | |
162 | ror r_arg1L | |
7656d28a | 163 | sbiw r_arg1L,0 |
90e7678c DC |
164 | brne __mulhi3_loop ; exit if multiplier = 0 |
165 | __mulhi3_exit: | |
166 | mov r_arg1H,r_resH ; result to return register | |
167 | mov r_arg1L,r_resL | |
168 | ret | |
169 | ||
170 | #undef r_arg1L | |
171 | #undef r_arg1H | |
172 | #undef r_arg2L | |
173 | #undef r_arg2H | |
174 | #undef r_resL | |
175 | #undef r_resH | |
176 | ||
cdd9eb8f | 177 | .endfunc |
bad3869a | 178 | #endif /* defined (L_mulhi3) */ |
dd6d1f8c | 179 | #endif /* !defined (__AVR_HAVE_MUL__) */ |
90e7678c | 180 | |
9a63dee4 GJL |
181 | /******************************************************* |
182 | Widening Multiplication 32 = 16 x 16 | |
183 | *******************************************************/ | |
184 | ||
1d26ac96 | 185 | #if defined (L_mulhisi3) |
9a63dee4 GJL |
186 | DEFUN __mulhisi3 |
187 | #if defined (__AVR_HAVE_MUL__) | |
188 | ||
189 | ;; r25:r22 = r19:r18 * r21:r20 | |
190 | ||
191 | #define A0 18 | |
192 | #define B0 20 | |
193 | #define C0 22 | |
194 | ||
195 | #define A1 A0+1 | |
196 | #define B1 B0+1 | |
197 | #define C1 C0+1 | |
198 | #define C2 C0+2 | |
199 | #define C3 C0+3 | |
200 | ||
201 | ; C = (signed)A1 * (signed)B1 | |
202 | muls A1, B1 | |
203 | movw C2, R0 | |
204 | ||
205 | ; C += A0 * B0 | |
206 | mul A0, B0 | |
207 | movw C0, R0 | |
208 | ||
209 | ; C += (signed)A1 * B0 | |
210 | mulsu A1, B0 | |
211 | sbci C3, 0 | |
212 | add C1, R0 | |
213 | adc C2, R1 | |
214 | clr __zero_reg__ | |
215 | adc C3, __zero_reg__ | |
216 | ||
217 | ; C += (signed)B1 * A0 | |
218 | mulsu B1, A0 | |
219 | sbci C3, 0 | |
220 | XJMP __xmulhisi3_exit | |
221 | ||
222 | #undef A0 | |
223 | #undef A1 | |
224 | #undef B0 | |
225 | #undef B1 | |
226 | #undef C0 | |
227 | #undef C1 | |
228 | #undef C2 | |
229 | #undef C3 | |
230 | ||
231 | #else /* !__AVR_HAVE_MUL__ */ | |
232 | ;;; FIXME: This is dead code (no one calls it) |
1d26ac96 MM |
233 | mov_l r18, r24 |
234 | mov_h r19, r25 | |
235 | clr r24 | |
236 | sbrc r23, 7 | |
237 | dec r24 | |
238 | mov r25, r24 | |
239 | clr r20 | |
240 | sbrc r19, 7 | |
241 | dec r20 | |
242 | mov r21, r20 | |
9a63dee4 GJL |
243 | XJMP __mulsi3 |
244 | #endif /* __AVR_HAVE_MUL__ */ | |
245 | ENDF __mulhisi3 | |
1d26ac96 MM |
246 | #endif /* defined (L_mulhisi3) */ |
247 | ||
248 | #if defined (L_umulhisi3) | |
9a63dee4 GJL |
249 | DEFUN __umulhisi3 |
250 | #if defined (__AVR_HAVE_MUL__) | |
251 | ||
252 | ;; r25:r22 = r19:r18 * r21:r20 | |
253 | ||
254 | #define A0 18 | |
255 | #define B0 20 | |
256 | #define C0 22 | |
257 | ||
258 | #define A1 A0+1 | |
259 | #define B1 B0+1 | |
260 | #define C1 C0+1 | |
261 | #define C2 C0+2 | |
262 | #define C3 C0+3 | |
263 | ||
264 | ; C = A1 * B1 | |
265 | mul A1, B1 | |
266 | movw C2, R0 | |
267 | ||
268 | ; C += A0 * B0 | |
269 | mul A0, B0 | |
270 | movw C0, R0 | |
271 | ||
272 | ; C += A1 * B0 | |
273 | mul A1, B0 | |
274 | add C1, R0 | |
275 | adc C2, R1 | |
276 | clr __zero_reg__ | |
277 | adc C3, __zero_reg__ | |
278 | ||
279 | ; C += B1 * A0 | |
280 | mul B1, A0 | |
281 | XJMP __xmulhisi3_exit | |
282 | ||
283 | #undef A0 | |
284 | #undef A1 | |
285 | #undef B0 | |
286 | #undef B1 | |
287 | #undef C0 | |
288 | #undef C1 | |
289 | #undef C2 | |
290 | #undef C3 | |
291 | ||
292 | #else /* !__AVR_HAVE_MUL__ */ | |
293 | ;;; FIXME: This is dead code (no one calls it) |
1d26ac96 MM |
294 | mov_l r18, r24 |
295 | mov_h r19, r25 | |
296 | clr r24 | |
297 | clr r25 | |
298 | clr r20 | |
299 | clr r21 | |
9a63dee4 GJL |
300 | XJMP __mulsi3 |
301 | #endif /* __AVR_HAVE_MUL__ */ | |
302 | ENDF __umulhisi3 | |
1d26ac96 MM |
303 | #endif /* defined (L_umulhisi3) */ |
304 | ||
9a63dee4 GJL |
305 | #if defined (L_xmulhisi3_exit) |
306 | ||
307 | ;;; Helper for __mulhisi3 resp. __umulhisi3. | |
308 | ||
309 | #define C0 22 | |
310 | #define C1 C0+1 | |
311 | #define C2 C0+2 | |
312 | #define C3 C0+3 | |
313 | ||
314 | DEFUN __xmulhisi3_exit | |
315 | add C1, R0 | |
316 | adc C2, R1 | |
317 | clr __zero_reg__ | |
318 | adc C3, __zero_reg__ | |
319 | ret | |
320 | ENDF __xmulhisi3_exit | |
321 | ||
322 | #undef C0 | |
323 | #undef C1 | |
324 | #undef C2 | |
325 | #undef C3 | |
326 | ||
327 | #endif /* defined (L_xmulhisi3_exit) */ | |
328 | ||
bad3869a | 329 | #if defined (L_mulsi3) |
90e7678c DC |
330 | /******************************************************* |
331 | Multiplication 32 x 32 | |
332 | *******************************************************/ | |
333 | #define r_arg1L r22 /* multiplier Low */ | |
334 | #define r_arg1H r23 | |
335 | #define r_arg1HL r24 | |
336 | #define r_arg1HH r25 /* multiplier High */ | |
337 | ||
338 | ||
339 | #define r_arg2L r18 /* multiplicand Low */ | |
340 | #define r_arg2H r19 | |
341 | #define r_arg2HL r20 | |
342 | #define r_arg2HH r21 /* multiplicand High */ | |
343 | ||
344 | #define r_resL r26 /* result Low */ | |
345 | #define r_resH r27 | |
346 | #define r_resHL r30 | |
347 | #define r_resHH r31 /* result High */ | |
348 | ||
349 | ||
bad3869a MM |
350 | .global __mulsi3 |
351 | .func __mulsi3 | |
352 | __mulsi3: | |
dd6d1f8c | 353 | #if defined (__AVR_HAVE_MUL__) |
bad3869a MM |
354 | mul r_arg1L, r_arg2L |
355 | movw r_resL, r0 | |
356 | mul r_arg1H, r_arg2H | |
357 | movw r_resHL, r0 | |
358 | mul r_arg1HL, r_arg2L | |
359 | add r_resHL, r0 | |
360 | adc r_resHH, r1 | |
361 | mul r_arg1L, r_arg2HL | |
362 | add r_resHL, r0 | |
363 | adc r_resHH, r1 | |
364 | mul r_arg1HH, r_arg2L | |
365 | add r_resHH, r0 | |
366 | mul r_arg1HL, r_arg2H | |
367 | add r_resHH, r0 | |
368 | mul r_arg1H, r_arg2HL | |
369 | add r_resHH, r0 | |
370 | mul r_arg1L, r_arg2HH | |
371 | add r_resHH, r0 | |
372 | clr r_arg1HH ; use instead of __zero_reg__ to add carry | |
373 | mul r_arg1H, r_arg2L | |
374 | add r_resH, r0 | |
375 | adc r_resHL, r1 | |
376 | adc r_resHH, r_arg1HH ; add carry | |
377 | mul r_arg1L, r_arg2H | |
378 | add r_resH, r0 | |
379 | adc r_resHL, r1 | |
380 | adc r_resHH, r_arg1HH ; add carry | |
381 | movw r_arg1L, r_resL | |
382 | movw r_arg1HL, r_resHL | |
383 | clr r1 ; __zero_reg__ clobbered by "mul" | |
384 | ret | |
385 | #else | |
90e7678c DC |
386 | clr r_resHH ; clear result |
387 | clr r_resHL ; clear result | |
388 | clr r_resH ; clear result | |
389 | clr r_resL ; clear result | |
390 | __mulsi3_loop: | |
391 | sbrs r_arg1L,0 | |
392 | rjmp __mulsi3_skip1 | |
393 | add r_resL,r_arg2L ; result + multiplicand | |
394 | adc r_resH,r_arg2H | |
395 | adc r_resHL,r_arg2HL | |
396 | adc r_resHH,r_arg2HH | |
397 | __mulsi3_skip1: | |
398 | add r_arg2L,r_arg2L ; shift multiplicand | |
399 | adc r_arg2H,r_arg2H | |
400 | adc r_arg2HL,r_arg2HL | |
401 | adc r_arg2HH,r_arg2HH | |
402 | ||
403 | lsr r_arg1HH ; gets LSB of multiplier | |
404 | ror r_arg1HL | |
405 | ror r_arg1H | |
406 | ror r_arg1L | |
407 | brne __mulsi3_loop | |
408 | sbiw r_arg1HL,0 | |
409 | cpc r_arg1H,r_arg1L | |
410 | brne __mulsi3_loop ; exit if multiplier = 0 | |
411 | __mulsi3_exit: | |
7ed9c001 DC |
412 | mov_h r_arg1HH,r_resHH ; result to return register |
413 | mov_l r_arg1HL,r_resHL | |
414 | mov_h r_arg1H,r_resH | |
415 | mov_l r_arg1L,r_resL | |
90e7678c | 416 | ret |
dd6d1f8c | 417 | #endif /* defined (__AVR_HAVE_MUL__) */ |
90e7678c DC |
418 | #undef r_arg1L |
419 | #undef r_arg1H | |
420 | #undef r_arg1HL | |
421 | #undef r_arg1HH | |
422 | ||
423 | ||
424 | #undef r_arg2L | |
425 | #undef r_arg2H | |
426 | #undef r_arg2HL | |
427 | #undef r_arg2HH | |
428 | ||
429 | #undef r_resL | |
430 | #undef r_resH | |
431 | #undef r_resHL | |
432 | #undef r_resHH | |
433 | ||
cdd9eb8f | 434 | .endfunc |
bad3869a | 435 | #endif /* defined (L_mulsi3) */ |
90e7678c DC |
436 | |
437 | /******************************************************* | |
438 | Division 8 / 8 => (result + remainder) | |
439 | *******************************************************/ | |
1d26ac96 MM |
440 | #define r_rem r25 /* remainder */ |
441 | #define r_arg1 r24 /* dividend, quotient */ | |
c4984bad | 442 | #define r_arg2 r22 /* divisor */ |
1d26ac96 | 443 | #define r_cnt r23 /* loop count */ |
90e7678c | 444 | |
1d26ac96 MM |
445 | #if defined (L_udivmodqi4) |
446 | .global __udivmodqi4 | |
447 | .func __udivmodqi4 | |
448 | __udivmodqi4: | |
449 | sub r_rem,r_rem ; clear remainder and carry | |
450 | ldi r_cnt,9 ; init loop counter | |
451 | rjmp __udivmodqi4_ep ; jump to entry point | |
452 | __udivmodqi4_loop: | |
453 | rol r_rem ; shift dividend into remainder | |
454 | cp r_rem,r_arg2 ; compare remainder & divisor | |
455 | brcs __udivmodqi4_ep ; remainder < divisor |
456 | sub r_rem,r_arg2 ; restore remainder | |
457 | __udivmodqi4_ep: | |
458 | rol r_arg1 ; shift dividend (with CARRY) | |
459 | dec r_cnt ; decrement loop counter | |
460 | brne __udivmodqi4_loop | |
461 | com r_arg1 ; complement result | |
462 | ; because C flag was complemented in loop | |
90e7678c | 463 | ret |
1d26ac96 MM |
464 | .endfunc |
465 | #endif /* defined (L_udivmodqi4) */ | |
466 | ||
467 | #if defined (L_divmodqi4) | |
468 | .global __divmodqi4 | |
469 | .func __divmodqi4 | |
470 | __divmodqi4: | |
471 | bst r_arg1,7 ; store sign of dividend | |
90e7678c DC |
472 | mov __tmp_reg__,r_arg1 |
473 | eor __tmp_reg__,r_arg2; r0.7 is sign of result | |
474 | sbrc r_arg1,7 | |
1d26ac96 | 475 | neg r_arg1 ; dividend negative : negate |
90e7678c | 476 | sbrc r_arg2,7 |
1d26ac96 MM |
477 | neg r_arg2 ; divisor negative : negate |
478 | rcall __udivmodqi4 ; do the unsigned div/mod | |
479 | brtc __divmodqi4_1 | |
90e7678c | 480 | neg r_rem ; correct remainder sign |
1d26ac96 | 481 | __divmodqi4_1: |
90e7678c DC |
482 | sbrc __tmp_reg__,7 |
483 | neg r_arg1 ; correct result sign | |
1d26ac96 MM |
484 | __divmodqi4_exit: |
485 | ret | |
486 | .endfunc | |
487 | #endif /* defined (L_divmodqi4) */ | |
90e7678c DC |
488 | |
489 | #undef r_rem | |
490 | #undef r_arg1 | |
491 | #undef r_arg2 | |
492 | #undef r_cnt | |
493 | ||
494 | ||
495 | /******************************************************* | |
496 | Division 16 / 16 => (result + remainder) | |
497 | *******************************************************/ | |
498 | #define r_remL r26 /* remainder Low */ | |
499 | #define r_remH r27 /* remainder High */ | |
1d26ac96 MM |
500 | |
501 | /* return: remainder */ | |
90e7678c DC |
502 | #define r_arg1L r24 /* dividend Low */ |
503 | #define r_arg1H r25 /* dividend High */ | |
1d26ac96 MM |
504 | |
505 | /* return: quotient */ | |
90e7678c DC |
506 | #define r_arg2L r22 /* divisor Low */ |
507 | #define r_arg2H r23 /* divisor High */ | |
508 | ||
509 | #define r_cnt r21 /* loop count */ | |
90e7678c | 510 | |
1d26ac96 MM |
511 | #if defined (L_udivmodhi4) |
512 | .global __udivmodhi4 | |
513 | .func __udivmodhi4 | |
514 | __udivmodhi4: | |
90e7678c | 515 | sub r_remL,r_remL |
1d26ac96 | 516 | sub r_remH,r_remH ; clear remainder and carry |
90e7678c | 517 | ldi r_cnt,17 ; init loop counter |
1d26ac96 MM |
518 | rjmp __udivmodhi4_ep ; jump to entry point |
519 | __udivmodhi4_loop: | |
90e7678c DC |
520 | rol r_remL ; shift dividend into remainder |
521 | rol r_remH | |
522 | cp r_remL,r_arg2L ; compare remainder & divisor | |
523 | cpc r_remH,r_arg2H | |
1d26ac96 | 524 | brcs __udivmodhi4_ep ; remainder < divisor |
90e7678c DC |
525 | sub r_remL,r_arg2L ; restore remainder |
526 | sbc r_remH,r_arg2H | |
1d26ac96 | 527 | __udivmodhi4_ep: |
90e7678c DC |
528 | rol r_arg1L ; shift dividend (with CARRY) |
529 | rol r_arg1H | |
530 | dec r_cnt ; decrement loop counter | |
1d26ac96 MM |
531 | brne __udivmodhi4_loop |
532 | com r_arg1L | |
533 | com r_arg1H | |
534 | ; div/mod results to return registers, as for the div() function | |
535 | mov_l r_arg2L, r_arg1L ; quotient | |
536 | mov_h r_arg2H, r_arg1H | |
537 | mov_l r_arg1L, r_remL ; remainder | |
538 | mov_h r_arg1H, r_remH | |
539 | ret | |
540 | .endfunc | |
541 | #endif /* defined (L_udivmodhi4) */ | |
542 | ||
543 | #if defined (L_divmodhi4) | |
544 | .global __divmodhi4 | |
545 | .func __divmodhi4 | |
546 | __divmodhi4: | |
547 | .global _div | |
548 | _div: | |
549 | bst r_arg1H,7 ; store sign of dividend | |
550 | mov __tmp_reg__,r_arg1H | |
551 | eor __tmp_reg__,r_arg2H ; r0.7 is sign of result | |
552 | rcall __divmodhi4_neg1 ; dividend negative : negate | |
553 | sbrc r_arg2H,7 | |
554 | rcall __divmodhi4_neg2 ; divisor negative : negate | |
555 | rcall __udivmodhi4 ; do the unsigned div/mod | |
556 | rcall __divmodhi4_neg1 ; correct remainder sign | |
90e7678c | 557 | tst __tmp_reg__ |
1d26ac96 MM |
558 | brpl __divmodhi4_exit |
559 | __divmodhi4_neg2: | |
560 | com r_arg2H | |
561 | neg r_arg2L ; correct divisor/result sign | |
562 | sbci r_arg2H,0xff | |
563 | __divmodhi4_exit: | |
90e7678c | 564 | ret |
1d26ac96 MM |
565 | __divmodhi4_neg1: |
566 | brtc __divmodhi4_exit | |
90e7678c | 567 | com r_arg1H |
1d26ac96 MM |
568 | neg r_arg1L ; correct dividend/remainder sign |
569 | sbci r_arg1H,0xff | |
90e7678c | 570 | ret |
1d26ac96 MM |
571 | .endfunc |
572 | #endif /* defined (L_divmodhi4) */ | |
573 | ||
90e7678c DC |
574 | #undef r_remH |
575 | #undef r_remL | |
576 | ||
577 | #undef r_arg1H | |
578 | #undef r_arg1L | |
579 | ||
580 | #undef r_arg2H | |
581 | #undef r_arg2L | |
582 | ||
583 | #undef r_cnt | |
584 | ||
585 | /******************************************************* | |
586 | Division 32 / 32 => (result + remainder) | |
587 | *******************************************************/ | |
588 | #define r_remHH r31 /* remainder High */ | |
589 | #define r_remHL r30 | |
590 | #define r_remH r27 | |
591 | #define r_remL r26 /* remainder Low */ | |
1d26ac96 MM |
592 | |
593 | /* return: remainder */ | |
90e7678c DC |
594 | #define r_arg1HH r25 /* dividend High */ |
595 | #define r_arg1HL r24 | |
596 | #define r_arg1H r23 | |
597 | #define r_arg1L r22 /* dividend Low */ | |
1d26ac96 MM |
598 | |
599 | /* return: quotient */ | |
90e7678c DC |
600 | #define r_arg2HH r21 /* divisor High */ |
601 | #define r_arg2HL r20 | |
602 | #define r_arg2H r19 | |
603 | #define r_arg2L r18 /* divisor Low */ | |
604 | ||
bad3869a | 605 | #define r_cnt __zero_reg__ /* loop count (0 after the loop!) */ |
90e7678c | 606 | |
1d26ac96 MM |
607 | #if defined (L_udivmodsi4) |
608 | .global __udivmodsi4 | |
609 | .func __udivmodsi4 | |
610 | __udivmodsi4: | |
bad3869a MM |
611 | ldi r_remL, 33 ; init loop counter |
612 | mov r_cnt, r_remL | |
90e7678c | 613 | sub r_remL,r_remL |
1d26ac96 MM |
614 | sub r_remH,r_remH ; clear remainder and carry |
615 | mov_l r_remHL, r_remL | |
616 | mov_h r_remHH, r_remH | |
617 | rjmp __udivmodsi4_ep ; jump to entry point | |
618 | __udivmodsi4_loop: | |
90e7678c DC |
619 | rol r_remL ; shift dividend into remainder |
620 | rol r_remH | |
621 | rol r_remHL | |
622 | rol r_remHH | |
623 | cp r_remL,r_arg2L ; compare remainder & divisor | |
624 | cpc r_remH,r_arg2H | |
625 | cpc r_remHL,r_arg2HL | |
626 | cpc r_remHH,r_arg2HH | |
1d26ac96 | 627 | brcs __udivmodsi4_ep ; remainder < divisor
90e7678c DC |
628 | sub r_remL,r_arg2L ; restore remainder |
629 | sbc r_remH,r_arg2H | |
630 | sbc r_remHL,r_arg2HL | |
631 | sbc r_remHH,r_arg2HH | |
1d26ac96 | 632 | __udivmodsi4_ep: |
90e7678c DC |
633 | rol r_arg1L ; shift dividend (with CARRY) |
634 | rol r_arg1H | |
635 | rol r_arg1HL | |
636 | rol r_arg1HH | |
637 | dec r_cnt ; decrement loop counter | |
1d26ac96 | 638 | brne __udivmodsi4_loop |
bad3869a | 639 | ; __zero_reg__ now restored (r_cnt == 0) |
90e7678c DC |
640 | com r_arg1L |
641 | com r_arg1H | |
642 | com r_arg1HL | |
643 | com r_arg1HH | |
1d26ac96 MM |
644 | ; div/mod results to return registers, as for the ldiv() function |
645 | mov_l r_arg2L, r_arg1L ; quotient | |
646 | mov_h r_arg2H, r_arg1H | |
647 | mov_l r_arg2HL, r_arg1HL | |
648 | mov_h r_arg2HH, r_arg1HH | |
649 | mov_l r_arg1L, r_remL ; remainder | |
650 | mov_h r_arg1H, r_remH | |
651 | mov_l r_arg1HL, r_remHL | |
652 | mov_h r_arg1HH, r_remHH | |
90e7678c | 653 | ret |
1d26ac96 MM |
654 | .endfunc |
655 | #endif /* defined (L_udivmodsi4) */ | |
656 | ||
657 | #if defined (L_divmodsi4) | |
658 | .global __divmodsi4 | |
659 | .func __divmodsi4 | |
660 | __divmodsi4: | |
661 | bst r_arg1HH,7 ; store sign of dividend | |
662 | mov __tmp_reg__,r_arg1HH | |
663 | eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result | |
664 | rcall __divmodsi4_neg1 ; dividend negative : negate | |
665 | sbrc r_arg2HH,7 | |
666 | rcall __divmodsi4_neg2 ; divisor negative : negate | |
667 | rcall __udivmodsi4 ; do the unsigned div/mod | |
668 | rcall __divmodsi4_neg1 ; correct remainder sign | |
669 | rol __tmp_reg__ | |
670 | brcc __divmodsi4_exit | |
671 | __divmodsi4_neg2: | |
672 | com r_arg2HH | |
673 | com r_arg2HL | |
674 | com r_arg2H | |
675 | neg r_arg2L ; correct divisor/quotient sign | |
676 | sbci r_arg2H,0xff | |
677 | sbci r_arg2HL,0xff | |
678 | sbci r_arg2HH,0xff | |
679 | __divmodsi4_exit: | |
680 | ret | |
681 | __divmodsi4_neg1: | |
682 | brtc __divmodsi4_exit | |
683 | com r_arg1HH | |
684 | com r_arg1HL | |
685 | com r_arg1H | |
686 | neg r_arg1L ; correct dividend/remainder sign | |
687 | sbci r_arg1H, 0xff | |
688 | sbci r_arg1HL,0xff | |
689 | sbci r_arg1HH,0xff | |
690 | ret | |
691 | .endfunc | |
692 | #endif /* defined (L_divmodsi4) */ | |
90e7678c DC |
693 | |
694 | /********************************** | |
695 | * This is a prologue subroutine | |
696 | **********************************/ | |
bad3869a | 697 | #if defined (L_prologue) |
90e7678c | 698 | |
cdd9eb8f DC |
699 | .global __prologue_saves__ |
700 | .func __prologue_saves__ | |
701 | __prologue_saves__: | |
90e7678c DC |
702 | push r2 |
703 | push r3 | |
704 | push r4 | |
705 | push r5 | |
706 | push r6 | |
707 | push r7 | |
708 | push r8 | |
709 | push r9 | |
710 | push r10 | |
711 | push r11 | |
712 | push r12 | |
713 | push r13 | |
714 | push r14 | |
715 | push r15 | |
716 | push r16 | |
717 | push r17 | |
718 | push r28 | |
719 | push r29 | |
720 | in r28,__SP_L__ | |
721 | in r29,__SP_H__ | |
90e7678c DC |
722 | sub r28,r26 |
723 | sbc r29,r27 | |
724 | in __tmp_reg__,__SREG__ | |
725 | cli | |
90e7678c | 726 | out __SP_H__,r29 |
78cf8279 MM |
727 | out __SREG__,__tmp_reg__ |
728 | out __SP_L__,r28 | |
693092fb BH |
729 | #if defined (__AVR_HAVE_EIJMP_EICALL__) |
730 | eijmp | |
731 | #else | |
90e7678c | 732 | ijmp |
693092fb BH |
733 | #endif |
734 | ||
cdd9eb8f | 735 | .endfunc |
bad3869a | 736 | #endif /* defined (L_prologue) */ |
90e7678c DC |
737 | |
738 | /* | |
56b871c1 | 739 | * This is an epilogue subroutine |
90e7678c | 740 | */ |
bad3869a | 741 | #if defined (L_epilogue) |
90e7678c | 742 | |
cdd9eb8f DC |
743 | .global __epilogue_restores__ |
744 | .func __epilogue_restores__ | |
745 | __epilogue_restores__: | |
90e7678c DC |
746 | ldd r2,Y+18 |
747 | ldd r3,Y+17 | |
748 | ldd r4,Y+16 | |
749 | ldd r5,Y+15 | |
750 | ldd r6,Y+14 | |
751 | ldd r7,Y+13 | |
752 | ldd r8,Y+12 | |
753 | ldd r9,Y+11 | |
754 | ldd r10,Y+10 | |
755 | ldd r11,Y+9 | |
756 | ldd r12,Y+8 | |
757 | ldd r13,Y+7 | |
758 | ldd r14,Y+6 | |
759 | ldd r15,Y+5 | |
760 | ldd r16,Y+4 | |
761 | ldd r17,Y+3 | |
762 | ldd r26,Y+2 | |
763 | ldd r27,Y+1 | |
764 | add r28,r30 | |
765 | adc r29,__zero_reg__ | |
766 | in __tmp_reg__,__SREG__ | |
767 | cli | |
90e7678c | 768 | out __SP_H__,r29 |
78cf8279 MM |
769 | out __SREG__,__tmp_reg__ |
770 | out __SP_L__,r28 | |
1d26ac96 MM |
771 | mov_l r28, r26 |
772 | mov_h r29, r27 | |
90e7678c | 773 | ret |
6bec29c9 DC |
774 | .endfunc |
775 | #endif /* defined (L_epilogue) */ | |
90e7678c | 776 | |
6bec29c9 | 777 | #ifdef L_exit |
9af145ae MM |
778 | .section .fini9,"ax",@progbits |
779 | .global _exit | |
cdd9eb8f DC |
780 | .func _exit |
781 | _exit: | |
9af145ae MM |
782 | .weak exit |
783 | exit: | |
0f4e946f | 784 | .endfunc |
9af145ae MM |
785 | |
786 | /* Code from .fini8 ... .fini1 sections inserted by ld script. */ | |
787 | ||
788 | .section .fini0,"ax",@progbits | |
0a84fec6 | 789 | cli |
9af145ae MM |
790 | __stop_program: |
791 | rjmp __stop_program | |
6bec29c9 DC |
792 | #endif /* defined (L_exit) */ |
793 | ||
794 | #ifdef L_cleanup | |
c4984bad | 795 | .weak _cleanup |
6bec29c9 | 796 | .func _cleanup |
c4984bad MM |
797 | _cleanup: |
798 | ret | |
cdd9eb8f | 799 | .endfunc |
6bec29c9 DC |
800 | #endif /* defined (L_cleanup) */ |
801 | ||
802 | #ifdef L_tablejump | |
1268b05f MM |
803 | .global __tablejump2__ |
804 | .func __tablejump2__ | |
805 | __tablejump2__: | |
806 | lsl r30 | |
807 | rol r31 | |
9af145ae MM |
808 | .global __tablejump__ |
809 | __tablejump__: | |
dd6d1f8c | 810 | #if defined (__AVR_HAVE_LPMX__) |
6bec29c9 DC |
811 | lpm __tmp_reg__, Z+ |
812 | lpm r31, Z | |
813 | mov r30, __tmp_reg__ | |
693092fb BH |
814 | |
815 | #if defined (__AVR_HAVE_EIJMP_EICALL__) | |
816 | eijmp | |
817 | #else | |
6bec29c9 | 818 | ijmp |
693092fb BH |
819 | #endif |
820 | ||
6bec29c9 DC |
821 | #else |
822 | lpm | |
9af145ae | 823 | adiw r30, 1 |
6bec29c9 | 824 | push r0 |
6bec29c9 DC |
825 | lpm |
826 | push r0 | |
693092fb BH |
827 | #if defined (__AVR_HAVE_EIJMP_EICALL__) |
828 | push __zero_reg__ | |
829 | #endif | |
6bec29c9 | 830 | ret |
cdd9eb8f | 831 | #endif |
9af145ae | 832 | .endfunc |
6bec29c9 | 833 | #endif /* defined (L_tablejump) */ |
bad3869a | 834 | |
9af145ae MM |
835 | #ifdef L_copy_data |
836 | .section .init4,"ax",@progbits | |
837 | .global __do_copy_data | |
838 | __do_copy_data: | |
34d02d17 AS |
839 | #if defined(__AVR_HAVE_ELPMX__) |
840 | ldi r17, hi8(__data_end) | |
841 | ldi r26, lo8(__data_start) | |
842 | ldi r27, hi8(__data_start) | |
843 | ldi r30, lo8(__data_load_start) | |
844 | ldi r31, hi8(__data_load_start) | |
845 | ldi r16, hh8(__data_load_start) | |
846 | out __RAMPZ__, r16 | |
847 | rjmp .L__do_copy_data_start | |
848 | .L__do_copy_data_loop: | |
849 | elpm r0, Z+ | |
850 | st X+, r0 | |
851 | .L__do_copy_data_start: | |
852 | cpi r26, lo8(__data_end) | |
853 | cpc r27, r17 | |
854 | brne .L__do_copy_data_loop | |
855 | #elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) | |
856 | ldi r17, hi8(__data_end) | |
857 | ldi r26, lo8(__data_start) | |
858 | ldi r27, hi8(__data_start) | |
859 | ldi r30, lo8(__data_load_start) | |
860 | ldi r31, hi8(__data_load_start) | |
861 | ldi r16, hh8(__data_load_start - 0x10000) | |
862 | .L__do_copy_data_carry: | |
863 | inc r16 | |
864 | out __RAMPZ__, r16 | |
865 | rjmp .L__do_copy_data_start | |
866 | .L__do_copy_data_loop: | |
867 | elpm | |
868 | st X+, r0 | |
869 | adiw r30, 1 | |
870 | brcs .L__do_copy_data_carry | |
871 | .L__do_copy_data_start: | |
872 | cpi r26, lo8(__data_end) | |
873 | cpc r27, r17 | |
874 | brne .L__do_copy_data_loop | |
875 | #elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) | |
9af145ae MM |
876 | ldi r17, hi8(__data_end) |
877 | ldi r26, lo8(__data_start) | |
878 | ldi r27, hi8(__data_start) | |
879 | ldi r30, lo8(__data_load_start) | |
880 | ldi r31, hi8(__data_load_start) | |
34d02d17 AS |
881 | rjmp .L__do_copy_data_start |
882 | .L__do_copy_data_loop: | |
7ed9c001 | 883 | #if defined (__AVR_HAVE_LPMX__) |
9af145ae MM |
884 | lpm r0, Z+ |
885 | #else | |
886 | lpm | |
887 | adiw r30, 1 | |
888 | #endif | |
889 | st X+, r0 | |
34d02d17 | 890 | .L__do_copy_data_start: |
9af145ae MM |
891 | cpi r26, lo8(__data_end) |
892 | cpc r27, r17 | |
34d02d17 AS |
893 | brne .L__do_copy_data_loop |
894 | #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ | |
9af145ae MM |
895 | #endif /* L_copy_data */ |
896 | ||
897 | /* __do_clear_bss is only necessary if there is anything in .bss section. */ | |
898 | ||
899 | #ifdef L_clear_bss | |
900 | .section .init4,"ax",@progbits | |
901 | .global __do_clear_bss | |
902 | __do_clear_bss: | |
903 | ldi r17, hi8(__bss_end) | |
904 | ldi r26, lo8(__bss_start) | |
905 | ldi r27, hi8(__bss_start) | |
906 | rjmp .do_clear_bss_start | |
907 | .do_clear_bss_loop: | |
908 | st X+, __zero_reg__ | |
909 | .do_clear_bss_start: | |
910 | cpi r26, lo8(__bss_end) | |
911 | cpc r27, r17 | |
912 | brne .do_clear_bss_loop | |
913 | #endif /* L_clear_bss */ | |
914 | ||
915 | /* __do_global_ctors and __do_global_dtors are only necessary | |
916 | if there are any constructors/destructors. */ | |
917 | ||
9af145ae MM |
918 | #ifdef L_ctors |
919 | .section .init6,"ax",@progbits | |
920 | .global __do_global_ctors | |
33faafca | 921 | #if defined(__AVR_HAVE_RAMPZ__) |
9af145ae MM |
922 | __do_global_ctors: |
923 | ldi r17, hi8(__ctors_start) | |
924 | ldi r28, lo8(__ctors_end) | |
925 | ldi r29, hi8(__ctors_end) | |
3a7bfdd5 | 926 | ldi r16, hh8(__ctors_end) |
33faafca AS |
927 | rjmp .L__do_global_ctors_start |
928 | .L__do_global_ctors_loop: | |
929 | sbiw r28, 2 | |
3a7bfdd5 | 930 | sbc r16, __zero_reg__ |
33faafca AS |
931 | mov_h r31, r29 |
932 | mov_l r30, r28 | |
3a7bfdd5 | 933 | out __RAMPZ__, r16 |
33faafca AS |
934 | XCALL __tablejump_elpm__ |
935 | .L__do_global_ctors_start: | |
936 | cpi r28, lo8(__ctors_start) | |
937 | cpc r29, r17 | |
3a7bfdd5 GJL |
938 | ldi r24, hh8(__ctors_start) |
939 | cpc r16, r24 | |
33faafca AS |
940 | brne .L__do_global_ctors_loop |
941 | #else | |
942 | __do_global_ctors: | |
943 | ldi r17, hi8(__ctors_start) | |
944 | ldi r28, lo8(__ctors_end) | |
945 | ldi r29, hi8(__ctors_end) | |
946 | rjmp .L__do_global_ctors_start | |
947 | .L__do_global_ctors_loop: | |
9af145ae MM |
948 | sbiw r28, 2 |
949 | mov_h r31, r29 | |
950 | mov_l r30, r28 | |
951 | XCALL __tablejump__ | |
33faafca | 952 | .L__do_global_ctors_start: |
9af145ae MM |
953 | cpi r28, lo8(__ctors_start) |
954 | cpc r29, r17 | |
33faafca AS |
955 | brne .L__do_global_ctors_loop |
956 | #endif /* defined(__AVR_HAVE_RAMPZ__) */ | |
9af145ae MM |
957 | #endif /* L_ctors */ |
958 | ||
959 | #ifdef L_dtors | |
960 | .section .fini6,"ax",@progbits | |
961 | .global __do_global_dtors | |
33faafca | 962 | #if defined(__AVR_HAVE_RAMPZ__) |
9af145ae MM |
963 | __do_global_dtors: |
964 | ldi r17, hi8(__dtors_end) | |
965 | ldi r28, lo8(__dtors_start) | |
966 | ldi r29, hi8(__dtors_start) | |
3a7bfdd5 | 967 | ldi r16, hh8(__dtors_start) |
33faafca AS |
968 | rjmp .L__do_global_dtors_start |
969 | .L__do_global_dtors_loop: | |
970 | sbiw r28, 2 | |
3a7bfdd5 | 971 | sbc r16, __zero_reg__ |
33faafca AS |
972 | mov_h r31, r29 |
973 | mov_l r30, r28 | |
3a7bfdd5 | 974 | out __RAMPZ__, r16 |
33faafca AS |
975 | XCALL __tablejump_elpm__ |
976 | .L__do_global_dtors_start: | |
977 | cpi r28, lo8(__dtors_end) | |
978 | cpc r29, r17 | |
3a7bfdd5 GJL |
979 | ldi r24, hh8(__dtors_end) |
980 | cpc r16, r24 | |
33faafca AS |
981 | brne .L__do_global_dtors_loop |
982 | #else | |
983 | __do_global_dtors: | |
984 | ldi r17, hi8(__dtors_end) | |
985 | ldi r28, lo8(__dtors_start) | |
986 | ldi r29, hi8(__dtors_start) | |
987 | rjmp .L__do_global_dtors_start | |
988 | .L__do_global_dtors_loop: | |
9af145ae MM |
989 | mov_h r31, r29 |
990 | mov_l r30, r28 | |
991 | XCALL __tablejump__ | |
992 | adiw r28, 2 | |
33faafca | 993 | .L__do_global_dtors_start: |
9af145ae MM |
994 | cpi r28, lo8(__dtors_end) |
995 | cpc r29, r17 | |
33faafca AS |
996 | brne .L__do_global_dtors_loop |
997 | #endif /* defined(__AVR_HAVE_RAMPZ__) */ | |
9af145ae MM |
998 | #endif /* L_dtors */ |
999 | ||
33faafca AS |
1000 | #ifdef L_tablejump_elpm |
1001 | .global __tablejump_elpm__ | |
1002 | .func __tablejump_elpm__ | |
1003 | __tablejump_elpm__: | |
1004 | #if defined (__AVR_HAVE_ELPM__) | |
1005 | #if defined (__AVR_HAVE_LPMX__) | |
1006 | elpm __tmp_reg__, Z+ | |
1007 | elpm r31, Z | |
1008 | mov r30, __tmp_reg__ | |
1009 | #if defined (__AVR_HAVE_EIJMP_EICALL__) | |
1010 | eijmp | |
1011 | #else | |
1012 | ijmp | |
1013 | #endif | |
1014 | ||
1015 | #else | |
1016 | elpm | |
1017 | adiw r30, 1 | |
1018 | push r0 | |
1019 | elpm | |
1020 | push r0 | |
1021 | #if defined (__AVR_HAVE_EIJMP_EICALL__) | |
1022 | push __zero_reg__ | |
1023 | #endif | |
1024 | ret | |
1025 | #endif | |
1026 | #endif /* defined (__AVR_HAVE_ELPM__) */ | |
1027 | .endfunc | |
1028 | #endif /* defined (L_tablejump_elpm) */ | |
1029 | ||
0ad8bb3b GJL |
1030 | \f |
1031 | /********************************** | |
1032 | * Find first set Bit (ffs) | |
1033 | **********************************/ | |
1034 | ||
1035 | #if defined (L_ffssi2) | |
1036 | ;; find first set bit | |
1037 | ;; r25:r24 = ffs32 (r25:r22) | |
1038 | ;; clobbers: r22, r26 | |
1039 | DEFUN __ffssi2 | |
1040 | clr r26 | |
1041 | tst r22 | |
1042 | brne 1f | |
1043 | subi r26, -8 | |
1044 | or r22, r23 | |
1045 | brne 1f | |
1046 | subi r26, -8 | |
1047 | or r22, r24 | |
1048 | brne 1f | |
1049 | subi r26, -8 | |
1050 | or r22, r25 | |
1051 | brne 1f | |
1052 | ret | |
1053 | 1: mov r24, r22 | |
1054 | XJMP __loop_ffsqi2 | |
1055 | ENDF __ffssi2 | |
1056 | #endif /* defined (L_ffssi2) */ | |
1057 | ||
1058 | #if defined (L_ffshi2) | |
1059 | ;; find first set bit | |
1060 | ;; r25:r24 = ffs16 (r25:r24) | |
1061 | ;; clobbers: r26 | |
1062 | DEFUN __ffshi2 | |
1063 | clr r26 | |
1064 | cpse r24, __zero_reg__ | |
1065 | 1: XJMP __loop_ffsqi2 | |
1066 | ldi r26, 8 | |
1067 | or r24, r25 | |
1068 | brne 1b | |
1069 | ret | |
1070 | ENDF __ffshi2 | |
1071 | #endif /* defined (L_ffshi2) */ | |
1072 | ||
1073 | #if defined (L_loop_ffsqi2) | |
1074 | ;; Helper for ffshi2, ffssi2 | |
1075 | ;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) | |
1076 | ;; r24 must be != 0 | |
1077 | ;; clobbers: r26 | |
1078 | DEFUN __loop_ffsqi2 | |
1079 | inc r26 | |
1080 | lsr r24 | |
1081 | brcc __loop_ffsqi2 | |
1082 | mov r24, r26 | |
1083 | clr r25 | |
1084 | ret | |
1085 | ENDF __loop_ffsqi2 | |
1086 | #endif /* defined (L_loop_ffsqi2) */ | |
1087 | ||
1088 | \f | |
1089 | /********************************** | |
1090 | * Count trailing Zeros (ctz) | |
1091 | **********************************/ | |
1092 | ||
1093 | #if defined (L_ctzsi2) | |
1094 | ;; count trailing zeros | |
1095 | ;; r25:r24 = ctz32 (r25:r22) | |
1096 | ;; ctz(0) = 32 | |
1097 | DEFUN __ctzsi2 | |
1098 | XCALL __ffssi2 | |
1099 | dec r24 | |
1100 | sbrc r24, 7 | |
1101 | ldi r24, 32 | |
1102 | ret | |
1103 | ENDF __ctzsi2 | |
1104 | #endif /* defined (L_ctzsi2) */ | |
1105 | ||
1106 | #if defined (L_ctzhi2) | |
1107 | ;; count trailing zeros | |
1108 | ;; r25:r24 = ctz16 (r25:r24) | |
1109 | ;; ctz(0) = 16 | |
1110 | DEFUN __ctzhi2 | |
1111 | XCALL __ffshi2 | |
1112 | dec r24 | |
1113 | sbrc r24, 7 | |
1114 | ldi r24, 16 | |
1115 | ret | |
1116 | ENDF __ctzhi2 | |
1117 | #endif /* defined (L_ctzhi2) */ | |
1118 | ||
1119 | \f | |
1120 | /********************************** | |
1121 | * Count leading Zeros (clz) | |
1122 | **********************************/ | |
1123 | ||
1124 | #if defined (L_clzdi2) | |
1125 | ;; count leading zeros | |
1126 | ;; r25:r24 = clz64 (r25:r18) | |
1127 | ;; clobbers: r22, r23, r26 | |
1128 | DEFUN __clzdi2 | |
1129 | XCALL __clzsi2 | |
1130 | sbrs r24, 5 | |
1131 | ret | |
1132 | mov_l r22, r18 | |
1133 | mov_h r23, r19 | |
1134 | mov_l r24, r20 | |
1135 | mov_h r25, r21 | |
1136 | XCALL __clzsi2 | |
1137 | subi r24, -32 | |
1138 | ret | |
1139 | ENDF __clzdi2 | |
1140 | #endif /* defined (L_clzdi2) */ | |
1141 | ||
1142 | #if defined (L_clzsi2) | |
1143 | ;; count leading zeros | |
1144 | ;; r25:r24 = clz32 (r25:r22) | |
1145 | ;; clobbers: r26 | |
1146 | DEFUN __clzsi2 | |
1147 | XCALL __clzhi2 | |
1148 | sbrs r24, 4 | |
1149 | ret | |
1150 | mov_l r24, r22 | |
1151 | mov_h r25, r23 | |
1152 | XCALL __clzhi2 | |
1153 | subi r24, -16 | |
1154 | ret | |
1155 | ENDF __clzsi2 | |
1156 | #endif /* defined (L_clzsi2) */ | |
1157 | ||
1158 | #if defined (L_clzhi2) | |
1159 | ;; count leading zeros | |
1160 | ;; r25:r24 = clz16 (r25:r24) | |
1161 | ;; clobbers: r26 | |
1162 | DEFUN __clzhi2 | |
1163 | clr r26 | |
1164 | tst r25 | |
1165 | brne 1f | |
1166 | subi r26, -8 | |
1167 | or r25, r24 | |
1168 | brne 1f | |
1169 | ldi r24, 16 | |
1170 | ret | |
1171 | 1: cpi r25, 16 | |
1172 | brsh 3f | |
1173 | subi r26, -3 | |
1174 | swap r25 | |
1175 | 2: inc r26 | |
1176 | 3: lsl r25 | |
1177 | brcc 2b | |
1178 | mov r24, r26 | |
1179 | clr r25 | |
1180 | ret | |
1181 | ENDF __clzhi2 | |
1182 | #endif /* defined (L_clzhi2) */ | |
1183 | ||
1184 | \f | |
1185 | /********************************** | |
1186 | * Parity | |
1187 | **********************************/ | |
1188 | ||
1189 | #if defined (L_paritydi2) | |
1190 | ;; r25:r24 = parity64 (r25:r18) | |
1191 | ;; clobbers: __tmp_reg__ | |
1192 | DEFUN __paritydi2 | |
1193 | eor r24, r18 | |
1194 | eor r24, r19 | |
1195 | eor r24, r20 | |
1196 | eor r24, r21 | |
1197 | XJMP __paritysi2 | |
1198 | ENDF __paritydi2 | |
1199 | #endif /* defined (L_paritydi2) */ | |
1200 | ||
1201 | #if defined (L_paritysi2) | |
1202 | ;; r25:r24 = parity32 (r25:r22) | |
1203 | ;; clobbers: __tmp_reg__ | |
1204 | DEFUN __paritysi2 | |
1205 | eor r24, r22 | |
1206 | eor r24, r23 | |
1207 | XJMP __parityhi2 | |
1208 | ENDF __paritysi2 | |
1209 | #endif /* defined (L_paritysi2) */ | |
1210 | ||
1211 | #if defined (L_parityhi2) | |
1212 | ;; r25:r24 = parity16 (r25:r24) | |
1213 | ;; clobbers: __tmp_reg__ | |
1214 | DEFUN __parityhi2 | |
1215 | eor r24, r25 | |
1216 | ;; FALLTHRU | |
1217 | ENDF __parityhi2 | |
1218 | ||
1219 | ;; r25:r24 = parity8 (r24) | |
1220 | ;; clobbers: __tmp_reg__ | |
1221 | DEFUN __parityqi2 | |
1222 | ;; parity is in r24[0..7] | |
1223 | mov __tmp_reg__, r24 | |
1224 | swap __tmp_reg__ | |
1225 | eor r24, __tmp_reg__ | |
1226 | ;; parity is in r24[0..3] | |
1227 | subi r24, -4 | |
1228 | andi r24, -5 | |
1229 | subi r24, -6 | |
1230 | ;; parity is in r24[0,3] | |
1231 | sbrc r24, 3 | |
1232 | inc r24 | |
1233 | ;; parity is in r24[0] | |
1234 | andi r24, 1 | |
1235 | clr r25 | |
1236 | ret | |
1237 | ENDF __parityqi2 | |
1238 | #endif /* defined (L_parityhi2) */ | |
1239 | ||
1240 | \f | |
1241 | /********************************** | |
1242 | * Population Count | |
1243 | **********************************/ | |
1244 | ||
1245 | #if defined (L_popcounthi2) | |
1246 | ;; population count | |
1247 | ;; r25:r24 = popcount16 (r25:r24) | |
1248 | ;; clobbers: r30, __tmp_reg__ | |
1249 | DEFUN __popcounthi2 | |
1250 | XCALL __popcountqi2 | |
1251 | mov r30, r24 | |
1252 | mov r24, r25 | |
1253 | XCALL __popcountqi2 | |
1254 | add r24, r30 | |
1255 | clr r25 | |
1256 | ret | |
1257 | ENDF __popcounthi2 | |
1258 | #endif /* defined (L_popcounthi2) */ | |
1259 | ||
1260 | #if defined (L_popcountsi2) | |
1261 | ;; population count | |
1262 | ;; r25:r24 = popcount32 (r25:r22) | |
1263 | ;; clobbers: r26, r30, __tmp_reg__ | |
1264 | DEFUN __popcountsi2 | |
1265 | XCALL __popcounthi2 | |
1266 | mov r26, r24 | |
1267 | mov_l r24, r22 | |
1268 | mov_h r25, r23 | |
1269 | XCALL __popcounthi2 | |
1270 | add r24, r26 | |
1271 | ret | |
1272 | ENDF __popcountsi2 | |
1273 | #endif /* defined (L_popcountsi2) */ | |
1274 | ||
1275 | #if defined (L_popcountdi2) | |
1276 | ;; population count | |
1277 | ;; r25:r24 = popcount64 (r25:r18) | |
1278 | ;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__ | |
1279 | DEFUN __popcountdi2 | |
1280 | XCALL __popcountsi2 | |
1281 | mov r27, r24 | |
1282 | mov_l r22, r18 | |
1283 | mov_h r23, r19 | |
1284 | mov_l r24, r20 | |
1285 | mov_h r25, r21 | |
1286 | XCALL __popcountsi2 | |
1287 | add r24, r27 | |
1288 | ret | |
1289 | ENDF __popcountdi2 | |
1290 | #endif /* defined (L_popcountdi2) */ | |
1291 | ||
1292 | #if defined (L_popcountqi2) | |
1293 | ;; population count | |
1294 | ;; r24 = popcount8 (r24) | |
1295 | ;; clobbers: __tmp_reg__ | |
1296 | DEFUN __popcountqi2 | |
1297 | mov __tmp_reg__, r24 | |
1298 | andi r24, 1 | |
1299 | lsr __tmp_reg__ | |
1300 | lsr __tmp_reg__ | |
1301 | adc r24, __zero_reg__ | |
1302 | lsr __tmp_reg__ | |
1303 | adc r24, __zero_reg__ | |
1304 | lsr __tmp_reg__ | |
1305 | adc r24, __zero_reg__ | |
1306 | lsr __tmp_reg__ | |
1307 | adc r24, __zero_reg__ | |
1308 | lsr __tmp_reg__ | |
1309 | adc r24, __zero_reg__ | |
1310 | lsr __tmp_reg__ | |
1311 | adc r24, __tmp_reg__ | |
1312 | ret | |
1313 | ENDF __popcountqi2 | |
1314 | #endif /* defined (L_popcountqi2) */ | |
1315 | ||
1316 | \f | |
1317 | /********************************** | |
1318 | * Swap bytes | |
1319 | **********************************/ | |
1320 | ||
1321 | ;; swap two registers with different register number | |
1322 | .macro bswap a, b | |
1323 | eor \a, \b | |
1324 | eor \b, \a | |
1325 | eor \a, \b | |
1326 | .endm | |
1327 | ||
1328 | #if defined (L_bswapsi2) | |
1329 | ;; swap bytes | |
1330 | ;; r25:r22 = bswap32 (r25:r22) | |
1331 | DEFUN __bswapsi2 | |
1332 | bswap r22, r25 | |
1333 | bswap r23, r24 | |
1334 | ret | |
1335 | ENDF __bswapsi2 | |
1336 | #endif /* defined (L_bswapsi2) */ | |
1337 | ||
1338 | #if defined (L_bswapdi2) | |
1339 | ;; swap bytes | |
1340 | ;; r25:r18 = bswap64 (r25:r18) | |
1341 | DEFUN __bswapdi2 | |
1342 | bswap r18, r25 | |
1343 | bswap r19, r24 | |
1344 | bswap r20, r23 | |
1345 | bswap r21, r22 | |
1346 | ret | |
1347 | ENDF __bswapdi2 | |
1348 | #endif /* defined (L_bswapdi2) */ | |
1349 | ||
1350 | \f | |
1351 | /********************************** | |
1352 | * 64-bit shifts | |
1353 | **********************************/ | |
1354 | ||
1355 | #if defined (L_ashrdi3) | |
1356 | ;; Arithmetic shift right | |
1357 | ;; r25:r18 = ashr64 (r25:r18, r17:r16) | |
1358 | DEFUN __ashrdi3 | |
1359 | push r16 | |
fbd05da0 | 1360 | andi r16, 63 |
0ad8bb3b GJL |
1361 | breq 2f |
1362 | 1: asr r25 | |
1363 | ror r24 | |
1364 | ror r23 | |
1365 | ror r22 | |
1366 | ror r21 | |
1367 | ror r20 | |
1368 | ror r19 | |
1369 | ror r18 | |
1370 | dec r16 | |
1371 | brne 1b | |
1372 | 2: pop r16 | |
1373 | ret | |
1374 | ENDF __ashrdi3 | |
1375 | #endif /* defined (L_ashrdi3) */ | |
1376 | ||
1377 | #if defined (L_lshrdi3) | |
1378 | ;; Logic shift right | |
1379 | ;; r25:r18 = lshr64 (r25:r18, r17:r16) | |
1380 | DEFUN __lshrdi3 | |
1381 | push r16 | |
fbd05da0 | 1382 | andi r16, 63 |
0ad8bb3b GJL |
1383 | breq 2f |
1384 | 1: lsr r25 | |
1385 | ror r24 | |
1386 | ror r23 | |
1387 | ror r22 | |
1388 | ror r21 | |
1389 | ror r20 | |
1390 | ror r19 | |
1391 | ror r18 | |
1392 | dec r16 | |
1393 | brne 1b | |
1394 | 2: pop r16 | |
1395 | ret | |
1396 | ENDF __lshrdi3 | |
1397 | #endif /* defined (L_lshrdi3) */ | |
1398 | ||
1399 | #if defined (L_ashldi3) | |
1400 | ;; Shift left | |
1401 | ;; r25:r18 = ashl64 (r25:r18, r17:r16) | |
1402 | DEFUN __ashldi3 | |
1403 | push r16 | |
fbd05da0 | 1404 | andi r16, 63 |
0ad8bb3b GJL |
1405 | breq 2f |
1406 | 1: lsl r18 | |
1407 | rol r19 | |
1408 | rol r20 | |
1409 | rol r21 | |
1410 | rol r22 | |
1411 | rol r23 | |
1412 | rol r24 | |
1413 | rol r25 | |
1414 | dec r16 | |
1415 | brne 1b | |
1416 | 2: pop r16 | |
1417 | ret | |
1418 | ENDF __ashldi3 | |
1419 | #endif /* defined (L_ashldi3) */ | |
f451d14d GJL |
1420 | |
1421 | ||
1422 | /***********************************************************/ | |
1423 | ;;; Softmul versions of FMUL, FMULS and FMULSU to implement | |
1424 | ;;; __builtin_avr_fmul* if !AVR_HAVE_MUL | |
1425 | /***********************************************************/ | |
1426 | ||
1427 | #define A1 24 | |
1428 | #define B1 25 | |
1429 | #define C0 22 | |
1430 | #define C1 23 | |
1431 | #define A0 __tmp_reg__ | |
1432 | ||
1433 | #ifdef L_fmuls | |
1434 | ;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction | |
1435 | ;;; Clobbers: r24, r25, __tmp_reg__ | |
1436 | DEFUN __fmuls | |
1437 | ;; A0.7 = negate result? | |
1438 | mov A0, A1 | |
1439 | eor A0, B1 | |
1440 | ;; B1 = |B1| | |
1441 | sbrc B1, 7 | |
1442 | neg B1 | |
1443 | XJMP __fmulsu_exit | |
1444 | ENDF __fmuls | |
1445 | #endif /* L_fmuls */ | |
1446 | ||
1447 | #ifdef L_fmulsu | |
1448 | ;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction | |
1449 | ;;; Clobbers: r24, r25, __tmp_reg__ | |
1450 | DEFUN __fmulsu | |
1451 | ;; A0.7 = negate result? | |
1452 | mov A0, A1 | |
1453 | ;; FALLTHRU | |
1454 | ENDF __fmulsu | |
1455 | ||
1456 | ;; Helper for __fmuls and __fmulsu | |
1457 | DEFUN __fmulsu_exit | |
1458 | ;; A1 = |A1| | |
1459 | sbrc A1, 7 | |
1460 | neg A1 | |
1461 | #ifdef __AVR_HAVE_JMP_CALL__ | |
1462 | ;; Some cores have problem skipping 2-word instruction | |
1463 | tst A0 | |
1464 | brmi 1f | |
1465 | #else | |
1466 | sbrs A0, 7 | |
1467 | #endif /* __AVR_HAVE_JMP_CALL__ */ | |
1468 | XJMP __fmul | |
1469 | 1: XCALL __fmul | |
1470 | ;; C = -C iff A0.7 = 1 | |
1471 | com C1 | |
1472 | neg C0 | |
1473 | sbci C1, -1 | |
1474 | ret | |
1475 | ENDF __fmulsu_exit | |
1476 | #endif /* L_fmulsu */ | |
1477 | ||
1478 | ||
1479 | #ifdef L_fmul | |
1480 | ;;; r22:r23 = fmul (r24, r25) like in FMUL instruction | |
1481 | ;;; Clobbers: r24, r25, __tmp_reg__ | |
1482 | DEFUN __fmul | |
1483 | ; clear result | |
1484 | clr C0 | |
1485 | clr C1 | |
1486 | clr A0 | |
1487 | 1: tst B1 | |
1488 | ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. | |
1489 | 2: brpl 3f | |
1490 | ;; C += A | |
1491 | add C0, A0 | |
1492 | adc C1, A1 | |
1493 | 3: ;; A >>= 1 | |
1494 | lsr A1 | |
1495 | ror A0 | |
1496 | ;; B <<= 1 | |
1497 | lsl B1 | |
1498 | brne 2b | |
1499 | ret | |
1500 | ENDF __fmul | |
1501 | #endif /* L_fmul */ | |
1502 | ||
1503 | #undef A0 | |
1504 | #undef A1 | |
1505 | #undef B1 | |
1506 | #undef C0 | |
1507 | #undef C1 |