1 /* Copyright (C) 2006-2015 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
27 #include "flags.h"
28 #include "recog.h"
29 #include "obstack.h"
30 #include "hash-set.h"
31 #include "machmode.h"
32 #include "vec.h"
33 #include "double-int.h"
34 #include "input.h"
35 #include "alias.h"
36 #include "symtab.h"
37 #include "wide-int.h"
38 #include "inchash.h"
39 #include "tree.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "hashtab.h"
46 #include "function.h"
47 #include "statistics.h"
48 #include "real.h"
49 #include "fixed-value.h"
50 #include "expmed.h"
51 #include "dojump.h"
52 #include "explow.h"
53 #include "emit-rtl.h"
54 #include "stmt.h"
55 #include "expr.h"
56 #include "insn-codes.h"
57 #include "optabs.h"
58 #include "except.h"
59 #include "output.h"
60 #include "predict.h"
61 #include "dominance.h"
62 #include "cfg.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "basic-block.h"
69 #include "diagnostic-core.h"
70 #include "ggc.h"
71 #include "tm_p.h"
72 #include "target.h"
73 #include "target-def.h"
74 #include "langhooks.h"
75 #include "reload.h"
76 #include "sched-int.h"
77 #include "params.h"
78 #include "hash-table.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-fold.h"
82 #include "tree-eh.h"
83 #include "gimple-expr.h"
84 #include "is-a.h"
85 #include "gimple.h"
86 #include "gimplify.h"
87 #include "tm-constrs.h"
88 #include "sbitmap.h"
89 #include "df.h"
90 #include "ddg.h"
91 #include "timevar.h"
92 #include "dumpfile.h"
93 #include "cfgloop.h"
94 #include "builtins.h"
95 #include "rtl-iter.h"
96
97 /* Builtin types, data and prototypes. */
98
99 enum spu_builtin_type_index
100 {
101 SPU_BTI_END_OF_PARAMS,
102
103 /* We create new type nodes for these. */
104 SPU_BTI_V16QI,
105 SPU_BTI_V8HI,
106 SPU_BTI_V4SI,
107 SPU_BTI_V2DI,
108 SPU_BTI_V4SF,
109 SPU_BTI_V2DF,
110 SPU_BTI_UV16QI,
111 SPU_BTI_UV8HI,
112 SPU_BTI_UV4SI,
113 SPU_BTI_UV2DI,
114
115 /* A 16-byte type. (Implemented with V16QI_type_node) */
116 SPU_BTI_QUADWORD,
117
118 /* These all correspond to intSI_type_node */
119 SPU_BTI_7,
120 SPU_BTI_S7,
121 SPU_BTI_U7,
122 SPU_BTI_S10,
123 SPU_BTI_S10_4,
124 SPU_BTI_U14,
125 SPU_BTI_16,
126 SPU_BTI_S16,
127 SPU_BTI_S16_2,
128 SPU_BTI_U16,
129 SPU_BTI_U16_2,
130 SPU_BTI_U18,
131
132 /* These correspond to the standard types */
133 SPU_BTI_INTQI,
134 SPU_BTI_INTHI,
135 SPU_BTI_INTSI,
136 SPU_BTI_INTDI,
137
138 SPU_BTI_UINTQI,
139 SPU_BTI_UINTHI,
140 SPU_BTI_UINTSI,
141 SPU_BTI_UINTDI,
142
143 SPU_BTI_FLOAT,
144 SPU_BTI_DOUBLE,
145
146 SPU_BTI_VOID,
147 SPU_BTI_PTR,
148
149 SPU_BTI_MAX
150 };
151
152 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
153 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
154 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
155 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
156 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
157 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
158 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
159 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
160 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
161 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
162
163 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
164
165 struct spu_builtin_range
166 {
167 int low, high;
168 };
169
170 static struct spu_builtin_range spu_builtin_range[] = {
171 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
172 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
173 {0ll, 0x7fll}, /* SPU_BTI_U7 */
174 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
175 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
176 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
177 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
178 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
179 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
180 {0ll, 0xffffll}, /* SPU_BTI_U16 */
181 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
182 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
183 };
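/* A note on the table above (added for clarity): SPU_BTI_S10, for
   example, covers the signed 10-bit immediate range [-0x200, 0x1ff],
   i.e. [-512, 511].  The rows are indexed by the SPU_BTI_* values
   starting at SPU_BTI_7; presumably constant builtin arguments are
   checked against these bounds when the builtins are expanded (the
   check itself is outside this excerpt).  */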
184
185 \f
186 /* Target specific attribute specifications. */
187 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
188
189 /* Prototypes and external defs. */
190 static int get_pipe (rtx_insn *insn);
191 static int spu_naked_function_p (tree func);
192 static int mem_is_padded_component_ref (rtx x);
193 static void fix_range (const char *);
194 static rtx spu_expand_load (rtx, rtx, rtx, int);
195
196 /* Which instruction set architecture to use. */
197 int spu_arch;
198 /* Which cpu are we tuning for. */
199 int spu_tune;
200
201 /* The hardware requires 8 insns between a hint and the branch it
202 affects. This variable describes how many rtl instructions the
203 compiler needs to see before inserting a hint, and then the compiler
204 will insert enough nops to make it at least 8 insns. The default is
205 for the compiler to allow up to 2 nops to be emitted. The nops are
206 inserted in pairs, so we round down. */
207 int spu_hint_dist = (8*4) - (2*4);
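/* Illustrative arithmetic: insns are 4 bytes, so the 8-insn hardware
   requirement is 32 bytes.  With the default of 2 permitted nops,
   spu_hint_dist = 8*4 - 2*4 = 24 bytes, i.e. a hint is only placed when
   at least 24 bytes (6 insns) of real code separate it from the branch,
   and nops make up the remaining distance.  */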
208
209 enum spu_immediate {
210 SPU_NONE,
211 SPU_IL,
212 SPU_ILA,
213 SPU_ILH,
214 SPU_ILHU,
215 SPU_ORI,
216 SPU_ORHI,
217 SPU_ORBI,
218 SPU_IOHL
219 };
220 enum immediate_class
221 {
222 IC_POOL, /* constant pool */
223 IC_IL1, /* one il* instruction */
224 IC_IL2, /* both ilhu and iohl instructions */
225 IC_IL1s, /* one il* instruction */
226 IC_IL2s, /* both ilhu and iohl instructions */
227 IC_FSMBI, /* the fsmbi instruction */
228 IC_CPAT, /* one of the c*d instructions */
229 IC_FSMBI2 /* fsmbi plus 1 other instruction */
230 };
231
232 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
233 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
234 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
235 static enum immediate_class classify_immediate (rtx op,
236 machine_mode mode);
237
238 /* Pointer mode for __ea references. */
239 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
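/* For example, when the 32-bit __ea model is selected (spu_ea_model ==
   32) an __ea pointer is kept in SImode; any other value (presumably
   the 64-bit model chosen on the command line) selects DImode.  */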
240
241 \f
242 /* Define the structure for the machine field in struct function. */
243 struct GTY(()) machine_function
244 {
245 /* Register to use for PIC accesses. */
246 rtx pic_reg;
247 };
248
249 /* How to allocate a 'struct machine_function'. */
250 static struct machine_function *
251 spu_init_machine_status (void)
252 {
253 return ggc_cleared_alloc<machine_function> ();
254 }
255
256 /* Implement TARGET_OPTION_OVERRIDE. */
257 static void
258 spu_option_override (void)
259 {
260 /* Set up function hooks. */
261 init_machine_status = spu_init_machine_status;
262
263 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it
264 is more important to keep code small by default. */
265 if (!flag_unroll_loops && !flag_peel_loops)
266 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
267 global_options.x_param_values,
268 global_options_set.x_param_values);
269
270 flag_omit_frame_pointer = 1;
271
272 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
273 if (align_functions < 8)
274 align_functions = 8;
275
276 spu_hint_dist = 8*4 - spu_max_nops*4;
277 if (spu_hint_dist < 0)
278 spu_hint_dist = 0;
279
280 if (spu_fixed_range_string)
281 fix_range (spu_fixed_range_string);
282
283 /* Determine processor architectural level. */
284 if (spu_arch_string)
285 {
286 if (strcmp (&spu_arch_string[0], "cell") == 0)
287 spu_arch = PROCESSOR_CELL;
288 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
289 spu_arch = PROCESSOR_CELLEDP;
290 else
291 error ("bad value (%s) for -march= switch", spu_arch_string);
292 }
293
294 /* Determine processor to tune for. */
295 if (spu_tune_string)
296 {
297 if (strcmp (&spu_tune_string[0], "cell") == 0)
298 spu_tune = PROCESSOR_CELL;
299 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
300 spu_tune = PROCESSOR_CELLEDP;
301 else
302 error ("bad value (%s) for -mtune= switch", spu_tune_string);
303 }
304
305 /* Change defaults according to the processor architecture. */
306 if (spu_arch == PROCESSOR_CELLEDP)
307 {
308 /* If no command line option has been otherwise specified, change
309 the default to -mno-safe-hints on celledp -- only the original
310 Cell/B.E. processors require this workaround. */
311 if (!(target_flags_explicit & MASK_SAFE_HINTS))
312 target_flags &= ~MASK_SAFE_HINTS;
313 }
314
315 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
316 }
317 \f
318 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
319 struct attribute_spec.handler. */
320
321 /* True if MODE is valid for the target. By "valid", we mean able to
322 be manipulated in non-trivial ways. In particular, this means all
323 the arithmetic is supported. */
324 static bool
325 spu_scalar_mode_supported_p (machine_mode mode)
326 {
327 switch (mode)
328 {
329 case QImode:
330 case HImode:
331 case SImode:
332 case SFmode:
333 case DImode:
334 case TImode:
335 case DFmode:
336 return true;
337
338 default:
339 return false;
340 }
341 }
342
343 /* Similarly for vector modes. "Supported" here is less strict. At
344 least some operations are supported; callers need to check optabs or
345 builtins for further details. */
346 static bool
347 spu_vector_mode_supported_p (machine_mode mode)
348 {
349 switch (mode)
350 {
351 case V16QImode:
352 case V8HImode:
353 case V4SImode:
354 case V2DImode:
355 case V4SFmode:
356 case V2DFmode:
357 return true;
358
359 default:
360 return false;
361 }
362 }
363
364 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
365 least significant bytes of the outer mode. This function returns
366 TRUE for the SUBREG's where this is correct. */
367 int
368 valid_subreg (rtx op)
369 {
370 machine_mode om = GET_MODE (op);
371 machine_mode im = GET_MODE (SUBREG_REG (op));
372 return om != VOIDmode && im != VOIDmode
373 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
374 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
375 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
376 }
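/* Illustration: (subreg:SI (reg:HI ...)) is accepted (both modes are at
   most 4 bytes) and (subreg:V4SI (reg:TI ...)) is accepted (both are at
   least 16 bytes), but (subreg:SI (reg:DI ...)) is rejected, since the
   DI value's low word would not sit in the bytes an SImode access would
   read from the 128-bit register.  */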
377
378 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
379 and adjust the start offset. */
380 static rtx
381 adjust_operand (rtx op, HOST_WIDE_INT * start)
382 {
383 machine_mode mode;
384 int op_size;
385 /* Strip any paradoxical SUBREG. */
386 if (GET_CODE (op) == SUBREG
387 && (GET_MODE_BITSIZE (GET_MODE (op))
388 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
389 {
390 if (start)
391 *start -=
392 GET_MODE_BITSIZE (GET_MODE (op)) -
393 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
394 op = SUBREG_REG (op);
395 }
396 /* If it is smaller than SI, ensure a SUBREG. */
397 op_size = GET_MODE_BITSIZE (GET_MODE (op));
398 if (op_size < 32)
399 {
400 if (start)
401 *start += 32 - op_size;
402 op_size = 32;
403 }
404 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
405 mode = mode_for_size (op_size, MODE_INT, 0);
406 if (mode != GET_MODE (op))
407 op = gen_rtx_SUBREG (mode, op, 0);
408 return op;
409 }
410
411 void
412 spu_expand_extv (rtx ops[], int unsignedp)
413 {
414 rtx dst = ops[0], src = ops[1];
415 HOST_WIDE_INT width = INTVAL (ops[2]);
416 HOST_WIDE_INT start = INTVAL (ops[3]);
417 HOST_WIDE_INT align_mask;
418 rtx s0, s1, mask, r0;
419
420 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
421
422 if (MEM_P (src))
423 {
424 /* First, determine if we need 1 TImode load or 2. We need only 1
425 if the bits being extracted do not cross the alignment boundary
426 as determined by the MEM and its address. */
427
428 align_mask = -MEM_ALIGN (src);
429 if ((start & align_mask) == ((start + width - 1) & align_mask))
430 {
431 /* Alignment is sufficient for 1 load. */
432 s0 = gen_reg_rtx (TImode);
433 r0 = spu_expand_load (s0, 0, src, start / 8);
434 start &= 7;
435 if (r0)
436 emit_insn (gen_rotqby_ti (s0, s0, r0));
437 }
438 else
439 {
440 /* Need 2 loads. */
441 s0 = gen_reg_rtx (TImode);
442 s1 = gen_reg_rtx (TImode);
443 r0 = spu_expand_load (s0, s1, src, start / 8);
444 start &= 7;
445
446 gcc_assert (start + width <= 128);
447 if (r0)
448 {
449 rtx r1 = gen_reg_rtx (SImode);
450 mask = gen_reg_rtx (TImode);
451 emit_move_insn (mask, GEN_INT (-1));
452 emit_insn (gen_rotqby_ti (s0, s0, r0));
453 emit_insn (gen_rotqby_ti (s1, s1, r0));
454 if (GET_CODE (r0) == CONST_INT)
455 r1 = GEN_INT (INTVAL (r0) & 15);
456 else
457 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
458 emit_insn (gen_shlqby_ti (mask, mask, r1));
459 emit_insn (gen_selb (s0, s1, s0, mask));
460 }
461 }
462
463 }
464 else if (GET_CODE (src) == SUBREG)
465 {
466 rtx r = SUBREG_REG (src);
467 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
468 s0 = gen_reg_rtx (TImode);
469 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
470 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
471 else
472 emit_move_insn (s0, src);
473 }
474 else
475 {
476 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
477 s0 = gen_reg_rtx (TImode);
478 emit_move_insn (s0, src);
479 }
480
481 /* Now s0 is TImode and contains the bits to extract at start. */
482
483 if (start)
484 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
485
486 if (128 - width)
487 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
488
489 emit_move_insn (dst, s0);
490 }
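/* In outline: the quadword(s) containing the field are loaded into a
   TImode register (two loads merged with selb when the field crosses
   the alignment boundary), the field is rotated up to the most
   significant end, and a final right shift of 128-WIDTH bits --
   arithmetic or logical according to UNSIGNEDP -- produces the result
   in DST.  */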
491
492 void
493 spu_expand_insv (rtx ops[])
494 {
495 HOST_WIDE_INT width = INTVAL (ops[1]);
496 HOST_WIDE_INT start = INTVAL (ops[2]);
497 HOST_WIDE_INT maskbits;
498 machine_mode dst_mode;
499 rtx dst = ops[0], src = ops[3];
500 int dst_size;
501 rtx mask;
502 rtx shift_reg;
503 int shift;
504
505
506 if (GET_CODE (ops[0]) == MEM)
507 dst = gen_reg_rtx (TImode);
508 else
509 dst = adjust_operand (dst, &start);
510 dst_mode = GET_MODE (dst);
511 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
512
513 if (CONSTANT_P (src))
514 {
515 machine_mode m =
516 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
517 src = force_reg (m, convert_to_mode (m, src, 0));
518 }
519 src = adjust_operand (src, 0);
520
521 mask = gen_reg_rtx (dst_mode);
522 shift_reg = gen_reg_rtx (dst_mode);
523 shift = dst_size - start - width;
524
525 /* It's not safe to use subreg here because the compiler assumes
526 that the SUBREG_REG is right justified in the SUBREG. */
527 convert_move (shift_reg, src, 1);
528
529 if (shift > 0)
530 {
531 switch (dst_mode)
532 {
533 case SImode:
534 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
535 break;
536 case DImode:
537 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
538 break;
539 case TImode:
540 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
541 break;
542 default:
543 abort ();
544 }
545 }
546 else if (shift < 0)
547 abort ();
548
549 switch (dst_size)
550 {
551 case 32:
552 maskbits = (-1ll << (32 - width - start));
553 if (start)
554 maskbits += (1ll << (32 - start));
555 emit_move_insn (mask, GEN_INT (maskbits));
556 break;
557 case 64:
558 maskbits = (-1ll << (64 - width - start));
559 if (start)
560 maskbits += (1ll << (64 - start));
561 emit_move_insn (mask, GEN_INT (maskbits));
562 break;
563 case 128:
564 {
565 unsigned char arr[16];
566 int i = start / 8;
567 memset (arr, 0, sizeof (arr));
568 arr[i] = 0xff >> (start & 7);
569 for (i++; i <= (start + width - 1) / 8; i++)
570 arr[i] = 0xff;
571 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
572 emit_move_insn (mask, array_to_constant (TImode, arr));
573 }
574 break;
575 default:
576 abort ();
577 }
578 if (GET_CODE (ops[0]) == MEM)
579 {
580 rtx low = gen_reg_rtx (SImode);
581 rtx rotl = gen_reg_rtx (SImode);
582 rtx mask0 = gen_reg_rtx (TImode);
583 rtx addr;
584 rtx addr0;
585 rtx addr1;
586 rtx mem;
587
588 addr = force_reg (Pmode, XEXP (ops[0], 0));
589 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
590 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
591 emit_insn (gen_negsi2 (rotl, low));
592 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
593 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
594 mem = change_address (ops[0], TImode, addr0);
595 set_mem_alias_set (mem, 0);
596 emit_move_insn (dst, mem);
597 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
598 if (start + width > MEM_ALIGN (ops[0]))
599 {
600 rtx shl = gen_reg_rtx (SImode);
601 rtx mask1 = gen_reg_rtx (TImode);
602 rtx dst1 = gen_reg_rtx (TImode);
603 rtx mem1;
604 addr1 = plus_constant (Pmode, addr, 16);
605 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
606 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
607 emit_insn (gen_shlqby_ti (mask1, mask, shl));
608 mem1 = change_address (ops[0], TImode, addr1);
609 set_mem_alias_set (mem1, 0);
610 emit_move_insn (dst1, mem1);
611 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
612 emit_move_insn (mem1, dst1);
613 }
614 emit_move_insn (mem, dst);
615 }
616 else
617 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
618 }
619
620
621 int
622 spu_expand_block_move (rtx ops[])
623 {
624 HOST_WIDE_INT bytes, align, offset;
625 rtx src, dst, sreg, dreg, target;
626 int i;
627 if (GET_CODE (ops[2]) != CONST_INT
628 || GET_CODE (ops[3]) != CONST_INT
629 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
630 return 0;
631
632 bytes = INTVAL (ops[2]);
633 align = INTVAL (ops[3]);
634
635 if (bytes <= 0)
636 return 1;
637
638 dst = ops[0];
639 src = ops[1];
640
641 if (align == 16)
642 {
643 for (offset = 0; offset + 16 <= bytes; offset += 16)
644 {
645 dst = adjust_address (ops[0], V16QImode, offset);
646 src = adjust_address (ops[1], V16QImode, offset);
647 emit_move_insn (dst, src);
648 }
649 if (offset < bytes)
650 {
651 rtx mask;
652 unsigned char arr[16] = { 0 };
653 for (i = 0; i < bytes - offset; i++)
654 arr[i] = 0xff;
655 dst = adjust_address (ops[0], V16QImode, offset);
656 src = adjust_address (ops[1], V16QImode, offset);
657 mask = gen_reg_rtx (V16QImode);
658 sreg = gen_reg_rtx (V16QImode);
659 dreg = gen_reg_rtx (V16QImode);
660 target = gen_reg_rtx (V16QImode);
661 emit_move_insn (mask, array_to_constant (V16QImode, arr));
662 emit_move_insn (dreg, dst);
663 emit_move_insn (sreg, src);
664 emit_insn (gen_selb (target, dreg, sreg, mask));
665 emit_move_insn (dst, target);
666 }
667 return 1;
668 }
669 return 0;
670 }
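/* Example of the aligned path above: a 20-byte copy with 16-byte
   alignment becomes one V16QImode move for bytes 0-15, then a selb
   whose mask is 0xff in the first four byte positions merges the last
   4 source bytes into the destination quadword, leaving its remaining
   12 bytes untouched.  */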
671
672 enum spu_comp_code
673 { SPU_EQ, SPU_GT, SPU_GTU };
674
675 int spu_comp_icode[12][3] = {
676 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
677 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
678 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
679 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
680 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
681 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
682 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
683 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
684 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
685 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
686 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
687 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
688 };
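/* The rows above are indexed by operand mode in the order QI, HI, SI,
   DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF (the same order used to
   compute 'index' in spu_emit_branch_or_set below), and the columns by
   enum spu_comp_code.  The zero entries record that the floating-point
   modes have no unsigned-greater-than pattern.  */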
689
690 /* Generate a compare for CODE and emit a branch, a set or a select
691 based on the result. GCC can figure this out too if we don't
692 provide all variations of compares, but since GCC always wants to
693 use WORD_MODE, we can generate better code in most cases if we do
694 it ourselves. */
695 void
696 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
697 {
698 int reverse_compare = 0;
699 int reverse_test = 0;
700 rtx compare_result, eq_result;
701 rtx comp_rtx, eq_rtx;
702 machine_mode comp_mode;
703 machine_mode op_mode;
704 enum spu_comp_code scode, eq_code;
705 enum insn_code ior_code;
706 enum rtx_code code = GET_CODE (cmp);
707 rtx op0 = XEXP (cmp, 0);
708 rtx op1 = XEXP (cmp, 1);
709 int index;
710 int eq_test = 0;
711
712 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
713 and so on, to keep the constant in operand 1. */
714 if (GET_CODE (op1) == CONST_INT)
715 {
716 HOST_WIDE_INT val = INTVAL (op1) - 1;
717 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
718 switch (code)
719 {
720 case GE:
721 op1 = GEN_INT (val);
722 code = GT;
723 break;
724 case LT:
725 op1 = GEN_INT (val);
726 code = LE;
727 break;
728 case GEU:
729 op1 = GEN_INT (val);
730 code = GTU;
731 break;
732 case LTU:
733 op1 = GEN_INT (val);
734 code = LEU;
735 break;
736 default:
737 break;
738 }
739 }
740
741 /* However, if we generate an integer result, performing a reverse test
742 would require an extra negation, so avoid that where possible. */
743 if (GET_CODE (op1) == CONST_INT && is_set == 1)
744 {
745 HOST_WIDE_INT val = INTVAL (op1) + 1;
746 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
747 switch (code)
748 {
749 case LE:
750 op1 = GEN_INT (val);
751 code = LT;
752 break;
753 case LEU:
754 op1 = GEN_INT (val);
755 code = LTU;
756 break;
757 default:
758 break;
759 }
760 }
761
762 comp_mode = SImode;
763 op_mode = GET_MODE (op0);
764
765 switch (code)
766 {
767 case GE:
768 scode = SPU_GT;
769 if (HONOR_NANS (op_mode))
770 {
771 reverse_compare = 0;
772 reverse_test = 0;
773 eq_test = 1;
774 eq_code = SPU_EQ;
775 }
776 else
777 {
778 reverse_compare = 1;
779 reverse_test = 1;
780 }
781 break;
782 case LE:
783 scode = SPU_GT;
784 if (HONOR_NANS (op_mode))
785 {
786 reverse_compare = 1;
787 reverse_test = 0;
788 eq_test = 1;
789 eq_code = SPU_EQ;
790 }
791 else
792 {
793 reverse_compare = 0;
794 reverse_test = 1;
795 }
796 break;
797 case LT:
798 reverse_compare = 1;
799 reverse_test = 0;
800 scode = SPU_GT;
801 break;
802 case GEU:
803 reverse_compare = 1;
804 reverse_test = 1;
805 scode = SPU_GTU;
806 break;
807 case LEU:
808 reverse_compare = 0;
809 reverse_test = 1;
810 scode = SPU_GTU;
811 break;
812 case LTU:
813 reverse_compare = 1;
814 reverse_test = 0;
815 scode = SPU_GTU;
816 break;
817 case NE:
818 reverse_compare = 0;
819 reverse_test = 1;
820 scode = SPU_EQ;
821 break;
822
823 case EQ:
824 scode = SPU_EQ;
825 break;
826 case GT:
827 scode = SPU_GT;
828 break;
829 case GTU:
830 scode = SPU_GTU;
831 break;
832 default:
833 scode = SPU_EQ;
834 break;
835 }
836
837 switch (op_mode)
838 {
839 case QImode:
840 index = 0;
841 comp_mode = QImode;
842 break;
843 case HImode:
844 index = 1;
845 comp_mode = HImode;
846 break;
847 case SImode:
848 index = 2;
849 break;
850 case DImode:
851 index = 3;
852 break;
853 case TImode:
854 index = 4;
855 break;
856 case SFmode:
857 index = 5;
858 break;
859 case DFmode:
860 index = 6;
861 break;
862 case V16QImode:
863 index = 7;
864 comp_mode = op_mode;
865 break;
866 case V8HImode:
867 index = 8;
868 comp_mode = op_mode;
869 break;
870 case V4SImode:
871 index = 9;
872 comp_mode = op_mode;
873 break;
874 case V4SFmode:
875 index = 10;
876 comp_mode = V4SImode;
877 break;
878 case V2DFmode:
879 index = 11;
880 comp_mode = V2DImode;
881 break;
882 case V2DImode:
883 default:
884 abort ();
885 }
886
887 if (GET_MODE (op1) == DFmode
888 && (scode != SPU_GT && scode != SPU_EQ))
889 abort ();
890
891 if (is_set == 0 && op1 == const0_rtx
892 && (GET_MODE (op0) == SImode
893 || GET_MODE (op0) == HImode
894 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
895 {
896 /* Don't need to set a register with the result when we are
897 comparing against zero and branching. */
898 reverse_test = !reverse_test;
899 compare_result = op0;
900 }
901 else
902 {
903 compare_result = gen_reg_rtx (comp_mode);
904
905 if (reverse_compare)
906 {
907 rtx t = op1;
908 op1 = op0;
909 op0 = t;
910 }
911
912 if (spu_comp_icode[index][scode] == 0)
913 abort ();
914
915 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
916 (op0, op_mode))
917 op0 = force_reg (op_mode, op0);
918 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
919 (op1, op_mode))
920 op1 = force_reg (op_mode, op1);
921 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
922 op0, op1);
923 if (comp_rtx == 0)
924 abort ();
925 emit_insn (comp_rtx);
926
927 if (eq_test)
928 {
929 eq_result = gen_reg_rtx (comp_mode);
930 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
931 op0, op1);
932 if (eq_rtx == 0)
933 abort ();
934 emit_insn (eq_rtx);
935 ior_code = optab_handler (ior_optab, comp_mode);
936 gcc_assert (ior_code != CODE_FOR_nothing);
937 emit_insn (GEN_FCN (ior_code)
938 (compare_result, compare_result, eq_result));
939 }
940 }
941
942 if (is_set == 0)
943 {
944 rtx bcomp;
945 rtx loc_ref;
946
947 /* We don't have branch on QI compare insns, so we convert the
948 QI compare result to a HI result. */
949 if (comp_mode == QImode)
950 {
951 rtx old_res = compare_result;
952 compare_result = gen_reg_rtx (HImode);
953 comp_mode = HImode;
954 emit_insn (gen_extendqihi2 (compare_result, old_res));
955 }
956
957 if (reverse_test)
958 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
959 else
960 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
961
962 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
963 emit_jump_insn (gen_rtx_SET (pc_rtx,
964 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
965 loc_ref, pc_rtx)));
966 }
967 else if (is_set == 2)
968 {
969 rtx target = operands[0];
970 int compare_size = GET_MODE_BITSIZE (comp_mode);
971 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
972 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
973 rtx select_mask;
974 rtx op_t = operands[2];
975 rtx op_f = operands[3];
976
977 /* The result of the comparison can be SI, HI or QI mode. Create a
978 mask based on that result. */
979 if (target_size > compare_size)
980 {
981 select_mask = gen_reg_rtx (mode);
982 emit_insn (gen_extend_compare (select_mask, compare_result));
983 }
984 else if (target_size < compare_size)
985 select_mask =
986 gen_rtx_SUBREG (mode, compare_result,
987 (compare_size - target_size) / BITS_PER_UNIT);
988 else if (comp_mode != mode)
989 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
990 else
991 select_mask = compare_result;
992
993 if (GET_MODE (target) != GET_MODE (op_t)
994 || GET_MODE (target) != GET_MODE (op_f))
995 abort ();
996
997 if (reverse_test)
998 emit_insn (gen_selb (target, op_t, op_f, select_mask));
999 else
1000 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1001 }
1002 else
1003 {
1004 rtx target = operands[0];
1005 if (reverse_test)
1006 emit_insn (gen_rtx_SET (compare_result,
1007 gen_rtx_NOT (comp_mode, compare_result)));
1008 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1009 emit_insn (gen_extendhisi2 (target, compare_result));
1010 else if (GET_MODE (target) == SImode
1011 && GET_MODE (compare_result) == QImode)
1012 emit_insn (gen_extend_compare (target, compare_result));
1013 else
1014 emit_move_insn (target, compare_result);
1015 }
1016 }
1017
1018 HOST_WIDE_INT
1019 const_double_to_hwint (rtx x)
1020 {
1021 HOST_WIDE_INT val;
1022 REAL_VALUE_TYPE rv;
1023 if (GET_MODE (x) == SFmode)
1024 {
1025 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1026 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1027 }
1028 else if (GET_MODE (x) == DFmode)
1029 {
1030 long l[2];
1031 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1032 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1033 val = l[0];
1034 val = (val << 32) | (l[1] & 0xffffffff);
1035 }
1036 else
1037 abort ();
1038 return val;
1039 }
1040
1041 rtx
1042 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1043 {
1044 long tv[2];
1045 REAL_VALUE_TYPE rv;
1046 gcc_assert (mode == SFmode || mode == DFmode);
1047
1048 if (mode == SFmode)
1049 tv[0] = (v << 32) >> 32;
1050 else if (mode == DFmode)
1051 {
1052 tv[1] = (v << 32) >> 32;
1053 tv[0] = v >> 32;
1054 }
1055 real_from_target (&rv, tv, mode);
1056 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1057 }
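/* Worked example: for the SFmode constant 1.0, const_double_to_hwint
   returns the IEEE single-precision pattern 0x3f800000, and
   hwint_to_const_double (SFmode, 0x3f800000) gives back an equivalent
   CONST_DOUBLE.  For DFmode the two 32-bit target words are packed
   high:low into a single 64-bit HOST_WIDE_INT.  */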
1058
1059 void
1060 print_operand_address (FILE * file, register rtx addr)
1061 {
1062 rtx reg;
1063 rtx offset;
1064
1065 if (GET_CODE (addr) == AND
1066 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1067 && INTVAL (XEXP (addr, 1)) == -16)
1068 addr = XEXP (addr, 0);
1069
1070 switch (GET_CODE (addr))
1071 {
1072 case REG:
1073 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1074 break;
1075
1076 case PLUS:
1077 reg = XEXP (addr, 0);
1078 offset = XEXP (addr, 1);
1079 if (GET_CODE (offset) == REG)
1080 {
1081 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1082 reg_names[REGNO (offset)]);
1083 }
1084 else if (GET_CODE (offset) == CONST_INT)
1085 {
1086 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1087 INTVAL (offset), reg_names[REGNO (reg)]);
1088 }
1089 else
1090 abort ();
1091 break;
1092
1093 case CONST:
1094 case LABEL_REF:
1095 case SYMBOL_REF:
1096 case CONST_INT:
1097 output_addr_const (file, addr);
1098 break;
1099
1100 default:
1101 debug_rtx (addr);
1102 abort ();
1103 }
1104 }
1105
1106 void
1107 print_operand (FILE * file, rtx x, int code)
1108 {
1109 machine_mode mode = GET_MODE (x);
1110 HOST_WIDE_INT val;
1111 unsigned char arr[16];
1112 int xcode = GET_CODE (x);
1113 int i, info;
1114 if (GET_MODE (x) == VOIDmode)
1115 switch (code)
1116 {
1117 case 'L': /* 128 bits, signed */
1118 case 'm': /* 128 bits, signed */
1119 case 'T': /* 128 bits, signed */
1120 case 't': /* 128 bits, signed */
1121 mode = TImode;
1122 break;
1123 case 'K': /* 64 bits, signed */
1124 case 'k': /* 64 bits, signed */
1125 case 'D': /* 64 bits, signed */
1126 case 'd': /* 64 bits, signed */
1127 mode = DImode;
1128 break;
1129 case 'J': /* 32 bits, signed */
1130 case 'j': /* 32 bits, signed */
1131 case 's': /* 32 bits, signed */
1132 case 'S': /* 32 bits, signed */
1133 mode = SImode;
1134 break;
1135 }
1136 switch (code)
1137 {
1138
1139 case 'j': /* 32 bits, signed */
1140 case 'k': /* 64 bits, signed */
1141 case 'm': /* 128 bits, signed */
1142 if (xcode == CONST_INT
1143 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1144 {
1145 gcc_assert (logical_immediate_p (x, mode));
1146 constant_to_array (mode, x, arr);
1147 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1148 val = trunc_int_for_mode (val, SImode);
1149 switch (which_logical_immediate (val))
1150 {
1151 case SPU_ORI:
1152 break;
1153 case SPU_ORHI:
1154 fprintf (file, "h");
1155 break;
1156 case SPU_ORBI:
1157 fprintf (file, "b");
1158 break;
1159 default:
1160 gcc_unreachable();
1161 }
1162 }
1163 else
1164 gcc_unreachable();
1165 return;
1166
1167 case 'J': /* 32 bits, signed */
1168 case 'K': /* 64 bits, signed */
1169 case 'L': /* 128 bits, signed */
1170 if (xcode == CONST_INT
1171 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1172 {
1173 gcc_assert (logical_immediate_p (x, mode)
1174 || iohl_immediate_p (x, mode));
1175 constant_to_array (mode, x, arr);
1176 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1177 val = trunc_int_for_mode (val, SImode);
1178 switch (which_logical_immediate (val))
1179 {
1180 case SPU_ORI:
1181 case SPU_IOHL:
1182 break;
1183 case SPU_ORHI:
1184 val = trunc_int_for_mode (val, HImode);
1185 break;
1186 case SPU_ORBI:
1187 val = trunc_int_for_mode (val, QImode);
1188 break;
1189 default:
1190 gcc_unreachable();
1191 }
1192 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1193 }
1194 else
1195 gcc_unreachable();
1196 return;
1197
1198 case 't': /* 128 bits, signed */
1199 case 'd': /* 64 bits, signed */
1200 case 's': /* 32 bits, signed */
1201 if (CONSTANT_P (x))
1202 {
1203 enum immediate_class c = classify_immediate (x, mode);
1204 switch (c)
1205 {
1206 case IC_IL1:
1207 constant_to_array (mode, x, arr);
1208 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1209 val = trunc_int_for_mode (val, SImode);
1210 switch (which_immediate_load (val))
1211 {
1212 case SPU_IL:
1213 break;
1214 case SPU_ILA:
1215 fprintf (file, "a");
1216 break;
1217 case SPU_ILH:
1218 fprintf (file, "h");
1219 break;
1220 case SPU_ILHU:
1221 fprintf (file, "hu");
1222 break;
1223 default:
1224 gcc_unreachable ();
1225 }
1226 break;
1227 case IC_CPAT:
1228 constant_to_array (mode, x, arr);
1229 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1230 if (info == 1)
1231 fprintf (file, "b");
1232 else if (info == 2)
1233 fprintf (file, "h");
1234 else if (info == 4)
1235 fprintf (file, "w");
1236 else if (info == 8)
1237 fprintf (file, "d");
1238 break;
1239 case IC_IL1s:
1240 if (xcode == CONST_VECTOR)
1241 {
1242 x = CONST_VECTOR_ELT (x, 0);
1243 xcode = GET_CODE (x);
1244 }
1245 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1246 fprintf (file, "a");
1247 else if (xcode == HIGH)
1248 fprintf (file, "hu");
1249 break;
1250 case IC_FSMBI:
1251 case IC_FSMBI2:
1252 case IC_IL2:
1253 case IC_IL2s:
1254 case IC_POOL:
1255 abort ();
1256 }
1257 }
1258 else
1259 gcc_unreachable ();
1260 return;
1261
1262 case 'T': /* 128 bits, signed */
1263 case 'D': /* 64 bits, signed */
1264 case 'S': /* 32 bits, signed */
1265 if (CONSTANT_P (x))
1266 {
1267 enum immediate_class c = classify_immediate (x, mode);
1268 switch (c)
1269 {
1270 case IC_IL1:
1271 constant_to_array (mode, x, arr);
1272 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1273 val = trunc_int_for_mode (val, SImode);
1274 switch (which_immediate_load (val))
1275 {
1276 case SPU_IL:
1277 case SPU_ILA:
1278 break;
1279 case SPU_ILH:
1280 case SPU_ILHU:
1281 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1282 break;
1283 default:
1284 gcc_unreachable ();
1285 }
1286 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1287 break;
1288 case IC_FSMBI:
1289 constant_to_array (mode, x, arr);
1290 val = 0;
1291 for (i = 0; i < 16; i++)
1292 {
1293 val <<= 1;
1294 val |= arr[i] & 1;
1295 }
1296 print_operand (file, GEN_INT (val), 0);
1297 break;
1298 case IC_CPAT:
1299 constant_to_array (mode, x, arr);
1300 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1302 break;
1303 case IC_IL1s:
1304 if (xcode == HIGH)
1305 x = XEXP (x, 0);
1306 if (GET_CODE (x) == CONST_VECTOR)
1307 x = CONST_VECTOR_ELT (x, 0);
1308 output_addr_const (file, x);
1309 if (xcode == HIGH)
1310 fprintf (file, "@h");
1311 break;
1312 case IC_IL2:
1313 case IC_IL2s:
1314 case IC_FSMBI2:
1315 case IC_POOL:
1316 abort ();
1317 }
1318 }
1319 else
1320 gcc_unreachable ();
1321 return;
1322
1323 case 'C':
1324 if (xcode == CONST_INT)
1325 {
1326 /* Only the 4 least significant bits are relevant for the generate
1327 control word instructions. */
1328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1329 return;
1330 }
1331 break;
1332
1333 case 'M': /* print code for c*d */
1334 if (GET_CODE (x) == CONST_INT)
1335 switch (INTVAL (x))
1336 {
1337 case 1:
1338 fprintf (file, "b");
1339 break;
1340 case 2:
1341 fprintf (file, "h");
1342 break;
1343 case 4:
1344 fprintf (file, "w");
1345 break;
1346 case 8:
1347 fprintf (file, "d");
1348 break;
1349 default:
1350 gcc_unreachable();
1351 }
1352 else
1353 gcc_unreachable();
1354 return;
1355
1356 case 'N': /* Negate the operand */
1357 if (xcode == CONST_INT)
1358 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1359 else if (xcode == CONST_VECTOR)
1360 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1361 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1362 return;
1363
1364 case 'I': /* enable/disable interrupts */
1365 if (xcode == CONST_INT)
1366 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1367 return;
1368
1369 case 'b': /* branch modifiers */
1370 if (xcode == REG)
1371 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1372 else if (COMPARISON_P (x))
1373 fprintf (file, "%s", xcode == NE ? "n" : "");
1374 return;
1375
1376 case 'i': /* indirect call */
1377 if (xcode == MEM)
1378 {
1379 if (GET_CODE (XEXP (x, 0)) == REG)
1380 /* Used in indirect function calls. */
1381 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1382 else
1383 output_address (XEXP (x, 0));
1384 }
1385 return;
1386
1387 case 'p': /* load/store */
1388 if (xcode == MEM)
1389 {
1390 x = XEXP (x, 0);
1391 xcode = GET_CODE (x);
1392 }
1393 if (xcode == AND)
1394 {
1395 x = XEXP (x, 0);
1396 xcode = GET_CODE (x);
1397 }
1398 if (xcode == REG)
1399 fprintf (file, "d");
1400 else if (xcode == CONST_INT)
1401 fprintf (file, "a");
1402 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1403 fprintf (file, "r");
1404 else if (xcode == PLUS || xcode == LO_SUM)
1405 {
1406 if (GET_CODE (XEXP (x, 1)) == REG)
1407 fprintf (file, "x");
1408 else
1409 fprintf (file, "d");
1410 }
1411 return;
1412
1413 case 'e':
1414 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1415 val &= 0x7;
1416 output_addr_const (file, GEN_INT (val));
1417 return;
1418
1419 case 'f':
1420 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1421 val &= 0x1f;
1422 output_addr_const (file, GEN_INT (val));
1423 return;
1424
1425 case 'g':
1426 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427 val &= 0x3f;
1428 output_addr_const (file, GEN_INT (val));
1429 return;
1430
1431 case 'h':
1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433 val = (val >> 3) & 0x1f;
1434 output_addr_const (file, GEN_INT (val));
1435 return;
1436
1437 case 'E':
1438 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1439 val = -val;
1440 val &= 0x7;
1441 output_addr_const (file, GEN_INT (val));
1442 return;
1443
1444 case 'F':
1445 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1446 val = -val;
1447 val &= 0x1f;
1448 output_addr_const (file, GEN_INT (val));
1449 return;
1450
1451 case 'G':
1452 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1453 val = -val;
1454 val &= 0x3f;
1455 output_addr_const (file, GEN_INT (val));
1456 return;
1457
1458 case 'H':
1459 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1460 val = -(val & -8ll);
1461 val = (val >> 3) & 0x1f;
1462 output_addr_const (file, GEN_INT (val));
1463 return;
1464
1465 case 'v':
1466 case 'w':
1467 constant_to_array (mode, x, arr);
1468 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1469 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1470 return;
1471
1472 case 0:
1473 if (xcode == REG)
1474 fprintf (file, "%s", reg_names[REGNO (x)]);
1475 else if (xcode == MEM)
1476 output_address (XEXP (x, 0));
1477 else if (xcode == CONST_VECTOR)
1478 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1479 else
1480 output_addr_const (file, x);
1481 return;
1482
1483 /* unused letters
1484 o qr u yz
1485 AB OPQR UVWXYZ */
1486 default:
1487 output_operand_lossage ("invalid %%xn code");
1488 }
1489 gcc_unreachable ();
1490 }
1491
1492 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1493 caller saved register. For leaf functions it is more efficient to
1494 use a volatile register because we won't need to save and restore the
1495 pic register. This routine is only valid after register allocation
1496 is completed, so we can pick an unused register. */
1497 static rtx
1498 get_pic_reg (void)
1499 {
1500 if (!reload_completed && !reload_in_progress)
1501 abort ();
1502
1503 /* If we've already made the decision, we need to keep with it. Once we've
1504 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1505 return true since the register is now live; this should not cause us to
1506 "switch back" to using pic_offset_table_rtx. */
1507 if (!cfun->machine->pic_reg)
1508 {
1509 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1510 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1511 else
1512 cfun->machine->pic_reg = pic_offset_table_rtx;
1513 }
1514
1515 return cfun->machine->pic_reg;
1516 }
1517
1518 /* Split constant addresses to handle cases that are too large.
1519 Add in the pic register when in PIC mode.
1520 Split immediates that require more than 1 instruction. */
1521 int
1522 spu_split_immediate (rtx * ops)
1523 {
1524 machine_mode mode = GET_MODE (ops[0]);
1525 enum immediate_class c = classify_immediate (ops[1], mode);
1526
1527 switch (c)
1528 {
1529 case IC_IL2:
1530 {
1531 unsigned char arrhi[16];
1532 unsigned char arrlo[16];
1533 rtx to, temp, hi, lo;
1534 int i;
1535 machine_mode imode = mode;
1536 /* We need to do reals as ints because the constant used in the
1537 IOR might not be a legitimate real constant. */
1538 imode = int_mode_for_mode (mode);
1539 constant_to_array (mode, ops[1], arrhi);
1540 if (imode != mode)
1541 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1542 else
1543 to = ops[0];
1544 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1545 for (i = 0; i < 16; i += 4)
1546 {
1547 arrlo[i + 2] = arrhi[i + 2];
1548 arrlo[i + 3] = arrhi[i + 3];
1549 arrlo[i + 0] = arrlo[i + 1] = 0;
1550 arrhi[i + 2] = arrhi[i + 3] = 0;
1551 }
1552 hi = array_to_constant (imode, arrhi);
1553 lo = array_to_constant (imode, arrlo);
1554 emit_move_insn (temp, hi);
1555 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1556 return 1;
1557 }
1558 case IC_FSMBI2:
1559 {
1560 unsigned char arr_fsmbi[16];
1561 unsigned char arr_andbi[16];
1562 rtx to, reg_fsmbi, reg_and;
1563 int i;
1564 machine_mode imode = mode;
1565 /* We need to do reals as ints because the constant used in the
1566 * AND might not be a legitimate real constant. */
1567 imode = int_mode_for_mode (mode);
1568 constant_to_array (mode, ops[1], arr_fsmbi);
1569 if (imode != mode)
1570 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1571 else
1572 to = ops[0];
1573 for (i = 0; i < 16; i++)
1574 if (arr_fsmbi[i] != 0)
1575 {
1576 arr_andbi[0] = arr_fsmbi[i];
1577 arr_fsmbi[i] = 0xff;
1578 }
1579 for (i = 1; i < 16; i++)
1580 arr_andbi[i] = arr_andbi[0];
1581 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1582 reg_and = array_to_constant (imode, arr_andbi);
1583 emit_move_insn (to, reg_fsmbi);
1584 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1585 return 1;
1586 }
1587 case IC_POOL:
1588 if (reload_in_progress || reload_completed)
1589 {
1590 rtx mem = force_const_mem (mode, ops[1]);
1591 if (TARGET_LARGE_MEM)
1592 {
1593 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1594 emit_move_insn (addr, XEXP (mem, 0));
1595 mem = replace_equiv_address (mem, addr);
1596 }
1597 emit_move_insn (ops[0], mem);
1598 return 1;
1599 }
1600 break;
1601 case IC_IL1s:
1602 case IC_IL2s:
1603 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1604 {
1605 if (c == IC_IL2s)
1606 {
1607 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1608 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1609 }
1610 else if (flag_pic)
1611 emit_insn (gen_pic (ops[0], ops[1]));
1612 if (flag_pic)
1613 {
1614 rtx pic_reg = get_pic_reg ();
1615 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1616 }
1617 return flag_pic || c == IC_IL2s;
1618 }
1619 break;
1620 case IC_IL1:
1621 case IC_FSMBI:
1622 case IC_CPAT:
1623 break;
1624 }
1625 return 0;
1626 }
1627
1628 /* SAVING is TRUE when we are generating the actual load and store
1629 instructions for REGNO. When determining the size of the stack
1630 needed for saving registers, we must allocate enough space for the
1631 worst case, because we don't always have the information early enough
1632 to not allocate it. But we can at least eliminate the actual loads
1633 and stores during the prologue/epilogue. */
1634 static int
1635 need_to_save_reg (int regno, int saving)
1636 {
1637 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1638 return 1;
1639 if (flag_pic
1640 && regno == PIC_OFFSET_TABLE_REGNUM
1641 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1642 return 1;
1643 return 0;
1644 }
1645
1646 /* This function is only correct starting with local register
1647 allocation */
1648 int
1649 spu_saved_regs_size (void)
1650 {
1651 int reg_save_size = 0;
1652 int regno;
1653
1654 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1655 if (need_to_save_reg (regno, 0))
1656 reg_save_size += 0x10;
1657 return reg_save_size;
1658 }
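/* Every saved register gets a full 16-byte (0x10) slot because SPU
   registers are 128 bits wide; a function needing to save three
   call-saved registers therefore reserves 48 bytes here.  */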
1659
1660 static rtx_insn *
1661 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1662 {
1663 rtx reg = gen_rtx_REG (V4SImode, regno);
1664 rtx mem =
1665 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1666 return emit_insn (gen_movv4si (mem, reg));
1667 }
1668
1669 static rtx_insn *
1670 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1671 {
1672 rtx reg = gen_rtx_REG (V4SImode, regno);
1673 rtx mem =
1674 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1675 return emit_insn (gen_movv4si (reg, mem));
1676 }
1677
1678 /* This happens after reload, so we need to expand it. */
1679 static rtx_insn *
1680 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1681 {
1682 rtx_insn *insn;
1683 if (satisfies_constraint_K (GEN_INT (imm)))
1684 {
1685 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1686 }
1687 else
1688 {
1689 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1690 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1691 if (REGNO (src) == REGNO (scratch))
1692 abort ();
1693 }
1694 return insn;
1695 }
1696
1697 /* Return nonzero if this function is known to have a null epilogue. */
1698
1699 int
1700 direct_return (void)
1701 {
1702 if (reload_completed)
1703 {
1704 if (cfun->static_chain_decl == 0
1705 && (spu_saved_regs_size ()
1706 + get_frame_size ()
1707 + crtl->outgoing_args_size
1708 + crtl->args.pretend_args_size == 0)
1709 && crtl->is_leaf)
1710 return 1;
1711 }
1712 return 0;
1713 }
1714
1715 /*
1716 The stack frame looks like this:
1717 +-------------+
1718 | incoming |
1719 | args |
1720 AP -> +-------------+
1721 | $lr save |
1722 +-------------+
1723 prev SP | back chain |
1724 +-------------+
1725 | var args |
1726 | reg save | crtl->args.pretend_args_size bytes
1727 +-------------+
1728 | ... |
1729 | saved regs | spu_saved_regs_size() bytes
1730 FP -> +-------------+
1731 | ... |
1732 | vars | get_frame_size() bytes
1733 HFP -> +-------------+
1734 | ... |
1735 | outgoing |
1736 | args | crtl->outgoing_args_size bytes
1737 +-------------+
1738 | $lr of next |
1739 | frame |
1740 +-------------+
1741 | back chain |
1742 SP -> +-------------+
1743
1744 */
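/* Illustrative numbers for the prologue below: a non-leaf function with
   32 bytes of locals, two saved registers (32 bytes), 48 bytes of
   outgoing args and no pretend args has total_size = 32 + 32 + 48 +
   STACK_POINTER_OFFSET.  Since that is well under 2000, the back chain
   is stored first and $sp is then decremented by total_size with a
   single add.  */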
1745 void
1746 spu_expand_prologue (void)
1747 {
1748 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1749 HOST_WIDE_INT total_size;
1750 HOST_WIDE_INT saved_regs_size;
1751 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1752 rtx scratch_reg_0, scratch_reg_1;
1753 rtx_insn *insn;
1754 rtx real;
1755
1756 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1757 cfun->machine->pic_reg = pic_offset_table_rtx;
1758
1759 if (spu_naked_function_p (current_function_decl))
1760 return;
1761
1762 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1763 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1764
1765 saved_regs_size = spu_saved_regs_size ();
1766 total_size = size + saved_regs_size
1767 + crtl->outgoing_args_size
1768 + crtl->args.pretend_args_size;
1769
1770 if (!crtl->is_leaf
1771 || cfun->calls_alloca || total_size > 0)
1772 total_size += STACK_POINTER_OFFSET;
1773
1774 /* Save this first because code after this might use the link
1775 register as a scratch register. */
1776 if (!crtl->is_leaf)
1777 {
1778 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1779 RTX_FRAME_RELATED_P (insn) = 1;
1780 }
1781
1782 if (total_size > 0)
1783 {
1784 offset = -crtl->args.pretend_args_size;
1785 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1786 if (need_to_save_reg (regno, 1))
1787 {
1788 offset -= 16;
1789 insn = frame_emit_store (regno, sp_reg, offset);
1790 RTX_FRAME_RELATED_P (insn) = 1;
1791 }
1792 }
1793
1794 if (flag_pic && cfun->machine->pic_reg)
1795 {
1796 rtx pic_reg = cfun->machine->pic_reg;
1797 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1798 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1799 }
1800
1801 if (total_size > 0)
1802 {
1803 if (flag_stack_check)
1804 {
1805 /* We compare against total_size-1 because
1806 ($sp >= total_size) <=> ($sp > total_size-1) */
1807 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1808 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1809 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1810 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1811 {
1812 emit_move_insn (scratch_v4si, size_v4si);
1813 size_v4si = scratch_v4si;
1814 }
1815 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1816 emit_insn (gen_vec_extractv4si
1817 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1818 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1819 }
1820
1821 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1822 the value of the previous $sp because we save it as the back
1823 chain. */
1824 if (total_size <= 2000)
1825 {
1826 /* In this case we save the back chain first. */
1827 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1828 insn =
1829 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1830 }
1831 else
1832 {
1833 insn = emit_move_insn (scratch_reg_0, sp_reg);
1834 insn =
1835 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1836 }
1837 RTX_FRAME_RELATED_P (insn) = 1;
1838 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1839 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1840
1841 if (total_size > 2000)
1842 {
1843 /* Save the back chain ptr */
1844 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1845 }
1846
1847 if (frame_pointer_needed)
1848 {
1849 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1850 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1851 + crtl->outgoing_args_size;
1852 /* Set the new frame_pointer */
1853 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1854 RTX_FRAME_RELATED_P (insn) = 1;
1855 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1856 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1857 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1858 }
1859 }
1860
1861 if (flag_stack_usage_info)
1862 current_function_static_stack_size = total_size;
1863 }
1864
1865 void
1866 spu_expand_epilogue (bool sibcall_p)
1867 {
1868 int size = get_frame_size (), offset, regno;
1869 HOST_WIDE_INT saved_regs_size, total_size;
1870 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1871 rtx scratch_reg_0;
1872
1873 if (spu_naked_function_p (current_function_decl))
1874 return;
1875
1876 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1877
1878 saved_regs_size = spu_saved_regs_size ();
1879 total_size = size + saved_regs_size
1880 + crtl->outgoing_args_size
1881 + crtl->args.pretend_args_size;
1882
1883 if (!crtl->is_leaf
1884 || cfun->calls_alloca || total_size > 0)
1885 total_size += STACK_POINTER_OFFSET;
1886
1887 if (total_size > 0)
1888 {
1889 if (cfun->calls_alloca)
1890 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1891 else
1892 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1893
1894
1895 if (saved_regs_size > 0)
1896 {
1897 offset = -crtl->args.pretend_args_size;
1898 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1899 if (need_to_save_reg (regno, 1))
1900 {
1901 offset -= 0x10;
1902 frame_emit_load (regno, sp_reg, offset);
1903 }
1904 }
1905 }
1906
1907 if (!crtl->is_leaf)
1908 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1909
1910 if (!sibcall_p)
1911 {
1912 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1913 emit_jump_insn (gen__return ());
1914 }
1915 }
1916
1917 rtx
1918 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1919 {
1920 if (count != 0)
1921 return 0;
1922 /* This is inefficient because it ends up copying to a save-register
1923 which then gets saved even though $lr has already been saved. But
1924 it does generate better code for leaf functions and we don't need
1925 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1926 used for __builtin_return_address anyway, so maybe we don't care if
1927 it's inefficient. */
1928 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1929 }
1930 \f
1931
1932 /* Given VAL, generate a constant appropriate for MODE.
1933 If MODE is a vector mode, every element will be VAL.
1934 For TImode, VAL will be zero extended to 128 bits. */
1935 rtx
1936 spu_const (machine_mode mode, HOST_WIDE_INT val)
1937 {
1938 rtx inner;
1939 rtvec v;
1940 int units, i;
1941
1942 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1943 || GET_MODE_CLASS (mode) == MODE_FLOAT
1944 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1945 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1946
1947 if (GET_MODE_CLASS (mode) == MODE_INT)
1948 return immed_double_const (val, 0, mode);
1949
1950 /* val is the bit representation of the float */
1951 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1952 return hwint_to_const_double (mode, val);
1953
1954 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1955 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1956 else
1957 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1958
1959 units = GET_MODE_NUNITS (mode);
1960
1961 v = rtvec_alloc (units);
1962
1963 for (i = 0; i < units; ++i)
1964 RTVEC_ELT (v, i) = inner;
1965
1966 return gen_rtx_CONST_VECTOR (mode, v);
1967 }
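/* For example, spu_const (SImode, 7) is just (const_int 7), while
   spu_const (V4SImode, 7) builds a CONST_VECTOR whose four SImode
   elements are all 7; for the float modes VAL is taken as the raw bit
   pattern of each element.  */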
1968
1969 /* Create a MODE vector constant from 4 ints. */
1970 rtx
1971 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1972 {
1973 unsigned char arr[16];
1974 arr[0] = (a >> 24) & 0xff;
1975 arr[1] = (a >> 16) & 0xff;
1976 arr[2] = (a >> 8) & 0xff;
1977 arr[3] = (a >> 0) & 0xff;
1978 arr[4] = (b >> 24) & 0xff;
1979 arr[5] = (b >> 16) & 0xff;
1980 arr[6] = (b >> 8) & 0xff;
1981 arr[7] = (b >> 0) & 0xff;
1982 arr[8] = (c >> 24) & 0xff;
1983 arr[9] = (c >> 16) & 0xff;
1984 arr[10] = (c >> 8) & 0xff;
1985 arr[11] = (c >> 0) & 0xff;
1986 arr[12] = (d >> 24) & 0xff;
1987 arr[13] = (d >> 16) & 0xff;
1988 arr[14] = (d >> 8) & 0xff;
1989 arr[15] = (d >> 0) & 0xff;
1990 return array_to_constant(mode, arr);
1991 }
1992 \f
1993 /* branch hint stuff */
1994
1995 /* An array of these is used to propagate hints to predecessor blocks. */
1996 struct spu_bb_info
1997 {
1998 rtx_insn *prop_jump; /* propagated from another block */
1999 int bb_index; /* the original block. */
2000 };
2001 static struct spu_bb_info *spu_bb_info;
2002
2003 #define STOP_HINT_P(INSN) \
2004 (CALL_P(INSN) \
2005 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2006 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2007
2008 /* 1 when RTX is a hinted branch or its target. We keep track of
2009 what has been hinted so the safe-hint code can test it easily. */
2010 #define HINTED_P(RTX) \
2011 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2012
2013 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2014 #define SCHED_ON_EVEN_P(RTX) \
2015 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2016
2017 /* Emit a nop for INSN such that the two will dual issue. This assumes
2018 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2019 We check for TImode to handle a MULTI1 insn which has dual issued its
2020 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
2021 static void
2022 emit_nop_for_insn (rtx_insn *insn)
2023 {
2024 int p;
2025 rtx_insn *new_insn;
2026
2027 /* We need to handle JUMP_TABLE_DATA separately. */
2028 if (JUMP_TABLE_DATA_P (insn))
2029 {
2030 new_insn = emit_insn_after (gen_lnop(), insn);
2031 recog_memoized (new_insn);
2032 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2033 return;
2034 }
2035
2036 p = get_pipe (insn);
2037 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2038 new_insn = emit_insn_after (gen_lnop (), insn);
2039 else if (p == 1 && GET_MODE (insn) == TImode)
2040 {
2041 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2042 PUT_MODE (new_insn, TImode);
2043 PUT_MODE (insn, VOIDmode);
2044 }
2045 else
2046 new_insn = emit_insn_after (gen_lnop (), insn);
2047 recog_memoized (new_insn);
2048 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2049 }
2050
2051 /* Insert nops in basic blocks to meet dual issue alignment
2052 requirements. Also make sure hbrp and hint instructions are at least
2053 one cycle apart, possibly inserting a nop. */
2054 static void
2055 pad_bb(void)
2056 {
2057 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2058 int length;
2059 int addr;
2060
2061 /* This sets up INSN_ADDRESSES. */
2062 shorten_branches (get_insns ());
2063
2064 /* Keep track of length added by nops. */
2065 length = 0;
2066
2067 prev_insn = 0;
2068 insn = get_insns ();
2069 if (!active_insn_p (insn))
2070 insn = next_active_insn (insn);
2071 for (; insn; insn = next_insn)
2072 {
2073 next_insn = next_active_insn (insn);
2074 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2075 || INSN_CODE (insn) == CODE_FOR_hbr)
2076 {
2077 if (hbr_insn)
2078 {
2079 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2080 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2081 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2082 || (a1 - a0 == 4))
2083 {
2084 prev_insn = emit_insn_before (gen_lnop (), insn);
2085 PUT_MODE (prev_insn, GET_MODE (insn));
2086 PUT_MODE (insn, TImode);
2087 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2088 length += 4;
2089 }
2090 }
2091 hbr_insn = insn;
2092 }
2093 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2094 {
2095 if (GET_MODE (insn) == TImode)
2096 PUT_MODE (next_insn, TImode);
2097 insn = next_insn;
2098 next_insn = next_active_insn (insn);
2099 }
2100 addr = INSN_ADDRESSES (INSN_UID (insn));
2101 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2102 {
2103 if (((addr + length) & 7) != 0)
2104 {
2105 emit_nop_for_insn (prev_insn);
2106 length += 4;
2107 }
2108 }
2109 else if (GET_MODE (insn) == TImode
2110 && ((next_insn && GET_MODE (next_insn) != TImode)
2111 || get_attr_type (insn) == TYPE_MULTI0)
2112 && ((addr + length) & 7) != 0)
2113 {
2114 /* prev_insn will always be set because the first insn is
2115 always 8-byte aligned. */
2116 emit_nop_for_insn (prev_insn);
2117 length += 4;
2118 }
2119 prev_insn = insn;
2120 }
2121 }
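/* For illustration of the hbr/hbrp spacing in pad_bb above: if a hint
   (hbr) or hbrp at address A0 is followed by another at A0 + 4, or at
   A0 + 8 when the second does not already start a new TImode group, an
   lnop is inserted before the second insn so the two end up at least
   one cycle apart. */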
2122
2123 \f
2124 /* Routines for branch hints. */
2125
2126 static void
2127 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2128 int distance, sbitmap blocks)
2129 {
2130 rtx branch_label = 0;
2131 rtx_insn *hint;
2132 rtx_insn *insn;
2133 rtx_jump_table_data *table;
2134
2135 if (before == 0 || branch == 0 || target == 0)
2136 return;
2137
2138 /* While scheduling we require hints to be no further than 600 bytes
2139 from the branch, so we need to enforce that here too. */
2140 if (distance > 600)
2141 return;
2142
2143 /* If BEFORE is a basic block note, emit the hint after the note. */
2144 if (NOTE_INSN_BASIC_BLOCK_P (before))
2145 before = NEXT_INSN (before);
2146
2147 branch_label = gen_label_rtx ();
2148 LABEL_NUSES (branch_label)++;
2149 LABEL_PRESERVE_P (branch_label) = 1;
2150 insn = emit_label_before (branch_label, branch);
2151 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2152 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2153
2154 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2155 recog_memoized (hint);
2156 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2157 HINTED_P (branch) = 1;
2158
2159 if (GET_CODE (target) == LABEL_REF)
2160 HINTED_P (XEXP (target, 0)) = 1;
2161 else if (tablejump_p (branch, 0, &table))
2162 {
2163 rtvec vec;
2164 int j;
2165 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2166 vec = XVEC (PATTERN (table), 0);
2167 else
2168 vec = XVEC (PATTERN (table), 1);
2169 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2170 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2171 }
2172
2173 if (distance >= 588)
2174 {
2175 /* Make sure the hint isn't scheduled any earlier than this point,
2176 which could make it too far for the branch offset to fit. */
2177 insn = emit_insn_before (gen_blockage (), hint);
2178 recog_memoized (insn);
2179 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2180 }
2181 else if (distance <= 8 * 4)
2182 {
2183 /* To guarantee at least 8 insns between the hint and branch we
2184 insert nops. */
2185 int d;
2186 for (d = distance; d < 8 * 4; d += 4)
2187 {
2188 insn =
2189 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2190 recog_memoized (insn);
2191 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2192 }
2193
2194 /* Make sure any nops inserted aren't scheduled before the hint. */
2195 insn = emit_insn_after (gen_blockage (), hint);
2196 recog_memoized (insn);
2197 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2198
2199 /* Make sure any nops inserted aren't scheduled after the call. */
2200 if (CALL_P (branch) && distance < 8 * 4)
2201 {
2202 insn = emit_insn_before (gen_blockage (), branch);
2203 recog_memoized (insn);
2204 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2205 }
2206 }
2207 }
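/* A worked example of the padding above: with distance == 20 the loop
   runs for d == 20, 24 and 28, so three nopn insns are emitted after the
   hint and, together with the existing insns, at least 8 insns (32 bytes)
   separate the hint from the branch.  With distance >= 588 only the
   blockage before the hint is emitted. */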
2208
2209 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2210 the rtx for the branch target. */
2211 static rtx
2212 get_branch_target (rtx_insn *branch)
2213 {
2214 if (JUMP_P (branch))
2215 {
2216 rtx set, src;
2217
2218 /* Return statements */
2219 if (GET_CODE (PATTERN (branch)) == RETURN)
2220 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2221
2222 /* ASM GOTOs. */
2223 if (extract_asm_operands (PATTERN (branch)) != NULL)
2224 return NULL;
2225
2226 set = single_set (branch);
2227 src = SET_SRC (set);
2228 if (GET_CODE (SET_DEST (set)) != PC)
2229 abort ();
2230
2231 if (GET_CODE (src) == IF_THEN_ELSE)
2232 {
2233 rtx lab = 0;
2234 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2235 if (note)
2236 {
2237 /* If the more probable case is not a fall through, then
2238 try a branch hint. */
2239 int prob = XINT (note, 0);
2240 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2241 && GET_CODE (XEXP (src, 1)) != PC)
2242 lab = XEXP (src, 1);
2243 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2244 && GET_CODE (XEXP (src, 2)) != PC)
2245 lab = XEXP (src, 2);
2246 }
2247 if (lab)
2248 {
2249 if (GET_CODE (lab) == RETURN)
2250 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2251 return lab;
2252 }
2253 return 0;
2254 }
2255
2256 return src;
2257 }
2258 else if (CALL_P (branch))
2259 {
2260 rtx call;
2261 /* All of our call patterns are in a PARALLEL and the CALL is
2262 the first pattern in the PARALLEL. */
2263 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2264 abort ();
2265 call = XVECEXP (PATTERN (branch), 0, 0);
2266 if (GET_CODE (call) == SET)
2267 call = SET_SRC (call);
2268 if (GET_CODE (call) != CALL)
2269 abort ();
2270 return XEXP (XEXP (call, 0), 0);
2271 }
2272 return 0;
2273 }
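/* For illustration, assuming the usual REG_BR_PROB_BASE of 10000: a
   REG_BR_PROB note of 7500 exceeds the 6/10 threshold, so the first arm
   (XEXP (src, 1)) is hinted when it is not the fall-through PC, while a
   note below 4000 hints the second arm instead. */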
2274
2275 /* The special $hbr register is used to prevent the insn scheduler from
2276 moving hbr insns across instructions which invalidate them. It
2277 should only be used in a clobber, and this function searches for
2278 insns which clobber it. */
2279 static bool
2280 insn_clobbers_hbr (rtx_insn *insn)
2281 {
2282 if (INSN_P (insn)
2283 && GET_CODE (PATTERN (insn)) == PARALLEL)
2284 {
2285 rtx parallel = PATTERN (insn);
2286 rtx clobber;
2287 int j;
2288 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2289 {
2290 clobber = XVECEXP (parallel, 0, j);
2291 if (GET_CODE (clobber) == CLOBBER
2292 && GET_CODE (XEXP (clobber, 0)) == REG
2293 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2294 return 1;
2295 }
2296 }
2297 return 0;
2298 }
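/* A sketch of the kind of pattern the loop above matches:

     (parallel [(set (reg:SI 3) (...))
                (clobber (reg:SI HBR_REGNUM))])

   Any PARALLEL containing a CLOBBER of the $hbr register makes
   insn_clobbers_hbr return 1. */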
2299
2300 /* Search up to 32 insns starting at FIRST:
2301 - at any kind of hinted branch, just return
2302 - at any unconditional branch in the first 15 insns, just return
2303 - at a call or indirect branch, after the first 15 insns, force it to
2304 an even address and return
2305 - at any unconditional branch, after the first 15 insns, force it to
2306 an even address.
2307 At the end of the search, insert an hbrp within 4 insns of FIRST,
2308 and an hbrp within 16 instructions of FIRST.
2309 */
2310 static void
2311 insert_hbrp_for_ilb_runout (rtx_insn *first)
2312 {
2313 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2314 int addr = 0, length, first_addr = -1;
2315 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2316 int insert_lnop_after = 0;
2317 for (insn = first; insn; insn = NEXT_INSN (insn))
2318 if (INSN_P (insn))
2319 {
2320 if (first_addr == -1)
2321 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2322 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2323 length = get_attr_length (insn);
2324
2325 if (before_4 == 0 && addr + length >= 4 * 4)
2326 before_4 = insn;
2327 /* We test for 14 instructions because the first hbrp will add
2328 up to 2 instructions. */
2329 if (before_16 == 0 && addr + length >= 14 * 4)
2330 before_16 = insn;
2331
2332 if (INSN_CODE (insn) == CODE_FOR_hbr)
2333 {
2334 /* Make sure an hbrp is at least 2 cycles away from a hint.
2335 Insert an lnop after the hbrp when necessary. */
2336 if (before_4 == 0 && addr > 0)
2337 {
2338 before_4 = insn;
2339 insert_lnop_after |= 1;
2340 }
2341 else if (before_4 && addr <= 4 * 4)
2342 insert_lnop_after |= 1;
2343 if (before_16 == 0 && addr > 10 * 4)
2344 {
2345 before_16 = insn;
2346 insert_lnop_after |= 2;
2347 }
2348 else if (before_16 && addr <= 14 * 4)
2349 insert_lnop_after |= 2;
2350 }
2351
2352 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2353 {
2354 if (addr < hbrp_addr0)
2355 hbrp_addr0 = addr;
2356 else if (addr < hbrp_addr1)
2357 hbrp_addr1 = addr;
2358 }
2359
2360 if (CALL_P (insn) || JUMP_P (insn))
2361 {
2362 if (HINTED_P (insn))
2363 return;
2364
2365 /* Any branch after the first 15 insns should be on an even
2366 address to avoid a special case branch. There might be
2367 some nops and/or hbrps inserted, so we test after 10
2368 insns. */
2369 if (addr > 10 * 4)
2370 SCHED_ON_EVEN_P (insn) = 1;
2371 }
2372
2373 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2374 return;
2375
2376
2377 if (addr + length >= 32 * 4)
2378 {
2379 gcc_assert (before_4 && before_16);
2380 if (hbrp_addr0 > 4 * 4)
2381 {
2382 insn =
2383 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2384 recog_memoized (insn);
2385 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2386 INSN_ADDRESSES_NEW (insn,
2387 INSN_ADDRESSES (INSN_UID (before_4)));
2388 PUT_MODE (insn, GET_MODE (before_4));
2389 PUT_MODE (before_4, TImode);
2390 if (insert_lnop_after & 1)
2391 {
2392 insn = emit_insn_before (gen_lnop (), before_4);
2393 recog_memoized (insn);
2394 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2395 INSN_ADDRESSES_NEW (insn,
2396 INSN_ADDRESSES (INSN_UID (before_4)));
2397 PUT_MODE (insn, TImode);
2398 }
2399 }
2400 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2401 && hbrp_addr1 > 16 * 4)
2402 {
2403 insn =
2404 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2405 recog_memoized (insn);
2406 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2407 INSN_ADDRESSES_NEW (insn,
2408 INSN_ADDRESSES (INSN_UID (before_16)));
2409 PUT_MODE (insn, GET_MODE (before_16));
2410 PUT_MODE (before_16, TImode);
2411 if (insert_lnop_after & 2)
2412 {
2413 insn = emit_insn_before (gen_lnop (), before_16);
2414 recog_memoized (insn);
2415 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2416 INSN_ADDRESSES_NEW (insn,
2417 INSN_ADDRESSES (INSN_UID
2418 (before_16)));
2419 PUT_MODE (insn, TImode);
2420 }
2421 }
2422 return;
2423 }
2424 }
2425 else if (BARRIER_P (insn))
2426 return;
2427
2428 }
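/* Rough numbers for the function above: BEFORE_4 is the first insn whose
   end crosses 4*4 = 16 bytes from FIRST, and BEFORE_16 the first crossing
   14*4 = 56 bytes.  If 32*4 = 128 bytes go by without a call, hinted
   branch or tablejump, an iprefetch is emitted before BEFORE_4 unless an
   hbrp already sits in the first 16 bytes, and a second one before
   BEFORE_16 unless, roughly, an existing hbrp already falls in the
   16..64 byte window. */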
2429
2430 /* The SPU might hang when it executes 48 inline instructions after a
2431 hinted branch jumps to its hinted target. The beginning of a
2432 function and the return from a call might have been hinted, and
2433 must be handled as well. To prevent a hang we insert 2 hbrps. The
2434 first should be within 6 insns of the branch target. The second
2435 should be within 22 insns of the branch target. When determining
2436 if hbrps are necessary, we look for only 32 inline instructions,
2437 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2438 when inserting new hbrps, we insert them within 4 and 16 insns of
2439 the target. */
2440 static void
2441 insert_hbrp (void)
2442 {
2443 rtx_insn *insn;
2444 if (TARGET_SAFE_HINTS)
2445 {
2446 shorten_branches (get_insns ());
2447 /* Insert hbrp at beginning of function */
2448 insn = next_active_insn (get_insns ());
2449 if (insn)
2450 insert_hbrp_for_ilb_runout (insn);
2451 /* Insert hbrp after hinted targets. */
2452 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2453 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2454 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2455 }
2456 }
2457
2458 static int in_spu_reorg;
2459
2460 static void
2461 spu_var_tracking (void)
2462 {
2463 if (flag_var_tracking)
2464 {
2465 df_analyze ();
2466 timevar_push (TV_VAR_TRACKING);
2467 variable_tracking_main ();
2468 timevar_pop (TV_VAR_TRACKING);
2469 df_finish_pass (false);
2470 }
2471 }
2472
2473 /* Insert branch hints. There are no branch optimizations after this
2474 pass, so it's safe to set our branch hints now. */
2475 static void
2476 spu_machine_dependent_reorg (void)
2477 {
2478 sbitmap blocks;
2479 basic_block bb;
2480 rtx_insn *branch, *insn;
2481 rtx branch_target = 0;
2482 int branch_addr = 0, insn_addr, required_dist = 0;
2483 int i;
2484 unsigned int j;
2485
2486 if (!TARGET_BRANCH_HINTS || optimize == 0)
2487 {
2488 /* We still do it for unoptimized code because an external
2489 function might have hinted a call or return. */
2490 compute_bb_for_insn ();
2491 insert_hbrp ();
2492 pad_bb ();
2493 spu_var_tracking ();
2494 free_bb_for_insn ();
2495 return;
2496 }
2497
2498 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2499 bitmap_clear (blocks);
2500
2501 in_spu_reorg = 1;
2502 compute_bb_for_insn ();
2503
2504 /* (Re-)discover loops so that bb->loop_father can be used
2505 in the analysis below. */
2506 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2507
2508 compact_blocks ();
2509
2510 spu_bb_info =
2511 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2512 sizeof (struct spu_bb_info));
2513
2514 /* We need exact insn addresses and lengths. */
2515 shorten_branches (get_insns ());
2516
2517 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2518 {
2519 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2520 branch = 0;
2521 if (spu_bb_info[i].prop_jump)
2522 {
2523 branch = spu_bb_info[i].prop_jump;
2524 branch_target = get_branch_target (branch);
2525 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2526 required_dist = spu_hint_dist;
2527 }
2528 /* Search from end of a block to beginning. In this loop, find
2529 jumps which need a branch and emit them only when:
2530 - it's an indirect branch and we're at the insn which sets
2531 the register
2532 - we're at an insn that will invalidate the hint. e.g., a
2533 call, another hint insn, inline asm that clobbers $hbr, and
2534 some inlined operations (divmodsi4). Don't consider jumps
2535 because they are only at the end of a block and are
2536 considered when we are deciding whether to propagate
2537 - we're getting too far away from the branch. The hbr insns
2538 only have a signed 10 bit offset
2539 We go back as far as possible so the branch will be considered
2540 for propagation when we get to the beginning of the block. */
2541 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2542 {
2543 if (INSN_P (insn))
2544 {
2545 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2546 if (branch
2547 && ((GET_CODE (branch_target) == REG
2548 && set_of (branch_target, insn) != NULL_RTX)
2549 || insn_clobbers_hbr (insn)
2550 || branch_addr - insn_addr > 600))
2551 {
2552 rtx_insn *next = NEXT_INSN (insn);
2553 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2554 if (insn != BB_END (bb)
2555 && branch_addr - next_addr >= required_dist)
2556 {
2557 if (dump_file)
2558 fprintf (dump_file,
2559 "hint for %i in block %i before %i\n",
2560 INSN_UID (branch), bb->index,
2561 INSN_UID (next));
2562 spu_emit_branch_hint (next, branch, branch_target,
2563 branch_addr - next_addr, blocks);
2564 }
2565 branch = 0;
2566 }
2567
2568 /* JUMP_P will only be true at the end of a block. When
2569 branch is already set it means we've previously decided
2570 to propagate a hint for that branch into this block. */
2571 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2572 {
2573 branch = 0;
2574 if ((branch_target = get_branch_target (insn)))
2575 {
2576 branch = insn;
2577 branch_addr = insn_addr;
2578 required_dist = spu_hint_dist;
2579 }
2580 }
2581 }
2582 if (insn == BB_HEAD (bb))
2583 break;
2584 }
2585
2586 if (branch)
2587 {
2588 /* If we haven't emitted a hint for this branch yet, it might
2589 be profitable to emit it in one of the predecessor blocks,
2590 especially for loops. */
2591 rtx_insn *bbend;
2592 basic_block prev = 0, prop = 0, prev2 = 0;
2593 int loop_exit = 0, simple_loop = 0;
2594 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2595
2596 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2597 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2598 prev = EDGE_PRED (bb, j)->src;
2599 else
2600 prev2 = EDGE_PRED (bb, j)->src;
2601
2602 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2603 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2604 loop_exit = 1;
2605 else if (EDGE_SUCC (bb, j)->dest == bb)
2606 simple_loop = 1;
2607
2608 /* If this branch is a loop exit then propagate to previous
2609 fallthru block. This catches the cases when it is a simple
2610 loop or when there is an initial branch into the loop. */
2611 if (prev && (loop_exit || simple_loop)
2612 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2613 prop = prev;
2614
2615 /* If there is only one adjacent predecessor, don't propagate
2616 outside this loop. */
2617 else if (prev && single_pred_p (bb)
2618 && prev->loop_father == bb->loop_father)
2619 prop = prev;
2620
2621 /* If this is the JOIN block of a simple IF-THEN then
2622 propagate the hint to the HEADER block. */
2623 else if (prev && prev2
2624 && EDGE_COUNT (bb->preds) == 2
2625 && EDGE_COUNT (prev->preds) == 1
2626 && EDGE_PRED (prev, 0)->src == prev2
2627 && prev2->loop_father == bb->loop_father
2628 && GET_CODE (branch_target) != REG)
2629 prop = prev;
2630
2631 /* Don't propagate when:
2632 - this is a simple loop and the hint would be too far
2633 - this is not a simple loop and there are 16 insns in
2634 this block already
2635 - the predecessor block ends in a branch that will be
2636 hinted
2637 - the predecessor block ends in an insn that invalidates
2638 the hint */
2639 if (prop
2640 && prop->index >= 0
2641 && (bbend = BB_END (prop))
2642 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2643 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2644 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2645 {
2646 if (dump_file)
2647 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2648 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2649 bb->index, prop->index, bb_loop_depth (bb),
2650 INSN_UID (branch), loop_exit, simple_loop,
2651 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2652
2653 spu_bb_info[prop->index].prop_jump = branch;
2654 spu_bb_info[prop->index].bb_index = i;
2655 }
2656 else if (branch_addr - next_addr >= required_dist)
2657 {
2658 if (dump_file)
2659 fprintf (dump_file, "hint for %i in block %i before %i\n",
2660 INSN_UID (branch), bb->index,
2661 INSN_UID (NEXT_INSN (insn)));
2662 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2663 branch_addr - next_addr, blocks);
2664 }
2665 branch = 0;
2666 }
2667 }
2668 free (spu_bb_info);
2669
2670 if (!bitmap_empty_p (blocks))
2671 find_many_sub_basic_blocks (blocks);
2672
2673 /* We have to schedule to make sure alignment is ok. */
2674 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2675
2676 /* The hints need to be scheduled, so call it again. */
2677 schedule_insns ();
2678 df_finish_pass (true);
2679
2680 insert_hbrp ();
2681
2682 pad_bb ();
2683
2684 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2685 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2686 {
2687 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2688 between its branch label and the branch. We don't move the
2689 label because GCC expects it at the beginning of the block. */
2690 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2691 rtx label_ref = XVECEXP (unspec, 0, 0);
2692 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2693 rtx_insn *branch;
2694 int offset = 0;
2695 for (branch = NEXT_INSN (label);
2696 !JUMP_P (branch) && !CALL_P (branch);
2697 branch = NEXT_INSN (branch))
2698 if (NONJUMP_INSN_P (branch))
2699 offset += get_attr_length (branch);
2700 if (offset > 0)
2701 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2702 }
2703
2704 spu_var_tracking ();
2705
2706 loop_optimizer_finalize ();
2707
2708 free_bb_for_insn ();
2709
2710 in_spu_reorg = 0;
2711 }
2712 \f
2713
2714 /* Insn scheduling routines, primarily for dual issue. */
2715 static int
2716 spu_sched_issue_rate (void)
2717 {
2718 return 2;
2719 }
2720
2721 static int
2722 uses_ls_unit(rtx_insn *insn)
2723 {
2724 rtx set = single_set (insn);
2725 if (set != 0
2726 && (GET_CODE (SET_DEST (set)) == MEM
2727 || GET_CODE (SET_SRC (set)) == MEM))
2728 return 1;
2729 return 0;
2730 }
2731
2732 static int
2733 get_pipe (rtx_insn *insn)
2734 {
2735 enum attr_type t;
2736 /* Handle inline asm */
2737 if (INSN_CODE (insn) == -1)
2738 return -1;
2739 t = get_attr_type (insn);
2740 switch (t)
2741 {
2742 case TYPE_CONVERT:
2743 return -2;
2744 case TYPE_MULTI0:
2745 return -1;
2746
2747 case TYPE_FX2:
2748 case TYPE_FX3:
2749 case TYPE_SPR:
2750 case TYPE_NOP:
2751 case TYPE_FXB:
2752 case TYPE_FPD:
2753 case TYPE_FP6:
2754 case TYPE_FP7:
2755 return 0;
2756
2757 case TYPE_LNOP:
2758 case TYPE_SHUF:
2759 case TYPE_LOAD:
2760 case TYPE_STORE:
2761 case TYPE_BR:
2762 case TYPE_MULTI1:
2763 case TYPE_HBR:
2764 case TYPE_IPREFETCH:
2765 return 1;
2766 default:
2767 abort ();
2768 }
2769 }
2770
2771
2772 /* haifa-sched.c has a static variable that keeps track of the current
2773 cycle. It is passed to spu_sched_reorder, and we record it here for
2774 use by spu_sched_variable_issue. It won't be accurate if the
2775 scheduler updates its clock_var between the two calls. */
2776 static int clock_var;
2777
2778 /* This is used to keep track of insn alignment. Set to 0 at the
2779 beginning of each block and increased by the "length" attr of each
2780 insn scheduled. */
2781 static int spu_sched_length;
2782
2783 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2784 ready list appropriately in spu_sched_reorder(). */
2785 static int pipe0_clock;
2786 static int pipe1_clock;
2787
2788 static int prev_clock_var;
2789
2790 static int prev_priority;
2791
2792 /* The SPU needs to load the next ilb sometime during the execution of
2793 the previous ilb. There is a potential conflict if every cycle has a
2794 load or store. To avoid the conflict we make sure the load/store
2795 unit is free for at least one cycle during the execution of insns in
2796 the previous ilb. */
2797 static int spu_ls_first;
2798 static int prev_ls_clock;
2799
2800 static void
2801 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2802 int max_ready ATTRIBUTE_UNUSED)
2803 {
2804 spu_sched_length = 0;
2805 }
2806
2807 static void
2808 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2809 int max_ready ATTRIBUTE_UNUSED)
2810 {
2811 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2812 {
2813 /* When any block might be at least 8-byte aligned, assume they
2814 will all be at least 8-byte aligned to make sure dual issue
2815 works out correctly. */
2816 spu_sched_length = 0;
2817 }
2818 spu_ls_first = INT_MAX;
2819 clock_var = -1;
2820 prev_ls_clock = -1;
2821 pipe0_clock = -1;
2822 pipe1_clock = -1;
2823 prev_clock_var = -1;
2824 prev_priority = -1;
2825 }
2826
2827 static int
2828 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2829 int verbose ATTRIBUTE_UNUSED,
2830 rtx_insn *insn, int more)
2831 {
2832 int len;
2833 int p;
2834 if (GET_CODE (PATTERN (insn)) == USE
2835 || GET_CODE (PATTERN (insn)) == CLOBBER
2836 || (len = get_attr_length (insn)) == 0)
2837 return more;
2838
2839 spu_sched_length += len;
2840
2841 /* Reset on inline asm */
2842 if (INSN_CODE (insn) == -1)
2843 {
2844 spu_ls_first = INT_MAX;
2845 pipe0_clock = -1;
2846 pipe1_clock = -1;
2847 return 0;
2848 }
2849 p = get_pipe (insn);
2850 if (p == 0)
2851 pipe0_clock = clock_var;
2852 else
2853 pipe1_clock = clock_var;
2854
2855 if (in_spu_reorg)
2856 {
2857 if (clock_var - prev_ls_clock > 1
2858 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2859 spu_ls_first = INT_MAX;
2860 if (uses_ls_unit (insn))
2861 {
2862 if (spu_ls_first == INT_MAX)
2863 spu_ls_first = spu_sched_length;
2864 prev_ls_clock = clock_var;
2865 }
2866
2867 /* The scheduler hasn't inserted the nop, but we will later on.
2868 Include those nops in spu_sched_length. */
2869 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2870 spu_sched_length += 4;
2871 prev_clock_var = clock_var;
2872
2873 /* more is -1 when called from spu_sched_reorder for new insns
2874 that don't have INSN_PRIORITY. */
2875 if (more >= 0)
2876 prev_priority = INSN_PRIORITY (insn);
2877 }
2878
2879 /* Always try issuing more insns. spu_sched_reorder will decide
2880 when the cycle should be advanced. */
2881 return 1;
2882 }
2883
2884 /* This function is called for both TARGET_SCHED_REORDER and
2885 TARGET_SCHED_REORDER2. */
2886 static int
2887 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2888 rtx_insn **ready, int *nreadyp, int clock)
2889 {
2890 int i, nready = *nreadyp;
2891 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2892 rtx_insn *insn;
2893
2894 clock_var = clock;
2895
2896 if (nready <= 0 || pipe1_clock >= clock)
2897 return 0;
2898
2899 /* Find any rtl insns that don't generate assembly insns and schedule
2900 them first. */
2901 for (i = nready - 1; i >= 0; i--)
2902 {
2903 insn = ready[i];
2904 if (INSN_CODE (insn) == -1
2905 || INSN_CODE (insn) == CODE_FOR_blockage
2906 || (INSN_P (insn) && get_attr_length (insn) == 0))
2907 {
2908 ready[i] = ready[nready - 1];
2909 ready[nready - 1] = insn;
2910 return 1;
2911 }
2912 }
2913
2914 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2915 for (i = 0; i < nready; i++)
2916 if (INSN_CODE (ready[i]) != -1)
2917 {
2918 insn = ready[i];
2919 switch (get_attr_type (insn))
2920 {
2921 default:
2922 case TYPE_MULTI0:
2923 case TYPE_CONVERT:
2924 case TYPE_FX2:
2925 case TYPE_FX3:
2926 case TYPE_SPR:
2927 case TYPE_NOP:
2928 case TYPE_FXB:
2929 case TYPE_FPD:
2930 case TYPE_FP6:
2931 case TYPE_FP7:
2932 pipe_0 = i;
2933 break;
2934 case TYPE_LOAD:
2935 case TYPE_STORE:
2936 pipe_ls = i;
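/* Fall through: loads and stores are also pipe-1 insns (see
   get_pipe), so pipe_1 is set for them as well. */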
2937 case TYPE_LNOP:
2938 case TYPE_SHUF:
2939 case TYPE_BR:
2940 case TYPE_MULTI1:
2941 case TYPE_HBR:
2942 pipe_1 = i;
2943 break;
2944 case TYPE_IPREFETCH:
2945 pipe_hbrp = i;
2946 break;
2947 }
2948 }
2949
2950 /* In the first scheduling phase, schedule loads and stores together
2951 to increase the chance they will get merged during postreload CSE. */
2952 if (!reload_completed && pipe_ls >= 0)
2953 {
2954 insn = ready[pipe_ls];
2955 ready[pipe_ls] = ready[nready - 1];
2956 ready[nready - 1] = insn;
2957 return 1;
2958 }
2959
2960 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2961 if (pipe_hbrp >= 0)
2962 pipe_1 = pipe_hbrp;
2963
2964 /* When we have loads/stores in every cycle of the last 15 insns and
2965 we are about to schedule another load/store, emit an hbrp insn
2966 instead. */
2967 if (in_spu_reorg
2968 && spu_sched_length - spu_ls_first >= 4 * 15
2969 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2970 {
2971 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2972 recog_memoized (insn);
2973 if (pipe0_clock < clock)
2974 PUT_MODE (insn, TImode);
2975 spu_sched_variable_issue (file, verbose, insn, -1);
2976 return 0;
2977 }
2978
2979 /* In general, we want to emit nops to increase dual issue, but dual
2980 issue isn't faster when one of the insns could be scheduled later
2981 without affecting the critical path. We look at INSN_PRIORITY to
2982 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2983 used to affect it. */
2984 if (in_spu_reorg && spu_dual_nops < 10)
2985 {
2986 /* When we are at an even address and we are not issuing nops to
2987 improve scheduling then we need to advance the cycle. */
2988 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2989 && (spu_dual_nops == 0
2990 || (pipe_1 != -1
2991 && prev_priority >
2992 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2993 return 0;
2994
2995 /* When at an odd address, schedule the highest priority insn
2996 without considering pipeline. */
2997 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2998 && (spu_dual_nops == 0
2999 || (prev_priority >
3000 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3001 return 1;
3002 }
3003
3004
3005 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3006 pipe0 insn in the ready list, schedule it. */
3007 if (pipe0_clock < clock && pipe_0 >= 0)
3008 schedule_i = pipe_0;
3009
3010 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3011 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3012 else
3013 schedule_i = pipe_1;
3014
3015 if (schedule_i > -1)
3016 {
3017 insn = ready[schedule_i];
3018 ready[schedule_i] = ready[nready - 1];
3019 ready[nready - 1] = insn;
3020 return 1;
3021 }
3022 return 0;
3023 }
3024
3025 /* INSN is dependent on DEP_INSN. */
3026 static int
3027 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3028 {
3029 rtx set;
3030
3031 /* The blockage pattern is used to prevent instructions from being
3032 moved across it and has no cost. */
3033 if (INSN_CODE (insn) == CODE_FOR_blockage
3034 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3035 return 0;
3036
3037 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3038 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3039 return 0;
3040
3041 /* Make sure hbrps are spread out. */
3042 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3043 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3044 return 8;
3045
3046 /* Make sure hints and hbrps are 2 cycles apart. */
3047 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3048 || INSN_CODE (insn) == CODE_FOR_hbr)
3049 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3050 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3051 return 2;
3052
3053 /* An hbrp has no real dependency on other insns. */
3054 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3055 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3056 return 0;
3057
3058 /* Assuming that it is unlikely an argument register will be used in
3059 the first cycle of the called function, we reduce the cost for
3060 slightly better scheduling of dep_insn. When not hinted, the
3061 mispredicted branch would hide the cost as well. */
3062 if (CALL_P (insn))
3063 {
3064 rtx target = get_branch_target (insn);
3065 if (GET_CODE (target) != REG || !set_of (target, insn))
3066 return cost - 2;
3067 return cost;
3068 }
3069
3070 /* And when returning from a function, let's assume the return values
3071 are completed sooner too. */
3072 if (CALL_P (dep_insn))
3073 return cost - 2;
3074
3075 /* Make sure an instruction that loads from the back chain is scheduled
3076 away from the return instruction so a hint is more likely to get
3077 issued. */
3078 if (INSN_CODE (insn) == CODE_FOR__return
3079 && (set = single_set (dep_insn))
3080 && GET_CODE (SET_DEST (set)) == REG
3081 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3082 return 20;
3083
3084 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3085 scheduler makes every insn in a block anti-dependent on the final
3086 jump_insn. We adjust here so higher cost insns will get scheduled
3087 earlier. */
3088 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3089 return insn_cost (dep_insn) - 3;
3090
3091 return cost;
3092 }
3093 \f
3094 /* Create a CONST_DOUBLE from a string. */
3095 rtx
3096 spu_float_const (const char *string, machine_mode mode)
3097 {
3098 REAL_VALUE_TYPE value;
3099 value = REAL_VALUE_ATOF (string, mode);
3100 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3101 }
3102
3103 int
3104 spu_constant_address_p (rtx x)
3105 {
3106 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3107 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3108 || GET_CODE (x) == HIGH);
3109 }
3110
3111 static enum spu_immediate
3112 which_immediate_load (HOST_WIDE_INT val)
3113 {
3114 gcc_assert (val == trunc_int_for_mode (val, SImode));
3115
3116 if (val >= -0x8000 && val <= 0x7fff)
3117 return SPU_IL;
3118 if (val >= 0 && val <= 0x3ffff)
3119 return SPU_ILA;
3120 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3121 return SPU_ILH;
3122 if ((val & 0xffff) == 0)
3123 return SPU_ILHU;
3124
3125 return SPU_NONE;
3126 }
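/* Worked examples for which_immediate_load: 0x1234 fits the signed
   16-bit il range (SPU_IL); 0x23456 needs the 18-bit ila (SPU_ILA);
   0x12341234 has equal halfwords (SPU_ILH); 0x56780000 has a zero low
   halfword (SPU_ILHU). */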
3127
3128 /* Return true when OP can be loaded by one of the il instructions, or
3129 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3130 int
3131 immediate_load_p (rtx op, machine_mode mode)
3132 {
3133 if (CONSTANT_P (op))
3134 {
3135 enum immediate_class c = classify_immediate (op, mode);
3136 return c == IC_IL1 || c == IC_IL1s
3137 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3138 }
3139 return 0;
3140 }
3141
3142 /* Return true if the first SIZE bytes of ARR form a constant that can be
3143 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3144 represent the size and offset of the instruction to use. */
3145 static int
3146 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3147 {
3148 int cpat, run, i, start;
3149 cpat = 1;
3150 run = 0;
3151 start = -1;
3152 for (i = 0; i < size && cpat; i++)
3153 if (arr[i] != i+16)
3154 {
3155 if (!run)
3156 {
3157 start = i;
3158 if (arr[i] == 3)
3159 run = 1;
3160 else if (arr[i] == 2 && arr[i+1] == 3)
3161 run = 2;
3162 else if (arr[i] == 0)
3163 {
3164 while (arr[i+run] == run && i+run < 16)
3165 run++;
3166 if (run != 4 && run != 8)
3167 cpat = 0;
3168 }
3169 else
3170 cpat = 0;
3171 if ((i & (run-1)) != 0)
3172 cpat = 0;
3173 i += run;
3174 }
3175 else
3176 cpat = 0;
3177 }
3178 if (cpat && (run || size < 16))
3179 {
3180 if (run == 0)
3181 run = 1;
3182 if (prun)
3183 *prun = run;
3184 if (pstart)
3185 *pstart = start == -1 ? 16-run : start;
3186 return 1;
3187 }
3188 return 0;
3189 }
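/* For illustration: with SIZE == 16, the byte array

     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f

   (hex) is accepted with *PRUN == 4 and *PSTART == 4, i.e. the kind of
   control word cwd produces for a word insert at byte offset 4. */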
3190
3191 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3192 it into a register. MODE is only valid when OP is a CONST_INT. */
3193 static enum immediate_class
3194 classify_immediate (rtx op, machine_mode mode)
3195 {
3196 HOST_WIDE_INT val;
3197 unsigned char arr[16];
3198 int i, j, repeated, fsmbi, repeat;
3199
3200 gcc_assert (CONSTANT_P (op));
3201
3202 if (GET_MODE (op) != VOIDmode)
3203 mode = GET_MODE (op);
3204
3205 /* A V4SI const_vector with all identical symbols is ok. */
3206 if (!flag_pic
3207 && mode == V4SImode
3208 && GET_CODE (op) == CONST_VECTOR
3209 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3210 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3211 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3212 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3213 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3214 op = CONST_VECTOR_ELT (op, 0);
3215
3216 switch (GET_CODE (op))
3217 {
3218 case SYMBOL_REF:
3219 case LABEL_REF:
3220 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3221
3222 case CONST:
3223 /* We can never know if the resulting address fits in 18 bits and can be
3224 loaded with ila. For now, assume the address will not overflow if
3225 the displacement is "small" (fits 'K' constraint). */
3226 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3227 {
3228 rtx sym = XEXP (XEXP (op, 0), 0);
3229 rtx cst = XEXP (XEXP (op, 0), 1);
3230
3231 if (GET_CODE (sym) == SYMBOL_REF
3232 && GET_CODE (cst) == CONST_INT
3233 && satisfies_constraint_K (cst))
3234 return IC_IL1s;
3235 }
3236 return IC_IL2s;
3237
3238 case HIGH:
3239 return IC_IL1s;
3240
3241 case CONST_VECTOR:
3242 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3243 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3244 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3245 return IC_POOL;
3246 /* Fall through. */
3247
3248 case CONST_INT:
3249 case CONST_DOUBLE:
3250 constant_to_array (mode, op, arr);
3251
3252 /* Check that each 4-byte slot is identical. */
3253 repeated = 1;
3254 for (i = 4; i < 16; i += 4)
3255 for (j = 0; j < 4; j++)
3256 if (arr[j] != arr[i + j])
3257 repeated = 0;
3258
3259 if (repeated)
3260 {
3261 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3262 val = trunc_int_for_mode (val, SImode);
3263
3264 if (which_immediate_load (val) != SPU_NONE)
3265 return IC_IL1;
3266 }
3267
3268 /* Any mode of 2 bytes or smaller can be loaded with an il
3269 instruction. */
3270 gcc_assert (GET_MODE_SIZE (mode) > 2);
3271
3272 fsmbi = 1;
3273 repeat = 0;
3274 for (i = 0; i < 16 && fsmbi; i++)
3275 if (arr[i] != 0 && repeat == 0)
3276 repeat = arr[i];
3277 else if (arr[i] != 0 && arr[i] != repeat)
3278 fsmbi = 0;
3279 if (fsmbi)
3280 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3281
3282 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3283 return IC_CPAT;
3284
3285 if (repeated)
3286 return IC_IL2;
3287
3288 return IC_POOL;
3289 default:
3290 break;
3291 }
3292 gcc_unreachable ();
3293 }
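/* For illustration: a TImode or V16QImode constant whose only nonzero
   byte is 0xff (so its 4-byte words differ) is classified IC_FSMBI; if
   the nonzero bytes were all 0x80 instead it would be IC_FSMBI2, and a
   value with 0x12341234 repeated in every word is IC_IL1 via
   which_immediate_load. */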
3294
3295 static enum spu_immediate
3296 which_logical_immediate (HOST_WIDE_INT val)
3297 {
3298 gcc_assert (val == trunc_int_for_mode (val, SImode));
3299
3300 if (val >= -0x200 && val <= 0x1ff)
3301 return SPU_ORI;
3302 if (val >= 0 && val <= 0xffff)
3303 return SPU_IOHL;
3304 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3305 {
3306 val = trunc_int_for_mode (val, HImode);
3307 if (val >= -0x200 && val <= 0x1ff)
3308 return SPU_ORHI;
3309 if ((val & 0xff) == ((val >> 8) & 0xff))
3310 {
3311 val = trunc_int_for_mode (val, QImode);
3312 if (val >= -0x200 && val <= 0x1ff)
3313 return SPU_ORBI;
3314 }
3315 }
3316 return SPU_NONE;
3317 }
3318
3319 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3320 CONST_DOUBLEs. */
3321 static int
3322 const_vector_immediate_p (rtx x)
3323 {
3324 int i;
3325 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3326 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3327 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3328 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3329 return 0;
3330 return 1;
3331 }
3332
3333 int
3334 logical_immediate_p (rtx op, machine_mode mode)
3335 {
3336 HOST_WIDE_INT val;
3337 unsigned char arr[16];
3338 int i, j;
3339
3340 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3341 || GET_CODE (op) == CONST_VECTOR);
3342
3343 if (GET_CODE (op) == CONST_VECTOR
3344 && !const_vector_immediate_p (op))
3345 return 0;
3346
3347 if (GET_MODE (op) != VOIDmode)
3348 mode = GET_MODE (op);
3349
3350 constant_to_array (mode, op, arr);
3351
3352 /* Check that bytes are repeated. */
3353 for (i = 4; i < 16; i += 4)
3354 for (j = 0; j < 4; j++)
3355 if (arr[j] != arr[i + j])
3356 return 0;
3357
3358 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3359 val = trunc_int_for_mode (val, SImode);
3360
3361 i = which_logical_immediate (val);
3362 return i != SPU_NONE && i != SPU_IOHL;
3363 }
3364
3365 int
3366 iohl_immediate_p (rtx op, machine_mode mode)
3367 {
3368 HOST_WIDE_INT val;
3369 unsigned char arr[16];
3370 int i, j;
3371
3372 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3373 || GET_CODE (op) == CONST_VECTOR);
3374
3375 if (GET_CODE (op) == CONST_VECTOR
3376 && !const_vector_immediate_p (op))
3377 return 0;
3378
3379 if (GET_MODE (op) != VOIDmode)
3380 mode = GET_MODE (op);
3381
3382 constant_to_array (mode, op, arr);
3383
3384 /* Check that bytes are repeated. */
3385 for (i = 4; i < 16; i += 4)
3386 for (j = 0; j < 4; j++)
3387 if (arr[j] != arr[i + j])
3388 return 0;
3389
3390 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3391 val = trunc_int_for_mode (val, SImode);
3392
3393 return val >= 0 && val <= 0xffff;
3394 }
3395
3396 int
3397 arith_immediate_p (rtx op, machine_mode mode,
3398 HOST_WIDE_INT low, HOST_WIDE_INT high)
3399 {
3400 HOST_WIDE_INT val;
3401 unsigned char arr[16];
3402 int bytes, i, j;
3403
3404 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3405 || GET_CODE (op) == CONST_VECTOR);
3406
3407 if (GET_CODE (op) == CONST_VECTOR
3408 && !const_vector_immediate_p (op))
3409 return 0;
3410
3411 if (GET_MODE (op) != VOIDmode)
3412 mode = GET_MODE (op);
3413
3414 constant_to_array (mode, op, arr);
3415
3416 if (VECTOR_MODE_P (mode))
3417 mode = GET_MODE_INNER (mode);
3418
3419 bytes = GET_MODE_SIZE (mode);
3420 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3421
3422 /* Check that bytes are repeated. */
3423 for (i = bytes; i < 16; i += bytes)
3424 for (j = 0; j < bytes; j++)
3425 if (arr[j] != arr[i + j])
3426 return 0;
3427
3428 val = arr[0];
3429 for (j = 1; j < bytes; j++)
3430 val = (val << 8) | arr[j];
3431
3432 val = trunc_int_for_mode (val, mode);
3433
3434 return val >= low && val <= high;
3435 }
3436
3437 /* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
3438 OP is 2^scale with scale >= LOW && scale <= HIGH. When OP is a vector,
3439 all entries must be the same. */
3440 bool
3441 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3442 {
3443 machine_mode int_mode;
3444 HOST_WIDE_INT val;
3445 unsigned char arr[16];
3446 int bytes, i, j;
3447
3448 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3449 || GET_CODE (op) == CONST_VECTOR);
3450
3451 if (GET_CODE (op) == CONST_VECTOR
3452 && !const_vector_immediate_p (op))
3453 return 0;
3454
3455 if (GET_MODE (op) != VOIDmode)
3456 mode = GET_MODE (op);
3457
3458 constant_to_array (mode, op, arr);
3459
3460 if (VECTOR_MODE_P (mode))
3461 mode = GET_MODE_INNER (mode);
3462
3463 bytes = GET_MODE_SIZE (mode);
3464 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3465
3466 /* Check that bytes are repeated. */
3467 for (i = bytes; i < 16; i += bytes)
3468 for (j = 0; j < bytes; j++)
3469 if (arr[j] != arr[i + j])
3470 return 0;
3471
3472 val = arr[0];
3473 for (j = 1; j < bytes; j++)
3474 val = (val << 8) | arr[j];
3475
3476 val = trunc_int_for_mode (val, int_mode);
3477
3478 /* Currently, we only handle SFmode. */
3479 gcc_assert (mode == SFmode);
3480 if (mode == SFmode)
3481 {
3482 int exp = (val >> 23) - 127;
3483 return val > 0 && (val & 0x007fffff) == 0
3484 && exp >= low && exp <= high;
3485 }
3486 return FALSE;
3487 }
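/* For illustration, assuming the usual IEEE single-precision encoding:
   the SFmode constant 8.0f has the bit pattern 0x41000000, a zero
   mantissa and an unbiased exponent of 3, so exp2_immediate_p accepts it
   whenever LOW <= 3 && 3 <= HIGH. */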
3488
3489 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3490
3491 static bool
3492 ea_symbol_ref_p (const_rtx x)
3493 {
3494 tree decl;
3495
3496 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3497 {
3498 rtx plus = XEXP (x, 0);
3499 rtx op0 = XEXP (plus, 0);
3500 rtx op1 = XEXP (plus, 1);
3501 if (GET_CODE (op1) == CONST_INT)
3502 x = op0;
3503 }
3504
3505 return (GET_CODE (x) == SYMBOL_REF
3506 && (decl = SYMBOL_REF_DECL (x)) != 0
3507 && TREE_CODE (decl) == VAR_DECL
3508 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3509 }
3510
3511 /* We accept:
3512 - any 32-bit constant (SImode, SFmode)
3513 - any constant that can be generated with fsmbi (any mode)
3514 - a 64-bit constant where the high and low bits are identical
3515 (DImode, DFmode)
3516 - a 128-bit constant where the four 32-bit words match. */
3517 bool
3518 spu_legitimate_constant_p (machine_mode mode, rtx x)
3519 {
3520 subrtx_iterator::array_type array;
3521 if (GET_CODE (x) == HIGH)
3522 x = XEXP (x, 0);
3523
3524 /* Reject any __ea qualified reference. These can't appear in
3525 instructions but must be forced to the constant pool. */
3526 FOR_EACH_SUBRTX (iter, array, x, ALL)
3527 if (ea_symbol_ref_p (*iter))
3528 return 0;
3529
3530 /* V4SI with all identical symbols is valid. */
3531 if (!flag_pic
3532 && mode == V4SImode
3533 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3534 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3535 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3536 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3537 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3538 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3539
3540 if (GET_CODE (x) == CONST_VECTOR
3541 && !const_vector_immediate_p (x))
3542 return 0;
3543 return 1;
3544 }
3545
3546 /* Valid addresses are:
3547 - symbol_ref, label_ref, const
3548 - reg
3549 - reg + const_int, where const_int is 16 byte aligned
3550 - reg + reg, alignment doesn't matter
3551 The alignment matters in the reg+const case because lqd and stqd
3552 ignore the 4 least significant bits of the const. We only care about
3553 16 byte modes because the expand phase will change all smaller MEM
3554 references to TImode. */
3555 static bool
3556 spu_legitimate_address_p (machine_mode mode,
3557 rtx x, bool reg_ok_strict)
3558 {
3559 int aligned = GET_MODE_SIZE (mode) >= 16;
3560 if (aligned
3561 && GET_CODE (x) == AND
3562 && GET_CODE (XEXP (x, 1)) == CONST_INT
3563 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3564 x = XEXP (x, 0);
3565 switch (GET_CODE (x))
3566 {
3567 case LABEL_REF:
3568 return !TARGET_LARGE_MEM;
3569
3570 case SYMBOL_REF:
3571 case CONST:
3572 /* Keep __ea references until reload so that spu_expand_mov can see them
3573 in MEMs. */
3574 if (ea_symbol_ref_p (x))
3575 return !reload_in_progress && !reload_completed;
3576 return !TARGET_LARGE_MEM;
3577
3578 case CONST_INT:
3579 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3580
3581 case SUBREG:
3582 x = XEXP (x, 0);
3583 if (REG_P (x))
3584 return 0;
3585
3586 case REG:
3587 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3588
3589 case PLUS:
3590 case LO_SUM:
3591 {
3592 rtx op0 = XEXP (x, 0);
3593 rtx op1 = XEXP (x, 1);
3594 if (GET_CODE (op0) == SUBREG)
3595 op0 = XEXP (op0, 0);
3596 if (GET_CODE (op1) == SUBREG)
3597 op1 = XEXP (op1, 0);
3598 if (GET_CODE (op0) == REG
3599 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3600 && GET_CODE (op1) == CONST_INT
3601 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3602 /* If virtual registers are involved, the displacement will
3603 change later on anyway, so checking would be premature.
3604 Reload will make sure the final displacement after
3605 register elimination is OK. */
3606 || op0 == arg_pointer_rtx
3607 || op0 == frame_pointer_rtx
3608 || op0 == virtual_stack_vars_rtx)
3609 && (!aligned || (INTVAL (op1) & 15) == 0))
3610 return TRUE;
3611 if (GET_CODE (op0) == REG
3612 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3613 && GET_CODE (op1) == REG
3614 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3615 return TRUE;
3616 }
3617 break;
3618
3619 default:
3620 break;
3621 }
3622 return FALSE;
3623 }
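/* Some illustrative addresses, assuming the base register below is
   acceptable: (reg) and (plus (reg) (reg)) are always valid; for a
   16-byte access (plus (reg) (const_int 32)) is valid but
   (plus (reg) (const_int 20)) is not, because 20 is not a multiple of
   16; for smaller modes any displacement in [-0x2000, 0x1fff] is
   accepted. */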
3624
3625 /* Like spu_legitimate_address_p, except with named addresses. */
3626 static bool
3627 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3628 bool reg_ok_strict, addr_space_t as)
3629 {
3630 if (as == ADDR_SPACE_EA)
3631 return (REG_P (x) && (GET_MODE (x) == EAmode));
3632
3633 else if (as != ADDR_SPACE_GENERIC)
3634 gcc_unreachable ();
3635
3636 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3637 }
3638
3639 /* When the address is reg + const_int, force the const_int into a
3640 register. */
3641 static rtx
3642 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3643 machine_mode mode ATTRIBUTE_UNUSED)
3644 {
3645 rtx op0, op1;
3646 /* Make sure both operands are registers. */
3647 if (GET_CODE (x) == PLUS)
3648 {
3649 op0 = XEXP (x, 0);
3650 op1 = XEXP (x, 1);
3651 if (ALIGNED_SYMBOL_REF_P (op0))
3652 {
3653 op0 = force_reg (Pmode, op0);
3654 mark_reg_pointer (op0, 128);
3655 }
3656 else if (GET_CODE (op0) != REG)
3657 op0 = force_reg (Pmode, op0);
3658 if (ALIGNED_SYMBOL_REF_P (op1))
3659 {
3660 op1 = force_reg (Pmode, op1);
3661 mark_reg_pointer (op1, 128);
3662 }
3663 else if (GET_CODE (op1) != REG)
3664 op1 = force_reg (Pmode, op1);
3665 x = gen_rtx_PLUS (Pmode, op0, op1);
3666 }
3667 return x;
3668 }
3669
3670 /* Like spu_legitimize_address, except with named address support. */
3671 static rtx
3672 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3673 addr_space_t as)
3674 {
3675 if (as != ADDR_SPACE_GENERIC)
3676 return x;
3677
3678 return spu_legitimize_address (x, oldx, mode);
3679 }
3680
3681 /* Reload reg + const_int for out-of-range displacements. */
3682 rtx
3683 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3684 int opnum, int type)
3685 {
3686 bool removed_and = false;
3687
3688 if (GET_CODE (ad) == AND
3689 && CONST_INT_P (XEXP (ad, 1))
3690 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3691 {
3692 ad = XEXP (ad, 0);
3693 removed_and = true;
3694 }
3695
3696 if (GET_CODE (ad) == PLUS
3697 && REG_P (XEXP (ad, 0))
3698 && CONST_INT_P (XEXP (ad, 1))
3699 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3700 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3701 {
3702 /* Unshare the sum. */
3703 ad = copy_rtx (ad);
3704
3705 /* Reload the displacement. */
3706 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3707 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3708 opnum, (enum reload_type) type);
3709
3710 /* Add back AND for alignment if we stripped it. */
3711 if (removed_and)
3712 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3713
3714 return ad;
3715 }
3716
3717 return NULL_RTX;
3718 }
3719
3720 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3721 struct attribute_spec.handler. */
3722 static tree
3723 spu_handle_fndecl_attribute (tree * node,
3724 tree name,
3725 tree args ATTRIBUTE_UNUSED,
3726 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3727 {
3728 if (TREE_CODE (*node) != FUNCTION_DECL)
3729 {
3730 warning (0, "%qE attribute only applies to functions",
3731 name);
3732 *no_add_attrs = true;
3733 }
3734
3735 return NULL_TREE;
3736 }
3737
3738 /* Handle the "vector" attribute. */
3739 static tree
3740 spu_handle_vector_attribute (tree * node, tree name,
3741 tree args ATTRIBUTE_UNUSED,
3742 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3743 {
3744 tree type = *node, result = NULL_TREE;
3745 machine_mode mode;
3746 int unsigned_p;
3747
3748 while (POINTER_TYPE_P (type)
3749 || TREE_CODE (type) == FUNCTION_TYPE
3750 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3751 type = TREE_TYPE (type);
3752
3753 mode = TYPE_MODE (type);
3754
3755 unsigned_p = TYPE_UNSIGNED (type);
3756 switch (mode)
3757 {
3758 case DImode:
3759 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3760 break;
3761 case SImode:
3762 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3763 break;
3764 case HImode:
3765 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3766 break;
3767 case QImode:
3768 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3769 break;
3770 case SFmode:
3771 result = V4SF_type_node;
3772 break;
3773 case DFmode:
3774 result = V2DF_type_node;
3775 break;
3776 default:
3777 break;
3778 }
3779
3780 /* Propagate qualifiers attached to the element type
3781 onto the vector type. */
3782 if (result && result != type && TYPE_QUALS (type))
3783 result = build_qualified_type (result, TYPE_QUALS (type));
3784
3785 *no_add_attrs = true; /* No need to hang on to the attribute. */
3786
3787 if (!result)
3788 warning (0, "%qE attribute ignored", name);
3789 else
3790 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3791
3792 return NULL_TREE;
3793 }
3794
3795 /* Return nonzero if FUNC is a naked function. */
3796 static int
3797 spu_naked_function_p (tree func)
3798 {
3799 tree a;
3800
3801 if (TREE_CODE (func) != FUNCTION_DECL)
3802 abort ();
3803
3804 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3805 return a != NULL_TREE;
3806 }
3807
3808 int
3809 spu_initial_elimination_offset (int from, int to)
3810 {
3811 int saved_regs_size = spu_saved_regs_size ();
3812 int sp_offset = 0;
3813 if (!crtl->is_leaf || crtl->outgoing_args_size
3814 || get_frame_size () || saved_regs_size)
3815 sp_offset = STACK_POINTER_OFFSET;
3816 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3817 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3818 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3819 return get_frame_size ();
3820 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3821 return sp_offset + crtl->outgoing_args_size
3822 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3823 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3824 return get_frame_size () + saved_regs_size + sp_offset;
3825 else
3826 gcc_unreachable ();
3827 }
3828
3829 rtx
3830 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3831 {
3832 machine_mode mode = TYPE_MODE (type);
3833 int byte_size = ((mode == BLKmode)
3834 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3835
3836 /* Make sure small structs are left justified in a register. */
3837 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3838 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3839 {
3840 machine_mode smode;
3841 rtvec v;
3842 int i;
3843 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3844 int n = byte_size / UNITS_PER_WORD;
3845 v = rtvec_alloc (nregs);
3846 for (i = 0; i < n; i++)
3847 {
3848 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3849 gen_rtx_REG (TImode,
3850 FIRST_RETURN_REGNUM
3851 + i),
3852 GEN_INT (UNITS_PER_WORD * i));
3853 byte_size -= UNITS_PER_WORD;
3854 }
3855
3856 if (n < nregs)
3857 {
3858 if (byte_size < 4)
3859 byte_size = 4;
3860 smode =
3861 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3862 RTVEC_ELT (v, n) =
3863 gen_rtx_EXPR_LIST (VOIDmode,
3864 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3865 GEN_INT (UNITS_PER_WORD * n));
3866 }
3867 return gen_rtx_PARALLEL (mode, v);
3868 }
3869 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3870 }
3871
3872 static rtx
3873 spu_function_arg (cumulative_args_t cum_v,
3874 machine_mode mode,
3875 const_tree type, bool named ATTRIBUTE_UNUSED)
3876 {
3877 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3878 int byte_size;
3879
3880 if (*cum >= MAX_REGISTER_ARGS)
3881 return 0;
3882
3883 byte_size = ((mode == BLKmode)
3884 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3885
3886 /* The ABI does not allow parameters to be passed partially in
3887 registers and partially on the stack. */
3888 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3889 return 0;
3890
3891 /* Make sure small structs are left justified in a register. */
3892 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3893 && byte_size < UNITS_PER_WORD && byte_size > 0)
3894 {
3895 machine_mode smode;
3896 rtx gr_reg;
3897 if (byte_size < 4)
3898 byte_size = 4;
3899 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3900 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3901 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3902 const0_rtx);
3903 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3904 }
3905 else
3906 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3907 }
3908
3909 static void
3910 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3911 const_tree type, bool named ATTRIBUTE_UNUSED)
3912 {
3913 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3914
3915 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3916 ? 1
3917 : mode == BLKmode
3918 ? ((int_size_in_bytes (type) + 15) / 16)
3919 : mode == VOIDmode
3920 ? 1
3921 : HARD_REGNO_NREGS (cum, mode));
3922 }
3923
3924 /* Variable sized types are passed by reference. */
3925 static bool
3926 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3927 machine_mode mode ATTRIBUTE_UNUSED,
3928 const_tree type, bool named ATTRIBUTE_UNUSED)
3929 {
3930 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3931 }
3932 \f
3933
3934 /* Var args. */
3935
3936 /* Create and return the va_list datatype.
3937
3938 On SPU, va_list is an array type equivalent to
3939
3940 typedef struct __va_list_tag
3941 {
3942 void *__args __attribute__((__aligned(16)));
3943 void *__skip __attribute__((__aligned(16)));
3944
3945 } va_list[1];
3946
3947 where __args points to the arg that will be returned by the next
3948 va_arg(), and __skip points to the previous stack frame such that
3949 when __args == __skip we should advance __args by 32 bytes. */
3950 static tree
3951 spu_build_builtin_va_list (void)
3952 {
3953 tree f_args, f_skip, record, type_decl;
3954 bool owp;
3955
3956 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3957
3958 type_decl =
3959 build_decl (BUILTINS_LOCATION,
3960 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3961
3962 f_args = build_decl (BUILTINS_LOCATION,
3963 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3964 f_skip = build_decl (BUILTINS_LOCATION,
3965 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3966
3967 DECL_FIELD_CONTEXT (f_args) = record;
3968 DECL_ALIGN (f_args) = 128;
3969 DECL_USER_ALIGN (f_args) = 1;
3970
3971 DECL_FIELD_CONTEXT (f_skip) = record;
3972 DECL_ALIGN (f_skip) = 128;
3973 DECL_USER_ALIGN (f_skip) = 1;
3974
3975 TYPE_STUB_DECL (record) = type_decl;
3976 TYPE_NAME (record) = type_decl;
3977 TYPE_FIELDS (record) = f_args;
3978 DECL_CHAIN (f_args) = f_skip;
3979
3980 /* We know this is being padded and we want it that way. It is an
3981 internal type, so hide the warnings from the user. */
3982 owp = warn_padded;
3983 warn_padded = false;
3984
3985 layout_type (record);
3986
3987 warn_padded = owp;
3988
3989 /* The correct type is an array type of one element. */
3990 return build_array_type (record, build_index_type (size_zero_node));
3991 }
3992
3993 /* Implement va_start by filling the va_list structure VALIST.
3994 NEXTARG points to the first anonymous stack argument.
3995
3996 The following global variables are used to initialize
3997 the va_list structure:
3998
3999 crtl->args.info;
4000 the CUMULATIVE_ARGS for this function
4001
4002 crtl->args.arg_offset_rtx:
4003 holds the offset of the first anonymous stack argument
4004 (relative to the virtual arg pointer). */
4005
4006 static void
4007 spu_va_start (tree valist, rtx nextarg)
4008 {
4009 tree f_args, f_skip;
4010 tree args, skip, t;
4011
4012 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4013 f_skip = DECL_CHAIN (f_args);
4014
4015 valist = build_simple_mem_ref (valist);
4016 args =
4017 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4018 skip =
4019 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4020
4021 /* Find the __args area. */
4022 t = make_tree (TREE_TYPE (args), nextarg);
4023 if (crtl->args.pretend_args_size > 0)
4024 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4025 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4026 TREE_SIDE_EFFECTS (t) = 1;
4027 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4028
4029 /* Find the __skip area. */
4030 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4031 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4032 - STACK_POINTER_OFFSET));
4033 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4034 TREE_SIDE_EFFECTS (t) = 1;
4035 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4036 }
4037
4038 /* Gimplify va_arg by updating the va_list structure
4039 VALIST as required to retrieve an argument of type
4040 TYPE, and returning that argument.
4041
4042 ret = va_arg(VALIST, TYPE);
4043
4044 generates code equivalent to:
4045
4046 paddedsize = (sizeof(TYPE) + 15) & -16;
4047 if (VALIST.__args + paddedsize > VALIST.__skip
4048 && VALIST.__args <= VALIST.__skip)
4049 addr = VALIST.__skip + 32;
4050 else
4051 addr = VALIST.__args;
4052 VALIST.__args = addr + paddedsize;
4053 ret = *(TYPE *)addr;
4054 */
4055 static tree
4056 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4057 gimple_seq * post_p ATTRIBUTE_UNUSED)
4058 {
4059 tree f_args, f_skip;
4060 tree args, skip;
4061 HOST_WIDE_INT size, rsize;
4062 tree addr, tmp;
4063 bool pass_by_reference_p;
4064
4065 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4066 f_skip = DECL_CHAIN (f_args);
4067
4068 valist = build_simple_mem_ref (valist);
4069 args =
4070 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4071 skip =
4072 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4073
4074 addr = create_tmp_var (ptr_type_node, "va_arg");
4075
4076 /* if an object is dynamically sized, a pointer to it is passed
4077 instead of the object itself. */
4078 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4079 false);
4080 if (pass_by_reference_p)
4081 type = build_pointer_type (type);
4082 size = int_size_in_bytes (type);
4083 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4084
4085 /* build conditional expression to calculate addr. The expression
4086 will be gimplified later. */
4087 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4088 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4089 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4090 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4091 unshare_expr (skip)));
4092
4093 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4094 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4095 unshare_expr (args));
4096
4097 gimplify_assign (addr, tmp, pre_p);
4098
4099 /* update VALIST.__args */
4100 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4101 gimplify_assign (unshare_expr (args), tmp, pre_p);
4102
4103 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4104 addr);
4105
4106 if (pass_by_reference_p)
4107 addr = build_va_arg_indirect_ref (addr);
4108
4109 return build_va_arg_indirect_ref (addr);
4110 }
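
/* Worked example of the expansion above for va_arg (ap, double):
   size is 8, so rsize rounds up to one 16-byte quadword and the
   generated code is roughly

     if (ap.__args + 16 > ap.__skip && ap.__args <= ap.__skip)
       addr = ap.__skip + 32;
     else
       addr = ap.__args;
     ap.__args = addr + 16;
     ret = *(double *) addr;

   i.e. when the padded slot would cross __skip, __args jumps 32 bytes
   past __skip before the load, as described for the va_list layout
   above.  */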
4111
4112 /* Save parameter registers starting with the register that corresponds
4113 to the first unnamed parameter. If the first unnamed parameter is
4114 on the stack then save no registers. Set pretend_args_size to the
4115 amount of space needed to save the registers. */
4116 static void
4117 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4118 tree type, int *pretend_size, int no_rtl)
4119 {
4120 if (!no_rtl)
4121 {
4122 rtx tmp;
4123 int regno;
4124 int offset;
4125 int ncum = *get_cumulative_args (cum);
4126
4127 /* cum currently points to the last named argument; we want to
4128 start at the next argument. */
4129 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4130
4131 offset = -STACK_POINTER_OFFSET;
4132 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4133 {
4134 tmp = gen_frame_mem (V4SImode,
4135 plus_constant (Pmode, virtual_incoming_args_rtx,
4136 offset));
4137 emit_move_insn (tmp,
4138 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4139 offset += 16;
4140 }
4141 *pretend_size = offset + STACK_POINTER_OFFSET;
4142 }
4143 }
4144 \f
4145 static void
4146 spu_conditional_register_usage (void)
4147 {
4148 if (flag_pic)
4149 {
4150 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4151 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4152 }
4153 }
4154
4155 /* This is called any time we inspect the alignment of a register for
4156 addresses. */
4157 static int
4158 reg_aligned_for_addr (rtx x)
4159 {
4160 int regno =
4161 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4162 return REGNO_POINTER_ALIGN (regno) >= 128;
4163 }
4164
4165 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4166 into its SYMBOL_REF_FLAGS. */
4167 static void
4168 spu_encode_section_info (tree decl, rtx rtl, int first)
4169 {
4170 default_encode_section_info (decl, rtl, first);
4171
4172 /* If a variable has a forced alignment to < 16 bytes, mark it with
4173 SYMBOL_FLAG_ALIGN1. */
4174 if (TREE_CODE (decl) == VAR_DECL
4175 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4176 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4177 }
4178
4179 /* Return TRUE if we are certain the mem refers to a complete object
4180 which is both 16-byte aligned and padded to a 16-byte boundary. This
4181 would make it safe to store with a single instruction.
4182 We guarantee the alignment and padding for static objects by aligning
4183 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4184 FIXME: We currently cannot guarantee this for objects on the stack
4185 because assign_parm_setup_stack calls assign_stack_local with the
4186 alignment of the parameter mode and in that case the alignment never
4187 gets adjusted by LOCAL_ALIGNMENT. */
4188 static int
4189 store_with_one_insn_p (rtx mem)
4190 {
4191 machine_mode mode = GET_MODE (mem);
4192 rtx addr = XEXP (mem, 0);
4193 if (mode == BLKmode)
4194 return 0;
4195 if (GET_MODE_SIZE (mode) >= 16)
4196 return 1;
4197 /* Only static objects. */
4198 if (GET_CODE (addr) == SYMBOL_REF)
4199 {
4200 /* We use the associated declaration to make sure the access is
4201 referring to the whole object.
4202 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4203 if it is necessary. Will there be cases where one exists, and
4204 the other does not? Will there be cases where both exist, but
4205 have different types? */
4206 tree decl = MEM_EXPR (mem);
4207 if (decl
4208 && TREE_CODE (decl) == VAR_DECL
4209 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4210 return 1;
4211 decl = SYMBOL_REF_DECL (addr);
4212 if (decl
4213 && TREE_CODE (decl) == VAR_DECL
4214 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4215 return 1;
4216 }
4217 return 0;
4218 }
4219
4220 /* Return 1 when the address is not valid for a simple load and store as
4221 required by the '_mov*' patterns. We could make this less strict
4222 for loads, but we prefer MEMs to look the same so they are more
4223 likely to be merged. */
4224 static int
4225 address_needs_split (rtx mem)
4226 {
4227 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4228 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4229 || !(store_with_one_insn_p (mem)
4230 || mem_is_padded_component_ref (mem))))
4231 return 1;
4232
4233 return 0;
4234 }
4235
4236 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4237 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4238 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4239
4240 /* MEM is known to be an __ea qualified memory access. Emit a call to
4241 fetch the PPU memory to local store, and return its address in local
4242 store. */
4243
4244 static void
4245 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4246 {
4247 if (is_store)
4248 {
4249 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4250 if (!cache_fetch_dirty)
4251 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4252 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4253 2, ea_addr, EAmode, ndirty, SImode);
4254 }
4255 else
4256 {
4257 if (!cache_fetch)
4258 cache_fetch = init_one_libfunc ("__cache_fetch");
4259 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4260 1, ea_addr, EAmode);
4261 }
4262 }
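
/* A sketch of the run-time interface inferred from the libcall
   arguments above (not a definitive prototype):

     void *__cache_fetch (__ea void *ea_addr);
     void *__cache_fetch_dirty (__ea void *ea_addr, int n_bytes_dirty);

   Both appear to return the local-store address of the cached copy;
   the dirty variant additionally takes the number of bytes that will
   be written.  */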
4263
4264 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4265 dirty bit marking, inline.
4266
4267 The cache control data structure is an array of
4268
4269 struct __cache_tag_array
4270 {
4271 unsigned int tag_lo[4];
4272 unsigned int tag_hi[4];
4273 void *data_pointer[4];
4274 int reserved[4];
4275 vector unsigned short dirty_bits[4];
4276 } */
4277
4278 static void
4279 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4280 {
4281 rtx ea_addr_si;
4282 HOST_WIDE_INT v;
4283 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4284 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4285 rtx index_mask = gen_reg_rtx (SImode);
4286 rtx tag_arr = gen_reg_rtx (Pmode);
4287 rtx splat_mask = gen_reg_rtx (TImode);
4288 rtx splat = gen_reg_rtx (V4SImode);
4289 rtx splat_hi = NULL_RTX;
4290 rtx tag_index = gen_reg_rtx (Pmode);
4291 rtx block_off = gen_reg_rtx (SImode);
4292 rtx tag_addr = gen_reg_rtx (Pmode);
4293 rtx tag = gen_reg_rtx (V4SImode);
4294 rtx cache_tag = gen_reg_rtx (V4SImode);
4295 rtx cache_tag_hi = NULL_RTX;
4296 rtx cache_ptrs = gen_reg_rtx (TImode);
4297 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4298 rtx tag_equal = gen_reg_rtx (V4SImode);
4299 rtx tag_equal_hi = NULL_RTX;
4300 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4301 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4302 rtx eq_index = gen_reg_rtx (SImode);
4303 rtx bcomp, hit_label, hit_ref, cont_label;
4304 rtx_insn *insn;
4305
4306 if (spu_ea_model != 32)
4307 {
4308 splat_hi = gen_reg_rtx (V4SImode);
4309 cache_tag_hi = gen_reg_rtx (V4SImode);
4310 tag_equal_hi = gen_reg_rtx (V4SImode);
4311 }
4312
4313 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4314 emit_move_insn (tag_arr, tag_arr_sym);
4315 v = 0x0001020300010203LL;
4316 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4317 ea_addr_si = ea_addr;
4318 if (spu_ea_model != 32)
4319 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4320
4321 /* tag_index = ea_addr & (tag_array_size - 128) */
4322 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4323
4324 /* splat ea_addr to all 4 slots. */
4325 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4326 /* Similarly for high 32 bits of ea_addr. */
4327 if (spu_ea_model != 32)
4328 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4329
4330 /* block_off = ea_addr & 127 */
4331 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4332
4333 /* tag_addr = tag_arr + tag_index */
4334 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4335
4336 /* Read cache tags. */
4337 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4338 if (spu_ea_model != 32)
4339 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4340 plus_constant (Pmode,
4341 tag_addr, 16)));
4342
4343 /* tag = ea_addr & -128 */
4344 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4345
4346 /* Read all four cache data pointers. */
4347 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4348 plus_constant (Pmode,
4349 tag_addr, 32)));
4350
4351 /* Compare tags. */
4352 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4353 if (spu_ea_model != 32)
4354 {
4355 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4356 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4357 }
4358
4359 /* At most one of the tags compare equal, so tag_equal has one
4360 32-bit slot set to all 1's, with the other slots all zero.
4361 gbb picks off low bit from each byte in the 128-bit registers,
4362 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4363 we have a hit. */
4364 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4365 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4366
4367 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4368 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4369
4370 /* This lets us rotate the corresponding cache data pointer to slot 0
4371 (the rotation is by eq_index mod 16 bytes). */
4372 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4373 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4374
4375 /* Add block offset to form final data address. */
4376 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4377
4378 /* Check that we did hit. */
4379 hit_label = gen_label_rtx ();
4380 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4381 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4382 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4383 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4384 hit_ref, pc_rtx)));
4385 /* Say that this branch is very likely to happen. */
4386 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4387 add_int_reg_note (insn, REG_BR_PROB, v);
4388
4389 ea_load_store (mem, is_store, ea_addr, data_addr);
4390 cont_label = gen_label_rtx ();
4391 emit_jump_insn (gen_jump (cont_label));
4392 emit_barrier ();
4393
4394 emit_label (hit_label);
4395
4396 if (is_store)
4397 {
4398 HOST_WIDE_INT v_hi;
4399 rtx dirty_bits = gen_reg_rtx (TImode);
4400 rtx dirty_off = gen_reg_rtx (SImode);
4401 rtx dirty_128 = gen_reg_rtx (TImode);
4402 rtx neg_block_off = gen_reg_rtx (SImode);
4403
4404 /* Set up mask with one dirty bit per byte of the mem we are
4405 writing, starting from top bit. */
4406 v_hi = v = -1;
4407 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4408 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4409 {
4410 v_hi = v;
4411 v = 0;
4412 }
4413 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4414
4415 /* Form index into cache dirty_bits. eq_index is one of
4416 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4417 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4418 offset to each of the four dirty_bits elements. */
4419 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4420
4421 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4422
4423 /* Rotate bit mask to proper bit. */
4424 emit_insn (gen_negsi2 (neg_block_off, block_off));
4425 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4426 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4427
4428 /* Or in the new dirty bits. */
4429 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4430
4431 /* Store. */
4432 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4433 }
4434
4435 emit_label (cont_label);
4436 }
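
/* The inline hit path above computes, in effect (a rough C-style
   sketch of the RTL sequence, not literal code):

     tag_addr  = (char *) &__cache_tag_array
                 + (ea_addr & (__cache_tag_array_size - 128));
     way       = the slot whose tag_lo (and tag_hi for 64-bit __ea)
                 equals ea_addr & -128;
     data_addr = tag_addr->data_pointer[way] + (ea_addr & 127);

   and branches to the out-of-line ea_load_store path when no tag
   matches.  */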
4437
4438 static rtx
4439 expand_ea_mem (rtx mem, bool is_store)
4440 {
4441 rtx ea_addr;
4442 rtx data_addr = gen_reg_rtx (Pmode);
4443 rtx new_mem;
4444
4445 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4446 if (optimize_size || optimize == 0)
4447 ea_load_store (mem, is_store, ea_addr, data_addr);
4448 else
4449 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4450
4451 if (ea_alias_set == -1)
4452 ea_alias_set = new_alias_set ();
4453
4454 /* We generate a new MEM RTX to refer to the copy of the data
4455 in the cache. We do not copy memory attributes (except the
4456 alignment) from the original MEM, as they may no longer apply
4457 to the cache copy. */
4458 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4459 set_mem_alias_set (new_mem, ea_alias_set);
4460 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4461
4462 return new_mem;
4463 }
4464
4465 int
4466 spu_expand_mov (rtx * ops, machine_mode mode)
4467 {
4468 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4469 {
4470 /* Perform the move in the destination SUBREG's inner mode. */
4471 ops[0] = SUBREG_REG (ops[0]);
4472 mode = GET_MODE (ops[0]);
4473 ops[1] = gen_lowpart_common (mode, ops[1]);
4474 gcc_assert (ops[1]);
4475 }
4476
4477 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4478 {
4479 rtx from = SUBREG_REG (ops[1]);
4480 machine_mode imode = int_mode_for_mode (GET_MODE (from));
4481
4482 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4483 && GET_MODE_CLASS (imode) == MODE_INT
4484 && subreg_lowpart_p (ops[1]));
4485
4486 if (GET_MODE_SIZE (imode) < 4)
4487 imode = SImode;
4488 if (imode != GET_MODE (from))
4489 from = gen_rtx_SUBREG (imode, from, 0);
4490
4491 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4492 {
4493 enum insn_code icode = convert_optab_handler (trunc_optab,
4494 mode, imode);
4495 emit_insn (GEN_FCN (icode) (ops[0], from));
4496 }
4497 else
4498 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4499 return 1;
4500 }
4501
4502 /* At least one of the operands needs to be a register. */
4503 if ((reload_in_progress | reload_completed) == 0
4504 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4505 {
4506 rtx temp = force_reg (mode, ops[1]);
4507 emit_move_insn (ops[0], temp);
4508 return 1;
4509 }
4510 if (reload_in_progress || reload_completed)
4511 {
4512 if (CONSTANT_P (ops[1]))
4513 return spu_split_immediate (ops);
4514 return 0;
4515 }
4516
4517 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4518 extend them. */
4519 if (GET_CODE (ops[1]) == CONST_INT)
4520 {
4521 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4522 if (val != INTVAL (ops[1]))
4523 {
4524 emit_move_insn (ops[0], GEN_INT (val));
4525 return 1;
4526 }
4527 }
4528 if (MEM_P (ops[0]))
4529 {
4530 if (MEM_ADDR_SPACE (ops[0]))
4531 ops[0] = expand_ea_mem (ops[0], true);
4532 return spu_split_store (ops);
4533 }
4534 if (MEM_P (ops[1]))
4535 {
4536 if (MEM_ADDR_SPACE (ops[1]))
4537 ops[1] = expand_ea_mem (ops[1], false);
4538 return spu_split_load (ops);
4539 }
4540
4541 return 0;
4542 }
4543
4544 static void
4545 spu_convert_move (rtx dst, rtx src)
4546 {
4547 machine_mode mode = GET_MODE (dst);
4548 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4549 rtx reg;
4550 gcc_assert (GET_MODE (src) == TImode);
4551 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4552 emit_insn (gen_rtx_SET (reg,
4553 gen_rtx_TRUNCATE (int_mode,
4554 gen_rtx_LSHIFTRT (TImode, src,
4555 GEN_INT (int_mode == DImode ? 64 : 96)))));
4556 if (int_mode != mode)
4557 {
4558 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4559 emit_move_insn (dst, reg);
4560 }
4561 }
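
/* For example, converting a TImode quadword to an SImode scalar above
   shifts the source right by 128 - 32 = 96 bits and then truncates,
   which brings the preferred (leftmost word) slot down to the low end
   of the register; only DImode uses the smaller 64-bit shift.  */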
4562
4563 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4564 the address from SRC and SRC+16. Return a REG or CONST_INT that
4565 specifies how many bytes to rotate the loaded registers, plus any
4566 extra from EXTRA_ROTQBY. The address and rotate amounts are
4567 normalized to improve merging of loads and rotate computations. */
4568 static rtx
4569 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4570 {
4571 rtx addr = XEXP (src, 0);
4572 rtx p0, p1, rot, addr0, addr1;
4573 int rot_amt;
4574
4575 rot = 0;
4576 rot_amt = 0;
4577
4578 if (MEM_ALIGN (src) >= 128)
4579 /* Address is already aligned; simply perform a TImode load. */ ;
4580 else if (GET_CODE (addr) == PLUS)
4581 {
4582 /* 8 cases:
4583 aligned reg + aligned reg => lqx
4584 aligned reg + unaligned reg => lqx, rotqby
4585 aligned reg + aligned const => lqd
4586 aligned reg + unaligned const => lqd, rotqbyi
4587 unaligned reg + aligned reg => lqx, rotqby
4588 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4589 unaligned reg + aligned const => lqd, rotqby
4590 unaligned reg + unaligned const -> not allowed by legitimate address
4591 */
4592 p0 = XEXP (addr, 0);
4593 p1 = XEXP (addr, 1);
4594 if (!reg_aligned_for_addr (p0))
4595 {
4596 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4597 {
4598 rot = gen_reg_rtx (SImode);
4599 emit_insn (gen_addsi3 (rot, p0, p1));
4600 }
4601 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4602 {
4603 if (INTVAL (p1) > 0
4604 && REG_POINTER (p0)
4605 && INTVAL (p1) * BITS_PER_UNIT
4606 < REGNO_POINTER_ALIGN (REGNO (p0)))
4607 {
4608 rot = gen_reg_rtx (SImode);
4609 emit_insn (gen_addsi3 (rot, p0, p1));
4610 addr = p0;
4611 }
4612 else
4613 {
4614 rtx x = gen_reg_rtx (SImode);
4615 emit_move_insn (x, p1);
4616 if (!spu_arith_operand (p1, SImode))
4617 p1 = x;
4618 rot = gen_reg_rtx (SImode);
4619 emit_insn (gen_addsi3 (rot, p0, p1));
4620 addr = gen_rtx_PLUS (Pmode, p0, x);
4621 }
4622 }
4623 else
4624 rot = p0;
4625 }
4626 else
4627 {
4628 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4629 {
4630 rot_amt = INTVAL (p1) & 15;
4631 if (INTVAL (p1) & -16)
4632 {
4633 p1 = GEN_INT (INTVAL (p1) & -16);
4634 addr = gen_rtx_PLUS (SImode, p0, p1);
4635 }
4636 else
4637 addr = p0;
4638 }
4639 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4640 rot = p1;
4641 }
4642 }
4643 else if (REG_P (addr))
4644 {
4645 if (!reg_aligned_for_addr (addr))
4646 rot = addr;
4647 }
4648 else if (GET_CODE (addr) == CONST)
4649 {
4650 if (GET_CODE (XEXP (addr, 0)) == PLUS
4651 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4652 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4653 {
4654 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4655 if (rot_amt & -16)
4656 addr = gen_rtx_CONST (Pmode,
4657 gen_rtx_PLUS (Pmode,
4658 XEXP (XEXP (addr, 0), 0),
4659 GEN_INT (rot_amt & -16)));
4660 else
4661 addr = XEXP (XEXP (addr, 0), 0);
4662 }
4663 else
4664 {
4665 rot = gen_reg_rtx (Pmode);
4666 emit_move_insn (rot, addr);
4667 }
4668 }
4669 else if (GET_CODE (addr) == CONST_INT)
4670 {
4671 rot_amt = INTVAL (addr);
4672 addr = GEN_INT (rot_amt & -16);
4673 }
4674 else if (!ALIGNED_SYMBOL_REF_P (addr))
4675 {
4676 rot = gen_reg_rtx (Pmode);
4677 emit_move_insn (rot, addr);
4678 }
4679
4680 rot_amt += extra_rotby;
4681
4682 rot_amt &= 15;
4683
4684 if (rot && rot_amt)
4685 {
4686 rtx x = gen_reg_rtx (SImode);
4687 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4688 rot = x;
4689 rot_amt = 0;
4690 }
4691 if (!rot && rot_amt)
4692 rot = GEN_INT (rot_amt);
4693
4694 addr0 = copy_rtx (addr);
4695 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4696 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4697
4698 if (dst1)
4699 {
4700 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4701 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4702 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4703 }
4704
4705 return rot;
4706 }
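
/* Worked example for the "aligned reg + unaligned const" case in the
   table above: for a load from (plus (reg R) (const_int 5)) with R
   known to be 16-byte aligned, rot_amt becomes 5, the quadword at
   (R & -16) is fetched with a single lqd, and the caller rotates the
   result by 5 bytes (rotqbyi) to bring the addressed data into the
   preferred slot.  */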
4707
4708 int
4709 spu_split_load (rtx * ops)
4710 {
4711 machine_mode mode = GET_MODE (ops[0]);
4712 rtx addr, load, rot;
4713 int rot_amt;
4714
4715 if (GET_MODE_SIZE (mode) >= 16)
4716 return 0;
4717
4718 addr = XEXP (ops[1], 0);
4719 gcc_assert (GET_CODE (addr) != AND);
4720
4721 if (!address_needs_split (ops[1]))
4722 {
4723 ops[1] = change_address (ops[1], TImode, addr);
4724 load = gen_reg_rtx (TImode);
4725 emit_insn (gen__movti (load, ops[1]));
4726 spu_convert_move (ops[0], load);
4727 return 1;
4728 }
4729
4730 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4731
4732 load = gen_reg_rtx (TImode);
4733 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4734
4735 if (rot)
4736 emit_insn (gen_rotqby_ti (load, load, rot));
4737
4738 spu_convert_move (ops[0], load);
4739 return 1;
4740 }
4741
4742 int
4743 spu_split_store (rtx * ops)
4744 {
4745 machine_mode mode = GET_MODE (ops[0]);
4746 rtx reg;
4747 rtx addr, p0, p1, p1_lo, smem;
4748 int aform;
4749 int scalar;
4750
4751 if (GET_MODE_SIZE (mode) >= 16)
4752 return 0;
4753
4754 addr = XEXP (ops[0], 0);
4755 gcc_assert (GET_CODE (addr) != AND);
4756
4757 if (!address_needs_split (ops[0]))
4758 {
4759 reg = gen_reg_rtx (TImode);
4760 emit_insn (gen_spu_convert (reg, ops[1]));
4761 ops[0] = change_address (ops[0], TImode, addr);
4762 emit_move_insn (ops[0], reg);
4763 return 1;
4764 }
4765
4766 if (GET_CODE (addr) == PLUS)
4767 {
4768 /* 8 cases:
4769 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4770 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4771 aligned reg + aligned const => lqd, c?d, shuf, stqx
4772 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4773 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4774 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4775 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4776 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4777 */
4778 aform = 0;
4779 p0 = XEXP (addr, 0);
4780 p1 = p1_lo = XEXP (addr, 1);
4781 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4782 {
4783 p1_lo = GEN_INT (INTVAL (p1) & 15);
4784 if (reg_aligned_for_addr (p0))
4785 {
4786 p1 = GEN_INT (INTVAL (p1) & -16);
4787 if (p1 == const0_rtx)
4788 addr = p0;
4789 else
4790 addr = gen_rtx_PLUS (SImode, p0, p1);
4791 }
4792 else
4793 {
4794 rtx x = gen_reg_rtx (SImode);
4795 emit_move_insn (x, p1);
4796 addr = gen_rtx_PLUS (SImode, p0, x);
4797 }
4798 }
4799 }
4800 else if (REG_P (addr))
4801 {
4802 aform = 0;
4803 p0 = addr;
4804 p1 = p1_lo = const0_rtx;
4805 }
4806 else
4807 {
4808 aform = 1;
4809 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4810 p1 = 0; /* aform doesn't use p1 */
4811 p1_lo = addr;
4812 if (ALIGNED_SYMBOL_REF_P (addr))
4813 p1_lo = const0_rtx;
4814 else if (GET_CODE (addr) == CONST
4815 && GET_CODE (XEXP (addr, 0)) == PLUS
4816 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4817 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4818 {
4819 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4820 if ((v & -16) != 0)
4821 addr = gen_rtx_CONST (Pmode,
4822 gen_rtx_PLUS (Pmode,
4823 XEXP (XEXP (addr, 0), 0),
4824 GEN_INT (v & -16)));
4825 else
4826 addr = XEXP (XEXP (addr, 0), 0);
4827 p1_lo = GEN_INT (v & 15);
4828 }
4829 else if (GET_CODE (addr) == CONST_INT)
4830 {
4831 p1_lo = GEN_INT (INTVAL (addr) & 15);
4832 addr = GEN_INT (INTVAL (addr) & -16);
4833 }
4834 else
4835 {
4836 p1_lo = gen_reg_rtx (SImode);
4837 emit_move_insn (p1_lo, addr);
4838 }
4839 }
4840
4841 gcc_assert (aform == 0 || aform == 1);
4842 reg = gen_reg_rtx (TImode);
4843
4844 scalar = store_with_one_insn_p (ops[0]);
4845 if (!scalar)
4846 {
4847 /* We could copy the flags from the ops[0] MEM to lmem here,
4848 but we don't because we want this load to be optimized away if
4849 possible, and copying the flags would prevent that in certain
4850 cases, e.g. consider the volatile flag. */
4851
4852 rtx pat = gen_reg_rtx (TImode);
4853 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4854 set_mem_alias_set (lmem, 0);
4855 emit_insn (gen_movti (reg, lmem));
4856
4857 if (!p0 || reg_aligned_for_addr (p0))
4858 p0 = stack_pointer_rtx;
4859 if (!p1_lo)
4860 p1_lo = const0_rtx;
4861
4862 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4863 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4864 }
4865 else
4866 {
4867 if (GET_CODE (ops[1]) == REG)
4868 emit_insn (gen_spu_convert (reg, ops[1]));
4869 else if (GET_CODE (ops[1]) == SUBREG)
4870 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4871 else
4872 abort ();
4873 }
4874
4875 if (GET_MODE_SIZE (mode) < 4 && scalar)
4876 emit_insn (gen_ashlti3
4877 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4878
4879 smem = change_address (ops[0], TImode, copy_rtx (addr));
4880 /* We can't use the previous alias set because the memory has changed
4881 size and can potentially overlap objects of other types. */
4882 set_mem_alias_set (smem, 0);
4883
4884 emit_insn (gen_movti (smem, reg));
4885 return 1;
4886 }
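
/* The non-scalar path above is a read-modify-write; in C-style
   pseudocode (a sketch, not the literal emitted RTL):

     old = *(quadword at (addr & -16));        -- lqd / lqx
     pat = cpat (addr, sizeof value);          -- c?d / c?x insertion mask
     new = shufb (value, old, pat);            -- merge value into old
     *(quadword at (addr & -16)) = new;        -- stqd / stqx

   which matches the lqx, c?x, shuf, stqx sequences listed in the
   table above.  */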
4887
4888 /* Return TRUE if X is MEM which is a struct member reference
4889 and the member can safely be loaded and stored with a single
4890 instruction because it is padded. */
4891 static int
4892 mem_is_padded_component_ref (rtx x)
4893 {
4894 tree t = MEM_EXPR (x);
4895 tree r;
4896 if (!t || TREE_CODE (t) != COMPONENT_REF)
4897 return 0;
4898 t = TREE_OPERAND (t, 1);
4899 if (!t || TREE_CODE (t) != FIELD_DECL
4900 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4901 return 0;
4902 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4903 r = DECL_FIELD_CONTEXT (t);
4904 if (!r || TREE_CODE (r) != RECORD_TYPE)
4905 return 0;
4906 /* Make sure they are the same mode */
4907 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4908 return 0;
4909 /* If there are no following fields, then the field alignment ensures
4910 the structure is padded to that alignment, which means this field is
4911 padded too. */
4912 if (TREE_CHAIN (t) == 0)
4913 return 1;
4914 /* If the following field is also aligned then this field will be
4915 padded. */
4916 t = TREE_CHAIN (t);
4917 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4918 return 1;
4919 return 0;
4920 }
4921
4922 /* Parse the -mfixed-range= option string. */
4923 static void
4924 fix_range (const char *const_str)
4925 {
4926 int i, first, last;
4927 char *str, *dash, *comma;
4928
4929 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4930 REG2 are either register names or register numbers. The effect
4931 of this option is to mark the registers in the range from REG1 to
4932 REG2 as ``fixed'' so they won't be used by the compiler. */
4933
4934 i = strlen (const_str);
4935 str = (char *) alloca (i + 1);
4936 memcpy (str, const_str, i + 1);
4937
4938 while (1)
4939 {
4940 dash = strchr (str, '-');
4941 if (!dash)
4942 {
4943 warning (0, "value of -mfixed-range must have form REG1-REG2");
4944 return;
4945 }
4946 *dash = '\0';
4947 comma = strchr (dash + 1, ',');
4948 if (comma)
4949 *comma = '\0';
4950
4951 first = decode_reg_name (str);
4952 if (first < 0)
4953 {
4954 warning (0, "unknown register name: %s", str);
4955 return;
4956 }
4957
4958 last = decode_reg_name (dash + 1);
4959 if (last < 0)
4960 {
4961 warning (0, "unknown register name: %s", dash + 1);
4962 return;
4963 }
4964
4965 *dash = '-';
4966
4967 if (first > last)
4968 {
4969 warning (0, "%s-%s is an empty range", str, dash + 1);
4970 return;
4971 }
4972
4973 for (i = first; i <= last; ++i)
4974 fixed_regs[i] = call_used_regs[i] = 1;
4975
4976 if (!comma)
4977 break;
4978
4979 *comma = ',';
4980 str = comma + 1;
4981 }
4982 }
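
/* For example, something like -mfixed-range=$80-$127 (a hypothetical
   choice of registers; the names must be acceptable to
   decode_reg_name) marks those registers as fixed and call-used, so
   the register allocator will never use them.  */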
4983
4984 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4985 can be generated using the fsmbi instruction. */
4986 int
4987 fsmbi_const_p (rtx x)
4988 {
4989 if (CONSTANT_P (x))
4990 {
4991 /* We can always choose TImode for CONST_INT because the high bits
4992 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4993 enum immediate_class c = classify_immediate (x, TImode);
4994 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4995 }
4996 return 0;
4997 }
4998
4999 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5000 can be generated using the cbd, chd, cwd or cdd instruction. */
5001 int
5002 cpat_const_p (rtx x, machine_mode mode)
5003 {
5004 if (CONSTANT_P (x))
5005 {
5006 enum immediate_class c = classify_immediate (x, mode);
5007 return c == IC_CPAT;
5008 }
5009 return 0;
5010 }
5011
5012 rtx
5013 gen_cpat_const (rtx * ops)
5014 {
5015 unsigned char dst[16];
5016 int i, offset, shift, isize;
5017 if (GET_CODE (ops[3]) != CONST_INT
5018 || GET_CODE (ops[2]) != CONST_INT
5019 || (GET_CODE (ops[1]) != CONST_INT
5020 && GET_CODE (ops[1]) != REG))
5021 return 0;
5022 if (GET_CODE (ops[1]) == REG
5023 && (!REG_POINTER (ops[1])
5024 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5025 return 0;
5026
5027 for (i = 0; i < 16; i++)
5028 dst[i] = i + 16;
5029 isize = INTVAL (ops[3]);
5030 if (isize == 1)
5031 shift = 3;
5032 else if (isize == 2)
5033 shift = 2;
5034 else
5035 shift = 0;
5036 offset = (INTVAL (ops[2]) +
5037 (GET_CODE (ops[1]) ==
5038 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5039 for (i = 0; i < isize; i++)
5040 dst[offset + i] = i + shift;
5041 return array_to_constant (TImode, dst);
5042 }
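
/* Worked example: for a 4-byte insertion at constant offset 4
   (ops[1] = const0_rtx, ops[2] = 4, ops[3] = 4) the loop above
   produces

     dst = { 16,17,18,19,  0,1,2,3,  24,25,26,27,  28,29,30,31 }

   i.e. a shuffle pattern that (with the new value as the first shufb
   operand) copies bytes 0-3 of the value into bytes 4-7 of the result
   and takes the remaining bytes from the old quadword.  */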
5043
5044 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5045 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5046 than 16 bytes, the value is repeated across the rest of the array. */
5047 void
5048 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5049 {
5050 HOST_WIDE_INT val;
5051 int i, j, first;
5052
5053 memset (arr, 0, 16);
5054 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5055 if (GET_CODE (x) == CONST_INT
5056 || (GET_CODE (x) == CONST_DOUBLE
5057 && (mode == SFmode || mode == DFmode)))
5058 {
5059 gcc_assert (mode != VOIDmode && mode != BLKmode);
5060
5061 if (GET_CODE (x) == CONST_DOUBLE)
5062 val = const_double_to_hwint (x);
5063 else
5064 val = INTVAL (x);
5065 first = GET_MODE_SIZE (mode) - 1;
5066 for (i = first; i >= 0; i--)
5067 {
5068 arr[i] = val & 0xff;
5069 val >>= 8;
5070 }
5071 /* Splat the constant across the whole array. */
5072 for (j = 0, i = first + 1; i < 16; i++)
5073 {
5074 arr[i] = arr[j];
5075 j = (j == first) ? 0 : j + 1;
5076 }
5077 }
5078 else if (GET_CODE (x) == CONST_DOUBLE)
5079 {
5080 val = CONST_DOUBLE_LOW (x);
5081 for (i = 15; i >= 8; i--)
5082 {
5083 arr[i] = val & 0xff;
5084 val >>= 8;
5085 }
5086 val = CONST_DOUBLE_HIGH (x);
5087 for (i = 7; i >= 0; i--)
5088 {
5089 arr[i] = val & 0xff;
5090 val >>= 8;
5091 }
5092 }
5093 else if (GET_CODE (x) == CONST_VECTOR)
5094 {
5095 int units;
5096 rtx elt;
5097 mode = GET_MODE_INNER (mode);
5098 units = CONST_VECTOR_NUNITS (x);
5099 for (i = 0; i < units; i++)
5100 {
5101 elt = CONST_VECTOR_ELT (x, i);
5102 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5103 {
5104 if (GET_CODE (elt) == CONST_DOUBLE)
5105 val = const_double_to_hwint (elt);
5106 else
5107 val = INTVAL (elt);
5108 first = GET_MODE_SIZE (mode) - 1;
5109 if (first + i * GET_MODE_SIZE (mode) > 16)
5110 abort ();
5111 for (j = first; j >= 0; j--)
5112 {
5113 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5114 val >>= 8;
5115 }
5116 }
5117 }
5118 }
5119 else
5120 gcc_unreachable();
5121 }
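
/* Worked example: for (const_int 0x12345678) in SImode, the first
   four bytes become 0x12 0x34 0x56 0x78 and the splat loop repeats
   them, giving (hex bytes)

     arr = { 12 34 56 78  12 34 56 78  12 34 56 78  12 34 56 78 }  */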
5122
5123 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5124 smaller than 16 bytes, use the bytes that would represent that value
5125 in a register, e.g., for QImode return the value of arr[3]. */
5126 rtx
5127 array_to_constant (machine_mode mode, const unsigned char arr[16])
5128 {
5129 machine_mode inner_mode;
5130 rtvec v;
5131 int units, size, i, j, k;
5132 HOST_WIDE_INT val;
5133
5134 if (GET_MODE_CLASS (mode) == MODE_INT
5135 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5136 {
5137 j = GET_MODE_SIZE (mode);
5138 i = j < 4 ? 4 - j : 0;
5139 for (val = 0; i < j; i++)
5140 val = (val << 8) | arr[i];
5141 val = trunc_int_for_mode (val, mode);
5142 return GEN_INT (val);
5143 }
5144
5145 if (mode == TImode)
5146 {
5147 HOST_WIDE_INT high;
5148 for (i = high = 0; i < 8; i++)
5149 high = (high << 8) | arr[i];
5150 for (i = 8, val = 0; i < 16; i++)
5151 val = (val << 8) | arr[i];
5152 return immed_double_const (val, high, TImode);
5153 }
5154 if (mode == SFmode)
5155 {
5156 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5157 val = trunc_int_for_mode (val, SImode);
5158 return hwint_to_const_double (SFmode, val);
5159 }
5160 if (mode == DFmode)
5161 {
5162 for (i = 0, val = 0; i < 8; i++)
5163 val = (val << 8) | arr[i];
5164 return hwint_to_const_double (DFmode, val);
5165 }
5166
5167 if (!VECTOR_MODE_P (mode))
5168 abort ();
5169
5170 units = GET_MODE_NUNITS (mode);
5171 size = GET_MODE_UNIT_SIZE (mode);
5172 inner_mode = GET_MODE_INNER (mode);
5173 v = rtvec_alloc (units);
5174
5175 for (k = i = 0; i < units; ++i)
5176 {
5177 val = 0;
5178 for (j = 0; j < size; j++, k++)
5179 val = (val << 8) | arr[k];
5180
5181 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5182 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5183 else
5184 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5185 }
5186 if (k > 16)
5187 abort ();
5188
5189 return gen_rtx_CONST_VECTOR (mode, v);
5190 }
5191
5192 static void
5193 reloc_diagnostic (rtx x)
5194 {
5195 tree decl = 0;
5196 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5197 return;
5198
5199 if (GET_CODE (x) == SYMBOL_REF)
5200 decl = SYMBOL_REF_DECL (x);
5201 else if (GET_CODE (x) == CONST
5202 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5203 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5204
5205 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5206 if (decl && !DECL_P (decl))
5207 decl = 0;
5208
5209 /* The decl could be a string constant. */
5210 if (decl && DECL_P (decl))
5211 {
5212 location_t loc;
5213 /* We use last_assemble_variable_decl to get line information. It's
5214 not always going to be right and might not even be close, but will
5215 be right for the more common cases. */
5216 if (!last_assemble_variable_decl || in_section == ctors_section)
5217 loc = DECL_SOURCE_LOCATION (decl);
5218 else
5219 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5220
5221 if (TARGET_WARN_RELOC)
5222 warning_at (loc, 0,
5223 "creating run-time relocation for %qD", decl);
5224 else
5225 error_at (loc,
5226 "creating run-time relocation for %qD", decl);
5227 }
5228 else
5229 {
5230 if (TARGET_WARN_RELOC)
5231 warning_at (input_location, 0, "creating run-time relocation");
5232 else
5233 error_at (input_location, "creating run-time relocation");
5234 }
5235 }
5236
5237 /* Hook into assemble_integer so we can generate an error for run-time
5238 relocations. The SPU ABI disallows them. */
5239 static bool
5240 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5241 {
5242 /* By default run-time relocations aren't supported, but we allow them
5243 in case users support it in their own run-time loader. And we provide
5244 a warning for those users that don't. */
5245 if ((GET_CODE (x) == SYMBOL_REF)
5246 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5247 reloc_diagnostic (x);
5248
5249 return default_assemble_integer (x, size, aligned_p);
5250 }
5251
5252 static void
5253 spu_asm_globalize_label (FILE * file, const char *name)
5254 {
5255 fputs ("\t.global\t", file);
5256 assemble_name (file, name);
5257 fputs ("\n", file);
5258 }
5259
5260 static bool
5261 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5262 int opno ATTRIBUTE_UNUSED, int *total,
5263 bool speed ATTRIBUTE_UNUSED)
5264 {
5265 machine_mode mode = GET_MODE (x);
5266 int cost = COSTS_N_INSNS (2);
5267
5268 /* Folding to a CONST_VECTOR will use extra space but there might
5269 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5270 only if it allows us to fold away multiple insns. Changing the cost
5271 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5272 because this cost will only be compared against a single insn.
5273 if (code == CONST_VECTOR)
5274 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5275 */
5276
5277 /* Use defaults for float operations. Not accurate but good enough. */
5278 if (mode == DFmode)
5279 {
5280 *total = COSTS_N_INSNS (13);
5281 return true;
5282 }
5283 if (mode == SFmode)
5284 {
5285 *total = COSTS_N_INSNS (6);
5286 return true;
5287 }
5288 switch (code)
5289 {
5290 case CONST_INT:
5291 if (satisfies_constraint_K (x))
5292 *total = 0;
5293 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5294 *total = COSTS_N_INSNS (1);
5295 else
5296 *total = COSTS_N_INSNS (3);
5297 return true;
5298
5299 case CONST:
5300 *total = COSTS_N_INSNS (3);
5301 return true;
5302
5303 case LABEL_REF:
5304 case SYMBOL_REF:
5305 *total = COSTS_N_INSNS (0);
5306 return true;
5307
5308 case CONST_DOUBLE:
5309 *total = COSTS_N_INSNS (5);
5310 return true;
5311
5312 case FLOAT_EXTEND:
5313 case FLOAT_TRUNCATE:
5314 case FLOAT:
5315 case UNSIGNED_FLOAT:
5316 case FIX:
5317 case UNSIGNED_FIX:
5318 *total = COSTS_N_INSNS (7);
5319 return true;
5320
5321 case PLUS:
5322 if (mode == TImode)
5323 {
5324 *total = COSTS_N_INSNS (9);
5325 return true;
5326 }
5327 break;
5328
5329 case MULT:
5330 cost =
5331 GET_CODE (XEXP (x, 0)) ==
5332 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5333 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5334 {
5335 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5336 {
5337 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5338 cost = COSTS_N_INSNS (14);
5339 if ((val & 0xffff) == 0)
5340 cost = COSTS_N_INSNS (9);
5341 else if (val > 0 && val < 0x10000)
5342 cost = COSTS_N_INSNS (11);
5343 }
5344 }
5345 *total = cost;
5346 return true;
5347 case DIV:
5348 case UDIV:
5349 case MOD:
5350 case UMOD:
5351 *total = COSTS_N_INSNS (20);
5352 return true;
5353 case ROTATE:
5354 case ROTATERT:
5355 case ASHIFT:
5356 case ASHIFTRT:
5357 case LSHIFTRT:
5358 *total = COSTS_N_INSNS (4);
5359 return true;
5360 case UNSPEC:
5361 if (XINT (x, 1) == UNSPEC_CONVERT)
5362 *total = COSTS_N_INSNS (0);
5363 else
5364 *total = COSTS_N_INSNS (4);
5365 return true;
5366 }
5367 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5368 if (GET_MODE_CLASS (mode) == MODE_INT
5369 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5370 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5371 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5372 *total = cost;
5373 return true;
5374 }
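
/* Example of the mode-size scaling above: a DImode PLUS falls out of
   the switch with the base cost COSTS_N_INSNS (2) and is then scaled
   by (8 / 4) * (8 / 4) = 4, roughly reflecting the extra instructions
   needed to synthesize wide integer operations from 32-bit ones.  */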
5375
5376 static machine_mode
5377 spu_unwind_word_mode (void)
5378 {
5379 return SImode;
5380 }
5381
5382 /* Decide whether we can make a sibling call to a function. DECL is the
5383 declaration of the function being targeted by the call and EXP is the
5384 CALL_EXPR representing the call. */
5385 static bool
5386 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5387 {
5388 return decl && !TARGET_LARGE_MEM;
5389 }
5390
5391 /* We need to correctly update the back chain pointer and the Available
5392 Stack Size (which is in the second slot of the sp register). */
5393 void
5394 spu_allocate_stack (rtx op0, rtx op1)
5395 {
5396 HOST_WIDE_INT v;
5397 rtx chain = gen_reg_rtx (V4SImode);
5398 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5399 rtx sp = gen_reg_rtx (V4SImode);
5400 rtx splatted = gen_reg_rtx (V4SImode);
5401 rtx pat = gen_reg_rtx (TImode);
5402
5403 /* copy the back chain so we can save it back again. */
5404 emit_move_insn (chain, stack_bot);
5405
5406 op1 = force_reg (SImode, op1);
5407
5408 v = 0x1020300010203ll;
5409 emit_move_insn (pat, immed_double_const (v, v, TImode));
5410 emit_insn (gen_shufb (splatted, op1, op1, pat));
5411
5412 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5413 emit_insn (gen_subv4si3 (sp, sp, splatted));
5414
5415 if (flag_stack_check)
5416 {
5417 rtx avail = gen_reg_rtx(SImode);
5418 rtx result = gen_reg_rtx(SImode);
5419 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5420 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5421 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5422 }
5423
5424 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5425
5426 emit_move_insn (stack_bot, chain);
5427
5428 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5429 }
5430
5431 void
5432 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5433 {
5434 static unsigned char arr[16] =
5435 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5436 rtx temp = gen_reg_rtx (SImode);
5437 rtx temp2 = gen_reg_rtx (SImode);
5438 rtx temp3 = gen_reg_rtx (V4SImode);
5439 rtx temp4 = gen_reg_rtx (V4SImode);
5440 rtx pat = gen_reg_rtx (TImode);
5441 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5442
5443 /* Restore the backchain from the first word, sp from the second. */
5444 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5445 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5446
5447 emit_move_insn (pat, array_to_constant (TImode, arr));
5448
5449 /* Compute Available Stack Size for sp */
5450 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5451 emit_insn (gen_shufb (temp3, temp, temp, pat));
5452
5453 /* Compute Available Stack Size for back chain */
5454 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5455 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5456 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5457
5458 emit_insn (gen_addv4si3 (sp, sp, temp3));
5459 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5460 }
5461
5462 static void
5463 spu_init_libfuncs (void)
5464 {
5465 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5466 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5467 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5468 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5469 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5470 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5471 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5472 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5473 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5474 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5475 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5476 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5477
5478 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5479 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5480
5481 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5482 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5483 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5484 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5485 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5486 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5487 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5488 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5489 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5490 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5491 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5492 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5493
5494 set_optab_libfunc (smul_optab, TImode, "__multi3");
5495 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5496 set_optab_libfunc (smod_optab, TImode, "__modti3");
5497 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5498 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5499 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5500 }
5501
5502 /* Make a subreg, stripping any existing subreg. We could possibly just
5503 call simplify_subreg, but in this case we know what we want. */
5504 rtx
5505 spu_gen_subreg (machine_mode mode, rtx x)
5506 {
5507 if (GET_CODE (x) == SUBREG)
5508 x = SUBREG_REG (x);
5509 if (GET_MODE (x) == mode)
5510 return x;
5511 return gen_rtx_SUBREG (mode, x, 0);
5512 }
5513
5514 static bool
5515 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5516 {
5517 return (TYPE_MODE (type) == BLKmode
5518 && ((type) == 0
5519 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5520 || int_size_in_bytes (type) >
5521 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5522 }
5523 \f
5524 /* Create the built-in types and functions */
5525
5526 enum spu_function_code
5527 {
5528 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5529 #include "spu-builtins.def"
5530 #undef DEF_BUILTIN
5531 NUM_SPU_BUILTINS
5532 };
5533
5534 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5535
5536 struct spu_builtin_description spu_builtins[] = {
5537 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5538 {fcode, icode, name, type, params},
5539 #include "spu-builtins.def"
5540 #undef DEF_BUILTIN
5541 };
5542
5543 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5544
5545 /* Returns the spu builtin decl for CODE. */
5546
5547 static tree
5548 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5549 {
5550 if (code >= NUM_SPU_BUILTINS)
5551 return error_mark_node;
5552
5553 return spu_builtin_decls[code];
5554 }
5555
5556
5557 static void
5558 spu_init_builtins (void)
5559 {
5560 struct spu_builtin_description *d;
5561 unsigned int i;
5562
5563 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5564 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5565 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5566 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5567 V4SF_type_node = build_vector_type (float_type_node, 4);
5568 V2DF_type_node = build_vector_type (double_type_node, 2);
5569
5570 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5571 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5572 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5573 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5574
5575 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5576
5577 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5578 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5579 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5580 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5581 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5582 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5583 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5584 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5585 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5586 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5587 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5588 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5589
5590 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5591 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5592 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5593 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5594 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5595 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5596 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5597 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5598
5599 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5600 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5601
5602 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5603
5604 spu_builtin_types[SPU_BTI_PTR] =
5605 build_pointer_type (build_qualified_type
5606 (void_type_node,
5607 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5608
5609 /* For each builtin we build a new prototype. The tree code will make
5610 sure nodes are shared. */
5611 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5612 {
5613 tree p;
5614 char name[64]; /* build_function will make a copy. */
5615 int parm;
5616
5617 if (d->name == 0)
5618 continue;
5619
5620 /* Find last parm. */
5621 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5622 ;
5623
5624 p = void_list_node;
5625 while (parm > 1)
5626 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5627
5628 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5629
5630 sprintf (name, "__builtin_%s", d->name);
5631 spu_builtin_decls[i] =
5632 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5633 if (d->fcode == SPU_MASK_FOR_LOAD)
5634 TREE_READONLY (spu_builtin_decls[i]) = 1;
5635
5636 /* These builtins don't throw. */
5637 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5638 }
5639 }
5640
5641 void
5642 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5643 {
5644 static unsigned char arr[16] =
5645 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5646
5647 rtx temp = gen_reg_rtx (Pmode);
5648 rtx temp2 = gen_reg_rtx (V4SImode);
5649 rtx temp3 = gen_reg_rtx (V4SImode);
5650 rtx pat = gen_reg_rtx (TImode);
5651 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5652
5653 emit_move_insn (pat, array_to_constant (TImode, arr));
5654
5655 /* Restore the sp. */
5656 emit_move_insn (temp, op1);
5657 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5658
5659 /* Compute available stack size for sp. */
5660 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5661 emit_insn (gen_shufb (temp3, temp, temp, pat));
5662
5663 emit_insn (gen_addv4si3 (sp, sp, temp3));
5664 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5665 }
5666
5667 int
5668 spu_safe_dma (HOST_WIDE_INT channel)
5669 {
5670 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5671 }
5672
5673 void
5674 spu_builtin_splats (rtx ops[])
5675 {
5676 machine_mode mode = GET_MODE (ops[0]);
5677 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5678 {
5679 unsigned char arr[16];
5680 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5681 emit_move_insn (ops[0], array_to_constant (mode, arr));
5682 }
5683 else
5684 {
5685 rtx reg = gen_reg_rtx (TImode);
5686 rtx shuf;
5687 if (GET_CODE (ops[1]) != REG
5688 && GET_CODE (ops[1]) != SUBREG)
5689 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5690 switch (mode)
5691 {
5692 case V2DImode:
5693 case V2DFmode:
5694 shuf =
5695 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5696 TImode);
5697 break;
5698 case V4SImode:
5699 case V4SFmode:
5700 shuf =
5701 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5702 TImode);
5703 break;
5704 case V8HImode:
5705 shuf =
5706 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5707 TImode);
5708 break;
5709 case V16QImode:
5710 shuf =
5711 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5712 TImode);
5713 break;
5714 default:
5715 abort ();
5716 }
5717 emit_move_insn (reg, shuf);
5718 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5719 }
5720 }
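
/* For the V4SI case above, the shuffle constant with bytes
   00 01 02 03 repeated selects bytes 0-3 (the preferred scalar slot)
   of ops[1] into every word of the result, splatting the scalar
   across all four elements; the other element widths use the
   analogous byte patterns.  */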
5721
5722 void
5723 spu_builtin_extract (rtx ops[])
5724 {
5725 machine_mode mode;
5726 rtx rot, from, tmp;
5727
5728 mode = GET_MODE (ops[1]);
5729
5730 if (GET_CODE (ops[2]) == CONST_INT)
5731 {
5732 switch (mode)
5733 {
5734 case V16QImode:
5735 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5736 break;
5737 case V8HImode:
5738 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5739 break;
5740 case V4SFmode:
5741 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5742 break;
5743 case V4SImode:
5744 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5745 break;
5746 case V2DImode:
5747 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5748 break;
5749 case V2DFmode:
5750 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5751 break;
5752 default:
5753 abort ();
5754 }
5755 return;
5756 }
5757
5758 from = spu_gen_subreg (TImode, ops[1]);
5759 rot = gen_reg_rtx (TImode);
5760 tmp = gen_reg_rtx (SImode);
5761
5762 switch (mode)
5763 {
5764 case V16QImode:
5765 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5766 break;
5767 case V8HImode:
5768 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5769 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5770 break;
5771 case V4SFmode:
5772 case V4SImode:
5773 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5774 break;
5775 case V2DImode:
5776 case V2DFmode:
5777 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5778 break;
5779 default:
5780 abort ();
5781 }
5782 emit_insn (gen_rotqby_ti (rot, from, tmp));
5783
5784 emit_insn (gen_spu_convert (ops[0], rot));
5785 }
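/* Illustrative note (inferred from the code above, not an original comment):
   the rotate count computed for the non-constant case moves the requested
   element into the scalar preferred slot that gen_spu_convert reads:
   bytes 0-3 for SImode, bytes 2-3 for HImode and byte 3 for QImode.  Hence
   the counts 4*i and 8*i for word and doubleword elements, 2*i - 2 for
   halfwords and i - 3 for bytes.  */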
5786
5787 void
5788 spu_builtin_insert (rtx ops[])
5789 {
5790 machine_mode mode = GET_MODE (ops[0]);
5791 machine_mode imode = GET_MODE_INNER (mode);
5792 rtx mask = gen_reg_rtx (TImode);
5793 rtx offset;
5794
5795 if (GET_CODE (ops[3]) == CONST_INT)
5796 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5797 else
5798 {
5799 offset = gen_reg_rtx (SImode);
5800 emit_insn (gen_mulsi3
5801 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5802 }
5803 emit_insn (gen_cpat
5804 (mask, stack_pointer_rtx, offset,
5805 GEN_INT (GET_MODE_SIZE (imode))));
5806 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5807 }
5808
5809 void
5810 spu_builtin_promote (rtx ops[])
5811 {
5812 machine_mode mode, imode;
5813 rtx rot, from, offset;
5814 HOST_WIDE_INT pos;
5815
5816 mode = GET_MODE (ops[0]);
5817 imode = GET_MODE_INNER (mode);
5818
5819 from = gen_reg_rtx (TImode);
5820 rot = spu_gen_subreg (TImode, ops[0]);
5821
5822 emit_insn (gen_spu_convert (from, ops[1]));
5823
5824 if (GET_CODE (ops[2]) == CONST_INT)
5825 {
5826 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5827 if (GET_MODE_SIZE (imode) < 4)
5828 pos += 4 - GET_MODE_SIZE (imode);
5829 offset = GEN_INT (pos & 15);
5830 }
5831 else
5832 {
5833 offset = gen_reg_rtx (SImode);
5834 switch (mode)
5835 {
5836 case V16QImode:
5837 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5838 break;
5839 case V8HImode:
5840 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5841 emit_insn (gen_addsi3 (offset, offset, offset));
5842 break;
5843 case V4SFmode:
5844 case V4SImode:
5845 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5846 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5847 break;
5848 case V2DImode:
5849 case V2DFmode:
5850 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5851 break;
5852 default:
5853 abort ();
5854 }
5855 }
5856 emit_insn (gen_rotqby_ti (rot, from, offset));
5857 }
5858
5859 static void
5860 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5861 {
5862 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5863 rtx shuf = gen_reg_rtx (V4SImode);
5864 rtx insn = gen_reg_rtx (V4SImode);
5865 rtx shufc;
5866 rtx insnc;
5867 rtx mem;
5868
5869 fnaddr = force_reg (SImode, fnaddr);
5870 cxt = force_reg (SImode, cxt);
5871
5872 if (TARGET_LARGE_MEM)
5873 {
5874 rtx rotl = gen_reg_rtx (V4SImode);
5875 rtx mask = gen_reg_rtx (V4SImode);
5876 rtx bi = gen_reg_rtx (SImode);
5877 static unsigned char const shufa[16] = {
5878 2, 3, 0, 1, 18, 19, 16, 17,
5879 0, 1, 2, 3, 16, 17, 18, 19
5880 };
5881 static unsigned char const insna[16] = {
5882 0x41, 0, 0, 79,
5883 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5884 0x60, 0x80, 0, 79,
5885 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5886 };
5887
5888 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5889 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5890
5891 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5892 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5893 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5894 emit_insn (gen_selb (insn, insnc, rotl, mask));
5895
5896 mem = adjust_address (m_tramp, V4SImode, 0);
5897 emit_move_insn (mem, insn);
5898
5899 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5900 mem = adjust_address (m_tramp, Pmode, 16);
5901 emit_move_insn (mem, bi);
5902 }
5903 else
5904 {
5905 rtx scxt = gen_reg_rtx (SImode);
5906 rtx sfnaddr = gen_reg_rtx (SImode);
5907 static unsigned char const insna[16] = {
5908 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5909 0x30, 0, 0, 0,
5910 0, 0, 0, 0,
5911 0, 0, 0, 0
5912 };
5913
5914 shufc = gen_reg_rtx (TImode);
5915 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5916
5917 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5918 fits 18 bits and the last 4 are zeros. This will be true if
5919 the stack pointer is initialized to 0x3fff0 at program start,
5920 otherwise the ila instruction will be garbage. */
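/* Illustrative sketch of the encoding being assumed (not part of the
   original source): the first trampoline word ends up roughly as

     0x42000000                  ila opcode in the high bits
   | ((cxt & 0x3ffff) << 7)      18-bit immediate field
   | STATIC_CHAIN_REGNUM         target register in the low 7 bits

   so any bits of cxt above the low 18 would spill into the opcode field
   and corrupt the instruction.  */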
5921
5922 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5923 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5924 emit_insn (gen_cpat
5925 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5926 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5927 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5928
5929 mem = adjust_address (m_tramp, V4SImode, 0);
5930 emit_move_insn (mem, insn);
5931 }
5932 emit_insn (gen_sync ());
5933 }
5934
5935 static bool
5936 spu_warn_func_return (tree decl)
5937 {
5938 /* Naked functions are implemented entirely in assembly, including the
5939 return sequence, so suppress warnings about this. */
5940 return !spu_naked_function_p (decl);
5941 }
5942
5943 void
5944 spu_expand_sign_extend (rtx ops[])
5945 {
5946 unsigned char arr[16];
5947 rtx pat = gen_reg_rtx (TImode);
5948 rtx sign, c;
5949 int i, last;
5950 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5951 if (GET_MODE (ops[1]) == QImode)
5952 {
5953 sign = gen_reg_rtx (HImode);
5954 emit_insn (gen_extendqihi2 (sign, ops[1]));
5955 for (i = 0; i < 16; i++)
5956 arr[i] = 0x12;
5957 arr[last] = 0x13;
5958 }
5959 else
5960 {
5961 for (i = 0; i < 16; i++)
5962 arr[i] = 0x10;
5963 switch (GET_MODE (ops[1]))
5964 {
5965 case HImode:
5966 sign = gen_reg_rtx (SImode);
5967 emit_insn (gen_extendhisi2 (sign, ops[1]));
5968 arr[last] = 0x03;
5969 arr[last - 1] = 0x02;
5970 break;
5971 case SImode:
5972 sign = gen_reg_rtx (SImode);
5973 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5974 for (i = 0; i < 4; i++)
5975 arr[last - i] = 3 - i;
5976 break;
5977 case DImode:
5978 sign = gen_reg_rtx (SImode);
5979 c = gen_reg_rtx (SImode);
5980 emit_insn (gen_spu_convert (c, ops[1]));
5981 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5982 for (i = 0; i < 8; i++)
5983 arr[last - i] = 7 - i;
5984 break;
5985 default:
5986 abort ();
5987 }
5988 }
5989 emit_move_insn (pat, array_to_constant (TImode, arr));
5990 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5991 }
5992
5993 /* Expand vector initialization. If there are any constant parts,
5994 load the constant parts first. Then load any non-constant parts. */
5995 void
5996 spu_expand_vector_init (rtx target, rtx vals)
5997 {
5998 machine_mode mode = GET_MODE (target);
5999 int n_elts = GET_MODE_NUNITS (mode);
6000 int n_var = 0;
6001 bool all_same = true;
6002 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6003 int i;
6004
6005 first = XVECEXP (vals, 0, 0);
6006 for (i = 0; i < n_elts; ++i)
6007 {
6008 x = XVECEXP (vals, 0, i);
6009 if (!(CONST_INT_P (x)
6010 || GET_CODE (x) == CONST_DOUBLE
6011 || GET_CODE (x) == CONST_FIXED))
6012 ++n_var;
6013 else
6014 {
6015 if (first_constant == NULL_RTX)
6016 first_constant = x;
6017 }
6018 if (i > 0 && !rtx_equal_p (x, first))
6019 all_same = false;
6020 }
6021
6022 /* If all elements are the same, use splats to repeat the element. */
6023 if (all_same)
6024 {
6025 if (!CONSTANT_P (first)
6026 && !register_operand (first, GET_MODE (x)))
6027 first = force_reg (GET_MODE (first), first);
6028 emit_insn (gen_spu_splats (target, first));
6029 return;
6030 }
6031
6032 /* load constant parts */
6033 if (n_var != n_elts)
6034 {
6035 if (n_var == 0)
6036 {
6037 emit_move_insn (target,
6038 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6039 }
6040 else
6041 {
6042 rtx constant_parts_rtx = copy_rtx (vals);
6043
6044 gcc_assert (first_constant != NULL_RTX);
6045 /* Fill empty slots with the first constant; this increases
6046 our chance of using splats in the recursive call below. */
6047 for (i = 0; i < n_elts; ++i)
6048 {
6049 x = XVECEXP (constant_parts_rtx, 0, i);
6050 if (!(CONST_INT_P (x)
6051 || GET_CODE (x) == CONST_DOUBLE
6052 || GET_CODE (x) == CONST_FIXED))
6053 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6054 }
6055
6056 spu_expand_vector_init (target, constant_parts_rtx);
6057 }
6058 }
6059
6060 /* load variable parts */
6061 if (n_var != 0)
6062 {
6063 rtx insert_operands[4];
6064
6065 insert_operands[0] = target;
6066 insert_operands[2] = target;
6067 for (i = 0; i < n_elts; ++i)
6068 {
6069 x = XVECEXP (vals, 0, i);
6070 if (!(CONST_INT_P (x)
6071 || GET_CODE (x) == CONST_DOUBLE
6072 || GET_CODE (x) == CONST_FIXED))
6073 {
6074 if (!register_operand (x, GET_MODE (x)))
6075 x = force_reg (GET_MODE (x), x);
6076 insert_operands[1] = x;
6077 insert_operands[3] = GEN_INT (i);
6078 spu_builtin_insert (insert_operands);
6079 }
6080 }
6081 }
6082 }
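/* Worked example (illustrative, not from the original source): initializing
   a V4SImode vector from { 1, x, 2, y } first emits the constant vector
   { 1, 1, 2, 1 } -- the variable slots are filled with the first constant,
   which keeps the splat path available in the recursive call -- and then
   inserts x and y at element positions 1 and 3 via spu_builtin_insert.  */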
6083
6084 /* Return the insn code of the vector compare instruction for the given
6085 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6086
6087 static int
6088 get_vec_cmp_insn (enum rtx_code code,
6089 machine_mode dest_mode,
6090 machine_mode op_mode)
6091
6092 {
6093 switch (code)
6094 {
6095 case EQ:
6096 if (dest_mode == V16QImode && op_mode == V16QImode)
6097 return CODE_FOR_ceq_v16qi;
6098 if (dest_mode == V8HImode && op_mode == V8HImode)
6099 return CODE_FOR_ceq_v8hi;
6100 if (dest_mode == V4SImode && op_mode == V4SImode)
6101 return CODE_FOR_ceq_v4si;
6102 if (dest_mode == V4SImode && op_mode == V4SFmode)
6103 return CODE_FOR_ceq_v4sf;
6104 if (dest_mode == V2DImode && op_mode == V2DFmode)
6105 return CODE_FOR_ceq_v2df;
6106 break;
6107 case GT:
6108 if (dest_mode == V16QImode && op_mode == V16QImode)
6109 return CODE_FOR_cgt_v16qi;
6110 if (dest_mode == V8HImode && op_mode == V8HImode)
6111 return CODE_FOR_cgt_v8hi;
6112 if (dest_mode == V4SImode && op_mode == V4SImode)
6113 return CODE_FOR_cgt_v4si;
6114 if (dest_mode == V4SImode && op_mode == V4SFmode)
6115 return CODE_FOR_cgt_v4sf;
6116 if (dest_mode == V2DImode && op_mode == V2DFmode)
6117 return CODE_FOR_cgt_v2df;
6118 break;
6119 case GTU:
6120 if (dest_mode == V16QImode && op_mode == V16QImode)
6121 return CODE_FOR_clgt_v16qi;
6122 if (dest_mode == V8HImode && op_mode == V8HImode)
6123 return CODE_FOR_clgt_v8hi;
6124 if (dest_mode == V4SImode && op_mode == V4SImode)
6125 return CODE_FOR_clgt_v4si;
6126 break;
6127 default:
6128 break;
6129 }
6130 return -1;
6131 }
6132
6133 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
6134 DMODE is the expected destination mode. This is a recursive function. */
6135
6136 static rtx
6137 spu_emit_vector_compare (enum rtx_code rcode,
6138 rtx op0, rtx op1,
6139 machine_mode dmode)
6140 {
6141 int vec_cmp_insn;
6142 rtx mask;
6143 machine_mode dest_mode;
6144 machine_mode op_mode = GET_MODE (op1);
6145
6146 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6147
6148 /* Single-precision floating point vector compare instructions use a
6149 V4SImode destination; double-precision ones use a V2DImode destination.
6150 Move the result to the appropriate mode later. */
6151 if (dmode == V4SFmode)
6152 dest_mode = V4SImode;
6153 else if (dmode == V2DFmode)
6154 dest_mode = V2DImode;
6155 else
6156 dest_mode = dmode;
6157
6158 mask = gen_reg_rtx (dest_mode);
6159 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6160
6161 if (vec_cmp_insn == -1)
6162 {
6163 bool swap_operands = false;
6164 bool try_again = false;
6165 switch (rcode)
6166 {
6167 case LT:
6168 rcode = GT;
6169 swap_operands = true;
6170 try_again = true;
6171 break;
6172 case LTU:
6173 rcode = GTU;
6174 swap_operands = true;
6175 try_again = true;
6176 break;
6177 case NE:
6178 case UNEQ:
6179 case UNLE:
6180 case UNLT:
6181 case UNGE:
6182 case UNGT:
6183 case UNORDERED:
6184 /* Treat A != B as ~(A==B). */
6185 {
6186 enum rtx_code rev_code;
6187 enum insn_code nor_code;
6188 rtx rev_mask;
6189
6190 rev_code = reverse_condition_maybe_unordered (rcode);
6191 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6192
6193 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6194 gcc_assert (nor_code != CODE_FOR_nothing);
6195 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6196 if (dmode != dest_mode)
6197 {
6198 rtx temp = gen_reg_rtx (dest_mode);
6199 convert_move (temp, mask, 0);
6200 return temp;
6201 }
6202 return mask;
6203 }
6204 break;
6205 case GE:
6206 case GEU:
6207 case LE:
6208 case LEU:
6209 /* Try GT/GTU/LT/LTU OR EQ */
6210 {
6211 rtx c_rtx, eq_rtx;
6212 enum insn_code ior_code;
6213 enum rtx_code new_code;
6214
6215 switch (rcode)
6216 {
6217 case GE: new_code = GT; break;
6218 case GEU: new_code = GTU; break;
6219 case LE: new_code = LT; break;
6220 case LEU: new_code = LTU; break;
6221 default:
6222 gcc_unreachable ();
6223 }
6224
6225 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6226 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6227
6228 ior_code = optab_handler (ior_optab, dest_mode);
6229 gcc_assert (ior_code != CODE_FOR_nothing);
6230 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6231 if (dmode != dest_mode)
6232 {
6233 rtx temp = gen_reg_rtx (dest_mode);
6234 convert_move (temp, mask, 0);
6235 return temp;
6236 }
6237 return mask;
6238 }
6239 break;
6240 case LTGT:
6241 /* Try LT OR GT */
6242 {
6243 rtx lt_rtx, gt_rtx;
6244 enum insn_code ior_code;
6245
6246 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6247 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6248
6249 ior_code = optab_handler (ior_optab, dest_mode);
6250 gcc_assert (ior_code != CODE_FOR_nothing);
6251 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6252 if (dmode != dest_mode)
6253 {
6254 rtx temp = gen_reg_rtx (dest_mode);
6255 convert_move (temp, mask, 0);
6256 return temp;
6257 }
6258 return mask;
6259 }
6260 break;
6261 case ORDERED:
6262 /* Implement as (A==A) & (B==B) */
6263 {
6264 rtx a_rtx, b_rtx;
6265 enum insn_code and_code;
6266
6267 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6268 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6269
6270 and_code = optab_handler (and_optab, dest_mode);
6271 gcc_assert (and_code != CODE_FOR_nothing);
6272 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6273 if (dmode != dest_mode)
6274 {
6275 rtx temp = gen_reg_rtx (dest_mode);
6276 convert_move (temp, mask, 0);
6277 return temp;
6278 }
6279 return mask;
6280 }
6281 break;
6282 default:
6283 gcc_unreachable ();
6284 }
6285
6286 /* You only get two chances. */
6287 if (try_again)
6288 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6289
6290 gcc_assert (vec_cmp_insn != -1);
6291
6292 if (swap_operands)
6293 {
6294 rtx tmp;
6295 tmp = op0;
6296 op0 = op1;
6297 op1 = tmp;
6298 }
6299 }
6300
6301 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6302 if (dmode != dest_mode)
6303 {
6304 rtx temp = gen_reg_rtx (dest_mode);
6305 convert_move (temp, mask, 0);
6306 return temp;
6307 }
6308 return mask;
6309 }
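/* Illustrative examples of the fallback paths above (not from the original
   source): a V4SImode LT compare has no direct pattern, so it is rewritten
   as GT with the operands swapped; a V4SImode GE compare is emitted as the
   IOR of a GT mask and an EQ mask; and NE is emitted as the one's
   complement of the EQ mask.  */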
6310
6311
6312 /* Emit vector conditional expression.
6313 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6314 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6315
6316 int
6317 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6318 rtx cond, rtx cc_op0, rtx cc_op1)
6319 {
6320 machine_mode dest_mode = GET_MODE (dest);
6321 enum rtx_code rcode = GET_CODE (cond);
6322 rtx mask;
6323
6324 /* Get the vector mask for the given relational operations. */
6325 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6326
6327 emit_insn(gen_selb (dest, op2, op1, mask));
6328
6329 return 1;
6330 }
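/* Note on the operand order above (an observation, not an original comment):
   selb takes bits from its second value operand where the mask bits are 1,
   so passing (op2, op1, mask) yields dest = cond ? op1 : op2, matching the
   VEC_COND_EXPR semantics.  */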
6331
6332 static rtx
6333 spu_force_reg (machine_mode mode, rtx op)
6334 {
6335 rtx x, r;
6336 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6337 {
6338 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6339 || GET_MODE (op) == BLKmode)
6340 return force_reg (mode, convert_to_mode (mode, op, 0));
6341 abort ();
6342 }
6343
6344 r = force_reg (GET_MODE (op), op);
6345 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6346 {
6347 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6348 if (x)
6349 return x;
6350 }
6351
6352 x = gen_reg_rtx (mode);
6353 emit_insn (gen_spu_convert (x, r));
6354 return x;
6355 }
6356
6357 static void
6358 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6359 {
6360 HOST_WIDE_INT v = 0;
6361 int lsbits;
6362 /* Check the range of immediate operands. */
6363 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6364 {
6365 int range = p - SPU_BTI_7;
6366
6367 if (!CONSTANT_P (op))
6368 error ("%s expects an integer literal in the range [%d, %d]",
6369 d->name,
6370 spu_builtin_range[range].low, spu_builtin_range[range].high);
6371
6372 if (GET_CODE (op) == CONST
6373 && (GET_CODE (XEXP (op, 0)) == PLUS
6374 || GET_CODE (XEXP (op, 0)) == MINUS))
6375 {
6376 v = INTVAL (XEXP (XEXP (op, 0), 1));
6377 op = XEXP (XEXP (op, 0), 0);
6378 }
6379 else if (GET_CODE (op) == CONST_INT)
6380 v = INTVAL (op);
6381 else if (GET_CODE (op) == CONST_VECTOR
6382 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6383 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6384
6385 /* The default for v is 0 which is valid in every range. */
6386 if (v < spu_builtin_range[range].low
6387 || v > spu_builtin_range[range].high)
6388 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6389 d->name,
6390 spu_builtin_range[range].low, spu_builtin_range[range].high,
6391 v);
6392
6393 switch (p)
6394 {
6395 case SPU_BTI_S10_4:
6396 lsbits = 4;
6397 break;
6398 case SPU_BTI_U16_2:
6399 /* This is only used in lqa and stqa. Even though the insns
6400 encode 16 bits of the address (all but the 2 least
6401 significant), only 14 bits are used because the address is
6402 masked to be 16-byte aligned. */
6403 lsbits = 4;
6404 break;
6405 case SPU_BTI_S16_2:
6406 /* This is used for lqr and stqr. */
6407 lsbits = 2;
6408 break;
6409 default:
6410 lsbits = 0;
6411 }
6412
6413 if (GET_CODE (op) == LABEL_REF
6414 || (GET_CODE (op) == SYMBOL_REF
6415 && SYMBOL_REF_FUNCTION_P (op))
6416 || (v & ((1 << lsbits) - 1)) != 0)
6417 warning (0, "%d least significant bits of %s are ignored", lsbits,
6418 d->name);
6419 }
6420 }
6421
6422
6423 static int
6424 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6425 rtx target, rtx ops[])
6426 {
6427 enum insn_code icode = (enum insn_code) d->icode;
6428 int i = 0, a;
6429
6430 /* Expand the arguments into rtl. */
6431
6432 if (d->parm[0] != SPU_BTI_VOID)
6433 ops[i++] = target;
6434
6435 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6436 {
6437 tree arg = CALL_EXPR_ARG (exp, a);
6438 if (arg == 0)
6439 abort ();
6440 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6441 }
6442
6443 gcc_assert (i == insn_data[icode].n_generator_args);
6444 return i;
6445 }
6446
6447 static rtx
6448 spu_expand_builtin_1 (struct spu_builtin_description *d,
6449 tree exp, rtx target)
6450 {
6451 rtx pat;
6452 rtx ops[8];
6453 enum insn_code icode = (enum insn_code) d->icode;
6454 machine_mode mode, tmode;
6455 int i, p;
6456 int n_operands;
6457 tree return_type;
6458
6459 /* Set up ops[] with values from arglist. */
6460 n_operands = expand_builtin_args (d, exp, target, ops);
6461
6462 /* Handle the target operand which must be operand 0. */
6463 i = 0;
6464 if (d->parm[0] != SPU_BTI_VOID)
6465 {
6466
6467 /* We prefer the mode specified for the match_operand; otherwise
6468 we use the mode from the builtin function prototype. */
6469 tmode = insn_data[d->icode].operand[0].mode;
6470 if (tmode == VOIDmode)
6471 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6472
6473 /* Try to use target, because not using it can lead to extra copies,
6474 and when all of the registers are in use those extra copies lead
6475 to extra spills. */
6476 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6477 ops[0] = target;
6478 else
6479 target = ops[0] = gen_reg_rtx (tmode);
6480
6481 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6482 abort ();
6483
6484 i++;
6485 }
6486
6487 if (d->fcode == SPU_MASK_FOR_LOAD)
6488 {
6489 machine_mode mode = insn_data[icode].operand[1].mode;
6490 tree arg;
6491 rtx addr, op, pat;
6492
6493 /* get addr */
6494 arg = CALL_EXPR_ARG (exp, 0);
6495 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6496 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6497 addr = memory_address (mode, op);
6498
6499 /* negate addr */
6500 op = gen_reg_rtx (GET_MODE (addr));
6501 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6502 op = gen_rtx_MEM (mode, op);
6503
6504 pat = GEN_FCN (icode) (target, op);
6505 if (!pat)
6506 return 0;
6507 emit_insn (pat);
6508 return target;
6509 }
6510
6511 /* Ignore align_hint, but still expand its args in case they have
6512 side effects. */
6513 if (icode == CODE_FOR_spu_align_hint)
6514 return 0;
6515
6516 /* Handle the rest of the operands. */
6517 for (p = 1; i < n_operands; i++, p++)
6518 {
6519 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6520 mode = insn_data[d->icode].operand[i].mode;
6521 else
6522 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6523
6524 /* mode can be VOIDmode here for labels */
6525
6526 /* For specific intrinsics with an immediate operand, e.g.,
6527 si_ai(), we sometimes need to convert the scalar argument to a
6528 vector argument by splatting the scalar. */
6529 if (VECTOR_MODE_P (mode)
6530 && (GET_CODE (ops[i]) == CONST_INT
6531 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6532 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6533 {
6534 if (GET_CODE (ops[i]) == CONST_INT)
6535 ops[i] = spu_const (mode, INTVAL (ops[i]));
6536 else
6537 {
6538 rtx reg = gen_reg_rtx (mode);
6539 machine_mode imode = GET_MODE_INNER (mode);
6540 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6541 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6542 if (imode != GET_MODE (ops[i]))
6543 ops[i] = convert_to_mode (imode, ops[i],
6544 TYPE_UNSIGNED (spu_builtin_types
6545 [d->parm[i]]));
6546 emit_insn (gen_spu_splats (reg, ops[i]));
6547 ops[i] = reg;
6548 }
6549 }
6550
6551 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6552
6553 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6554 ops[i] = spu_force_reg (mode, ops[i]);
6555 }
6556
6557 switch (n_operands)
6558 {
6559 case 0:
6560 pat = GEN_FCN (icode) (0);
6561 break;
6562 case 1:
6563 pat = GEN_FCN (icode) (ops[0]);
6564 break;
6565 case 2:
6566 pat = GEN_FCN (icode) (ops[0], ops[1]);
6567 break;
6568 case 3:
6569 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6570 break;
6571 case 4:
6572 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6573 break;
6574 case 5:
6575 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6576 break;
6577 case 6:
6578 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6579 break;
6580 default:
6581 abort ();
6582 }
6583
6584 if (!pat)
6585 abort ();
6586
6587 if (d->type == B_CALL || d->type == B_BISLED)
6588 emit_call_insn (pat);
6589 else if (d->type == B_JUMP)
6590 {
6591 emit_jump_insn (pat);
6592 emit_barrier ();
6593 }
6594 else
6595 emit_insn (pat);
6596
6597 return_type = spu_builtin_types[d->parm[0]];
6598 if (d->parm[0] != SPU_BTI_VOID
6599 && GET_MODE (target) != TYPE_MODE (return_type))
6600 {
6601 /* target is the return value. It should always have the mode of
6602 the builtin function prototype. */
6603 target = spu_force_reg (TYPE_MODE (return_type), target);
6604 }
6605
6606 return target;
6607 }
6608
6609 rtx
6610 spu_expand_builtin (tree exp,
6611 rtx target,
6612 rtx subtarget ATTRIBUTE_UNUSED,
6613 machine_mode mode ATTRIBUTE_UNUSED,
6614 int ignore ATTRIBUTE_UNUSED)
6615 {
6616 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6617 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6618 struct spu_builtin_description *d;
6619
6620 if (fcode < NUM_SPU_BUILTINS)
6621 {
6622 d = &spu_builtins[fcode];
6623
6624 return spu_expand_builtin_1 (d, exp, target);
6625 }
6626 abort ();
6627 }
6628
6629 /* Implement targetm.vectorize.builtin_mask_for_load. */
6630 static tree
6631 spu_builtin_mask_for_load (void)
6632 {
6633 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6634 }
6635
6636 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6637 static int
6638 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6639 tree vectype,
6640 int misalign ATTRIBUTE_UNUSED)
6641 {
6642 unsigned elements;
6643
6644 switch (type_of_cost)
6645 {
6646 case scalar_stmt:
6647 case vector_stmt:
6648 case vector_load:
6649 case vector_store:
6650 case vec_to_scalar:
6651 case scalar_to_vec:
6652 case cond_branch_not_taken:
6653 case vec_perm:
6654 case vec_promote_demote:
6655 return 1;
6656
6657 case scalar_store:
6658 return 10;
6659
6660 case scalar_load:
6661 /* Load + rotate. */
6662 return 2;
6663
6664 case unaligned_load:
6665 return 2;
6666
6667 case cond_branch_taken:
6668 return 6;
6669
6670 case vec_construct:
6671 elements = TYPE_VECTOR_SUBPARTS (vectype);
6672 return elements / 2 + 1;
6673
6674 default:
6675 gcc_unreachable ();
6676 }
6677 }
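/* Worked example (illustrative): for a V4SImode vectype,
   TYPE_VECTOR_SUBPARTS is 4, so a vec_construct costs 4 / 2 + 1 = 3,
   compared with 1 for most vector statements, 2 for a scalar or unaligned
   load, and 10 for a scalar store.  */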
6678
6679 /* Implement targetm.vectorize.init_cost. */
6680
6681 static void *
6682 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6683 {
6684 unsigned *cost = XNEWVEC (unsigned, 3);
6685 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6686 return cost;
6687 }
6688
6689 /* Implement targetm.vectorize.add_stmt_cost. */
6690
6691 static unsigned
6692 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6693 struct _stmt_vec_info *stmt_info, int misalign,
6694 enum vect_cost_model_location where)
6695 {
6696 unsigned *cost = (unsigned *) data;
6697 unsigned retval = 0;
6698
6699 if (flag_vect_cost_model)
6700 {
6701 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6702 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6703
6704 /* Statements in an inner loop relative to the loop being
6705 vectorized are weighted more heavily. The value here is
6706 arbitrary and could potentially be improved with analysis. */
6707 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6708 count *= 50; /* FIXME. */
6709
6710 retval = (unsigned) (count * stmt_cost);
6711 cost[where] += retval;
6712 }
6713
6714 return retval;
6715 }
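/* Worked example (illustrative): a vector_stmt (cost 1 above) that sits in
   an inner loop relative to the loop being vectorized has its count scaled
   by 50, so a single such statement adds 50 to cost[vect_body] rather
   than 1.  */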
6716
6717 /* Implement targetm.vectorize.finish_cost. */
6718
6719 static void
6720 spu_finish_cost (void *data, unsigned *prologue_cost,
6721 unsigned *body_cost, unsigned *epilogue_cost)
6722 {
6723 unsigned *cost = (unsigned *) data;
6724 *prologue_cost = cost[vect_prologue];
6725 *body_cost = cost[vect_body];
6726 *epilogue_cost = cost[vect_epilogue];
6727 }
6728
6729 /* Implement targetm.vectorize.destroy_cost_data. */
6730
6731 static void
6732 spu_destroy_cost_data (void *data)
6733 {
6734 free (data);
6735 }
6736
6737 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6738 after some number of iterations. This routine does not determine
6739 how many iterations are required to reach the desired alignment. */
6740
6741 static bool
6742 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6743 {
6744 if (is_packed)
6745 return false;
6746
6747 /* All other types are naturally aligned. */
6748 return true;
6749 }
6750
6751 /* Return the appropriate mode for a named address pointer. */
6752 static machine_mode
6753 spu_addr_space_pointer_mode (addr_space_t addrspace)
6754 {
6755 switch (addrspace)
6756 {
6757 case ADDR_SPACE_GENERIC:
6758 return ptr_mode;
6759 case ADDR_SPACE_EA:
6760 return EAmode;
6761 default:
6762 gcc_unreachable ();
6763 }
6764 }
6765
6766 /* Return the appropriate mode for an address in a named address space. */
6767 static machine_mode
6768 spu_addr_space_address_mode (addr_space_t addrspace)
6769 {
6770 switch (addrspace)
6771 {
6772 case ADDR_SPACE_GENERIC:
6773 return Pmode;
6774 case ADDR_SPACE_EA:
6775 return EAmode;
6776 default:
6777 gcc_unreachable ();
6778 }
6779 }
6780
6781 /* Determine if one named address space is a subset of another. */
6782
6783 static bool
6784 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6785 {
6786 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6787 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6788
6789 if (subset == superset)
6790 return true;
6791
6792 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6793 being subsets but instead as disjoint address spaces. */
6794 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6795 return false;
6796
6797 else
6798 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6799 }
6800
6801 /* Convert from one address space to another. */
6802 static rtx
6803 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6804 {
6805 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6806 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6807
6808 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6809 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6810
6811 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6812 {
6813 rtx result, ls;
6814
6815 ls = gen_const_mem (DImode,
6816 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6817 set_mem_align (ls, 128);
6818
6819 result = gen_reg_rtx (Pmode);
6820 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6821 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6822 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6823 ls, const0_rtx, Pmode, 1);
6824
6825 emit_insn (gen_subsi3 (result, op, ls));
6826
6827 return result;
6828 }
6829
6830 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6831 {
6832 rtx result, ls;
6833
6834 ls = gen_const_mem (DImode,
6835 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6836 set_mem_align (ls, 128);
6837
6838 result = gen_reg_rtx (EAmode);
6839 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6840 op = force_reg (Pmode, op);
6841 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6842 ls, const0_rtx, EAmode, 1);
6843 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6844
6845 if (EAmode == SImode)
6846 emit_insn (gen_addsi3 (result, op, ls));
6847 else
6848 emit_insn (gen_adddi3 (result, op, ls));
6849
6850 return result;
6851 }
6852
6853 else
6854 gcc_unreachable ();
6855 }
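/* Summary of the conversions above (an illustrative reading of the code,
   not an original comment): a non-null __ea pointer becomes a generic
   pointer by subtracting the value of __ea_local_store, and a non-null
   generic pointer becomes an __ea pointer by adding it; null pointers are
   preserved because the conditional move substitutes zero for the
   local-store base when the operand is zero.  */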
6856
6857
6858 /* Count the total number of instructions in each pipe and return the
6859 maximum, which is used as the Minimum Iteration Interval (MII)
6860 in the modulo scheduler. get_pipe () returns -2, -1, 0, or 1;
6861 -2 means the instruction can go in either pipe0 or pipe1. */
6862 static int
6863 spu_sms_res_mii (struct ddg *g)
6864 {
6865 int i;
6866 unsigned t[4] = {0, 0, 0, 0};
6867
6868 for (i = 0; i < g->num_nodes; i++)
6869 {
6870 rtx_insn *insn = g->nodes[i].insn;
6871 int p = get_pipe (insn) + 2;
6872
6873 gcc_assert (p >= 0);
6874 gcc_assert (p < 4);
6875
6876 t[p]++;
6877 if (dump_file && INSN_P (insn))
6878 fprintf (dump_file, "i%d %s %d %d\n",
6879 INSN_UID (insn),
6880 insn_data[INSN_CODE(insn)].name,
6881 p, t[p]);
6882 }
6883 if (dump_file)
6884 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6885
6886 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6887 }
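/* Worked example (illustrative): with t = { 2, 0, 3, 1 } -- two either-pipe
   instructions, three pipe0 instructions and one pipe1 instruction -- the
   result is MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3.  */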
6888
6889
6890 void
6891 spu_init_expanders (void)
6892 {
6893 if (cfun)
6894 {
6895 rtx r0, r1;
6896 /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6897 frame_pointer_needed is true. We don't know that until we're
6898 expanding the prologue. */
6899 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6900
6901 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6902 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6903 to be treated as aligned, so generate them here. */
6904 r0 = gen_reg_rtx (SImode);
6905 r1 = gen_reg_rtx (SImode);
6906 mark_reg_pointer (r0, 128);
6907 mark_reg_pointer (r1, 128);
6908 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6909 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6910 }
6911 }
6912
6913 static machine_mode
6914 spu_libgcc_cmp_return_mode (void)
6915 {
6916
6917 /* For SPU, word_mode is TImode, so it is better to use SImode
6918 for compare returns. */
6919 return SImode;
6920 }
6921
6922 static machine_mode
6923 spu_libgcc_shift_count_mode (void)
6924 {
6925 /* For SPU, word_mode is TImode, so it is better to use SImode
6926 for shift counts. */
6927 return SImode;
6928 }
6929
6930 /* Implement targetm.section_type_flags. */
6931 static unsigned int
6932 spu_section_type_flags (tree decl, const char *name, int reloc)
6933 {
6934 /* .toe needs to have type @nobits. */
6935 if (strcmp (name, ".toe") == 0)
6936 return SECTION_BSS;
6937 /* Don't load _ea into the current address space. */
6938 if (strcmp (name, "._ea") == 0)
6939 return SECTION_WRITE | SECTION_DEBUG;
6940 return default_section_type_flags (decl, name, reloc);
6941 }
6942
6943 /* Implement targetm.select_section. */
6944 static section *
6945 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6946 {
6947 /* Variables and constants defined in the __ea address space
6948 go into a special section named "._ea". */
6949 if (TREE_TYPE (decl) != error_mark_node
6950 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6951 {
6952 /* We might get called with string constants, but get_named_section
6953 doesn't like them as they are not DECLs. Also, we need to set
6954 flags in that case. */
6955 if (!DECL_P (decl))
6956 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6957
6958 return get_named_section (decl, "._ea", reloc);
6959 }
6960
6961 return default_elf_select_section (decl, reloc, align);
6962 }
6963
6964 /* Implement targetm.unique_section. */
6965 static void
6966 spu_unique_section (tree decl, int reloc)
6967 {
6968 /* We don't support unique section names in the __ea address
6969 space for now. */
6970 if (TREE_TYPE (decl) != error_mark_node
6971 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6972 return;
6973
6974 default_unique_section (decl, reloc);
6975 }
6976
6977 /* Generate a constant or register which contains 2^SCALE. We assume
6978 the result is valid for MODE. Currently, MODE must be V4SFmode and
6979 SCALE must be SImode. */
6980 rtx
6981 spu_gen_exp2 (machine_mode mode, rtx scale)
6982 {
6983 gcc_assert (mode == V4SFmode);
6984 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6985 if (GET_CODE (scale) != CONST_INT)
6986 {
6987 /* unsigned int exp = (127 + scale) << 23;
6988 __vector float m = (__vector float) spu_splats (exp); */
6989 rtx reg = force_reg (SImode, scale);
6990 rtx exp = gen_reg_rtx (SImode);
6991 rtx mul = gen_reg_rtx (mode);
6992 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6993 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6994 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6995 return mul;
6996 }
6997 else
6998 {
6999 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7000 unsigned char arr[16];
7001 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7002 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7003 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7004 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7005 return array_to_constant (mode, arr);
7006 }
7007 }
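/* Illustrative sketch, not part of the original backend: the constant case
   above builds the IEEE single-precision bit pattern of 2^scale by placing
   the biased exponent (127 + scale) in bits 23-30 with a zero mantissa;
   arr[0] and arr[1] hold the top two bytes of that word.  A hypothetical
   standalone check of the encoding (assumes 32-bit unsigned int and IEEE
   floats; kept out of the build):  */
#if 0
static float
spu_exp2_example (int scale)
{
  union { unsigned int u; float f; } x;
  x.u = (unsigned int) (127 + scale) << 23;   /* Biased exponent, zero mantissa.  */
  return x.f;                                 /* scale == 3 yields 8.0f.  */
}
#endif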
7008
7009 /* After reload, just change the convert into a move instruction
7010 or a dead instruction. */
7011 void
7012 spu_split_convert (rtx ops[])
7013 {
7014 if (REGNO (ops[0]) == REGNO (ops[1]))
7015 emit_note (NOTE_INSN_DELETED);
7016 else
7017 {
7018 /* Use TImode always as this might help hard reg copyprop. */
7019 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7020 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7021 emit_insn (gen_move_insn (op0, op1));
7022 }
7023 }
7024
7025 void
7026 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7027 {
7028 fprintf (file, "# profile\n");
7029 fprintf (file, "brsl $75, _mcount\n");
7030 }
7031
7032 /* Implement targetm.ref_may_alias_errno. */
7033 static bool
7034 spu_ref_may_alias_errno (ao_ref *ref)
7035 {
7036 tree base = ao_ref_base (ref);
7037
7038 /* With SPU newlib, errno is defined as something like
7039 _impure_data._errno.
7040 The default implementation of this target macro does not
7041 recognize such expressions, so we special-case them here. */
7042
7043 if (TREE_CODE (base) == VAR_DECL
7044 && !TREE_STATIC (base)
7045 && DECL_EXTERNAL (base)
7046 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7047 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7048 "_impure_data") == 0
7049 /* _errno is the first member of _impure_data. */
7050 && ref->offset == 0)
7051 return true;
7052
7053 return default_ref_may_alias_errno (ref);
7054 }
7055
7056 /* Output thunk to FILE that implements a C++ virtual function call (with
7057 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7058 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7059 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7060 relative to the resulting this pointer. */
7061
7062 static void
7063 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7064 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7065 tree function)
7066 {
7067 rtx op[8];
7068
7069 /* Make sure unwind info is emitted for the thunk if needed. */
7070 final_start_function (emit_barrier (), file, 1);
7071
7072 /* Operand 0 is the target function. */
7073 op[0] = XEXP (DECL_RTL (function), 0);
7074
7075 /* Operand 1 is the 'this' pointer. */
7076 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7077 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7078 else
7079 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7080
7081 /* Operands 2/3 are the low/high halfwords of delta. */
7082 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7083 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7084
7085 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7086 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7087 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7088
7089 /* Operands 6/7 are temporary registers. */
7090 op[6] = gen_rtx_REG (Pmode, 79);
7091 op[7] = gen_rtx_REG (Pmode, 78);
7092
7093 /* Add DELTA to this pointer. */
7094 if (delta)
7095 {
7096 if (delta >= -0x200 && delta < 0x200)
7097 output_asm_insn ("ai\t%1,%1,%2", op);
7098 else if (delta >= -0x8000 && delta < 0x8000)
7099 {
7100 output_asm_insn ("il\t%6,%2", op);
7101 output_asm_insn ("a\t%1,%1,%6", op);
7102 }
7103 else
7104 {
7105 output_asm_insn ("ilhu\t%6,%3", op);
7106 output_asm_insn ("iohl\t%6,%2", op);
7107 output_asm_insn ("a\t%1,%1,%6", op);
7108 }
7109 }
7110
7111 /* Perform vcall adjustment. */
7112 if (vcall_offset)
7113 {
7114 output_asm_insn ("lqd\t%7,0(%1)", op);
7115 output_asm_insn ("rotqby\t%7,%7,%1", op);
7116
7117 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7118 output_asm_insn ("ai\t%7,%7,%4", op);
7119 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7120 {
7121 output_asm_insn ("il\t%6,%4", op);
7122 output_asm_insn ("a\t%7,%7,%6", op);
7123 }
7124 else
7125 {
7126 output_asm_insn ("ilhu\t%6,%5", op);
7127 output_asm_insn ("iohl\t%6,%4", op);
7128 output_asm_insn ("a\t%7,%7,%6", op);
7129 }
7130
7131 output_asm_insn ("lqd\t%6,0(%7)", op);
7132 output_asm_insn ("rotqby\t%6,%6,%7", op);
7133 output_asm_insn ("a\t%1,%1,%6", op);
7134 }
7135
7136 /* Jump to target. */
7137 output_asm_insn ("br\t%0", op);
7138
7139 final_end_function ();
7140 }
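/* Roughly equivalent C for the adjustment emitted above, followed by the
   branch to FUNCTION (an illustrative sketch, not part of the original
   source; "this" and "vtable" are hypothetical names):

     this += delta;
     if (vcall_offset)
       {
         char *vtable = *(char **) this;
         this += *(ptrdiff_t *) (vtable + vcall_offset);
       }
*/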
7141
7142 /* Canonicalize a comparison from one we don't have to one we do have. */
7143 static void
7144 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7145 bool op0_preserve_value)
7146 {
7147 if (!op0_preserve_value
7148 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7149 {
7150 rtx tem = *op0;
7151 *op0 = *op1;
7152 *op1 = tem;
7153 *code = (int)swap_condition ((enum rtx_code)*code);
7154 }
7155 }
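/* Example (illustrative): when op0 need not be preserved, (lt a b) is
   rewritten as (gt b a) and (leu a b) as (geu b a), turning comparisons
   the machine description lacks into ones it has.  */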
7156 \f
7157 /* Table of machine attributes. */
7158 static const struct attribute_spec spu_attribute_table[] =
7159 {
7160 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7161 affects_type_identity } */
7162 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7163 false },
7164 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7165 false },
7166 { NULL, 0, 0, false, false, false, NULL, false }
7167 };
7168
7169 /* TARGET overrides. */
7170
7171 #undef TARGET_ADDR_SPACE_POINTER_MODE
7172 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7173
7174 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7175 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7176
7177 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7178 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7179 spu_addr_space_legitimate_address_p
7180
7181 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7182 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7183
7184 #undef TARGET_ADDR_SPACE_SUBSET_P
7185 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7186
7187 #undef TARGET_ADDR_SPACE_CONVERT
7188 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7189
7190 #undef TARGET_INIT_BUILTINS
7191 #define TARGET_INIT_BUILTINS spu_init_builtins
7192 #undef TARGET_BUILTIN_DECL
7193 #define TARGET_BUILTIN_DECL spu_builtin_decl
7194
7195 #undef TARGET_EXPAND_BUILTIN
7196 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7197
7198 #undef TARGET_UNWIND_WORD_MODE
7199 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7200
7201 #undef TARGET_LEGITIMIZE_ADDRESS
7202 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7203
7204 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7205 and .quad for the debugger. When it is known that the assembler is fixed,
7206 these can be removed. */
7207 #undef TARGET_ASM_UNALIGNED_SI_OP
7208 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7209
7210 #undef TARGET_ASM_ALIGNED_DI_OP
7211 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7212
7213 /* The .8byte directive doesn't seem to work well for a 32 bit
7214 architecture. */
7215 #undef TARGET_ASM_UNALIGNED_DI_OP
7216 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7217
7218 #undef TARGET_RTX_COSTS
7219 #define TARGET_RTX_COSTS spu_rtx_costs
7220
7221 #undef TARGET_ADDRESS_COST
7222 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7223
7224 #undef TARGET_SCHED_ISSUE_RATE
7225 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7226
7227 #undef TARGET_SCHED_INIT_GLOBAL
7228 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7229
7230 #undef TARGET_SCHED_INIT
7231 #define TARGET_SCHED_INIT spu_sched_init
7232
7233 #undef TARGET_SCHED_VARIABLE_ISSUE
7234 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7235
7236 #undef TARGET_SCHED_REORDER
7237 #define TARGET_SCHED_REORDER spu_sched_reorder
7238
7239 #undef TARGET_SCHED_REORDER2
7240 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7241
7242 #undef TARGET_SCHED_ADJUST_COST
7243 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7244
7245 #undef TARGET_ATTRIBUTE_TABLE
7246 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7247
7248 #undef TARGET_ASM_INTEGER
7249 #define TARGET_ASM_INTEGER spu_assemble_integer
7250
7251 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7252 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7253
7254 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7255 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7256
7257 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7258 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7259
7260 #undef TARGET_ASM_GLOBALIZE_LABEL
7261 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7262
7263 #undef TARGET_PASS_BY_REFERENCE
7264 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7265
7266 #undef TARGET_FUNCTION_ARG
7267 #define TARGET_FUNCTION_ARG spu_function_arg
7268
7269 #undef TARGET_FUNCTION_ARG_ADVANCE
7270 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7271
7272 #undef TARGET_MUST_PASS_IN_STACK
7273 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7274
7275 #undef TARGET_BUILD_BUILTIN_VA_LIST
7276 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7277
7278 #undef TARGET_EXPAND_BUILTIN_VA_START
7279 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7280
7281 #undef TARGET_SETUP_INCOMING_VARARGS
7282 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7283
7284 #undef TARGET_MACHINE_DEPENDENT_REORG
7285 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7286
7287 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7288 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7289
7290 #undef TARGET_INIT_LIBFUNCS
7291 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7292
7293 #undef TARGET_RETURN_IN_MEMORY
7294 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7295
7296 #undef TARGET_ENCODE_SECTION_INFO
7297 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7298
7299 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7300 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7301
7302 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7303 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7304
7305 #undef TARGET_VECTORIZE_INIT_COST
7306 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7307
7308 #undef TARGET_VECTORIZE_ADD_STMT_COST
7309 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7310
7311 #undef TARGET_VECTORIZE_FINISH_COST
7312 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7313
7314 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7315 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7316
7317 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7318 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7319
7320 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7321 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7322
7323 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7324 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7325
7326 #undef TARGET_SCHED_SMS_RES_MII
7327 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7328
7329 #undef TARGET_SECTION_TYPE_FLAGS
7330 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7331
7332 #undef TARGET_ASM_SELECT_SECTION
7333 #define TARGET_ASM_SELECT_SECTION spu_select_section
7334
7335 #undef TARGET_ASM_UNIQUE_SECTION
7336 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7337
7338 #undef TARGET_LEGITIMATE_ADDRESS_P
7339 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7340
7341 #undef TARGET_LEGITIMATE_CONSTANT_P
7342 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7343
7344 #undef TARGET_TRAMPOLINE_INIT
7345 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7346
7347 #undef TARGET_WARN_FUNC_RETURN
7348 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7349
7350 #undef TARGET_OPTION_OVERRIDE
7351 #define TARGET_OPTION_OVERRIDE spu_option_override
7352
7353 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7354 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7355
7356 #undef TARGET_REF_MAY_ALIAS_ERRNO
7357 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7358
7359 #undef TARGET_ASM_OUTPUT_MI_THUNK
7360 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7361 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7362 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7363
7364 /* Variable tracking should be run after all optimizations which
7365 change order of insns. It also needs a valid CFG. */
7366 #undef TARGET_DELAY_VARTRACK
7367 #define TARGET_DELAY_VARTRACK true
7368
7369 #undef TARGET_CANONICALIZE_COMPARISON
7370 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7371
7372 #undef TARGET_CAN_USE_DOLOOP_P
7373 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7374
7375 struct gcc_target targetm = TARGET_INITIALIZER;
7376
7377 #include "gt-spu.h"