gcc/config/spu/spu.c
1 /* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "c-common.h"
52 #include "machmode.h"
53 #include "gimple.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
56 #include "ddg.h"
57
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
60 {
61 int low, high;
62 };
63
64 static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77 };
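/* Illustrative note (not in the original source): each entry gives the
   inclusive range accepted for the corresponding builtin immediate type,
   e.g. SPU_BTI_U7 accepts 0 .. 0x7f and SPU_BTI_S10 accepts -0x200 .. 0x1ff.
   The wider _S10_4 and _S16_2 entries appear to be the same fields scaled
   up for larger-granularity offsets (an inference from the values above). */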
78
79 \f
80 /* Target specific attribute specifications. */
81 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88 static rtx get_pic_reg (void);
89 static int need_to_save_reg (int regno, int saving);
90 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94 static void emit_nop_for_insn (rtx insn);
95 static bool insn_clobbers_hbr (rtx insn);
96 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
98 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
99 enum machine_mode dmode);
100 static rtx get_branch_target (rtx branch);
101 static void insert_branch_hints (void);
102 static void insert_nops (void);
103 static void spu_machine_dependent_reorg (void);
104 static int spu_sched_issue_rate (void);
105 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
106 int can_issue_more);
107 static int get_pipe (rtx insn);
108 static int spu_sched_adjust_priority (rtx insn, int pri);
109 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
111 int flags,
112 unsigned char *no_add_attrs);
113 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116 static int spu_naked_function_p (tree func);
117 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
118 const_tree type, unsigned char named);
119 static tree spu_build_builtin_va_list (void);
120 static void spu_va_start (tree, rtx);
121 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
122 gimple_seq * pre_p, gimple_seq * post_p);
123 static int regno_aligned_for_load (int regno);
124 static int store_with_one_insn_p (rtx mem);
125 static int mem_is_padded_component_ref (rtx x);
126 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
127 static void spu_asm_globalize_label (FILE * file, const char *name);
128 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
129 int *total);
130 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
131 static void spu_init_libfuncs (void);
132 static bool spu_return_in_memory (const_tree type, const_tree fntype);
133 static void fix_range (const char *);
134 static void spu_encode_section_info (tree, rtx, int);
135 static tree spu_builtin_mul_widen_even (tree);
136 static tree spu_builtin_mul_widen_odd (tree);
137 static tree spu_builtin_mask_for_load (void);
138 static int spu_builtin_vectorization_cost (bool);
139 static bool spu_vector_alignment_reachable (const_tree, bool);
140 static int spu_sms_res_mii (struct ddg *g);
141
142 extern const char *reg_names[];
143 rtx spu_compare_op0, spu_compare_op1;
144
145 /* Which instruction set architecture to use. */
146 int spu_arch;
147 /* Which cpu are we tuning for. */
148 int spu_tune;
149
150 enum spu_immediate {
151 SPU_NONE,
152 SPU_IL,
153 SPU_ILA,
154 SPU_ILH,
155 SPU_ILHU,
156 SPU_ORI,
157 SPU_ORHI,
158 SPU_ORBI,
159 SPU_IOHL
160 };
161 enum immediate_class
162 {
163 IC_POOL, /* constant pool */
164 IC_IL1, /* one il* instruction */
165 IC_IL2, /* both ilhu and iohl instructions */
166 IC_IL1s, /* one il* instruction */
167 IC_IL2s, /* both ilhu and iohl instructions */
168 IC_FSMBI, /* the fsmbi instruction */
169 IC_CPAT, /* one of the c*d instructions */
170 IC_FSMBI2 /* fsmbi plus 1 other instruction */
171 };
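/* Example (a sketch, not from the original file): a constant such as
   0x12340000 in every word can typically be loaded with a single ilhu, so
   it would fall in IC_IL1, whereas 0x12345678 needs the ilhu/iohl pair
   described by IC_IL2.  classify_immediate below makes the actual
   decision. */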
172
173 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
174 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
175 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
176 static enum immediate_class classify_immediate (rtx op,
177 enum machine_mode mode);
178
179 static enum machine_mode spu_unwind_word_mode (void);
180
181 static enum machine_mode
182 spu_libgcc_cmp_return_mode (void);
183
184 static enum machine_mode
185 spu_libgcc_shift_count_mode (void);
186
187 /* Built in types. */
188 tree spu_builtin_types[SPU_BTI_MAX];
189 \f
190 /* TARGET overrides. */
191
192 #undef TARGET_INIT_BUILTINS
193 #define TARGET_INIT_BUILTINS spu_init_builtins
194
195 #undef TARGET_EXPAND_BUILTIN
196 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
197
198 #undef TARGET_UNWIND_WORD_MODE
199 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
200
201 /* The .8byte directive doesn't seem to work well for a 32 bit
202 architecture. */
203 #undef TARGET_ASM_UNALIGNED_DI_OP
204 #define TARGET_ASM_UNALIGNED_DI_OP NULL
205
206 #undef TARGET_RTX_COSTS
207 #define TARGET_RTX_COSTS spu_rtx_costs
208
209 #undef TARGET_ADDRESS_COST
210 #define TARGET_ADDRESS_COST hook_int_rtx_0
211
212 #undef TARGET_SCHED_ISSUE_RATE
213 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
214
215 #undef TARGET_SCHED_VARIABLE_ISSUE
216 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
217
218 #undef TARGET_SCHED_ADJUST_PRIORITY
219 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
220
221 #undef TARGET_SCHED_ADJUST_COST
222 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
223
224 const struct attribute_spec spu_attribute_table[];
225 #undef TARGET_ATTRIBUTE_TABLE
226 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
227
228 #undef TARGET_ASM_INTEGER
229 #define TARGET_ASM_INTEGER spu_assemble_integer
230
231 #undef TARGET_SCALAR_MODE_SUPPORTED_P
232 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
233
234 #undef TARGET_VECTOR_MODE_SUPPORTED_P
235 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
236
237 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
238 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
239
240 #undef TARGET_ASM_GLOBALIZE_LABEL
241 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
242
243 #undef TARGET_PASS_BY_REFERENCE
244 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
245
246 #undef TARGET_MUST_PASS_IN_STACK
247 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
248
249 #undef TARGET_BUILD_BUILTIN_VA_LIST
250 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
251
252 #undef TARGET_EXPAND_BUILTIN_VA_START
253 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
254
255 #undef TARGET_SETUP_INCOMING_VARARGS
256 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
257
258 #undef TARGET_MACHINE_DEPENDENT_REORG
259 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
260
261 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
262 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
263
264 #undef TARGET_DEFAULT_TARGET_FLAGS
265 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
266
267 #undef TARGET_INIT_LIBFUNCS
268 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
269
270 #undef TARGET_RETURN_IN_MEMORY
271 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
272
273 #undef TARGET_ENCODE_SECTION_INFO
274 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
275
276 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
277 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
278
279 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
280 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
281
282 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
283 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
284
285 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
286 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
287
288 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
289 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
290
291 #undef TARGET_LIBGCC_CMP_RETURN_MODE
292 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
293
294 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
295 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
296
297 #undef TARGET_SCHED_SMS_RES_MII
298 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
299
300 struct gcc_target targetm = TARGET_INITIALIZER;
301
302 void
303 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
304 {
305 /* Override some of the default param values. With so many registers
306 larger values are better for these params. */
307 MAX_PENDING_LIST_LENGTH = 128;
308
309 /* With so many registers this is better on by default. */
310 flag_rename_registers = 1;
311 }
312
313 /* Sometimes certain combinations of command options do not make sense
314 on a particular target machine. You can define a macro
315 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
316 executed once just after all the command options have been parsed. */
317 void
318 spu_override_options (void)
319 {
320 /* Small loops will be unrolled at -O3. For SPU it is more important
321 to keep code small by default. */
322 if (!flag_unroll_loops && !flag_peel_loops
323 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
324 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
325
326 flag_omit_frame_pointer = 1;
327
328 if (align_functions < 8)
329 align_functions = 8;
330
331 if (spu_fixed_range_string)
332 fix_range (spu_fixed_range_string);
333
334 /* Determine processor architectural level. */
335 if (spu_arch_string)
336 {
337 if (strcmp (&spu_arch_string[0], "cell") == 0)
338 spu_arch = PROCESSOR_CELL;
339 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
340 spu_arch = PROCESSOR_CELLEDP;
341 else
342 error ("Unknown architecture '%s'", &spu_arch_string[0]);
343 }
344
345 /* Determine processor to tune for. */
346 if (spu_tune_string)
347 {
348 if (strcmp (&spu_tune_string[0], "cell") == 0)
349 spu_tune = PROCESSOR_CELL;
350 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
351 spu_tune = PROCESSOR_CELLEDP;
352 else
353 error ("Unknown architecture '%s'", &spu_tune_string[0]);
354 }
355 }
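/* Usage note (illustrative, not in the original file): with the arch
   string set to "celledp" -- presumably via -march=celledp -- spu_arch
   becomes PROCESSOR_CELLEDP, and an unrecognized name is reported with
   error (); spu_tune is chosen the same way from spu_tune_string. */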
356 \f
357 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
358 struct attribute_spec.handler. */
359
360 /* Table of machine attributes. */
361 const struct attribute_spec spu_attribute_table[] =
362 {
363 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
364 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
365 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
366 { NULL, 0, 0, false, false, false, NULL }
367 };
368
369 /* True if MODE is valid for the target. By "valid", we mean able to
370 be manipulated in non-trivial ways. In particular, this means all
371 the arithmetic is supported. */
372 static bool
373 spu_scalar_mode_supported_p (enum machine_mode mode)
374 {
375 switch (mode)
376 {
377 case QImode:
378 case HImode:
379 case SImode:
380 case SFmode:
381 case DImode:
382 case TImode:
383 case DFmode:
384 return true;
385
386 default:
387 return false;
388 }
389 }
390
391 /* Similarly for vector modes. "Supported" here is less strict. At
392 least some operations are supported; need to check optabs or builtins
393 for further details. */
394 static bool
395 spu_vector_mode_supported_p (enum machine_mode mode)
396 {
397 switch (mode)
398 {
399 case V16QImode:
400 case V8HImode:
401 case V4SImode:
402 case V2DImode:
403 case V4SFmode:
404 case V2DFmode:
405 return true;
406
407 default:
408 return false;
409 }
410 }
411
412 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
413 least significant bytes of the outer mode. This function returns
414 TRUE for the SUBREGs where this is correct. */
415 int
416 valid_subreg (rtx op)
417 {
418 enum machine_mode om = GET_MODE (op);
419 enum machine_mode im = GET_MODE (SUBREG_REG (op));
420 return om != VOIDmode && im != VOIDmode
421 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
422 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
423 }
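/* Worked example (illustrative, not in the original): (subreg:SI (reg:QI))
   is accepted because both modes are 4 bytes or smaller, and a subreg with
   equal-sized modes such as (subreg:V4SI (reg:TI)) is accepted because the
   sizes match; a paradoxical (subreg:TI (reg:SI)) is rejected since the
   sizes differ and SI/TI are not both <= 4 bytes. */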
424
425 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
426 and adjust the start offset. */
427 static rtx
428 adjust_operand (rtx op, HOST_WIDE_INT * start)
429 {
430 enum machine_mode mode;
431 int op_size;
432 /* Strip any SUBREG */
433 if (GET_CODE (op) == SUBREG)
434 {
435 if (start)
436 *start -=
437 GET_MODE_BITSIZE (GET_MODE (op)) -
438 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
439 op = SUBREG_REG (op);
440 }
441 /* If it is smaller than SI, ensure a SUBREG. */
442 op_size = GET_MODE_BITSIZE (GET_MODE (op));
443 if (op_size < 32)
444 {
445 if (start)
446 *start += 32 - op_size;
447 op_size = 32;
448 }
449 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
450 mode = mode_for_size (op_size, MODE_INT, 0);
451 if (mode != GET_MODE (op))
452 op = gen_rtx_SUBREG (mode, op, 0);
453 return op;
454 }
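/* Worked example (illustrative): for a QImode register with *start == 5,
   nothing is stripped, but since QImode is only 8 bits the start offset is
   rebased to 5 + (32 - 8) == 29 and the register is wrapped in an SImode
   SUBREG, so later code can treat the operand uniformly as at least SI. */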
455
456 void
457 spu_expand_extv (rtx ops[], int unsignedp)
458 {
459 HOST_WIDE_INT width = INTVAL (ops[2]);
460 HOST_WIDE_INT start = INTVAL (ops[3]);
461 HOST_WIDE_INT src_size, dst_size;
462 enum machine_mode src_mode, dst_mode;
463 rtx dst = ops[0], src = ops[1];
464 rtx s;
465
466 dst = adjust_operand (ops[0], 0);
467 dst_mode = GET_MODE (dst);
468 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
469
470 src = adjust_operand (src, &start);
471 src_mode = GET_MODE (src);
472 src_size = GET_MODE_BITSIZE (GET_MODE (src));
473
474 if (start > 0)
475 {
476 s = gen_reg_rtx (src_mode);
477 switch (src_mode)
478 {
479 case SImode:
480 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
481 break;
482 case DImode:
483 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
484 break;
485 case TImode:
486 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
487 break;
488 default:
489 abort ();
490 }
491 src = s;
492 }
493
494 if (width < src_size)
495 {
496 rtx pat;
497 int icode;
498 switch (src_mode)
499 {
500 case SImode:
501 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
502 break;
503 case DImode:
504 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
505 break;
506 case TImode:
507 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
508 break;
509 default:
510 abort ();
511 }
512 s = gen_reg_rtx (src_mode);
513 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
514 emit_insn (pat);
515 src = s;
516 }
517
518 convert_move (dst, src, unsignedp);
519 }
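/* Illustrative example (not part of the original): extracting an 8-bit
   field starting at bit 4 of an SImode source first shifts left by 4 to
   discard the bits above the field, then shifts right by 32 - 8 == 24,
   arithmetically for a signed extract or logically for an unsigned one,
   before convert_move copies the result into the destination. */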
520
521 void
522 spu_expand_insv (rtx ops[])
523 {
524 HOST_WIDE_INT width = INTVAL (ops[1]);
525 HOST_WIDE_INT start = INTVAL (ops[2]);
526 HOST_WIDE_INT maskbits;
527 enum machine_mode dst_mode, src_mode;
528 rtx dst = ops[0], src = ops[3];
529 int dst_size, src_size;
530 rtx mask;
531 rtx shift_reg;
532 int shift;
533
534
535 if (GET_CODE (ops[0]) == MEM)
536 dst = gen_reg_rtx (TImode);
537 else
538 dst = adjust_operand (dst, &start);
539 dst_mode = GET_MODE (dst);
540 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
541
542 if (CONSTANT_P (src))
543 {
544 enum machine_mode m =
545 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
546 src = force_reg (m, convert_to_mode (m, src, 0));
547 }
548 src = adjust_operand (src, 0);
549 src_mode = GET_MODE (src);
550 src_size = GET_MODE_BITSIZE (GET_MODE (src));
551
552 mask = gen_reg_rtx (dst_mode);
553 shift_reg = gen_reg_rtx (dst_mode);
554 shift = dst_size - start - width;
555
556 /* It's not safe to use subreg here because the compiler assumes
557 that the SUBREG_REG is right justified in the SUBREG. */
558 convert_move (shift_reg, src, 1);
559
560 if (shift > 0)
561 {
562 switch (dst_mode)
563 {
564 case SImode:
565 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
566 break;
567 case DImode:
568 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
569 break;
570 case TImode:
571 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
572 break;
573 default:
574 abort ();
575 }
576 }
577 else if (shift < 0)
578 abort ();
579
580 switch (dst_size)
581 {
582 case 32:
583 maskbits = (-1ll << (32 - width - start));
584 if (start)
585 maskbits += (1ll << (32 - start));
586 emit_move_insn (mask, GEN_INT (maskbits));
587 break;
588 case 64:
589 maskbits = (-1ll << (64 - width - start));
590 if (start)
591 maskbits += (1ll << (64 - start));
592 emit_move_insn (mask, GEN_INT (maskbits));
593 break;
594 case 128:
595 {
596 unsigned char arr[16];
597 int i = start / 8;
598 memset (arr, 0, sizeof (arr));
599 arr[i] = 0xff >> (start & 7);
600 for (i++; i <= (start + width - 1) / 8; i++)
601 arr[i] = 0xff;
602 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
603 emit_move_insn (mask, array_to_constant (TImode, arr));
604 }
605 break;
606 default:
607 abort ();
608 }
609 if (GET_CODE (ops[0]) == MEM)
610 {
611 rtx aligned = gen_reg_rtx (SImode);
612 rtx low = gen_reg_rtx (SImode);
613 rtx addr = gen_reg_rtx (SImode);
614 rtx rotl = gen_reg_rtx (SImode);
615 rtx mask0 = gen_reg_rtx (TImode);
616 rtx mem;
617
618 emit_move_insn (addr, XEXP (ops[0], 0));
619 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
620 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
621 emit_insn (gen_negsi2 (rotl, low));
622 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
623 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
624 mem = change_address (ops[0], TImode, aligned);
625 set_mem_alias_set (mem, 0);
626 emit_move_insn (dst, mem);
627 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
628 emit_move_insn (mem, dst);
629 if (start + width > MEM_ALIGN (ops[0]))
630 {
631 rtx shl = gen_reg_rtx (SImode);
632 rtx mask1 = gen_reg_rtx (TImode);
633 rtx dst1 = gen_reg_rtx (TImode);
634 rtx mem1;
635 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
636 emit_insn (gen_shlqby_ti (mask1, mask, shl));
637 mem1 = adjust_address (mem, TImode, 16);
638 set_mem_alias_set (mem1, 0);
639 emit_move_insn (dst1, mem1);
640 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
641 emit_move_insn (mem1, dst1);
642 }
643 }
644 else
645 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
646 }
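/* Worked example (illustrative): inserting a width == 8 field at
   start == 4 of an SImode register gives shift == 32 - 4 - 8 == 20, and
   maskbits == (-1ll << 20) + (1ll << 28), i.e. a mask of 0x0ff00000 that
   covers exactly bit positions 4..11 counted from the most significant
   bit, which selb then uses to merge the shifted source into DST. */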
647
648
649 int
650 spu_expand_block_move (rtx ops[])
651 {
652 HOST_WIDE_INT bytes, align, offset;
653 rtx src, dst, sreg, dreg, target;
654 int i;
655 if (GET_CODE (ops[2]) != CONST_INT
656 || GET_CODE (ops[3]) != CONST_INT
657 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
658 return 0;
659
660 bytes = INTVAL (ops[2]);
661 align = INTVAL (ops[3]);
662
663 if (bytes <= 0)
664 return 1;
665
666 dst = ops[0];
667 src = ops[1];
668
669 if (align == 16)
670 {
671 for (offset = 0; offset + 16 <= bytes; offset += 16)
672 {
673 dst = adjust_address (ops[0], V16QImode, offset);
674 src = adjust_address (ops[1], V16QImode, offset);
675 emit_move_insn (dst, src);
676 }
677 if (offset < bytes)
678 {
679 rtx mask;
680 unsigned char arr[16] = { 0 };
681 for (i = 0; i < bytes - offset; i++)
682 arr[i] = 0xff;
683 dst = adjust_address (ops[0], V16QImode, offset);
684 src = adjust_address (ops[1], V16QImode, offset);
685 mask = gen_reg_rtx (V16QImode);
686 sreg = gen_reg_rtx (V16QImode);
687 dreg = gen_reg_rtx (V16QImode);
688 target = gen_reg_rtx (V16QImode);
689 emit_move_insn (mask, array_to_constant (V16QImode, arr));
690 emit_move_insn (dreg, dst);
691 emit_move_insn (sreg, src);
692 emit_insn (gen_selb (target, dreg, sreg, mask));
693 emit_move_insn (dst, target);
694 }
695 return 1;
696 }
697 return 0;
698 }
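/* Example of the expansion above (illustrative, not from the original):
   with bytes == 35 and align == 16 the loop emits two full V16QI moves
   for offsets 0 and 16, and the 3-byte tail is handled by loading both
   quadwords, building a 0xff,0xff,0xff,0,... mask and merging with selb
   so only the first 3 destination bytes are overwritten. */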
699
700 enum spu_comp_code
701 { SPU_EQ, SPU_GT, SPU_GTU };
702
703 int spu_comp_icode[12][3] = {
704 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
705 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
706 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
707 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
708 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
709 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
710 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
711 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
712 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
713 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
714 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
715 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
716 };
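/* Illustrative note: the table is indexed by the mode index computed in
   spu_emit_branch_or_set below and by enum spu_comp_code, e.g. an SImode
   unsigned greater-than compare selects row 2, column SPU_GTU, which is
   CODE_FOR_clgt_si; the zero entries mark compares with no SPU pattern,
   which is why the SPU_GTU column is empty for the floating-point rows. */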
717
718 /* Generate a compare for CODE. Return a brand-new rtx that represents
719 the result of the compare. GCC can figure this out too if we don't
720 provide all variations of compares, but since GCC always wants to use
721 WORD_MODE, we can generate better code in most cases if we do it
722 ourselves. */
723 void
724 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
725 {
726 int reverse_compare = 0;
727 int reverse_test = 0;
728 rtx compare_result, eq_result;
729 rtx comp_rtx, eq_rtx;
730 rtx target = operands[0];
731 enum machine_mode comp_mode;
732 enum machine_mode op_mode;
733 enum spu_comp_code scode, eq_code, ior_code;
734 int index;
735 int eq_test = 0;
736
737 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
738 and so on, to keep the constant in operand 1. */
739 if (GET_CODE (spu_compare_op1) == CONST_INT)
740 {
741 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
742 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
743 switch (code)
744 {
745 case GE:
746 spu_compare_op1 = GEN_INT (val);
747 code = GT;
748 break;
749 case LT:
750 spu_compare_op1 = GEN_INT (val);
751 code = LE;
752 break;
753 case GEU:
754 spu_compare_op1 = GEN_INT (val);
755 code = GTU;
756 break;
757 case LTU:
758 spu_compare_op1 = GEN_INT (val);
759 code = LEU;
760 break;
761 default:
762 break;
763 }
764 }
765
766 comp_mode = SImode;
767 op_mode = GET_MODE (spu_compare_op0);
768
769 switch (code)
770 {
771 case GE:
772 scode = SPU_GT;
773 if (HONOR_NANS (op_mode))
774 {
775 reverse_compare = 0;
776 reverse_test = 0;
777 eq_test = 1;
778 eq_code = SPU_EQ;
779 }
780 else
781 {
782 reverse_compare = 1;
783 reverse_test = 1;
784 }
785 break;
786 case LE:
787 scode = SPU_GT;
788 if (HONOR_NANS (op_mode))
789 {
790 reverse_compare = 1;
791 reverse_test = 0;
792 eq_test = 1;
793 eq_code = SPU_EQ;
794 }
795 else
796 {
797 reverse_compare = 0;
798 reverse_test = 1;
799 }
800 break;
801 case LT:
802 reverse_compare = 1;
803 reverse_test = 0;
804 scode = SPU_GT;
805 break;
806 case GEU:
807 reverse_compare = 1;
808 reverse_test = 1;
809 scode = SPU_GTU;
810 break;
811 case LEU:
812 reverse_compare = 0;
813 reverse_test = 1;
814 scode = SPU_GTU;
815 break;
816 case LTU:
817 reverse_compare = 1;
818 reverse_test = 0;
819 scode = SPU_GTU;
820 break;
821 case NE:
822 reverse_compare = 0;
823 reverse_test = 1;
824 scode = SPU_EQ;
825 break;
826
827 case EQ:
828 scode = SPU_EQ;
829 break;
830 case GT:
831 scode = SPU_GT;
832 break;
833 case GTU:
834 scode = SPU_GTU;
835 break;
836 default:
837 scode = SPU_EQ;
838 break;
839 }
840
841 switch (op_mode)
842 {
843 case QImode:
844 index = 0;
845 comp_mode = QImode;
846 break;
847 case HImode:
848 index = 1;
849 comp_mode = HImode;
850 break;
851 case SImode:
852 index = 2;
853 break;
854 case DImode:
855 index = 3;
856 break;
857 case TImode:
858 index = 4;
859 break;
860 case SFmode:
861 index = 5;
862 break;
863 case DFmode:
864 index = 6;
865 break;
866 case V16QImode:
867 index = 7;
868 comp_mode = op_mode;
869 break;
870 case V8HImode:
871 index = 8;
872 comp_mode = op_mode;
873 break;
874 case V4SImode:
875 index = 9;
876 comp_mode = op_mode;
877 break;
878 case V4SFmode:
879 index = 10;
880 comp_mode = V4SImode;
881 break;
882 case V2DFmode:
883 index = 11;
884 comp_mode = V2DImode;
885 break;
886 case V2DImode:
887 default:
888 abort ();
889 }
890
891 if (GET_MODE (spu_compare_op1) == DFmode
892 && (scode != SPU_GT && scode != SPU_EQ))
893 abort ();
894
895 if (is_set == 0 && spu_compare_op1 == const0_rtx
896 && (GET_MODE (spu_compare_op0) == SImode
897 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
898 {
899 /* Don't need to set a register with the result when we are
900 comparing against zero and branching. */
901 reverse_test = !reverse_test;
902 compare_result = spu_compare_op0;
903 }
904 else
905 {
906 compare_result = gen_reg_rtx (comp_mode);
907
908 if (reverse_compare)
909 {
910 rtx t = spu_compare_op1;
911 spu_compare_op1 = spu_compare_op0;
912 spu_compare_op0 = t;
913 }
914
915 if (spu_comp_icode[index][scode] == 0)
916 abort ();
917
918 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
919 (spu_compare_op0, op_mode))
920 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
921 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
922 (spu_compare_op1, op_mode))
923 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
924 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
925 spu_compare_op0,
926 spu_compare_op1);
927 if (comp_rtx == 0)
928 abort ();
929 emit_insn (comp_rtx);
930
931 if (eq_test)
932 {
933 eq_result = gen_reg_rtx (comp_mode);
934 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
935 spu_compare_op0,
936 spu_compare_op1);
937 if (eq_rtx == 0)
938 abort ();
939 emit_insn (eq_rtx);
940 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
941 gcc_assert (ior_code != CODE_FOR_nothing);
942 emit_insn (GEN_FCN (ior_code)
943 (compare_result, compare_result, eq_result));
944 }
945 }
946
947 if (is_set == 0)
948 {
949 rtx bcomp;
950 rtx loc_ref;
951
952 /* We don't have branch on QI compare insns, so we convert the
953 QI compare result to a HI result. */
954 if (comp_mode == QImode)
955 {
956 rtx old_res = compare_result;
957 compare_result = gen_reg_rtx (HImode);
958 comp_mode = HImode;
959 emit_insn (gen_extendqihi2 (compare_result, old_res));
960 }
961
962 if (reverse_test)
963 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
964 else
965 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
966
967 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
968 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
969 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
970 loc_ref, pc_rtx)));
971 }
972 else if (is_set == 2)
973 {
974 int compare_size = GET_MODE_BITSIZE (comp_mode);
975 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
976 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
977 rtx select_mask;
978 rtx op_t = operands[2];
979 rtx op_f = operands[3];
980
981 /* The result of the comparison can be SI, HI or QI mode. Create a
982 mask based on that result. */
983 if (target_size > compare_size)
984 {
985 select_mask = gen_reg_rtx (mode);
986 emit_insn (gen_extend_compare (select_mask, compare_result));
987 }
988 else if (target_size < compare_size)
989 select_mask =
990 gen_rtx_SUBREG (mode, compare_result,
991 (compare_size - target_size) / BITS_PER_UNIT);
992 else if (comp_mode != mode)
993 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
994 else
995 select_mask = compare_result;
996
997 if (GET_MODE (target) != GET_MODE (op_t)
998 || GET_MODE (target) != GET_MODE (op_f))
999 abort ();
1000
1001 if (reverse_test)
1002 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1003 else
1004 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1005 }
1006 else
1007 {
1008 if (reverse_test)
1009 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1010 gen_rtx_NOT (comp_mode, compare_result)));
1011 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1012 emit_insn (gen_extendhisi2 (target, compare_result));
1013 else if (GET_MODE (target) == SImode
1014 && GET_MODE (compare_result) == QImode)
1015 emit_insn (gen_extend_compare (target, compare_result));
1016 else
1017 emit_move_insn (target, compare_result);
1018 }
1019 }
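/* Example of the flag handling above (illustrative): an LE compare of two
   SFmode values when NaNs need not be honored (e.g. under -ffast-math) is
   emitted as a single cgt with reverse_test set, i.e. "a <= b" becomes
   "!(a > b)", while with NaNs honored it instead emits cgt with the
   operands swapped plus a ceq, ORing the two results so an unordered pair
   never satisfies the test. */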
1020
1021 HOST_WIDE_INT
1022 const_double_to_hwint (rtx x)
1023 {
1024 HOST_WIDE_INT val;
1025 REAL_VALUE_TYPE rv;
1026 if (GET_MODE (x) == SFmode)
1027 {
1028 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1029 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1030 }
1031 else if (GET_MODE (x) == DFmode)
1032 {
1033 long l[2];
1034 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1035 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1036 val = l[0];
1037 val = (val << 32) | (l[1] & 0xffffffff);
1038 }
1039 else
1040 abort ();
1041 return val;
1042 }
1043
1044 rtx
1045 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1046 {
1047 long tv[2];
1048 REAL_VALUE_TYPE rv;
1049 gcc_assert (mode == SFmode || mode == DFmode);
1050
1051 if (mode == SFmode)
1052 tv[0] = (v << 32) >> 32;
1053 else if (mode == DFmode)
1054 {
1055 tv[1] = (v << 32) >> 32;
1056 tv[0] = v >> 32;
1057 }
1058 real_from_target (&rv, tv, mode);
1059 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1060 }
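/* Illustrative example: hwint_to_const_double (SFmode, 0x3f800000) treats
   the low 32 bits as an IEEE single-precision bit pattern and yields a
   CONST_DOUBLE for 1.0f; for DFmode the 64-bit value is split into two
   32-bit target words before real_from_target reassembles it. */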
1061
1062 void
1063 print_operand_address (FILE * file, register rtx addr)
1064 {
1065 rtx reg;
1066 rtx offset;
1067
1068 if (GET_CODE (addr) == AND
1069 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1070 && INTVAL (XEXP (addr, 1)) == -16)
1071 addr = XEXP (addr, 0);
1072
1073 switch (GET_CODE (addr))
1074 {
1075 case REG:
1076 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1077 break;
1078
1079 case PLUS:
1080 reg = XEXP (addr, 0);
1081 offset = XEXP (addr, 1);
1082 if (GET_CODE (offset) == REG)
1083 {
1084 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1085 reg_names[REGNO (offset)]);
1086 }
1087 else if (GET_CODE (offset) == CONST_INT)
1088 {
1089 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1090 INTVAL (offset), reg_names[REGNO (reg)]);
1091 }
1092 else
1093 abort ();
1094 break;
1095
1096 case CONST:
1097 case LABEL_REF:
1098 case SYMBOL_REF:
1099 case CONST_INT:
1100 output_addr_const (file, addr);
1101 break;
1102
1103 default:
1104 debug_rtx (addr);
1105 abort ();
1106 }
1107 }
1108
1109 void
1110 print_operand (FILE * file, rtx x, int code)
1111 {
1112 enum machine_mode mode = GET_MODE (x);
1113 HOST_WIDE_INT val;
1114 unsigned char arr[16];
1115 int xcode = GET_CODE (x);
1116 int i, info;
1117 if (GET_MODE (x) == VOIDmode)
1118 switch (code)
1119 {
1120 case 'L': /* 128 bits, signed */
1121 case 'm': /* 128 bits, signed */
1122 case 'T': /* 128 bits, signed */
1123 case 't': /* 128 bits, signed */
1124 mode = TImode;
1125 break;
1126 case 'K': /* 64 bits, signed */
1127 case 'k': /* 64 bits, signed */
1128 case 'D': /* 64 bits, signed */
1129 case 'd': /* 64 bits, signed */
1130 mode = DImode;
1131 break;
1132 case 'J': /* 32 bits, signed */
1133 case 'j': /* 32 bits, signed */
1134 case 's': /* 32 bits, signed */
1135 case 'S': /* 32 bits, signed */
1136 mode = SImode;
1137 break;
1138 }
1139 switch (code)
1140 {
1141
1142 case 'j': /* 32 bits, signed */
1143 case 'k': /* 64 bits, signed */
1144 case 'm': /* 128 bits, signed */
1145 if (xcode == CONST_INT
1146 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1147 {
1148 gcc_assert (logical_immediate_p (x, mode));
1149 constant_to_array (mode, x, arr);
1150 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1151 val = trunc_int_for_mode (val, SImode);
1152 switch (which_logical_immediate (val))
1153 {
1154 case SPU_ORI:
1155 break;
1156 case SPU_ORHI:
1157 fprintf (file, "h");
1158 break;
1159 case SPU_ORBI:
1160 fprintf (file, "b");
1161 break;
1162 default:
1163 gcc_unreachable();
1164 }
1165 }
1166 else
1167 gcc_unreachable();
1168 return;
1169
1170 case 'J': /* 32 bits, signed */
1171 case 'K': /* 64 bits, signed */
1172 case 'L': /* 128 bits, signed */
1173 if (xcode == CONST_INT
1174 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1175 {
1176 gcc_assert (logical_immediate_p (x, mode)
1177 || iohl_immediate_p (x, mode));
1178 constant_to_array (mode, x, arr);
1179 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1180 val = trunc_int_for_mode (val, SImode);
1181 switch (which_logical_immediate (val))
1182 {
1183 case SPU_ORI:
1184 case SPU_IOHL:
1185 break;
1186 case SPU_ORHI:
1187 val = trunc_int_for_mode (val, HImode);
1188 break;
1189 case SPU_ORBI:
1190 val = trunc_int_for_mode (val, QImode);
1191 break;
1192 default:
1193 gcc_unreachable();
1194 }
1195 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1196 }
1197 else
1198 gcc_unreachable();
1199 return;
1200
1201 case 't': /* 128 bits, signed */
1202 case 'd': /* 64 bits, signed */
1203 case 's': /* 32 bits, signed */
1204 if (CONSTANT_P (x))
1205 {
1206 enum immediate_class c = classify_immediate (x, mode);
1207 switch (c)
1208 {
1209 case IC_IL1:
1210 constant_to_array (mode, x, arr);
1211 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1212 val = trunc_int_for_mode (val, SImode);
1213 switch (which_immediate_load (val))
1214 {
1215 case SPU_IL:
1216 break;
1217 case SPU_ILA:
1218 fprintf (file, "a");
1219 break;
1220 case SPU_ILH:
1221 fprintf (file, "h");
1222 break;
1223 case SPU_ILHU:
1224 fprintf (file, "hu");
1225 break;
1226 default:
1227 gcc_unreachable ();
1228 }
1229 break;
1230 case IC_CPAT:
1231 constant_to_array (mode, x, arr);
1232 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1233 if (info == 1)
1234 fprintf (file, "b");
1235 else if (info == 2)
1236 fprintf (file, "h");
1237 else if (info == 4)
1238 fprintf (file, "w");
1239 else if (info == 8)
1240 fprintf (file, "d");
1241 break;
1242 case IC_IL1s:
1243 if (xcode == CONST_VECTOR)
1244 {
1245 x = CONST_VECTOR_ELT (x, 0);
1246 xcode = GET_CODE (x);
1247 }
1248 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1249 fprintf (file, "a");
1250 else if (xcode == HIGH)
1251 fprintf (file, "hu");
1252 break;
1253 case IC_FSMBI:
1254 case IC_FSMBI2:
1255 case IC_IL2:
1256 case IC_IL2s:
1257 case IC_POOL:
1258 abort ();
1259 }
1260 }
1261 else
1262 gcc_unreachable ();
1263 return;
1264
1265 case 'T': /* 128 bits, signed */
1266 case 'D': /* 64 bits, signed */
1267 case 'S': /* 32 bits, signed */
1268 if (CONSTANT_P (x))
1269 {
1270 enum immediate_class c = classify_immediate (x, mode);
1271 switch (c)
1272 {
1273 case IC_IL1:
1274 constant_to_array (mode, x, arr);
1275 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1276 val = trunc_int_for_mode (val, SImode);
1277 switch (which_immediate_load (val))
1278 {
1279 case SPU_IL:
1280 case SPU_ILA:
1281 break;
1282 case SPU_ILH:
1283 case SPU_ILHU:
1284 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1285 break;
1286 default:
1287 gcc_unreachable ();
1288 }
1289 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1290 break;
1291 case IC_FSMBI:
1292 constant_to_array (mode, x, arr);
1293 val = 0;
1294 for (i = 0; i < 16; i++)
1295 {
1296 val <<= 1;
1297 val |= arr[i] & 1;
1298 }
1299 print_operand (file, GEN_INT (val), 0);
1300 break;
1301 case IC_CPAT:
1302 constant_to_array (mode, x, arr);
1303 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1304 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1305 break;
1306 case IC_IL1s:
1307 if (xcode == HIGH)
1308 x = XEXP (x, 0);
1309 if (GET_CODE (x) == CONST_VECTOR)
1310 x = CONST_VECTOR_ELT (x, 0);
1311 output_addr_const (file, x);
1312 if (xcode == HIGH)
1313 fprintf (file, "@h");
1314 break;
1315 case IC_IL2:
1316 case IC_IL2s:
1317 case IC_FSMBI2:
1318 case IC_POOL:
1319 abort ();
1320 }
1321 }
1322 else
1323 gcc_unreachable ();
1324 return;
1325
1326 case 'C':
1327 if (xcode == CONST_INT)
1328 {
1329 /* Only the 4 least significant bits are relevant for generating
1330 control word instructions. */
1331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1332 return;
1333 }
1334 break;
1335
1336 case 'M': /* print code for c*d */
1337 if (GET_CODE (x) == CONST_INT)
1338 switch (INTVAL (x))
1339 {
1340 case 1:
1341 fprintf (file, "b");
1342 break;
1343 case 2:
1344 fprintf (file, "h");
1345 break;
1346 case 4:
1347 fprintf (file, "w");
1348 break;
1349 case 8:
1350 fprintf (file, "d");
1351 break;
1352 default:
1353 gcc_unreachable();
1354 }
1355 else
1356 gcc_unreachable();
1357 return;
1358
1359 case 'N': /* Negate the operand */
1360 if (xcode == CONST_INT)
1361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1362 else if (xcode == CONST_VECTOR)
1363 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1364 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1365 return;
1366
1367 case 'I': /* enable/disable interrupts */
1368 if (xcode == CONST_INT)
1369 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1370 return;
1371
1372 case 'b': /* branch modifiers */
1373 if (xcode == REG)
1374 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1375 else if (COMPARISON_P (x))
1376 fprintf (file, "%s", xcode == NE ? "n" : "");
1377 return;
1378
1379 case 'i': /* indirect call */
1380 if (xcode == MEM)
1381 {
1382 if (GET_CODE (XEXP (x, 0)) == REG)
1383 /* Used in indirect function calls. */
1384 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1385 else
1386 output_address (XEXP (x, 0));
1387 }
1388 return;
1389
1390 case 'p': /* load/store */
1391 if (xcode == MEM)
1392 {
1393 x = XEXP (x, 0);
1394 xcode = GET_CODE (x);
1395 }
1396 if (xcode == AND)
1397 {
1398 x = XEXP (x, 0);
1399 xcode = GET_CODE (x);
1400 }
1401 if (xcode == REG)
1402 fprintf (file, "d");
1403 else if (xcode == CONST_INT)
1404 fprintf (file, "a");
1405 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1406 fprintf (file, "r");
1407 else if (xcode == PLUS || xcode == LO_SUM)
1408 {
1409 if (GET_CODE (XEXP (x, 1)) == REG)
1410 fprintf (file, "x");
1411 else
1412 fprintf (file, "d");
1413 }
1414 return;
1415
1416 case 'e':
1417 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1418 val &= 0x7;
1419 output_addr_const (file, GEN_INT (val));
1420 return;
1421
1422 case 'f':
1423 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1424 val &= 0x1f;
1425 output_addr_const (file, GEN_INT (val));
1426 return;
1427
1428 case 'g':
1429 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1430 val &= 0x3f;
1431 output_addr_const (file, GEN_INT (val));
1432 return;
1433
1434 case 'h':
1435 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1436 val = (val >> 3) & 0x1f;
1437 output_addr_const (file, GEN_INT (val));
1438 return;
1439
1440 case 'E':
1441 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1442 val = -val;
1443 val &= 0x7;
1444 output_addr_const (file, GEN_INT (val));
1445 return;
1446
1447 case 'F':
1448 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1449 val = -val;
1450 val &= 0x1f;
1451 output_addr_const (file, GEN_INT (val));
1452 return;
1453
1454 case 'G':
1455 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1456 val = -val;
1457 val &= 0x3f;
1458 output_addr_const (file, GEN_INT (val));
1459 return;
1460
1461 case 'H':
1462 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1463 val = -(val & -8ll);
1464 val = (val >> 3) & 0x1f;
1465 output_addr_const (file, GEN_INT (val));
1466 return;
1467
1468 case 0:
1469 if (xcode == REG)
1470 fprintf (file, "%s", reg_names[REGNO (x)]);
1471 else if (xcode == MEM)
1472 output_address (XEXP (x, 0));
1473 else if (xcode == CONST_VECTOR)
1474 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1475 else
1476 output_addr_const (file, x);
1477 return;
1478
1479 /* unused letters
1480 o qr uvw yz
1481 AB OPQR UVWXYZ */
1482 default:
1483 output_operand_lossage ("invalid %%xn code");
1484 }
1485 gcc_unreachable ();
1486 }
1487
1488 extern char call_used_regs[];
1489
1490 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1491 caller saved register. For leaf functions it is more efficient to
1492 use a volatile register because we won't need to save and restore the
1493 pic register. This routine is only valid after register allocation
1494 is completed, so we can pick an unused register. */
1495 static rtx
1496 get_pic_reg (void)
1497 {
1498 rtx pic_reg = pic_offset_table_rtx;
1499 if (!reload_completed && !reload_in_progress)
1500 abort ();
1501 return pic_reg;
1502 }
1503
1504 /* Split constant addresses to handle cases that are too large.
1505 Add in the pic register when in PIC mode.
1506 Split immediates that require more than 1 instruction. */
1507 int
1508 spu_split_immediate (rtx * ops)
1509 {
1510 enum machine_mode mode = GET_MODE (ops[0]);
1511 enum immediate_class c = classify_immediate (ops[1], mode);
1512
1513 switch (c)
1514 {
1515 case IC_IL2:
1516 {
1517 unsigned char arrhi[16];
1518 unsigned char arrlo[16];
1519 rtx to, hi, lo;
1520 int i;
1521 constant_to_array (mode, ops[1], arrhi);
1522 to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
1523 for (i = 0; i < 16; i += 4)
1524 {
1525 arrlo[i + 2] = arrhi[i + 2];
1526 arrlo[i + 3] = arrhi[i + 3];
1527 arrlo[i + 0] = arrlo[i + 1] = 0;
1528 arrhi[i + 2] = arrhi[i + 3] = 0;
1529 }
1530 hi = array_to_constant (mode, arrhi);
1531 lo = array_to_constant (mode, arrlo);
1532 emit_move_insn (to, hi);
1533 emit_insn (gen_rtx_SET
1534 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1535 return 1;
1536 }
1537 case IC_FSMBI2:
1538 {
1539 unsigned char arr_fsmbi[16];
1540 unsigned char arr_andbi[16];
1541 rtx to, reg_fsmbi, reg_and;
1542 int i;
1543 enum machine_mode imode = mode;
1544 /* We need to do reals as ints because the constant used in the
1545 * AND might not be a legitimate real constant. */
1546 imode = int_mode_for_mode (mode);
1547 constant_to_array (mode, ops[1], arr_fsmbi);
1548 if (imode != mode)
1549 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1550 else
1551 to = ops[0];
1552 for (i = 0; i < 16; i++)
1553 if (arr_fsmbi[i] != 0)
1554 {
1555 arr_andbi[0] = arr_fsmbi[i];
1556 arr_fsmbi[i] = 0xff;
1557 }
1558 for (i = 1; i < 16; i++)
1559 arr_andbi[i] = arr_andbi[0];
1560 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1561 reg_and = array_to_constant (imode, arr_andbi);
1562 emit_move_insn (to, reg_fsmbi);
1563 emit_insn (gen_rtx_SET
1564 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1565 return 1;
1566 }
1567 case IC_POOL:
1568 if (reload_in_progress || reload_completed)
1569 {
1570 rtx mem = force_const_mem (mode, ops[1]);
1571 if (TARGET_LARGE_MEM)
1572 {
1573 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1574 emit_move_insn (addr, XEXP (mem, 0));
1575 mem = replace_equiv_address (mem, addr);
1576 }
1577 emit_move_insn (ops[0], mem);
1578 return 1;
1579 }
1580 break;
1581 case IC_IL1s:
1582 case IC_IL2s:
1583 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1584 {
1585 if (c == IC_IL2s)
1586 {
1587 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1588 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1589 }
1590 else if (flag_pic)
1591 emit_insn (gen_pic (ops[0], ops[1]));
1592 if (flag_pic)
1593 {
1594 rtx pic_reg = get_pic_reg ();
1595 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1596 crtl->uses_pic_offset_table = 1;
1597 }
1598 return flag_pic || c == IC_IL2s;
1599 }
1600 break;
1601 case IC_IL1:
1602 case IC_FSMBI:
1603 case IC_CPAT:
1604 break;
1605 }
1606 return 0;
1607 }
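/* Worked example of the IC_IL2 case above (illustrative): for a V4SImode
   constant with 0x12345678 in each word, arrhi keeps bytes 0-1 of every
   word (the 0x12340000 pattern an ilhu can load) and arrlo keeps bytes 2-3
   (the 0x00005678 pattern), so the constant is rebuilt as a move of HI
   followed by an IOR with LO. */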
1608
1609 /* SAVING is TRUE when we are generating the actual load and store
1610 instructions for REGNO. When determining the size of the stack
1611 needed for saving registers we must allocate enough space for the
1612 worst case, because we don't always have the information early enough
1613 to avoid allocating it. But we can at least eliminate the actual loads
1614 and stores during the prologue/epilogue. */
1615 static int
1616 need_to_save_reg (int regno, int saving)
1617 {
1618 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1619 return 1;
1620 if (flag_pic
1621 && regno == PIC_OFFSET_TABLE_REGNUM
1622 && (!saving || crtl->uses_pic_offset_table)
1623 && (!saving
1624 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1625 return 1;
1626 return 0;
1627 }
1628
1629 /* This function is only correct starting with local register
1630 allocation */
1631 int
1632 spu_saved_regs_size (void)
1633 {
1634 int reg_save_size = 0;
1635 int regno;
1636
1637 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1638 if (need_to_save_reg (regno, 0))
1639 reg_save_size += 0x10;
1640 return reg_save_size;
1641 }
1642
1643 static rtx
1644 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1645 {
1646 rtx reg = gen_rtx_REG (V4SImode, regno);
1647 rtx mem =
1648 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1649 return emit_insn (gen_movv4si (mem, reg));
1650 }
1651
1652 static rtx
1653 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1654 {
1655 rtx reg = gen_rtx_REG (V4SImode, regno);
1656 rtx mem =
1657 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1658 return emit_insn (gen_movv4si (reg, mem));
1659 }
1660
1661 /* This happens after reload, so we need to expand it. */
1662 static rtx
1663 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1664 {
1665 rtx insn;
1666 if (satisfies_constraint_K (GEN_INT (imm)))
1667 {
1668 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1669 }
1670 else
1671 {
1672 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1673 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1674 if (REGNO (src) == REGNO (scratch))
1675 abort ();
1676 }
1677 return insn;
1678 }
1679
1680 /* Return nonzero if this function is known to have a null epilogue. */
1681
1682 int
1683 direct_return (void)
1684 {
1685 if (reload_completed)
1686 {
1687 if (cfun->static_chain_decl == 0
1688 && (spu_saved_regs_size ()
1689 + get_frame_size ()
1690 + crtl->outgoing_args_size
1691 + crtl->args.pretend_args_size == 0)
1692 && current_function_is_leaf)
1693 return 1;
1694 }
1695 return 0;
1696 }
1697
1698 /*
1699 The stack frame looks like this:
1700 +-------------+
1701 | incoming |
1702 AP | args |
1703 +-------------+
1704 | $lr save |
1705 +-------------+
1706 prev SP | back chain |
1707 +-------------+
1708 | var args |
1709 | reg save | crtl->args.pretend_args_size bytes
1710 +-------------+
1711 | ... |
1712 | saved regs | spu_saved_regs_size() bytes
1713 +-------------+
1714 | ... |
1715 FP | vars | get_frame_size() bytes
1716 +-------------+
1717 | ... |
1718 | outgoing |
1719 | args | crtl->outgoing_args_size bytes
1720 +-------------+
1721 | $lr of next |
1722 | frame |
1723 +-------------+
1724 SP | back chain |
1725 +-------------+
1726
1727 */
1728 void
1729 spu_expand_prologue (void)
1730 {
1731 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1732 HOST_WIDE_INT total_size;
1733 HOST_WIDE_INT saved_regs_size;
1734 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1735 rtx scratch_reg_0, scratch_reg_1;
1736 rtx insn, real;
1737
1738 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1739 the "toplevel" insn chain. */
1740 emit_note (NOTE_INSN_DELETED);
1741
1742 if (flag_pic && optimize == 0)
1743 crtl->uses_pic_offset_table = 1;
1744
1745 if (spu_naked_function_p (current_function_decl))
1746 return;
1747
1748 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1749 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1750
1751 saved_regs_size = spu_saved_regs_size ();
1752 total_size = size + saved_regs_size
1753 + crtl->outgoing_args_size
1754 + crtl->args.pretend_args_size;
1755
1756 if (!current_function_is_leaf
1757 || cfun->calls_alloca || total_size > 0)
1758 total_size += STACK_POINTER_OFFSET;
1759
1760 /* Save this first because code after this might use the link
1761 register as a scratch register. */
1762 if (!current_function_is_leaf)
1763 {
1764 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1765 RTX_FRAME_RELATED_P (insn) = 1;
1766 }
1767
1768 if (total_size > 0)
1769 {
1770 offset = -crtl->args.pretend_args_size;
1771 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1772 if (need_to_save_reg (regno, 1))
1773 {
1774 offset -= 16;
1775 insn = frame_emit_store (regno, sp_reg, offset);
1776 RTX_FRAME_RELATED_P (insn) = 1;
1777 }
1778 }
1779
1780 if (flag_pic && crtl->uses_pic_offset_table)
1781 {
1782 rtx pic_reg = get_pic_reg ();
1783 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1784 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1785 }
1786
1787 if (total_size > 0)
1788 {
1789 if (flag_stack_check)
1790 {
1791 /* We compare against total_size-1 because
1792 ($sp >= total_size) <=> ($sp > total_size-1) */
1793 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1794 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1795 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1796 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1797 {
1798 emit_move_insn (scratch_v4si, size_v4si);
1799 size_v4si = scratch_v4si;
1800 }
1801 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1802 emit_insn (gen_vec_extractv4si
1803 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1804 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1805 }
1806
1807 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1808 the value of the previous $sp because we save it as the back
1809 chain. */
1810 if (total_size <= 2000)
1811 {
1812 /* In this case we save the back chain first. */
1813 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1814 insn =
1815 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1816 }
1817 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1818 {
1819 insn = emit_move_insn (scratch_reg_0, sp_reg);
1820 insn =
1821 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1822 }
1823 else
1824 {
1825 insn = emit_move_insn (scratch_reg_0, sp_reg);
1826 insn =
1827 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1828 }
1829 RTX_FRAME_RELATED_P (insn) = 1;
1830 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1831 REG_NOTES (insn) =
1832 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1833
1834 if (total_size > 2000)
1835 {
1836 /* Save the back chain ptr */
1837 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1838 }
1839
1840 if (frame_pointer_needed)
1841 {
1842 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1843 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1844 + crtl->outgoing_args_size;
1845 /* Set the new frame_pointer */
1846 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1847 RTX_FRAME_RELATED_P (insn) = 1;
1848 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1849 REG_NOTES (insn) =
1850 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1851 real, REG_NOTES (insn));
1852 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1853 }
1854 }
1855
1856 emit_note (NOTE_INSN_DELETED);
1857 }
1858
1859 void
1860 spu_expand_epilogue (bool sibcall_p)
1861 {
1862 int size = get_frame_size (), offset, regno;
1863 HOST_WIDE_INT saved_regs_size, total_size;
1864 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1865 rtx jump, scratch_reg_0;
1866
1867 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1868 the "toplevel" insn chain. */
1869 emit_note (NOTE_INSN_DELETED);
1870
1871 if (spu_naked_function_p (current_function_decl))
1872 return;
1873
1874 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1875
1876 saved_regs_size = spu_saved_regs_size ();
1877 total_size = size + saved_regs_size
1878 + crtl->outgoing_args_size
1879 + crtl->args.pretend_args_size;
1880
1881 if (!current_function_is_leaf
1882 || cfun->calls_alloca || total_size > 0)
1883 total_size += STACK_POINTER_OFFSET;
1884
1885 if (total_size > 0)
1886 {
1887 if (cfun->calls_alloca)
1888 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1889 else
1890 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1891
1892
1893 if (saved_regs_size > 0)
1894 {
1895 offset = -crtl->args.pretend_args_size;
1896 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1897 if (need_to_save_reg (regno, 1))
1898 {
1899 offset -= 0x10;
1900 frame_emit_load (regno, sp_reg, offset);
1901 }
1902 }
1903 }
1904
1905 if (!current_function_is_leaf)
1906 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1907
1908 if (!sibcall_p)
1909 {
1910 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1911 jump = emit_jump_insn (gen__return ());
1912 emit_barrier_after (jump);
1913 }
1914
1915 emit_note (NOTE_INSN_DELETED);
1916 }
1917
1918 rtx
1919 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1920 {
1921 if (count != 0)
1922 return 0;
1923 /* This is inefficient because it ends up copying to a save-register
1924 which then gets saved even though $lr has already been saved. But
1925 it does generate better code for leaf functions and we don't need
1926 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1927 used for __builtin_return_address anyway, so maybe we don't care if
1928 it's inefficient. */
1929 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1930 }
1931 \f
1932
1933 /* Given VAL, generate a constant appropriate for MODE.
1934 If MODE is a vector mode, every element will be VAL.
1935 For TImode, VAL will be zero extended to 128 bits. */
1936 rtx
1937 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1938 {
1939 rtx inner;
1940 rtvec v;
1941 int units, i;
1942
1943 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1944 || GET_MODE_CLASS (mode) == MODE_FLOAT
1945 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1946 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1947
1948 if (GET_MODE_CLASS (mode) == MODE_INT)
1949 return immed_double_const (val, 0, mode);
1950
1951 /* val is the bit representation of the float */
1952 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1953 return hwint_to_const_double (mode, val);
1954
1955 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1956 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1957 else
1958 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1959
1960 units = GET_MODE_NUNITS (mode);
1961
1962 v = rtvec_alloc (units);
1963
1964 for (i = 0; i < units; ++i)
1965 RTVEC_ELT (v, i) = inner;
1966
1967 return gen_rtx_CONST_VECTOR (mode, v);
1968 }
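/* Usage sketch (illustrative): spu_const (V4SImode, 7) returns a
   CONST_VECTOR whose four SImode elements are all 7, and
   spu_const (SFmode, 0x3f800000) yields the float constant 1.0f, since
   for float modes VAL is interpreted as the bit pattern. */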
1969 \f
1970 /* branch hint stuff */
1971
1972 /* The hardware requires 8 insns between a hint and the branch it
1973 affects. This variable describes how many rtl instructions the
1974 compiler needs to see before inserting a hint. (FIXME: We should
1975 accept less and insert nops to enforce it because hinting is always
1976 profitable for performance, but we do need to be careful of code
1977 size.) */
1978 int spu_hint_dist = (8 * 4);
1979
1980 /* Create a MODE vector constant from 4 ints. */
1981 rtx
1982 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1983 {
1984 unsigned char arr[16];
1985 arr[0] = (a >> 24) & 0xff;
1986 arr[1] = (a >> 16) & 0xff;
1987 arr[2] = (a >> 8) & 0xff;
1988 arr[3] = (a >> 0) & 0xff;
1989 arr[4] = (b >> 24) & 0xff;
1990 arr[5] = (b >> 16) & 0xff;
1991 arr[6] = (b >> 8) & 0xff;
1992 arr[7] = (b >> 0) & 0xff;
1993 arr[8] = (c >> 24) & 0xff;
1994 arr[9] = (c >> 16) & 0xff;
1995 arr[10] = (c >> 8) & 0xff;
1996 arr[11] = (c >> 0) & 0xff;
1997 arr[12] = (d >> 24) & 0xff;
1998 arr[13] = (d >> 16) & 0xff;
1999 arr[14] = (d >> 8) & 0xff;
2000 arr[15] = (d >> 0) & 0xff;
2001 return array_to_constant(mode, arr);
2002 }
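
/* Example (for exposition; not in the original source): the four ints are
   stored most significant byte first, so
     spu_const_from_ints (V4SImode, 0x01020304, 0x05060708,
                          0x090a0b0c, 0x0d0e0f10)
   fills the array with the bytes 01 02 03 ... 10, which, assuming
   array_to_constant interprets the bytes big-endian (the inverse of
   constant_to_array below), is the V4SI vector
   { 0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f10 }.  */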
2003
2004 /* An array of these is used to propagate hints to predecessor blocks. */
2005 struct spu_bb_info
2006 {
2007 rtx prop_jump; /* propagated from another block */
2008 basic_block bb; /* the original block. */
2009 };
2010
2011 /* The special $hbr register is used to prevent the insn scheduler from
2012 moving hbr insns across instructions which invalidate them. It
2013 should only be used in a clobber, and this function searches for
2014 insns which clobber it. */
2015 static bool
2016 insn_clobbers_hbr (rtx insn)
2017 {
2018 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
2019 {
2020 rtx parallel = PATTERN (insn);
2021 rtx clobber;
2022 int j;
2023 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2024 {
2025 clobber = XVECEXP (parallel, 0, j);
2026 if (GET_CODE (clobber) == CLOBBER
2027 && GET_CODE (XEXP (clobber, 0)) == REG
2028 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2029 return 1;
2030 }
2031 }
2032 return 0;
2033 }
2034
2035 static void
2036 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
2037 {
2038 rtx branch_label;
2039 rtx hint, insn, prev, next;
2040
2041 if (before == 0 || branch == 0 || target == 0)
2042 return;
2043
2044 if (distance > 600)
2045 return;
2046
2047
2048 branch_label = gen_label_rtx ();
2049 LABEL_NUSES (branch_label)++;
2050 LABEL_PRESERVE_P (branch_label) = 1;
2051 insn = emit_label_before (branch_label, branch);
2052 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2053
2054 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2055 the current insn is pipe0, dual issue with it. */
2056 prev = prev_active_insn (before);
2057 if (prev && get_pipe (prev) == 0)
2058 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2059 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
2060 {
2061 next = next_active_insn (before);
2062 hint = emit_insn_after (gen_hbr (branch_label, target), before);
2063 if (next)
2064 PUT_MODE (next, TImode);
2065 }
2066 else
2067 {
2068 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2069 PUT_MODE (hint, TImode);
2070 }
2071 recog_memoized (hint);
2072 }
2073
2074 /* Return 0 if we don't want a hint for this branch. Otherwise return
2075 the rtx for the branch target. */
2076 static rtx
2077 get_branch_target (rtx branch)
2078 {
2079 if (GET_CODE (branch) == JUMP_INSN)
2080 {
2081 rtx set, src;
2082
2083 /* Return statements */
2084 if (GET_CODE (PATTERN (branch)) == RETURN)
2085 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2086
2087 /* jump table */
2088 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2089 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2090 return 0;
2091
2092 set = single_set (branch);
2093 src = SET_SRC (set);
2094 if (GET_CODE (SET_DEST (set)) != PC)
2095 abort ();
2096
2097 if (GET_CODE (src) == IF_THEN_ELSE)
2098 {
2099 rtx lab = 0;
2100 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2101 if (note)
2102 {
2103 /* If the more probable case is not a fall through, then
2104 try a branch hint. */
2105 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2106 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2107 && GET_CODE (XEXP (src, 1)) != PC)
2108 lab = XEXP (src, 1);
2109 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2110 && GET_CODE (XEXP (src, 2)) != PC)
2111 lab = XEXP (src, 2);
2112 }
2113 if (lab)
2114 {
2115 if (GET_CODE (lab) == RETURN)
2116 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2117 return lab;
2118 }
2119 return 0;
2120 }
2121
2122 return src;
2123 }
2124 else if (GET_CODE (branch) == CALL_INSN)
2125 {
2126 rtx call;
2127 /* All of our call patterns are in a PARALLEL and the CALL is
2128 the first pattern in the PARALLEL. */
2129 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2130 abort ();
2131 call = XVECEXP (PATTERN (branch), 0, 0);
2132 if (GET_CODE (call) == SET)
2133 call = SET_SRC (call);
2134 if (GET_CODE (call) != CALL)
2135 abort ();
2136 return XEXP (XEXP (call, 0), 0);
2137 }
2138 return 0;
2139 }
2140
2141 static void
2142 insert_branch_hints (void)
2143 {
2144 struct spu_bb_info *spu_bb_info;
2145 rtx branch, insn, next;
2146 rtx branch_target = 0;
2147 int branch_addr = 0, insn_addr, head_addr;
2148 basic_block bb;
2149 unsigned int j;
2150
2151 spu_bb_info =
2152 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2153 sizeof (struct spu_bb_info));
2154
2155 /* We need exact insn addresses and lengths. */
2156 shorten_branches (get_insns ());
2157
2158 FOR_EACH_BB_REVERSE (bb)
2159 {
2160 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2161 branch = 0;
2162 if (spu_bb_info[bb->index].prop_jump)
2163 {
2164 branch = spu_bb_info[bb->index].prop_jump;
2165 branch_target = get_branch_target (branch);
2166 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2167 }
2168 /* Search from the end of a block to the beginning. In this loop, find
2169 jumps which need a branch hint and emit the hint only when:
2170 - it's an indirect branch and we're at the insn which sets
2171 the register
2172 - we're at an insn that will invalidate the hint. e.g., a
2173 call, another hint insn, inline asm that clobbers $hbr, and
2174 some inlined operations (divmodsi4). Don't consider jumps
2175 because they are only at the end of a block and are
2176 considered when we are deciding whether to propagate
2177 - we're getting too far away from the branch. The hbr insns
2178 only have a signed 10-bit offset
2179 We go back as far as possible so the branch will be considered
2180 for propagation when we get to the beginning of the block. */
2181 next = 0;
2182 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2183 {
2184 if (INSN_P (insn))
2185 {
2186 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2187 if (branch && next
2188 && ((GET_CODE (branch_target) == REG
2189 && set_of (branch_target, insn) != NULL_RTX)
2190 || insn_clobbers_hbr (insn)
2191 || branch_addr - insn_addr > 600))
2192 {
2193 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2194 if (insn != BB_END (bb)
2195 && branch_addr - next_addr >= spu_hint_dist)
2196 {
2197 if (dump_file)
2198 fprintf (dump_file,
2199 "hint for %i in block %i before %i\n",
2200 INSN_UID (branch), bb->index, INSN_UID (next));
2201 spu_emit_branch_hint (next, branch, branch_target,
2202 branch_addr - next_addr);
2203 }
2204 branch = 0;
2205 }
2206
2207 /* JUMP_P will only be true at the end of a block. When
2208 branch is already set it means we've previously decided
2209 to propagate a hint for that branch into this block. */
2210 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2211 {
2212 branch = 0;
2213 if ((branch_target = get_branch_target (insn)))
2214 {
2215 branch = insn;
2216 branch_addr = insn_addr;
2217 }
2218 }
2219
2220 /* When a branch hint is emitted it will be inserted
2221 before "next". Make sure next is the beginning of a
2222 cycle to minimize impact on the scheduled insns. */
2223 if (GET_MODE (insn) == TImode)
2224 next = insn;
2225 }
2226 if (insn == BB_HEAD (bb))
2227 break;
2228 }
2229
2230 if (branch)
2231 {
2232 /* If we haven't emitted a hint for this branch yet, it might
2233 be profitable to emit it in one of the predecessor blocks,
2234 especially for loops. */
2235 rtx bbend;
2236 basic_block prev = 0, prop = 0, prev2 = 0;
2237 int loop_exit = 0, simple_loop = 0;
2238 int next_addr = 0;
2239 if (next)
2240 next_addr = INSN_ADDRESSES (INSN_UID (next));
2241
2242 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2243 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2244 prev = EDGE_PRED (bb, j)->src;
2245 else
2246 prev2 = EDGE_PRED (bb, j)->src;
2247
2248 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2249 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2250 loop_exit = 1;
2251 else if (EDGE_SUCC (bb, j)->dest == bb)
2252 simple_loop = 1;
2253
2254 /* If this branch is a loop exit then propagate to previous
2255 fallthru block. This catches the cases when it is a simple
2256 loop or when there is an initial branch into the loop. */
2257 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2258 prop = prev;
2259
2260 /* If there is only one adjacent predecessor, propagate to it, but don't
2261 propagate outside this loop. This loop_depth test isn't perfect, but
2262 I'm not sure the loop_father member is valid at this point. */
2263 else if (prev && single_pred_p (bb)
2264 && prev->loop_depth == bb->loop_depth)
2265 prop = prev;
2266
2267 /* If this is the JOIN block of a simple IF-THEN then
2268 propagate the hint to the HEADER block. */
2269 else if (prev && prev2
2270 && EDGE_COUNT (bb->preds) == 2
2271 && EDGE_COUNT (prev->preds) == 1
2272 && EDGE_PRED (prev, 0)->src == prev2
2273 && prev2->loop_depth == bb->loop_depth
2274 && GET_CODE (branch_target) != REG)
2275 prop = prev;
2276
2277 /* Don't propagate when:
2278 - this is a simple loop and the hint would be too far
2279 - this is not a simple loop and there are 16 insns in
2280 this block already
2281 - the predecessor block ends in a branch that will be
2282 hinted
2283 - the predecessor block ends in an insn that invalidates
2284 the hint */
2285 if (prop
2286 && prop->index >= 0
2287 && (bbend = BB_END (prop))
2288 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2289 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2290 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2291 {
2292 if (dump_file)
2293 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2294 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2295 bb->index, prop->index, bb->loop_depth,
2296 INSN_UID (branch), loop_exit, simple_loop,
2297 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2298
2299 spu_bb_info[prop->index].prop_jump = branch;
2300 spu_bb_info[prop->index].bb = bb;
2301 }
2302 else if (next && branch_addr - next_addr >= spu_hint_dist)
2303 {
2304 if (dump_file)
2305 fprintf (dump_file, "hint for %i in block %i before %i\n",
2306 INSN_UID (branch), bb->index, INSN_UID (next));
2307 spu_emit_branch_hint (next, branch, branch_target,
2308 branch_addr - next_addr);
2309 }
2310 branch = 0;
2311 }
2312 }
2313 free (spu_bb_info);
2314 }
2315 \f
2316 /* Emit a nop for INSN such that the two will dual issue. This assumes
2317 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2318 We check for TImode to handle a MULTI1 insn which has dual issued its
2319 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2320 ADDR_VEC insns. */
2321 static void
2322 emit_nop_for_insn (rtx insn)
2323 {
2324 int p;
2325 rtx new_insn;
2326 p = get_pipe (insn);
2327 if (p == 1 && GET_MODE (insn) == TImode)
2328 {
2329 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2330 PUT_MODE (new_insn, TImode);
2331 PUT_MODE (insn, VOIDmode);
2332 }
2333 else
2334 new_insn = emit_insn_after (gen_lnop (), insn);
2335 }
2336
2337 /* Insert nops in basic blocks to meet dual issue alignment
2338 requirements. */
2339 static void
2340 insert_nops (void)
2341 {
2342 rtx insn, next_insn, prev_insn;
2343 int length;
2344 int addr;
2345
2346 /* This sets up INSN_ADDRESSES. */
2347 shorten_branches (get_insns ());
2348
2349 /* Keep track of length added by nops. */
2350 length = 0;
2351
2352 prev_insn = 0;
2353 for (insn = get_insns (); insn; insn = next_insn)
2354 {
2355 next_insn = next_active_insn (insn);
2356 addr = INSN_ADDRESSES (INSN_UID (insn));
2357 if (GET_MODE (insn) == TImode
2358 && next_insn
2359 && GET_MODE (next_insn) != TImode
2360 && ((addr + length) & 7) != 0)
2361 {
2362 /* prev_insn will always be set because the first insn is
2363 always 8-byte aligned. */
2364 emit_nop_for_insn (prev_insn);
2365 length += 4;
2366 }
2367 prev_insn = insn;
2368 }
2369 }
2370
2371 static void
2372 spu_machine_dependent_reorg (void)
2373 {
2374 if (optimize > 0)
2375 {
2376 if (TARGET_BRANCH_HINTS)
2377 insert_branch_hints ();
2378 insert_nops ();
2379 }
2380 }
2381 \f
2382
2383 /* Insn scheduling routines, primarily for dual issue. */
2384 static int
2385 spu_sched_issue_rate (void)
2386 {
2387 return 2;
2388 }
2389
2390 static int
2391 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2392 int verbose ATTRIBUTE_UNUSED, rtx insn,
2393 int can_issue_more)
2394 {
2395 if (GET_CODE (PATTERN (insn)) != USE
2396 && GET_CODE (PATTERN (insn)) != CLOBBER
2397 && get_pipe (insn) != -2)
2398 can_issue_more--;
2399 return can_issue_more;
2400 }
2401
2402 static int
2403 get_pipe (rtx insn)
2404 {
2405 enum attr_type t;
2406 /* Handle inline asm */
2407 if (INSN_CODE (insn) == -1)
2408 return -1;
2409 t = get_attr_type (insn);
2410 switch (t)
2411 {
2412 case TYPE_CONVERT:
2413 return -2;
2414 case TYPE_MULTI0:
2415 return -1;
2416
2417 case TYPE_FX2:
2418 case TYPE_FX3:
2419 case TYPE_SPR:
2420 case TYPE_NOP:
2421 case TYPE_FXB:
2422 case TYPE_FPD:
2423 case TYPE_FP6:
2424 case TYPE_FP7:
2425 case TYPE_IPREFETCH:
2426 return 0;
2427
2428 case TYPE_LNOP:
2429 case TYPE_SHUF:
2430 case TYPE_LOAD:
2431 case TYPE_STORE:
2432 case TYPE_BR:
2433 case TYPE_MULTI1:
2434 case TYPE_HBR:
2435 return 1;
2436 default:
2437 abort ();
2438 }
2439 }
2440
2441 static int
2442 spu_sched_adjust_priority (rtx insn, int pri)
2443 {
2444 int p = get_pipe (insn);
2445 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2446 scheduling. */
2447 if (GET_CODE (PATTERN (insn)) == USE
2448 || GET_CODE (PATTERN (insn)) == CLOBBER
2449 || p == -2)
2450 return pri + 100;
2451 /* Schedule pipe0 insns early for greedier dual issue. */
2452 if (p != 1)
2453 return pri + 50;
2454 return pri;
2455 }
2456
2457 /* INSN is dependent on DEP_INSN. */
2458 static int
2459 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2460 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2461 {
2462 if (GET_CODE (insn) == CALL_INSN)
2463 return cost - 2;
2464 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2465 scheduler makes every insn in a block anti-dependent on the final
2466 jump_insn. We adjust here so higher cost insns will get scheduled
2467 earlier. */
2468 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2469 return insn_cost (dep_insn) - 3;
2470 return cost;
2471 }
2472 \f
2473 /* Create a CONST_DOUBLE from a string. */
2474 struct rtx_def *
2475 spu_float_const (const char *string, enum machine_mode mode)
2476 {
2477 REAL_VALUE_TYPE value;
2478 value = REAL_VALUE_ATOF (string, mode);
2479 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2480 }
2481
2482 int
2483 spu_constant_address_p (rtx x)
2484 {
2485 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2486 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2487 || GET_CODE (x) == HIGH);
2488 }
2489
2490 static enum spu_immediate
2491 which_immediate_load (HOST_WIDE_INT val)
2492 {
2493 gcc_assert (val == trunc_int_for_mode (val, SImode));
2494
2495 if (val >= -0x8000 && val <= 0x7fff)
2496 return SPU_IL;
2497 if (val >= 0 && val <= 0x3ffff)
2498 return SPU_ILA;
2499 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2500 return SPU_ILH;
2501 if ((val & 0xffff) == 0)
2502 return SPU_ILHU;
2503
2504 return SPU_NONE;
2505 }
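
/* Worked examples (for exposition; derived from the checks above):
     0x1234     -> SPU_IL   (fits the signed 16-bit il immediate)
     0x23456    -> SPU_ILA  (fits the unsigned 18-bit ila immediate)
     0x12341234 -> SPU_ILH  (both halfwords identical)
     0x12340000 -> SPU_ILHU (low halfword zero)
     0x12345678 -> SPU_NONE (needs ilhu followed by iohl).  */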
2506
2507 /* Return true when OP can be loaded by one of the il instructions, or
2508 when epilogue_completed is not yet set and OP can be loaded using ilhu and iohl. */
2509 int
2510 immediate_load_p (rtx op, enum machine_mode mode)
2511 {
2512 if (CONSTANT_P (op))
2513 {
2514 enum immediate_class c = classify_immediate (op, mode);
2515 return c == IC_IL1 || c == IC_IL1s
2516 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
2517 }
2518 return 0;
2519 }
2520
2521 /* Return true if the first SIZE bytes of ARR form a constant that can be
2522 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2523 are set to the size and offset to use for the instruction. */
2524 static int
2525 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2526 {
2527 int cpat, run, i, start;
2528 cpat = 1;
2529 run = 0;
2530 start = -1;
2531 for (i = 0; i < size && cpat; i++)
2532 if (arr[i] != i+16)
2533 {
2534 if (!run)
2535 {
2536 start = i;
2537 if (arr[i] == 3)
2538 run = 1;
2539 else if (arr[i] == 2 && arr[i+1] == 3)
2540 run = 2;
2541 else if (arr[i] == 0)
2542 {
2543 while (arr[i+run] == run && i+run < 16)
2544 run++;
2545 if (run != 4 && run != 8)
2546 cpat = 0;
2547 }
2548 else
2549 cpat = 0;
2550 if ((i & (run-1)) != 0)
2551 cpat = 0;
2552 i += run;
2553 }
2554 else
2555 cpat = 0;
2556 }
2557 if (cpat && (run || size < 16))
2558 {
2559 if (run == 0)
2560 run = 1;
2561 if (prun)
2562 *prun = run;
2563 if (pstart)
2564 *pstart = start == -1 ? 16-run : start;
2565 return 1;
2566 }
2567 return 0;
2568 }
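
/* Illustrative example (not in the original source): the arrays accepted
   here are the shuffle-control patterns produced by cbd/chd/cwd/cdd, i.e.
   the identity bytes 0x10..0x1f with a naturally aligned run of 1, 2, 4 or
   8 bytes replaced by the byte indices of the scalar in its preferred slot
   (e.g. 00 01 02 03 for a word).  For instance, the cwd pattern for offset
   4 is
     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
   and cpat_info reports run 4, start 4 for it.  */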
2569
2570 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2571 it into a register. MODE is only valid when OP is a CONST_INT. */
2572 static enum immediate_class
2573 classify_immediate (rtx op, enum machine_mode mode)
2574 {
2575 HOST_WIDE_INT val;
2576 unsigned char arr[16];
2577 int i, j, repeated, fsmbi, repeat;
2578
2579 gcc_assert (CONSTANT_P (op));
2580
2581 if (GET_MODE (op) != VOIDmode)
2582 mode = GET_MODE (op);
2583
2584 /* A V4SI const_vector with all identical symbols is ok. */
2585 if (!flag_pic
2586 && mode == V4SImode
2587 && GET_CODE (op) == CONST_VECTOR
2588 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2589 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2590 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2591 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2592 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2593 op = CONST_VECTOR_ELT (op, 0);
2594
2595 switch (GET_CODE (op))
2596 {
2597 case SYMBOL_REF:
2598 case LABEL_REF:
2599 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
2600
2601 case CONST:
2602 /* We can never know if the resulting address fits in 18 bits and can be
2603 loaded with ila. For now, assume the address will not overflow if
2604 the displacement is "small" (fits 'K' constraint). */
2605 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
2606 {
2607 rtx sym = XEXP (XEXP (op, 0), 0);
2608 rtx cst = XEXP (XEXP (op, 0), 1);
2609
2610 if (GET_CODE (sym) == SYMBOL_REF
2611 && GET_CODE (cst) == CONST_INT
2612 && satisfies_constraint_K (cst))
2613 return IC_IL1s;
2614 }
2615 return IC_IL2s;
2616
2617 case HIGH:
2618 return IC_IL1s;
2619
2620 case CONST_VECTOR:
2621 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2622 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2623 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2624 return IC_POOL;
2625 /* Fall through. */
2626
2627 case CONST_INT:
2628 case CONST_DOUBLE:
2629 constant_to_array (mode, op, arr);
2630
2631 /* Check that each 4-byte slot is identical. */
2632 repeated = 1;
2633 for (i = 4; i < 16; i += 4)
2634 for (j = 0; j < 4; j++)
2635 if (arr[j] != arr[i + j])
2636 repeated = 0;
2637
2638 if (repeated)
2639 {
2640 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2641 val = trunc_int_for_mode (val, SImode);
2642
2643 if (which_immediate_load (val) != SPU_NONE)
2644 return IC_IL1;
2645 }
2646
2647 /* Any mode of 2 bytes or smaller can be loaded with an il
2648 instruction. */
2649 gcc_assert (GET_MODE_SIZE (mode) > 2);
2650
2651 fsmbi = 1;
2652 repeat = 0;
2653 for (i = 0; i < 16 && fsmbi; i++)
2654 if (arr[i] != 0 && repeat == 0)
2655 repeat = arr[i];
2656 else if (arr[i] != 0 && arr[i] != repeat)
2657 fsmbi = 0;
2658 if (fsmbi)
2659 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
2660
2661 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2662 return IC_CPAT;
2663
2664 if (repeated)
2665 return IC_IL2;
2666
2667 return IC_POOL;
2668 default:
2669 break;
2670 }
2671 gcc_unreachable ();
2672 }
2673
2674 static enum spu_immediate
2675 which_logical_immediate (HOST_WIDE_INT val)
2676 {
2677 gcc_assert (val == trunc_int_for_mode (val, SImode));
2678
2679 if (val >= -0x200 && val <= 0x1ff)
2680 return SPU_ORI;
2681 if (val >= 0 && val <= 0xffff)
2682 return SPU_IOHL;
2683 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2684 {
2685 val = trunc_int_for_mode (val, HImode);
2686 if (val >= -0x200 && val <= 0x1ff)
2687 return SPU_ORHI;
2688 if ((val & 0xff) == ((val >> 8) & 0xff))
2689 {
2690 val = trunc_int_for_mode (val, QImode);
2691 if (val >= -0x200 && val <= 0x1ff)
2692 return SPU_ORBI;
2693 }
2694 }
2695 return SPU_NONE;
2696 }
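
/* Example (for exposition): 0x00050005 repeats in both halfwords and the
   halfword value 5 fits the signed 10-bit immediate, so it classifies as
   SPU_ORHI; 0x12345678 matches none of the forms and yields SPU_NONE.  */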
2697
2698 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2699 CONST_DOUBLEs. */
2700 static int
2701 const_vector_immediate_p (rtx x)
2702 {
2703 int i;
2704 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2705 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2706 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2707 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2708 return 0;
2709 return 1;
2710 }
2711
2712 int
2713 logical_immediate_p (rtx op, enum machine_mode mode)
2714 {
2715 HOST_WIDE_INT val;
2716 unsigned char arr[16];
2717 int i, j;
2718
2719 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2720 || GET_CODE (op) == CONST_VECTOR);
2721
2722 if (GET_CODE (op) == CONST_VECTOR
2723 && !const_vector_immediate_p (op))
2724 return 0;
2725
2726 if (GET_MODE (op) != VOIDmode)
2727 mode = GET_MODE (op);
2728
2729 constant_to_array (mode, op, arr);
2730
2731 /* Check that bytes are repeated. */
2732 for (i = 4; i < 16; i += 4)
2733 for (j = 0; j < 4; j++)
2734 if (arr[j] != arr[i + j])
2735 return 0;
2736
2737 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2738 val = trunc_int_for_mode (val, SImode);
2739
2740 i = which_logical_immediate (val);
2741 return i != SPU_NONE && i != SPU_IOHL;
2742 }
2743
2744 int
2745 iohl_immediate_p (rtx op, enum machine_mode mode)
2746 {
2747 HOST_WIDE_INT val;
2748 unsigned char arr[16];
2749 int i, j;
2750
2751 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2752 || GET_CODE (op) == CONST_VECTOR);
2753
2754 if (GET_CODE (op) == CONST_VECTOR
2755 && !const_vector_immediate_p (op))
2756 return 0;
2757
2758 if (GET_MODE (op) != VOIDmode)
2759 mode = GET_MODE (op);
2760
2761 constant_to_array (mode, op, arr);
2762
2763 /* Check that bytes are repeated. */
2764 for (i = 4; i < 16; i += 4)
2765 for (j = 0; j < 4; j++)
2766 if (arr[j] != arr[i + j])
2767 return 0;
2768
2769 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2770 val = trunc_int_for_mode (val, SImode);
2771
2772 return val >= 0 && val <= 0xffff;
2773 }
2774
2775 int
2776 arith_immediate_p (rtx op, enum machine_mode mode,
2777 HOST_WIDE_INT low, HOST_WIDE_INT high)
2778 {
2779 HOST_WIDE_INT val;
2780 unsigned char arr[16];
2781 int bytes, i, j;
2782
2783 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2784 || GET_CODE (op) == CONST_VECTOR);
2785
2786 if (GET_CODE (op) == CONST_VECTOR
2787 && !const_vector_immediate_p (op))
2788 return 0;
2789
2790 if (GET_MODE (op) != VOIDmode)
2791 mode = GET_MODE (op);
2792
2793 constant_to_array (mode, op, arr);
2794
2795 if (VECTOR_MODE_P (mode))
2796 mode = GET_MODE_INNER (mode);
2797
2798 bytes = GET_MODE_SIZE (mode);
2799 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2800
2801 /* Check that bytes are repeated. */
2802 for (i = bytes; i < 16; i += bytes)
2803 for (j = 0; j < bytes; j++)
2804 if (arr[j] != arr[i + j])
2805 return 0;
2806
2807 val = arr[0];
2808 for (j = 1; j < bytes; j++)
2809 val = (val << 8) | arr[j];
2810
2811 val = trunc_int_for_mode (val, mode);
2812
2813 return val >= low && val <= high;
2814 }
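
/* Example (for exposition): for a V8HImode vector with every element equal
   to 7, the bytes repeat with a period of 2, the reconstructed element
   value is 7, and arith_immediate_p returns true whenever 7 lies within the
   [LOW, HIGH] range the caller passes (e.g. -0x200..0x1ff for ahi).  */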
2815
2816 /* We accept:
2817 - any 32-bit constant (SImode, SFmode)
2818 - any constant that can be generated with fsmbi (any mode)
2819 - a 64-bit constant where the high and low 32-bit halves are identical
2820 (DImode, DFmode)
2821 - a 128-bit constant where the four 32-bit words match. */
2822 int
2823 spu_legitimate_constant_p (rtx x)
2824 {
2825 if (GET_CODE (x) == HIGH)
2826 x = XEXP (x, 0);
2827 /* V4SI with all identical symbols is valid. */
2828 if (!flag_pic
2829 && GET_MODE (x) == V4SImode
2830 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2831 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2832 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
2833 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2834 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2835 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2836
2837 if (GET_CODE (x) == CONST_VECTOR
2838 && !const_vector_immediate_p (x))
2839 return 0;
2840 return 1;
2841 }
2842
2843 /* Valid addresses are:
2844 - symbol_ref, label_ref, const
2845 - reg
2846 - reg + const, where either reg or const is 16 byte aligned
2847 - reg + reg, alignment doesn't matter
2848 The alignment matters in the reg+const case because lqd and stqd
2849 ignore the 4 least significant bits of the const. (TODO: It might be
2850 preferable to allow any alignment and fix it up when splitting.) */
2851 int
2852 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2853 rtx x, int reg_ok_strict)
2854 {
2855 if (mode == TImode && GET_CODE (x) == AND
2856 && GET_CODE (XEXP (x, 1)) == CONST_INT
2857 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2858 x = XEXP (x, 0);
2859 switch (GET_CODE (x))
2860 {
2861 case SYMBOL_REF:
2862 case LABEL_REF:
2863 return !TARGET_LARGE_MEM;
2864
2865 case CONST:
2866 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
2867 {
2868 rtx sym = XEXP (XEXP (x, 0), 0);
2869 rtx cst = XEXP (XEXP (x, 0), 1);
2870
2871 /* Accept any symbol_ref + constant, assuming it does not
2872 wrap around the local store addressability limit. */
2873 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
2874 return 1;
2875 }
2876 return 0;
2877
2878 case CONST_INT:
2879 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2880
2881 case SUBREG:
2882 x = XEXP (x, 0);
2883 gcc_assert (GET_CODE (x) == REG);
2884
2885 case REG:
2886 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2887
2888 case PLUS:
2889 case LO_SUM:
2890 {
2891 rtx op0 = XEXP (x, 0);
2892 rtx op1 = XEXP (x, 1);
2893 if (GET_CODE (op0) == SUBREG)
2894 op0 = XEXP (op0, 0);
2895 if (GET_CODE (op1) == SUBREG)
2896 op1 = XEXP (op1, 0);
2897 /* We can't just accept any aligned register because CSE can
2898 change it to a register that is not marked aligned and then
2899 recog will fail. So we only accept frame registers because
2900 they will only be changed to other frame registers. */
2901 if (GET_CODE (op0) == REG
2902 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2903 && GET_CODE (op1) == CONST_INT
2904 && INTVAL (op1) >= -0x2000
2905 && INTVAL (op1) <= 0x1fff
2906 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2907 return 1;
2908 if (GET_CODE (op0) == REG
2909 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2910 && GET_CODE (op1) == REG
2911 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2912 return 1;
2913 }
2914 break;
2915
2916 default:
2917 break;
2918 }
2919 return 0;
2920 }
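
/* Examples (for exposition; not from the original source):
     (plus (reg $sp) (const_int 32)) -- accepted: the stack pointer is one
         of the always-aligned registers and 32 is a multiple of 16.
     (plus (reg $sp) (const_int 4))  -- accepted only because $sp is known
         aligned; the low 4 bits of the offset, which lqd/stqd would
         ignore, are handled when the access is split.
     (plus (reg Rn) (const_int 4))   -- rejected for an ordinary register
         that is not known to be 16-byte aligned.  */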
2921
2922 /* When the address is reg + const_int, force the const_int into a
2923 register. */
2924 rtx
2925 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2926 enum machine_mode mode)
2927 {
2928 rtx op0, op1;
2929 /* Make sure both operands are registers. */
2930 if (GET_CODE (x) == PLUS)
2931 {
2932 op0 = XEXP (x, 0);
2933 op1 = XEXP (x, 1);
2934 if (ALIGNED_SYMBOL_REF_P (op0))
2935 {
2936 op0 = force_reg (Pmode, op0);
2937 mark_reg_pointer (op0, 128);
2938 }
2939 else if (GET_CODE (op0) != REG)
2940 op0 = force_reg (Pmode, op0);
2941 if (ALIGNED_SYMBOL_REF_P (op1))
2942 {
2943 op1 = force_reg (Pmode, op1);
2944 mark_reg_pointer (op1, 128);
2945 }
2946 else if (GET_CODE (op1) != REG)
2947 op1 = force_reg (Pmode, op1);
2948 x = gen_rtx_PLUS (Pmode, op0, op1);
2949 if (spu_legitimate_address (mode, x, 0))
2950 return x;
2951 }
2952 return NULL_RTX;
2953 }
2954
2955 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2956 struct attribute_spec.handler. */
2957 static tree
2958 spu_handle_fndecl_attribute (tree * node,
2959 tree name,
2960 tree args ATTRIBUTE_UNUSED,
2961 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2962 {
2963 if (TREE_CODE (*node) != FUNCTION_DECL)
2964 {
2965 warning (0, "`%s' attribute only applies to functions",
2966 IDENTIFIER_POINTER (name));
2967 *no_add_attrs = true;
2968 }
2969
2970 return NULL_TREE;
2971 }
2972
2973 /* Handle the "vector" attribute. */
2974 static tree
2975 spu_handle_vector_attribute (tree * node, tree name,
2976 tree args ATTRIBUTE_UNUSED,
2977 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2978 {
2979 tree type = *node, result = NULL_TREE;
2980 enum machine_mode mode;
2981 int unsigned_p;
2982
2983 while (POINTER_TYPE_P (type)
2984 || TREE_CODE (type) == FUNCTION_TYPE
2985 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2986 type = TREE_TYPE (type);
2987
2988 mode = TYPE_MODE (type);
2989
2990 unsigned_p = TYPE_UNSIGNED (type);
2991 switch (mode)
2992 {
2993 case DImode:
2994 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2995 break;
2996 case SImode:
2997 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2998 break;
2999 case HImode:
3000 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3001 break;
3002 case QImode:
3003 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3004 break;
3005 case SFmode:
3006 result = V4SF_type_node;
3007 break;
3008 case DFmode:
3009 result = V2DF_type_node;
3010 break;
3011 default:
3012 break;
3013 }
3014
3015 /* Propagate qualifiers attached to the element type
3016 onto the vector type. */
3017 if (result && result != type && TYPE_QUALS (type))
3018 result = build_qualified_type (result, TYPE_QUALS (type));
3019
3020 *no_add_attrs = true; /* No need to hang on to the attribute. */
3021
3022 if (!result)
3023 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3024 else
3025 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3026
3027 return NULL_TREE;
3028 }
3029
3030 /* Return nonzero if FUNC is a naked function. */
3031 static int
3032 spu_naked_function_p (tree func)
3033 {
3034 tree a;
3035
3036 if (TREE_CODE (func) != FUNCTION_DECL)
3037 abort ();
3038
3039 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3040 return a != NULL_TREE;
3041 }
3042
3043 int
3044 spu_initial_elimination_offset (int from, int to)
3045 {
3046 int saved_regs_size = spu_saved_regs_size ();
3047 int sp_offset = 0;
3048 if (!current_function_is_leaf || crtl->outgoing_args_size
3049 || get_frame_size () || saved_regs_size)
3050 sp_offset = STACK_POINTER_OFFSET;
3051 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3052 return (sp_offset + crtl->outgoing_args_size);
3053 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3054 return 0;
3055 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3056 return sp_offset + crtl->outgoing_args_size
3057 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3058 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3059 return get_frame_size () + saved_regs_size + sp_offset;
3060 return 0;
3061 }
3062
3063 rtx
3064 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3065 {
3066 enum machine_mode mode = TYPE_MODE (type);
3067 int byte_size = ((mode == BLKmode)
3068 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3069
3070 /* Make sure small structs are left justified in a register. */
3071 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3072 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3073 {
3074 enum machine_mode smode;
3075 rtvec v;
3076 int i;
3077 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3078 int n = byte_size / UNITS_PER_WORD;
3079 v = rtvec_alloc (nregs);
3080 for (i = 0; i < n; i++)
3081 {
3082 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3083 gen_rtx_REG (TImode,
3084 FIRST_RETURN_REGNUM
3085 + i),
3086 GEN_INT (UNITS_PER_WORD * i));
3087 byte_size -= UNITS_PER_WORD;
3088 }
3089
3090 if (n < nregs)
3091 {
3092 if (byte_size < 4)
3093 byte_size = 4;
3094 smode =
3095 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3096 RTVEC_ELT (v, n) =
3097 gen_rtx_EXPR_LIST (VOIDmode,
3098 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3099 GEN_INT (UNITS_PER_WORD * n));
3100 }
3101 return gen_rtx_PARALLEL (mode, v);
3102 }
3103 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3104 }
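
/* Example (for exposition, assuming UNITS_PER_WORD is 16 bytes): a 20-byte
   struct comes back as a PARALLEL of a TImode piece in the first return
   register at byte offset 0 plus an SImode piece in the next return
   register at byte offset 16, keeping the data left-justified.  */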
3105
3106 rtx
3107 spu_function_arg (CUMULATIVE_ARGS cum,
3108 enum machine_mode mode,
3109 tree type, int named ATTRIBUTE_UNUSED)
3110 {
3111 int byte_size;
3112
3113 if (cum >= MAX_REGISTER_ARGS)
3114 return 0;
3115
3116 byte_size = ((mode == BLKmode)
3117 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3118
3119 /* The ABI does not allow parameters to be passed partially in
3120 registers and partially on the stack. */
3121 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3122 return 0;
3123
3124 /* Make sure small structs are left justified in a register. */
3125 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3126 && byte_size < UNITS_PER_WORD && byte_size > 0)
3127 {
3128 enum machine_mode smode;
3129 rtx gr_reg;
3130 if (byte_size < 4)
3131 byte_size = 4;
3132 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3133 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3134 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3135 const0_rtx);
3136 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3137 }
3138 else
3139 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3140 }
3141
3142 /* Variable sized types are passed by reference. */
3143 static bool
3144 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3145 enum machine_mode mode ATTRIBUTE_UNUSED,
3146 const_tree type, bool named ATTRIBUTE_UNUSED)
3147 {
3148 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3149 }
3150 \f
3151
3152 /* Var args. */
3153
3154 /* Create and return the va_list datatype.
3155
3156 On SPU, va_list is an array type equivalent to
3157
3158 typedef struct __va_list_tag
3159 {
3160 void *__args __attribute__((__aligned(16)));
3161 void *__skip __attribute__((__aligned(16)));
3162
3163 } va_list[1];
3164
3165 where __args points to the arg that will be returned by the next
3166 va_arg(), and __skip points to the previous stack frame such that
3167 when __args == __skip we should advance __args by 32 bytes. */
3168 static tree
3169 spu_build_builtin_va_list (void)
3170 {
3171 tree f_args, f_skip, record, type_decl;
3172 bool owp;
3173
3174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3175
3176 type_decl =
3177 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3178
3179 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3180 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3181
3182 DECL_FIELD_CONTEXT (f_args) = record;
3183 DECL_ALIGN (f_args) = 128;
3184 DECL_USER_ALIGN (f_args) = 1;
3185
3186 DECL_FIELD_CONTEXT (f_skip) = record;
3187 DECL_ALIGN (f_skip) = 128;
3188 DECL_USER_ALIGN (f_skip) = 1;
3189
3190 TREE_CHAIN (record) = type_decl;
3191 TYPE_NAME (record) = type_decl;
3192 TYPE_FIELDS (record) = f_args;
3193 TREE_CHAIN (f_args) = f_skip;
3194
3195 /* We know this is being padded and we want it that way. It is an internal
3196 type, so hide the warnings from the user. */
3197 owp = warn_padded;
3198 warn_padded = false;
3199
3200 layout_type (record);
3201
3202 warn_padded = owp;
3203
3204 /* The correct type is an array type of one element. */
3205 return build_array_type (record, build_index_type (size_zero_node));
3206 }
3207
3208 /* Implement va_start by filling the va_list structure VALIST.
3209 NEXTARG points to the first anonymous stack argument.
3210
3211 The following global variables are used to initialize
3212 the va_list structure:
3213
3214 crtl->args.info;
3215 the CUMULATIVE_ARGS for this function
3216
3217 crtl->args.arg_offset_rtx:
3218 holds the offset of the first anonymous stack argument
3219 (relative to the virtual arg pointer). */
3220
3221 static void
3222 spu_va_start (tree valist, rtx nextarg)
3223 {
3224 tree f_args, f_skip;
3225 tree args, skip, t;
3226
3227 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3228 f_skip = TREE_CHAIN (f_args);
3229
3230 valist = build_va_arg_indirect_ref (valist);
3231 args =
3232 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3233 skip =
3234 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3235
3236 /* Find the __args area. */
3237 t = make_tree (TREE_TYPE (args), nextarg);
3238 if (crtl->args.pretend_args_size > 0)
3239 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3240 size_int (-STACK_POINTER_OFFSET));
3241 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3242 TREE_SIDE_EFFECTS (t) = 1;
3243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3244
3245 /* Find the __skip area. */
3246 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3247 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3248 size_int (crtl->args.pretend_args_size
3249 - STACK_POINTER_OFFSET));
3250 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3251 TREE_SIDE_EFFECTS (t) = 1;
3252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253 }
3254
3255 /* Gimplify va_arg by updating the va_list structure
3256 VALIST as required to retrieve an argument of type
3257 TYPE, and returning that argument.
3258
3259 ret = va_arg(VALIST, TYPE);
3260
3261 generates code equivalent to:
3262
3263 paddedsize = (sizeof(TYPE) + 15) & -16;
3264 if (VALIST.__args + paddedsize > VALIST.__skip
3265 && VALIST.__args <= VALIST.__skip)
3266 addr = VALIST.__skip + 32;
3267 else
3268 addr = VALIST.__args;
3269 VALIST.__args = addr + paddedsize;
3270 ret = *(TYPE *)addr;
3271 */
3272 static tree
3273 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3274 gimple_seq * post_p ATTRIBUTE_UNUSED)
3275 {
3276 tree f_args, f_skip;
3277 tree args, skip;
3278 HOST_WIDE_INT size, rsize;
3279 tree paddedsize, addr, tmp;
3280 bool pass_by_reference_p;
3281
3282 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3283 f_skip = TREE_CHAIN (f_args);
3284
3285 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3286 args =
3287 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3288 skip =
3289 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3290
3291 addr = create_tmp_var (ptr_type_node, "va_arg");
3292 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3293
3294 /* if an object is dynamically sized, a pointer to it is passed
3295 instead of the object itself. */
3296 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3297 false);
3298 if (pass_by_reference_p)
3299 type = build_pointer_type (type);
3300 size = int_size_in_bytes (type);
3301 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3302
3303 /* build conditional expression to calculate addr. The expression
3304 will be gimplified later. */
3305 paddedsize = size_int (rsize);
3306 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
3307 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3308 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3309 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3310 unshare_expr (skip)));
3311
3312 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3313 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3314 size_int (32)), unshare_expr (args));
3315
3316 gimplify_assign (addr, tmp, pre_p);
3317
3318 /* update VALIST.__args */
3319 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3320 gimplify_assign (unshare_expr (args), tmp, pre_p);
3321
3322 addr = fold_convert (build_pointer_type (type), addr);
3323
3324 if (pass_by_reference_p)
3325 addr = build_va_arg_indirect_ref (addr);
3326
3327 return build_va_arg_indirect_ref (addr);
3328 }
3329
3330 /* Save parameter registers starting with the register that corresponds
3331 to the first unnamed parameter. If the first unnamed parameter is
3332 on the stack then save no registers. Set pretend_args_size to the
3333 amount of space needed to save the registers. */
3334 void
3335 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3336 tree type, int *pretend_size, int no_rtl)
3337 {
3338 if (!no_rtl)
3339 {
3340 rtx tmp;
3341 int regno;
3342 int offset;
3343 int ncum = *cum;
3344
3345 /* cum currently points to the last named argument; we want to
3346 start at the next argument. */
3347 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3348
3349 offset = -STACK_POINTER_OFFSET;
3350 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3351 {
3352 tmp = gen_frame_mem (V4SImode,
3353 plus_constant (virtual_incoming_args_rtx,
3354 offset));
3355 emit_move_insn (tmp,
3356 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3357 offset += 16;
3358 }
3359 *pretend_size = offset + STACK_POINTER_OFFSET;
3360 }
3361 }
3362 \f
3363 void
3364 spu_conditional_register_usage (void)
3365 {
3366 if (flag_pic)
3367 {
3368 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3369 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3370 }
3371 }
3372
3373 /* This is called to decide when we can simplify a load instruction. We
3374 must only return true for registers which we know will always be
3375 aligned, taking into account that CSE might replace this reg with
3376 another one that has not been marked aligned.
3377 So this is really only true for frame, stack and virtual registers,
3378 which we know are always aligned and should not be adversely affected
3379 by CSE. */
3380 static int
3381 regno_aligned_for_load (int regno)
3382 {
3383 return regno == FRAME_POINTER_REGNUM
3384 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
3385 || regno == ARG_POINTER_REGNUM
3386 || regno == STACK_POINTER_REGNUM
3387 || (regno >= FIRST_VIRTUAL_REGISTER
3388 && regno <= LAST_VIRTUAL_REGISTER);
3389 }
3390
3391 /* Return TRUE when mem is known to be 16-byte aligned. */
3392 int
3393 aligned_mem_p (rtx mem)
3394 {
3395 if (MEM_ALIGN (mem) >= 128)
3396 return 1;
3397 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3398 return 1;
3399 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3400 {
3401 rtx p0 = XEXP (XEXP (mem, 0), 0);
3402 rtx p1 = XEXP (XEXP (mem, 0), 1);
3403 if (regno_aligned_for_load (REGNO (p0)))
3404 {
3405 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3406 return 1;
3407 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3408 return 1;
3409 }
3410 }
3411 else if (GET_CODE (XEXP (mem, 0)) == REG)
3412 {
3413 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3414 return 1;
3415 }
3416 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3417 return 1;
3418 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3419 {
3420 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3421 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3422 if (GET_CODE (p0) == SYMBOL_REF
3423 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3424 return 1;
3425 }
3426 return 0;
3427 }
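
/* Examples (for exposition): (mem:SI (plus (reg $sp) (const_int 32))) is
   considered aligned because $sp is an always-aligned register and 32 is a
   multiple of 16; (mem:SI (reg Rn)) for an arbitrary pseudo is not, unless
   MEM_ALIGN already records an alignment of 128 bits or more.  */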
3428
3429 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3430 into its SYMBOL_REF_FLAGS. */
3431 static void
3432 spu_encode_section_info (tree decl, rtx rtl, int first)
3433 {
3434 default_encode_section_info (decl, rtl, first);
3435
3436 /* If a variable has a forced alignment to < 16 bytes, mark it with
3437 SYMBOL_FLAG_ALIGN1. */
3438 if (TREE_CODE (decl) == VAR_DECL
3439 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3440 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3441 }
3442
3443 /* Return TRUE if we are certain the mem refers to a complete object
3444 which is both 16-byte aligned and padded to a 16-byte boundary. This
3445 would make it safe to store with a single instruction.
3446 We guarantee the alignment and padding for static objects by aligning
3447 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3448 FIXME: We currently cannot guarantee this for objects on the stack
3449 because assign_parm_setup_stack calls assign_stack_local with the
3450 alignment of the parameter mode and in that case the alignment never
3451 gets adjusted by LOCAL_ALIGNMENT. */
3452 static int
3453 store_with_one_insn_p (rtx mem)
3454 {
3455 rtx addr = XEXP (mem, 0);
3456 if (GET_MODE (mem) == BLKmode)
3457 return 0;
3458 /* Only static objects. */
3459 if (GET_CODE (addr) == SYMBOL_REF)
3460 {
3461 /* We use the associated declaration to make sure the access is
3462 referring to the whole object.
3463 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3464 if it is necessary. Will there be cases where one exists, and
3465 the other does not? Will there be cases where both exist, but
3466 have different types? */
3467 tree decl = MEM_EXPR (mem);
3468 if (decl
3469 && TREE_CODE (decl) == VAR_DECL
3470 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3471 return 1;
3472 decl = SYMBOL_REF_DECL (addr);
3473 if (decl
3474 && TREE_CODE (decl) == VAR_DECL
3475 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3476 return 1;
3477 }
3478 return 0;
3479 }
3480
3481 int
3482 spu_expand_mov (rtx * ops, enum machine_mode mode)
3483 {
3484 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3485 abort ();
3486
3487 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3488 {
3489 rtx from = SUBREG_REG (ops[1]);
3490 enum machine_mode imode = GET_MODE (from);
3491
3492 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3493 && GET_MODE_CLASS (imode) == MODE_INT
3494 && subreg_lowpart_p (ops[1]));
3495
3496 if (GET_MODE_SIZE (imode) < 4)
3497 {
3498 from = gen_rtx_SUBREG (SImode, from, 0);
3499 imode = SImode;
3500 }
3501
3502 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3503 {
3504 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
3505 emit_insn (GEN_FCN (icode) (ops[0], from));
3506 }
3507 else
3508 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3509 return 1;
3510 }
3511
3512 /* At least one of the operands needs to be a register. */
3513 if ((reload_in_progress | reload_completed) == 0
3514 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3515 {
3516 rtx temp = force_reg (mode, ops[1]);
3517 emit_move_insn (ops[0], temp);
3518 return 1;
3519 }
3520 if (reload_in_progress || reload_completed)
3521 {
3522 if (CONSTANT_P (ops[1]))
3523 return spu_split_immediate (ops);
3524 return 0;
3525 }
3526 else
3527 {
3528 if (GET_CODE (ops[0]) == MEM)
3529 {
3530 if (!spu_valid_move (ops))
3531 {
3532 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3533 gen_reg_rtx (TImode)));
3534 return 1;
3535 }
3536 }
3537 else if (GET_CODE (ops[1]) == MEM)
3538 {
3539 if (!spu_valid_move (ops))
3540 {
3541 emit_insn (gen_load
3542 (ops[0], ops[1], gen_reg_rtx (TImode),
3543 gen_reg_rtx (SImode)));
3544 return 1;
3545 }
3546 }
3547 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3548 extend them. */
3549 if (GET_CODE (ops[1]) == CONST_INT)
3550 {
3551 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3552 if (val != INTVAL (ops[1]))
3553 {
3554 emit_move_insn (ops[0], GEN_INT (val));
3555 return 1;
3556 }
3557 }
3558 }
3559 return 0;
3560 }
3561
3562 void
3563 spu_split_load (rtx * ops)
3564 {
3565 enum machine_mode mode = GET_MODE (ops[0]);
3566 rtx addr, load, rot, mem, p0, p1;
3567 int rot_amt;
3568
3569 addr = XEXP (ops[1], 0);
3570
3571 rot = 0;
3572 rot_amt = 0;
3573 if (GET_CODE (addr) == PLUS)
3574 {
3575 /* 8 cases:
3576 aligned reg + aligned reg => lqx
3577 aligned reg + unaligned reg => lqx, rotqby
3578 aligned reg + aligned const => lqd
3579 aligned reg + unaligned const => lqd, rotqbyi
3580 unaligned reg + aligned reg => lqx, rotqby
3581 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3582 unaligned reg + aligned const => lqd, rotqby
3583 unaligned reg + unaligned const => not allowed by legitimate address
3584 */
3585 p0 = XEXP (addr, 0);
3586 p1 = XEXP (addr, 1);
3587 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
3588 {
3589 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
3590 {
3591 emit_insn (gen_addsi3 (ops[3], p0, p1));
3592 rot = ops[3];
3593 }
3594 else
3595 rot = p0;
3596 }
3597 else
3598 {
3599 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3600 {
3601 rot_amt = INTVAL (p1) & 15;
3602 p1 = GEN_INT (INTVAL (p1) & -16);
3603 addr = gen_rtx_PLUS (SImode, p0, p1);
3604 }
3605 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
3606 rot = p1;
3607 }
3608 }
3609 else if (GET_CODE (addr) == REG)
3610 {
3611 if (!regno_aligned_for_load (REGNO (addr)))
3612 rot = addr;
3613 }
3614 else if (GET_CODE (addr) == CONST)
3615 {
3616 if (GET_CODE (XEXP (addr, 0)) == PLUS
3617 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3618 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3619 {
3620 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3621 if (rot_amt & -16)
3622 addr = gen_rtx_CONST (Pmode,
3623 gen_rtx_PLUS (Pmode,
3624 XEXP (XEXP (addr, 0), 0),
3625 GEN_INT (rot_amt & -16)));
3626 else
3627 addr = XEXP (XEXP (addr, 0), 0);
3628 }
3629 else
3630 rot = addr;
3631 }
3632 else if (GET_CODE (addr) == CONST_INT)
3633 {
3634 rot_amt = INTVAL (addr);
3635 addr = GEN_INT (rot_amt & -16);
3636 }
3637 else if (!ALIGNED_SYMBOL_REF_P (addr))
3638 rot = addr;
3639
3640 if (GET_MODE_SIZE (mode) < 4)
3641 rot_amt += GET_MODE_SIZE (mode) - 4;
3642
3643 rot_amt &= 15;
3644
3645 if (rot && rot_amt)
3646 {
3647 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3648 rot = ops[3];
3649 rot_amt = 0;
3650 }
3651
3652 load = ops[2];
3653
3654 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3655 mem = change_address (ops[1], TImode, addr);
3656
3657 emit_insn (gen_movti (load, mem));
3658
3659 if (rot)
3660 emit_insn (gen_rotqby_ti (load, load, rot));
3661 else if (rot_amt)
3662 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3663
3664 if (reload_completed)
3665 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3666 else
3667 emit_insn (gen_spu_convert (ops[0], load));
3668 }
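
/* Worked example (for exposition): an SImode load from (plus $sp 4) with
   $sp known aligned takes the "aligned reg + unaligned const" path above:
   the quadword at ($sp + 0) is loaded, then rotated left by 4 bytes
   (rot_amt * 8 bits) so the requested word lands in the preferred slot
   before being converted to the destination mode.  */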
3669
3670 void
3671 spu_split_store (rtx * ops)
3672 {
3673 enum machine_mode mode = GET_MODE (ops[0]);
3674 rtx pat = ops[2];
3675 rtx reg = ops[3];
3676 rtx addr, p0, p1, p1_lo, smem;
3677 int aform;
3678 int scalar;
3679
3680 addr = XEXP (ops[0], 0);
3681
3682 if (GET_CODE (addr) == PLUS)
3683 {
3684 /* 8 cases:
3685 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3686 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3687 aligned reg + aligned const => lqd, c?d, shuf, stqx
3688 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3689 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3690 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3691 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3692 unaligned reg + unaligned const => not allowed by legitimate address
3693 */
3694 aform = 0;
3695 p0 = XEXP (addr, 0);
3696 p1 = p1_lo = XEXP (addr, 1);
3697 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3698 {
3699 p1_lo = GEN_INT (INTVAL (p1) & 15);
3700 p1 = GEN_INT (INTVAL (p1) & -16);
3701 addr = gen_rtx_PLUS (SImode, p0, p1);
3702 }
3703 }
3704 else if (GET_CODE (addr) == REG)
3705 {
3706 aform = 0;
3707 p0 = addr;
3708 p1 = p1_lo = const0_rtx;
3709 }
3710 else
3711 {
3712 aform = 1;
3713 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3714 p1 = 0; /* aform doesn't use p1 */
3715 p1_lo = addr;
3716 if (ALIGNED_SYMBOL_REF_P (addr))
3717 p1_lo = const0_rtx;
3718 else if (GET_CODE (addr) == CONST)
3719 {
3720 if (GET_CODE (XEXP (addr, 0)) == PLUS
3721 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3722 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3723 {
3724 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3725 if ((v & -16) != 0)
3726 addr = gen_rtx_CONST (Pmode,
3727 gen_rtx_PLUS (Pmode,
3728 XEXP (XEXP (addr, 0), 0),
3729 GEN_INT (v & -16)));
3730 else
3731 addr = XEXP (XEXP (addr, 0), 0);
3732 p1_lo = GEN_INT (v & 15);
3733 }
3734 }
3735 else if (GET_CODE (addr) == CONST_INT)
3736 {
3737 p1_lo = GEN_INT (INTVAL (addr) & 15);
3738 addr = GEN_INT (INTVAL (addr) & -16);
3739 }
3740 }
3741
3742 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3743
3744 scalar = store_with_one_insn_p (ops[0]);
3745 if (!scalar)
3746 {
3747 /* We could copy the flags from the ops[0] MEM to mem here, but we
3748 don't, because we want this load to be optimized away if
3749 possible, and copying the flags will prevent that in certain
3750 cases, e.g. consider the volatile flag. */
3751
3752 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3753 set_mem_alias_set (lmem, 0);
3754 emit_insn (gen_movti (reg, lmem));
3755
3756 if (!p0 || regno_aligned_for_load (REGNO (p0)))
3757 p0 = stack_pointer_rtx;
3758 if (!p1_lo)
3759 p1_lo = const0_rtx;
3760
3761 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3762 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3763 }
3764 else if (reload_completed)
3765 {
3766 if (GET_CODE (ops[1]) == REG)
3767 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3768 else if (GET_CODE (ops[1]) == SUBREG)
3769 emit_move_insn (reg,
3770 gen_rtx_REG (GET_MODE (reg),
3771 REGNO (SUBREG_REG (ops[1]))));
3772 else
3773 abort ();
3774 }
3775 else
3776 {
3777 if (GET_CODE (ops[1]) == REG)
3778 emit_insn (gen_spu_convert (reg, ops[1]));
3779 else if (GET_CODE (ops[1]) == SUBREG)
3780 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3781 else
3782 abort ();
3783 }
3784
3785 if (GET_MODE_SIZE (mode) < 4 && scalar)
3786 emit_insn (gen_shlqby_ti
3787 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3788
3789 smem = change_address (ops[0], TImode, addr);
3790 /* We can't use the previous alias set because the memory has changed
3791 size and can potentially overlap objects of other types. */
3792 set_mem_alias_set (smem, 0);
3793
3794 emit_insn (gen_movti (smem, reg));
3795 }
3796
3797 /* Return TRUE if X is MEM which is a struct member reference
3798 and the member can safely be loaded and stored with a single
3799 instruction because it is padded. */
3800 static int
3801 mem_is_padded_component_ref (rtx x)
3802 {
3803 tree t = MEM_EXPR (x);
3804 tree r;
3805 if (!t || TREE_CODE (t) != COMPONENT_REF)
3806 return 0;
3807 t = TREE_OPERAND (t, 1);
3808 if (!t || TREE_CODE (t) != FIELD_DECL
3809 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3810 return 0;
3811 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3812 r = DECL_FIELD_CONTEXT (t);
3813 if (!r || TREE_CODE (r) != RECORD_TYPE)
3814 return 0;
3815 /* Make sure they are the same mode */
3816 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3817 return 0;
3818 /* If there are no following fields then the field alignment assures
3819 the structure is padded to the alignment which means this field is
3820 padded too. */
3821 if (TREE_CHAIN (t) == 0)
3822 return 1;
3823 /* If the following field is also aligned then this field will be
3824 padded. */
3825 t = TREE_CHAIN (t);
3826 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3827 return 1;
3828 return 0;
3829 }
3830
3831 /* Parse the -mfixed-range= option string. */
3832 static void
3833 fix_range (const char *const_str)
3834 {
3835 int i, first, last;
3836 char *str, *dash, *comma;
3837
3838 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3839 REG2 are either register names or register numbers. The effect
3840 of this option is to mark the registers in the range from REG1 to
3841 REG2 as ``fixed'' so they won't be used by the compiler. */
3842
3843 i = strlen (const_str);
3844 str = (char *) alloca (i + 1);
3845 memcpy (str, const_str, i + 1);
3846
3847 while (1)
3848 {
3849 dash = strchr (str, '-');
3850 if (!dash)
3851 {
3852 warning (0, "value of -mfixed-range must have form REG1-REG2");
3853 return;
3854 }
3855 *dash = '\0';
3856 comma = strchr (dash + 1, ',');
3857 if (comma)
3858 *comma = '\0';
3859
3860 first = decode_reg_name (str);
3861 if (first < 0)
3862 {
3863 warning (0, "unknown register name: %s", str);
3864 return;
3865 }
3866
3867 last = decode_reg_name (dash + 1);
3868 if (last < 0)
3869 {
3870 warning (0, "unknown register name: %s", dash + 1);
3871 return;
3872 }
3873
3874 *dash = '-';
3875
3876 if (first > last)
3877 {
3878 warning (0, "%s-%s is an empty range", str, dash + 1);
3879 return;
3880 }
3881
3882 for (i = first; i <= last; ++i)
3883 fixed_regs[i] = call_used_regs[i] = 1;
3884
3885 if (!comma)
3886 break;
3887
3888 *comma = ',';
3889 str = comma + 1;
3890 }
3891 }
3892
3893 int
3894 spu_valid_move (rtx * ops)
3895 {
3896 enum machine_mode mode = GET_MODE (ops[0]);
3897 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3898 return 0;
3899
3900 /* init_expr_once tries to recog against load and store insns to set
3901 the direct_load[] and direct_store[] arrays. We always want to
3902 consider those loads and stores valid. init_expr_once is called in
3903 the context of a dummy function which does not have a decl. */
3904 if (cfun->decl == 0)
3905 return 1;
3906
3907 /* Don't allow loads/stores which would require more than 1 insn.
3908 During and after reload we assume loads and stores only take 1
3909 insn. */
3910 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3911 {
3912 if (GET_CODE (ops[0]) == MEM
3913 && (GET_MODE_SIZE (mode) < 4
3914 || !(store_with_one_insn_p (ops[0])
3915 || mem_is_padded_component_ref (ops[0]))))
3916 return 0;
3917 if (GET_CODE (ops[1]) == MEM
3918 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3919 return 0;
3920 }
3921 return 1;
3922 }
3923
3924 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3925 can be generated using the fsmbi instruction. */
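/* The fsmbi instruction expands each bit of a 16-bit immediate into a
   full byte of the result (0x00 or 0xff), so a constant qualifies when
   every byte of its quadword pattern is either 0x00 or 0xff, for
   example the SImode constant 0xffff0000 or a V8HImode splat of 0x00ff.  */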
3926 int
3927 fsmbi_const_p (rtx x)
3928 {
3929 if (CONSTANT_P (x))
3930 {
3931 /* We can always choose TImode for CONST_INT because the high bits
3932 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3933 enum immediate_class c = classify_immediate (x, TImode);
3934 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
3935 }
3936 return 0;
3937 }
3938
3939 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3940 can be generated using the cbd, chd, cwd or cdd instruction. */
3941 int
3942 cpat_const_p (rtx x, enum machine_mode mode)
3943 {
3944 if (CONSTANT_P (x))
3945 {
3946 enum immediate_class c = classify_immediate (x, mode);
3947 return c == IC_CPAT;
3948 }
3949 return 0;
3950 }
3951
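/* Build the TImode shuffle-mask constant that a cbd/chd/cwd/cdd style
   insertion corresponds to when the (base + offset) address is known at
   compile time: the identity pattern 0x10..0x1f (select the original
   vector from the second shufb source) with indices for the scalar's
   bytes dropped in at the insertion offset.  For example, a 4-byte
   insertion at offset 4 gives
   { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19, ... ,0x1f }.
   For 1- and 2-byte insertions the scalar indices start at 3 and 2,
   matching the scalar's position within its preferred slot.  */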
3952 rtx
3953 gen_cpat_const (rtx * ops)
3954 {
3955 unsigned char dst[16];
3956 int i, offset, shift, isize;
3957 if (GET_CODE (ops[3]) != CONST_INT
3958 || GET_CODE (ops[2]) != CONST_INT
3959 || (GET_CODE (ops[1]) != CONST_INT
3960 && GET_CODE (ops[1]) != REG))
3961 return 0;
3962 if (GET_CODE (ops[1]) == REG
3963 && (!REG_POINTER (ops[1])
3964 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3965 return 0;
3966
3967 for (i = 0; i < 16; i++)
3968 dst[i] = i + 16;
3969 isize = INTVAL (ops[3]);
3970 if (isize == 1)
3971 shift = 3;
3972 else if (isize == 2)
3973 shift = 2;
3974 else
3975 shift = 0;
3976 offset = (INTVAL (ops[2]) +
3977 (GET_CODE (ops[1]) ==
3978 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3979 for (i = 0; i < isize; i++)
3980 dst[offset + i] = i + shift;
3981 return array_to_constant (TImode, dst);
3982 }
3983
3984 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3985 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3986 than 16 bytes, the value is repeated across the rest of the array. */
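/* For example, the SImode constant 0x01020304 becomes
   { 01 02 03 04  01 02 03 04  01 02 03 04  01 02 03 04 }.  */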
3987 void
3988 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
3989 {
3990 HOST_WIDE_INT val;
3991 int i, j, first;
3992
3993 memset (arr, 0, 16);
3994 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
3995 if (GET_CODE (x) == CONST_INT
3996 || (GET_CODE (x) == CONST_DOUBLE
3997 && (mode == SFmode || mode == DFmode)))
3998 {
3999 gcc_assert (mode != VOIDmode && mode != BLKmode);
4000
4001 if (GET_CODE (x) == CONST_DOUBLE)
4002 val = const_double_to_hwint (x);
4003 else
4004 val = INTVAL (x);
4005 first = GET_MODE_SIZE (mode) - 1;
4006 for (i = first; i >= 0; i--)
4007 {
4008 arr[i] = val & 0xff;
4009 val >>= 8;
4010 }
4011 /* Splat the constant across the whole array. */
4012 for (j = 0, i = first + 1; i < 16; i++)
4013 {
4014 arr[i] = arr[j];
4015 j = (j == first) ? 0 : j + 1;
4016 }
4017 }
4018 else if (GET_CODE (x) == CONST_DOUBLE)
4019 {
4020 val = CONST_DOUBLE_LOW (x);
4021 for (i = 15; i >= 8; i--)
4022 {
4023 arr[i] = val & 0xff;
4024 val >>= 8;
4025 }
4026 val = CONST_DOUBLE_HIGH (x);
4027 for (i = 7; i >= 0; i--)
4028 {
4029 arr[i] = val & 0xff;
4030 val >>= 8;
4031 }
4032 }
4033 else if (GET_CODE (x) == CONST_VECTOR)
4034 {
4035 int units;
4036 rtx elt;
4037 mode = GET_MODE_INNER (mode);
4038 units = CONST_VECTOR_NUNITS (x);
4039 for (i = 0; i < units; i++)
4040 {
4041 elt = CONST_VECTOR_ELT (x, i);
4042 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4043 {
4044 if (GET_CODE (elt) == CONST_DOUBLE)
4045 val = const_double_to_hwint (elt);
4046 else
4047 val = INTVAL (elt);
4048 first = GET_MODE_SIZE (mode) - 1;
4049 if (first + i * GET_MODE_SIZE (mode) > 16)
4050 abort ();
4051 for (j = first; j >= 0; j--)
4052 {
4053 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4054 val >>= 8;
4055 }
4056 }
4057 }
4058 }
4059 else
4060 gcc_unreachable();
4061 }
4062
4063 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4064 smaller than 16 bytes, use the bytes that would represent that value
4065 in a register, e.g., for QImode return the value of arr[3]. */
4066 rtx
4067 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4068 {
4069 enum machine_mode inner_mode;
4070 rtvec v;
4071 int units, size, i, j, k;
4072 HOST_WIDE_INT val;
4073
4074 if (GET_MODE_CLASS (mode) == MODE_INT
4075 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4076 {
4077 j = GET_MODE_SIZE (mode);
4078 i = j < 4 ? 4 - j : 0;
4079 for (val = 0; i < j; i++)
4080 val = (val << 8) | arr[i];
4081 val = trunc_int_for_mode (val, mode);
4082 return GEN_INT (val);
4083 }
4084
4085 if (mode == TImode)
4086 {
4087 HOST_WIDE_INT high;
4088 for (i = high = 0; i < 8; i++)
4089 high = (high << 8) | arr[i];
4090 for (i = 8, val = 0; i < 16; i++)
4091 val = (val << 8) | arr[i];
4092 return immed_double_const (val, high, TImode);
4093 }
4094 if (mode == SFmode)
4095 {
4096 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4097 val = trunc_int_for_mode (val, SImode);
4098 return hwint_to_const_double (SFmode, val);
4099 }
4100 if (mode == DFmode)
4101 {
4102 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4103 val <<= 32;
4104 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
4105 return hwint_to_const_double (DFmode, val);
4106 }
4107
4108 if (!VECTOR_MODE_P (mode))
4109 abort ();
4110
4111 units = GET_MODE_NUNITS (mode);
4112 size = GET_MODE_UNIT_SIZE (mode);
4113 inner_mode = GET_MODE_INNER (mode);
4114 v = rtvec_alloc (units);
4115
4116 for (k = i = 0; i < units; ++i)
4117 {
4118 val = 0;
4119 for (j = 0; j < size; j++, k++)
4120 val = (val << 8) | arr[k];
4121
4122 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4123 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4124 else
4125 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4126 }
4127 if (k > 16)
4128 abort ();
4129
4130 return gen_rtx_CONST_VECTOR (mode, v);
4131 }
4132
4133 static void
4134 reloc_diagnostic (rtx x)
4135 {
4136 tree loc_decl, decl = 0;
4137 const char *msg;
4138 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4139 return;
4140
4141 if (GET_CODE (x) == SYMBOL_REF)
4142 decl = SYMBOL_REF_DECL (x);
4143 else if (GET_CODE (x) == CONST
4144 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4145 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4146
4147 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4148 if (decl && !DECL_P (decl))
4149 decl = 0;
4150
4151 /* We use last_assemble_variable_decl to get line information. It's
4152 not always going to be right and might not even be close, but will
4153 be right for the more common cases. */
4154 if (!last_assemble_variable_decl || in_section == ctors_section)
4155 loc_decl = decl;
4156 else
4157 loc_decl = last_assemble_variable_decl;
4158
4159 /* The decl could be a string constant. */
4160 if (decl && DECL_P (decl))
4161 msg = "%Jcreating run-time relocation for %qD";
4162 else
4163 msg = "creating run-time relocation";
4164
4165 if (TARGET_WARN_RELOC)
4166 warning (0, msg, loc_decl, decl);
4167 else
4168 error (msg, loc_decl, decl);
4169 }
4170
4171 /* Hook into assemble_integer so we can generate an error for run-time
4172 relocations. The SPU ABI disallows them. */
4173 static bool
4174 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4175 {
4176 /* By default run-time relocations aren't supported, but we allow them
4177 in case users support them in their own run-time loader. And we provide
4178 a warning for those users that don't. */
4179 if ((GET_CODE (x) == SYMBOL_REF)
4180 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4181 reloc_diagnostic (x);
4182
4183 return default_assemble_integer (x, size, aligned_p);
4184 }
4185
4186 static void
4187 spu_asm_globalize_label (FILE * file, const char *name)
4188 {
4189 fputs ("\t.global\t", file);
4190 assemble_name (file, name);
4191 fputs ("\n", file);
4192 }
4193
4194 static bool
4195 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4196 {
4197 enum machine_mode mode = GET_MODE (x);
4198 int cost = COSTS_N_INSNS (2);
4199
4200 /* Folding to a CONST_VECTOR will use extra space but there might
4201 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4202 only if it allows us to fold away multiple insns. Changing the cost
4203 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4204 because this cost will only be compared against a single insn.
4205 if (code == CONST_VECTOR)
4206 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4207 */
4208
4209 /* Use defaults for float operations. Not accurate but good enough. */
4210 if (mode == DFmode)
4211 {
4212 *total = COSTS_N_INSNS (13);
4213 return true;
4214 }
4215 if (mode == SFmode)
4216 {
4217 *total = COSTS_N_INSNS (6);
4218 return true;
4219 }
4220 switch (code)
4221 {
4222 case CONST_INT:
4223 if (satisfies_constraint_K (x))
4224 *total = 0;
4225 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4226 *total = COSTS_N_INSNS (1);
4227 else
4228 *total = COSTS_N_INSNS (3);
4229 return true;
4230
4231 case CONST:
4232 *total = COSTS_N_INSNS (3);
4233 return true;
4234
4235 case LABEL_REF:
4236 case SYMBOL_REF:
4237 *total = COSTS_N_INSNS (0);
4238 return true;
4239
4240 case CONST_DOUBLE:
4241 *total = COSTS_N_INSNS (5);
4242 return true;
4243
4244 case FLOAT_EXTEND:
4245 case FLOAT_TRUNCATE:
4246 case FLOAT:
4247 case UNSIGNED_FLOAT:
4248 case FIX:
4249 case UNSIGNED_FIX:
4250 *total = COSTS_N_INSNS (7);
4251 return true;
4252
4253 case PLUS:
4254 if (mode == TImode)
4255 {
4256 *total = COSTS_N_INSNS (9);
4257 return true;
4258 }
4259 break;
4260
4261 case MULT:
4262 cost =
4263 GET_CODE (XEXP (x, 0)) ==
4264 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4265 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4266 {
4267 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4268 {
4269 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4270 cost = COSTS_N_INSNS (14);
4271 if ((val & 0xffff) == 0)
4272 cost = COSTS_N_INSNS (9);
4273 else if (val > 0 && val < 0x10000)
4274 cost = COSTS_N_INSNS (11);
4275 }
4276 }
4277 *total = cost;
4278 return true;
4279 case DIV:
4280 case UDIV:
4281 case MOD:
4282 case UMOD:
4283 *total = COSTS_N_INSNS (20);
4284 return true;
4285 case ROTATE:
4286 case ROTATERT:
4287 case ASHIFT:
4288 case ASHIFTRT:
4289 case LSHIFTRT:
4290 *total = COSTS_N_INSNS (4);
4291 return true;
4292 case UNSPEC:
4293 if (XINT (x, 1) == UNSPEC_CONVERT)
4294 *total = COSTS_N_INSNS (0);
4295 else
4296 *total = COSTS_N_INSNS (4);
4297 return true;
4298 }
4299 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
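/* For example, a DImode integer operation is charged (8/4)^2 = 4 times
   the SImode cost, and a TImode operation 16 times.  */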
4300 if (GET_MODE_CLASS (mode) == MODE_INT
4301 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4302 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4303 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4304 *total = cost;
4305 return true;
4306 }
4307
4308 static enum machine_mode
4309 spu_unwind_word_mode (void)
4310 {
4311 return SImode;
4312 }
4313
4314 /* Decide whether we can make a sibling call to a function. DECL is the
4315 declaration of the function being targeted by the call and EXP is the
4316 CALL_EXPR representing the call. */
4317 static bool
4318 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4319 {
4320 return decl && !TARGET_LARGE_MEM;
4321 }
4322
4323 /* We need to correctly update the back chain pointer and the Available
4324 Stack Size (which is in the second slot of the sp register.) */
4325 void
4326 spu_allocate_stack (rtx op0, rtx op1)
4327 {
4328 HOST_WIDE_INT v;
4329 rtx chain = gen_reg_rtx (V4SImode);
4330 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4331 rtx sp = gen_reg_rtx (V4SImode);
4332 rtx splatted = gen_reg_rtx (V4SImode);
4333 rtx pat = gen_reg_rtx (TImode);
4334
4335 /* copy the back chain so we can save it back again. */
4336 emit_move_insn (chain, stack_bot);
4337
4338 op1 = force_reg (SImode, op1);
4339
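/* The shuffle pattern built below is { 00 01 02 03 } repeated four
   times; it splats the preferred-slot word of op1 (the allocation size)
   into every slot, so the subtraction below adjusts the stack pointer
   in slot 0 and the Available Stack Size in slot 1 at once.  */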
4340 v = 0x1020300010203ll;
4341 emit_move_insn (pat, immed_double_const (v, v, TImode));
4342 emit_insn (gen_shufb (splatted, op1, op1, pat));
4343
4344 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4345 emit_insn (gen_subv4si3 (sp, sp, splatted));
4346
4347 if (flag_stack_check)
4348 {
4349 rtx avail = gen_reg_rtx(SImode);
4350 rtx result = gen_reg_rtx(SImode);
4351 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4352 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4353 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4354 }
4355
4356 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4357
4358 emit_move_insn (stack_bot, chain);
4359
4360 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4361 }
4362
4363 void
4364 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4365 {
4366 static unsigned char arr[16] =
4367 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4368 rtx temp = gen_reg_rtx (SImode);
4369 rtx temp2 = gen_reg_rtx (SImode);
4370 rtx temp3 = gen_reg_rtx (V4SImode);
4371 rtx temp4 = gen_reg_rtx (V4SImode);
4372 rtx pat = gen_reg_rtx (TImode);
4373 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4374
4375 /* Restore the backchain from the first word, sp from the second. */
4376 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4377 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4378
4379 emit_move_insn (pat, array_to_constant (TImode, arr));
4380
4381 /* Compute Available Stack Size for sp */
4382 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4383 emit_insn (gen_shufb (temp3, temp, temp, pat));
4384
4385 /* Compute Available Stack Size for back chain */
4386 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4387 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4388 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4389
4390 emit_insn (gen_addv4si3 (sp, sp, temp3));
4391 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4392 }
4393
4394 static void
4395 spu_init_libfuncs (void)
4396 {
4397 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4398 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4399 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4400 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4401 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4402 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4403 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4404 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4405 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4406 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4407 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4408
4409 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4410 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4411
4412 set_optab_libfunc (smul_optab, TImode, "__multi3");
4413 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
4414 set_optab_libfunc (smod_optab, TImode, "__modti3");
4415 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
4416 set_optab_libfunc (umod_optab, TImode, "__umodti3");
4417 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
4418 }
4419
4420 /* Make a subreg, stripping any existing subreg. We could possibly just
4421 call simplify_subreg, but in this case we know what we want. */
4422 rtx
4423 spu_gen_subreg (enum machine_mode mode, rtx x)
4424 {
4425 if (GET_CODE (x) == SUBREG)
4426 x = SUBREG_REG (x);
4427 if (GET_MODE (x) == mode)
4428 return x;
4429 return gen_rtx_SUBREG (mode, x, 0);
4430 }
4431
4432 static bool
4433 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4434 {
4435 return (TYPE_MODE (type) == BLKmode
4436 && ((type) == 0
4437 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4438 || int_size_in_bytes (type) >
4439 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4440 }
4441 \f
4442 /* Create the built-in types and functions */
4443
4444 struct spu_builtin_description spu_builtins[] = {
4445 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4446 {fcode, icode, name, type, params, NULL_TREE},
4447 #include "spu-builtins.def"
4448 #undef DEF_BUILTIN
4449 };
4450
4451 static void
4452 spu_init_builtins (void)
4453 {
4454 struct spu_builtin_description *d;
4455 unsigned int i;
4456
4457 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4458 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4459 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4460 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4461 V4SF_type_node = build_vector_type (float_type_node, 4);
4462 V2DF_type_node = build_vector_type (double_type_node, 2);
4463
4464 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4465 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4466 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4467 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4468
4469 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
4470
4471 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4472 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4473 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4474 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4475 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4476 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4477 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4478 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4479 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4480 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4481 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4482 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4483
4484 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4485 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4486 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4487 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4488 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4489 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4490 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4491 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4492
4493 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4494 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4495
4496 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4497
4498 spu_builtin_types[SPU_BTI_PTR] =
4499 build_pointer_type (build_qualified_type
4500 (void_type_node,
4501 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4502
4503 /* For each builtin we build a new prototype. The tree code will make
4504 sure nodes are shared. */
4505 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4506 {
4507 tree p;
4508 char name[64]; /* build_function will make a copy. */
4509 int parm;
4510
4511 if (d->name == 0)
4512 continue;
4513
4514 /* Find last parm. */
4515 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4516 ;
4517
4518 p = void_list_node;
4519 while (parm > 1)
4520 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4521
4522 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4523
4524 sprintf (name, "__builtin_%s", d->name);
4525 d->fndecl =
4526 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4527 NULL, NULL_TREE);
4528 if (d->fcode == SPU_MASK_FOR_LOAD)
4529 TREE_READONLY (d->fndecl) = 1;
4530
4531 /* These builtins don't throw. */
4532 TREE_NOTHROW (d->fndecl) = 1;
4533 }
4534 }
4535
4536 void
4537 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4538 {
4539 static unsigned char arr[16] =
4540 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4541
4542 rtx temp = gen_reg_rtx (Pmode);
4543 rtx temp2 = gen_reg_rtx (V4SImode);
4544 rtx temp3 = gen_reg_rtx (V4SImode);
4545 rtx pat = gen_reg_rtx (TImode);
4546 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4547
4548 emit_move_insn (pat, array_to_constant (TImode, arr));
4549
4550 /* Restore the sp. */
4551 emit_move_insn (temp, op1);
4552 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4553
4554 /* Compute available stack size for sp. */
4555 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4556 emit_insn (gen_shufb (temp3, temp, temp, pat));
4557
4558 emit_insn (gen_addv4si3 (sp, sp, temp3));
4559 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4560 }
4561
4562 int
4563 spu_safe_dma (HOST_WIDE_INT channel)
4564 {
4565 return (channel >= 21 && channel <= 27);
4566 }
4567
4568 void
4569 spu_builtin_splats (rtx ops[])
4570 {
4571 enum machine_mode mode = GET_MODE (ops[0]);
4572 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4573 {
4574 unsigned char arr[16];
4575 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4576 emit_move_insn (ops[0], array_to_constant (mode, arr));
4577 }
4578 else
4579 {
4580 rtx reg = gen_reg_rtx (TImode);
4581 rtx shuf;
4582 if (GET_CODE (ops[1]) != REG
4583 && GET_CODE (ops[1]) != SUBREG)
4584 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
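/* Each selector byte of a shufb pattern in the range 0x00-0x0f picks a
   byte from the first source and 0x10-0x1f from the second; since both
   sources below are ops[1], these constants simply replicate the element
   sitting in ops[1]'s preferred slot across the whole vector.  */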
4585 switch (mode)
4586 {
4587 case V2DImode:
4588 case V2DFmode:
4589 shuf =
4590 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4591 TImode);
4592 break;
4593 case V4SImode:
4594 case V4SFmode:
4595 shuf =
4596 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4597 TImode);
4598 break;
4599 case V8HImode:
4600 shuf =
4601 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4602 TImode);
4603 break;
4604 case V16QImode:
4605 shuf =
4606 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4607 TImode);
4608 break;
4609 default:
4610 abort ();
4611 }
4612 emit_move_insn (reg, shuf);
4613 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4614 }
4615 }
4616
4617 void
4618 spu_builtin_extract (rtx ops[])
4619 {
4620 enum machine_mode mode;
4621 rtx rot, from, tmp;
4622
4623 mode = GET_MODE (ops[1]);
4624
4625 if (GET_CODE (ops[2]) == CONST_INT)
4626 {
4627 switch (mode)
4628 {
4629 case V16QImode:
4630 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4631 break;
4632 case V8HImode:
4633 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4634 break;
4635 case V4SFmode:
4636 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4637 break;
4638 case V4SImode:
4639 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4640 break;
4641 case V2DImode:
4642 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4643 break;
4644 case V2DFmode:
4645 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4646 break;
4647 default:
4648 abort ();
4649 }
4650 return;
4651 }
4652
4653 from = spu_gen_subreg (TImode, ops[1]);
4654 rot = gen_reg_rtx (TImode);
4655 tmp = gen_reg_rtx (SImode);
4656
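/* For a variable index, compute the left byte rotation that brings
   element ops[2] of the source into the preferred slot, then rotate
   and convert.  */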
4657 switch (mode)
4658 {
4659 case V16QImode:
4660 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4661 break;
4662 case V8HImode:
4663 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4664 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4665 break;
4666 case V4SFmode:
4667 case V4SImode:
4668 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4669 break;
4670 case V2DImode:
4671 case V2DFmode:
4672 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4673 break;
4674 default:
4675 abort ();
4676 }
4677 emit_insn (gen_rotqby_ti (rot, from, tmp));
4678
4679 emit_insn (gen_spu_convert (ops[0], rot));
4680 }
4681
4682 void
4683 spu_builtin_insert (rtx ops[])
4684 {
4685 enum machine_mode mode = GET_MODE (ops[0]);
4686 enum machine_mode imode = GET_MODE_INNER (mode);
4687 rtx mask = gen_reg_rtx (TImode);
4688 rtx offset;
4689
4690 if (GET_CODE (ops[3]) == CONST_INT)
4691 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4692 else
4693 {
4694 offset = gen_reg_rtx (SImode);
4695 emit_insn (gen_mulsi3
4696 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4697 }
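/* Generate the insertion mask with a cpat (cbd/chd/cwd/cdd) pattern.
   The stack pointer serves as the base register here presumably because
   it is known to be 16-byte aligned, so the resulting mask depends only
   on OFFSET.  */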
4698 emit_insn (gen_cpat
4699 (mask, stack_pointer_rtx, offset,
4700 GEN_INT (GET_MODE_SIZE (imode))));
4701 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4702 }
4703
4704 void
4705 spu_builtin_promote (rtx ops[])
4706 {
4707 enum machine_mode mode, imode;
4708 rtx rot, from, offset;
4709 HOST_WIDE_INT pos;
4710
4711 mode = GET_MODE (ops[0]);
4712 imode = GET_MODE_INNER (mode);
4713
4714 from = gen_reg_rtx (TImode);
4715 rot = spu_gen_subreg (TImode, ops[0]);
4716
4717 emit_insn (gen_spu_convert (from, ops[1]));
4718
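/* Compute the left byte rotation that moves the scalar from the
   preferred slot of FROM to element position ops[2] of the result.  */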
4719 if (GET_CODE (ops[2]) == CONST_INT)
4720 {
4721 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4722 if (GET_MODE_SIZE (imode) < 4)
4723 pos += 4 - GET_MODE_SIZE (imode);
4724 offset = GEN_INT (pos & 15);
4725 }
4726 else
4727 {
4728 offset = gen_reg_rtx (SImode);
4729 switch (mode)
4730 {
4731 case V16QImode:
4732 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4733 break;
4734 case V8HImode:
4735 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4736 emit_insn (gen_addsi3 (offset, offset, offset));
4737 break;
4738 case V4SFmode:
4739 case V4SImode:
4740 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4741 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4742 break;
4743 case V2DImode:
4744 case V2DFmode:
4745 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4746 break;
4747 default:
4748 abort ();
4749 }
4750 }
4751 emit_insn (gen_rotqby_ti (rot, from, offset));
4752 }
4753
4754 void
4755 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4756 {
4757 rtx shuf = gen_reg_rtx (V4SImode);
4758 rtx insn = gen_reg_rtx (V4SImode);
4759 rtx shufc;
4760 rtx insnc;
4761 rtx mem;
4762
4763 fnaddr = force_reg (SImode, fnaddr);
4764 cxt = force_reg (SImode, cxt);
4765
4766 if (TARGET_LARGE_MEM)
4767 {
4768 rtx rotl = gen_reg_rtx (V4SImode);
4769 rtx mask = gen_reg_rtx (V4SImode);
4770 rtx bi = gen_reg_rtx (SImode);
4771 unsigned char shufa[16] = {
4772 2, 3, 0, 1, 18, 19, 16, 17,
4773 0, 1, 2, 3, 16, 17, 18, 19
4774 };
4775 unsigned char insna[16] = {
4776 0x41, 0, 0, 79,
4777 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4778 0x60, 0x80, 0, 79,
4779 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4780 };
4781
4782 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4783 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4784
4785 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4786 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4787 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4788 emit_insn (gen_selb (insn, insnc, rotl, mask));
4789
4790 mem = memory_address (Pmode, tramp);
4791 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4792
4793 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4794 mem = memory_address (Pmode, plus_constant (tramp, 16));
4795 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4796 }
4797 else
4798 {
4799 rtx scxt = gen_reg_rtx (SImode);
4800 rtx sfnaddr = gen_reg_rtx (SImode);
4801 unsigned char insna[16] = {
4802 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4803 0x30, 0, 0, 0,
4804 0, 0, 0, 0,
4805 0, 0, 0, 0
4806 };
4807
4808 shufc = gen_reg_rtx (TImode);
4809 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4810
4811 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4812 fits 18 bits and the last 4 are zeros. This will be true if
4813 the stack pointer is initialized to 0x3fff0 at program start;
4814 otherwise the ila instruction will be garbage. */
4815
4816 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4817 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4818 emit_insn (gen_cpat
4819 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4820 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4821 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4822
4823 mem = memory_address (Pmode, tramp);
4824 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4825
4826 }
4827 emit_insn (gen_sync ());
4828 }
4829
4830 void
4831 spu_expand_sign_extend (rtx ops[])
4832 {
4833 unsigned char arr[16];
4834 rtx pat = gen_reg_rtx (TImode);
4835 rtx sign, c;
4836 int i, last;
4837 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4838 if (GET_MODE (ops[1]) == QImode)
4839 {
4840 sign = gen_reg_rtx (HImode);
4841 emit_insn (gen_extendqihi2 (sign, ops[1]));
4842 for (i = 0; i < 16; i++)
4843 arr[i] = 0x12;
4844 arr[last] = 0x13;
4845 }
4846 else
4847 {
4848 for (i = 0; i < 16; i++)
4849 arr[i] = 0x10;
4850 switch (GET_MODE (ops[1]))
4851 {
4852 case HImode:
4853 sign = gen_reg_rtx (SImode);
4854 emit_insn (gen_extendhisi2 (sign, ops[1]));
4855 arr[last] = 0x03;
4856 arr[last - 1] = 0x02;
4857 break;
4858 case SImode:
4859 sign = gen_reg_rtx (SImode);
4860 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4861 for (i = 0; i < 4; i++)
4862 arr[last - i] = 3 - i;
4863 break;
4864 case DImode:
4865 sign = gen_reg_rtx (SImode);
4866 c = gen_reg_rtx (SImode);
4867 emit_insn (gen_spu_convert (c, ops[1]));
4868 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4869 for (i = 0; i < 8; i++)
4870 arr[last - i] = 7 - i;
4871 break;
4872 default:
4873 abort ();
4874 }
4875 }
4876 emit_move_insn (pat, array_to_constant (TImode, arr));
4877 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4878 }
4879
4880 /* expand vector initialization. If there are any constant parts,
4881 load constant parts first. Then load any non-constant parts. */
4882 void
4883 spu_expand_vector_init (rtx target, rtx vals)
4884 {
4885 enum machine_mode mode = GET_MODE (target);
4886 int n_elts = GET_MODE_NUNITS (mode);
4887 int n_var = 0;
4888 bool all_same = true;
4889 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
4890 int i;
4891
4892 first = XVECEXP (vals, 0, 0);
4893 for (i = 0; i < n_elts; ++i)
4894 {
4895 x = XVECEXP (vals, 0, i);
4896 if (!(CONST_INT_P (x)
4897 || GET_CODE (x) == CONST_DOUBLE
4898 || GET_CODE (x) == CONST_FIXED))
4899 ++n_var;
4900 else
4901 {
4902 if (first_constant == NULL_RTX)
4903 first_constant = x;
4904 }
4905 if (i > 0 && !rtx_equal_p (x, first))
4906 all_same = false;
4907 }
4908
4909 /* if all elements are the same, use splats to repeat elements */
4910 if (all_same)
4911 {
4912 if (!CONSTANT_P (first)
4913 && !register_operand (first, GET_MODE (x)))
4914 first = force_reg (GET_MODE (first), first);
4915 emit_insn (gen_spu_splats (target, first));
4916 return;
4917 }
4918
4919 /* load constant parts */
4920 if (n_var != n_elts)
4921 {
4922 if (n_var == 0)
4923 {
4924 emit_move_insn (target,
4925 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4926 }
4927 else
4928 {
4929 rtx constant_parts_rtx = copy_rtx (vals);
4930
4931 gcc_assert (first_constant != NULL_RTX);
4932 /* Fill empty slots with the first constant; this increases
4933 our chance of using splats in the recursive call below. */
4934 for (i = 0; i < n_elts; ++i)
4935 {
4936 x = XVECEXP (constant_parts_rtx, 0, i);
4937 if (!(CONST_INT_P (x)
4938 || GET_CODE (x) == CONST_DOUBLE
4939 || GET_CODE (x) == CONST_FIXED))
4940 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4941 }
4942
4943 spu_expand_vector_init (target, constant_parts_rtx);
4944 }
4945 }
4946
4947 /* load variable parts */
4948 if (n_var != 0)
4949 {
4950 rtx insert_operands[4];
4951
4952 insert_operands[0] = target;
4953 insert_operands[2] = target;
4954 for (i = 0; i < n_elts; ++i)
4955 {
4956 x = XVECEXP (vals, 0, i);
4957 if (!(CONST_INT_P (x)
4958 || GET_CODE (x) == CONST_DOUBLE
4959 || GET_CODE (x) == CONST_FIXED))
4960 {
4961 if (!register_operand (x, GET_MODE (x)))
4962 x = force_reg (GET_MODE (x), x);
4963 insert_operands[1] = x;
4964 insert_operands[3] = GEN_INT (i);
4965 spu_builtin_insert (insert_operands);
4966 }
4967 }
4968 }
4969 }
4970
4971 /* Return the insn index for the vector compare instruction for the given
4972 CODE, DEST_MODE and OP_MODE. Return -1 if a valid insn is not available. */
4973
4974 static int
4975 get_vec_cmp_insn (enum rtx_code code,
4976 enum machine_mode dest_mode,
4977 enum machine_mode op_mode)
4978
4979 {
4980 switch (code)
4981 {
4982 case EQ:
4983 if (dest_mode == V16QImode && op_mode == V16QImode)
4984 return CODE_FOR_ceq_v16qi;
4985 if (dest_mode == V8HImode && op_mode == V8HImode)
4986 return CODE_FOR_ceq_v8hi;
4987 if (dest_mode == V4SImode && op_mode == V4SImode)
4988 return CODE_FOR_ceq_v4si;
4989 if (dest_mode == V4SImode && op_mode == V4SFmode)
4990 return CODE_FOR_ceq_v4sf;
4991 if (dest_mode == V2DImode && op_mode == V2DFmode)
4992 return CODE_FOR_ceq_v2df;
4993 break;
4994 case GT:
4995 if (dest_mode == V16QImode && op_mode == V16QImode)
4996 return CODE_FOR_cgt_v16qi;
4997 if (dest_mode == V8HImode && op_mode == V8HImode)
4998 return CODE_FOR_cgt_v8hi;
4999 if (dest_mode == V4SImode && op_mode == V4SImode)
5000 return CODE_FOR_cgt_v4si;
5001 if (dest_mode == V4SImode && op_mode == V4SFmode)
5002 return CODE_FOR_cgt_v4sf;
5003 if (dest_mode == V2DImode && op_mode == V2DFmode)
5004 return CODE_FOR_cgt_v2df;
5005 break;
5006 case GTU:
5007 if (dest_mode == V16QImode && op_mode == V16QImode)
5008 return CODE_FOR_clgt_v16qi;
5009 if (dest_mode == V8HImode && op_mode == V8HImode)
5010 return CODE_FOR_clgt_v8hi;
5011 if (dest_mode == V4SImode && op_mode == V4SImode)
5012 return CODE_FOR_clgt_v4si;
5013 break;
5014 default:
5015 break;
5016 }
5017 return -1;
5018 }
5019
5020 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5021 DMODE is expected destination mode. This is a recursive function. */
5022
5023 static rtx
5024 spu_emit_vector_compare (enum rtx_code rcode,
5025 rtx op0, rtx op1,
5026 enum machine_mode dmode)
5027 {
5028 int vec_cmp_insn;
5029 rtx mask;
5030 enum machine_mode dest_mode;
5031 enum machine_mode op_mode = GET_MODE (op1);
5032
5033 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5034
5035 /* Floating point vector compare instructions use destination V4SImode.
5036 Double floating point vector compare instructions use destination V2DImode.
5037 Move destination to appropriate mode later. */
5038 if (dmode == V4SFmode)
5039 dest_mode = V4SImode;
5040 else if (dmode == V2DFmode)
5041 dest_mode = V2DImode;
5042 else
5043 dest_mode = dmode;
5044
5045 mask = gen_reg_rtx (dest_mode);
5046 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5047
5048 if (vec_cmp_insn == -1)
5049 {
5050 bool swap_operands = false;
5051 bool try_again = false;
5052 switch (rcode)
5053 {
5054 case LT:
5055 rcode = GT;
5056 swap_operands = true;
5057 try_again = true;
5058 break;
5059 case LTU:
5060 rcode = GTU;
5061 swap_operands = true;
5062 try_again = true;
5063 break;
5064 case NE:
5065 /* Treat A != B as ~(A==B). */
5066 {
5067 enum insn_code nor_code;
5068 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5069 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5070 gcc_assert (nor_code != CODE_FOR_nothing);
5071 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5072 if (dmode != dest_mode)
5073 {
5074 rtx temp = gen_reg_rtx (dest_mode);
5075 convert_move (temp, mask, 0);
5076 return temp;
5077 }
5078 return mask;
5079 }
5080 break;
5081 case GE:
5082 case GEU:
5083 case LE:
5084 case LEU:
5085 /* Try GT/GTU/LT/LTU OR EQ */
5086 {
5087 rtx c_rtx, eq_rtx;
5088 enum insn_code ior_code;
5089 enum rtx_code new_code;
5090
5091 switch (rcode)
5092 {
5093 case GE: new_code = GT; break;
5094 case GEU: new_code = GTU; break;
5095 case LE: new_code = LT; break;
5096 case LEU: new_code = LTU; break;
5097 default:
5098 gcc_unreachable ();
5099 }
5100
5101 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5102 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5103
5104 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5105 gcc_assert (ior_code != CODE_FOR_nothing);
5106 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5107 if (dmode != dest_mode)
5108 {
5109 rtx temp = gen_reg_rtx (dest_mode);
5110 convert_move (temp, mask, 0);
5111 return temp;
5112 }
5113 return mask;
5114 }
5115 break;
5116 default:
5117 gcc_unreachable ();
5118 }
5119
5120 /* You only get two chances. */
5121 if (try_again)
5122 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5123
5124 gcc_assert (vec_cmp_insn != -1);
5125
5126 if (swap_operands)
5127 {
5128 rtx tmp;
5129 tmp = op0;
5130 op0 = op1;
5131 op1 = tmp;
5132 }
5133 }
5134
5135 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5136 if (dmode != dest_mode)
5137 {
5138 rtx temp = gen_reg_rtx (dest_mode);
5139 convert_move (temp, mask, 0);
5140 return temp;
5141 }
5142 return mask;
5143 }
5144
5145
5146 /* Emit vector conditional expression.
5147 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5148 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5149
5150 int
5151 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5152 rtx cond, rtx cc_op0, rtx cc_op1)
5153 {
5154 enum machine_mode dest_mode = GET_MODE (dest);
5155 enum rtx_code rcode = GET_CODE (cond);
5156 rtx mask;
5157
5158 /* Get the vector mask for the given relational operations. */
5159 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5160
5161 emit_insn(gen_selb (dest, op2, op1, mask));
5162
5163 return 1;
5164 }
5165
5166 static rtx
5167 spu_force_reg (enum machine_mode mode, rtx op)
5168 {
5169 rtx x, r;
5170 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5171 {
5172 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5173 || GET_MODE (op) == BLKmode)
5174 return force_reg (mode, convert_to_mode (mode, op, 0));
5175 abort ();
5176 }
5177
5178 r = force_reg (GET_MODE (op), op);
5179 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5180 {
5181 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5182 if (x)
5183 return x;
5184 }
5185
5186 x = gen_reg_rtx (mode);
5187 emit_insn (gen_spu_convert (x, r));
5188 return x;
5189 }
5190
5191 static void
5192 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5193 {
5194 HOST_WIDE_INT v = 0;
5195 int lsbits;
5196 /* Check the range of immediate operands. */
5197 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5198 {
5199 int range = p - SPU_BTI_7;
5200
5201 if (!CONSTANT_P (op))
5202 error ("%s expects an integer literal in the range [%d, %d].",
5203 d->name,
5204 spu_builtin_range[range].low, spu_builtin_range[range].high);
5205
5206 if (GET_CODE (op) == CONST
5207 && (GET_CODE (XEXP (op, 0)) == PLUS
5208 || GET_CODE (XEXP (op, 0)) == MINUS))
5209 {
5210 v = INTVAL (XEXP (XEXP (op, 0), 1));
5211 op = XEXP (XEXP (op, 0), 0);
5212 }
5213 else if (GET_CODE (op) == CONST_INT)
5214 v = INTVAL (op);
5215 else if (GET_CODE (op) == CONST_VECTOR
5216 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5217 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5218
5219 /* The default for v is 0 which is valid in every range. */
5220 if (v < spu_builtin_range[range].low
5221 || v > spu_builtin_range[range].high)
5222 error ("%s expects an integer literal in the range [%d, %d]. ("
5223 HOST_WIDE_INT_PRINT_DEC ")",
5224 d->name,
5225 spu_builtin_range[range].low, spu_builtin_range[range].high,
5226 v);
5227
5228 switch (p)
5229 {
5230 case SPU_BTI_S10_4:
5231 lsbits = 4;
5232 break;
5233 case SPU_BTI_U16_2:
5234 /* This is only used in lqa, and stqa. Even though the insns
5235 encode 16 bits of the address (all but the 2 least
5236 significant), only 14 bits are used because it is masked to
5237 be 16 byte aligned. */
5238 lsbits = 4;
5239 break;
5240 case SPU_BTI_S16_2:
5241 /* This is used for lqr and stqr. */
5242 lsbits = 2;
5243 break;
5244 default:
5245 lsbits = 0;
5246 }
5247
5248 if (GET_CODE (op) == LABEL_REF
5249 || (GET_CODE (op) == SYMBOL_REF
5250 && SYMBOL_REF_FUNCTION_P (op))
5251 || (v & ((1 << lsbits) - 1)) != 0)
5252 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5253 d->name);
5254 }
5255 }
5256
5257
5258 static void
5259 expand_builtin_args (struct spu_builtin_description *d, tree exp,
5260 rtx target, rtx ops[])
5261 {
5262 enum insn_code icode = d->icode;
5263 int i = 0, a;
5264
5265 /* Expand the arguments into rtl. */
5266
5267 if (d->parm[0] != SPU_BTI_VOID)
5268 ops[i++] = target;
5269
5270 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
5271 {
5272 tree arg = CALL_EXPR_ARG (exp, a);
5273 if (arg == 0)
5274 abort ();
5275 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
5276 }
5277 }
5278
5279 static rtx
5280 spu_expand_builtin_1 (struct spu_builtin_description *d,
5281 tree exp, rtx target)
5282 {
5283 rtx pat;
5284 rtx ops[8];
5285 enum insn_code icode = d->icode;
5286 enum machine_mode mode, tmode;
5287 int i, p;
5288 tree return_type;
5289
5290 /* Set up ops[] with values from arglist. */
5291 expand_builtin_args (d, exp, target, ops);
5292
5293 /* Handle the target operand which must be operand 0. */
5294 i = 0;
5295 if (d->parm[0] != SPU_BTI_VOID)
5296 {
5297
5298 /* We prefer the mode specified for the match_operand; otherwise
5299 use the mode from the builtin function prototype. */
5300 tmode = insn_data[d->icode].operand[0].mode;
5301 if (tmode == VOIDmode)
5302 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5303
5304 /* Try to use target because not using it can lead to extra copies
5305 and when we are using all of the registers extra copies lead
5306 to extra spills. */
5307 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5308 ops[0] = target;
5309 else
5310 target = ops[0] = gen_reg_rtx (tmode);
5311
5312 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5313 abort ();
5314
5315 i++;
5316 }
5317
5318 if (d->fcode == SPU_MASK_FOR_LOAD)
5319 {
5320 enum machine_mode mode = insn_data[icode].operand[1].mode;
5321 tree arg;
5322 rtx addr, op, pat;
5323
5324 /* get addr */
5325 arg = CALL_EXPR_ARG (exp, 0);
5326 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5327 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5328 addr = memory_address (mode, op);
5329
5330 /* negate addr */
5331 op = gen_reg_rtx (GET_MODE (addr));
5332 emit_insn (gen_rtx_SET (VOIDmode, op,
5333 gen_rtx_NEG (GET_MODE (addr), addr)));
5334 op = gen_rtx_MEM (mode, op);
5335
5336 pat = GEN_FCN (icode) (target, op);
5337 if (!pat)
5338 return 0;
5339 emit_insn (pat);
5340 return target;
5341 }
5342
5343 /* Ignore align_hint, but still expand its args in case they have
5344 side effects. */
5345 if (icode == CODE_FOR_spu_align_hint)
5346 return 0;
5347
5348 /* Handle the rest of the operands. */
5349 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5350 {
5351 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5352 mode = insn_data[d->icode].operand[i].mode;
5353 else
5354 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5355
5356 /* mode can be VOIDmode here for labels */
5357
5358 /* For specific intrinsics with an immediate operand, e.g.,
5359 si_ai(), we sometimes need to convert the scalar argument to a
5360 vector argument by splatting the scalar. */
5361 if (VECTOR_MODE_P (mode)
5362 && (GET_CODE (ops[i]) == CONST_INT
5363 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
5364 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
5365 {
5366 if (GET_CODE (ops[i]) == CONST_INT)
5367 ops[i] = spu_const (mode, INTVAL (ops[i]));
5368 else
5369 {
5370 rtx reg = gen_reg_rtx (mode);
5371 enum machine_mode imode = GET_MODE_INNER (mode);
5372 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5373 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5374 if (imode != GET_MODE (ops[i]))
5375 ops[i] = convert_to_mode (imode, ops[i],
5376 TYPE_UNSIGNED (spu_builtin_types
5377 [d->parm[i]]));
5378 emit_insn (gen_spu_splats (reg, ops[i]));
5379 ops[i] = reg;
5380 }
5381 }
5382
5383 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5384
5385 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5386 ops[i] = spu_force_reg (mode, ops[i]);
5387 }
5388
5389 switch (insn_data[icode].n_operands)
5390 {
5391 case 0:
5392 pat = GEN_FCN (icode) (0);
5393 break;
5394 case 1:
5395 pat = GEN_FCN (icode) (ops[0]);
5396 break;
5397 case 2:
5398 pat = GEN_FCN (icode) (ops[0], ops[1]);
5399 break;
5400 case 3:
5401 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5402 break;
5403 case 4:
5404 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5405 break;
5406 case 5:
5407 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5408 break;
5409 case 6:
5410 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5411 break;
5412 default:
5413 abort ();
5414 }
5415
5416 if (!pat)
5417 abort ();
5418
5419 if (d->type == B_CALL || d->type == B_BISLED)
5420 emit_call_insn (pat);
5421 else if (d->type == B_JUMP)
5422 {
5423 emit_jump_insn (pat);
5424 emit_barrier ();
5425 }
5426 else
5427 emit_insn (pat);
5428
5429 return_type = spu_builtin_types[d->parm[0]];
5430 if (d->parm[0] != SPU_BTI_VOID
5431 && GET_MODE (target) != TYPE_MODE (return_type))
5432 {
5433 /* target is the return value. It should always have the mode of
5434 the builtin function prototype. */
5435 target = spu_force_reg (TYPE_MODE (return_type), target);
5436 }
5437
5438 return target;
5439 }
5440
5441 rtx
5442 spu_expand_builtin (tree exp,
5443 rtx target,
5444 rtx subtarget ATTRIBUTE_UNUSED,
5445 enum machine_mode mode ATTRIBUTE_UNUSED,
5446 int ignore ATTRIBUTE_UNUSED)
5447 {
5448 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5449 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
5450 struct spu_builtin_description *d;
5451
5452 if (fcode < NUM_SPU_BUILTINS)
5453 {
5454 d = &spu_builtins[fcode];
5455
5456 return spu_expand_builtin_1 (d, exp, target);
5457 }
5458 abort ();
5459 }
5460
5461 /* Implement targetm.vectorize.builtin_mul_widen_even. */
5462 static tree
5463 spu_builtin_mul_widen_even (tree type)
5464 {
5465 switch (TYPE_MODE (type))
5466 {
5467 case V8HImode:
5468 if (TYPE_UNSIGNED (type))
5469 return spu_builtins[SPU_MULE_0].fndecl;
5470 else
5471 return spu_builtins[SPU_MULE_1].fndecl;
5472 break;
5473 default:
5474 return NULL_TREE;
5475 }
5476 }
5477
5478 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
5479 static tree
5480 spu_builtin_mul_widen_odd (tree type)
5481 {
5482 switch (TYPE_MODE (type))
5483 {
5484 case V8HImode:
5485 if (TYPE_UNSIGNED (type))
5486 return spu_builtins[SPU_MULO_1].fndecl;
5487 else
5488 return spu_builtins[SPU_MULO_0].fndecl;
5489 break;
5490 default:
5491 return NULL_TREE;
5492 }
5493 }
5494
5495 /* Implement targetm.vectorize.builtin_mask_for_load. */
5496 static tree
5497 spu_builtin_mask_for_load (void)
5498 {
5499 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5500 gcc_assert (d);
5501 return d->fndecl;
5502 }
5503
5504 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5505 static int
5506 spu_builtin_vectorization_cost (bool runtime_test)
5507 {
5508 /* If the branch of the runtime test is taken, i.e., the vectorized
5509 version is skipped, this incurs a misprediction cost (because the
5510 vectorized version is expected to be the fall-through). So we subtract
5511 the latency of a mispredicted branch from the costs that are incurred
5512 when the vectorized version is executed. */
5513 if (runtime_test)
5514 return -19;
5515 else
5516 return 0;
5517 }
5518
5519 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5520 after applying N iterations. This routine does not determine
5521 how many iterations are required to reach the desired alignment. */
5522
5523 static bool
5524 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5525 {
5526 if (is_packed)
5527 return false;
5528
5529 /* All other types are naturally aligned. */
5530 return true;
5531 }
5532
5533 /* Count the total number of instructions in each pipe and return the
5534 maximum, which is used as the Minimum Iteration Interval (MII)
5535 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
5536 A return value of -2 means the instruction can go in either pipe0 or pipe1. */
5537 static int
5538 spu_sms_res_mii (struct ddg *g)
5539 {
5540 int i;
5541 unsigned t[4] = {0, 0, 0, 0};
5542
5543 for (i = 0; i < g->num_nodes; i++)
5544 {
5545 rtx insn = g->nodes[i].insn;
5546 int p = get_pipe (insn) + 2;
5547
5548 assert (p >= 0);
5549 assert (p < 4);
5550
5551 t[p]++;
5552 if (dump_file && INSN_P (insn))
5553 fprintf (dump_file, "i%d %s %d %d\n",
5554 INSN_UID (insn),
5555 insn_data[INSN_CODE(insn)].name,
5556 p, t[p]);
5557 }
5558 if (dump_file)
5559 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
5560
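/* The result is, in effect, a standard two-pipe lower bound on the
   initiation interval: half of all instructions that must share the two
   pipes, rounded up (t[0] counts those that may issue in either pipe),
   or the demand on the busier dedicated pipe, whichever is larger.  */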
5561 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
5562 }
5563
5564
5565 void
5566 spu_init_expanders (void)
5567 {
5568 /* The hard frame pointer register is only 128-bit aligned when
5569 frame_pointer_needed is true. We don't know that until we're
5570 expanding the prologue. */
5571 if (cfun)
5572 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
5573 }
5574
5575 static enum machine_mode
5576 spu_libgcc_cmp_return_mode (void)
5577 {
5578
5579 /* For the SPU, word mode is TImode, so it is better to use SImode
5580 for compare returns. */
5581 return SImode;
5582 }
5583
5584 static enum machine_mode
5585 spu_libgcc_shift_count_mode (void)
5586 {
5587 /* For the SPU, word mode is TImode, so it is better to use SImode
5588 for shift counts. */
5589 return SImode;
5590 }