1 /* Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
64 static struct spu_builtin_range spu_builtin_range
[] = {
65 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
66 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
68 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
69 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
71 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
72 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
73 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
80 /* Target specific attribute specifications. */
81 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode
);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode
);
87 static rtx
adjust_operand (rtx op
, HOST_WIDE_INT
* start
);
88 static rtx
get_pic_reg (void);
89 static int need_to_save_reg (int regno
, int saving
);
90 static rtx
frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
);
91 static rtx
frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
);
92 static rtx
frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
,
94 static void emit_nop_for_insn (rtx insn
);
95 static bool insn_clobbers_hbr (rtx insn
);
96 static void spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
98 static rtx
spu_emit_vector_compare (enum rtx_code rcode
, rtx op0
, rtx op1
,
99 enum machine_mode dmode
);
100 static rtx
get_branch_target (rtx branch
);
101 static void insert_branch_hints (void);
102 static void insert_nops (void);
103 static void spu_machine_dependent_reorg (void);
104 static int spu_sched_issue_rate (void);
105 static int spu_sched_variable_issue (FILE * dump
, int verbose
, rtx insn
,
107 static int get_pipe (rtx insn
);
108 static int spu_sched_adjust_priority (rtx insn
, int pri
);
109 static int spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
);
110 static tree
spu_handle_fndecl_attribute (tree
* node
, tree name
, tree args
,
112 unsigned char *no_add_attrs
);
113 static tree
spu_handle_vector_attribute (tree
* node
, tree name
, tree args
,
115 unsigned char *no_add_attrs
);
116 static int spu_naked_function_p (tree func
);
117 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
118 const_tree type
, unsigned char named
);
119 static tree
spu_build_builtin_va_list (void);
120 static void spu_va_start (tree
, rtx
);
121 static tree
spu_gimplify_va_arg_expr (tree valist
, tree type
,
122 gimple_seq
* pre_p
, gimple_seq
* post_p
);
123 static int regno_aligned_for_load (int regno
);
124 static int store_with_one_insn_p (rtx mem
);
125 static int mem_is_padded_component_ref (rtx x
);
126 static bool spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
);
127 static void spu_asm_globalize_label (FILE * file
, const char *name
);
128 static unsigned char spu_rtx_costs (rtx x
, int code
, int outer_code
,
130 static unsigned char spu_function_ok_for_sibcall (tree decl
, tree exp
);
131 static void spu_init_libfuncs (void);
132 static bool spu_return_in_memory (const_tree type
, const_tree fntype
);
133 static void fix_range (const char *);
134 static void spu_encode_section_info (tree
, rtx
, int);
135 static tree
spu_builtin_mul_widen_even (tree
);
136 static tree
spu_builtin_mul_widen_odd (tree
);
137 static tree
spu_builtin_mask_for_load (void);
138 static int spu_builtin_vectorization_cost (bool);
139 static bool spu_vector_alignment_reachable (const_tree
, bool);
140 static int spu_sms_res_mii (struct ddg
*g
);
142 extern const char *reg_names
[];
143 rtx spu_compare_op0
, spu_compare_op1
;
145 /* Which instruction set architecture to use. */
147 /* Which cpu are we tuning for. */
163 IC_POOL
, /* constant pool */
164 IC_IL1
, /* one il* instruction */
165 IC_IL2
, /* both ilhu and iohl instructions */
166 IC_IL1s
, /* one il* instruction */
167 IC_IL2s
, /* both ilhu and iohl instructions */
168 IC_FSMBI
, /* the fsmbi instruction */
169 IC_CPAT
, /* one of the c*d instructions */
170 IC_FSMBI2
/* fsmbi plus 1 other instruction */
173 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
174 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
175 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
176 static enum immediate_class
classify_immediate (rtx op
,
177 enum machine_mode mode
);
179 static enum machine_mode
spu_unwind_word_mode (void);
181 static enum machine_mode
182 spu_libgcc_cmp_return_mode (void);
184 static enum machine_mode
185 spu_libgcc_shift_count_mode (void);
187 /* Built in types. */
188 tree spu_builtin_types
[SPU_BTI_MAX
];
190 /* TARGET overrides. */
192 #undef TARGET_INIT_BUILTINS
193 #define TARGET_INIT_BUILTINS spu_init_builtins
195 #undef TARGET_EXPAND_BUILTIN
196 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
198 #undef TARGET_UNWIND_WORD_MODE
199 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
201 /* The .8byte directive doesn't seem to work well for a 32 bit
203 #undef TARGET_ASM_UNALIGNED_DI_OP
204 #define TARGET_ASM_UNALIGNED_DI_OP NULL
206 #undef TARGET_RTX_COSTS
207 #define TARGET_RTX_COSTS spu_rtx_costs
209 #undef TARGET_ADDRESS_COST
210 #define TARGET_ADDRESS_COST hook_int_rtx_0
212 #undef TARGET_SCHED_ISSUE_RATE
213 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
215 #undef TARGET_SCHED_VARIABLE_ISSUE
216 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
218 #undef TARGET_SCHED_ADJUST_PRIORITY
219 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
221 #undef TARGET_SCHED_ADJUST_COST
222 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
224 const struct attribute_spec spu_attribute_table
[];
225 #undef TARGET_ATTRIBUTE_TABLE
226 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
228 #undef TARGET_ASM_INTEGER
229 #define TARGET_ASM_INTEGER spu_assemble_integer
231 #undef TARGET_SCALAR_MODE_SUPPORTED_P
232 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
234 #undef TARGET_VECTOR_MODE_SUPPORTED_P
235 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
237 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
238 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
240 #undef TARGET_ASM_GLOBALIZE_LABEL
241 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
243 #undef TARGET_PASS_BY_REFERENCE
244 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
246 #undef TARGET_MUST_PASS_IN_STACK
247 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
249 #undef TARGET_BUILD_BUILTIN_VA_LIST
250 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
252 #undef TARGET_EXPAND_BUILTIN_VA_START
253 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
255 #undef TARGET_SETUP_INCOMING_VARARGS
256 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
258 #undef TARGET_MACHINE_DEPENDENT_REORG
259 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
261 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
262 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
264 #undef TARGET_DEFAULT_TARGET_FLAGS
265 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
267 #undef TARGET_INIT_LIBFUNCS
268 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
270 #undef TARGET_RETURN_IN_MEMORY
271 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
273 #undef TARGET_ENCODE_SECTION_INFO
274 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
276 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
277 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
279 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
280 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
282 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
283 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
285 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
286 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
288 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
289 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
291 #undef TARGET_LIBGCC_CMP_RETURN_MODE
292 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
294 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
295 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
297 #undef TARGET_SCHED_SMS_RES_MII
298 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
300 struct gcc_target targetm
= TARGET_INITIALIZER
;
303 spu_optimization_options (int level ATTRIBUTE_UNUSED
, int size ATTRIBUTE_UNUSED
)
305 /* Override some of the default param values. With so many registers
306 larger values are better for these params. */
307 MAX_PENDING_LIST_LENGTH
= 128;
309 /* With so many registers this is better on by default. */
310 flag_rename_registers
= 1;
313 /* Sometimes certain combinations of command options do not make sense
314 on a particular target machine. You can define a macro
315 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
316 executed once just after all the command options have been parsed. */
318 spu_override_options (void)
320 /* Small loops will be unpeeled at -O3. For SPU it is more important
321 to keep code small by default. */
322 if (!flag_unroll_loops
&& !flag_peel_loops
323 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
324 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
) = 1;
326 flag_omit_frame_pointer
= 1;
328 if (align_functions
< 8)
331 if (spu_fixed_range_string
)
332 fix_range (spu_fixed_range_string
);
334 /* Determine processor architectural level. */
337 if (strcmp (&spu_arch_string
[0], "cell") == 0)
338 spu_arch
= PROCESSOR_CELL
;
339 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
340 spu_arch
= PROCESSOR_CELLEDP
;
342 error ("Unknown architecture '%s'", &spu_arch_string
[0]);
345 /* Determine processor to tune for. */
348 if (strcmp (&spu_tune_string
[0], "cell") == 0)
349 spu_tune
= PROCESSOR_CELL
;
350 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
351 spu_tune
= PROCESSOR_CELLEDP
;
353 error ("Unknown architecture '%s'", &spu_tune_string
[0]);
357 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
358 struct attribute_spec.handler. */
360 /* Table of machine attributes. */
361 const struct attribute_spec spu_attribute_table
[] =
363 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
364 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
},
365 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
},
366 { NULL
, 0, 0, false, false, false, NULL
}
369 /* True if MODE is valid for the target. By "valid", we mean able to
370 be manipulated in non-trivial ways. In particular, this means all
371 the arithmetic is supported. */
373 spu_scalar_mode_supported_p (enum machine_mode mode
)
391 /* Similarly for vector modes. "Supported" here is less strict. At
392 least some operations are supported; need to check optabs or builtins
393 for further details. */
395 spu_vector_mode_supported_p (enum machine_mode mode
)
412 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
413 least significant bytes of the outer mode. This function returns
414 TRUE for the SUBREG's where this is correct. */
416 valid_subreg (rtx op
)
418 enum machine_mode om
= GET_MODE (op
);
419 enum machine_mode im
= GET_MODE (SUBREG_REG (op
));
420 return om
!= VOIDmode
&& im
!= VOIDmode
421 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
422 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4));
425 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
426 and adjust the start offset. */
428 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
430 enum machine_mode mode
;
432 /* Strip any SUBREG */
433 if (GET_CODE (op
) == SUBREG
)
437 GET_MODE_BITSIZE (GET_MODE (op
)) -
438 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
439 op
= SUBREG_REG (op
);
441 /* If it is smaller than SI, assure a SUBREG */
442 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
446 *start
+= 32 - op_size
;
449 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
450 mode
= mode_for_size (op_size
, MODE_INT
, 0);
451 if (mode
!= GET_MODE (op
))
452 op
= gen_rtx_SUBREG (mode
, op
, 0);
457 spu_expand_extv (rtx ops
[], int unsignedp
)
459 HOST_WIDE_INT width
= INTVAL (ops
[2]);
460 HOST_WIDE_INT start
= INTVAL (ops
[3]);
461 HOST_WIDE_INT src_size
, dst_size
;
462 enum machine_mode src_mode
, dst_mode
;
463 rtx dst
= ops
[0], src
= ops
[1];
466 dst
= adjust_operand (ops
[0], 0);
467 dst_mode
= GET_MODE (dst
);
468 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
470 src
= adjust_operand (src
, &start
);
471 src_mode
= GET_MODE (src
);
472 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
476 s
= gen_reg_rtx (src_mode
);
480 emit_insn (gen_ashlsi3 (s
, src
, GEN_INT (start
)));
483 emit_insn (gen_ashldi3 (s
, src
, GEN_INT (start
)));
486 emit_insn (gen_ashlti3 (s
, src
, GEN_INT (start
)));
494 if (width
< src_size
)
501 icode
= unsignedp
? CODE_FOR_lshrsi3
: CODE_FOR_ashrsi3
;
504 icode
= unsignedp
? CODE_FOR_lshrdi3
: CODE_FOR_ashrdi3
;
507 icode
= unsignedp
? CODE_FOR_lshrti3
: CODE_FOR_ashrti3
;
512 s
= gen_reg_rtx (src_mode
);
513 pat
= GEN_FCN (icode
) (s
, src
, GEN_INT (src_size
- width
));
518 convert_move (dst
, src
, unsignedp
);
522 spu_expand_insv (rtx ops
[])
524 HOST_WIDE_INT width
= INTVAL (ops
[1]);
525 HOST_WIDE_INT start
= INTVAL (ops
[2]);
526 HOST_WIDE_INT maskbits
;
527 enum machine_mode dst_mode
, src_mode
;
528 rtx dst
= ops
[0], src
= ops
[3];
529 int dst_size
, src_size
;
535 if (GET_CODE (ops
[0]) == MEM
)
536 dst
= gen_reg_rtx (TImode
);
538 dst
= adjust_operand (dst
, &start
);
539 dst_mode
= GET_MODE (dst
);
540 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
542 if (CONSTANT_P (src
))
544 enum machine_mode m
=
545 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
546 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
548 src
= adjust_operand (src
, 0);
549 src_mode
= GET_MODE (src
);
550 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
552 mask
= gen_reg_rtx (dst_mode
);
553 shift_reg
= gen_reg_rtx (dst_mode
);
554 shift
= dst_size
- start
- width
;
556 /* It's not safe to use subreg here because the compiler assumes
557 that the SUBREG_REG is right justified in the SUBREG. */
558 convert_move (shift_reg
, src
, 1);
565 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
568 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
571 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
583 maskbits
= (-1ll << (32 - width
- start
));
585 maskbits
+= (1ll << (32 - start
));
586 emit_move_insn (mask
, GEN_INT (maskbits
));
589 maskbits
= (-1ll << (64 - width
- start
));
591 maskbits
+= (1ll << (64 - start
));
592 emit_move_insn (mask
, GEN_INT (maskbits
));
596 unsigned char arr
[16];
598 memset (arr
, 0, sizeof (arr
));
599 arr
[i
] = 0xff >> (start
& 7);
600 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
602 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
603 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
609 if (GET_CODE (ops
[0]) == MEM
)
611 rtx aligned
= gen_reg_rtx (SImode
);
612 rtx low
= gen_reg_rtx (SImode
);
613 rtx addr
= gen_reg_rtx (SImode
);
614 rtx rotl
= gen_reg_rtx (SImode
);
615 rtx mask0
= gen_reg_rtx (TImode
);
618 emit_move_insn (addr
, XEXP (ops
[0], 0));
619 emit_insn (gen_andsi3 (aligned
, addr
, GEN_INT (-16)));
620 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
621 emit_insn (gen_negsi2 (rotl
, low
));
622 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
623 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
624 mem
= change_address (ops
[0], TImode
, aligned
);
625 set_mem_alias_set (mem
, 0);
626 emit_move_insn (dst
, mem
);
627 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
628 emit_move_insn (mem
, dst
);
629 if (start
+ width
> MEM_ALIGN (ops
[0]))
631 rtx shl
= gen_reg_rtx (SImode
);
632 rtx mask1
= gen_reg_rtx (TImode
);
633 rtx dst1
= gen_reg_rtx (TImode
);
635 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
636 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
637 mem1
= adjust_address (mem
, TImode
, 16);
638 set_mem_alias_set (mem1
, 0);
639 emit_move_insn (dst1
, mem1
);
640 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
641 emit_move_insn (mem1
, dst1
);
645 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
650 spu_expand_block_move (rtx ops
[])
652 HOST_WIDE_INT bytes
, align
, offset
;
653 rtx src
, dst
, sreg
, dreg
, target
;
655 if (GET_CODE (ops
[2]) != CONST_INT
656 || GET_CODE (ops
[3]) != CONST_INT
657 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO
* 8))
660 bytes
= INTVAL (ops
[2]);
661 align
= INTVAL (ops
[3]);
671 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
673 dst
= adjust_address (ops
[0], V16QImode
, offset
);
674 src
= adjust_address (ops
[1], V16QImode
, offset
);
675 emit_move_insn (dst
, src
);
680 unsigned char arr
[16] = { 0 };
681 for (i
= 0; i
< bytes
- offset
; i
++)
683 dst
= adjust_address (ops
[0], V16QImode
, offset
);
684 src
= adjust_address (ops
[1], V16QImode
, offset
);
685 mask
= gen_reg_rtx (V16QImode
);
686 sreg
= gen_reg_rtx (V16QImode
);
687 dreg
= gen_reg_rtx (V16QImode
);
688 target
= gen_reg_rtx (V16QImode
);
689 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
690 emit_move_insn (dreg
, dst
);
691 emit_move_insn (sreg
, src
);
692 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
693 emit_move_insn (dst
, target
);
701 { SPU_EQ
, SPU_GT
, SPU_GTU
};
703 int spu_comp_icode
[12][3] = {
704 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
705 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
706 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
707 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
708 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
709 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
710 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
711 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
712 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
713 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
714 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
715 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
718 /* Generate a compare for CODE. Return a brand-new rtx that represents
719 the result of the compare. GCC can figure this out too if we don't
720 provide all variations of compares, but GCC always wants to use
721 WORD_MODE, we can generate better code in most cases if we do it
724 spu_emit_branch_or_set (int is_set
, enum rtx_code code
, rtx operands
[])
726 int reverse_compare
= 0;
727 int reverse_test
= 0;
728 rtx compare_result
, eq_result
;
729 rtx comp_rtx
, eq_rtx
;
730 rtx target
= operands
[0];
731 enum machine_mode comp_mode
;
732 enum machine_mode op_mode
;
733 enum spu_comp_code scode
, eq_code
, ior_code
;
737 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
738 and so on, to keep the constant in operand 1. */
739 if (GET_CODE (spu_compare_op1
) == CONST_INT
)
741 HOST_WIDE_INT val
= INTVAL (spu_compare_op1
) - 1;
742 if (trunc_int_for_mode (val
, GET_MODE (spu_compare_op0
)) == val
)
746 spu_compare_op1
= GEN_INT (val
);
750 spu_compare_op1
= GEN_INT (val
);
754 spu_compare_op1
= GEN_INT (val
);
758 spu_compare_op1
= GEN_INT (val
);
767 op_mode
= GET_MODE (spu_compare_op0
);
773 if (HONOR_NANS (op_mode
))
788 if (HONOR_NANS (op_mode
))
880 comp_mode
= V4SImode
;
884 comp_mode
= V2DImode
;
891 if (GET_MODE (spu_compare_op1
) == DFmode
892 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
895 if (is_set
== 0 && spu_compare_op1
== const0_rtx
896 && (GET_MODE (spu_compare_op0
) == SImode
897 || GET_MODE (spu_compare_op0
) == HImode
) && scode
== SPU_EQ
)
899 /* Don't need to set a register with the result when we are
900 comparing against zero and branching. */
901 reverse_test
= !reverse_test
;
902 compare_result
= spu_compare_op0
;
906 compare_result
= gen_reg_rtx (comp_mode
);
910 rtx t
= spu_compare_op1
;
911 spu_compare_op1
= spu_compare_op0
;
915 if (spu_comp_icode
[index
][scode
] == 0)
918 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
919 (spu_compare_op0
, op_mode
))
920 spu_compare_op0
= force_reg (op_mode
, spu_compare_op0
);
921 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
922 (spu_compare_op1
, op_mode
))
923 spu_compare_op1
= force_reg (op_mode
, spu_compare_op1
);
924 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
929 emit_insn (comp_rtx
);
933 eq_result
= gen_reg_rtx (comp_mode
);
934 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
940 ior_code
= ior_optab
->handlers
[(int)comp_mode
].insn_code
;
941 gcc_assert (ior_code
!= CODE_FOR_nothing
);
942 emit_insn (GEN_FCN (ior_code
)
943 (compare_result
, compare_result
, eq_result
));
952 /* We don't have branch on QI compare insns, so we convert the
953 QI compare result to a HI result. */
954 if (comp_mode
== QImode
)
956 rtx old_res
= compare_result
;
957 compare_result
= gen_reg_rtx (HImode
);
959 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
963 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
965 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
967 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, target
);
968 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
969 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
972 else if (is_set
== 2)
974 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
975 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
976 enum machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
978 rtx op_t
= operands
[2];
979 rtx op_f
= operands
[3];
981 /* The result of the comparison can be SI, HI or QI mode. Create a
982 mask based on that result. */
983 if (target_size
> compare_size
)
985 select_mask
= gen_reg_rtx (mode
);
986 emit_insn (gen_extend_compare (select_mask
, compare_result
));
988 else if (target_size
< compare_size
)
990 gen_rtx_SUBREG (mode
, compare_result
,
991 (compare_size
- target_size
) / BITS_PER_UNIT
);
992 else if (comp_mode
!= mode
)
993 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
995 select_mask
= compare_result
;
997 if (GET_MODE (target
) != GET_MODE (op_t
)
998 || GET_MODE (target
) != GET_MODE (op_f
))
1002 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
1004 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
1009 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
1010 gen_rtx_NOT (comp_mode
, compare_result
)));
1011 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
1012 emit_insn (gen_extendhisi2 (target
, compare_result
));
1013 else if (GET_MODE (target
) == SImode
1014 && GET_MODE (compare_result
) == QImode
)
1015 emit_insn (gen_extend_compare (target
, compare_result
));
1017 emit_move_insn (target
, compare_result
);
1022 const_double_to_hwint (rtx x
)
1026 if (GET_MODE (x
) == SFmode
)
1028 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1029 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1031 else if (GET_MODE (x
) == DFmode
)
1034 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1035 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
1037 val
= (val
<< 32) | (l
[1] & 0xffffffff);
1045 hwint_to_const_double (enum machine_mode mode
, HOST_WIDE_INT v
)
1049 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1052 tv
[0] = (v
<< 32) >> 32;
1053 else if (mode
== DFmode
)
1055 tv
[1] = (v
<< 32) >> 32;
1058 real_from_target (&rv
, tv
, mode
);
1059 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
1063 print_operand_address (FILE * file
, register rtx addr
)
1068 if (GET_CODE (addr
) == AND
1069 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1070 && INTVAL (XEXP (addr
, 1)) == -16)
1071 addr
= XEXP (addr
, 0);
1073 switch (GET_CODE (addr
))
1076 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1080 reg
= XEXP (addr
, 0);
1081 offset
= XEXP (addr
, 1);
1082 if (GET_CODE (offset
) == REG
)
1084 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1085 reg_names
[REGNO (offset
)]);
1087 else if (GET_CODE (offset
) == CONST_INT
)
1089 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1090 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1100 output_addr_const (file
, addr
);
1110 print_operand (FILE * file
, rtx x
, int code
)
1112 enum machine_mode mode
= GET_MODE (x
);
1114 unsigned char arr
[16];
1115 int xcode
= GET_CODE (x
);
1117 if (GET_MODE (x
) == VOIDmode
)
1120 case 'L': /* 128 bits, signed */
1121 case 'm': /* 128 bits, signed */
1122 case 'T': /* 128 bits, signed */
1123 case 't': /* 128 bits, signed */
1126 case 'K': /* 64 bits, signed */
1127 case 'k': /* 64 bits, signed */
1128 case 'D': /* 64 bits, signed */
1129 case 'd': /* 64 bits, signed */
1132 case 'J': /* 32 bits, signed */
1133 case 'j': /* 32 bits, signed */
1134 case 's': /* 32 bits, signed */
1135 case 'S': /* 32 bits, signed */
1142 case 'j': /* 32 bits, signed */
1143 case 'k': /* 64 bits, signed */
1144 case 'm': /* 128 bits, signed */
1145 if (xcode
== CONST_INT
1146 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1148 gcc_assert (logical_immediate_p (x
, mode
));
1149 constant_to_array (mode
, x
, arr
);
1150 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1151 val
= trunc_int_for_mode (val
, SImode
);
1152 switch (which_logical_immediate (val
))
1157 fprintf (file
, "h");
1160 fprintf (file
, "b");
1170 case 'J': /* 32 bits, signed */
1171 case 'K': /* 64 bits, signed */
1172 case 'L': /* 128 bits, signed */
1173 if (xcode
== CONST_INT
1174 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1176 gcc_assert (logical_immediate_p (x
, mode
)
1177 || iohl_immediate_p (x
, mode
));
1178 constant_to_array (mode
, x
, arr
);
1179 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1180 val
= trunc_int_for_mode (val
, SImode
);
1181 switch (which_logical_immediate (val
))
1187 val
= trunc_int_for_mode (val
, HImode
);
1190 val
= trunc_int_for_mode (val
, QImode
);
1195 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1201 case 't': /* 128 bits, signed */
1202 case 'd': /* 64 bits, signed */
1203 case 's': /* 32 bits, signed */
1206 enum immediate_class c
= classify_immediate (x
, mode
);
1210 constant_to_array (mode
, x
, arr
);
1211 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1212 val
= trunc_int_for_mode (val
, SImode
);
1213 switch (which_immediate_load (val
))
1218 fprintf (file
, "a");
1221 fprintf (file
, "h");
1224 fprintf (file
, "hu");
1231 constant_to_array (mode
, x
, arr
);
1232 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1234 fprintf (file
, "b");
1236 fprintf (file
, "h");
1238 fprintf (file
, "w");
1240 fprintf (file
, "d");
1243 if (xcode
== CONST_VECTOR
)
1245 x
= CONST_VECTOR_ELT (x
, 0);
1246 xcode
= GET_CODE (x
);
1248 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1249 fprintf (file
, "a");
1250 else if (xcode
== HIGH
)
1251 fprintf (file
, "hu");
1265 case 'T': /* 128 bits, signed */
1266 case 'D': /* 64 bits, signed */
1267 case 'S': /* 32 bits, signed */
1270 enum immediate_class c
= classify_immediate (x
, mode
);
1274 constant_to_array (mode
, x
, arr
);
1275 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1276 val
= trunc_int_for_mode (val
, SImode
);
1277 switch (which_immediate_load (val
))
1284 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1289 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1292 constant_to_array (mode
, x
, arr
);
1294 for (i
= 0; i
< 16; i
++)
1299 print_operand (file
, GEN_INT (val
), 0);
1302 constant_to_array (mode
, x
, arr
);
1303 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1304 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1309 if (GET_CODE (x
) == CONST_VECTOR
)
1310 x
= CONST_VECTOR_ELT (x
, 0);
1311 output_addr_const (file
, x
);
1313 fprintf (file
, "@h");
1327 if (xcode
== CONST_INT
)
1329 /* Only 4 least significant bits are relevant for generate
1330 control word instructions. */
1331 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1336 case 'M': /* print code for c*d */
1337 if (GET_CODE (x
) == CONST_INT
)
1341 fprintf (file
, "b");
1344 fprintf (file
, "h");
1347 fprintf (file
, "w");
1350 fprintf (file
, "d");
1359 case 'N': /* Negate the operand */
1360 if (xcode
== CONST_INT
)
1361 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1362 else if (xcode
== CONST_VECTOR
)
1363 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1364 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1367 case 'I': /* enable/disable interrupts */
1368 if (xcode
== CONST_INT
)
1369 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1372 case 'b': /* branch modifiers */
1374 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1375 else if (COMPARISON_P (x
))
1376 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1379 case 'i': /* indirect call */
1382 if (GET_CODE (XEXP (x
, 0)) == REG
)
1383 /* Used in indirect function calls. */
1384 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1386 output_address (XEXP (x
, 0));
1390 case 'p': /* load/store */
1394 xcode
= GET_CODE (x
);
1399 xcode
= GET_CODE (x
);
1402 fprintf (file
, "d");
1403 else if (xcode
== CONST_INT
)
1404 fprintf (file
, "a");
1405 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1406 fprintf (file
, "r");
1407 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1409 if (GET_CODE (XEXP (x
, 1)) == REG
)
1410 fprintf (file
, "x");
1412 fprintf (file
, "d");
1417 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1419 output_addr_const (file
, GEN_INT (val
));
1423 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1425 output_addr_const (file
, GEN_INT (val
));
1429 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1431 output_addr_const (file
, GEN_INT (val
));
1435 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1436 val
= (val
>> 3) & 0x1f;
1437 output_addr_const (file
, GEN_INT (val
));
1441 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1444 output_addr_const (file
, GEN_INT (val
));
1448 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1451 output_addr_const (file
, GEN_INT (val
));
1455 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1458 output_addr_const (file
, GEN_INT (val
));
1462 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1463 val
= -(val
& -8ll);
1464 val
= (val
>> 3) & 0x1f;
1465 output_addr_const (file
, GEN_INT (val
));
1470 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1471 else if (xcode
== MEM
)
1472 output_address (XEXP (x
, 0));
1473 else if (xcode
== CONST_VECTOR
)
1474 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1476 output_addr_const (file
, x
);
1483 output_operand_lossage ("invalid %%xn code");
1488 extern char call_used_regs
[];
1490 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1491 caller saved register. For leaf functions it is more efficient to
1492 use a volatile register because we won't need to save and restore the
1493 pic register. This routine is only valid after register allocation
1494 is completed, so we can pick an unused register. */
1498 rtx pic_reg
= pic_offset_table_rtx
;
1499 if (!reload_completed
&& !reload_in_progress
)
1504 /* Split constant addresses to handle cases that are too large.
1505 Add in the pic register when in PIC mode.
1506 Split immediates that require more than 1 instruction. */
1508 spu_split_immediate (rtx
* ops
)
1510 enum machine_mode mode
= GET_MODE (ops
[0]);
1511 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1517 unsigned char arrhi
[16];
1518 unsigned char arrlo
[16];
1521 constant_to_array (mode
, ops
[1], arrhi
);
1522 to
= !can_create_pseudo_p () ? ops
[0] : gen_reg_rtx (mode
);
1523 for (i
= 0; i
< 16; i
+= 4)
1525 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1526 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1527 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1528 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1530 hi
= array_to_constant (mode
, arrhi
);
1531 lo
= array_to_constant (mode
, arrlo
);
1532 emit_move_insn (to
, hi
);
1533 emit_insn (gen_rtx_SET
1534 (VOIDmode
, ops
[0], gen_rtx_IOR (mode
, to
, lo
)));
1539 unsigned char arr_fsmbi
[16];
1540 unsigned char arr_andbi
[16];
1541 rtx to
, reg_fsmbi
, reg_and
;
1543 enum machine_mode imode
= mode
;
1544 /* We need to do reals as ints because the constant used in the
1545 * AND might not be a legitimate real constant. */
1546 imode
= int_mode_for_mode (mode
);
1547 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1549 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1552 for (i
= 0; i
< 16; i
++)
1553 if (arr_fsmbi
[i
] != 0)
1555 arr_andbi
[0] = arr_fsmbi
[i
];
1556 arr_fsmbi
[i
] = 0xff;
1558 for (i
= 1; i
< 16; i
++)
1559 arr_andbi
[i
] = arr_andbi
[0];
1560 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1561 reg_and
= array_to_constant (imode
, arr_andbi
);
1562 emit_move_insn (to
, reg_fsmbi
);
1563 emit_insn (gen_rtx_SET
1564 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1568 if (reload_in_progress
|| reload_completed
)
1570 rtx mem
= force_const_mem (mode
, ops
[1]);
1571 if (TARGET_LARGE_MEM
)
1573 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1574 emit_move_insn (addr
, XEXP (mem
, 0));
1575 mem
= replace_equiv_address (mem
, addr
);
1577 emit_move_insn (ops
[0], mem
);
1583 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1587 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1588 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1591 emit_insn (gen_pic (ops
[0], ops
[1]));
1594 rtx pic_reg
= get_pic_reg ();
1595 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1596 crtl
->uses_pic_offset_table
= 1;
1598 return flag_pic
|| c
== IC_IL2s
;
1609 /* SAVING is TRUE when we are generating the actual load and store
1610 instructions for REGNO. When determining the size of the stack
1611 needed for saving register we must allocate enough space for the
1612 worst case, because we don't always have the information early enough
1613 to not allocate it. But we can at least eliminate the actual loads
1614 and stores during the prologue/epilogue. */
1616 need_to_save_reg (int regno
, int saving
)
1618 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1621 && regno
== PIC_OFFSET_TABLE_REGNUM
1622 && (!saving
|| crtl
->uses_pic_offset_table
)
1624 || !current_function_is_leaf
|| df_regs_ever_live_p (LAST_ARG_REGNUM
)))
1629 /* This function is only correct starting with local register
1632 spu_saved_regs_size (void)
1634 int reg_save_size
= 0;
1637 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1638 if (need_to_save_reg (regno
, 0))
1639 reg_save_size
+= 0x10;
1640 return reg_save_size
;
1644 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1646 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1648 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1649 return emit_insn (gen_movv4si (mem
, reg
));
1653 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1655 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1657 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1658 return emit_insn (gen_movv4si (reg
, mem
));
1661 /* This happens after reload, so we need to expand it. */
1663 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1666 if (satisfies_constraint_K (GEN_INT (imm
)))
1668 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1672 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1673 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1674 if (REGNO (src
) == REGNO (scratch
))
1680 /* Return nonzero if this function is known to have a null epilogue. */
1683 direct_return (void)
1685 if (reload_completed
)
1687 if (cfun
->static_chain_decl
== 0
1688 && (spu_saved_regs_size ()
1690 + crtl
->outgoing_args_size
1691 + crtl
->args
.pretend_args_size
== 0)
1692 && current_function_is_leaf
)
1699 The stack frame looks like this:
1706 prev SP | back chain |
1709 | reg save | crtl->args.pretend_args_size bytes
1712 | saved regs | spu_saved_regs_size() bytes
1715 FP | vars | get_frame_size() bytes
1719 | args | crtl->outgoing_args_size bytes
1729 spu_expand_prologue (void)
1731 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1732 HOST_WIDE_INT total_size
;
1733 HOST_WIDE_INT saved_regs_size
;
1734 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1735 rtx scratch_reg_0
, scratch_reg_1
;
1738 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1739 the "toplevel" insn chain. */
1740 emit_note (NOTE_INSN_DELETED
);
1742 if (flag_pic
&& optimize
== 0)
1743 crtl
->uses_pic_offset_table
= 1;
1745 if (spu_naked_function_p (current_function_decl
))
1748 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1749 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1751 saved_regs_size
= spu_saved_regs_size ();
1752 total_size
= size
+ saved_regs_size
1753 + crtl
->outgoing_args_size
1754 + crtl
->args
.pretend_args_size
;
1756 if (!current_function_is_leaf
1757 || cfun
->calls_alloca
|| total_size
> 0)
1758 total_size
+= STACK_POINTER_OFFSET
;
1760 /* Save this first because code after this might use the link
1761 register as a scratch register. */
1762 if (!current_function_is_leaf
)
1764 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1765 RTX_FRAME_RELATED_P (insn
) = 1;
1770 offset
= -crtl
->args
.pretend_args_size
;
1771 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1772 if (need_to_save_reg (regno
, 1))
1775 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1776 RTX_FRAME_RELATED_P (insn
) = 1;
1780 if (flag_pic
&& crtl
->uses_pic_offset_table
)
1782 rtx pic_reg
= get_pic_reg ();
1783 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1784 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1789 if (flag_stack_check
)
1791 /* We compare against total_size-1 because
1792 ($sp >= total_size) <=> ($sp > total_size-1) */
1793 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1794 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1795 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1796 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1798 emit_move_insn (scratch_v4si
, size_v4si
);
1799 size_v4si
= scratch_v4si
;
1801 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1802 emit_insn (gen_vec_extractv4si
1803 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1804 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1807 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1808 the value of the previous $sp because we save it as the back
1810 if (total_size
<= 2000)
1812 /* In this case we save the back chain first. */
1813 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1815 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1817 else if (satisfies_constraint_K (GEN_INT (-total_size
)))
1819 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1821 emit_insn (gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
)));
1825 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1827 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1829 RTX_FRAME_RELATED_P (insn
) = 1;
1830 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1832 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, real
, REG_NOTES (insn
));
1834 if (total_size
> 2000)
1836 /* Save the back chain ptr */
1837 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1840 if (frame_pointer_needed
)
1842 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1843 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1844 + crtl
->outgoing_args_size
;
1845 /* Set the new frame_pointer */
1846 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1847 RTX_FRAME_RELATED_P (insn
) = 1;
1848 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1850 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1851 real
, REG_NOTES (insn
));
1852 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1856 emit_note (NOTE_INSN_DELETED
);
1860 spu_expand_epilogue (bool sibcall_p
)
1862 int size
= get_frame_size (), offset
, regno
;
1863 HOST_WIDE_INT saved_regs_size
, total_size
;
1864 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1865 rtx jump
, scratch_reg_0
;
1867 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1868 the "toplevel" insn chain. */
1869 emit_note (NOTE_INSN_DELETED
);
1871 if (spu_naked_function_p (current_function_decl
))
1874 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1876 saved_regs_size
= spu_saved_regs_size ();
1877 total_size
= size
+ saved_regs_size
1878 + crtl
->outgoing_args_size
1879 + crtl
->args
.pretend_args_size
;
1881 if (!current_function_is_leaf
1882 || cfun
->calls_alloca
|| total_size
> 0)
1883 total_size
+= STACK_POINTER_OFFSET
;
1887 if (cfun
->calls_alloca
)
1888 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1890 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1893 if (saved_regs_size
> 0)
1895 offset
= -crtl
->args
.pretend_args_size
;
1896 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1897 if (need_to_save_reg (regno
, 1))
1900 frame_emit_load (regno
, sp_reg
, offset
);
1905 if (!current_function_is_leaf
)
1906 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1910 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
1911 jump
= emit_jump_insn (gen__return ());
1912 emit_barrier_after (jump
);
1915 emit_note (NOTE_INSN_DELETED
);
1919 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1923 /* This is inefficient because it ends up copying to a save-register
1924 which then gets saved even though $lr has already been saved. But
1925 it does generate better code for leaf functions and we don't need
1926 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1927 used for __builtin_return_address anyway, so maybe we don't care if
1928 it's inefficient. */
1929 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
1933 /* Given VAL, generate a constant appropriate for MODE.
1934 If MODE is a vector mode, every element will be VAL.
1935 For TImode, VAL will be zero extended to 128 bits. */
1937 spu_const (enum machine_mode mode
, HOST_WIDE_INT val
)
1943 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
1944 || GET_MODE_CLASS (mode
) == MODE_FLOAT
1945 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1946 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1948 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1949 return immed_double_const (val
, 0, mode
);
1951 /* val is the bit representation of the float */
1952 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1953 return hwint_to_const_double (mode
, val
);
1955 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
1956 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
1958 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
1960 units
= GET_MODE_NUNITS (mode
);
1962 v
= rtvec_alloc (units
);
1964 for (i
= 0; i
< units
; ++i
)
1965 RTVEC_ELT (v
, i
) = inner
;
1967 return gen_rtx_CONST_VECTOR (mode
, v
);
1970 /* branch hint stuff */
1972 /* The hardware requires 8 insns between a hint and the branch it
1973 effects. This variable describes how many rtl instructions the
1974 compiler needs to see before inserting a hint. (FIXME: We should
1975 accept less and insert nops to enforce it because hinting is always
1976 profitable for performance, but we do need to be careful of code
1978 int spu_hint_dist
= (8 * 4);
1980 /* Create a MODE vector constant from 4 ints. */
1982 spu_const_from_ints(enum machine_mode mode
, int a
, int b
, int c
, int d
)
1984 unsigned char arr
[16];
1985 arr
[0] = (a
>> 24) & 0xff;
1986 arr
[1] = (a
>> 16) & 0xff;
1987 arr
[2] = (a
>> 8) & 0xff;
1988 arr
[3] = (a
>> 0) & 0xff;
1989 arr
[4] = (b
>> 24) & 0xff;
1990 arr
[5] = (b
>> 16) & 0xff;
1991 arr
[6] = (b
>> 8) & 0xff;
1992 arr
[7] = (b
>> 0) & 0xff;
1993 arr
[8] = (c
>> 24) & 0xff;
1994 arr
[9] = (c
>> 16) & 0xff;
1995 arr
[10] = (c
>> 8) & 0xff;
1996 arr
[11] = (c
>> 0) & 0xff;
1997 arr
[12] = (d
>> 24) & 0xff;
1998 arr
[13] = (d
>> 16) & 0xff;
1999 arr
[14] = (d
>> 8) & 0xff;
2000 arr
[15] = (d
>> 0) & 0xff;
2001 return array_to_constant(mode
, arr
);
2004 /* An array of these is used to propagate hints to predecessor blocks. */
2007 rtx prop_jump
; /* propagated from another block */
2008 basic_block bb
; /* the original block. */
2011 /* The special $hbr register is used to prevent the insn scheduler from
2012 moving hbr insns across instructions which invalidate them. It
2013 should only be used in a clobber, and this function searches for
2014 insns which clobber it. */
2016 insn_clobbers_hbr (rtx insn
)
2018 if (INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2020 rtx parallel
= PATTERN (insn
);
2023 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2025 clobber
= XVECEXP (parallel
, 0, j
);
2026 if (GET_CODE (clobber
) == CLOBBER
2027 && GET_CODE (XEXP (clobber
, 0)) == REG
2028 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2036 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
, int distance
)
2039 rtx hint
, insn
, prev
, next
;
2041 if (before
== 0 || branch
== 0 || target
== 0)
2048 branch_label
= gen_label_rtx ();
2049 LABEL_NUSES (branch_label
)++;
2050 LABEL_PRESERVE_P (branch_label
) = 1;
2051 insn
= emit_label_before (branch_label
, branch
);
2052 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2054 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2055 the current insn is pipe0, dual issue with it. */
2056 prev
= prev_active_insn (before
);
2057 if (prev
&& get_pipe (prev
) == 0)
2058 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2059 else if (get_pipe (before
) == 0 && distance
> spu_hint_dist
)
2061 next
= next_active_insn (before
);
2062 hint
= emit_insn_after (gen_hbr (branch_label
, target
), before
);
2064 PUT_MODE (next
, TImode
);
2068 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2069 PUT_MODE (hint
, TImode
);
2071 recog_memoized (hint
);
2074 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2075 the rtx for the branch target. */
2077 get_branch_target (rtx branch
)
2079 if (GET_CODE (branch
) == JUMP_INSN
)
2083 /* Return statements */
2084 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2085 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2088 if (GET_CODE (PATTERN (branch
)) == ADDR_VEC
2089 || GET_CODE (PATTERN (branch
)) == ADDR_DIFF_VEC
)
2092 set
= single_set (branch
);
2093 src
= SET_SRC (set
);
2094 if (GET_CODE (SET_DEST (set
)) != PC
)
2097 if (GET_CODE (src
) == IF_THEN_ELSE
)
2100 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2103 /* If the more probable case is not a fall through, then
2104 try a branch hint. */
2105 HOST_WIDE_INT prob
= INTVAL (XEXP (note
, 0));
2106 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2107 && GET_CODE (XEXP (src
, 1)) != PC
)
2108 lab
= XEXP (src
, 1);
2109 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2110 && GET_CODE (XEXP (src
, 2)) != PC
)
2111 lab
= XEXP (src
, 2);
2115 if (GET_CODE (lab
) == RETURN
)
2116 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2124 else if (GET_CODE (branch
) == CALL_INSN
)
2127 /* All of our call patterns are in a PARALLEL and the CALL is
2128 the first pattern in the PARALLEL. */
2129 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2131 call
= XVECEXP (PATTERN (branch
), 0, 0);
2132 if (GET_CODE (call
) == SET
)
2133 call
= SET_SRC (call
);
2134 if (GET_CODE (call
) != CALL
)
2136 return XEXP (XEXP (call
, 0), 0);
2142 insert_branch_hints (void)
2144 struct spu_bb_info
*spu_bb_info
;
2145 rtx branch
, insn
, next
;
2146 rtx branch_target
= 0;
2147 int branch_addr
= 0, insn_addr
, head_addr
;
2152 (struct spu_bb_info
*) xcalloc (last_basic_block
+ 1,
2153 sizeof (struct spu_bb_info
));
2155 /* We need exact insn addresses and lengths. */
2156 shorten_branches (get_insns ());
2158 FOR_EACH_BB_REVERSE (bb
)
2160 head_addr
= INSN_ADDRESSES (INSN_UID (BB_HEAD (bb
)));
2162 if (spu_bb_info
[bb
->index
].prop_jump
)
2164 branch
= spu_bb_info
[bb
->index
].prop_jump
;
2165 branch_target
= get_branch_target (branch
);
2166 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2168 /* Search from end of a block to beginning. In this loop, find
2169 jumps which need a branch and emit them only when:
2170 - it's an indirect branch and we're at the insn which sets
2172 - we're at an insn that will invalidate the hint. e.g., a
2173 call, another hint insn, inline asm that clobbers $hbr, and
2174 some inlined operations (divmodsi4). Don't consider jumps
2175 because they are only at the end of a block and are
2176 considered when we are deciding whether to propagate
2177 - we're getting too far away from the branch. The hbr insns
2178 only have a signed 10-bit offset
2179 We go back as far as possible so the branch will be considered
2180 for propagation when we get to the beginning of the block. */
2182 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2186 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2188 && ((GET_CODE (branch_target
) == REG
2189 && set_of (branch_target
, insn
) != NULL_RTX
)
2190 || insn_clobbers_hbr (insn
)
2191 || branch_addr
- insn_addr
> 600))
2193 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2194 if (insn
!= BB_END (bb
)
2195 && branch_addr
- next_addr
>= spu_hint_dist
)
2199 "hint for %i in block %i before %i\n",
2200 INSN_UID (branch
), bb
->index
, INSN_UID (next
));
2201 spu_emit_branch_hint (next
, branch
, branch_target
,
2202 branch_addr
- next_addr
);
2207 /* JUMP_P will only be true at the end of a block. When
2208 branch is already set it means we've previously decided
2209 to propagate a hint for that branch into this block. */
2210 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2213 if ((branch_target
= get_branch_target (insn
)))
2216 branch_addr
= insn_addr
;
2220 /* When a branch hint is emitted it will be inserted
2221 before "next". Make sure next is the beginning of a
2222 cycle to minimize impact on the scheduled insns. */
2223 if (GET_MODE (insn
) == TImode
)
2226 if (insn
== BB_HEAD (bb
))
2232 /* If we haven't emitted a hint for this branch yet, it might
2233 be profitable to emit it in one of the predecessor blocks,
2234 especially for loops. */
2236 basic_block prev
= 0, prop
= 0, prev2
= 0;
2237 int loop_exit
= 0, simple_loop
= 0;
2240 next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2242 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2243 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2244 prev
= EDGE_PRED (bb
, j
)->src
;
2246 prev2
= EDGE_PRED (bb
, j
)->src
;
2248 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2249 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2251 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2254 /* If this branch is a loop exit then propagate to previous
2255 fallthru block. This catches the cases when it is a simple
2256 loop or when there is an initial branch into the loop. */
2257 if (prev
&& loop_exit
&& prev
->loop_depth
<= bb
->loop_depth
)
2260 /* If there is only one adjacent predecessor. Don't propagate
2261 outside this loop. This loop_depth test isn't perfect, but
2262 I'm not sure the loop_father member is valid at this point. */
2263 else if (prev
&& single_pred_p (bb
)
2264 && prev
->loop_depth
== bb
->loop_depth
)
2267 /* If this is the JOIN block of a simple IF-THEN then
2268 propagate the hint to the HEADER block. */
2269 else if (prev
&& prev2
2270 && EDGE_COUNT (bb
->preds
) == 2
2271 && EDGE_COUNT (prev
->preds
) == 1
2272 && EDGE_PRED (prev
, 0)->src
== prev2
2273 && prev2
->loop_depth
== bb
->loop_depth
2274 && GET_CODE (branch_target
) != REG
)
2277 /* Don't propagate when:
2278 - this is a simple loop and the hint would be too far
2279 - this is not a simple loop and there are 16 insns in
2281 - the predecessor block ends in a branch that will be
2283 - the predecessor block ends in an insn that invalidates
2287 && (bbend
= BB_END (prop
))
2288 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2289 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2290 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2293 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2294 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2295 bb
->index
, prop
->index
, bb
->loop_depth
,
2296 INSN_UID (branch
), loop_exit
, simple_loop
,
2297 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2299 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2300 spu_bb_info
[prop
->index
].bb
= bb
;
2302 else if (next
&& branch_addr
- next_addr
>= spu_hint_dist
)
2305 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2306 INSN_UID (branch
), bb
->index
, INSN_UID (next
));
2307 spu_emit_branch_hint (next
, branch
, branch_target
,
2308 branch_addr
- next_addr
);
2316 /* Emit a nop for INSN such that the two will dual issue. This assumes
2317 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2318 We check for TImode to handle a MULTI1 insn which has dual issued its
2319 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2322 emit_nop_for_insn (rtx insn
)
2326 p
= get_pipe (insn
);
2327 if (p
== 1 && GET_MODE (insn
) == TImode
)
2329 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2330 PUT_MODE (new_insn
, TImode
);
2331 PUT_MODE (insn
, VOIDmode
);
2334 new_insn
= emit_insn_after (gen_lnop (), insn
);
2337 /* Insert nops in basic blocks to meet dual issue alignment
2342 rtx insn
, next_insn
, prev_insn
;
2346 /* This sets up INSN_ADDRESSES. */
2347 shorten_branches (get_insns ());
2349 /* Keep track of length added by nops. */
2353 for (insn
= get_insns (); insn
; insn
= next_insn
)
2355 next_insn
= next_active_insn (insn
);
2356 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2357 if (GET_MODE (insn
) == TImode
2359 && GET_MODE (next_insn
) != TImode
2360 && ((addr
+ length
) & 7) != 0)
2362 /* prev_insn will always be set because the first insn is
2363 always 8-byte aligned. */
2364 emit_nop_for_insn (prev_insn
);
2372 spu_machine_dependent_reorg (void)
2376 if (TARGET_BRANCH_HINTS
)
2377 insert_branch_hints ();
2383 /* Insn scheduling routines, primarily for dual issue. */
2385 spu_sched_issue_rate (void)
2391 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED
,
2392 int verbose ATTRIBUTE_UNUSED
, rtx insn
,
2395 if (GET_CODE (PATTERN (insn
)) != USE
2396 && GET_CODE (PATTERN (insn
)) != CLOBBER
2397 && get_pipe (insn
) != -2)
2399 return can_issue_more
;
2406 /* Handle inline asm */
2407 if (INSN_CODE (insn
) == -1)
2409 t
= get_attr_type (insn
);
2425 case TYPE_IPREFETCH
:
2442 spu_sched_adjust_priority (rtx insn
, int pri
)
2444 int p
= get_pipe (insn
);
2445 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2447 if (GET_CODE (PATTERN (insn
)) == USE
2448 || GET_CODE (PATTERN (insn
)) == CLOBBER
2451 /* Schedule pipe0 insns early for greedier dual issue. */
2457 /* INSN is dependent on DEP_INSN. */
2459 spu_sched_adjust_cost (rtx insn
, rtx link ATTRIBUTE_UNUSED
,
2460 rtx dep_insn ATTRIBUTE_UNUSED
, int cost
)
2462 if (GET_CODE (insn
) == CALL_INSN
)
2464 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2465 scheduler makes every insn in a block anti-dependent on the final
2466 jump_insn. We adjust here so higher cost insns will get scheduled
2468 if (GET_CODE (insn
) == JUMP_INSN
&& REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
2469 return insn_cost (dep_insn
) - 3;
2473 /* Create a CONST_DOUBLE from a string. */
2475 spu_float_const (const char *string
, enum machine_mode mode
)
2477 REAL_VALUE_TYPE value
;
2478 value
= REAL_VALUE_ATOF (string
, mode
);
2479 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
2483 spu_constant_address_p (rtx x
)
2485 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
2486 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
2487 || GET_CODE (x
) == HIGH
);
2490 static enum spu_immediate
2491 which_immediate_load (HOST_WIDE_INT val
)
2493 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
2495 if (val
>= -0x8000 && val
<= 0x7fff)
2497 if (val
>= 0 && val
<= 0x3ffff)
2499 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
2501 if ((val
& 0xffff) == 0)
2507 /* Return true when OP can be loaded by one of the il instructions, or
2508 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
2510 immediate_load_p (rtx op
, enum machine_mode mode
)
2512 if (CONSTANT_P (op
))
2514 enum immediate_class c
= classify_immediate (op
, mode
);
2515 return c
== IC_IL1
|| c
== IC_IL1s
2516 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
2521 /* Return true if the first SIZE bytes of arr is a constant that can be
2522 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2523 represent the size and offset of the instruction to use. */
2525 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
2527 int cpat
, run
, i
, start
;
2531 for (i
= 0; i
< size
&& cpat
; i
++)
2539 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
2541 else if (arr
[i
] == 0)
2543 while (arr
[i
+run
] == run
&& i
+run
< 16)
2545 if (run
!= 4 && run
!= 8)
2550 if ((i
& (run
-1)) != 0)
2557 if (cpat
&& (run
|| size
< 16))
2564 *pstart
= start
== -1 ? 16-run
: start
;
2570 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2571 it into a register. MODE is only valid when OP is a CONST_INT. */
2572 static enum immediate_class
2573 classify_immediate (rtx op
, enum machine_mode mode
)
2576 unsigned char arr
[16];
2577 int i
, j
, repeated
, fsmbi
, repeat
;
2579 gcc_assert (CONSTANT_P (op
));
2581 if (GET_MODE (op
) != VOIDmode
)
2582 mode
= GET_MODE (op
);
2584 /* A V4SI const_vector with all identical symbols is ok. */
2587 && GET_CODE (op
) == CONST_VECTOR
2588 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
2589 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
2590 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
2591 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
2592 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
2593 op
= CONST_VECTOR_ELT (op
, 0);
2595 switch (GET_CODE (op
))
2599 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
2602 /* We can never know if the resulting address fits in 18 bits and can be
2603 loaded with ila. For now, assume the address will not overflow if
2604 the displacement is "small" (fits 'K' constraint). */
2605 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
2607 rtx sym
= XEXP (XEXP (op
, 0), 0);
2608 rtx cst
= XEXP (XEXP (op
, 0), 1);
2610 if (GET_CODE (sym
) == SYMBOL_REF
2611 && GET_CODE (cst
) == CONST_INT
2612 && satisfies_constraint_K (cst
))
2621 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
2622 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
2623 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
2629 constant_to_array (mode
, op
, arr
);
2631 /* Check that each 4-byte slot is identical. */
2633 for (i
= 4; i
< 16; i
+= 4)
2634 for (j
= 0; j
< 4; j
++)
2635 if (arr
[j
] != arr
[i
+ j
])
2640 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2641 val
= trunc_int_for_mode (val
, SImode
);
2643 if (which_immediate_load (val
) != SPU_NONE
)
2647 /* Any mode of 2 bytes or smaller can be loaded with an il
2649 gcc_assert (GET_MODE_SIZE (mode
) > 2);
2653 for (i
= 0; i
< 16 && fsmbi
; i
++)
2654 if (arr
[i
] != 0 && repeat
== 0)
2656 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
2659 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
2661 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
2674 static enum spu_immediate
2675 which_logical_immediate (HOST_WIDE_INT val
)
2677 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
2679 if (val
>= -0x200 && val
<= 0x1ff)
2681 if (val
>= 0 && val
<= 0xffff)
2683 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
2685 val
= trunc_int_for_mode (val
, HImode
);
2686 if (val
>= -0x200 && val
<= 0x1ff)
2688 if ((val
& 0xff) == ((val
>> 8) & 0xff))
2690 val
= trunc_int_for_mode (val
, QImode
);
2691 if (val
>= -0x200 && val
<= 0x1ff)
2698 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2701 const_vector_immediate_p (rtx x
)
2704 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
2705 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
2706 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
2707 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
2713 logical_immediate_p (rtx op
, enum machine_mode mode
)
2716 unsigned char arr
[16];
2719 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2720 || GET_CODE (op
) == CONST_VECTOR
);
2722 if (GET_CODE (op
) == CONST_VECTOR
2723 && !const_vector_immediate_p (op
))
2726 if (GET_MODE (op
) != VOIDmode
)
2727 mode
= GET_MODE (op
);
2729 constant_to_array (mode
, op
, arr
);
2731 /* Check that bytes are repeated. */
2732 for (i
= 4; i
< 16; i
+= 4)
2733 for (j
= 0; j
< 4; j
++)
2734 if (arr
[j
] != arr
[i
+ j
])
2737 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2738 val
= trunc_int_for_mode (val
, SImode
);
2740 i
= which_logical_immediate (val
);
2741 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
2745 iohl_immediate_p (rtx op
, enum machine_mode mode
)
2748 unsigned char arr
[16];
2751 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2752 || GET_CODE (op
) == CONST_VECTOR
);
2754 if (GET_CODE (op
) == CONST_VECTOR
2755 && !const_vector_immediate_p (op
))
2758 if (GET_MODE (op
) != VOIDmode
)
2759 mode
= GET_MODE (op
);
2761 constant_to_array (mode
, op
, arr
);
2763 /* Check that bytes are repeated. */
2764 for (i
= 4; i
< 16; i
+= 4)
2765 for (j
= 0; j
< 4; j
++)
2766 if (arr
[j
] != arr
[i
+ j
])
2769 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2770 val
= trunc_int_for_mode (val
, SImode
);
2772 return val
>= 0 && val
<= 0xffff;
2776 arith_immediate_p (rtx op
, enum machine_mode mode
,
2777 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
2780 unsigned char arr
[16];
2783 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2784 || GET_CODE (op
) == CONST_VECTOR
);
2786 if (GET_CODE (op
) == CONST_VECTOR
2787 && !const_vector_immediate_p (op
))
2790 if (GET_MODE (op
) != VOIDmode
)
2791 mode
= GET_MODE (op
);
2793 constant_to_array (mode
, op
, arr
);
2795 if (VECTOR_MODE_P (mode
))
2796 mode
= GET_MODE_INNER (mode
);
2798 bytes
= GET_MODE_SIZE (mode
);
2799 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
2801 /* Check that bytes are repeated. */
2802 for (i
= bytes
; i
< 16; i
+= bytes
)
2803 for (j
= 0; j
< bytes
; j
++)
2804 if (arr
[j
] != arr
[i
+ j
])
2808 for (j
= 1; j
< bytes
; j
++)
2809 val
= (val
<< 8) | arr
[j
];
2811 val
= trunc_int_for_mode (val
, mode
);
2813 return val
>= low
&& val
<= high
;
2817 - any 32-bit constant (SImode, SFmode)
2818 - any constant that can be generated with fsmbi (any mode)
2819 - a 64-bit constant where the high and low bits are identical
2821 - a 128-bit constant where the four 32-bit words match. */
2823 spu_legitimate_constant_p (rtx x
)
2825 if (GET_CODE (x
) == HIGH
)
2827 /* V4SI with all identical symbols is valid. */
2829 && GET_MODE (x
) == V4SImode
2830 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
2831 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
2832 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
2833 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
2834 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
2835 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
2837 if (GET_CODE (x
) == CONST_VECTOR
2838 && !const_vector_immediate_p (x
))
2843 /* Valid address are:
2844 - symbol_ref, label_ref, const
2846 - reg + const, where either reg or const is 16 byte aligned
2847 - reg + reg, alignment doesn't matter
2848 The alignment matters in the reg+const case because lqd and stqd
2849 ignore the 4 least significant bits of the const. (TODO: It might be
2850 preferable to allow any alignment and fix it up when splitting.) */
2852 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED
,
2853 rtx x
, int reg_ok_strict
)
2855 if (mode
== TImode
&& GET_CODE (x
) == AND
2856 && GET_CODE (XEXP (x
, 1)) == CONST_INT
2857 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) -16)
2859 switch (GET_CODE (x
))
2863 return !TARGET_LARGE_MEM
;
2866 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
2868 rtx sym
= XEXP (XEXP (x
, 0), 0);
2869 rtx cst
= XEXP (XEXP (x
, 0), 1);
2871 /* Accept any symbol_ref + constant, assuming it does not
2872 wrap around the local store addressability limit. */
2873 if (GET_CODE (sym
) == SYMBOL_REF
&& GET_CODE (cst
) == CONST_INT
)
2879 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
2883 gcc_assert (GET_CODE (x
) == REG
);
2886 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
2891 rtx op0
= XEXP (x
, 0);
2892 rtx op1
= XEXP (x
, 1);
2893 if (GET_CODE (op0
) == SUBREG
)
2894 op0
= XEXP (op0
, 0);
2895 if (GET_CODE (op1
) == SUBREG
)
2896 op1
= XEXP (op1
, 0);
2897 /* We can't just accept any aligned register because CSE can
2898 change it to a register that is not marked aligned and then
2899 recog will fail. So we only accept frame registers because
2900 they will only be changed to other frame registers. */
2901 if (GET_CODE (op0
) == REG
2902 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
2903 && GET_CODE (op1
) == CONST_INT
2904 && INTVAL (op1
) >= -0x2000
2905 && INTVAL (op1
) <= 0x1fff
2906 && (regno_aligned_for_load (REGNO (op0
)) || (INTVAL (op1
) & 15) == 0))
2908 if (GET_CODE (op0
) == REG
2909 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
2910 && GET_CODE (op1
) == REG
2911 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
2922 /* When the address is reg + const_int, force the const_int into a
2925 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
2926 enum machine_mode mode
)
2929 /* Make sure both operands are registers. */
2930 if (GET_CODE (x
) == PLUS
)
2934 if (ALIGNED_SYMBOL_REF_P (op0
))
2936 op0
= force_reg (Pmode
, op0
);
2937 mark_reg_pointer (op0
, 128);
2939 else if (GET_CODE (op0
) != REG
)
2940 op0
= force_reg (Pmode
, op0
);
2941 if (ALIGNED_SYMBOL_REF_P (op1
))
2943 op1
= force_reg (Pmode
, op1
);
2944 mark_reg_pointer (op1
, 128);
2946 else if (GET_CODE (op1
) != REG
)
2947 op1
= force_reg (Pmode
, op1
);
2948 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
2949 if (spu_legitimate_address (mode
, x
, 0))
2955 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2956 struct attribute_spec.handler. */
2958 spu_handle_fndecl_attribute (tree
* node
,
2960 tree args ATTRIBUTE_UNUSED
,
2961 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
2963 if (TREE_CODE (*node
) != FUNCTION_DECL
)
2965 warning (0, "`%s' attribute only applies to functions",
2966 IDENTIFIER_POINTER (name
));
2967 *no_add_attrs
= true;
2973 /* Handle the "vector" attribute. */
2975 spu_handle_vector_attribute (tree
* node
, tree name
,
2976 tree args ATTRIBUTE_UNUSED
,
2977 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
2979 tree type
= *node
, result
= NULL_TREE
;
2980 enum machine_mode mode
;
2983 while (POINTER_TYPE_P (type
)
2984 || TREE_CODE (type
) == FUNCTION_TYPE
2985 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
2986 type
= TREE_TYPE (type
);
2988 mode
= TYPE_MODE (type
);
2990 unsigned_p
= TYPE_UNSIGNED (type
);
2994 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
2997 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3000 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3003 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3006 result
= V4SF_type_node
;
3009 result
= V2DF_type_node
;
3015 /* Propagate qualifiers attached to the element type
3016 onto the vector type. */
3017 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3018 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3020 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3023 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name
));
3025 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
3030 /* Return nonzero if FUNC is a naked function. */
3032 spu_naked_function_p (tree func
)
3036 if (TREE_CODE (func
) != FUNCTION_DECL
)
3039 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3040 return a
!= NULL_TREE
;
3044 spu_initial_elimination_offset (int from
, int to
)
3046 int saved_regs_size
= spu_saved_regs_size ();
3048 if (!current_function_is_leaf
|| crtl
->outgoing_args_size
3049 || get_frame_size () || saved_regs_size
)
3050 sp_offset
= STACK_POINTER_OFFSET
;
3051 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3052 return (sp_offset
+ crtl
->outgoing_args_size
);
3053 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3055 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3056 return sp_offset
+ crtl
->outgoing_args_size
3057 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3058 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3059 return get_frame_size () + saved_regs_size
+ sp_offset
;
3064 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3066 enum machine_mode mode
= TYPE_MODE (type
);
3067 int byte_size
= ((mode
== BLKmode
)
3068 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3070 /* Make sure small structs are left justified in a register. */
3071 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3072 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3074 enum machine_mode smode
;
3077 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3078 int n
= byte_size
/ UNITS_PER_WORD
;
3079 v
= rtvec_alloc (nregs
);
3080 for (i
= 0; i
< n
; i
++)
3082 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3083 gen_rtx_REG (TImode
,
3086 GEN_INT (UNITS_PER_WORD
* i
));
3087 byte_size
-= UNITS_PER_WORD
;
3095 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3097 gen_rtx_EXPR_LIST (VOIDmode
,
3098 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3099 GEN_INT (UNITS_PER_WORD
* n
));
3101 return gen_rtx_PARALLEL (mode
, v
);
3103 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3107 spu_function_arg (CUMULATIVE_ARGS cum
,
3108 enum machine_mode mode
,
3109 tree type
, int named ATTRIBUTE_UNUSED
)
3113 if (cum
>= MAX_REGISTER_ARGS
)
3116 byte_size
= ((mode
== BLKmode
)
3117 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3119 /* The ABI does not allow parameters to be passed partially in
3120 reg and partially in stack. */
3121 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3124 /* Make sure small structs are left justified in a register. */
3125 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3126 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3128 enum machine_mode smode
;
3132 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3133 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3134 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
3136 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3139 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
3142 /* Variable sized types are passed by reference. */
3144 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
3145 enum machine_mode mode ATTRIBUTE_UNUSED
,
3146 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3148 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3154 /* Create and return the va_list datatype.
3156 On SPU, va_list is an array type equivalent to
3158 typedef struct __va_list_tag
3160 void *__args __attribute__((__aligned(16)));
3161 void *__skip __attribute__((__aligned(16)));
3165 where __args points to the arg that will be returned by the next
3166 va_arg(), and __skip points to the previous stack frame such that
3167 when __args == __skip we should advance __args by 32 bytes. */
3169 spu_build_builtin_va_list (void)
3171 tree f_args
, f_skip
, record
, type_decl
;
3174 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3177 build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3179 f_args
= build_decl (FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3180 f_skip
= build_decl (FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3182 DECL_FIELD_CONTEXT (f_args
) = record
;
3183 DECL_ALIGN (f_args
) = 128;
3184 DECL_USER_ALIGN (f_args
) = 1;
3186 DECL_FIELD_CONTEXT (f_skip
) = record
;
3187 DECL_ALIGN (f_skip
) = 128;
3188 DECL_USER_ALIGN (f_skip
) = 1;
3190 TREE_CHAIN (record
) = type_decl
;
3191 TYPE_NAME (record
) = type_decl
;
3192 TYPE_FIELDS (record
) = f_args
;
3193 TREE_CHAIN (f_args
) = f_skip
;
3195 /* We know this is being padded and we want it too. It is an internal
3196 type so hide the warnings from the user. */
3198 warn_padded
= false;
3200 layout_type (record
);
3204 /* The correct type is an array type of one element. */
3205 return build_array_type (record
, build_index_type (size_zero_node
));
3208 /* Implement va_start by filling the va_list structure VALIST.
3209 NEXTARG points to the first anonymous stack argument.
3211 The following global variables are used to initialize
3212 the va_list structure:
3215 the CUMULATIVE_ARGS for this function
3217 crtl->args.arg_offset_rtx:
3218 holds the offset of the first anonymous stack argument
3219 (relative to the virtual arg pointer). */
3222 spu_va_start (tree valist
, rtx nextarg
)
3224 tree f_args
, f_skip
;
3227 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3228 f_skip
= TREE_CHAIN (f_args
);
3230 valist
= build_va_arg_indirect_ref (valist
);
3232 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3234 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3236 /* Find the __args area. */
3237 t
= make_tree (TREE_TYPE (args
), nextarg
);
3238 if (crtl
->args
.pretend_args_size
> 0)
3239 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (args
), t
,
3240 size_int (-STACK_POINTER_OFFSET
));
3241 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
3242 TREE_SIDE_EFFECTS (t
) = 1;
3243 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3245 /* Find the __skip area. */
3246 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
3247 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (skip
), t
,
3248 size_int (crtl
->args
.pretend_args_size
3249 - STACK_POINTER_OFFSET
));
3250 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
3251 TREE_SIDE_EFFECTS (t
) = 1;
3252 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3255 /* Gimplify va_arg by updating the va_list structure
3256 VALIST as required to retrieve an argument of type
3257 TYPE, and returning that argument.
3259 ret = va_arg(VALIST, TYPE);
3261 generates code equivalent to:
3263 paddedsize = (sizeof(TYPE) + 15) & -16;
3264 if (VALIST.__args + paddedsize > VALIST.__skip
3265 && VALIST.__args <= VALIST.__skip)
3266 addr = VALIST.__skip + 32;
3268 addr = VALIST.__args;
3269 VALIST.__args = addr + paddedsize;
3270 ret = *(TYPE *)addr;
3273 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
3274 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
3276 tree f_args
, f_skip
;
3278 HOST_WIDE_INT size
, rsize
;
3279 tree paddedsize
, addr
, tmp
;
3280 bool pass_by_reference_p
;
3282 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3283 f_skip
= TREE_CHAIN (f_args
);
3285 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3287 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3289 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3291 addr
= create_tmp_var (ptr_type_node
, "va_arg");
3292 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3294 /* if an object is dynamically sized, a pointer to it is passed
3295 instead of the object itself. */
3296 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
3298 if (pass_by_reference_p
)
3299 type
= build_pointer_type (type
);
3300 size
= int_size_in_bytes (type
);
3301 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
3303 /* build conditional expression to calculate addr. The expression
3304 will be gimplified later. */
3305 paddedsize
= size_int (rsize
);
3306 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (args
), paddedsize
);
3307 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
3308 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
3309 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
3310 unshare_expr (skip
)));
3312 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
3313 build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (skip
),
3314 size_int (32)), unshare_expr (args
));
3316 gimplify_assign (addr
, tmp
, pre_p
);
3318 /* update VALIST.__args */
3319 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
3320 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
3322 addr
= fold_convert (build_pointer_type (type
), addr
);
3324 if (pass_by_reference_p
)
3325 addr
= build_va_arg_indirect_ref (addr
);
3327 return build_va_arg_indirect_ref (addr
);
3330 /* Save parameter registers starting with the register that corresponds
3331 to the first unnamed parameters. If the first unnamed parameter is
3332 in the stack then save no registers. Set pretend_args_size to the
3333 amount of space needed to save the registers. */
3335 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
3336 tree type
, int *pretend_size
, int no_rtl
)
3345 /* cum currently points to the last named argument, we want to
3346 start at the next argument. */
3347 FUNCTION_ARG_ADVANCE (ncum
, mode
, type
, 1);
3349 offset
= -STACK_POINTER_OFFSET
;
3350 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
3352 tmp
= gen_frame_mem (V4SImode
,
3353 plus_constant (virtual_incoming_args_rtx
,
3355 emit_move_insn (tmp
,
3356 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
3359 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
3364 spu_conditional_register_usage (void)
3368 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
3369 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
3373 /* This is called to decide when we can simplify a load instruction. We
3374 must only return true for registers which we know will always be
3375 aligned. Taking into account that CSE might replace this reg with
3376 another one that has not been marked aligned.
3377 So this is really only true for frame, stack and virtual registers,
3378 which we know are always aligned and should not be adversely effected
3381 regno_aligned_for_load (int regno
)
3383 return regno
== FRAME_POINTER_REGNUM
3384 || (frame_pointer_needed
&& regno
== HARD_FRAME_POINTER_REGNUM
)
3385 || regno
== ARG_POINTER_REGNUM
3386 || regno
== STACK_POINTER_REGNUM
3387 || (regno
>= FIRST_VIRTUAL_REGISTER
3388 && regno
<= LAST_VIRTUAL_REGISTER
);
3391 /* Return TRUE when mem is known to be 16-byte aligned. */
3393 aligned_mem_p (rtx mem
)
3395 if (MEM_ALIGN (mem
) >= 128)
3397 if (GET_MODE_SIZE (GET_MODE (mem
)) >= 16)
3399 if (GET_CODE (XEXP (mem
, 0)) == PLUS
)
3401 rtx p0
= XEXP (XEXP (mem
, 0), 0);
3402 rtx p1
= XEXP (XEXP (mem
, 0), 1);
3403 if (regno_aligned_for_load (REGNO (p0
)))
3405 if (GET_CODE (p1
) == REG
&& regno_aligned_for_load (REGNO (p1
)))
3407 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
3411 else if (GET_CODE (XEXP (mem
, 0)) == REG
)
3413 if (regno_aligned_for_load (REGNO (XEXP (mem
, 0))))
3416 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem
, 0)))
3418 else if (GET_CODE (XEXP (mem
, 0)) == CONST
)
3420 rtx p0
= XEXP (XEXP (XEXP (mem
, 0), 0), 0);
3421 rtx p1
= XEXP (XEXP (XEXP (mem
, 0), 0), 1);
3422 if (GET_CODE (p0
) == SYMBOL_REF
3423 && GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
3429 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3430 into its SYMBOL_REF_FLAGS. */
3432 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
3434 default_encode_section_info (decl
, rtl
, first
);
3436 /* If a variable has a forced alignment to < 16 bytes, mark it with
3437 SYMBOL_FLAG_ALIGN1. */
3438 if (TREE_CODE (decl
) == VAR_DECL
3439 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
3440 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
3443 /* Return TRUE if we are certain the mem refers to a complete object
3444 which is both 16-byte aligned and padded to a 16-byte boundary. This
3445 would make it safe to store with a single instruction.
3446 We guarantee the alignment and padding for static objects by aligning
3447 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3448 FIXME: We currently cannot guarantee this for objects on the stack
3449 because assign_parm_setup_stack calls assign_stack_local with the
3450 alignment of the parameter mode and in that case the alignment never
3451 gets adjusted by LOCAL_ALIGNMENT. */
3453 store_with_one_insn_p (rtx mem
)
3455 rtx addr
= XEXP (mem
, 0);
3456 if (GET_MODE (mem
) == BLKmode
)
3458 /* Only static objects. */
3459 if (GET_CODE (addr
) == SYMBOL_REF
)
3461 /* We use the associated declaration to make sure the access is
3462 referring to the whole object.
3463 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
3464 if it is necessary. Will there be cases where one exists, and
3465 the other does not? Will there be cases where both exist, but
3466 have different types? */
3467 tree decl
= MEM_EXPR (mem
);
3469 && TREE_CODE (decl
) == VAR_DECL
3470 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
3472 decl
= SYMBOL_REF_DECL (addr
);
3474 && TREE_CODE (decl
) == VAR_DECL
3475 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
3482 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
3484 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
3487 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
3489 rtx from
= SUBREG_REG (ops
[1]);
3490 enum machine_mode imode
= GET_MODE (from
);
3492 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
3493 && GET_MODE_CLASS (imode
) == MODE_INT
3494 && subreg_lowpart_p (ops
[1]));
3496 if (GET_MODE_SIZE (imode
) < 4)
3498 from
= gen_rtx_SUBREG (SImode
, from
, 0);
3502 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
3504 enum insn_code icode
= convert_optab_handler (trunc_optab
, mode
, imode
)->insn_code
;
3505 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
3508 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
3512 /* At least one of the operands needs to be a register. */
3513 if ((reload_in_progress
| reload_completed
) == 0
3514 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
3516 rtx temp
= force_reg (mode
, ops
[1]);
3517 emit_move_insn (ops
[0], temp
);
3520 if (reload_in_progress
|| reload_completed
)
3522 if (CONSTANT_P (ops
[1]))
3523 return spu_split_immediate (ops
);
3528 if (GET_CODE (ops
[0]) == MEM
)
3530 if (!spu_valid_move (ops
))
3532 emit_insn (gen_store (ops
[0], ops
[1], gen_reg_rtx (TImode
),
3533 gen_reg_rtx (TImode
)));
3537 else if (GET_CODE (ops
[1]) == MEM
)
3539 if (!spu_valid_move (ops
))
3542 (ops
[0], ops
[1], gen_reg_rtx (TImode
),
3543 gen_reg_rtx (SImode
)));
3547 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3549 if (GET_CODE (ops
[1]) == CONST_INT
)
3551 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
3552 if (val
!= INTVAL (ops
[1]))
3554 emit_move_insn (ops
[0], GEN_INT (val
));
3563 spu_split_load (rtx
* ops
)
3565 enum machine_mode mode
= GET_MODE (ops
[0]);
3566 rtx addr
, load
, rot
, mem
, p0
, p1
;
3569 addr
= XEXP (ops
[1], 0);
3573 if (GET_CODE (addr
) == PLUS
)
3576 aligned reg + aligned reg => lqx
3577 aligned reg + unaligned reg => lqx, rotqby
3578 aligned reg + aligned const => lqd
3579 aligned reg + unaligned const => lqd, rotqbyi
3580 unaligned reg + aligned reg => lqx, rotqby
3581 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3582 unaligned reg + aligned const => lqd, rotqby
3583 unaligned reg + unaligned const -> not allowed by legitimate address
3585 p0
= XEXP (addr
, 0);
3586 p1
= XEXP (addr
, 1);
3587 if (REG_P (p0
) && !regno_aligned_for_load (REGNO (p0
)))
3589 if (REG_P (p1
) && !regno_aligned_for_load (REGNO (p1
)))
3591 emit_insn (gen_addsi3 (ops
[3], p0
, p1
));
3599 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
3601 rot_amt
= INTVAL (p1
) & 15;
3602 p1
= GEN_INT (INTVAL (p1
) & -16);
3603 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
3605 else if (REG_P (p1
) && !regno_aligned_for_load (REGNO (p1
)))
3609 else if (GET_CODE (addr
) == REG
)
3611 if (!regno_aligned_for_load (REGNO (addr
)))
3614 else if (GET_CODE (addr
) == CONST
)
3616 if (GET_CODE (XEXP (addr
, 0)) == PLUS
3617 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
3618 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
3620 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
3622 addr
= gen_rtx_CONST (Pmode
,
3623 gen_rtx_PLUS (Pmode
,
3624 XEXP (XEXP (addr
, 0), 0),
3625 GEN_INT (rot_amt
& -16)));
3627 addr
= XEXP (XEXP (addr
, 0), 0);
3632 else if (GET_CODE (addr
) == CONST_INT
)
3634 rot_amt
= INTVAL (addr
);
3635 addr
= GEN_INT (rot_amt
& -16);
3637 else if (!ALIGNED_SYMBOL_REF_P (addr
))
3640 if (GET_MODE_SIZE (mode
) < 4)
3641 rot_amt
+= GET_MODE_SIZE (mode
) - 4;
3647 emit_insn (gen_addsi3 (ops
[3], rot
, GEN_INT (rot_amt
)));
3654 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
3655 mem
= change_address (ops
[1], TImode
, addr
);
3657 emit_insn (gen_movti (load
, mem
));
3660 emit_insn (gen_rotqby_ti (load
, load
, rot
));
3662 emit_insn (gen_rotlti3 (load
, load
, GEN_INT (rot_amt
* 8)));
3664 if (reload_completed
)
3665 emit_move_insn (ops
[0], gen_rtx_REG (GET_MODE (ops
[0]), REGNO (load
)));
3667 emit_insn (gen_spu_convert (ops
[0], load
));
3671 spu_split_store (rtx
* ops
)
3673 enum machine_mode mode
= GET_MODE (ops
[0]);
3676 rtx addr
, p0
, p1
, p1_lo
, smem
;
3680 addr
= XEXP (ops
[0], 0);
3682 if (GET_CODE (addr
) == PLUS
)
3685 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3686 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3687 aligned reg + aligned const => lqd, c?d, shuf, stqx
3688 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3689 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3690 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3691 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3692 unaligned reg + unaligned const -> not allowed by legitimate address
3695 p0
= XEXP (addr
, 0);
3696 p1
= p1_lo
= XEXP (addr
, 1);
3697 if (GET_CODE (p0
) == REG
&& GET_CODE (p1
) == CONST_INT
)
3699 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
3700 p1
= GEN_INT (INTVAL (p1
) & -16);
3701 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
3704 else if (GET_CODE (addr
) == REG
)
3708 p1
= p1_lo
= const0_rtx
;
3713 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
3714 p1
= 0; /* aform doesn't use p1 */
3716 if (ALIGNED_SYMBOL_REF_P (addr
))
3718 else if (GET_CODE (addr
) == CONST
)
3720 if (GET_CODE (XEXP (addr
, 0)) == PLUS
3721 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
3722 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
3724 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
3726 addr
= gen_rtx_CONST (Pmode
,
3727 gen_rtx_PLUS (Pmode
,
3728 XEXP (XEXP (addr
, 0), 0),
3729 GEN_INT (v
& -16)));
3731 addr
= XEXP (XEXP (addr
, 0), 0);
3732 p1_lo
= GEN_INT (v
& 15);
3735 else if (GET_CODE (addr
) == CONST_INT
)
3737 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
3738 addr
= GEN_INT (INTVAL (addr
) & -16);
3742 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
3744 scalar
= store_with_one_insn_p (ops
[0]);
3747 /* We could copy the flags from the ops[0] MEM to mem here,
3748 We don't because we want this load to be optimized away if
3749 possible, and copying the flags will prevent that in certain
3750 cases, e.g. consider the volatile flag. */
3752 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
3753 set_mem_alias_set (lmem
, 0);
3754 emit_insn (gen_movti (reg
, lmem
));
3756 if (!p0
|| regno_aligned_for_load (REGNO (p0
)))
3757 p0
= stack_pointer_rtx
;
3761 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
3762 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
3764 else if (reload_completed
)
3766 if (GET_CODE (ops
[1]) == REG
)
3767 emit_move_insn (reg
, gen_rtx_REG (GET_MODE (reg
), REGNO (ops
[1])));
3768 else if (GET_CODE (ops
[1]) == SUBREG
)
3769 emit_move_insn (reg
,
3770 gen_rtx_REG (GET_MODE (reg
),
3771 REGNO (SUBREG_REG (ops
[1]))));
3777 if (GET_CODE (ops
[1]) == REG
)
3778 emit_insn (gen_spu_convert (reg
, ops
[1]));
3779 else if (GET_CODE (ops
[1]) == SUBREG
)
3780 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
3785 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
3786 emit_insn (gen_shlqby_ti
3787 (reg
, reg
, GEN_INT (4 - GET_MODE_SIZE (mode
))));
3789 smem
= change_address (ops
[0], TImode
, addr
);
3790 /* We can't use the previous alias set because the memory has changed
3791 size and can potentially overlap objects of other types. */
3792 set_mem_alias_set (smem
, 0);
3794 emit_insn (gen_movti (smem
, reg
));
3797 /* Return TRUE if X is MEM which is a struct member reference
3798 and the member can safely be loaded and stored with a single
3799 instruction because it is padded. */
3801 mem_is_padded_component_ref (rtx x
)
3803 tree t
= MEM_EXPR (x
);
3805 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
3807 t
= TREE_OPERAND (t
, 1);
3808 if (!t
|| TREE_CODE (t
) != FIELD_DECL
3809 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
3811 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3812 r
= DECL_FIELD_CONTEXT (t
);
3813 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
3815 /* Make sure they are the same mode */
3816 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
3818 /* If there are no following fields then the field alignment assures
3819 the structure is padded to the alignment which means this field is
3821 if (TREE_CHAIN (t
) == 0)
3823 /* If the following field is also aligned then this field will be
3826 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
3831 /* Parse the -mfixed-range= option string. */
3833 fix_range (const char *const_str
)
3836 char *str
, *dash
, *comma
;
3838 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
3839 REG2 are either register names or register numbers. The effect
3840 of this option is to mark the registers in the range from REG1 to
3841 REG2 as ``fixed'' so they won't be used by the compiler. */
3843 i
= strlen (const_str
);
3844 str
= (char *) alloca (i
+ 1);
3845 memcpy (str
, const_str
, i
+ 1);
3849 dash
= strchr (str
, '-');
3852 warning (0, "value of -mfixed-range must have form REG1-REG2");
3856 comma
= strchr (dash
+ 1, ',');
3860 first
= decode_reg_name (str
);
3863 warning (0, "unknown register name: %s", str
);
3867 last
= decode_reg_name (dash
+ 1);
3870 warning (0, "unknown register name: %s", dash
+ 1);
3878 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
3882 for (i
= first
; i
<= last
; ++i
)
3883 fixed_regs
[i
] = call_used_regs
[i
] = 1;
3894 spu_valid_move (rtx
* ops
)
3896 enum machine_mode mode
= GET_MODE (ops
[0]);
3897 if (!register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
3900 /* init_expr_once tries to recog against load and store insns to set
3901 the direct_load[] and direct_store[] arrays. We always want to
3902 consider those loads and stores valid. init_expr_once is called in
3903 the context of a dummy function which does not have a decl. */
3904 if (cfun
->decl
== 0)
3907 /* Don't allows loads/stores which would require more than 1 insn.
3908 During and after reload we assume loads and stores only take 1
3910 if (GET_MODE_SIZE (mode
) < 16 && !reload_in_progress
&& !reload_completed
)
3912 if (GET_CODE (ops
[0]) == MEM
3913 && (GET_MODE_SIZE (mode
) < 4
3914 || !(store_with_one_insn_p (ops
[0])
3915 || mem_is_padded_component_ref (ops
[0]))))
3917 if (GET_CODE (ops
[1]) == MEM
3918 && (GET_MODE_SIZE (mode
) < 4 || !aligned_mem_p (ops
[1])))
3924 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3925 can be generated using the fsmbi instruction. */
3927 fsmbi_const_p (rtx x
)
3931 /* We can always choose TImode for CONST_INT because the high bits
3932 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3933 enum immediate_class c
= classify_immediate (x
, TImode
);
3934 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
3939 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3940 can be generated using the cbd, chd, cwd or cdd instruction. */
3942 cpat_const_p (rtx x
, enum machine_mode mode
)
3946 enum immediate_class c
= classify_immediate (x
, mode
);
3947 return c
== IC_CPAT
;
3953 gen_cpat_const (rtx
* ops
)
3955 unsigned char dst
[16];
3956 int i
, offset
, shift
, isize
;
3957 if (GET_CODE (ops
[3]) != CONST_INT
3958 || GET_CODE (ops
[2]) != CONST_INT
3959 || (GET_CODE (ops
[1]) != CONST_INT
3960 && GET_CODE (ops
[1]) != REG
))
3962 if (GET_CODE (ops
[1]) == REG
3963 && (!REG_POINTER (ops
[1])
3964 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
3967 for (i
= 0; i
< 16; i
++)
3969 isize
= INTVAL (ops
[3]);
3972 else if (isize
== 2)
3976 offset
= (INTVAL (ops
[2]) +
3977 (GET_CODE (ops
[1]) ==
3978 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
3979 for (i
= 0; i
< isize
; i
++)
3980 dst
[offset
+ i
] = i
+ shift
;
3981 return array_to_constant (TImode
, dst
);
3984 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3985 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3986 than 16 bytes, the value is repeated across the rest of the array. */
3988 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
3993 memset (arr
, 0, 16);
3994 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
3995 if (GET_CODE (x
) == CONST_INT
3996 || (GET_CODE (x
) == CONST_DOUBLE
3997 && (mode
== SFmode
|| mode
== DFmode
)))
3999 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
4001 if (GET_CODE (x
) == CONST_DOUBLE
)
4002 val
= const_double_to_hwint (x
);
4005 first
= GET_MODE_SIZE (mode
) - 1;
4006 for (i
= first
; i
>= 0; i
--)
4008 arr
[i
] = val
& 0xff;
4011 /* Splat the constant across the whole array. */
4012 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
4015 j
= (j
== first
) ? 0 : j
+ 1;
4018 else if (GET_CODE (x
) == CONST_DOUBLE
)
4020 val
= CONST_DOUBLE_LOW (x
);
4021 for (i
= 15; i
>= 8; i
--)
4023 arr
[i
] = val
& 0xff;
4026 val
= CONST_DOUBLE_HIGH (x
);
4027 for (i
= 7; i
>= 0; i
--)
4029 arr
[i
] = val
& 0xff;
4033 else if (GET_CODE (x
) == CONST_VECTOR
)
4037 mode
= GET_MODE_INNER (mode
);
4038 units
= CONST_VECTOR_NUNITS (x
);
4039 for (i
= 0; i
< units
; i
++)
4041 elt
= CONST_VECTOR_ELT (x
, i
);
4042 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
4044 if (GET_CODE (elt
) == CONST_DOUBLE
)
4045 val
= const_double_to_hwint (elt
);
4048 first
= GET_MODE_SIZE (mode
) - 1;
4049 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
4051 for (j
= first
; j
>= 0; j
--)
4053 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
4063 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4064 smaller than 16 bytes, use the bytes that would represent that value
4065 in a register, e.g., for QImode return the value of arr[3]. */
4067 array_to_constant (enum machine_mode mode
, unsigned char arr
[16])
4069 enum machine_mode inner_mode
;
4071 int units
, size
, i
, j
, k
;
4074 if (GET_MODE_CLASS (mode
) == MODE_INT
4075 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
4077 j
= GET_MODE_SIZE (mode
);
4078 i
= j
< 4 ? 4 - j
: 0;
4079 for (val
= 0; i
< j
; i
++)
4080 val
= (val
<< 8) | arr
[i
];
4081 val
= trunc_int_for_mode (val
, mode
);
4082 return GEN_INT (val
);
4088 for (i
= high
= 0; i
< 8; i
++)
4089 high
= (high
<< 8) | arr
[i
];
4090 for (i
= 8, val
= 0; i
< 16; i
++)
4091 val
= (val
<< 8) | arr
[i
];
4092 return immed_double_const (val
, high
, TImode
);
4096 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
4097 val
= trunc_int_for_mode (val
, SImode
);
4098 return hwint_to_const_double (SFmode
, val
);
4102 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
4104 val
|= (arr
[4] << 24) | (arr
[5] << 16) | (arr
[6] << 8) | arr
[7];
4105 return hwint_to_const_double (DFmode
, val
);
4108 if (!VECTOR_MODE_P (mode
))
4111 units
= GET_MODE_NUNITS (mode
);
4112 size
= GET_MODE_UNIT_SIZE (mode
);
4113 inner_mode
= GET_MODE_INNER (mode
);
4114 v
= rtvec_alloc (units
);
4116 for (k
= i
= 0; i
< units
; ++i
)
4119 for (j
= 0; j
< size
; j
++, k
++)
4120 val
= (val
<< 8) | arr
[k
];
4122 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
4123 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
4125 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
4130 return gen_rtx_CONST_VECTOR (mode
, v
);
4134 reloc_diagnostic (rtx x
)
4136 tree loc_decl
, decl
= 0;
4138 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
4141 if (GET_CODE (x
) == SYMBOL_REF
)
4142 decl
= SYMBOL_REF_DECL (x
);
4143 else if (GET_CODE (x
) == CONST
4144 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
4145 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
4147 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4148 if (decl
&& !DECL_P (decl
))
4151 /* We use last_assemble_variable_decl to get line information. It's
4152 not always going to be right and might not even be close, but will
4153 be right for the more common cases. */
4154 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
4157 loc_decl
= last_assemble_variable_decl
;
4159 /* The decl could be a string constant. */
4160 if (decl
&& DECL_P (decl
))
4161 msg
= "%Jcreating run-time relocation for %qD";
4163 msg
= "creating run-time relocation";
4165 if (TARGET_WARN_RELOC
)
4166 warning (0, msg
, loc_decl
, decl
);
4168 error (msg
, loc_decl
, decl
);
4171 /* Hook into assemble_integer so we can generate an error for run-time
4172 relocations. The SPU ABI disallows them. */
4174 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
4176 /* By default run-time relocations aren't supported, but we allow them
4177 in case users support it in their own run-time loader. And we provide
4178 a warning for those users that don't. */
4179 if ((GET_CODE (x
) == SYMBOL_REF
)
4180 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
4181 reloc_diagnostic (x
);
4183 return default_assemble_integer (x
, size
, aligned_p
);
4187 spu_asm_globalize_label (FILE * file
, const char *name
)
4189 fputs ("\t.global\t", file
);
4190 assemble_name (file
, name
);
4195 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
)
4197 enum machine_mode mode
= GET_MODE (x
);
4198 int cost
= COSTS_N_INSNS (2);
4200 /* Folding to a CONST_VECTOR will use extra space but there might
4201 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4202 only if it allows us to fold away multiple insns. Changing the cost
4203 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4204 because this cost will only be compared against a single insn.
4205 if (code == CONST_VECTOR)
4206 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4209 /* Use defaults for float operations. Not accurate but good enough. */
4212 *total
= COSTS_N_INSNS (13);
4217 *total
= COSTS_N_INSNS (6);
4223 if (satisfies_constraint_K (x
))
4225 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
4226 *total
= COSTS_N_INSNS (1);
4228 *total
= COSTS_N_INSNS (3);
4232 *total
= COSTS_N_INSNS (3);
4237 *total
= COSTS_N_INSNS (0);
4241 *total
= COSTS_N_INSNS (5);
4245 case FLOAT_TRUNCATE
:
4247 case UNSIGNED_FLOAT
:
4250 *total
= COSTS_N_INSNS (7);
4256 *total
= COSTS_N_INSNS (9);
4263 GET_CODE (XEXP (x
, 0)) ==
4264 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4265 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
4267 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
4269 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4270 cost
= COSTS_N_INSNS (14);
4271 if ((val
& 0xffff) == 0)
4272 cost
= COSTS_N_INSNS (9);
4273 else if (val
> 0 && val
< 0x10000)
4274 cost
= COSTS_N_INSNS (11);
4283 *total
= COSTS_N_INSNS (20);
4290 *total
= COSTS_N_INSNS (4);
4293 if (XINT (x
, 1) == UNSPEC_CONVERT
)
4294 *total
= COSTS_N_INSNS (0);
4296 *total
= COSTS_N_INSNS (4);
4299 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4300 if (GET_MODE_CLASS (mode
) == MODE_INT
4301 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
4302 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
4303 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
4308 static enum machine_mode
4309 spu_unwind_word_mode (void)
4314 /* Decide whether we can make a sibling call to a function. DECL is the
4315 declaration of the function being targeted by the call and EXP is the
4316 CALL_EXPR representing the call. */
4318 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
4320 return decl
&& !TARGET_LARGE_MEM
;
4323 /* We need to correctly update the back chain pointer and the Available
4324 Stack Size (which is in the second slot of the sp register.) */
4326 spu_allocate_stack (rtx op0
, rtx op1
)
4329 rtx chain
= gen_reg_rtx (V4SImode
);
4330 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
4331 rtx sp
= gen_reg_rtx (V4SImode
);
4332 rtx splatted
= gen_reg_rtx (V4SImode
);
4333 rtx pat
= gen_reg_rtx (TImode
);
4335 /* copy the back chain so we can save it back again. */
4336 emit_move_insn (chain
, stack_bot
);
4338 op1
= force_reg (SImode
, op1
);
4340 v
= 0x1020300010203ll
;
4341 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
4342 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
4344 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
4345 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
4347 if (flag_stack_check
)
4349 rtx avail
= gen_reg_rtx(SImode
);
4350 rtx result
= gen_reg_rtx(SImode
);
4351 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
4352 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
4353 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
4356 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
4358 emit_move_insn (stack_bot
, chain
);
4360 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
4364 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
4366 static unsigned char arr
[16] =
4367 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4368 rtx temp
= gen_reg_rtx (SImode
);
4369 rtx temp2
= gen_reg_rtx (SImode
);
4370 rtx temp3
= gen_reg_rtx (V4SImode
);
4371 rtx temp4
= gen_reg_rtx (V4SImode
);
4372 rtx pat
= gen_reg_rtx (TImode
);
4373 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
4375 /* Restore the backchain from the first word, sp from the second. */
4376 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
4377 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
4379 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
4381 /* Compute Available Stack Size for sp */
4382 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
4383 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
4385 /* Compute Available Stack Size for back chain */
4386 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
4387 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
4388 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
4390 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
4391 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
4395 spu_init_libfuncs (void)
4397 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
4398 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
4399 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
4400 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
4401 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
4402 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
4403 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
4404 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
4405 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
4406 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
4407 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
4409 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
4410 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
4412 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
4413 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
4414 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
4415 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
4416 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
4417 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
4420 /* Make a subreg, stripping any existing subreg. We could possibly just
4421 call simplify_subreg, but in this case we know what we want. */
4423 spu_gen_subreg (enum machine_mode mode
, rtx x
)
4425 if (GET_CODE (x
) == SUBREG
)
4427 if (GET_MODE (x
) == mode
)
4429 return gen_rtx_SUBREG (mode
, x
, 0);
4433 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
4435 return (TYPE_MODE (type
) == BLKmode
4437 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
4438 || int_size_in_bytes (type
) >
4439 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
4442 /* Create the built-in types and functions */
4444 struct spu_builtin_description spu_builtins
[] = {
4445 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4446 {fcode, icode, name, type, params, NULL_TREE},
4447 #include "spu-builtins.def"
4452 spu_init_builtins (void)
4454 struct spu_builtin_description
*d
;
4457 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
4458 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
4459 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
4460 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
4461 V4SF_type_node
= build_vector_type (float_type_node
, 4);
4462 V2DF_type_node
= build_vector_type (double_type_node
, 2);
4464 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
4465 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
4466 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
4467 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
4469 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
4471 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
4472 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
4473 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
4474 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
4475 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
4476 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
4477 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
4478 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
4479 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
4480 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
4481 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
4482 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
4484 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
4485 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
4486 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
4487 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
4488 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
4489 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
4490 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
4491 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
4493 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
4494 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
4496 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
4498 spu_builtin_types
[SPU_BTI_PTR
] =
4499 build_pointer_type (build_qualified_type
4501 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
4503 /* For each builtin we build a new prototype. The tree code will make
4504 sure nodes are shared. */
4505 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
4508 char name
[64]; /* build_function will make a copy. */
4514 /* Find last parm. */
4515 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
4520 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
4522 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
4524 sprintf (name
, "__builtin_%s", d
->name
);
4526 add_builtin_function (name
, p
, END_BUILTINS
+ i
, BUILT_IN_MD
,
4528 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
4529 TREE_READONLY (d
->fndecl
) = 1;
4531 /* These builtins don't throw. */
4532 TREE_NOTHROW (d
->fndecl
) = 1;
4537 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
4539 static unsigned char arr
[16] =
4540 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4542 rtx temp
= gen_reg_rtx (Pmode
);
4543 rtx temp2
= gen_reg_rtx (V4SImode
);
4544 rtx temp3
= gen_reg_rtx (V4SImode
);
4545 rtx pat
= gen_reg_rtx (TImode
);
4546 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
4548 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
4550 /* Restore the sp. */
4551 emit_move_insn (temp
, op1
);
4552 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
4554 /* Compute available stack size for sp. */
4555 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
4556 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
4558 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
4559 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
4563 spu_safe_dma (HOST_WIDE_INT channel
)
4565 return (channel
>= 21 && channel
<= 27);
4569 spu_builtin_splats (rtx ops
[])
4571 enum machine_mode mode
= GET_MODE (ops
[0]);
4572 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
4574 unsigned char arr
[16];
4575 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
4576 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
4580 rtx reg
= gen_reg_rtx (TImode
);
4582 if (GET_CODE (ops
[1]) != REG
4583 && GET_CODE (ops
[1]) != SUBREG
)
4584 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
4590 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
4596 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
4601 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
4606 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
4612 emit_move_insn (reg
, shuf
);
4613 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
4618 spu_builtin_extract (rtx ops
[])
4620 enum machine_mode mode
;
4623 mode
= GET_MODE (ops
[1]);
4625 if (GET_CODE (ops
[2]) == CONST_INT
)
4630 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
4633 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
4636 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
4639 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
4642 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
4645 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
4653 from
= spu_gen_subreg (TImode
, ops
[1]);
4654 rot
= gen_reg_rtx (TImode
);
4655 tmp
= gen_reg_rtx (SImode
);
4660 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
4663 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
4664 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
4668 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
4672 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
4677 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
4679 emit_insn (gen_spu_convert (ops
[0], rot
));
void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
	     (mask, stack_pointer_rtx, offset,
	      GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
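
/* Place scalar OPS[1] into element OPS[2] of vector OPS[0] (the
   spu_promote intrinsic).  The scalar is rotated from the preferred
   slot to the requested element position; for a literal index the
   rotate count folds to a constant.  */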
void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
	pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
	  emit_insn (gen_addsi3 (offset, offset, offset));
	  break;
	case V4SFmode:
	case V4SImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
	  break;
	case V2DImode:
	case V2DFmode:
	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
	  break;
	default:
	  abort ();
	}
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
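
/* Emit rtl to initialize the trampoline at TRAMP: a quadword of code
   that loads the static chain register with CXT and branches to
   FNADDR.  The instruction words are built at run time by shuffling
   the two values into opcode templates, and a sync is emitted so the
   newly written code is visible to instruction fetch.  */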
void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
	2, 3, 0, 1, 18, 19, 16, 17,
	0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
	0x41, 0, 0, 79,
	0x41, 0, 0, STATIC_CHAIN_REGNUM,
	0x60, 0x80, 0, 79,
	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
	0x42, 0, 0, STATIC_CHAIN_REGNUM,
	0x30, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
	 fits 18 bits and the last 4 are zeros.  This will be true if
	 the stack pointer is initialized to 0x3fff0 at program start,
	 otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
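
/* Sign extend OPS[1] into the wider OPS[0] using a single shufb: the
   pattern ARR places the source bytes at the low-order end of the
   result and fills the remaining bytes from SIGN, a register holding
   the sign bits computed per source mode.  */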
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
	arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
	arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
	{
	case HImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_extendhisi2 (sign, ops[1]));
	  arr[last] = 0x03;
	  arr[last - 1] = 0x02;
	  break;
	case SImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
	  for (i = 0; i < 4; i++)
	    arr[last - i] = 3 - i;
	  break;
	case DImode:
	  sign = gen_reg_rtx (SImode);
	  c = gen_reg_rtx (SImode);
	  emit_insn (gen_spu_convert (c, ops[1]));
	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
	  for (i = 0; i < 8; i++)
	    arr[last - i] = 7 - i;
	  break;
	default:
	  abort ();
	}
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}

/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	++n_var;
      else
	{
	  if (first_constant == NULL_RTX)
	    first_constant = x;
	}
      if (i > 0 && !rtx_equal_p (x, first))
	all_same = false;
    }

  /* If all elements are the same, use splats to repeat elements.  */
  if (all_same)
    {
      if (!CONSTANT_P (first)
	  && !register_operand (first, GET_MODE (x)))
	first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* Load constant parts.  */
  if (n_var != n_elts)
    {
      if (n_var == 0)
	{
	  emit_move_insn (target,
			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
	}
      else
	{
	  rtx constant_parts_rtx = copy_rtx (vals);

	  gcc_assert (first_constant != NULL_RTX);
	  /* Fill empty slots with the first constant; this increases
	     our chance of using splats in the recursive call below.  */
	  for (i = 0; i < n_elts; ++i)
	    {
	      x = XVECEXP (constant_parts_rtx, 0, i);
	      if (!(CONST_INT_P (x)
		    || GET_CODE (x) == CONST_DOUBLE
		    || GET_CODE (x) == CONST_FIXED))
		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
	    }

	  spu_expand_vector_init (target, constant_parts_rtx);
	}
    }

  /* Load variable parts.  */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
	{
	  x = XVECEXP (vals, 0, i);
	  if (!(CONST_INT_P (x)
		|| GET_CODE (x) == CONST_DOUBLE
		|| GET_CODE (x) == CONST_FIXED))
	    {
	      if (!register_operand (x, GET_MODE (x)))
		x = force_reg (GET_MODE (x), x);
	      insert_operands[1] = x;
	      insert_operands[3] = GEN_INT (i);
	      spu_builtin_insert (insert_operands);
	    }
	}
    }
}

/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */

static int
get_vec_cmp_insn (enum rtx_code code,
		  enum machine_mode dest_mode,
		  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}

/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is the expected destination mode.  This is a recursive function.  */

static rtx
spu_emit_vector_compare (enum rtx_code rcode,
			 rtx op0, rtx op1,
			 enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions use destination V4SImode.
     Double floating point vector compare instructions use destination
     V2DImode.  Move to the appropriate destination mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
	{
	case LT:
	  rcode = GT;
	  swap_operands = true;
	  try_again = true;
	  break;
	case LTU:
	  rcode = GTU;
	  swap_operands = true;
	  try_again = true;
	  break;
	case NE:
	  /* Treat A != B as ~(A==B).  */
	  {
	    enum insn_code nor_code;
	    rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
	    nor_code = optab_handler (one_cmpl_optab, (int) dest_mode)->insn_code;
	    gcc_assert (nor_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	case GE:
	case GEU:
	case LE:
	case LEU:
	  /* Try GT/GTU/LT/LTU OR EQ.  */
	  {
	    rtx c_rtx, eq_rtx;
	    enum insn_code ior_code;
	    enum rtx_code new_code;

	    switch (rcode)
	      {
	      case GE:  new_code = GT;  break;
	      case GEU: new_code = GTU; break;
	      case LE:  new_code = LT;  break;
	      case LEU: new_code = LTU; break;
	      default:
		gcc_unreachable ();
	      }

	    c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
	    eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, (int) dest_mode)->insn_code;
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	default:
	  gcc_unreachable ();
	}

      /* You only get two chances.  */
      if (try_again)
	vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
	{
	  rtx tmp;
	  tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	}
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}

/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */

int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			   rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return 1;
}
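
/* Force OP into a register of mode MODE.  When OP already has a mode
   of the same size, a simple subreg is tried first; otherwise the
   value is moved through a spu_convert.  */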
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
	  || GET_MODE (op) == BLKmode)
	return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
	return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
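
/* Diagnose out-of-range immediate arguments to builtin D.  P is the
   SPU_BTI_* code of the operand being checked: immediates must fall
   within spu_builtin_range[], and address-like immediates whose low
   bits the hardware ignores draw a warning.  */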
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;

  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
	error ("%s expects an integer literal in the range [%d, %d].",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
	  && (GET_CODE (XEXP (op, 0)) == PLUS
	      || GET_CODE (XEXP (op, 0)) == MINUS))
	{
	  v = INTVAL (XEXP (XEXP (op, 0), 1));
	  op = XEXP (XEXP (op, 0), 0);
	}
      else if (GET_CODE (op) == CONST_INT)
	v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
	v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range.  */
      if (v < spu_builtin_range[range].low
	  || v > spu_builtin_range[range].high)
	error ("%s expects an integer literal in the range [%d, %d]. ("
	       HOST_WIDE_INT_PRINT_DEC ")",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high,
	       v);

      switch (p)
	{
	case SPU_BTI_S10_4:
	  lsbits = 4;
	  break;
	case SPU_BTI_U16_2:
	  /* This is only used in lqa and stqa.  Even though the insns
	     encode 16 bits of the address (all but the 2 least
	     significant), only 14 bits are used because it is masked to
	     be 16 byte aligned.  */
	  lsbits = 4;
	  break;
	case SPU_BTI_S16_2:
	  /* This is used for lqr and stqr.  */
	  lsbits = 2;
	  break;
	default:
	  lsbits = 0;
	}

      if (GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == SYMBOL_REF
	      && SYMBOL_REF_FUNCTION_P (op))
	  || (v & ((1 << lsbits) - 1)) != 0)
	warning (0, "%d least significant bits of %s are ignored.", lsbits,
		 d->name);
    }
}
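
/* Expand the arguments of call EXP into OPS[] for the insn described
   by D, placing TARGET in OPS[0] when the builtin returns a value.  */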
static void
expand_builtin_args (struct spu_builtin_description *d, tree exp,
		     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl.  */
  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; i < insn_data[icode].n_operands; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
    }
}
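
/* Expand a single builtin described by D for call EXP.  The target
   operand is set up first, SPU_MASK_FOR_LOAD is special-cased, scalar
   immediates destined for vector operands are splatted, every operand
   is checked against its predicate and immediate range, and finally
   the insn (or call/jump) is emitted.  */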
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  tree return_type;

  /* Set up ops[] with values from arglist.  */
  expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
			      gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < insn_data[icode].n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels.  */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
	  && (GET_CODE (ops[i]) == CONST_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
	{
	  if (GET_CODE (ops[i]) == CONST_INT)
	    ops[i] = spu_const (mode, INTVAL (ops[i]));
	  else
	    {
	      rtx reg = gen_reg_rtx (mode);
	      enum machine_mode imode = GET_MODE_INNER (mode);
	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
	      if (imode != GET_MODE (ops[i]))
		ops[i] = convert_to_mode (imode, ops[i],
					  TYPE_UNSIGNED (spu_builtin_types
							 [d->parm[i]]));
	      emit_insn (gen_spu_splats (reg, ops[i]));
	      ops[i] = reg;
	    }
	}

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
	ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
rtx
spu_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}

/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULE_0].fndecl;
      else
	return spu_builtins[SPU_MULE_1].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULO_1].fndecl;
      else
	return spu_builtins[SPU_MULO_0].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.  */
  if (runtime_test)
    return -19;
  else
    return 0;
}

/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */

static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}

/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
   -2 means the instruction can go in either pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
	fprintf (dump_file, "i%d %s %d %d\n",
		 INSN_UID (insn),
		 insn_data[INSN_CODE (insn)].name,
		 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}

void
spu_init_expanders (void)
{
  /* HARD_FRAME_REGISTER is only 128 bit aligned when
     frame_pointer_needed is true.  We don't know that until we're
     expanding the prologue.  */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}

static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU, word_mode is TImode, so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU, word_mode is TImode, so it is better to use SImode
     for shift counts.  */
  return SImode;
}