* pa.md (abssi2): New pattern.
* pa.c (secondary_reload_class): Loads from reg+d addresses into
FP registers don't need secondary reloads.
* pa.h: Delete some #if 0 code. Update some comments.
(EXTRA_CONSTRAINT, case 'Q'): Only accept valid memory addresses.
* pa.h (RTX_COSTS): Tege's rewrite.
* pa.c (hppa_legitimize_address): Generate unscaled indexed
addressing for (plus (symbol_ref) (reg)).
(emit_move_sequence): Set REGNO_POINTER_FLAG appropriately
to encourage unscaled indexing modes.
(basereg_operand): New function for unscaled index address support.
* pa.md (unscaled indexing patterns): New patterns for unscaled
index address support.
* pa.h (MOVE_RATIO): Define.
* pa.md (movstrsi expander): Refine tests for when to use the
library routine instead of an inlined loop copy. Provide an
additional scratch register for use in the inlined loop copy.
(movstrsi_internal): Name the pattern for ease of use. Add
additional scratch register.
* pa.c (output_block_move): Greatly simplify. Use 2X unrolled
copy loops to improve performance.
(compute_movstrsi_length): Corresponding changes.
* pa.c (print_operand): Handle 'y' case for reversed FP
comparisons. Delete some #if 0 code. Fix various comment typos.
* pa.md (fcmp patterns): Try to reverse the comparison to avoid
useless add,tr insns.
From-SVN: r10609
if (GET_CODE (x) == CONST)
x = XEXP (x, 0);
+ /* Special case. Get the SYMBOL_REF into a register and use indexing.
+ That should always be safe. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
+ {
+ rtx reg = force_reg (SImode, XEXP (x, 1));
+ return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
+ }
+
/* Note we must reject symbols which represent function addresses
since the assembler/linker can't handle arithmetic on plabels. */
if (GET_CODE (x) == PLUS
/* Handle secondary reloads for loads/stores of FP registers from
REG+D addresses where D does not fit in 5 bits, including
- (subreg (mem (addr)) cases. */
+ (subreg (mem (addr))) cases. */
if (fp_reg_operand (operand0, mode)
&& ((GET_CODE (operand1) == MEM
&& ! memory_address_p (DFmode, XEXP (operand1, 0)))
operands[1] = force_const_mem (mode, operand1);
emit_move_sequence (operands, mode, temp);
}
- /* Likewise for (const (plus (symbol) (const_int)) when generating
- pic code during or after reload and const_int will not fit
- in 14 bits. */
+ /* Likewise for (const (plus (symbol) (const_int))) when
+ generating pic code during or after reload and const_int
+ will not fit in 14 bits. */
else if (GET_CODE (operand1) == CONST
&& GET_CODE (XEXP (operand1, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
else
temp = gen_reg_rtx (mode);
+ /* Loading a SYMBOL_REF into a register makes that register
+ safe to be used as the base in an indexed address.
+
+ Don't mark hard registers though. That loses. */
+ if (REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
+ if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
+ REGNO_POINTER_FLAG (REGNO (temp)) = 1;
if (ishighonly)
set = gen_rtx (SET, mode, operand0, temp);
else
/* Emit code to perform a block move.
- Restriction: If the length argument is non-constant, alignment
- must be 4.
-
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is the source pointer as a REG, clobbered.
- if SIZE_IS_CONSTANT
- OPERANDS[2] is a register for temporary storage.
- OPERANDS[4] is the size as a CONST_INT
- else
- OPERANDS[2] is a REG which will contain the size, clobbered.
+ OPERANDS[2] is a register for temporary storage.
+ OPERANDS[4] is the size as a CONST_INT
OPERANDS[3] is a register for temporary storage.
- OPERANDS[5] is the alignment safe to use, as a CONST_INT. */
+ OPERANDS[5] is the alignment safe to use, as a CONST_INT.
+ OPERANDS[6] is another temporary register. */
char *
output_block_move (operands, size_is_constant)
int size_is_constant;
{
int align = INTVAL (operands[5]);
- unsigned long n_bytes;
+ unsigned long n_bytes = INTVAL (operands[4]);
/* We can't move more than four bytes at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > 4)
align = 4;
- if (size_is_constant)
+ /* Note that we know each loop below will execute at least twice
+ (else we would have open-coded the copy). */
+ switch (align)
{
- unsigned long offset;
- rtx temp;
-
- n_bytes = INTVAL (operands[4]);
- if (n_bytes == 0)
- return "";
-
- if (align >= 4)
- {
- /* Don't unroll too large blocks. */
- if (n_bytes > 32)
- goto copy_with_loop;
-
- /* Read and store using two registers, and hide latency
- by deferring the stores until three instructions after
- the corresponding load. The last load insn will read
- the entire word were the last bytes are, possibly past
- the end of the source block, but since loads are aligned,
- this is harmless. */
-
- output_asm_insn ("ldws,ma 4(0,%1),%2", operands);
-
- for (offset = 4; offset < n_bytes; offset += 4)
- {
+ case 4:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 8);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
+ output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
+ output_asm_insn ("stws,ma %3,4(0,%0)", operands);
+ output_asm_insn ("addib,>= -8,%2,.-12", operands);
+ output_asm_insn ("stws,ma %6,4(0,%0)", operands);
+
+ /* Handle the residual. There could be up to 7 bytes of
+ residual to copy! */
+ if (n_bytes % 8 != 0)
+ {
+ operands[4] = GEN_INT (n_bytes % 4);
+ if (n_bytes % 8 >= 4)
output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
- output_asm_insn ("stws,ma %2,4(0,%0)", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("ldw 0(0,%1),%6", operands);
+ if (n_bytes % 8 >= 4)
+ output_asm_insn ("stws,ma %3,4(0,%0)", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
+ }
+ return "";
- temp = operands[2];
- operands[2] = operands[3];
- operands[3] = temp;
- }
- if (n_bytes % 4 == 0)
- /* Store the last word. */
- output_asm_insn ("stw %2,0(0,%0)", operands);
- else
- {
- /* Store the last, partial word. */
- operands[4] = GEN_INT (n_bytes % 4);
- output_asm_insn ("stbys,e %2,%4(0,%0)", operands);
- }
- return "";
- }
+ case 2:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 4);
+ output_asm_insn ("ldi %4,%2", operands);
- if (align >= 2 && n_bytes >= 2)
- {
- output_asm_insn ("ldhs,ma 2(0,%1),%2", operands);
+ /* Copying loop. */
+ output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
+ output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
+ output_asm_insn ("sths,ma %3,2(0,%0)", operands);
+ output_asm_insn ("addib,>= -4,%2,.-12", operands);
+ output_asm_insn ("sths,ma %6,2(0,%0)", operands);
- for (offset = 2; offset + 2 <= n_bytes; offset += 2)
- {
+ /* Handle the residual. */
+ if (n_bytes % 4 != 0)
+ {
+ if (n_bytes % 4 >= 2)
output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
- output_asm_insn ("sths,ma %2,2(0,%0)", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("ldb 0(0,%1),%6", operands);
+ if (n_bytes % 4 >= 2)
+ output_asm_insn ("sths,ma %3,2(0,%0)", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("stb %6,0(0,%0)", operands);
+ }
+ return "";
- temp = operands[2];
- operands[2] = operands[3];
- operands[3] = temp;
- }
- if (n_bytes % 2 != 0)
- output_asm_insn ("ldb 0(0,%1),%3", operands);
+ case 1:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 2);
+ output_asm_insn ("ldi %4,%2", operands);
- output_asm_insn ("sths,ma %2,2(0,%0)", operands);
+ /* Copying loop. */
+ output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
+ output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
+ output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
+ output_asm_insn ("addib,>= -2,%2,.-12", operands);
+ output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
- if (n_bytes % 2 != 0)
+ /* Handle the residual. */
+ if (n_bytes % 2 != 0)
+ {
+ output_asm_insn ("ldb 0(0,%1),%3", operands);
output_asm_insn ("stb %3,0(0,%0)", operands);
+ }
+ return "";
- return "";
- }
-
- output_asm_insn ("ldbs,ma 1(0,%1),%2", operands);
-
- for (offset = 1; offset + 1 <= n_bytes; offset += 1)
- {
- output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
- output_asm_insn ("stbs,ma %2,1(0,%0)", operands);
-
- temp = operands[2];
- operands[2] = operands[3];
- operands[3] = temp;
- }
- output_asm_insn ("stb %2,0(0,%0)", operands);
-
- return "";
- }
-
- if (align != 4)
- abort();
-
- copy_with_loop:
-
- if (size_is_constant)
- {
- /* Size is compile-time determined, and also not
- very small (such small cases are handled above). */
- operands[4] = GEN_INT (n_bytes - 4);
- output_asm_insn ("ldo %4(0),%2", operands);
- }
- else
- {
- /* Decrement counter by 4, and if it becomes negative, jump past the
- word copying loop. */
- output_asm_insn ("addib,<,n -4,%2,.+16", operands);
- }
-
- /* Copying loop. Note that the first load is in the annulled delay slot
- of addib. Is it OK on PA to have a load in a delay slot, i.e. is a
- possible page fault stopped in time? */
- output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
- output_asm_insn ("addib,>= -4,%2,.-4", operands);
- output_asm_insn ("stws,ma %3,4(0,%0)", operands);
-
- /* The counter is negative, >= -4. The remaining number of bytes are
- determined by the two least significant bits. */
-
- if (size_is_constant)
- {
- if (n_bytes % 4 != 0)
- {
- /* Read the entire word of the source block tail. */
- output_asm_insn ("ldw 0(0,%1),%3", operands);
- operands[4] = GEN_INT (n_bytes % 4);
- output_asm_insn ("stbys,e %3,%4(0,%0)", operands);
- }
- }
- else
- {
- /* Add 4 to counter. If it becomes zero, we're done. */
- output_asm_insn ("addib,=,n 4,%2,.+16", operands);
-
- /* Read the entire word of the source block tail. (Also this
- load is in an annulled delay slot.) */
- output_asm_insn ("ldw 0(0,%1),%3", operands);
-
- /* Make %0 point at the first byte after the destination block. */
- output_asm_insn ("addl %2,%0,%0", operands);
- /* Store the leftmost bytes, up to, but not including, the address
- in %0. */
- output_asm_insn ("stbys,e %3,0(0,%0)", operands);
+ default:
+ abort ();
}
- return "";
}
/* Count the number of insns necessary to handle this block move.
rtx insn;
{
rtx pat = PATTERN (insn);
- int size_is_constant;
int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
- unsigned long n_bytes;
- int insn_count = 0;
-
- if (GET_CODE (XEXP (XVECEXP (pat, 0, 5), 0)) == CONST_INT)
- {
- size_is_constant = 1;
- n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
- }
- else
- {
- size_is_constant = 0;
- n_bytes = 0;
- }
+ unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
+ unsigned int n_insns = 0;
/* We can't move more than four bytes at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > 4)
align = 4;
- if (size_is_constant)
- {
- unsigned long offset;
-
- if (n_bytes == 0)
- return 0;
-
- if (align >= 4)
- {
- /* Don't unroll too large blocks. */
- if (n_bytes > 32)
- goto copy_with_loop;
-
- /* first load */
- insn_count = 1;
-
- /* Count the unrolled insns. */
- for (offset = 4; offset < n_bytes; offset += 4)
- insn_count += 2;
-
- /* Count last store or partial store. */
- insn_count += 1;
- return insn_count * 4;
- }
-
- if (align >= 2 && n_bytes >= 2)
- {
- /* initial load. */
- insn_count = 1;
-
- /* Unrolled loop. */
- for (offset = 2; offset + 2 <= n_bytes; offset += 2)
- insn_count += 2;
-
- /* ??? odd load/store */
- if (n_bytes % 2 != 0)
- insn_count += 2;
-
- /* ??? final store from loop. */
- insn_count += 1;
+ /* The basic copying loop: the ldi that sets up the counter plus
+ the five insns of the unrolled copy loop itself. */
+ n_insns = 6;
- return insn_count * 4;
- }
-
- /* First load. */
- insn_count = 1;
-
- /* The unrolled loop. */
- for (offset = 1; offset + 1 <= n_bytes; offset += 1)
- insn_count += 2;
-
- /* Final store. */
- insn_count += 1;
-
- return insn_count * 4;
- }
-
- if (align != 4)
- abort();
-
- copy_with_loop:
-
- /* setup for constant and non-constant case. */
- insn_count = 1;
-
- /* The copying loop. */
- insn_count += 3;
-
- /* The counter is negative, >= -4. The remaining number of bytes are
- determined by the two least significant bits. */
-
- if (size_is_constant)
+ /* Residuals. */
+ if (n_bytes % (2 * align) != 0)
{
- if (n_bytes % 4 != 0)
- insn_count += 2;
+ /* Any residual caused by unrolling the copy loop. */
+ if (n_bytes % (2 * align) > align)
+ n_insns += 1;
+
+ /* Any residual because the number of bytes was not a
+ multiple of the alignment. */
+ if (n_bytes % align != 0)
+ n_insns += 1;
}
- else
- insn_count += 4;
- return insn_count * 4;
+
+ /* Lengths are expressed in bytes now; each insn is 4 bytes. */
+ return n_insns * 4;
}
\f
even be more efficient.
Avoid this if the callee saved register wasn't used (these are
- leaf functions. */
+ leaf functions). */
if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
load_reg (2, - 20, STACK_POINTER_REGNUM);
}
- /* Reset stack pointer (and possibly frame pointer). The stack */
- /* pointer is initially set to fp + 64 to avoid a race condition.
- ??? What race condition?!? */
+ /* Reset stack pointer (and possibly frame pointer). The stack
+ pointer is initially set to fp + 64 to avoid a race condition. */
else if (frame_pointer_needed)
{
/* Emit a blockage insn here to keep these insns from being moved
abort ();
}
return;
+ /* Reversed floating point comparison. Need special conditions to
+ deal with NaNs properly. */
+ case 'y':
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fprintf (file, "?="); break;
+ case NE:
+ fprintf (file, "!?="); break;
+ case GT:
+ fprintf (file, "!<="); break;
+ case GE:
+ fprintf (file, "!<"); break;
+ case LT:
+ fprintf (file, "!>="); break;
+ case LE:
+ fprintf (file, "!>"); break;
+ default:
+ abort ();
+ }
+ return;
case 'S': /* Condition, operands are (S)wapped. */
switch (GET_CODE (x))
{
break;
}
}
-#if 0
- /* The code here is completely wrong. It attempts to extract parts of
- a CONST_DOUBLE which is wrong since REAL_ARITHMETIC is defined, and it
- extracts the wrong indices (0 instead of 2 and 1 instead of 3) using
- the wrong macro (XINT instead of XWINT).
- Just disable it for now, since the code will never be used anyway! */
- else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
- {
- union { double d; int i[2]; } u;
- union { float f; int i; } u1;
- u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1);
- u1.f = u.d;
- if (code == 'f')
- fprintf (file, "0r%.9g", u1.f);
- else
- fprintf (file, "0x%x", u1.i);
- }
- else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
- {
- union { double d; int i[2]; } u;
- u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1);
- fprintf (file, "0r%.20g", u.d);
- }
-#endif
else
output_addr_const (file, x);
}
if (GET_CODE (in) == SUBREG)
in = SUBREG_REG (in);
- if (FP_REG_CLASS_P (class)
- && GET_CODE (in) == MEM
- && !memory_address_p (DFmode, XEXP (in, 0))
- && memory_address_p (SImode, XEXP (in, 0)))
- return GENERAL_REGS;
-
return NO_REGS;
}
return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
}
+/* Return 1 if OP is valid as a base register in a reg + reg address. */
+
+int
+basereg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ /* Once reload has started everything is considered valid. Reload should
+ only create indexed addresses using the stack/frame pointer, and any
+ others were checked for validity when created by the combine pass.
+
+ Also allow any register when TARGET_NO_SPACE_REGS is in effect since
+ we don't have to worry about the braindamaged implicit space register
+ selection using the basereg only (rather than effective address)
+ screwing us over. */
+ if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed)
+ return (GET_CODE (op) == REG || GET_CODE (op) == CONST_INT);
+
+ /* Stack and frame pointers are always OK for indexing. */
+ if (op == stack_pointer_rtx || op == frame_pointer_rtx)
+ return 1;
+
+ /* The only other valid OPs are pseudo registers with
+ REGNO_POINTER_FLAG set. */
+ if (GET_CODE (op) != REG
+ || REGNO (op) < FIRST_PSEUDO_REGISTER
+ || ! register_operand (op, mode))
+ return 0;
+
+ return REGNO_POINTER_FLAG (REGNO (op));
+}
+
/* Return 1 if this operand is anything other than a hard register. */
int
these things in insns and then not re-recognize the insns, causing
constrain_operands to fail.
- Also note `Q' accepts any memory operand during the reload pass.
- This includes out-of-range displacements in reg+d addressing.
- This makes for better code. (??? For 2.5 address this issue).
-
`R' is unused.
`S' is unused.
#define EXTRA_CONSTRAINT(OP, C) \
((C) == 'Q' ? \
(IS_RELOADING_PSEUDO_P (OP) \
- || (GET_CODE (OP) == MEM \
- && reload_in_progress) \
|| (GET_CODE (OP) == MEM \
&& memory_address_p (GET_MODE (OP), XEXP (OP, 0))\
&& ! symbolic_memory_operand (OP, VOIDmode))) \
in one reasonably fast instruction. */
#define MOVE_MAX 8
+/* Higher than the default as we prefer to use simple move insns
+ (better scheduling and delay slot filling) and because our
+ built-in block move is really a 2X unrolled loop. */
+#define MOVE_RATIO 4
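+
+/* (MOVE_RATIO is the threshold number of simple move insns below
+   which a block copy is open coded as individual moves rather than
+   going through the movstrsi pattern or a library call.) */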
+
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
#define WORD_REGISTER_OPERATIONS
switch on CODE. The purpose for the cost of MULT is to encourage
`synth_mult' to find a synthetic multiply when reasonable. */
-#define RTX_COSTS(X,CODE,OUTER_CODE) \
- case MULT: \
- return (TARGET_SNAKE && ! TARGET_DISABLE_FPREGS \
- && ! TARGET_SOFT_FLOAT \
- ? COSTS_N_INSNS (8) : COSTS_N_INSNS (20)); \
- case DIV: \
- case UDIV: \
- case MOD: \
- case UMOD: \
- return COSTS_N_INSNS (60); \
- case PLUS: \
- if (GET_CODE (XEXP (X, 0)) == MULT \
- && shadd_operand (XEXP (XEXP (X, 0), 1), VOIDmode)) \
- return (2 + rtx_cost (XEXP (XEXP (X, 0), 0), OUTER_CODE) \
- + rtx_cost (XEXP (X, 1), OUTER_CODE)); \
- break;
+#define RTX_COSTS(X,CODE,OUTER_CODE) \
+ case MULT: \
+ if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \
+ return COSTS_N_INSNS (3); \
+ return (TARGET_SNAKE && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT) \
+ ? COSTS_N_INSNS (8) : COSTS_N_INSNS (20); \
+ case DIV: \
+ if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \
+ return COSTS_N_INSNS (14); \
+ case UDIV: \
+ case MOD: \
+ case UMOD: \
+ return COSTS_N_INSNS (60); \
+ case PLUS: /* This includes shNadd insns. */ \
+ case MINUS: \
+ if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \
+ return COSTS_N_INSNS (3); \
+ return COSTS_N_INSNS (1); \
+ case ASHIFT: \
+ case ASHIFTRT: \
+ case LSHIFTRT: \
+ return COSTS_N_INSNS (1);
/* Adjust the cost of dependencies. */
extern struct rtx_def *gen_cmp_fp ();
extern void hppa_encode_label ();
-#if 0
-#define PREDICATE_CODES \
- {"reg_or_0_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \
- {"reg_or_cint_move_operand", {SUBREG, REG, CONST_INT}}, \
- {"arith_operand", {SUBREG, REG, CONST_INT}}, \
- {"arith32_operand", {SUBREG, REG, CONST_INT}}, \
- {"arith11_operand", {SUBREG, REG, CONST_INT}}, \
- {"arith5_operand", {SUBREG, REG, CONST_INT}}, \
- {"pre_cint_operand", {CONST_INT}}, \
- {"post_cint_operand", {CONST_INT}}, \
- {"int5_operand", {CONST_INT}}, \
- {"uint5_operand", {CONST_INT}}, \
- {"uint32_operand", {CONST_INT}}, \
- {"int11_operand", {CONST_INT}}, \
- {"and_operand", {SUBREG, REG, CONST_INT}}, \
- {"ior_operand", {CONST_INT}}, \
- {"lhs_lshift_operand", {SUBREG, REG, CONST_INT}}, \
- {"lhs_lshift_cint_operand", {CONST_INT}}, \
- {"plus_xor_ior_operator", {PLUS, XOR, IOR}}, \
- {"shadd_operand", {CONST_INT}}, \
- {"eq_neq_comparison_operator", {EQ, NE}}, \
- {"movb_comparison_operator", {EQ, NE, LT, GE}}, \
- {"pc_or_label_operand", {LABEL_REF, PC}}, \
- {"symbolic_operand", {SYMBOL_REF, LABEL_REF, CONST}}, \
- {"reg_or_nonsymb_mem_operand", {SUBREG, REG, MEM}}, \
- {"move_operand", {SUBREG, REG, CONST_INT, MEM}}, \
- {"pic_label_operand", {LABEL_REF, CONST}}, \
- {"function_label_operand", {SYMBOL_REF}}, \
- {"reg_or_0_or_nonsymb_mem_operand", {SUBREG, REG, CONST_INT, \
- CONST_DOUBLE, MEM}}, \
- {"div_operand", {REG, CONST_INT}}, \
- {"call_operand_address", {SYMBOL_REF, LABEL_REF, CONST_INT, \
- CONST_DOUBLE, CONST, HIGH}},
-#endif
-
/* We want __gcc_plt_call to appear in every program built by
gcc, so we make a reference to it out of __main.
We use the asm statement to fool the optimizer into not
[(match_operand:SF 0 "reg_or_0_operand" "fG")
(match_operand:SF 1 "reg_or_0_operand" "fG")]))]
"! TARGET_SOFT_FLOAT"
- "fcmp,sgl,%Y2 %r0,%r1"
+ "*
+{
+ rtx next_insn;
+
+ /* See if this is later used in a reversed FP branch. If so, reverse our
+ condition and the branch. Doing so avoids a useless add,tr.
+
+ Don't do this if fcmp is in a delay slot since it's too much of a
+ headache to track down things on multiple paths. */
+ if (dbr_sequence_length ())
+ next_insn = NULL;
+ else
+ next_insn = NEXT_INSN (insn);
+ while (next_insn)
+ {
+ /* Jumps, calls and labels stop our search. */
+ if (GET_CODE (next_insn) == JUMP_INSN
+ || GET_CODE (next_insn) == CALL_INSN
+ || GET_CODE (next_insn) == CODE_LABEL)
+ break;
+
+ /* As does another fcmp insn. */
+ if (GET_CODE (next_insn) == INSN
+ && GET_CODE (PATTERN (next_insn)) == SET
+ && GET_CODE (SET_DEST (PATTERN (next_insn))) == REG
+ && REGNO (SET_DEST (PATTERN (next_insn))) == 0)
+ break;
+
+ if (GET_CODE (next_insn) == INSN
+ && GET_CODE (PATTERN (next_insn)) == SEQUENCE)
+ next_insn = XVECEXP (PATTERN (next_insn), 0, 0);
+ else
+ next_insn = NEXT_INSN (next_insn);
+ }
+
+ /* Is NEXT_INSN a branch? */
+ if (next_insn
+ && GET_CODE (next_insn) == JUMP_INSN)
+ {
+ rtx pattern = PATTERN (next_insn);
+
+ /* If it is a reversed fp conditional branch (eg one using add,tr)
+ and CCFP dies, then reverse our condition and the branch to
+ avoid the useless add,tr. */
+ if (GET_CODE (pattern) == SET
+ && SET_DEST (pattern) == pc_rtx
+ && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
+ && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
+ && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
+ && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
+ && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
+ && find_regno_note (next_insn, REG_DEAD, 0))
+ {
+ rtx tmp;
+
+ tmp = XEXP (SET_SRC (pattern), 1);
+ XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
+ XEXP (SET_SRC (pattern), 2) = tmp;
+ INSN_CODE (next_insn) = -1;
+ return \"fcmp,sgl,%y2 %r0,%r1\";
+ }
+ }
+ return \"fcmp,sgl,%Y2 %r0,%r1\";
+}"
[(set_attr "length" "4")
(set_attr "type" "fpcc")])
[(match_operand:DF 0 "reg_or_0_operand" "fG")
(match_operand:DF 1 "reg_or_0_operand" "fG")]))]
"! TARGET_SOFT_FLOAT"
- "fcmp,dbl,%Y2 %r0,%r1"
+ "*
+{
+ rtx next_insn;
+
+ /* See if this is later used in a reversed FP branch. If so, reverse our
+ condition and the branch. Doing so avoids a useless add,tr.
+
+ Don't do this if fcmp is in a delay slot since it's too much of a
+ headache to track down things on multiple paths. */
+ if (dbr_sequence_length ())
+ next_insn = NULL;
+ else
+ next_insn = NEXT_INSN (insn);
+ while (next_insn)
+ {
+ /* Jumps, calls and labels stop our search. */
+ if (GET_CODE (next_insn) == JUMP_INSN
+ || GET_CODE (next_insn) == CALL_INSN
+ || GET_CODE (next_insn) == CODE_LABEL)
+ break;
+
+ /* As does another fcmp insn. */
+ if (GET_CODE (next_insn) == INSN
+ && GET_CODE (PATTERN (next_insn)) == SET
+ && GET_CODE (SET_DEST (PATTERN (next_insn))) == REG
+ && REGNO (SET_DEST (PATTERN (next_insn))) == 0)
+ break;
+
+ if (GET_CODE (next_insn) == INSN
+ && GET_CODE (PATTERN (next_insn)) == SEQUENCE)
+ next_insn = XVECEXP (PATTERN (next_insn), 0, 0);
+ else
+ next_insn = NEXT_INSN (next_insn);
+ }
+
+ /* Is NEXT_INSN a branch? */
+ if (next_insn
+ && GET_CODE (next_insn) == JUMP_INSN)
+ {
+ rtx pattern = PATTERN (next_insn);
+
+ /* If it is a reversed fp conditional branch (eg one using add,tr)
+ and CCFP dies, then reverse our condition and the branch to
+ avoid the useless add,tr. */
+ if (GET_CODE (pattern) == SET
+ && SET_DEST (pattern) == pc_rtx
+ && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
+ && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
+ && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
+ && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
+ && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
+ && find_regno_note (next_insn, REG_DEAD, 0))
+ {
+ rtx tmp;
+
+ tmp = XEXP (SET_SRC (pattern), 1);
+ XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
+ XEXP (SET_SRC (pattern), 2) = tmp;
+ INSN_CODE (next_insn) = -1;
+ return \"fcmp,dbl,%y2 %r0,%r1\";
+ }
+ }
+ return \"fcmp,dbl,%Y2 %r0,%r1\";
+}"
[(set_attr "length" "4")
(set_attr "type" "fpcc")])
comiclr,<< %2,%0,0\;ldi %2,%0"
[(set_attr "type" "multi,multi")
(set_attr "length" "8,8")])
+
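+;; Branchless absolute value.  The comiclr,< compares 0 with %0 and
+;; nullifies the following subi when the operand is already positive;
+;; the subi (0 - %0) then runs only for zero or negative operands,
+;; and negating zero is harmless.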
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (abs:SI (match_operand:SI 1 "register_operand" "0")))]
+ ""
+ "comiclr,< 0,%0,0\;subi 0,%0,%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
;;; Experimental conditional move patterns
(define_expand "movsicc"
[(set_attr "type" "load")
(set_attr "length" "8")])
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r"))))]
+ "! TARGET_DISABLE_INDEXING"
+ "*
+{
+ /* Reload can create backwards (relative to cse) unscaled index
+ address modes when eliminating registers and possibly for
+ pseudos that don't get hard registers. Deal with it. */
+ if (operands[1] == hard_frame_pointer_rtx
+ || operands[1] == stack_pointer_rtx)
+ return \"ldwx %2(0,%1),%0\";
+ else
+ return \"ldwx %1(0,%2),%0\";
+}"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
;; Load or store with base-register modification.
(define_insn "pre_ldwm"
[(set_attr "type" "load")
(set_attr "length" "8")])
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mem:HI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r"))))]
+ "! TARGET_DISABLE_INDEXING"
+ "*
+{
+ /* Reload can create backwards (relative to cse) unscaled index
+ address modes when eliminating registers and possibly for
+ pseudos that don't get hard registers. Deal with it. */
+ if (operands[1] == hard_frame_pointer_rtx
+ || operands[1] == stack_pointer_rtx)
+ return \"ldhx %2(0,%1),%0\";
+ else
+ return \"ldhx %1(0,%2),%0\";
+}"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
(define_insn ""
[(set (match_operand:HI 3 "register_operand" "=r")
(mem:HI (plus:SI (match_operand:SI 1 "register_operand" "0")
[(set_attr "type" "move,move,move,shift,load,store,move,fpalu")
(set_attr "length" "4,4,4,4,4,4,4,4")])
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mem:QI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r"))))]
+ "! TARGET_DISABLE_INDEXING"
+ "*
+{
+ /* Reload can create backwards (relative to cse) unscaled index
+ address modes when eliminating registers and possibly for
+ pseudos that don't get hard registers. Deal with it. */
+ if (operands[1] == hard_frame_pointer_rtx
+ || operands[1] == stack_pointer_rtx)
+ return \"ldbx %2(0,%1),%0\";
+ else
+ return \"ldbx %1(0,%2),%0\";
+}"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
(define_insn ""
[(set (match_operand:QI 3 "register_operand" "=r")
(mem:QI (plus:SI (match_operand:SI 1 "register_operand" "0")
""
"
{
- /* If the blocks are not at least word-aligned and rather big (>16 items),
- or the size is indeterminate, don't inline the copy code. A
- procedure call is better since it can check the alignment at
- runtime and make the optimal decisions. */
- if (INTVAL (operands[3]) < 4
- && (GET_CODE (operands[2]) != CONST_INT
- || (INTVAL (operands[2]) / INTVAL (operands[3]) > 8)))
- FAIL;
+ int size, align;
+ /* HP provides a very fast block move library routine for the PA;
+ this routine includes:
+
+ 4x4 byte at a time block moves,
+ 1x4 byte at a time with alignment checked at runtime with
+ attempts to align the source and destination as needed
+ 1x1 byte loop
+
+ With that in mind, here are the heuristics used to guess when
+ the inlined block move will be better than the library block
+ move:
+
+ If the size isn't constant, then always use the library routines.
+
+ If the size is large relative to the known alignment, then use
+ the library routines.
+
+ If the size is small relative to the known alignment, then open
+ code the copy (since that will lead to better scheduling).
+
+ Else use the block move pattern. */
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[2]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+ align = align > 4 ? 4 : align;
+ /* If size/align > 16 (ie the size is large relative to the
+ alignment), then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO)
+ FAIL;
+
+ /* Fall through means we're going to use our block move pattern. */
operands[0] = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
operands[1] = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);
+ emit_insn (gen_movstrsi_internal (operands[0], operands[1], operands[4],
+ operands[5], operands[2], operands[3],
+ gen_reg_rtx (SImode)));
+ DONE;
}")
;; The operand constraints are written like this to support both compile-time
;; the register with the byte count is clobbered by the copying code, and
;; therefore it is forced to operand 2. If the count is compile-time
;; determined, we need two scratch registers for the unrolled code.
-(define_insn ""
+(define_insn "movstrsi_internal"
[(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r"))
(mem:BLK (match_operand:SI 1 "register_operand" "+r,r")))
(clobber (match_dup 0))
(clobber (match_dup 1))
(clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp
(clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp
+ (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
(use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
(use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment
""
&& operands[1] != CONST0_RTX (DFmode)
&& ! TARGET_SOFT_FLOAT"
"* return (which_alternative == 0 ? output_move_double (operands)
- : \" fldds%F1 %1,%0\");"
+ : \"fldds%F1 %1,%0\");"
[(set_attr "type" "move,fpload")
(set_attr "length" "16,4")])
[(set_attr "type" "fpload")
(set_attr "length" "8")])
+(define_insn ""
+ [(set (match_operand:DF 0 "register_operand" "=fx")
+ (mem:DF (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r"))))]
+ "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
+ "*
+{
+ /* Reload can create backwards (relative to cse) unscaled index
+ address modes when eliminating registers and possibly for
+ pseudos that don't get hard registers. Deal with it. */
+ if (operands[1] == hard_frame_pointer_rtx
+ || operands[1] == stack_pointer_rtx)
+ return \"flddx %2(0,%1),%0\";
+ else
+ return \"flddx %1(0,%2),%0\";
+}"
+ [(set_attr "type" "fpload")
+ (set_attr "length" "4")])
+
(define_insn ""
[(set (mem:DF (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
(const_int 8))
[(set_attr "type" "fpstore")
(set_attr "length" "8")])
+(define_insn ""
+ [(set (mem:DF (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r")))
+ (match_operand:DF 0 "register_operand" "fx"))]
+ "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
+ "*
+{
+ /* Reload can create backwards (relative to cse) unscaled index
+ address modes when eliminating registers and possibly for
+ pseudos that don't get hard registers. Deal with it. */
+ if (operands[1] == hard_frame_pointer_rtx
+ || operands[1] == stack_pointer_rtx)
+ return \"fstdx %0,%2(0,%1)\";
+ else
+ return \"fstdx %0,%1(0,%2)\";
+}"
+ [(set_attr "type" "fpstore")
+ (set_attr "length" "4")])
+
(define_expand "movdi"
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "")
(match_operand:DI 1 "general_operand" ""))]
[(set_attr "type" "fpload")
(set_attr "length" "8")])
+(define_insn ""
+ [(set (match_operand:SF 0 "register_operand" "=fx")
+ (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r"))))]
+ "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
+ "*
+{
+ /* Reload can create backwards (relative to cse) unscaled index
+ address modes when eliminating registers and possibly for
+ pseudos that don't get hard registers. Deal with it. */
+ if (operands[1] == hard_frame_pointer_rtx
+ || operands[1] == stack_pointer_rtx)
+ return \"fldwx %2(0,%1),%0\";
+ else
+ return \"fldwx %1(0,%2),%0\";
+}"
+ [(set_attr "type" "fpload")
+ (set_attr "length" "4")])
+
(define_insn ""
[(set (mem:SF (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
(const_int 4))
}"
[(set_attr "type" "fpstore")
(set_attr "length" "8")])
+
+(define_insn ""
+ [(set (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r")))
+ (match_operand:SF 0 "register_operand" "fx"))]
+ "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT"
+ "*
+{
+ /* Reload can create backwards (relative to cse) unscaled index
+ address modes when eliminating registers and possibly for
+ pseudos that don't get hard registers. Deal with it. */
+ if (operands[1] == hard_frame_pointer_rtx
+ || operands[1] == stack_pointer_rtx)
+ return \"fstwx %0,%2(0,%1)\";
+ else
+ return \"fstwx %0,%1(0,%2)\";
+}"
+ [(set_attr "type" "fpstore")
+ (set_attr "length" "4")])
\f
+
;;- zero extension instructions
(define_insn "zero_extendhisi2"