From: Jeff Law
Date: Mon, 27 Nov 1995 07:33:58 +0000 (-0700)
Subject: pa.md (abssi2): New pattern.
X-Git-Tag: misc/cutover-egcs-0~3390
X-Git-Url: https://gcc.gnu.org/git/?a=commitdiff_plain;h=68944452e4abe12bbef4fa078bf614caa39c85cf;p=gcc.git

pa.md (abssi2): New pattern.

        * pa.md (abssi2): New pattern.

        * pa.c (secondary_reload_class): Loads from reg+d addresses into
        FP registers don't need secondary reloads.

        * pa.h: Delete some #if 0 code.  Update some comments.
        (EXTRA_CONSTRAINT, case 'Q'): Only accept valid memory addresses.

        * pa.h (RTX_COSTS): Tege's rewrite.

        * pa.c (hppa_legitimize_address): Generate unscaled indexed
        addressing for (plus (symbol_ref) (reg)).
        (emit_move_sequence): Set REGNO_POINTER_FLAG appropriately to
        encourage unscaled indexing modes.
        (basereg_operand): New function for unscaled index address support.
        * pa.md (unscaled indexing patterns): New patterns for unscaled
        index address support.

        * pa.h (MOVE_RATIO): Define.
        * pa.md (movstrsi expander): Refine tests for when to use the
        library routine instead of an inlined loop copy.  Provide an
        additional scratch register for use in the inlined loop copy.
        (movstrsi_internal): Name the pattern for ease of use.  Add
        additional scratch register.
        * pa.c (output_block_move): Greatly simplify.  Use 2X unrolled
        copy loops to improve performance.
        (compute_movstrsi_length): Corresponding changes.

        * pa.c (print_operand): Handle 'y' case for reversed FP
        comparisons.  Delete some #if 0 code.  Fix various comment typos.
        * pa.md (fcmp patterns): Try and reverse the comparison to avoid
        useless add,tr insns.

From-SVN: r10609
---

diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 3bd15e1e4f6a..3cc18eca7a57 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -637,6 +637,16 @@ hppa_legitimize_address (x, oldx, mode)
   if (GET_CODE (x) == CONST)
     x = XEXP (x, 0);
 
+  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
+     That should always be safe.  */
+  if (GET_CODE (x) == PLUS
+      && GET_CODE (XEXP (x, 0)) == REG
+      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
+    {
+      rtx reg = force_reg (SImode, XEXP (x, 1));
+      return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
+    }
+
   /* Note we must reject symbols which represent function addresses
      since the assembler/linker can't handle arithmetic on plabels.  */
   if (GET_CODE (x) == PLUS
@@ -793,7 +803,7 @@ emit_move_sequence (operands, mode, scratch_reg)
 
   /* Handle secondary reloads for loads/stores of FP registers from
      REG+D addresses where D does not fit in 5 bits, including
-     (subreg (mem (addr)) cases.  */
+     (subreg (mem (addr))) cases.  */
   if (fp_reg_operand (operand0, mode)
       && ((GET_CODE (operand1) == MEM
            && ! memory_address_p (DFmode, XEXP (operand1, 0)))
@@ -975,9 +985,9 @@ emit_move_sequence (operands, mode, scratch_reg)
           operands[1] = force_const_mem (mode, operand1);
           emit_move_sequence (operands, mode, temp);
         }
-      /* Likewise for (const (plus (symbol) (const_int)) when generating
-         pic code during or after reload and const_int will not fit
-         in 14 bits.  */
+      /* Likewise for (const (plus (symbol) (const_int))) when
+         generating pic code during or after reload and const_int
+         will not fit in 14 bits.  */
       else if (GET_CODE (operand1) == CONST
                && GET_CODE (XEXP (operand1, 0)) == PLUS
                && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
@@ -1008,6 +1018,14 @@ emit_move_sequence (operands, mode, scratch_reg)
       else
         temp = gen_reg_rtx (mode);
 
+      /* Loading a SYMBOL_REF into a register makes that register
+         safe to be used as the base in an indexed address.
+ + Don't mark hard registers though. That loses. */ + if (REGNO (operand0) >= FIRST_PSEUDO_REGISTER) + REGNO_POINTER_FLAG (REGNO (operand0)) = 1; + if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) + REGNO_POINTER_FLAG (REGNO (temp)) = 1; if (ishighonly) set = gen_rtx (SET, mode, operand0, temp); else @@ -1457,18 +1475,13 @@ find_addr_reg (addr) /* Emit code to perform a block move. - Restriction: If the length argument is non-constant, alignment - must be 4. - OPERANDS[0] is the destination pointer as a REG, clobbered. OPERANDS[1] is the source pointer as a REG, clobbered. - if SIZE_IS_CONSTANT - OPERANDS[2] is a register for temporary storage. - OPERANDS[4] is the size as a CONST_INT - else - OPERANDS[2] is a REG which will contain the size, clobbered. + OPERANDS[2] is a register for temporary storage. + OPERANDS[4] is the size as a CONST_INT OPERANDS[3] is a register for temporary storage. - OPERANDS[5] is the alignment safe to use, as a CONST_INT. */ + OPERANDS[5] is the alignment safe to use, as a CONST_INT. + OPERNADS[6] is another temporary register. */ char * output_block_move (operands, size_is_constant) @@ -1476,153 +1489,94 @@ output_block_move (operands, size_is_constant) int size_is_constant; { int align = INTVAL (operands[5]); - unsigned long n_bytes; + unsigned long n_bytes = INTVAL (operands[4]); /* We can't move more than four bytes at a time because the PA has no longer integer move insns. (Could use fp mem ops?) */ if (align > 4) align = 4; - if (size_is_constant) + /* Note that we know each loop below will execute at least twice + (else we would have open-coded the copy). */ + switch (align) { - unsigned long offset; - rtx temp; - - n_bytes = INTVAL (operands[4]); - if (n_bytes == 0) - return ""; - - if (align >= 4) - { - /* Don't unroll too large blocks. */ - if (n_bytes > 32) - goto copy_with_loop; - - /* Read and store using two registers, and hide latency - by deferring the stores until three instructions after - the corresponding load. The last load insn will read - the entire word were the last bytes are, possibly past - the end of the source block, but since loads are aligned, - this is harmless. */ - - output_asm_insn ("ldws,ma 4(0,%1),%2", operands); - - for (offset = 4; offset < n_bytes; offset += 4) - { + case 4: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 8); + output_asm_insn ("ldi %4,%2", operands); + + /* Copying loop. */ + output_asm_insn ("ldws,ma 4(0,%1),%3", operands); + output_asm_insn ("ldws,ma 4(0,%1),%6", operands); + output_asm_insn ("stws,ma %3,4(0,%0)", operands); + output_asm_insn ("addib,>= -8,%2,.-12", operands); + output_asm_insn ("stws,ma %6,4(0,%0)", operands); + + /* Handle the residual. There could be up to 7 bytes of + residual to copy! */ + if (n_bytes % 8 != 0) + { + operands[4] = GEN_INT (n_bytes % 4); + if (n_bytes % 8 >= 4) output_asm_insn ("ldws,ma 4(0,%1),%3", operands); - output_asm_insn ("stws,ma %2,4(0,%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("ldw 0(0,%1),%6", operands); + if (n_bytes % 8 >= 4) + output_asm_insn ("stws,ma %3,4(0,%0)", operands); + if (n_bytes % 4 != 0) + output_asm_insn ("stbys,e %6,%4(0,%0)", operands); + } + return ""; - temp = operands[2]; - operands[2] = operands[3]; - operands[3] = temp; - } - if (n_bytes % 4 == 0) - /* Store the last word. */ - output_asm_insn ("stw %2,0(0,%0)", operands); - else - { - /* Store the last, partial word. 
*/ - operands[4] = GEN_INT (n_bytes % 4); - output_asm_insn ("stbys,e %2,%4(0,%0)", operands); - } - return ""; - } + case 2: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 4); + output_asm_insn ("ldi %4,%2", operands); - if (align >= 2 && n_bytes >= 2) - { - output_asm_insn ("ldhs,ma 2(0,%1),%2", operands); + /* Copying loop. */ + output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); + output_asm_insn ("ldhs,ma 2(0,%1),%6", operands); + output_asm_insn ("sths,ma %3,2(0,%0)", operands); + output_asm_insn ("addib,>= -4,%2,.-12", operands); + output_asm_insn ("sths,ma %6,2(0,%0)", operands); - for (offset = 2; offset + 2 <= n_bytes; offset += 2) - { + /* Handle the residual. */ + if (n_bytes % 4 != 0) + { + if (n_bytes % 4 >= 2) output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); - output_asm_insn ("sths,ma %2,2(0,%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("ldb 0(0,%1),%6", operands); + if (n_bytes % 4 >= 2) + output_asm_insn ("sths,ma %3,2(0,%0)", operands); + if (n_bytes % 2 != 0) + output_asm_insn ("stb %6,0(0,%0)", operands); + } + return ""; - temp = operands[2]; - operands[2] = operands[3]; - operands[3] = temp; - } - if (n_bytes % 2 != 0) - output_asm_insn ("ldb 0(0,%1),%3", operands); + case 1: + /* Pre-adjust the loop counter. */ + operands[4] = GEN_INT (n_bytes - 2); + output_asm_insn ("ldi %4,%2", operands); - output_asm_insn ("sths,ma %2,2(0,%0)", operands); + /* Copying loop. */ + output_asm_insn ("ldbs,ma 1(0,%1),%3", operands); + output_asm_insn ("ldbs,ma 1(0,%1),%6", operands); + output_asm_insn ("stbs,ma %3,1(0,%0)", operands); + output_asm_insn ("addib,>= -2,%2,.-12", operands); + output_asm_insn ("stbs,ma %6,1(0,%0)", operands); - if (n_bytes % 2 != 0) + /* Handle the residual. */ + if (n_bytes % 2 != 0) + { + output_asm_insn ("ldb 0(0,%1),%3", operands); output_asm_insn ("stb %3,0(0,%0)", operands); + } + return ""; - return ""; - } - - output_asm_insn ("ldbs,ma 1(0,%1),%2", operands); - - for (offset = 1; offset + 1 <= n_bytes; offset += 1) - { - output_asm_insn ("ldbs,ma 1(0,%1),%3", operands); - output_asm_insn ("stbs,ma %2,1(0,%0)", operands); - - temp = operands[2]; - operands[2] = operands[3]; - operands[3] = temp; - } - output_asm_insn ("stb %2,0(0,%0)", operands); - - return ""; - } - - if (align != 4) - abort(); - - copy_with_loop: - - if (size_is_constant) - { - /* Size is compile-time determined, and also not - very small (such small cases are handled above). */ - operands[4] = GEN_INT (n_bytes - 4); - output_asm_insn ("ldo %4(0),%2", operands); - } - else - { - /* Decrement counter by 4, and if it becomes negative, jump past the - word copying loop. */ - output_asm_insn ("addib,<,n -4,%2,.+16", operands); - } - - /* Copying loop. Note that the first load is in the annulled delay slot - of addib. Is it OK on PA to have a load in a delay slot, i.e. is a - possible page fault stopped in time? */ - output_asm_insn ("ldws,ma 4(0,%1),%3", operands); - output_asm_insn ("addib,>= -4,%2,.-4", operands); - output_asm_insn ("stws,ma %3,4(0,%0)", operands); - - /* The counter is negative, >= -4. The remaining number of bytes are - determined by the two least significant bits. */ - - if (size_is_constant) - { - if (n_bytes % 4 != 0) - { - /* Read the entire word of the source block tail. */ - output_asm_insn ("ldw 0(0,%1),%3", operands); - operands[4] = GEN_INT (n_bytes % 4); - output_asm_insn ("stbys,e %3,%4(0,%0)", operands); - } - } - else - { - /* Add 4 to counter. If it becomes zero, we're done. 
*/ - output_asm_insn ("addib,=,n 4,%2,.+16", operands); - - /* Read the entire word of the source block tail. (Also this - load is in an annulled delay slot.) */ - output_asm_insn ("ldw 0(0,%1),%3", operands); - - /* Make %0 point at the first byte after the destination block. */ - output_asm_insn ("addl %2,%0,%0", operands); - /* Store the leftmost bytes, up to, but not including, the address - in %0. */ - output_asm_insn ("stbys,e %3,0(0,%0)", operands); + default: + abort (); } - return ""; } /* Count the number of insns necessary to handle this block move. @@ -1635,106 +1589,33 @@ compute_movstrsi_length (insn) rtx insn; { rtx pat = PATTERN (insn); - int size_is_constant; int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); - unsigned long n_bytes; - int insn_count = 0; - - if (GET_CODE (XEXP (XVECEXP (pat, 0, 5), 0)) == CONST_INT) - { - size_is_constant = 1; - n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0)); - } - else - { - size_is_constant = 0; - n_bytes = 0; - } + unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0)); + unsigned int n_insns = 0; /* We can't move more than four bytes at a time because the PA has no longer integer move insns. (Could use fp mem ops?) */ if (align > 4) align = 4; - if (size_is_constant) - { - unsigned long offset; - - if (n_bytes == 0) - return 0; - - if (align >= 4) - { - /* Don't unroll too large blocks. */ - if (n_bytes > 32) - goto copy_with_loop; - - /* first load */ - insn_count = 1; - - /* Count the unrolled insns. */ - for (offset = 4; offset < n_bytes; offset += 4) - insn_count += 2; - - /* Count last store or partial store. */ - insn_count += 1; - return insn_count * 4; - } - - if (align >= 2 && n_bytes >= 2) - { - /* initial load. */ - insn_count = 1; - - /* Unrolled loop. */ - for (offset = 2; offset + 2 <= n_bytes; offset += 2) - insn_count += 2; - - /* ??? odd load/store */ - if (n_bytes % 2 != 0) - insn_count += 2; - - /* ??? final store from loop. */ - insn_count += 1; + /* The basic opying loop. */ + n_insns = 6; - return insn_count * 4; - } - - /* First load. */ - insn_count = 1; - - /* The unrolled loop. */ - for (offset = 1; offset + 1 <= n_bytes; offset += 1) - insn_count += 2; - - /* Final store. */ - insn_count += 1; - - return insn_count * 4; - } - - if (align != 4) - abort(); - - copy_with_loop: - - /* setup for constant and non-constant case. */ - insn_count = 1; - - /* The copying loop. */ - insn_count += 3; - - /* The counter is negative, >= -4. The remaining number of bytes are - determined by the two least significant bits. */ - - if (size_is_constant) + /* Residuals. */ + if (n_bytes % (2 * align) != 0) { - if (n_bytes % 4 != 0) - insn_count += 2; + /* Any residual caused by unrolling the copy loop. */ + if (n_bytes % (2 * align) > align) + n_insns += 1; + + /* Any residual because the number of bytes was not a + multiple of the alignment. */ + if (n_bytes % align != 0) + n_insns += 1; } - else - insn_count += 4; - return insn_count * 4; + + /* Lengths are expressed in bytes now; each insn is 4 bytes. */ + return n_insns * 4; } @@ -2363,7 +2244,7 @@ hppa_expand_prologue() even be more efficient. Avoid this if the callee saved register wasn't used (these are - leaf functions. */ + leaf functions). 
*/ if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED]) emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED), gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM)); @@ -2511,9 +2392,8 @@ hppa_expand_epilogue () load_reg (2, - 20, STACK_POINTER_REGNUM); } - /* Reset stack pointer (and possibly frame pointer). The stack */ - /* pointer is initially set to fp + 64 to avoid a race condition. - ??? What race condition?!? */ + /* Reset stack pointer (and possibly frame pointer). The stack + pointer is initially set to fp + 64 to avoid a race condition. */ else if (frame_pointer_needed) { /* Emit a blockage insn here to keep these insns from being moved @@ -3004,6 +2884,27 @@ print_operand (file, x, code) abort (); } return; + /* Reversed floating point comparison. Need special conditions to + deal with NaNs properly. */ + case 'y': + switch (GET_CODE (x)) + { + case EQ: + fprintf (file, "?="); break; + case NE: + fprintf (file, "!?="); break; + case GT: + fprintf (file, "!<="); break; + case GE: + fprintf (file, "!<"); break; + case LT: + fprintf (file, "!>="); break; + case LE: + fprintf (file, "!>"); break; + default: + abort (); + } + return; case 'S': /* Condition, operands are (S)wapped. */ switch (GET_CODE (x)) { @@ -3161,30 +3062,6 @@ print_operand (file, x, code) break; } } -#if 0 - /* The code here is completely wrong. It attempts to extract parts of - a CONST_DOUBLE which is wrong since REAL_ARITHMETIC is defined, and it - extracts the wrong indices (0 instead of 2 and 1 instead of 3) using - the wrong macro (XINT instead of XWINT). - Just disable it for now, since the code will never be used anyway! */ - else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) - { - union { double d; int i[2]; } u; - union { float f; int i; } u1; - u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1); - u1.f = u.d; - if (code == 'f') - fprintf (file, "0r%.9g", u1.f); - else - fprintf (file, "0x%x", u1.i); - } - else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) - { - union { double d; int i[2]; } u; - u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1); - fprintf (file, "0r%.20g", u.d); - } -#endif else output_addr_const (file, x); } @@ -3527,12 +3404,6 @@ secondary_reload_class (class, mode, in) if (GET_CODE (in) == SUBREG) in = SUBREG_REG (in); - if (FP_REG_CLASS_P (class) - && GET_CODE (in) == MEM - && !memory_address_p (DFmode, XEXP (in, 0)) - && memory_address_p (SImode, XEXP (in, 0))) - return GENERAL_REGS; - return NO_REGS; } @@ -4431,6 +4302,38 @@ shadd_operand (op, mode) return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op))); } +/* Return 1 if OP is valid as a base register in a reg + reg address. */ + +int +basereg_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + /* Once reload has started everything is considered valid. Reload should + only create indexed addresses using the stack/frame pointer, and any + others were checked for validity when created by the combine pass. + + Also allow any register when TARGET_NO_SPACE_REGS is in effect since + we don't have to worry about the braindamaged implicit space register + selection using the basereg only (rather than effective address) + screwing us over. */ + if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed) + return (GET_CODE (op) == REG || GET_CODE (op) == CONST_INT); + + /* Stack and frame pointers are always OK for indexing. 
*/ + if (op == stack_pointer_rtx || op == frame_pointer_rtx) + return 1; + + /* The only other valid OPs are pseudo registers with + REGNO_POINTER_FLAG set. */ + if (GET_CODE (op) != REG + || REGNO (op) < FIRST_PSEUDO_REGISTER + || ! register_operand (op, mode)) + return 0; + + return REGNO_POINTER_FLAG (REGNO (op)); +} + /* Return 1 if this operand is anything other than a hard register. */ int diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 5422df2e7b22..52c9fc965a2c 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -1309,10 +1309,6 @@ extern struct rtx_def *hppa_builtin_saveregs (); these things in insns and then not re-recognize the insns, causing constrain_operands to fail. - Also note `Q' accepts any memory operand during the reload pass. - This includes out-of-range displacements in reg+d addressing. - This makes for better code. (??? For 2.5 address this issue). - `R' is unused. `S' is unused. @@ -1321,8 +1317,6 @@ extern struct rtx_def *hppa_builtin_saveregs (); #define EXTRA_CONSTRAINT(OP, C) \ ((C) == 'Q' ? \ (IS_RELOADING_PSEUDO_P (OP) \ - || (GET_CODE (OP) == MEM \ - && reload_in_progress) \ || (GET_CODE (OP) == MEM \ && memory_address_p (GET_MODE (OP), XEXP (OP, 0))\ && ! symbolic_memory_operand (OP, VOIDmode))) \ @@ -1571,6 +1565,11 @@ while (0) in one reasonably fast instruction. */ #define MOVE_MAX 8 +/* Higher than the default as we prefer to use simple move insns + (better scheduling and delay slot filling) and because our + built-in block move is really a 2X unrolled loop. */ +#define MOVE_RATIO 4 + /* Define if operations between registers always perform the operation on the full register even if a narrower mode is specified. */ #define WORD_REGISTER_OPERATIONS @@ -1685,22 +1684,28 @@ while (0) switch on CODE. The purpose for the cost of MULT is to encourage `synth_mult' to find a synthetic multiply when reasonable. */ -#define RTX_COSTS(X,CODE,OUTER_CODE) \ - case MULT: \ - return (TARGET_SNAKE && ! TARGET_DISABLE_FPREGS \ - && ! TARGET_SOFT_FLOAT \ - ? COSTS_N_INSNS (8) : COSTS_N_INSNS (20)); \ - case DIV: \ - case UDIV: \ - case MOD: \ - case UMOD: \ - return COSTS_N_INSNS (60); \ - case PLUS: \ - if (GET_CODE (XEXP (X, 0)) == MULT \ - && shadd_operand (XEXP (XEXP (X, 0), 1), VOIDmode)) \ - return (2 + rtx_cost (XEXP (XEXP (X, 0), 0), OUTER_CODE) \ - + rtx_cost (XEXP (X, 1), OUTER_CODE)); \ - break; +#define RTX_COSTS(X,CODE,OUTER_CODE) \ + case MULT: \ + if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \ + return COSTS_N_INSNS (3); \ + return (TARGET_SNAKE && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT) \ + ? COSTS_N_INSNS (8) : COSTS_N_INSNS (20); \ + case DIV: \ + if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \ + return COSTS_N_INSNS (14); \ + case UDIV: \ + case MOD: \ + case UMOD: \ + return COSTS_N_INSNS (60); \ + case PLUS: /* this includes shNadd insns */ \ + case MINUS: \ + if (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT) \ + return COSTS_N_INSNS (3); \ + return COSTS_N_INSNS (1); \ + case ASHIFT: \ + case ASHIFTRT: \ + case LSHIFTRT: \ + return COSTS_N_INSNS (1); /* Adjust the cost of dependencies. 
*/ @@ -2154,41 +2159,6 @@ extern struct rtx_def *legitimize_pic_address (); extern struct rtx_def *gen_cmp_fp (); extern void hppa_encode_label (); -#if 0 -#define PREDICATE_CODES \ - {"reg_or_0_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}}, \ - {"reg_or_cint_move_operand", {SUBREG, REG, CONST_INT}}, \ - {"arith_operand", {SUBREG, REG, CONST_INT}}, \ - {"arith32_operand", {SUBREG, REG, CONST_INT}}, \ - {"arith11_operand", {SUBREG, REG, CONST_INT}}, \ - {"arith5_operand", {SUBREG, REG, CONST_INT}}, \ - {"pre_cint_operand", {CONST_INT}}, \ - {"post_cint_operand", {CONST_INT}}, \ - {"int5_operand", {CONST_INT}}, \ - {"uint5_operand", {CONST_INT}}, \ - {"uint32_operand", {CONST_INT}}, \ - {"int11_operand", {CONST_INT}}, \ - {"and_operand", {SUBREG, REG, CONST_INT}}, \ - {"ior_operand", {CONST_INT}}, \ - {"lhs_lshift_operand", {SUBREG, REG, CONST_INT}}, \ - {"lhs_lshift_cint_operand", {CONST_INT}}, \ - {"plus_xor_ior_operator", {PLUS, XOR, IOR}}, \ - {"shadd_operand", {CONST_INT}}, \ - {"eq_neq_comparison_operator", {EQ, NE}}, \ - {"movb_comparison_operator", {EQ, NE, LT, GE}}, \ - {"pc_or_label_operand", {LABEL_REF, PC}}, \ - {"symbolic_operand", {SYMBOL_REF, LABEL_REF, CONST}}, \ - {"reg_or_nonsymb_mem_operand", {SUBREG, REG, MEM}}, \ - {"move_operand", {SUBREG, REG, CONST_INT, MEM}}, \ - {"pic_label_operand", {LABEL_REF, CONST}}, \ - {"function_label_operand", {SYMBOL_REF}}, \ - {"reg_or_0_or_nonsymb_mem_operand", {SUBREG, REG, CONST_INT, \ - CONST_DOUBLE, MEM}}, \ - {"div_operand", {REG, CONST_INT}}, \ - {"call_operand_address", {SYMBOL_REF, LABEL_REF, CONST_INT, \ - CONST_DOUBLE, CONST, HIGH}}, -#endif - /* We want __gcc_plt_call to appear in every program built by gcc, so we make a reference to it out of __main. We use the asm statement to fool the optimizer into not diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index c2c2a26b5e10..47fb46da2d02 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -386,7 +386,71 @@ [(match_operand:SF 0 "reg_or_0_operand" "fG") (match_operand:SF 1 "reg_or_0_operand" "fG")]))] "! TARGET_SOFT_FLOAT" - "fcmp,sgl,%Y2 %r0,%r1" + "* +{ + rtx next_insn; + + /* See if this is later used in a reversed FP branch. If so, reverse our + condition and the branch. Doing so avoids a useless add,tr. + + Don't do this if fcmp is in a delay slot since it's too much of a + headache to track down things on multiple paths. */ + if (dbr_sequence_length ()) + next_insn = NULL; + else + next_insn = NEXT_INSN (insn); + while (next_insn) + { + /* Jumps, calls and labels stop our search. */ + if (GET_CODE (next_insn) == JUMP_INSN + || GET_CODE (next_insn) == CALL_INSN + || GET_CODE (next_insn) == CODE_LABEL) + break; + + /* As does another fcmp insn. */ + if (GET_CODE (next_insn) == INSN + && GET_CODE (PATTERN (next_insn)) == SET + && GET_CODE (SET_DEST (PATTERN (next_insn))) == REG + && REGNO (SET_DEST (PATTERN (next_insn))) == 0) + break; + + if (GET_CODE (next_insn) == INSN + && GET_CODE (PATTERN (next_insn)) == SEQUENCE) + next_insn = XVECEXP (PATTERN (next_insn), 0, 0); + else + next_insn = NEXT_INSN (next_insn); + } + + /* Is NEXT_INSN a branch? */ + if (next_insn + && GET_CODE (next_insn) == JUMP_INSN) + { + rtx pattern = PATTERN (next_insn); + + /* Is it a reversed fp conditional branch (eg uses add,tr) and + CCFP dies, then reverse our conditional and the branch to + avoid the add,tr. 
*/ + if (GET_CODE (pattern) == SET + && SET_DEST (pattern) == pc_rtx + && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 + && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC + && find_regno_note (next_insn, REG_DEAD, 0)) + + { + rtx tmp; + + tmp = XEXP (SET_SRC (pattern), 1); + XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); + XEXP (SET_SRC (pattern), 2) = tmp; + INSN_CODE (next_insn) = -1; + return \"fcmp,sgl,%y2 %r0,%r1\"; + } + } + return \"fcmp,sgl,%Y2 %r0,%r1\"; +}" [(set_attr "length" "4") (set_attr "type" "fpcc")]) @@ -396,7 +460,71 @@ [(match_operand:DF 0 "reg_or_0_operand" "fG") (match_operand:DF 1 "reg_or_0_operand" "fG")]))] "! TARGET_SOFT_FLOAT" - "fcmp,dbl,%Y2 %r0,%r1" + "* +{ + rtx next_insn; + + /* See if this is later used in a reversed FP branch. If so, reverse our + condition and the branch. Doing so avoids a useless add,tr. + + Don't do this if fcmp is in a delay slot since it's too much of a + headache to track down things on multiple paths. */ + if (dbr_sequence_length ()) + next_insn = NULL; + else + next_insn = NEXT_INSN (insn); + while (next_insn) + { + /* Jumps, calls and labels stop our search. */ + if (GET_CODE (next_insn) == JUMP_INSN + || GET_CODE (next_insn) == CALL_INSN + || GET_CODE (next_insn) == CODE_LABEL) + break; + + /* As does another fcmp insn. */ + if (GET_CODE (next_insn) == INSN + && GET_CODE (PATTERN (next_insn)) == SET + && GET_CODE (SET_DEST (PATTERN (next_insn))) == REG + && REGNO (SET_DEST (PATTERN (next_insn))) == 0) + break; + + if (GET_CODE (next_insn) == INSN + && GET_CODE (PATTERN (next_insn)) == SEQUENCE) + next_insn = XVECEXP (PATTERN (next_insn), 0, 0); + else + next_insn = NEXT_INSN (next_insn); + } + + /* Is NEXT_INSN a branch? */ + if (next_insn + && GET_CODE (next_insn) == JUMP_INSN) + { + rtx pattern = PATTERN (next_insn); + + /* Is it a reversed fp conditional branch (eg uses add,tr) and + CCFP dies, then reverse our conditional and the branch to + avoid the add,tr. */ + if (GET_CODE (pattern) == SET + && SET_DEST (pattern) == pc_rtx + && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE + && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE + && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG + && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 + && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC + && find_regno_note (next_insn, REG_DEAD, 0)) + + { + rtx tmp; + + tmp = XEXP (SET_SRC (pattern), 1); + XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); + XEXP (SET_SRC (pattern), 2) = tmp; + INSN_CODE (next_insn) = -1; + return \"fcmp,dbl,%y2 %r0,%r1\"; + } + } + return \"fcmp,dbl,%Y2 %r0,%r1\"; +}" [(set_attr "length" "4") (set_attr "type" "fpcc")]) @@ -761,6 +889,15 @@ comiclr,<< %2,%0,0\;ldi %2,%0" [(set_attr "type" "multi,multi") (set_attr "length" "8,8")]) + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" "0")))] + "" + "comiclr,< 0,%0,0\;subi 0,%0,%0" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + ;;; Experimental conditional move patterns (define_expand "movsicc" @@ -1302,6 +1439,25 @@ [(set_attr "type" "load") (set_attr "length" "8")]) +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "basereg_operand" "r"))))] + "! 
TARGET_DISABLE_INDEXING" + "* +{ + /* Reload can create backwards (relative to cse) unscaled index + address modes when eliminating registers and possibly for + pseudos that don't get hard registers. Deal with it. */ + if (operands[1] == hard_frame_pointer_rtx + || operands[1] == stack_pointer_rtx) + return \"ldwx %2(0,%1),%0\"; + else + return \"ldwx %1(0,%2),%0\"; +}" + [(set_attr "type" "load") + (set_attr "length" "4")]) + ;; Load or store with base-register modification. (define_insn "pre_ldwm" @@ -1623,6 +1779,25 @@ [(set_attr "type" "load") (set_attr "length" "8")]) +(define_insn "" + [(set (match_operand:HI 0 "register_operand" "=r") + (mem:HI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "basereg_operand" "r"))))] + "! TARGET_DISABLE_INDEXING" + "* +{ + /* Reload can create backwards (relative to cse) unscaled index + address modes when eliminating registers and possibly for + pseudos that don't get hard registers. Deal with it. */ + if (operands[1] == hard_frame_pointer_rtx + || operands[1] == stack_pointer_rtx) + return \"ldhx %2(0,%1),%0\"; + else + return \"ldhx %1(0,%2),%0\"; +}" + [(set_attr "type" "load") + (set_attr "length" "4")]) + (define_insn "" [(set (match_operand:HI 3 "register_operand" "=r") (mem:HI (plus:SI (match_operand:SI 1 "register_operand" "0") @@ -1689,6 +1864,25 @@ [(set_attr "type" "move,move,move,shift,load,store,move,fpalu") (set_attr "length" "4,4,4,4,4,4,4,4")]) +(define_insn "" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "basereg_operand" "r"))))] + "! TARGET_DISABLE_INDEXING" + "* +{ + /* Reload can create backwards (relative to cse) unscaled index + address modes when eliminating registers and possibly for + pseudos that don't get hard registers. Deal with it. */ + if (operands[1] == hard_frame_pointer_rtx + || operands[1] == stack_pointer_rtx) + return \"ldbx %2(0,%1),%0\"; + else + return \"ldbx %1(0,%2),%0\"; +}" + [(set_attr "type" "load") + (set_attr "length" "4")]) + (define_insn "" [(set (match_operand:QI 3 "register_operand" "=r") (mem:QI (plus:SI (match_operand:SI 1 "register_operand" "0") @@ -1727,19 +1921,55 @@ "" " { - /* If the blocks are not at least word-aligned and rather big (>16 items), - or the size is indeterminate, don't inline the copy code. A - procedure call is better since it can check the alignment at - runtime and make the optimal decisions. */ - if (INTVAL (operands[3]) < 4 - && (GET_CODE (operands[2]) != CONST_INT - || (INTVAL (operands[2]) / INTVAL (operands[3]) > 8))) - FAIL; + int size, align; + /* HP provides very fast block move library routine for the PA; + this routine includes: + + 4x4 byte at a time block moves, + 1x4 byte at a time with alignment checked at runtime with + attempts to align the source and destination as needed + 1x1 byte loop + + With that in mind, here's the heuristics to try and guess when + the inlined block move will be better than the library block + move: + + If the size isn't constant, then always use the library routines. + + If the size is large in respect to the known alignment, then use + the library routines. + + If the size is small in repsect to the known alignment, then open + code the copy (since that will lead to better scheduling). + + Else use the block move pattern. */ + + /* Undetermined size, use the library routine. 
*/ + if (GET_CODE (operands[2]) != CONST_INT) + FAIL; + + size = INTVAL (operands[2]); + align = INTVAL (operands[3]); + align = align > 4 ? 4 : align; + /* If size/alignment > 8 (eg size is large in respect to alignment), + then use the library routines. */ + if (size/align > 16) + FAIL; + + /* This does happen, but not often enough to worry much about. */ + if (size/align < MOVE_RATIO) + FAIL; + + /* Fall through means we're going to use our block move pattern. */ operands[0] = copy_to_mode_reg (SImode, XEXP (operands[0], 0)); operands[1] = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); operands[4] = gen_reg_rtx (SImode); operands[5] = gen_reg_rtx (SImode); + emit_insn (gen_movstrsi_internal (operands[0], operands[1], operands[4], + operands[5], operands[2], operands[3], + gen_reg_rtx (SImode))); + DONE; }") ;; The operand constraints are written like this to support both compile-time @@ -1747,13 +1977,14 @@ ;; the register with the byte count is clobbered by the copying code, and ;; therefore it is forced to operand 2. If the count is compile-time ;; determined, we need two scratch registers for the unrolled code. -(define_insn "" +(define_insn "movstrsi_internal" [(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r")) (mem:BLK (match_operand:SI 1 "register_operand" "+r,r"))) (clobber (match_dup 0)) (clobber (match_dup 1)) (clobber (match_operand:SI 2 "register_operand" "=r,r")) ;loop cnt/tmp (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp + (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2 (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count (use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment "" @@ -1778,7 +2009,7 @@ && operands[1] != CONST0_RTX (DFmode) && ! TARGET_SOFT_FLOAT" "* return (which_alternative == 0 ? output_move_double (operands) - : \" fldds%F1 %1,%0\");" + : \"fldds%F1 %1,%0\");" [(set_attr "type" "move,fpload") (set_attr "length" "16,4")]) @@ -1897,6 +2128,25 @@ [(set_attr "type" "fpload") (set_attr "length" "8")]) +(define_insn "" + [(set (match_operand:DF 0 "register_operand" "=fx") + (mem:DF (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "basereg_operand" "r"))))] + "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT" + "* +{ + /* Reload can create backwards (relative to cse) unscaled index + address modes when eliminating registers and possibly for + pseudos that don't get hard registers. Deal with it. */ + if (operands[1] == hard_frame_pointer_rtx + || operands[1] == stack_pointer_rtx) + return \"flddx %2(0,%1),%0\"; + else + return \"flddx %1(0,%2),%0\"; +}" + [(set_attr "type" "fpload") + (set_attr "length" "4")]) + (define_insn "" [(set (mem:DF (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") (const_int 8)) @@ -1936,6 +2186,25 @@ [(set_attr "type" "fpstore") (set_attr "length" "8")]) +(define_insn "" + [(set (mem:DF (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "basereg_operand" "r"))) + (match_operand:DF 0 "register_operand" "fx"))] + "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT" + "* +{ + /* Reload can create backwards (relative to cse) unscaled index + address modes when eliminating registers and possibly for + pseudos that don't get hard registers. Deal with it. 
*/ + if (operands[1] == hard_frame_pointer_rtx + || operands[1] == stack_pointer_rtx) + return \"fstdx %0,%2(0,%1)\"; + else + return \"fstdx %0,%1(0,%2)\"; +}" + [(set_attr "type" "fpstore") + (set_attr "length" "4")]) + (define_expand "movdi" [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "") (match_operand:DI 1 "general_operand" ""))] @@ -2202,6 +2471,25 @@ [(set_attr "type" "fpload") (set_attr "length" "8")]) +(define_insn "" + [(set (match_operand:SF 0 "register_operand" "=fx") + (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "basereg_operand" "r"))))] + "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT" + "* +{ + /* Reload can create backwards (relative to cse) unscaled index + address modes when eliminating registers and possibly for + pseudos that don't get hard registers. Deal with it. */ + if (operands[1] == hard_frame_pointer_rtx + || operands[1] == stack_pointer_rtx) + return \"fldwx %2(0,%1),%0\"; + else + return \"fldwx %1(0,%2),%0\"; +}" + [(set_attr "type" "fpload") + (set_attr "length" "4")]) + (define_insn "" [(set (mem:SF (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") (const_int 4)) @@ -2240,7 +2528,27 @@ }" [(set_attr "type" "fpstore") (set_attr "length" "8")]) + +(define_insn "" + [(set (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "basereg_operand" "r"))) + (match_operand:SF 0 "register_operand" "fx"))] + "! TARGET_DISABLE_INDEXING && ! TARGET_SOFT_FLOAT" + "* +{ + /* Reload can create backwards (relative to cse) unscaled index + address modes when eliminating registers and possibly for + pseudos that don't get hard registers. Deal with it. */ + if (operands[1] == hard_frame_pointer_rtx + || operands[1] == stack_pointer_rtx) + return \"fstwx %0,%2(0,%1)\"; + else + return \"fstwx %0,%1(0,%2)\"; +}" + [(set_attr "type" "fpstore") + (set_attr "length" "4")]) + ;;- zero extension instructions (define_insn "zero_extendhisi2"
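
The heart of this patch is the new 2X unrolled block copy emitted by output_block_move.  As a reading aid, the word-aligned (align == 4) case behaves roughly like the following C sketch; the function name and the use of memcpy are illustrative only — the real code emits PA ldws,ma/stws,ma/addib instructions and finishes the partial word with a full ldw plus stbys,e rather than a byte-exact copy.

#include <stddef.h>
#include <string.h>

/* Rough C model of the align == 4 copy sequence.  COUNTER mirrors the
   loop counter set up with "ldi n_bytes-8,%2"; each loop iteration
   stands for one ldws,ma/ldws,ma/stws,ma/addib,>=/stws,ma group.  */
static void
block_move_align4 (char *dst, const char *src, size_t n_bytes)
{
  /* The expander guarantees the loop runs at least twice; smaller
     copies never reach this pattern (see MOVE_RATIO).  */
  long counter = (long) n_bytes - 8;

  do
    {
      memcpy (dst, src, 4);             /* first word of the pair */
      memcpy (dst + 4, src + 4, 4);     /* second word of the pair */
      dst += 8, src += 8;
      counter -= 8;                     /* addib,>= -8,%2,<loop> */
    }
  while (counter >= 0);

  /* Residual: up to 7 bytes can remain after the unrolled loop.  */
  if (n_bytes % 8 >= 4)
    {
      memcpy (dst, src, 4);             /* extra word left by unrolling */
      dst += 4, src += 4;
    }
  if (n_bytes % 4 != 0)
    memcpy (dst, src, n_bytes % 4);     /* partial word; the real code
                                           uses ldw plus stbys,e */
}

The align == 2 and align == 1 cases follow the same shape with halfword and byte operations.  Per the movstrsi expander comments, this pattern is only reached for compile-time-constant sizes that are neither large relative to the known alignment (size/align > 16 falls back to the HP library routine) nor small enough to be open-coded as individual moves (size/align < MOVE_RATIO).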