This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Force DFP function args into FP registers per the ABI
- From: Peter Bergner <bergner at vnet dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Janis Johnson <janis187 at us dot ibm dot com>, Ben Elliston <bje at au1 dot ibm dot com>, David Edelsohn <dje at watson dot ibm dot com>
- Date: Fri, 23 Feb 2007 13:23:43 -0600
- Subject: [PATCH] Force DFP function args into FP registers per the ABI
The current DFP code passes decimal float function arguments and function
return values in integer registers. However, the DFP ABI mandates that
they be passed in floating point registers. This patch forces DDmode and
TDmode arguments and return values to be passed in floating point registers.
A later follow on patch will fix up SDmode arguments too. It turns out
that SDmode is harder, since (except for POWER6), there is no instruction
that will load/store a 32-bit memory location into a FP register without
munging the bits (the load/store float insns do an implicit float to/from
double conversion since all binary floating values are stored in double
precision format within the hardware registers).
This has passed bootstrap and regression testing on powerpc64-linux
(32-bit and 64-bit testsuite runs). Ok for mainline?
Peter
2007-02-23 Ben Elliston <bje@au.ibm.com>
Peter Bergner <bergner@vnet.ibm.com>
Janis Johnson <janis187@us.ibm.com>
* config/rs6000/dfp.md: New file.
* config/rs6000/rs6000.md: Include dfp.md.
(add<mode>3_internal1): Disable for DECIMAL_FLOAT_MODE_P operands.
* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Handle DDmode
and TDmode decimal float modes in FP registers.
(num_insns_constant): Likewise.
(rs6000_legitimate_offset_address_p): Likewise.
(rs6000_legitimize_address): Likewise.
(rs6000_legitimize_reload_address): Likewise.
(rs6000_legitimate_address): Likewise.
(rs6000_emit_move): Likewise.
(function_arg_boundary): Likewise.
(function_arg_advance): Likewise.
(rs6000_darwin64_record_arg_recurse): Likewise.
(function_arg): Likewise.
(rs6000_gimplify_va_arg): Likewise.
(rs6000_split_multireg_move): Likewise.
(rs6000_output_function_epilogue): Likewise.
(rs6000_output_function_epilogue): Likewise.
(rs6000_register_move_cost): Likewise.
(rs6000_function_value): Likewise.
(rs6000_libcall_value): Likewise.
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c (revision 122200)
+++ config/rs6000/rs6000.c (working copy)
@@ -1127,11 +1127,11 @@ rs6000_hard_regno_mode_ok (int regno, en
return INT_REGNO_P (regno + HARD_REGNO_NREGS (regno, mode) - 1);
/* The float registers can only hold floating modes and DImode.
- This also excludes decimal float modes. */
+ This excludes the 32-bit decimal float mode for now. */
if (FP_REGNO_P (regno))
return
(SCALAR_FLOAT_MODE_P (mode)
- && !DECIMAL_FLOAT_MODE_P (mode)
+ && mode != SDmode
&& FP_REGNO_P (regno + HARD_REGNO_NREGS (regno, mode) - 1))
|| (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD);
@@ -2310,7 +2310,10 @@ num_insns_constant (rtx op, enum machine
REAL_VALUE_TYPE rv;
REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
- REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
+ else
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
high = l[WORDS_BIG_ENDIAN == 0];
low = l[WORDS_BIG_ENDIAN != 0];
}
@@ -3050,6 +3053,7 @@ rs6000_legitimate_offset_address_p (enum
return SPE_CONST_OFFSET_OK (offset);
case DFmode:
+ case DDmode:
if (TARGET_E500_DOUBLE)
return SPE_CONST_OFFSET_OK (offset);
@@ -3062,7 +3066,7 @@ rs6000_legitimate_offset_address_p (enum
if (TARGET_E500_DOUBLE)
return SPE_CONST_OFFSET_OK (offset);
- if (mode == DFmode || !TARGET_POWERPC64)
+ if (mode == DFmode || mode == DDmode || !TARGET_POWERPC64)
extra = 4;
else if (offset & 3)
return false;
@@ -3074,7 +3078,8 @@ rs6000_legitimate_offset_address_p (enum
&& SPE_CONST_OFFSET_OK (offset + 8));
case TImode:
- if (mode == TFmode || !TARGET_POWERPC64)
+ case TDmode:
+ if (mode == TFmode || mode == TDmode || !TARGET_POWERPC64)
extra = 12;
else if (offset & 3)
return false;
@@ -3229,8 +3234,9 @@ rs6000_legitimize_address (rtx x, rtx ol
&& GET_MODE_NUNITS (mode) == 1
&& ((TARGET_HARD_FLOAT && TARGET_FPRS)
|| TARGET_POWERPC64
- || (((mode != DImode && mode != DFmode) || TARGET_E500_DOUBLE)
- && mode != TFmode))
+ || (((mode != DImode && mode != DFmode && mode != DDmode)
+ || TARGET_E500_DOUBLE)
+ && mode != TFmode && mode != TDmode))
&& (TARGET_POWERPC64 || mode != DImode)
&& mode != TImode)
{
@@ -3251,6 +3257,7 @@ rs6000_legitimize_address (rtx x, rtx ol
}
else if (SPE_VECTOR_MODE (mode)
|| (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DDmode || mode == TDmode
|| mode == DImode)))
{
if (mode == DImode)
@@ -3696,10 +3703,11 @@ rs6000_legitimize_reload_address (rtx x,
&& DEFAULT_ABI == ABI_V4
&& !flag_pic
#endif
- /* Don't do this for TFmode, since the result isn't offsettable.
+ /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
The same goes for DImode without 64-bit gprs and DFmode
without fprs. */
&& mode != TFmode
+ && mode != TDmode
&& (mode != DImode || TARGET_POWERPC64)
&& (mode != DFmode || TARGET_POWERPC64
|| (TARGET_FPRS && TARGET_HARD_FLOAT)))
@@ -3769,8 +3777,8 @@ rs6000_legitimize_reload_address (rtx x,
word aligned.
For modes spanning multiple registers (DFmode in 32-bit GPRs,
- 32-bit DImode, TImode, TFmode), indexed addressing cannot be used because
- adjacent memory cells are accessed by adding word-sized offsets
+ 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
+ because adjacent memory cells are accessed by adding word-sized offsets
during assembly output. */
int
rs6000_legitimate_address (enum machine_mode mode, rtx x, int reg_ok_strict)
@@ -3791,6 +3799,7 @@ rs6000_legitimate_address (enum machine_
&& !ALTIVEC_VECTOR_MODE (mode)
&& !SPE_VECTOR_MODE (mode)
&& mode != TFmode
+ && mode != TDmode
/* Restrict addressing for DI because of our SUBREG hackery. */
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DImode))
@@ -3813,6 +3822,7 @@ rs6000_legitimate_address (enum machine_
return 1;
if (mode != TImode
&& mode != TFmode
+ && mode != TDmode
&& ((TARGET_HARD_FLOAT && TARGET_FPRS)
|| TARGET_POWERPC64
|| ((mode != DFmode || TARGET_E500_DOUBLE) && mode != TFmode))
@@ -4169,7 +4179,7 @@ rs6000_emit_set_long_const (rtx dest, HO
}
/* Helper for the following. Get rid of [r+r] memory refs
- in cases where it won't work (TImode, TFmode). */
+ in cases where it won't work (TImode, TFmode, TDmode). */
static void
rs6000_eliminate_indexed_memrefs (rtx operands[2])
@@ -4333,9 +4343,11 @@ rs6000_emit_move (rtx dest, rtx source,
break;
case TFmode:
+ case TDmode:
rs6000_eliminate_indexed_memrefs (operands);
/* fall through */
+ case DDmode:
case DFmode:
case SFmode:
if (CONSTANT_P (operands[1])
@@ -4548,7 +4560,7 @@ rs6000_emit_move (rtx dest, rtx source,
/* Nonzero if we can use a floating-point register to pass this arg. */
#define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \
(SCALAR_FLOAT_MODE_P (MODE) \
- && !DECIMAL_FLOAT_MODE_P (MODE) \
+ && (MODE) != SDmode \
&& (CUM)->fregno <= FP_ARG_MAX_REG \
&& TARGET_HARD_FLOAT && TARGET_FPRS)
@@ -4790,7 +4802,7 @@ function_arg_boundary (enum machine_mode
&& (GET_MODE_SIZE (mode) == 8
|| (TARGET_HARD_FLOAT
&& TARGET_FPRS
- && mode == TFmode)))
+ && (mode == TFmode || mode == TDmode))))
return 64;
else if (SPE_VECTOR_MODE (mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
@@ -5028,14 +5040,20 @@ function_arg_advance (CUMULATIVE_ARGS *c
{
if (TARGET_HARD_FLOAT && TARGET_FPRS
&& (mode == SFmode || mode == DFmode
+ || mode == DDmode || mode == TDmode
|| (mode == TFmode && !TARGET_IEEEQUAD)))
{
- if (cum->fregno + (mode == TFmode ? 1 : 0) <= FP_ARG_V4_MAX_REG)
+ /* Must use an even/odd register pair. */
+ if (mode == TDmode && cum->fregno % 2)
+ cum->fregno++;
+
+ if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
+ <= FP_ARG_V4_MAX_REG)
cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
else
{
cum->fregno = FP_ARG_V4_MAX_REG + 1;
- if (mode == DFmode || mode == TFmode)
+ if (mode == DFmode || mode == TFmode || mode == DDmode || mode == TDmode)
cum->words += cum->words & 1;
cum->words += rs6000_arg_size (mode, type);
}
@@ -5087,7 +5105,7 @@ function_arg_advance (CUMULATIVE_ARGS *c
cum->words = align_words + n_words;
if (SCALAR_FLOAT_MODE_P (mode)
- && !DECIMAL_FLOAT_MODE_P (mode)
+ && mode != SDmode
&& TARGET_HARD_FLOAT && TARGET_FPRS)
cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
@@ -5306,7 +5324,7 @@ rs6000_darwin64_record_arg_recurse (CUMU
= gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (mode, cum->fregno++),
GEN_INT (bitpos / BITS_PER_UNIT));
- if (mode == TFmode)
+ if (mode == TFmode || mode == TDmode)
cum->fregno++;
}
else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1))
@@ -5567,8 +5585,10 @@ function_arg (CUMULATIVE_ARGS *cum, enum
else if (TARGET_SPE_ABI && TARGET_SPE
&& (SPE_VECTOR_MODE (mode)
|| (TARGET_E500_DOUBLE && (mode == DFmode
+ || mode == DDmode
|| mode == DCmode
|| mode == TFmode
+ || mode == TDmode
|| mode == TCmode))))
return rs6000_spe_function_arg (cum, mode, type);
@@ -5576,9 +5596,15 @@ function_arg (CUMULATIVE_ARGS *cum, enum
{
if (TARGET_HARD_FLOAT && TARGET_FPRS
&& (mode == SFmode || mode == DFmode
- || (mode == TFmode && !TARGET_IEEEQUAD)))
+ || (mode == TFmode && !TARGET_IEEEQUAD)
+ || mode == DDmode || mode == TDmode))
{
- if (cum->fregno + (mode == TFmode ? 1 : 0) <= FP_ARG_V4_MAX_REG)
+ /* Must use an even/odd register pair. */
+ if (mode == TDmode && cum->fregno % 2)
+ cum->fregno++;
+
+ if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
+ <= FP_ARG_V4_MAX_REG)
return gen_rtx_REG (mode, cum->fregno);
else
return NULL_RTX;
@@ -5621,10 +5647,11 @@ function_arg (CUMULATIVE_ARGS *cum, enum
{
/* Currently, we only ever need one reg here because complex
doubles are split. */
- gcc_assert (cum->fregno == FP_ARG_MAX_REG && fmode == TFmode);
+ gcc_assert (cum->fregno == FP_ARG_MAX_REG
+ && (fmode == TFmode || fmode == TDmode));
- /* Long double split over regs and memory. */
- fmode = DFmode;
+ /* Long double or _Decimal128 split over regs and memory. */
+ fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
}
/* Do we also need to pass this arg in the parameter save
@@ -6179,6 +6206,7 @@ rs6000_gimplify_va_arg (tree valist, tre
tree lab_false, lab_over, addr;
int align;
tree ptrtype = build_pointer_type (type);
+ int regalign = 0;
if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
{
@@ -6235,7 +6263,9 @@ rs6000_gimplify_va_arg (tree valist, tre
if (TARGET_HARD_FLOAT && TARGET_FPRS
&& (TYPE_MODE (type) == SFmode
|| TYPE_MODE (type) == DFmode
- || TYPE_MODE (type) == TFmode))
+ || TYPE_MODE (type) == TFmode
+ || TYPE_MODE (type) == DDmode
+ || TYPE_MODE (type) == TDmode))
{
/* FP args go in FP registers, if present. */
reg = fpr;
@@ -6276,10 +6306,19 @@ rs6000_gimplify_va_arg (tree valist, tre
u = reg;
if (n_reg == 2 && reg == gpr)
{
+ regalign = 1;
u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), reg,
size_int (n_reg - 1));
u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), reg, u);
}
+ /* _Decimal128 is passed in even/odd fpr pairs; the stored
+ reg number is 0 for f1, so we want to make it odd. */
+ else if (reg == fpr && TYPE_MODE (type) == TDmode)
+ {
+ regalign = 1;
+ t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), reg, size_int (1));
+ u = build2 (MODIFY_EXPR, void_type_node, reg, t);
+ }
t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
t = build2 (GE_EXPR, boolean_type_node, u, t);
@@ -6305,10 +6344,10 @@ rs6000_gimplify_va_arg (tree valist, tre
t = build1 (LABEL_EXPR, void_type_node, lab_false);
append_to_statement_list (t, pre_p);
- if ((n_reg == 2 && reg != gpr) || n_reg > 2)
+ if ((n_reg == 2 && !regalign) || n_reg > 2)
{
/* Ensure that we don't find any more args in regs.
- Alignment has taken care of the n_reg == 2 gpr case. */
+ Alignment has taken care of for special cases. */
t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (reg), reg, size_int (8));
gimplify_and_add (t, pre_p);
}
@@ -12919,7 +12958,7 @@ rs6000_split_multireg_move (rtx dst, rtx
mode = GET_MODE (dst);
nregs = hard_regno_nregs[reg][mode];
if (FP_REGNO_P (reg))
- reg_mode = DFmode;
+ reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
else if (ALTIVEC_REGNO_P (reg))
reg_mode = V16QImode;
else if (TARGET_E500_DOUBLE && mode == TFmode)
@@ -15853,7 +15892,9 @@ rs6000_output_function_epilogue (FILE *f
break;
case DFmode:
+ case DDmode:
case TFmode:
+ case TDmode:
bits = 0x3;
break;
@@ -20192,7 +20233,7 @@ rs6000_register_move_cost (enum machine_
/* Moving between two similar registers is just one instruction. */
else if (reg_classes_intersect_p (to, from))
- return mode == TFmode ? 4 : 2;
+ return (mode == TFmode || mode == TDmode) ? 4 : 2;
/* Everything else has to go through GENERAL_REGS. */
else
@@ -20525,7 +20566,28 @@ rs6000_function_value (tree valtype, tre
mode = TYPE_MODE (valtype);
if (DECIMAL_FLOAT_MODE_P (mode))
- regno = GP_ARG_RETURN;
+ {
+ if (TARGET_HARD_FLOAT && TARGET_FPRS)
+ {
+ switch (mode)
+ {
+ default:
+ gcc_unreachable ();
+ case SDmode:
+ regno = GP_ARG_RETURN;
+ break;
+ case DDmode:
+ regno = FP_ARG_RETURN;
+ break;
+ case TDmode:
+ /* Use f2:f3 specified by the ABI. */
+ regno = FP_ARG_RETURN + 1;
+ break;
+ }
+ }
+ else
+ regno = GP_ARG_RETURN;
+ }
else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS)
regno = FP_ARG_RETURN;
else if (TREE_CODE (valtype) == COMPLEX_TYPE
@@ -20567,7 +20629,28 @@ rs6000_libcall_value (enum machine_mode
}
if (DECIMAL_FLOAT_MODE_P (mode))
- regno = GP_ARG_RETURN;
+ {
+ if (TARGET_HARD_FLOAT && TARGET_FPRS)
+ {
+ switch (mode)
+ {
+ default:
+ gcc_unreachable ();
+ case SDmode:
+ regno = GP_ARG_RETURN;
+ break;
+ case DDmode:
+ regno = FP_ARG_RETURN;
+ break;
+ case TDmode:
+ /* Use f2:f3 specified by the ABI. */
+ regno = FP_ARG_RETURN + 1;
+ break;
+ }
+ }
+ else
+ regno = GP_ARG_RETURN;
+ }
else if (SCALAR_FLOAT_MODE_P (mode)
&& TARGET_HARD_FLOAT && TARGET_FPRS)
regno = FP_ARG_RETURN;
Index: config/rs6000/dfp.md
===================================================================
--- config/rs6000/dfp.md (revision 0)
+++ config/rs6000/dfp.md (revision 0)
@@ -0,0 +1,316 @@
+;; Decimal Floating Point (DFP) patterns.
+;; Copyright (C) 2006
+;; Free Software Foundation, Inc.
+;; Contributed by Ben Elliston (bje@au.ibm.com) and Peter Bergner
+;; (bergner@vnet.ibm.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 2, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to the
+;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+;; MA 02110-1301, USA.
+
+(define_expand "movdd"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "")
+ (match_operand:DD 1 "any_operand" ""))]
+ ""
+ "{ rs6000_emit_move (operands[0], operands[1], DDmode); DONE; }")
+
+(define_split
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (match_operand:DD 1 "const_int_operand" ""))]
+ "! TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 1))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ HOST_WIDE_INT value = INTVAL (operands[1]);
+
+ operands[2] = operand_subword (operands[0], endian, 0, DDmode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
+#if HOST_BITS_PER_WIDE_INT == 32
+ operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
+#else
+ operands[4] = GEN_INT (value >> 32);
+ operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000);
+#endif
+}")
+
+(define_split
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (match_operand:DD 1 "const_double_operand" ""))]
+ "! TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ long l[2];
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
+
+ operands[2] = operand_subword (operands[0], endian, 0, DDmode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
+ operands[4] = gen_int_mode (l[endian], SImode);
+ operands[5] = gen_int_mode (l[1 - endian], SImode);
+}")
+
+(define_split
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (match_operand:DD 1 "const_double_operand" ""))]
+ "TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 3))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ long l[2];
+ REAL_VALUE_TYPE rv;
+#if HOST_BITS_PER_WIDE_INT >= 64
+ HOST_WIDE_INT val;
+#endif
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
+
+ operands[2] = gen_lowpart (DImode, operands[0]);
+ /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
+#if HOST_BITS_PER_WIDE_INT >= 64
+ val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32
+ | ((HOST_WIDE_INT)(unsigned long)l[1 - endian]));
+
+ operands[3] = gen_int_mode (val, DImode);
+#else
+ operands[3] = immed_double_const (l[1 - endian], l[endian], DImode);
+#endif
+}")
+
+;; Don't have reload use general registers to load a constant. First,
+;; it might not work if the output operand is the equivalent of
+;; a non-offsettable memref, but also it is less efficient than loading
+;; the constant into an FP register, since it will probably be used there.
+;; The "??" is a kludge until we can figure out a more reasonable way
+;; of handling these non-offsettable values.
+(define_insn "*movdd_hardfloat32"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=!r,??r,m,f,f,m,!r,!r,!r")
+ (match_operand:DD 1 "input_operand" "r,m,r,f,m,f,G,H,F"))]
+ "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ /* We normally copy the low-numbered register first. However, if
+ the first register operand 0 is the same as the second register
+ of operand 1, we must copy in the opposite order. */
+ if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ return \"mr %L0,%L1\;mr %0,%1\";
+ else
+ return \"mr %0,%1\;mr %L0,%L1\";
+ case 1:
+ if (rs6000_offsettable_memref_p (operands[1])
+ || (GET_CODE (operands[1]) == MEM
+ && (GET_CODE (XEXP (operands[1], 0)) == LO_SUM
+ || GET_CODE (XEXP (operands[1], 0)) == PRE_INC
+ || GET_CODE (XEXP (operands[1], 0)) == PRE_DEC)))
+ {
+ /* If the low-address word is used in the address, we must load
+ it last. Otherwise, load it first. Note that we cannot have
+ auto-increment in that case since the address register is
+ known to be dead. */
+ if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands[1], 0))
+ return \"{l|lwz} %L0,%L1\;{l|lwz} %0,%1\";
+ else
+ return \"{l%U1|lwz%U1} %0,%1\;{l|lwz} %L0,%L1\";
+ }
+ else
+ {
+ rtx addreg;
+
+ addreg = find_addr_reg (XEXP (operands[1], 0));
+ if (refers_to_regno_p (REGNO (operands[0]),
+ REGNO (operands[0]) + 1,
+ operands[1], 0))
+ {
+ output_asm_insn (\"{cal|la} %0,4(%0)\", &addreg);
+ output_asm_insn (\"{lx|lwzx} %L0,%1\", operands);
+ output_asm_insn (\"{cal|la} %0,-4(%0)\", &addreg);
+ return \"{lx|lwzx} %0,%1\";
+ }
+ else
+ {
+ output_asm_insn (\"{lx|lwzx} %0,%1\", operands);
+ output_asm_insn (\"{cal|la} %0,4(%0)\", &addreg);
+ output_asm_insn (\"{lx|lwzx} %L0,%1\", operands);
+ output_asm_insn (\"{cal|la} %0,-4(%0)\", &addreg);
+ return \"\";
+ }
+ }
+ case 2:
+ if (rs6000_offsettable_memref_p (operands[0])
+ || (GET_CODE (operands[0]) == MEM
+ && (GET_CODE (XEXP (operands[0], 0)) == LO_SUM
+ || GET_CODE (XEXP (operands[0], 0)) == PRE_INC
+ || GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)))
+ return \"{st%U0|stw%U0} %1,%0\;{st|stw} %L1,%L0\";
+ else
+ {
+ rtx addreg;
+
+ addreg = find_addr_reg (XEXP (operands[0], 0));
+ output_asm_insn (\"{stx|stwx} %1,%0\", operands);
+ output_asm_insn (\"{cal|la} %0,4(%0)\", &addreg);
+ output_asm_insn (\"{stx|stwx} %L1,%0\", operands);
+ output_asm_insn (\"{cal|la} %0,-4(%0)\", &addreg);
+ return \"\";
+ }
+ case 3:
+ return \"fmr %0,%1\";
+ case 4:
+ return \"lfd%U1%X1 %0,%1\";
+ case 5:
+ return \"stfd%U0%X0 %1,%0\";
+ case 6:
+ case 7:
+ case 8:
+ return \"#\";
+ }
+}"
+ [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*")
+ (set_attr "length" "8,16,16,4,4,4,8,12,16")])
+
+(define_insn "*movdd_softfloat32"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=r,r,m,r,r,r")
+ (match_operand:DD 1 "input_operand" "r,m,r,G,H,F"))]
+ "! TARGET_POWERPC64 && TARGET_SOFT_FLOAT
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ /* We normally copy the low-numbered register first. However, if
+ the first register operand 0 is the same as the second register of
+ operand 1, we must copy in the opposite order. */
+ if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ return \"mr %L0,%L1\;mr %0,%1\";
+ else
+ return \"mr %0,%1\;mr %L0,%L1\";
+ case 1:
+ /* If the low-address word is used in the address, we must load
+ it last. Otherwise, load it first. Note that we cannot have
+ auto-increment in that case since the address register is
+ known to be dead. */
+ if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands[1], 0))
+ return \"{l|lwz} %L0,%L1\;{l|lwz} %0,%1\";
+ else
+ return \"{l%U1|lwz%U1} %0,%1\;{l|lwz} %L0,%L1\";
+ case 2:
+ return \"{st%U0|stw%U0} %1,%0\;{st|stw} %L1,%L0\";
+ case 3:
+ case 4:
+ case 5:
+ return \"#\";
+ }
+}"
+ [(set_attr "type" "two,load,store,*,*,*")
+ (set_attr "length" "8,8,8,8,12,16")])
+
+; ld/std require word-aligned displacements -> 'Y' constraint.
+; List Y->r and r->Y before r->r for reload.
+(define_insn "*movdd_hardfloat64"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,f,f,m,*c*l,!r,*h,!r,!r,!r")
+ (match_operand:DD 1 "input_operand" "r,Y,r,f,m,f,r,h,0,G,H,F"))]
+ "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "@
+ std%U0%X0 %1,%0
+ ld%U1%X1 %0,%1
+ mr %0,%1
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ mt%0 %1
+ mf%1 %0
+ {cror 0,0,0|nop}
+ #
+ #
+ #"
+ [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")])
+
+(define_insn "*movdd_softfloat64"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
+ (match_operand:DD 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))]
+ "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "@
+ ld%U1%X1 %0,%1
+ std%U0%X0 %1,%0
+ mr %0,%1
+ mt%0 %1
+ mf%1 %0
+ #
+ #
+ #
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,8,12,16,4")])
+
+(define_expand "movtd"
+ [(set (match_operand:TD 0 "general_operand" "")
+ (match_operand:TD 1 "any_operand" ""))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "{ rs6000_emit_move (operands[0], operands[1], TDmode); DONE; }")
+
+; It's important to list the o->f and f->o moves before f->f because
+; otherwise reload, given m->f, will try to pick f->f and reload it,
+; which doesn't make progress. Likewise r->Y must be before r->r.
+(define_insn_and_split "*movtd_internal"
+ [(set (match_operand:TD 0 "nonimmediate_operand" "=o,f,f,r,Y,r")
+ (match_operand:TD 1 "input_operand" "f,o,f,YGHF,r,r"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], TDmode)
+ || gpc_reg_operand (operands[1], TDmode))"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
+ [(set_attr "length" "8,8,8,20,20,16")])
+
Index: config/rs6000/rs6000.md
===================================================================
--- config/rs6000/rs6000.md (revision 122200)
+++ config/rs6000/rs6000.md (working copy)
@@ -1493,7 +1493,7 @@ (define_insn "*add<mode>3_internal1"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,?r,r")
(plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,r,b")
(match_operand:GPR 2 "add_operand" "r,I,I,L")))]
- ""
+ "!DECIMAL_FLOAT_MODE_P (GET_MODE (operands[0])) && !DECIMAL_FLOAT_MODE_P (GET_MODE (operands[1]))"
"@
{cax|add} %0,%1,%2
{cal %0,%2(%1)|addi %0,%1,%2}
@@ -14531,3 +14531,4 @@ (define_insn "prefetch"
(include "sync.md")
(include "altivec.md")
(include "spe.md")
+(include "dfp.md")