This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

lower-subreg and IBM long double


Should lower-subreg be disabled for IBM long double TFmode?
On powerpc64-linux, this testcase

long double ld_abs (long double x)
{
  return __builtin_fabsl (x);
}

compiled with -m64 -O2 -S generates the horrible code shown on the
left.  The code on the right is ideal, as generated by gcc-4.2.  We
regressed with gcc-4.3.0, i.e. with the lower-subreg pass.

.L.ld_abs:		.L.ld_abs:
	fabs 0,1		fmr 0,1
	stfd 2,-32(1)		fabs 1,1
	fcmpu 7,1,0		fcmpu 7,0,1
	ori 2,2,0		beqlr 7
	ld 10,-32(1)		fneg 2,2
	mr 9,10			blr
	beq 7,.L2
	std 10,-24(1)
	ori 2,2,0
	lfd 13,-24(1)
	fneg 13,13
	stfd 13,-24(1)
	ori 2,2,0
	ld 9,-24(1)
.L2:
	stfd 0,-32(1)
	std 9,-8(1)
	ori 2,2,0
	ld 8,-32(1)
	lfd 2,-8(1)
	std 8,-16(1)
	ori 2,2,0
	lfd 1,-16(1)
	blr

It isn't hard to see why we are going wrong.  IBM long double is
really a two element array of double, and the rs6000 backend uses
subregs to access the elements.  The problem is that lower-subreg
lowers to word_mode, so we get DImode.  word_mode makes sense for most
targets where subregs of FP modes might be used to narrow an access
for bit-twiddling operations on the sign bit.  It doesn't make sense
for us.  We want DFmode for FP operations.  An example is the expander
used by the testcase.

(define_expand "abstf2_internal"
  [(set (match_operand:TF 0 "gpc_reg_operand" "")
	(match_operand:TF 1 "gpc_reg_operand" ""))
   (set (match_dup 3) (match_dup 5))
   (set (match_dup 5) (abs:DF (match_dup 5)))
   (set (match_dup 4) (compare:CCFP (match_dup 3) (match_dup 5)))
   (set (pc) (if_then_else (eq (match_dup 4) (const_int 0))
			   (label_ref (match_operand 2 "" ""))
			   (pc)))
   (set (match_dup 6) (neg:DF (match_dup 6)))]
  "!TARGET_IEEEQUAD
   && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT 
   && TARGET_LONG_DOUBLE_128"
  "
{
  const int hi_word = LONG_DOUBLE_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
  const int lo_word = LONG_DOUBLE_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
  operands[3] = gen_reg_rtx (DFmode);
  operands[4] = gen_reg_rtx (CCFPmode);
  operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
  operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
}")

The following patch disables lower-subreg for double-double TFmode.
Bootstrap and regression tests are OK, but I'm a little unsure whether
this is the right thing to do.

	* rs6000.c (TARGET_INIT_LOWER_SUBREG): Define.
	(rs6000_init_lower_subreg): New function.
	* lower-subreg.c (init_lower_subreg): Call targetm.init_lower_subreg.
	* target.def (init_lower_subreg): New.
	* doc/tm.texi.in (TARGET_INIT_LOWER_SUBREG): Document.
	* doc/tm.texi: Regenerate.

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 199781)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -59,6 +59,7 @@
 #include "opts.h"
 #include "tree-vectorizer.h"
 #include "dumpfile.h"
+#include "lower-subreg.h"
 #if TARGET_XCOFF
 #include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
 #endif
@@ -1317,6 +1318,8 @@
 #define TARGET_RTX_COSTS rs6000_rtx_costs
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
+#undef TARGET_INIT_LOWER_SUBREG
+#define TARGET_INIT_LOWER_SUBREG rs6000_init_lower_subreg
 
 #undef TARGET_DWARF_REGISTER_SPAN
 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
@@ -26865,6 +26955,20 @@
   return ret;
 }
 
+static void
+rs6000_init_lower_subreg (void *data)
+{
+  if (!TARGET_IEEEQUAD
+      && TARGET_HARD_FLOAT
+      && (TARGET_FPRS || TARGET_E500_DOUBLE)
+      && TARGET_LONG_DOUBLE_128)
+    {
+      struct target_lower_subreg *info = (struct target_lower_subreg *) data;
+      info->x_choices[0].move_modes_to_split[TFmode] = false;
+      info->x_choices[1].move_modes_to_split[TFmode] = false;
+    }
+}
+
 /* Returns a code for a target-specific builtin that implements
    reciprocal of the function, or NULL_TREE if not available.  */
 
Index: gcc/lower-subreg.c
===================================================================
--- gcc/lower-subreg.c	(revision 199781)
+++ gcc/lower-subreg.c	(working copy)
@@ -39,6 +39,7 @@
 #include "tree-pass.h"
 #include "df.h"
 #include "lower-subreg.h"
+#include "target.h"
 
 #ifdef STACK_GROWS_DOWNWARD
 # undef STACK_GROWS_DOWNWARD
@@ -287,6 +288,9 @@
   if (LOG_COSTS)
     fprintf (stderr, "\nSpeed costs\n===========\n\n");
   compute_costs (true, &rtxes);
+
+  if (targetm.init_lower_subreg)
+    targetm.init_lower_subreg (this_target_lower_subreg);
 }
 
 static bool
Index: gcc/target.def
===================================================================
--- gcc/target.def	(revision 199781)
+++ gcc/target.def	(working copy)
@@ -2926,6 +2926,12 @@
  void, (int *code, rtx *op0, rtx *op1, bool op0_preserve_value),
  default_canonicalize_comparison)
 
+/* Allow modification of subreg choices.  */
+DEFHOOK
+(init_lower_subreg,
+ "",
+ void, (void *data), NULL)
+
 DEFHOOKPOD
 (atomic_test_and_set_trueval,
  "This value should be set if the result written by\
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	(revision 199781)
+++ gcc/doc/tm.texi.in	(working copy)
@@ -6375,6 +6375,12 @@
 registers on machines with lots of registers.
 @end deftypefn
 
+@hook TARGET_INIT_LOWER_SUBREG
+This hook allows modification of the choices the lower_subreg pass
+will make for particular subreg modes.  @var{data} is a pointer to a
+@code{struct target_lower_subreg}.
+@end deftypefn
+
 @node Scheduling
 @section Adjusting the Instruction Scheduler
 

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]