This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RS6000] Fix PR61098, Poor code setting count register


On powerpc64, to set a large loop count we have code like the
following after split1:

(insn 67 14 68 4 (set (reg:DI 160)
        (const_int 99942400 [0x5f50000])) /home/amodra/unaligned_load.c:14 -1
     (nil))
(insn 68 67 42 4 (set (reg:DI 160)
        (ior:DI (reg:DI 160)
            (const_int 57600 [0xe100]))) /home/amodra/unaligned_load.c:14 -1
     (expr_list:REG_EQUAL (const_int 100000000 [0x5f5e100])
        (nil)))

and then test for loop exit with:

(jump_insn 65 31 45 5 (parallel [
            (set (pc)
                (if_then_else (ne (reg:DI 160)
                        (const_int 1 [0x1]))
                    (label_ref:DI 42)
                    (pc)))
            (set (reg:DI 160)
                (plus:DI (reg:DI 160)
                    (const_int -1 [0xffffffffffffffff])))
            (clobber (scratch:CC))
            (clobber (scratch:DI))
        ]) /home/amodra/unaligned_load.c:15 800 {*ctrdi_internal1}
     (int_list:REG_BR_PROB 9899 (nil))
 -> 42)

The jump_insn of course is meant for use with bdnz, which implies a
strong preference for reg 160 to live in the count register.  Trouble
is, the count register doesn't do arithmetic.

So, use a new psuedo for intermediate results.  On looking at this,
I noticed the !TARGET_POWERPC64 code in rs6000_emit_set_long_const was
broken, apparently expecting c1 and c2 to be the high and low 32 bits
of the constant.  That's no longer true, so I've fixed that as well.
Bootstrapped and regression tested powerpc64-linux.  OK for mainline
and branches?

	PR target/61098
	* config/rs6000/rs6000.c (rs6000_emit_set_const): Remove unneeded
	params and return value.  Simplify.  Update comment.
	(rs6000_emit_set_long_const): Remove unneeded param and return
	value.  Correct !TARGET_POWERPC64 handling of constants > 2G.
	If we can, use a new pseudo for intermediate calculations.

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 209926)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -1068,7 +1069,7 @@ static tree rs6000_handle_longcall_attribute (tree
 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
 static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
-static rtx rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
 static int rs6000_memory_move_cost (enum machine_mode, reg_class_t, bool);
 static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
 static int rs6000_debug_address_cost (rtx, enum machine_mode, addr_space_t,
@@ -7826,53 +7811,36 @@ rs6000_conditional_register_usage (void)
 }
 
 
-/* Try to output insns to set TARGET equal to the constant C if it can
-   be done in less than N insns.  Do all computations in MODE.
-   Returns the place where the output has been placed if it can be
-   done and the insns have been emitted.  If it would take more than N
-   insns, zero is returned and no insns and emitted.  */
+/* Output insns to set DEST equal to the constant SOURCE.  */
 
-rtx
-rs6000_emit_set_const (rtx dest, enum machine_mode mode,
-		       rtx source, int n ATTRIBUTE_UNUSED)
+void
+rs6000_emit_set_const (rtx dest, rtx source)
 {
-  rtx result, insn, set;
-  HOST_WIDE_INT c0, c1;
+  enum machine_mode mode = GET_MODE (dest);
+  rtx temp, insn, set;
+  HOST_WIDE_INT c;
 
+  gcc_checking_assert (CONST_INT_P (source));
+  c = INTVAL (source);
   switch (mode)
     {
-    case  QImode:
+    case QImode:
     case HImode:
-      if (dest == NULL)
-	dest = gen_reg_rtx (mode);
       emit_insn (gen_rtx_SET (VOIDmode, dest, source));
-      return dest;
+      return;
 
     case SImode:
-      result = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
+      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
 
-      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result),
-			      GEN_INT (INTVAL (source)
-				       & (~ (HOST_WIDE_INT) 0xffff))));
+      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
+			      GEN_INT (c & (~ (HOST_WIDE_INT) 0xffff))));
       emit_insn (gen_rtx_SET (VOIDmode, dest,
-			      gen_rtx_IOR (SImode, copy_rtx (result),
-					   GEN_INT (INTVAL (source) & 0xffff))));
-      result = dest;
+			      gen_rtx_IOR (SImode, copy_rtx (temp),
+					   GEN_INT (c & 0xffff))));
       break;
 
     case DImode:
-      switch (GET_CODE (source))
-	{
-	case CONST_INT:
-	  c0 = INTVAL (source);
-	  c1 = -(c0 < 0);
-	  break;
-
-	default:
-	  gcc_unreachable ();
-	}
-
-      result = rs6000_emit_set_long_const (dest, c0, c1);
+      rs6000_emit_set_long_const (dest, c);
       break;
 
     default:
@@ -7883,37 +7851,38 @@ rs6000_conditional_register_usage (void)
   set = single_set (insn);
   if (! CONSTANT_P (SET_SRC (set)))
     set_unique_reg_note (insn, REG_EQUAL, source);
-
-  return result;
 }
 
-/* Having failed to find a 3 insn sequence in rs6000_emit_set_const,
-   fall back to a straight forward decomposition.  We do this to avoid
-   exponential run times encountered when looking for longer sequences
-   with rs6000_emit_set_const.  */
-static rtx
-rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
+/* Output insns to set a DImode DEST equal to the constant C.  */
+
+static void
+rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 {
+  rtx temp;
+
   if (!TARGET_POWERPC64)
     {
-      rtx operand1, operand2;
+      rtx hi, lo;
 
-      operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0,
-					DImode);
-      operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0,
-					DImode);
-      emit_move_insn (operand1, GEN_INT (c1));
-      emit_move_insn (operand2, GEN_INT (c2));
+      hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
+				  DImode);
+      lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
+				  DImode);
+      emit_move_insn (hi, GEN_INT (c >> 32));
+      c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
+      emit_move_insn (lo, GEN_INT (c));
     }
   else
     {
       HOST_WIDE_INT ud1, ud2, ud3, ud4;
 
-      ud1 = c1 & 0xffff;
-      ud2 = (c1 & 0xffff0000) >> 16;
-      c2 = c1 >> 32;
-      ud3 = c2 & 0xffff;
-      ud4 = (c2 & 0xffff0000) >> 16;
+      ud1 = c & 0xffff;
+      c = c >> 16;
+      ud2 = c & 0xffff;
+      c = c >> 16;
+      ud3 = c & 0xffff;
+      c = c >> 16;
+      ud4 = c & 0xffff;
 
       if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
 	  || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
@@ -7922,67 +7891,74 @@ rs6000_conditional_register_usage (void)
       else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
 	       || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
 	{
-	  emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+	  emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
 	}
       else if (ud3 == 0 && ud4 == 0)
 	{
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
 	  gcc_assert (ud2 & 0x8000);
-	  emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  emit_move_insn (copy_rtx (temp),
+			  GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (copy_rtx (temp),
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
-	  emit_move_insn (copy_rtx (dest),
+	  emit_move_insn (dest,
 			  gen_rtx_ZERO_EXTEND (DImode,
 					       gen_lowpart (SImode,
-							    copy_rtx (dest))));
+							    copy_rtx (temp))));
 	}
       else if ((ud4 == 0xffff && (ud3 & 0x8000))
 	       || (ud4 == 0 && ! (ud3 & 0x8000)))
 	{
-	  emit_move_insn (dest, GEN_INT (((ud3 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+	  emit_move_insn (copy_rtx (temp),
+			  GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud2 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (copy_rtx (temp),
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud2)));
-	  emit_move_insn (copy_rtx (dest),
-			  gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+	  emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+			  gen_rtx_ASHIFT (DImode, copy_rtx (temp),
 					  GEN_INT (16)));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
 	}
       else
 	{
-	  emit_move_insn (dest, GEN_INT (((ud4 << 16) ^ 0x80000000)
-					 - 0x80000000));
+	  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+	  emit_move_insn (copy_rtx (temp),
+			  GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
 	  if (ud3 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (copy_rtx (temp),
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud3)));
 
-	  emit_move_insn (copy_rtx (dest),
-			  gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+	  emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
+			  gen_rtx_ASHIFT (DImode, copy_rtx (temp),
 					  GEN_INT (32)));
 	  if (ud2 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud2 << 16)));
 	  if (ud1 != 0)
-	    emit_move_insn (copy_rtx (dest),
-			    gen_rtx_IOR (DImode, copy_rtx (dest),
+	    emit_move_insn (dest,
+			    gen_rtx_IOR (DImode, copy_rtx (temp),
 					 GEN_INT (ud1)));
 	}
     }
-  return dest;
 }
 
 /* Helper for the following.  Get rid of [r+r] memory refs
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h	(revision 209926)
+++ gcc/config/rs6000/rs6000-protos.h	(working copy)
@@ -114,7 +114,7 @@ extern void rs6000_emit_cbranch (enum machine_mode
 extern char * output_cbranch (rtx, const char *, int, rtx);
 extern char * output_e500_flip_gt_bit (rtx, rtx);
 extern const char * output_probe_stack_range (rtx, rtx);
-extern rtx rs6000_emit_set_const (rtx, enum machine_mode, rtx, int);
+extern void rs6000_emit_set_const (rtx, rtx);
 extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
 extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
 extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 209926)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -8978,12 +8978,8 @@
 	(ior:SI (match_dup 0)
 		(match_dup 3)))]
   "
-{ rtx tem = rs6000_emit_set_const (operands[0], SImode, operands[1], 2);
-
-  if (tem == operands[0])
-    DONE;
-  else
-    FAIL;
+{ rs6000_emit_set_const (operands[0], operands[1]);
+  DONE;
 }")
 
 (define_insn "*mov<mode>_internal2"
@@ -10326,12 +10322,8 @@
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))]
   "
-{ rtx tem = rs6000_emit_set_const (operands[0], DImode, operands[1], 5);
-
-  if (tem == operands[0])
-    DONE;
-  else
-    FAIL;
+{ rs6000_emit_set_const (operands[0], operands[1]);
+  DONE;
 }")
 
 (define_split
@@ -10341,12 +10333,8 @@
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))]
   "
-{ rtx tem = rs6000_emit_set_const (operands[0], DImode, operands[1], 5);
-
-  if (tem == operands[0])
-    DONE;
-  else
-    FAIL;
+{ rs6000_emit_set_const (operands[0], operands[1]);
+  DONE;
 }")
 
 ;; TImode/PTImode is similar, except that we usually want to compute the

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]