This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [PATCH, rs6000] Improve TImode add/sub

From: Pat Haugen <pthaugen at linux dot vnet dot ibm dot com>
To: segher at kernel dot crashing dot org
Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, David Edelsohn <dje dot gcc at gmail dot com>
Date: Wed, 16 Apr 2014 15:51:50 -0500
Subject: Re: [PATCH, rs6000] Improve TImode add/sub
Authentication-results: sourceware.org; auth=none
References: <53448F22 dot 6000309 at linux dot vnet dot ibm dot com> <20140409025635 dot GA22176 at gate dot crashing dot org>

On 04/08/2014 09:56 PM, segher@kernel.crashing.org wrote:

+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
Please leave out the default arguments.  Why does this need skipping
on Darwin?

+;; Define the TImode operations that can be done in a small number
+;; of instructions.  The & constraints are to prevent the register
+;; allocator from allocating registers that overlap with the inputs
+;; (for example, having an input in 7,8 and an output in 6,7).  We
+;; also allow for the output being the same as one of the inputs.
+
+(define_insn "addti3"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,&r,r,r")
+	(plus:TI (match_operand:TI 1 "gpc_reg_operand" "%r,r,0,0")
+		 (match_operand:TI 2 "reg_or_short_operand" "r,I,r,I")))]
+  "TARGET_POWERPC64"

That's not the correct condition: the carry bit is set based on the 32-bit
carry in 32-bit mode, so the condition has to be TARGET_64BIT.

The adddi3 pattern has !TARGET_POWERPC64 since a 64-bit addition can
be done without addc on a 64-bit machine, no matter what mode the CPU
is in.

+  "*
+{

Might as well leave out this stuff on new code, just use the braces :-)

Updated patch with above comments incorporated. Bootstrap/regtest onBE/LE with no new regressions. Ok for trunk?


-Pat

Index: gcc/testsuite/gcc.target/powerpc/ti_math1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/ti_math1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/ti_math1.c	(revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "addc" 1 } } */
+/* { dg-final { scan-assembler-times "adde" 1 } } */
+/* { dg-final { scan-assembler-times "subfc" 1 } } */
+/* { dg-final { scan-assembler-times "subfe" 1 } } */
+/* { dg-final { scan-assembler-not "subf " } } */
+
+__int128
+add_128 (__int128 *ptr, __int128 val)
+{
+	return (*ptr + val);
+}
+
+__int128
+sub_128 (__int128 *ptr, __int128 val)
+{
+	return (*ptr - val);
+}
+
Index: gcc/testsuite/gcc.target/powerpc/ti_math2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/ti_math2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/ti_math2.c	(revision 0)
@@ -0,0 +1,73 @@
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
+/* { dg-options "-O2 -fno-inline" } */
+
+union U {
+  __int128 i128;
+  struct {
+    long l1;
+    long l2;
+  } s;
+};
+
+union U u1,u2;
+
+__int128
+create_128 (long most_sig, long least_sig)
+{
+  union U u;
+
+#if __LITTLE_ENDIAN__
+  u.s.l1 = least_sig;
+  u.s.l2 = most_sig;
+#else
+  u.s.l1 = most_sig;
+  u.s.l2 = least_sig;
+#endif
+  return u.i128;
+}
+
+long most_sig (union U * u)
+{
+#if __LITTLE_ENDIAN__
+  return (*u).s.l2;
+#else
+  return (*u).s.l1;
+#endif
+}
+
+long least_sig (union U * u)
+{
+#if __LITTLE_ENDIAN__
+  return (*u).s.l1;
+#else
+  return (*u).s.l2;
+#endif
+}
+
+__int128
+add_128 (__int128 *ptr, __int128 val)
+{
+	return (*ptr + val);
+}
+
+__int128
+sub_128 (__int128 *ptr, __int128 val)
+{
+	return (*ptr - val);
+}
+
+int
+main (void)
+{
+  /* Do a simple add/sub to make sure carry is happening between the dwords
+     and that dwords are in correct endian order. */
+  u1.i128 = create_128 (1, -1);
+  u2.i128 = add_128 (&u1.i128, 1);
+  if ((most_sig (&u2) != 2) || (least_sig (&u2) != 0))
+    __builtin_abort ();
+  u2.i128 = sub_128 (&u2.i128, 1);
+  if ((most_sig (&u2) != 1) || (least_sig (&u2) != -1))
+    __builtin_abort ();
+  return 0;
+}
+
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 209226)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -6535,6 +6535,49 @@ (define_insn_and_split "*floatunsdisf2_m
   [(set_attr "length" "8")
    (set_attr "type" "fpload")])
 
+;; Define the TImode operations that can be done in a small number
+;; of instructions.  The & constraints are to prevent the register
+;; allocator from allocating registers that overlap with the inputs
+;; (for example, having an input in 7,8 and an output in 6,7).  We
+;; also allow for the output being the same as one of the inputs.
+
+(define_insn "addti3"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,&r,r,r")
+	(plus:TI (match_operand:TI 1 "gpc_reg_operand" "%r,r,0,0")
+		 (match_operand:TI 2 "reg_or_short_operand" "r,I,r,I")))]
+  "TARGET_64BIT"
+{
+  if (WORDS_BIG_ENDIAN)
+    return (GET_CODE (operands[2])) != CONST_INT
+	    ? \"addc %L0,%L1,%L2\;adde %0,%1,%2\"
+	    : \"addic %L0,%L1,%2\;add%G2e %0,%1\";
+  else
+    return (GET_CODE (operands[2])) != CONST_INT
+	    ? \"addc %0,%1,%2\;adde %L0,%L1,%L2\"
+	    : \"addic %0,%1,%2\;add%G2e %L0,%L1\";
+}
+  [(set_attr "type" "two")
+   (set_attr "length" "8")])
+
+(define_insn "subti3"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,&r,r,r,r")
+	(minus:TI (match_operand:TI 1 "reg_or_short_operand" "r,I,0,r,I")
+		  (match_operand:TI 2 "gpc_reg_operand" "r,r,r,0,0")))]
+  "TARGET_64BIT"
+{
+  if (WORDS_BIG_ENDIAN)
+    return (GET_CODE (operands[1]) != CONST_INT)
+	    ? \"subfc %L0,%L2,%L1\;subfe %0,%2,%1\"
+	    : \"subfic %L0,%L2,%1\;subf%G1e %0,%2\";
+  else
+    return (GET_CODE (operands[1]) != CONST_INT)
+	    ? \"subfc %0,%2,%1\;subfe %L0,%L2,%L1\"
+	    : \"subfic %0,%2,%1\;subf%G1e %L0,%L2\";
+}
+  [(set_attr "type" "two")
+   (set_attr "length" "8")])
+
+
 ;; Define the DImode operations that can be done in a small number
 ;; of instructions.  The & constraints are to prevent the register
 ;; allocator from allocating registers that overlap with the inputs

Follow-Ups:
- Re: [PATCH, rs6000] Improve TImode add/sub
  - From: David Edelsohn

References:
- [PATCH, rs6000] Improve TImode add/sub
  - From: Pat Haugen
- Re: [PATCH, rs6000] Improve TImode add/sub
  - From: segher

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]