This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]


Hi All,

This patch implements an optimization rewriting

x * copysign (1.0, y) and 
x * copysign (-1.0, y) 

to:

x ^ (y & (1 << sign_bit_position))

This is done by creating a special builtin during matching and generating the
appropriate instructions during expand. This new builtin is called XORSIGN.

The expansion of xorsign depends on whether the backend has an appropriate optab
available. If it does not, we fall back to a modified version of the existing
copysign expansion that does not take the abs value of the first argument.

This patch is a revival of a previous patch
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html

Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
Regression tested on aarch64-none-linux-gnu with no regressions.

Ok for trunk?

gcc/
2017-06-07  Tamar Christina  <tamar.christina@arm.com>

	* builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF): New.
	(BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX): Likewise.
	* match.pd (mult (COPYSIGN:s real_onep @0) @1): New simplifier.
	(mult (COPYSIGN:s real_minus_onep @0) @1): Likewise.
	(copysigns @0 (negate @1)): Likewise.
	* builtins.c (expand_builtin_copysign): Promoted local to argument.
	(expand_builtin): Added CASE_FLT_FN_FLOATN_NX (BUILT_IN_XORSIGN) and
	CASE_FLT_FN (BUILT_IN_XORSIGN).
	(BUILT_IN_COPYSIGN): Updated function call.
	* optabs.h (expand_copysign): New bool.
	(expand_xorsign): New.
	* optabs.def (xorsign_optab): New.
	* optabs.c (expand_copysign): New parameter.
	* fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
	* fortran/mathbuiltins.def (XORSIGN): New.

gcc/testsuite/
2017-06-07  Tamar Christina  <tamar.christina@arm.com>

	* gcc.dg/tree-ssa/xorsign.c: New.
	* gcc.dg/xorsign_exec.c: New.
	* gcc.dg/vec-xorsign_exec.c: New.
	* gcc.dg/tree-ssa/reassoc-39.c (f2, f3): Updated constant to 2.
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 30462ad0f419721fd0aa2029dbc9f8f5593b5823..2a84bebf5f6235f84a0f46f15ba2fed67b1d5564 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5117,10 +5117,12 @@ expand_builtin_fabs (tree exp, rtx target, rtx subtarget)
 /* Expand EXP, a call to copysign, copysignf, or copysignl.
    Return NULL is a normal call should be emitted rather than expanding the
    function inline.  If convenient, the result should be placed in TARGET.
-   SUBTARGET may be used as the target for computing the operand.  */
+   SUBTARGET may be used as the target for computing the operand.
+   If OP0_NEEDS_ABS is true then abs() will be performed on the first
+   argument.  */
 
 static rtx
-expand_builtin_copysign (tree exp, rtx target, rtx subtarget)
+expand_builtin_copysign (tree exp, rtx target, rtx subtarget, bool op0_needs_abs)
 {
   rtx op0, op1;
   tree arg;
@@ -5134,7 +5136,7 @@ expand_builtin_copysign (tree exp, rtx target, rtx subtarget)
   arg = CALL_EXPR_ARG (exp, 1);
   op1 = expand_normal (arg);
 
-  return expand_copysign (op0, op1, target);
+  return expand_copysign (op0, op1, target, op0_needs_abs);
 }
 
 /* Expand a call to __builtin___clear_cache.  */
@@ -6586,7 +6588,14 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
 
     CASE_FLT_FN (BUILT_IN_COPYSIGN):
     CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
-      target = expand_builtin_copysign (exp, target, subtarget);
+      target = expand_builtin_copysign (exp, target, subtarget, true);
+      if (target)
+	return target;
+      break;
+
+    CASE_FLT_FN (BUILT_IN_XORSIGN):
+    CASE_FLT_FN_FLOATN_NX (BUILT_IN_XORSIGN):
+      target = expand_builtin_copysign (exp, target, subtarget, false);
       if (target)
 	return target;
       break;
@@ -7688,7 +7697,7 @@ builtin_mathfn_code (const_tree t)
   const_call_expr_arg_iterator iter;
 
   if (TREE_CODE (t) != CALL_EXPR
-      || TREE_CODE (CALL_EXPR_FN (t)) != ADDR_EXPR)
+      || (CALL_EXPR_FN (t) && TREE_CODE (CALL_EXPR_FN (t)) != ADDR_EXPR))
     return END_BUILTINS;
 
   fndecl = get_callee_fndecl (t);
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 58d78dbbdee58df77fb7bad904362327704403c5..9508fc35d622369ab5b89fc63d3add3728931279 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -325,6 +325,12 @@ DEF_C99_BUILTIN        (BUILT_IN_COPYSIGNL, "copysignl", BT_FN_LONGDOUBLE_LONGDO
 #define COPYSIGN_TYPE(F) BT_FN_##F##_##F##_##F
 DEF_GCC_FLOATN_NX_BUILTINS (BUILT_IN_COPYSIGN, "copysign", COPYSIGN_TYPE, ATTR_CONST_NOTHROW_LEAF_LIST)
 #undef COPYSIGN_TYPE
+DEF_GCC_BUILTIN        (BUILT_IN_XORSIGN, "xorsign", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_XORSIGNF, "xorsignf", BT_FN_FLOAT_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_XORSIGNL, "xorsignl", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
+#define XORSIGN_TYPE(F) BT_FN_##F##_##F##_##F
+DEF_GCC_FLOATN_NX_BUILTINS (BUILT_IN_XORSIGN, "xorsign", XORSIGN_TYPE, ATTR_CONST_NOTHROW_LEAF_LIST)
+#undef XORSIGN_TYPE
 DEF_LIB_BUILTIN        (BUILT_IN_COS, "cos", BT_FN_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING)
 DEF_C99_C90RES_BUILTIN (BUILT_IN_COSF, "cosf", BT_FN_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING)
 DEF_LIB_BUILTIN        (BUILT_IN_COSH, "cosh", BT_FN_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c
index 44bd8dcc2ad028a8e1ec1cd477d9e8dc1790fb57..8c5fc72aed654f2470d1d57938935e0b87964fe8 100644
--- a/gcc/fortran/f95-lang.c
+++ b/gcc/fortran/f95-lang.c
@@ -747,6 +747,16 @@ gfc_init_builtin_functions (void)
   gfc_define_builtin ("__builtin_copysignf", mfunc_float[1], 
 		      BUILT_IN_COPYSIGNF, "copysignf",
 		      ATTR_CONST_NOTHROW_LEAF_LIST);
+
+  gfc_define_builtin ("__builtin_xorsignl", mfunc_longdouble[1],
+                      BUILT_IN_XORSIGNL, "xorsignl",
+                      ATTR_CONST_NOTHROW_LEAF_LIST);
+  gfc_define_builtin ("__builtin_xorsign", mfunc_double[1],
+                      BUILT_IN_XORSIGN, "xorsign",
+                      ATTR_CONST_NOTHROW_LEAF_LIST);
+  gfc_define_builtin ("__builtin_xorsignf", mfunc_float[1],
+                      BUILT_IN_XORSIGNF, "xorsignf",
+                      ATTR_CONST_NOTHROW_LEAF_LIST);
  
   gfc_define_builtin ("__builtin_nextafterl", mfunc_longdouble[1], 
 		      BUILT_IN_NEXTAFTERL, "nextafterl",
diff --git a/gcc/fortran/mathbuiltins.def b/gcc/fortran/mathbuiltins.def
index fadfedb25ffe16806c82318e8c3f13a9993f96ff..e865735c028084dd5df220559adddd23ecec5de9 100644
--- a/gcc/fortran/mathbuiltins.def
+++ b/gcc/fortran/mathbuiltins.def
@@ -58,6 +58,7 @@ DEFINE_MATH_BUILTIN   (HYPOT, "hypot",  1)
    double and long double) and to build the quad-precision decls.  */
 OTHER_BUILTIN (CABS,      "cabs",      cabs,    true)
 OTHER_BUILTIN (COPYSIGN,  "copysign",  2,       true)
+OTHER_BUILTIN (XORSIGN,   "xorsign" ,  2,       true)
 OTHER_BUILTIN (CPOW,      "cpow",      cpow,    true)
 OTHER_BUILTIN (FABS,      "fabs",      1,       true)
 OTHER_BUILTIN (FMOD,      "fmod",      2,       true)
diff --git a/gcc/match.pd b/gcc/match.pd
index 54a8e0449f8301ffaf553c139bbd2d7ccb1e8648..d6ce8f606f1fdf79020cb0f18a010fc554ca39e6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -441,6 +441,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (coss (copysigns @0 @1))
    (coss @0)))
 
+/* x * copysign(1.0, y) -> xorsign(x, y).  */
+(for copysigns (COPYSIGN)
+     xorsigns (XORSIGN)
+ (simplify
+  (mult:c (copysigns:s real_onep @0) @1)
+    (xorsigns @1 @0)))
+
+
+/* x * copysign(-1.0, y) -> xorsign(x, y).  */
+(for copysigns (COPYSIGN)
+     xorsigns (XORSIGN)
+ (simplify
+  (mult:c (copysigns:s real_minus_onep @0) @1)
+    (xorsigns @1 @0)))
+
 /* pow(copysign(x, y), z) -> pow(x, z) if z is an even integer.  */
 (for pows (POW)
      copysigns (COPYSIGN)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index b69e75943cc99d6c5792ae2f151f5343d2bdbc1a..a76d6e3041dd3b7024b6616de8d1390031390b1e 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -3433,24 +3433,26 @@ expand_copysign_bit (machine_mode mode, rtx op0, rtx op1, rtx target,
   return target;
 }
 
-/* Expand the C99 copysign operation.  OP0 and OP1 must be the same
-   scalar floating point mode.  Return NULL if we do not know how to
-   expand the operation inline.  */
+/* Expand the C99 copysign operation or an optimized 'xorsign' depending on
+   the value of OP0_NEEDS_ABS.
+   Essentially xorsign(abs(x),y) == copysign(x,y).
+   OP0 and OP1 must be the same scalar floating point mode.
+   Return NULL if we do not know how to expand the operation inline.  */
 
 rtx
-expand_copysign (rtx op0, rtx op1, rtx target)
+expand_copysign (rtx op0, rtx op1, rtx target, bool op0_needs_abs)
 {
   machine_mode mode = GET_MODE (op0);
   const struct real_format *fmt;
-  bool op0_is_abs;
   rtx temp;
 
   gcc_assert (SCALAR_FLOAT_MODE_P (mode));
   gcc_assert (GET_MODE (op1) == mode);
 
   /* First try to do it with a special instruction.  */
-  temp = expand_binop (mode, copysign_optab, op0, op1,
-		       target, 0, OPTAB_DIRECT);
+  temp = expand_binop (mode, op0_needs_abs ? copysign_optab : xorsign_optab,
+		       op0, op1, target, 0, OPTAB_DIRECT);
+
   if (temp)
     return temp;
 
@@ -3458,12 +3460,11 @@ expand_copysign (rtx op0, rtx op1, rtx target)
   if (fmt == NULL || !fmt->has_signed_zero)
     return NULL_RTX;
 
-  op0_is_abs = false;
-  if (CONST_DOUBLE_AS_FLOAT_P (op0))
+  if (op0_needs_abs && CONST_DOUBLE_AS_FLOAT_P (op0))
     {
       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
 	op0 = simplify_unary_operation (ABS, mode, op0, mode);
-      op0_is_abs = true;
+      op0_needs_abs = false;
     }
 
   if (fmt->signbit_ro >= 0
@@ -3472,7 +3473,7 @@ expand_copysign (rtx op0, rtx op1, rtx target)
 	      && optab_handler (abs_optab, mode) != CODE_FOR_nothing)))
     {
       temp = expand_copysign_absneg (mode, op0, op1, target,
-				     fmt->signbit_ro, op0_is_abs);
+				     fmt->signbit_ro, !op0_needs_abs);
       if (temp)
 	return temp;
     }
@@ -3480,7 +3481,7 @@ expand_copysign (rtx op0, rtx op1, rtx target)
   if (fmt->signbit_rw < 0)
     return NULL_RTX;
   return expand_copysign_bit (mode, op0, op1, target,
-			      fmt->signbit_rw, op0_is_abs);
+			      fmt->signbit_rw, !op0_needs_abs);
 }
 
 /* Generate an instruction whose insn-code is INSN_CODE,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 504c655be0210a72e7f0f34cde0131e21ccf8089..507af9ea81c42eba98780ef326c22847fdd2c043 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -253,6 +253,7 @@ OPTAB_D (asin_optab, "asin$a2")
 OPTAB_D (atan2_optab, "atan2$a3")
 OPTAB_D (atan_optab, "atan$a2")
 OPTAB_D (copysign_optab, "copysign$F$a3")
+OPTAB_D (xorsign_optab, "xorsign$F$a3")
 OPTAB_D (cos_optab, "cos$a2")
 OPTAB_D (exp10_optab, "exp10$a2")
 OPTAB_D (exp2_optab, "exp2$a2")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 728b866f08db01de2cc330ad29088ab252f4d3ad..676be0bc6c8b49e46d868ff6610de2675a8a6b79 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -220,7 +220,8 @@ extern rtx expand_abs (machine_mode, rtx, rtx, int, int);
 extern rtx expand_one_cmpl_abs_nojump (machine_mode, rtx, rtx);
 
 /* Expand the copysign operation.  */
-extern rtx expand_copysign (rtx, rtx, rtx);
+extern rtx expand_copysign (rtx, rtx, rtx, bool);
+
 /* Generate an instruction with a given INSN_CODE with an output and
    an input.  */
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index 9befe184a018515da50a4dd14782d79482fd07d9..b917146bc812301194feef9c145ad93e9c2ee446 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -20,13 +20,13 @@ f1 (float x)
 double
 f2 (double x, double y)
 {
-  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+  return x * ((1.0/12) * __builtin_copysign (2.0, y));
 }
 
 double
 f3 (double x, double y)
 {
-  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+  return (x * (-1.0/12)) * __builtin_copysign (2.0, y);
 }
 
 double
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/xorsign.c b/gcc/testsuite/gcc.dg/tree-ssa/xorsign.c
new file mode 100644
index 0000000000000000000000000000000000000000..5989727bf9a80f5ff487dd7267b96cc32745f345
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/xorsign.c
@@ -0,0 +1,85 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-gimple" } */
+
+double
+check_d_pos (double x, double y)
+{
+  return x * __builtin_copysign (1.0, y);
+}
+
+float
+check_f_pos (float x, float y)
+{
+  return x * __builtin_copysignf (1.0f, y);
+}
+
+long double
+check_l_pos (long double x, long double y)
+{
+  return x * __builtin_copysignl (1.0, y);
+}
+
+/* --------------- */
+
+double
+check_d_neg (double x, double y)
+{
+  return x * __builtin_copysign (-1.0, y);
+}
+
+float
+check_f_neg (float x, float y)
+{
+  return x * __builtin_copysignf (-1.0f, y);
+}
+
+long double
+check_l_neg (long double x, long double y)
+{
+  return x * __builtin_copysignl (-1.0, y);
+}
+
+/* --------------- */
+
+double
+check_d_pos_rev (double x, double y)
+{
+  return __builtin_copysign (1.0, y) * x;
+}
+
+float
+check_f_pos_rev (float x, float y)
+{
+  return __builtin_copysignf (1.0f, y) * x;
+}
+
+long double
+check_l_pos_rev (long double x, long double y)
+{
+  return __builtin_copysignl (1.0, y) * x;
+}
+
+/* --------------- */
+
+double
+check_d_neg_rev (double x, double y)
+{
+  return __builtin_copysign (-1.0, y) * x;
+}
+
+float
+check_f_neg_rev (float x, float y)
+{
+  return __builtin_copysignf (-1.0f, y) * x;
+}
+
+long double
+check_l_neg_rev (long double x, long double y)
+{
+  return __builtin_copysignl (-1.0, y) * x;
+}
+
+/* { dg-final { scan-tree-dump-times "xorsign" 12 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "xorsignf" 4 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "xorsignl" 4 "gimple"} } */
+/* { dg-final { scan-assembler-not "__builtin_xorsign" } } */
diff --git a/gcc/testsuite/gcc.dg/vec-xorsign_exec.c b/gcc/testsuite/gcc.dg/vec-xorsign_exec.c
new file mode 100644
index 0000000000000000000000000000000000000000..f8c8befd336c7f2743a1621d3b0f53d78bab9df7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vec-xorsign_exec.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-additional-options "-march=armv8-a" { target { aarch64*-*-* } } }*/
+
+extern void abort ();
+
+#define N 16
+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
+	      -12.5f, -15.6f, -18.7f, -21.8f,
+	      24.9f, 27.1f, 30.2f, 33.3f,
+	      36.4f, 39.5f, 42.6f, 45.7f};
+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
+	      -9.0f, 1.0f, -2.0f, 3.0f,
+	      -4.0f, -5.0f, 6.0f, 7.0f,
+	      -8.0f, -9.0f, 10.0f, 11.0f};
+float r[N];
+
+float ad[N] = {-0.1fd,  -3.2d,  -6.3d,  -9.4d,
+               -12.5d, -15.6d, -18.7d, -21.8d,
+                24.9d,  27.1d,  30.2d,  33.3d,
+                36.4d,  39.5d,  42.6d, 45.7d};
+float bd[N] = {-1.2d,  3.4d, -5.6d,  7.8d,
+               -9.0d,  1.0d, -2.0d,  3.0d,
+               -4.0d, -5.0d,  6.0d,  7.0d,
+               -8.0d, -9.0d, 10.0d, 11.0d};
+float rd[N];
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    r[i] = a[i] * _builtin_copysignf (1.0f, b[i]);
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
+      abort ();
+
+  for (i = 0; i < N; i++)
+    rd[i] = ad[i] * _builtin_copysignd (1.0d, bd[i]);
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    if (r[i] != ad[i] * __builtin_copysignd (1.0d, bd[i]))
+      abort ();
+
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/xorsign_exec.c b/gcc/testsuite/gcc.dg/xorsign_exec.c
new file mode 100644
index 0000000000000000000000000000000000000000..64bf8044cbd12c1cc744ff9b2a3308d71267bff0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/xorsign_exec.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O -ffast-math" } */
+
+#include <math.h>
+
+extern void abort(void);
+
+static double x = 2.0;
+static float  y = 2.0;
+
+int main()
+{
+  if ((2.5 * __builtin_copysign(1.0d, x)) != 2.5)
+     abort();
+
+  if ((2.5 * __builtin_copysign(1.0f, y)) != 2.5)
+     abort();
+
+  if ((2.5 * __builtin_copysignf(1.0d, -x)) != -2.5)
+     abort();
+
+  if ((2.5 * __builtin_copysignf(1.0f, -y)) != -2.5)
+     abort();
+
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]