Currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr.
The latter has a libcall fallback and the IFN can only do optabs.
Because of this the change I made to optimize copysign only works if the
target has implemented the optab, but it should work for those that have the
libcall too.
More annoyingly, if a target has vector versions of ABS and NEG but not
COPYSIGN, then the change made them lose vectorization.
The proper fix for this is to treat the IFN the same as the tree EXPR and to
enhance expand_COPYSIGN to also support vector calls.
I have such a patch for GCC 15 but it's quite big and too invasive for stage-4.
As such this is a minimal fix: just don't apply the transformation, and leave
targets which don't have the optab unoptimized.
The target list for check_effective_target_ifn_copysign was obtained by
grepping for copysign and looking at the optab.
gcc/ChangeLog:
PR tree-optimization/112468
* doc/sourcebuild.texi: Document ifn_copysign.
* match.pd: Only apply transformation if target supports the IFN.
gcc/testsuite/ChangeLog:
PR tree-optimization/112468
* gcc.dg/fold-copysign-1.c: Modify tests based on if target supports
IFN_COPYSIGN.
* gcc.dg/pr55152-2.c: Likewise.
* gcc.dg/tree-ssa/abs-4.c: Likewise.
* gcc.dg/tree-ssa/backprop-6.c: Likewise.
* gcc.dg/tree-ssa/copy-sign-2.c: Likewise.
* gcc.dg/tree-ssa/mult-abs-2.c: Likewise.
* lib/target-supports.exp (check_effective_target_ifn_copysign): New.
@item xorsign
Target supports the xorsign optab expansion.
+@item ifn_copysign
+Target supports the copysign optab expansion of float and double for
+both scalar and vector modes.
+
@end table
@subsubsection Environment attributes
(hypots @0 (copysigns @1 @2))
(hypots @0 @1))))
-/* copysign(x, CST) -> abs (x). */
+/* copysign(x, CST) -> abs (x). If the target does not
+ support the copysign optab then canonicalize
+ copysign(x, -CST) -> fneg (abs (x)). */
(for copysigns (COPYSIGN_ALL)
(simplify
(copysigns @0 REAL_CST@1)
(if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
- (abs @0))))
+ (abs @0)
+#if GIMPLE
+ (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+ OPTIMIZE_FOR_BOTH))
+ (negate (abs @0)))
+#endif
+ )))
-/* Transform fneg (fabs (X)) -> copysign (X, -1). */
+#if GIMPLE
+/* Transform fneg (fabs (X)) -> copysign (X, -1) as the canonical
+ representation if the target supports the copysign optab. */
(simplify
(negate (abs @0))
- (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
-
+ (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+ OPTIMIZE_FOR_BOTH))
+ (IFN_COPYSIGN @0 { build_minus_one_cst (type); })))
+#endif
/* copysign(copysign(x, y), z) -> copysign(x, z). */
(for copysigns (COPYSIGN_ALL)
(simplify
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-cddce1" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
double foo (double x)
{
return __builtin_copysign (x, minuszero);
}
-/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! ifn_copysign } } } } */
/* { dg-do compile } */
/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
double g (double a)
{
return (a<-a)?a:-a;
}
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */
/* { dg-do compile } */
/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
/* PR tree-optimization/109829 */
float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
/* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
/* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-backprop-details" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
void start (void *);
void end (void *);
TEST_FUNCTION (double, )
TEST_FUNCTION (long double, l)
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */
/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
/* { dg-do compile } */
float f(float x)
{
float t = __builtin_copysignf (1.0f, -x);
return x * t;
}
-/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */
{
return x * (x <= 0.f ? 1.f : -1.f);
}
-/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
+
+/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */
|| [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
}
+# Return 1 if the target plus current options supports folding of
+# copysign into IFN_COPYSIGN.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_ifn_copysign { } {
+ return [check_cached_effective_target_indexed ifn_copysign {
+ expr {
+ (([istarget i?86-*-*] || [istarget x86_64-*-*])
+ && [is-effective-target sse])
+ || ([istarget loongarch*-*-*]
+ && [check_effective_target_hard_float])
+ || ([istarget powerpc*-*-*]
+ && ![istarget powerpc-*-linux*paired*])
+ || [istarget alpha*-*-*]
+ || [istarget aarch64*-*-*]
+ || [is-effective-target arm_neon]
+ || ([istarget s390*-*-*]
+ && [check_effective_target_s390_vx])
+ || ([istarget riscv*-*-*]
+ && [check_effective_target_hard_float])
+ }}]
+}
+
# Return 1 if the target plus current options supports a vector
# widening summation of *short* args into *int* result, 0 otherwise.
#