This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix up _mm_f{,n}m{add,sub}_s{s,d} (PR target/54564)


Hi!

The fma-*.c testcase show that these intrinsics probably mean to preserve
the high elements (other than the lowest) of the first argument of the
fmaintrin.h *_s{s,d} intrinsics in the destination (the HW insn preserve
there the destination register, but that varies - for 132 and 213 it is the
first one (but the negation performed for _mm_fnm*_s[sd] breaks it anyway),
for 231 it is the last one).  What the expander did was to put there
an uninitialized pseudo, so we ended up with pretty random content, before
H.J's http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=190492 it happened
to work by accident, but when things changed slightly and reload chose
different alternative, this broke.

The following patch fixes it, by tweaking the header so that the first
argument is not negated (we negate the second one instead), as we don't want
to negate the high elements if e.g. for whatever reason combiner doesn't
match it.  It fixes the expander to use a dup of the X operand as the high
element provider for the pattern, removes the 231 alternatives (because
those provide different destination high elements) and removes commutative
marker (again, that would mean different high elements).

Bootstrapped/regtested on x86_64-linux and i686-linux, additionally tested
with
make check-gcc RUNTESTFLAGS='--target_board=valgrind-sim/-m64 i386.exp=\*fma\*'
Ok for trunk/4.7?

2012-09-13  Jakub Jelinek  <jakub@redhat.com>

	PR target/54564
	* config/i386/sse.md (fmai_vmfmadd_<mode>): Use (match_dup 1)
	instead of (match_dup 0) as second argument to vec_merge.
	(*fmai_fmadd_<mode>, *fmai_fmsub_<mode>): Likewise.
	Remove third alternative.
	(*fmai_fnmadd_<mode>, *fmai_fnmsub_<mode>): Likewise.  Negate
	operand 2 instead of operand 1, but put it as first argument
	of fma.

	* config/i386/fmaintrin.h (_mm_fnmadd_sd, _mm_fnmadd_ss,
	_mm_fnmsub_sd, _mm_fnmsub_ss): Negate the second argument instead
	of the first.

--- gcc/config/i386/sse.md.jj	2012-09-05 18:27:03.000000000 +0200
+++ gcc/config/i386/sse.md	2012-09-13 13:49:49.504968716 +0200
@@ -2072,79 +2072,75 @@ (define_expand "fmai_vmfmadd_<mode>"
 	    (match_operand:VF_128 1 "nonimmediate_operand")
 	    (match_operand:VF_128 2 "nonimmediate_operand")
 	    (match_operand:VF_128 3 "nonimmediate_operand"))
-	  (match_dup 0)
+	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA")
 
 (define_insn "*fmai_fmadd_<mode>"
-  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
         (vec_merge:VF_128
 	  (fma:VF_128
-	    (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
-	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
-	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
-	  (match_dup 0)
+	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
+	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
+	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA"
   "@
    vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*fmai_fmsub_<mode>"
-  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
         (vec_merge:VF_128
 	  (fma:VF_128
-	    (match_operand:VF_128   1 "nonimmediate_operand" "%0, 0,x")
-	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
+	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
+	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x")
 	    (neg:VF_128
-	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
-	  (match_dup 0)
+	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA"
   "@
    vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*fmai_fnmadd_<mode>"
-  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
         (vec_merge:VF_128
 	  (fma:VF_128
 	    (neg:VF_128
-	      (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
-	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
-	    (match_operand:VF_128   3 "nonimmediate_operand" " x,xm,0"))
-	  (match_dup 0)
+	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
+	    (match_operand:VF_128   3 "nonimmediate_operand" " x,xm"))
+	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA"
   "@
    vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*fmai_fnmsub_<mode>"
-  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
         (vec_merge:VF_128
 	  (fma:VF_128
 	    (neg:VF_128
-	      (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
-	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
+	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
 	    (neg:VF_128
-	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
-	  (match_dup 0)
+	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
+	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_FMA"
   "@
    vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
--- gcc/config/i386/fmaintrin.h.jj	2011-09-02 16:29:38.000000000 +0200
+++ gcc/config/i386/fmaintrin.h	2012-09-13 13:32:20.162333244 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -164,7 +164,7 @@ extern __inline __m128d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B,
+  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B,
                                             (__v2df)__C);
 }
 
@@ -172,7 +172,7 @@ extern __inline __m128
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B,
+  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B,
                                            (__v4sf)__C);
 }
 
@@ -212,7 +212,7 @@ extern __inline __m128d
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B,
+  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B,
                                             -(__v2df)__C);
 }
 
@@ -220,7 +220,7 @@ extern __inline __m128
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B,
+  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B,
                                            -(__v4sf)__C);
 }
 

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]