[Bug target/70721] Suboptimal code generated when using _mm_min_sd

Tue Apr 19 02:46:00 GMT 2016

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70721

H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2016-04-19
                 CC|                            |ubizjak at gmail dot com
   Target Milestone|---                         |7.0
     Ever confirmed|0                           |1

--- Comment #1 from H.J. Lu <hjl.tools at gmail dot com> ---
This is NOT the fix.  But you should get the idea:

[hjl@gnu-6 pr70708]$ cat y.c
#include <emmintrin.h>

double
__attribute ((noinline, noclone))
foo (double a, double b)
{
   __m128d x = _mm_set_sd(a);
   __m128d y = _mm_set_sd(b);
   return _mm_cvtsd_f64(_mm_min_sd(x, y));
}
[hjl@gnu-6 pr70708]$ make y.s
/export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -O2 -S -o y.s y.c
[hjl@gnu-6 pr70708]$ cat y.s
        .file   "y.c"
        .text
        .p2align 4,,15
        .globl  foo
        .type   foo, @function
foo:
.LFB525:
        .cfi_startproc
        minsd   %xmm1, %xmm0
        ret
        .cfi_endproc
.LFE525:
        .size   foo, .-foo
        .ident  "GCC: (GNU) 7.0.0 20160418 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-6 pr70708]$ cat /tmp/x

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a66cfc4..167a564 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -32934,7 +32934,7 @@ static const struct builtin_description bdesc_args[] =

   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_xx_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 78c28c5..c8ce275 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1656,6 +1656,28 @@
    (set_attr "prefix" "<round_saeonly_prefix>")
    (set_attr "mode" "<ssescalarmode>")])

+(define_insn "xx_vm<code>v2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,v")
+        (vec_concat:V2DF
+         (smaxmin:DF
+           (vec_select:DF
+             (match_operand:V2DF 1 "register_operand" "0,v")
+             (parallel [(const_int 0)]))
+           (vec_select:DF
+             (match_operand:V2DF 2 "vector_operand" "xm,vm")
+             (parallel [(const_int 0)])))
+         (vec_select:DF (match_dup 0)
+                        (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "@
+   <maxmin_float>sd\t{%2, %0|%0, %2}
+   v<maxmin_float>sd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sse")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "btver2_sse_attr" "maxmin")
+   (set_attr "mode" "DF")])
+
 ;; These versions of the min/max patterns implement exactly the operations
 ;;   min = (op1 < op2 ? op1 : op2)
 ;;   max = (!(op1 < op2) ? op1 : op2)
[hjl@gnu-6 pr70708]$