gcc.gnu.org Git - gcc.git/commitdiff
i386: Introduce mulv2si3 instruction
author Uros Bizjak <ubizjak@gmail.com>
Fri, 5 May 2023 12:10:18 +0000 (14:10 +0200)
committer Uros Bizjak <ubizjak@gmail.com>
Fri, 5 May 2023 12:11:15 +0000 (14:11 +0200)
For SSE2 targets, the expander unpacks the input elements into the correct
positions in a V4SI vector and emits a PMULUDQ instruction.  The output
elements are then shuffled back to their positions in the V2SI vector.

For SSE4.1 targets, the PMULLD instruction is emitted directly.

gcc/ChangeLog:

* config/i386/mmx.md (mulv2si3): New expander.
(*mulv2si3): New insn pattern.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sse2-mmx-mult-vec.c: New test.

gcc/config/i386/mmx.md
gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c [new file with mode: 0644]

index 872ddbc55f220b6bc5f5f9800f5879801348acd2..6dd203f4fa874d6049f329aaec65e301cc811efe 100644 (file)
    (set_attr "type" "sseadd")
    (set_attr "mode" "TI")])
 
+;; Expander for element-wise multiplication of two V2SI vectors, enabled
+;; under TARGET_MMX_WITH_SSE.  With TARGET_SSE4_1 the preparation code
+;; emits nothing and does not reach DONE, leaving the mult pattern below
+;; to be generated.  Otherwise it emits an SSE2 sequence built around an
+;; even-element widening unsigned multiply and finishes with DONE.
+(define_expand "mulv2si3"
+  [(set (match_operand:V2SI 0 "register_operand")
+       (mult:V2SI
+         (match_operand:V2SI 1 "register_operand")
+         (match_operand:V2SI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!TARGET_SSE4_1)
+    {
+      /* View both V2SI inputs as the low parts of V4SI values.  */
+      rtx op1 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[1]),
+                               V2SImode);
+      rtx op2 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[2]),
+                               V2SImode);
+
+      /* Interleave each input with itself so that its two elements end up
+         in the even positions (0 and 2) consumed by the multiply below.  */
+      rtx tmp1 = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vec_interleave_lowv4si (tmp1, op1, op1));
+      rtx tmp2 = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vec_interleave_lowv4si (tmp2, op2, op2));
+
+      /* Widening unsigned multiply of the even elements; the result is a
+         V2DI holding two 64-bit products.  */
+      rtx res = gen_reg_rtx (V2DImode);
+      emit_insn (gen_vec_widen_umult_even_v4si (res, tmp1, tmp2));
+
+      /* Reinterpret the products as V4SI and select elements 0 and 2,
+         i.e. the low 32 bits of each product, into elements 0 and 1
+         (and again into 2 and 3) of OP0.  */
+      rtx op0 = gen_reg_rtx (V4SImode);
+      emit_insn (gen_sse2_pshufd_1 (op0, gen_lowpart (V4SImode, res),
+                                   const0_rtx, const2_rtx,
+                                   const0_rtx, const2_rtx));
+
+      /* The low V2SI part of OP0 is the result.  */
+      emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
+      DONE;
+    }
+})
+
+;; V2SI multiply insn, available only when both TARGET_SSE4_1 and
+;; TARGET_MMX_WITH_SSE hold.  The first two alternatives (isa "noavx")
+;; emit the two-operand pmulld form, with operand 1 tied to the
+;; destination by the "0" constraint; the third alternative (isa "avx")
+;; emits the three-operand vpmulld form.  The "%" on operand 1 marks
+;; operands 1 and 2 as commutative.
+(define_insn "*mulv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
+       (mult:V2SI
+         (match_operand:V2SI 1 "register_operand" "%0,0,v")
+         (match_operand:V2SI 2 "register_operand" "Yr,*x,v")))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+  "@
+   pmulld\t{%2, %0|%0, %2}
+   pmulld\t{%2, %0|%0, %2}
+   vpmulld\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "sseimul")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "TI")])
+
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
         (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c
new file mode 100644 (file)
index 0000000..cdc9a7b
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#define N 2  /* Number of int elements in each array.  */
+
+int a[N] = {-287807, 604344};  /* First factor of each product.  */
+int b[N] = {474362, 874120};  /* Second factor of each product.  */
+int r[N];  /* Products computed by sse2_test.  */
+
+int rc[N] = {914249338, -11800128};  /* Expected low 32 bits of a[i] * b[i].  */
+
+static void
+sse2_test (void)  /* Multiply a[] by b[] into r[], then verify against rc[].  */
+{
+  int i;
+
+  for (i = 0; i < N; i++)  /* Presumably the loop meant to be vectorized.  */
+    r[i] = a[i] * b[i];
+
+  /* Check results: abort on the first product that differs from rc[].  */
+  for (i = 0; i < N; i++)
+    if (r[i] != rc[i])
+      abort ();
+}
This page took 0.091118 seconds and 5 git commands to generate.