This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Vectorizing abs(char/short/int) on x86.


On Tue, Oct 29, 2013 at 1:38 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> Hello!
>
>> For the define_expand I added as below, the else body is there to
>> avoid fall-through transformations to ABS operation in optabs.c.
>> Otherwise ABS will be converted to other operations even that we have
>> corresponding instructions from SSSE3.
>
> No, it wont be.
>
> Fallthrough will generate the pattern that will be matched by the insn
> pattern above, just like you are doing by hand below.


I think the case is special for abs(). In optabs.c, there is a
function expand_abs() in which the function expand_abs_nojump() is
called. This function first tries the expand function defined for the
target and if it fails it will try max(v, -v) then shift-xor-sub
method. If I don't generate any instruction for SSSE3, the
fall-through will be max(v, -v). I have tested it on my machine.


>
>> (define_expand "abs<mode>2"
>>   [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
>> (abs:VI124_AVX2_48_AVX512F
>>  (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
>>   "TARGET_SSE2"
>> {
>>   if (!TARGET_SSSE3)
>>     ix86_expand_sse2_abs (operands[0], force_reg (<MODE>mode, operands[1]));
>
> Do you really need force_reg here? You are using generic expanders in
> ix86_expand_sse2_abs that can handle non-registers operands just as
> well.

You are right. I have removed force_reg.


>
>>   else
>>     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
>>    gen_rtx_ABS (<MODE>mode, operands[1])));
>>   DONE;
>> })
>
> Please note that your mailer mangles indents. Please indent your code correctly.

Right.. I also attached a text file in which all tabs are there.


The updated patch is pasted below (and also in the attached file).
Thank you very much for your comment!


Cong




diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8a38316..84c7ab5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2013-10-22  Cong Hou  <congh@google.com>
+
+ PR target/58762
+ * config/i386/i386-protos.h (ix86_expand_sse2_abs): New function.
+ * config/i386/i386.c (ix86_expand_sse2_abs): New function.
+ * config/i386/sse.md: Add SSE2 support to abs (8/16/32-bit-int).
+
 2013-10-14  David Malcolm  <dmalcolm@redhat.com>

  * dumpfile.h (gcc::dump_manager): New class, to hold state
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3ab2f3a..ca31224 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -238,6 +238,7 @@ extern void ix86_expand_mul_widen_evenodd (rtx,
rtx, rtx, bool, bool);
 extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool);
 extern void ix86_expand_sse2_mulv4si3 (rtx, rtx, rtx);
 extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
+extern void ix86_expand_sse2_abs (rtx, rtx);

 /* In i386-c.c  */
 extern void ix86_target_macros (void);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 02cbbbd..71905fc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -41696,6 +41696,53 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
        gen_rtx_MULT (mode, op1, op2));
 }

+void
+ix86_expand_sse2_abs (rtx op0, rtx op1)
+{
+  enum machine_mode mode = GET_MODE (op0);
+  rtx tmp0, tmp1;
+
+  switch (mode)
+    {
+      /* For 32-bit signed integer X, the best way to calculate the absolute
+ value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
+      case V4SImode:
+ tmp0 = expand_simple_binop (mode, ASHIFTRT, op1,
+    GEN_INT (GET_MODE_BITSIZE
+ (GET_MODE_INNER (mode)) - 1),
+    NULL, 0, OPTAB_DIRECT);
+ if (tmp0)
+  tmp1 = expand_simple_binop (mode, XOR, op1, tmp0,
+      NULL, 0, OPTAB_DIRECT);
+ if (tmp0 && tmp1)
+  expand_simple_binop (mode, MINUS, tmp1, tmp0,
+       op0, 0, OPTAB_DIRECT);
+ break;
+
+      /* For 16-bit signed integer X, the best way to calculate the absolute
+ value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
+      case V8HImode:
+ tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+ if (tmp0)
+  expand_simple_binop (mode, SMAX, op1, tmp0, op0, 0,
+       OPTAB_DIRECT);
+ break;
+
+      /* For 8-bit signed integer X, the best way to calculate the absolute
+ value of X is min ((unsigned char) X, (unsigned char) (-X)),
+ as SSE2 provides the PMINUB insn.  */
+      case V16QImode:
+ tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+ if (tmp0)
+  expand_simple_binop (V16QImode, UMIN, op1, tmp0, op0, 0,
+       OPTAB_DIRECT);
+ break;
+
+      default:
+ break;
+    }
+}
+
 /* Expand an insert into a vector register through pinsr insn.
    Return true if successful.  */

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c3f6c94..0d9cefe 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8721,7 +8721,7 @@
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI")])

-(define_insn "abs<mode>2"
+(define_insn "*abs<mode>2"
   [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
  (abs:VI124_AVX2_48_AVX512F
   (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
@@ -8733,6 +8733,20 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])

+(define_expand "abs<mode>2"
+  [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
+ (abs:VI124_AVX2_48_AVX512F
+  (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
+  "TARGET_SSE2"
+{
+  if (!TARGET_SSSE3)
+    ix86_expand_sse2_abs (operands[0], operands[1]);
+  else
+    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+    gen_rtx_ABS (<MODE>mode, operands[1])));
+  DONE;
+})
+
 (define_insn "abs<mode>2"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
  (abs:MMXMODEI
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 075d071..cf5b942 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-10-22  Cong Hou  <congh@google.com>
+
+ PR target/58762
+ * gcc.dg/vect/pr58762.c: New test.
+
 2013-10-14  Tobias Burnus  <burnus@net-b.de>

  PR fortran/58658
diff --git a/gcc/testsuite/gcc.dg/vect/pr58762.c
b/gcc/testsuite/gcc.dg/vect/pr58762.c
new file mode 100644
index 0000000..6468d0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr58762.c
@@ -0,0 +1,28 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void test1 (char* a, char* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+void test2 (short* a, short* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+void test3 (int* a, int* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect"
+       { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */




>
> Uros.

Attachment: patch-abs.txt
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]