This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Two -mxop wrong-code fixes (PR target/56866)


Hi!

This patch fixes two wrong-code bugs with -mxop.
One is that vpmacsdqh instruction can be only used for vec_widen_smult_odd_v4si
but not vec_widen_umult_odd_v4si.  Consider we have
unsigned V4SImode h* with arguments
{ 3, 3, 3, 3 } h* { 0xaaaaaaab, 0xaaaaaaab, 0xaaaaaaab, 0xaaaaaaab }
(but not known at compile time).  If we use vpmacsdqh, it sign-extends
the numbers and thus computes (3 * 0xffffffffaaaaaaabULL) >> 32,
i.e. 0xffffffff, while we want (3 * 0xaaaaaaabULL) >> 32, i.e. 2.

The second bug is in wrong shift count for immediate xop_rotr.
We want element bitsize - immediate to transform the r>> immediate
into r<< immediate, but (<ssescalarnum> * 8) is correct for that only
for V4SImode - 32.  For V2DImode it is 16 instead of the desired
64, for V8HImode it is 64 instead of the desired 16 and for V16QImode
it is 128 instead of the desired 8. 

Bootstrapped/regtested on x86_64-linux, configured --with-arch=bdver2,
fixes:

-FAIL: gcc.c-torture/execute/pr51581-1.c execution,  -O3 -fomit-frame-pointer 
-FAIL: gcc.c-torture/execute/pr51581-1.c execution,  -O3 -fomit-frame-pointer -funroll-loops 
-FAIL: gcc.c-torture/execute/pr51581-1.c execution,  -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions 
-FAIL: gcc.c-torture/execute/pr51581-1.c execution,  -O3 -g 
-FAIL: gcc.c-torture/execute/pr51581-2.c execution,  -O3 -fomit-frame-pointer 
-FAIL: gcc.c-torture/execute/pr51581-2.c execution,  -O3 -fomit-frame-pointer -funroll-loops 
-FAIL: gcc.c-torture/execute/pr51581-2.c execution,  -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions 
-FAIL: gcc.c-torture/execute/pr51581-2.c execution,  -O3 -g 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O1 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O2 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -fomit-frame-pointer 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -fomit-frame-pointer -funroll-loops 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -g 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -Os 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -Og -g 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O2 -flto -fno-use-linker-plugin -flto-partition=none 
-FAIL: gcc.c-torture/execute/pr53645.c execution,  -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects 
-FAIL: gcc.c-torture/execute/pr56866.c execution,  -O3 -fomit-frame-pointer 
-FAIL: gcc.c-torture/execute/pr56866.c execution,  -O3 -fomit-frame-pointer -funroll-loops 
-FAIL: gcc.c-torture/execute/pr56866.c execution,  -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions 
-FAIL: gcc.c-torture/execute/pr56866.c execution,  -O3 -g 
-FAIL: gcc.dg/vect/pr51581-1.c execution test
-FAIL: gcc.dg/vect/pr51581-2.c execution test
-FAIL: gcc.dg/vect/pr51581-3.c execution test
-FAIL: gcc.dg/vect/pr51581-1.c -flto execution test
-FAIL: gcc.dg/vect/pr51581-2.c -flto execution test
-FAIL: gcc.dg/vect/pr51581-3.c -flto execution test
-FAIL: gcc.target/i386/avx-mul-1.c execution test
-FAIL: gcc.target/i386/avx-pr51581-1.c execution test
-FAIL: gcc.target/i386/avx-pr51581-2.c execution test
-FAIL: gcc.target/i386/pr56866.c execution test
-FAIL: gcc.target/i386/sse2-mul-1.c execution test
-FAIL: gcc.target/i386/sse4_1-mul-1.c execution test
-FAIL: gcc.target/i386/xop-mul-1.c execution test

failures that appear with stock gcc just with the testsuite/
part of the patch applied.  Ok for trunk/4.8 and partly for 4.7
(the i386.c bug has been introduced in 2012-06-25 but the sse.md
bug existed in 4.7 already)?

2013-04-26  Jakub Jelinek  <jakub@redhat.com>

	PR target/56866
	* config/i386/i386.c (ix86_expand_mul_widen_evenodd): Don't
	use xop_pmacsdqh if uns_p.
	* config/i386/sse.md (xop_rotr<mode>3): Fix up computation of
	the immediate rotate count.

	* gcc.c-torture/execute/pr56866.c: New test.
	* gcc.target/i386/pr56866.c: New test.

--- gcc/config/i386/i386.c.jj	2013-04-22 10:26:22.000000000 +0200
+++ gcc/config/i386/i386.c	2013-04-26 10:28:51.793534370 +0200
@@ -40841,7 +40841,7 @@ ix86_expand_mul_widen_evenodd (rtx dest,
      the even slots.  For some cpus this is faster than a PSHUFD.  */
   if (odd_p)
     {
-      if (TARGET_XOP && mode == V4SImode)
+      if (TARGET_XOP && mode == V4SImode && !uns_p)
 	{
 	  x = force_reg (wmode, CONST0_RTX (wmode));
 	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
--- gcc/config/i386/sse.md.jj	2013-04-02 20:24:37.000000000 +0200
+++ gcc/config/i386/sse.md	2013-04-26 13:25:32.729590863 +0200
@@ -9924,7 +9924,8 @@ (define_insn "xop_rotr<mode>3"
 	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
   "TARGET_XOP"
 {
-  operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
+  operands[3]
+    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
   return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
 }
   [(set_attr "type" "sseishft")
--- gcc/testsuite/gcc.c-torture/execute/pr56866.c.jj	2013-04-26 13:48:21.490810656 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr56866.c	2013-04-26 13:46:10.000000000 +0200
@@ -0,0 +1,45 @@
+/* PR target/56866 */
+
+int
+main ()
+{
+#if __CHAR_BIT__ == 8 && __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_SHORT__ == 2
+  unsigned long long wq[256], rq[256];
+  unsigned int wi[256], ri[256];
+  unsigned short ws[256], rs[256];
+  unsigned char wc[256], rc[256];
+  int t;
+
+  __builtin_memset (wq, 0, sizeof wq);
+  __builtin_memset (wi, 0, sizeof wi);
+  __builtin_memset (ws, 0, sizeof ws);
+  __builtin_memset (wc, 0, sizeof wc);
+  wq[0] = 0x0123456789abcdefULL;
+  wi[0] = 0x01234567;
+  ws[0] = 0x4567;
+  wc[0] = 0x73;
+
+  asm volatile ("" : : "g" (wq), "g" (wi), "g" (ws), "g" (wc) : "memory");
+
+  for (t = 0; t < 256; ++t)
+    rq[t] = (wq[t] >> 8) | (wq[t] << (sizeof (wq[0]) * __CHAR_BIT__ - 8));
+  for (t = 0; t < 256; ++t)
+    ri[t] = (wi[t] >> 8) | (wi[t] << (sizeof (wi[0]) * __CHAR_BIT__ - 8));
+  for (t = 0; t < 256; ++t)
+    rs[t] = (ws[t] >> 9) | (ws[t] << (sizeof (ws[0]) * __CHAR_BIT__ - 9));
+  for (t = 0; t < 256; ++t)
+    rc[t] = (wc[t] >> 5) | (wc[t] << (sizeof (wc[0]) * __CHAR_BIT__ - 5));
+
+  asm volatile ("" : : "g" (rq), "g" (ri), "g" (rs), "g" (rc) : "memory");
+
+  if (rq[0] != 0xef0123456789abcdULL || rq[1])
+    __builtin_abort ();
+  if (ri[0] != 0x67012345 || ri[1])
+    __builtin_abort ();
+  if (rs[0] != 0xb3a2 || rs[1])
+    __builtin_abort ();
+  if (rc[0] != 0x9b || rc[1])
+    __builtin_abort ();
+#endif
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/pr56866.c.jj	2013-04-26 13:50:18.798199429 +0200
+++ gcc/testsuite/gcc.target/i386/pr56866.c	2013-04-26 13:52:13.428603267 +0200
@@ -0,0 +1,16 @@
+/* PR target/56866 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O3 -mxop" } */
+
+#define main xop_test_main
+#include "../../gcc.c-torture/execute/pr56866.c"
+#undef main
+
+#include "xop-check.h"
+
+static void
+xop_test (void)
+{
+  xop_test_main ();
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]