This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix x86 bmi2 bzhi patterns (PR target/65368)


Hi!

The following testcase fails because the RTL pattern of bmi2_bzhi_*3
didn't really model (not even close) what the instruction was doing,
so when the combiner attempted to simplify it for constant arguments, it
simplified it into something wrong.

Fixed thusly.  I verified that we don't regress on combining memory
for the middle operand, and bootstrapped/regtested on x86_64-linux and
i686-linux (both Haswell-E).  Ok for trunk (and eventually for backporting)?

2015-03-10  Jakub Jelinek  <jakub@redhat.com>

	PR target/65368
	* config/i386/i386.md (bmi2_bzhi_<mode>3): Removed define_insn,
	new define_expand.
	(*bmi2_bzhi_<mode>3, *bmi2_bzhi_<mode>3_1): New define_insns.

	* gcc.target/i386/bmi2-bzhi-2.c: New test.

--- gcc/config/i386/i386.md.jj	2015-03-09 08:05:13.000000000 +0100
+++ gcc/config/i386/i386.md	2015-03-10 09:37:56.615909734 +0100
@@ -12678,17 +12678,53 @@ (define_insn "*bmi_blsr_<mode>"
    (set_attr "mode" "<MODE>")])
 
 ;; BMI2 instructions.
-(define_insn "bmi2_bzhi_<mode>3"
+(define_expand "bmi2_bzhi_<mode>3"
+  [(parallel
+    [(set (match_operand:SWI48 0 "register_operand")
+	  (zero_extract:SWI48
+	    (match_operand:SWI48 1 "nonimmediate_operand")
+	    (umin:SWI48
+	      (and:SWI48 (match_operand:SWI48 2 "register_operand")
+			 (const_int 255))
+	      (match_dup 3))
+	    (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_BMI2"
+{
+  operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+})
+
+(define_insn "*bmi2_bzhi_<mode>3"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
-	(and:SWI48 (lshiftrt:SWI48 (const_int -1)
-				   (match_operand:SWI48 2 "register_operand" "r"))
-		   (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+	(zero_extract:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+	  (umin:SWI48
+	    (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
+		       (const_int 255))
+	    (match_operand:SWI48 3 "const_int_operand" "n"))
+	  (const_int 0)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI2"
+  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
   "bzhi\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "bitmanip")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
+
+(define_mode_attr k [(SI "k") (DI "q")])
+(define_insn "*bmi2_bzhi_<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(zero_extract:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+	  (umin:SWI48
+	    (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r"))
+	    (match_operand:SWI48 3 "const_int_operand" "n"))
+	  (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "bmi2_pdep_<mode>3"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
--- gcc/testsuite/gcc.target/i386/bmi2-bzhi-2.c.jj	2015-03-10 09:43:06.318843305 +0100
+++ gcc/testsuite/gcc.target/i386/bmi2-bzhi-2.c	2015-03-10 09:42:25.000000000 +0100
@@ -0,0 +1,67 @@
+/* PR target/65368 */
+/* { dg-do assemble { target bmi2 } } */
+/* { dg-options "-O2 -mbmi2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+unsigned int a;
+unsigned long long b;
+
+#define A __attribute__((noinline, noclone))
+
+A unsigned int f1 (void) { return _bzhi_u32 (a, 0); }
+A unsigned int f2 (unsigned int x) { return _bzhi_u32 (x, 0); }
+A unsigned int f3 (void) { return _bzhi_u32 (a, 5); }
+A unsigned int f4 (unsigned int x) { return _bzhi_u32 (x, 5); }
+A unsigned int f5 (void) { return _bzhi_u32 (a, 31); }
+A unsigned int f6 (unsigned int x) { return _bzhi_u32 (x, 31); }
+A unsigned int f7 (void) { return _bzhi_u32 (a, 32); }
+A unsigned int f8 (unsigned int x) { return _bzhi_u32 (x, 32); }
+A unsigned int f9 (void) { return _bzhi_u32 (a, 37); }
+A unsigned int f10 (unsigned int x) { return _bzhi_u32 (x, 37); }
+A unsigned int f11 (void) { return _bzhi_u32 (a, 257); }
+A unsigned int f12 (unsigned int x) { return _bzhi_u32 (x, 257); }
+A unsigned int f13 (void) { return _bzhi_u32 (a, 289); }
+A unsigned int f14 (unsigned int x) { return _bzhi_u32 (x, 289); }
+#ifdef __x86_64__
+A unsigned long long f21 (void) { return _bzhi_u64 (b, 0); }
+A unsigned long long f22 (unsigned long long x) { return _bzhi_u64 (x, 0); }
+A unsigned long long f23 (void) { return _bzhi_u64 (b, 5); }
+A unsigned long long f24 (unsigned long long x) { return _bzhi_u64 (x, 5); }
+A unsigned long long f25 (void) { return _bzhi_u64 (b, 63); }
+A unsigned long long f26 (unsigned long long x) { return _bzhi_u64 (x, 63); }
+A unsigned long long f27 (void) { return _bzhi_u64 (b, 64); }
+A unsigned long long f28 (unsigned long long x) { return _bzhi_u64 (x, 64); }
+A unsigned long long f29 (void) { return _bzhi_u64 (b, 69); }
+A unsigned long long f30 (unsigned long long x) { return _bzhi_u64 (x, 69); }
+A unsigned long long f31 (void) { return _bzhi_u64 (b, 257); }
+A unsigned long long f32 (unsigned long long x) { return _bzhi_u64 (x, 257); }
+A unsigned long long f33 (void) { return _bzhi_u64 (b, 321); }
+A unsigned long long f34 (unsigned long long x) { return _bzhi_u64 (x, 321); }
+#endif
+
+static void
+bmi2_test ()
+{
+  a = -1U;
+  b = -1ULL;
+  if (f1 () != 0 || f2 (-1U) != 0
+      || f3 () != 0x1f || f4 (-1U) != 0x1f
+      || f5 () != 0x7fffffffU || f6 (-1U) != 0x7fffffffU
+      || f7 () != -1U || f8 (-1U) != -1U
+      || f9 () != -1U || f10 (-1U) != -1U
+      || f11 () != 1 || f12 (-1U) != 1
+      || f13 () != -1U || f14 (-1U) != -1U)
+    abort ();
+#ifdef __x86_64__
+  if (f21 () != 0 || f22 (-1ULL) != 0
+      || f23 () != 0x1f || f24 (-1ULL) != 0x1f
+      || f25 () != 0x7fffffffffffffffULL || f26 (-1ULL) != 0x7fffffffffffffffULL
+      || f27 () != -1ULL || f28 (-1ULL) != -1ULL
+      || f29 () != -1ULL || f30 (-1ULL) != -1ULL
+      || f31 () != 1 || f32 (-1ULL) != 1
+      || f33 () != -1ULL || f34 (-1ULL) != -1ULL)
+    abort ();
+#endif
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]