[PATCH] Improve x86 and + rotate (PR target/82498)

Jakub Jelinek jakub@redhat.com
Thu Oct 12 19:26:00 GMT 2017


On Thu, Oct 12, 2017 at 10:40:22AM +0200, Uros Bizjak wrote:
> > So, if you aren't against it, I can extend the patch to handle the 4
> > other mask patterns.  As for other modes: SImode is already handled,
> > DImode is not a problem because the FEs already truncate the shift
> > counts to integer_type_node, and for HImode I haven't seen a problem,
> > probably because most tunings avoid HImode math, so it isn't worth
> > optimizing.
> 
> OK, I think that we can live with 4 new patterns. Since these are all
> written in the same way (as in the patch you posted), the amended
> patch is pre-approved for mainline.
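
To recap what these patterns do (an informal sketch, not part of the
patch): when a shift or rotate count is explicitly masked with
bitsize-1, as in

  unsigned
  rotl (unsigned x, unsigned char y)	/* illustrative name */
  {
    /* mask == bitsize-1, same idiom as the testcases below */
    y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
    return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
  }

the "and" is redundant, because the hardware shift and rotate
instructions truncate the count to the operand bitsize anyway.  The new
define_insn_and_split patterns match the masked form and split it to the
plain shift/rotate, so no and instruction needs to be emitted.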

Thanks, here is what I've committed to trunk after another bootstrap/regtest
on x86_64-linux and i686-linux:

2017-10-12  Jakub Jelinek  <jakub@redhat.com>

	PR target/82498
	* config/i386/i386.md (*ashl<mode>3_mask_1,
	*<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
	*<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
	patterns.

	* gcc.target/i386/pr82498-1.c: New test.
	* gcc.target/i386/pr82498-2.c: New test.

--- gcc/config/i386/i386.md.jj	2017-10-11 22:37:55.933863355 +0200
+++ gcc/config/i386/i386.md	2017-10-12 11:30:38.191535974 +0200
@@ -10228,6 +10228,26 @@ (define_insn_and_split "*ashl<mode>3_mas
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
+(define_insn_and_split "*ashl<mode>3_mask_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand")
+	(ashift:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand")
+	  (and:QI
+	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (ashift:SWI48 (match_dup 1)
+			 (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "*bmi2_ashl<mode>3_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
@@ -10728,6 +10748,26 @@ (define_insn_and_split "*<shift_insn><mo
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
+(define_insn_and_split "*<shift_insn><mode>3_mask_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand")
+	  (and:QI
+	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_shiftrt:SWI48 (match_dup 1)
+			      (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn_and_split "*<shift_insn><mode>3_doubleword"
   [(set (match_operand:DWI 0 "register_operand" "=&r")
 	(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
@@ -11187,6 +11227,26 @@ (define_insn_and_split "*<rotate_insn><m
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
+(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand")
+	  (and:QI
+	    (match_operand:QI 2 "register_operand")
+	    (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_rotate:SWI48 (match_dup 1)
+			     (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 ;; Implement rotation using two double-precision
 ;; shift instructions and a scratch register.
 
@@ -11494,6 +11554,30 @@ (define_insn_and_split "*<btsc><mode>_ma
       (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = gen_lowpart (QImode, operands[1]);")
 
+(define_insn_and_split "*<btsc><mode>_mask_1"
+  [(set (match_operand:SWI48 0 "register_operand")
+	(any_or:SWI48
+	  (ashift:SWI48
+	    (const_int 1)
+	    (and:QI
+	      (match_operand:QI 1 "register_operand")
+	      (match_operand:QI 2 "const_int_operand")))
+	  (match_operand:SWI48 3 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT
+   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (any_or:SWI48
+	     (ashift:SWI48 (const_int 1)
+			   (match_dup 1))
+	     (match_dup 3)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "*btr<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(and:SWI48
@@ -11535,6 +11619,30 @@ (define_insn_and_split "*btr<mode>_mask"
       (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = gen_lowpart (QImode, operands[1]);")
 
+(define_insn_and_split "*btr<mode>_mask_1"
+  [(set (match_operand:SWI48 0 "register_operand")
+	(and:SWI48
+	  (rotate:SWI48
+	    (const_int -2)
+	    (and:QI
+	      (match_operand:QI 1 "register_operand")
+	      (match_operand:QI 2 "const_int_operand")))
+	  (match_operand:SWI48 3 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT
+   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+	   (and:SWI48
+	     (rotate:SWI48 (const_int -2)
+			   (match_dup 1))
+	     (match_dup 3)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 ;; These instructions are never faster than the corresponding
 ;; and/ior/xor operations when using immediate operand, so with
 ;; 32-bit there's no point.  But in 64-bit, we can't hold the
--- gcc/testsuite/gcc.target/i386/pr82498-1.c.jj	2017-10-12 11:18:48.905128703 +0200
+++ gcc/testsuite/gcc.target/i386/pr82498-1.c	2017-10-12 11:18:48.905128703 +0200
@@ -0,0 +1,52 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+unsigned
+f1 (unsigned x, unsigned char y)
+{
+  if (y == 0)
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f2 (unsigned x, unsigned y)
+{
+  if (y == 0)
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f3 (unsigned x, unsigned short y)
+{
+  if (y == 0)
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f5 (unsigned x, unsigned int y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f6 (unsigned x, unsigned short y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
--- gcc/testsuite/gcc.target/i386/pr82498-2.c.jj	2017-10-12 12:14:53.452321121 +0200
+++ gcc/testsuite/gcc.target/i386/pr82498-2.c	2017-10-12 12:08:53.000000000 +0200
@@ -0,0 +1,46 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+int
+f1 (int x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return x >> y;
+}
+
+unsigned
+f2 (unsigned x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return x >> y;
+}
+
+unsigned
+f3 (unsigned x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return x << y;
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return x | (1U << y);
+}
+
+unsigned
+f5 (unsigned x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return x ^ (1U << y);
+}
+
+unsigned
+f6 (unsigned x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x + 2) & ~(1U << y);
+}
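
With the new splitters, a function like f2 above should compile to a
bare shift (e.g. shrl %cl, %eax), with the redundant andl $31 dropped
because the hardware already truncates the count; the
scan-assembler-not {\mand[bwlq]\M} directives check exactly that.
(This is an informal description of the expected codegen, not pasted
compiler output.)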


	Jakub


