This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Improve x86 and + rotate (PR target/82498)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 11 Oct 2017 22:59:03 +0200
- Subject: [PATCH] Improve x86 and + rotate (PR target/82498)
- Authentication-results: sourceware.org; auth=none
- Authentication-results: ext-mx01.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
- Authentication-results: ext-mx01.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=jakub at redhat dot com
- Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com AA62D81DE7
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
As can be seen on the testcase below, the *<rotate_insn><mode>3_mask
insn/splitter is able to optimize only the case when the and is
performed in SImode and then the result subreged into QImode,
while if the computation is already in QImode, we don't handle it.
Fixed by adding another pattern, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?
2017-10-11 Jakub Jelinek <jakub@redhat.com>
PR target/82498
* config/i386/i386.md (*<rotate_insn><mode>3_mask_1): New
define_insn_and_split.
* gcc.target/i386/pr82498.c: New test.
--- gcc/config/i386/i386.md.jj 2017-10-10 11:54:11.000000000 +0200
+++ gcc/config/i386/i386.md 2017-10-11 19:24:27.673606778 +0200
@@ -11187,6 +11187,26 @@ (define_insn_and_split "*<rotate_insn><m
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
+(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (any_rotate:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
+ (match_operand:QI 3 "const_int_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (any_rotate:SWI48 (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
;; Implement rotation using two double-precision
;; shift instructions and a scratch register.
--- gcc/testsuite/gcc.target/i386/pr82498.c.jj 2017-10-11 20:21:10.677088306 +0200
+++ gcc/testsuite/gcc.target/i386/pr82498.c 2017-10-11 20:22:31.569101564 +0200
@@ -0,0 +1,52 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+unsigned
+f1 (unsigned x, unsigned char y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f2 (unsigned x, unsigned y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f3 (unsigned x, unsigned short y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f5 (unsigned x, unsigned int y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f6 (unsigned x, unsigned short y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
Jakub