This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][AArch64] PR target/85512: Tighten SIMD right shift immediate constraints



On 24/04/18 17:09, James Greenhalgh wrote:
On Tue, Apr 24, 2018 at 04:38:31PM +0100, Kyrill Tkachov wrote:
Hi all,

In this testcase it is possible to generate an invalid SISD shift of zero:
Error: immediate value out of range 1 to 64 at operand 3 -- `sshr v9.2s,v0.2s,0'

The SSHR and USHR instructions require a shift from 1 up to the element size.
However our constraints on the scalar shifts that generate these patterns
allow a shift amount of zero as well. The pure GP-reg ASR and LSR
instructions allow a shift amount of zero.

It is unlikely that a shift of zero will survive till the end of compilation,
but it's not impossible, as this PR shows.

The patch tightens up the constraints in the offending patterns by adding two
new constraints that allow shift amounts [1,32] and [1,64] and using them in
*aarch64_ashr_sisd_or_int_<mode>3
and *aarch64_lshr_sisd_or_int_<mode>3.
The left-shift SISD instructions SHL and USHL allow a shift amount of zero so
don't need adjustment The vector shift patterns that map down to SSHR and
USHR already enforce the correct immediate range.

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?
OK if the release managers are fine with this.

I was a little nervous that we were tightening restrictions on an instruction
which has caused trouble in the past, but this patch looks right to me.

Thanks James.

I've cleaned up the testcase a bit to leave only the function that generates the invalid instruction,
making it shorter.

Jakub, is the patch ok to go in for GCC 8 from your perspective?

Thanks,
Kyrill

Thanks,
James

2018-04-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

      PR target/85512
      * config/aarch64/constraints.md (Usg, Usj): New constraints.
      * config/aarch64/iterators.md (cmode_simd): New mode attribute.
      * config/aarch64/aarch64.md (*aarch64_ashr_sisd_or_int_<mode>3):
      Use the above on operand 2.  Reindent.
      (*aarch64_lshr_sisd_or_int_<mode>3): Likewise.

2018-04-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

      PR target/85512
      * gcc.dg/pr85512.c: New test.

commit 0856b82380edda94b4e70b19522366fb80ebecff
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Tue Apr 24 13:51:19 2018 +0100

    [AArch64] PR target/85512

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4924168..953edb7 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4403,7 +4403,8 @@ (define_insn "*aarch64_lshr_sisd_or_int_<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,r,w,&w,&w")
 	(lshiftrt:GPI
 	 (match_operand:GPI 1 "register_operand" "r,r,w,w,w")
-	 (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,r,Us<cmode>,w,0")))]
+	 (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>"
+			      "Us<cmode>,r,Us<cmode_simd>,w,0")))]
   ""
   "@
    lsr\t%<w>0, %<w>1, %2
@@ -4448,9 +4449,10 @@ (define_split
 ;; Arithmetic right shift using SISD or Integer instruction
 (define_insn "*aarch64_ashr_sisd_or_int_<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,r,w,&w,&w")
-        (ashiftrt:GPI
-          (match_operand:GPI 1 "register_operand" "r,r,w,w,w")
-          (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,r,Us<cmode>,w,0")))]
+	(ashiftrt:GPI
+	  (match_operand:GPI 1 "register_operand" "r,r,w,w,w")
+	  (match_operand:QI 2 "aarch64_reg_or_shift_imm_di"
+			       "Us<cmode>,r,Us<cmode_simd>,w,0")))]
   ""
   "@
    asr\t%<w>0, %<w>1, %2
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index f052103..b5da997 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -153,6 +153,20 @@ (define_constraint "Usf"
        (match_test "!(aarch64_is_noplt_call_p (op)
 		      || aarch64_is_long_call_p (op))")))
 
+(define_constraint "Usg"
+  "@internal
+  A constraint that matches an immediate right shift constant in SImode
+  suitable for a SISD instruction."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 1, 32)")))
+
+(define_constraint "Usj"
+  "@internal
+  A constraint that matches an immediate right shift constant in DImode
+  suitable for a SISD instruction."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 1, 64)")))
+
 (define_constraint "UsM"
   "@internal
   A constraint that matches the immediate constant -1."
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 33933ec..bd97408 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -589,6 +589,9 @@ (define_mode_attr lconst2 [(SI "UsO") (DI "UsP")])
 ;; Map a mode to a specific constraint character.
 (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")])
 
+;; Map modes to Usg and Usj constraints for SISD right shifts
+(define_mode_attr cmode_simd [(SI "g") (DI "j")])
+
 (define_mode_attr Vtype [(V8QI "8b") (V16QI "16b")
 			 (V4HI "4h") (V8HI  "8h")
                          (V2SI "2s") (V4SI  "4s")
diff --git a/gcc/testsuite/gcc.dg/pr85512.c b/gcc/testsuite/gcc.dg/pr85512.c
new file mode 100644
index 0000000..b581f83
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr85512.c
@@ -0,0 +1,47 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -fno-if-conversion" } */
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+u64
+bar0(u8 u8_0, u16 u16_0, u32 u32_0, u64 u64_0, u8 u8_1, u16 u16_1, u32 u32_1, u64 u64_1, u8 u8_2, u16 u16_2, u32 u32_2, u64 u64_2, u8 u8_3, u16 u16_3, u32 u32_3, u64 u64_3);
+u64
+bar1(u8 u8_0, u16 u16_0, u32 u32_0, u64 u64_0, u8 u8_1, u16 u16_1, u32 u32_1, u64 u64_1, u8 u8_2, u16 u16_2, u32 u32_2, u64 u64_2, u8 u8_3, u16 u16_3, u32 u32_3, u64 u64_3);
+u64
+bar2(u8 u8_0, u16 u16_0, u32 u32_0, u64 u64_0, u8 u8_1, u16 u16_1, u32 u32_1, u64 u64_1, u8 u8_2, u16 u16_2, u32 u32_2, u64 u64_2, u8 u8_3, u16 u16_3, u32 u32_3, u64 u64_3);
+u64
+bar0(u8 u8_0, u16 u16_0, u32 u32_0, u64 u64_0, u8 u8_1, u16 u16_1, u32 u32_1, u64 u64_1, u8 u8_2, u16 u16_2, u32 u32_2, u64 u64_2, u8 u8_3, u16 u16_3, u32 u32_3, u64 u64_3)
+{
+l0:     u32_2 += __builtin_add_overflow_p((u32)(u64)u32_0, (u16)-(u32)u64_2, (u64)-(u64)(unsigned)__builtin_parityll((u16)(u16)(((u64)0x1a6cb5b10 << 0))));
+        u8_3 <<= __builtin_add_overflow((u16)~(u32)u64_2, (u16)(u8)(unsigned)__builtin_popcountll((u16)-(u8)(((u64)0x725582 << 0))), &u32_1);
+        u64_1 -= (u8)~(u8)(0);
+        u16_3 = (u16_3 >> ((u32)~(u8)(1) & 15)) | (u16_3 << ((16 - ((u32)~(u8)(1) & 15)) & 15));
+        u8_2 = __builtin_mul_overflow((u64)(u8)__builtin_bswap32((u32)-(u32)u16_0), (u64)(u16)u16_2, &u8_3) ? (u64)~(u8)u8_3 : (u16)(u16)(((u64)0x7ffffff << 0));
+        u32_0 *= (u8)(u8)(((u64)0x1ffffffffffffff << 0));
+        u32_1 >>= (u64)~(u64)(((u64)0x61bf860d09fb3a << 0)) >= (u8)(u16)(unsigned)__builtin_parityll((u16)(u8)(((u64)0x6 << 0)));
+        u16_0 >>= __builtin_add_overflow_p((u64)-(u8)(((u64)0x68b4dda55e3 << 0)), (u16)(u64)__builtin_bswap64((u16)~(u32)__builtin_bswap32((u32)(u32)u8_3)), (u64)(u16)u16_1);
+        u64_0 += (u8)-(u64)(((u64)0xcc88a5c0292b6ba0 << 0));
+        u32_0 += __builtin_mul_overflow((u8)-(u64)(((u64)0xc89172ea72a << 0)), (u64)(u64)u8_2, &u8_3);
+        u64_0 >>= __builtin_add_overflow((u32)-(u64)(0), (u32)-(u16)u8_1, &u8_2);
+        u16_1 >>= (u32)(u64)u16_1 & 15;
+        u16_3 ^= (u16)~(u16)(1);
+        u32_2 &= (u16)-(u32)(0);
+l1:     u32_3 = (u32_3 >> ((u64)(u32)u32_1 & 31)) | (u32_3 << ((32 - ((u64)(u32)u32_1 & 31)) & 31));
+        u64_1 |= (u64)~(u64)(unsigned)__builtin_parityll((u8)-(u32)u32_1);
+        u8_3 *= __builtin_add_overflow((u64)-(u32)(((u64)0xffff << 0)), (u32)~(u64)(((u64)0x117e3e << 0)), &u32_2);
+        u16_3 = (u16_3 << ((u64)~(u8)(((u64)0xf78e81 << 0)) & 15)) | (u16_3 >> ((16 - ((u64)~(u8)(((u64)0xf78e81 << 0)) & 15)) & 15));
+        u64_1 = (u64)(u16)bar1((u8)((u32)(u64)(((u64)0x3ffffff << 0))), (u16)((u8)(u16)(((u64)0x5b << 0))), (u32)((u32)~(u8)(1)), (u64)((u8)(u16)(unsigned)__builtin_clrsb((u32)~(u32)(unsigned)__builtin_clrsbll((u8)(u16)(((u64)0xffffffff << 0))))), (u8)((u8)-(u64)(((u64)0x3e43180756484 << 0))), (u16)((u8)(u16)(((u64)0x7 << 0))), (u32)((u64)(u32)(((u64)0x285fa35c89 << 0))), (u64)((u32)(u8)(((u64)0x3ffff << 0))), (u8)((u16)-(u32)(((u64)0x73d01 << 0))), (u16)((u16)-(u16)(((u64)0x1fffffffffffff << 0))), (u32)((u16)(u64)(0)), (u64)((u16)(u32)(((u64)0x4c << 0))), (u8)((u64)-(u64)(((u64)0x3fffffffffffff << 0))), (u16)((u16)~(u16)(((u64)0xfffffffff << 0))), (u32)((u64)(u16)(((u64)0x7edb0cc1c << 0))), (u64)((u32)(u64)(((u64)0x1ffffffffff << 0)))) > (u16)-(u64)(((u64)0x7 << 0)) ? (u16)(u8)u64_2 : (u64)(u16)u32_2;
+        u32_0 >>= (u8)(u16)(((u64)0x32 << 0)) != (u16)-(u64)u16_3;
+        u16_1 *= __builtin_mul_overflow_p((u64)(u32)u32_1, (u16)(u8)(((u64)0x4ad149d89bf0be6 << 0)), (u64)(u32)(((u64)0x1bd7589 << 0)));
+        u8_1 &= (u64)-(u64)u8_0;
+        u16_3 %= (u16)(u16)(unsigned)__builtin_clrsbll((u32)~(u32)(((u64)0x3db8721fd79 << 0)));
+        u8_3 >>= (u32)(u8)u8_1 & 7;
+        u64_1 |= (u8)-(u64)(unsigned)__builtin_ffsll((u32)-(u64)bar2((u8)((u16)(u16)(((u64)0x3 << 0))), (u16)((u32)-(u8)(((u64)0x86af5 << 0))), (u32)((u16)-(u64)__builtin_bswap64((u64)-(u64)(0))), (u64)((u16)(u16)(((u64)0x75138426ec84c6 << 0))), (u8)((u64)(u32)(((u64)0x7fffffffff << 0))), (u16)((u32)~(u8)(((u64)0x71aa939dbdf3 << 0))), (u32)((u16)(u32)(((u64)0x8776ee7dbb651a2d << 0))), (u64)((u8)(u64)(0)), (u8)((u16)(u8)(unsigned)__builtin_clrsbll((u16)~(u32)(((u64)0x8df94655ec8430 << 0)))), (u16)((u16)-(u64)(unsigned)__builtin_clrsbll((u32)(u64)(((u64)0x3090a532 << 0)))), (u32)((u16)~(u16)(1)), (u64)((u8)(u32)(((u64)0x7fffffffffff << 0))), (u8)((u32)~(u64)(0)), (u16)((u8)~(u8)(unsigned)__builtin_ffs((u64)(u64)(0))), (u32)((u16)-(u8)(((u64)0x5dfe702 << 0))), (u64)((u8)(u64)(((u64)0x68f2a584e0 << 0)))));
+        u32_3 >>= (u32)-(u32)u32_2 & 31;
+        u8_3 = (u8_3 >> ((u32)-(u8)u8_1 & 7)) | (u8_3 << ((8 - ((u32)-(u8)u8_1 & 7)) & 7));
+        u8_2 >>= (u16)-(u64)u64_3 & 7;
+        u32_1 = (u32_1 >> ((u16)(u16)(1) & 31)) | (u32_1 << ((32 - ((u16)(u16)(1) & 31)) & 31));
+        return u8_0 + u16_0 + u32_0 + u64_0 + u8_1 + u16_1 + u32_1 + u64_1 + u8_2 + u16_2 + u32_2 + u64_2 + u8_3 + u16_3 + u32_3 + u64_3;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]