[PATCH] arm: Fix mve_vshlq* [PR99593]

Fri Mar 19 12:09:08 GMT 2021

> -----Original Message-----
> From: Jakub Jelinek <jakub@redhat.com>
> Sent: 18 March 2021 10:08
> To: Richard Earnshaw <Richard.Earnshaw@arm.com>; Ramana
> Radhakrishnan <Ramana.Radhakrishnan@arm.com>; Kyrylo Tkachov
> <Kyrylo.Tkachov@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Christophe Lyon <christophe.lyon@linaro.org>
> Subject: [PATCH] arm: Fix mve_vshlq* [PR99593]
> 
> Hi!
> 
> As mentioned in the PR, before the
> r11-6708-gbfab355012ca0f5219da8beb04f2fdaf757d34b7 change, the
> v[al]shr<mode>3 expanders were expanding the shifts by register to
> gen_ashl<mode>3_{,un}signed, which don't support immediate CONST_VECTOR
> shift amounts, but they now expand to mve_vshlq_<supf><mode>, which does.
> The testcase ICEs because the constraint doesn't match the predicate and
> because LRA works solely with the constraints, so it can, e.g. from
> REG_EQUAL, propagate there a CONST_VECTOR which matches the constraint
> but fails the predicate; only later on will other passes notice that the
> predicate fails and ICE.
> 
> Fixed by adding a constraint that matches the immediate part of the
> predicate.
> 
> Bootstrapped/regtested on armv7hl-linux-gnueabi (both the whole patch and
> just the testcase to verify the test in its current form FAILs with
> unmodified trunk), ok for trunk?

Ok.
Thanks,
Kyrill

> 
> 2021-03-18  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR target/99593
> 	* config/arm/constraints.md (Ds): New constraint.
> 	* config/arm/vec-common.md (mve_vshlq_<supf><mode>): Use w,Ds
> 	constraint instead of w,Dm.
> 
> 2021-03-18  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	* g++.target/arm/pr99593.C: New test.
> 
> --- gcc/config/arm/vec-common.md.jj	2021-01-29 11:54:15.650661610
> +0100
> +++ gcc/config/arm/vec-common.md	2021-03-15 10:50:26.570150770
> +0100
> @@ -299,7 +299,7 @@ (define_expand "movmisalign<mode>"
>  (define_insn "mve_vshlq_<supf><mode>"
>    [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
>  	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand"
> "w,w")
> -		       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon"
> "w,Dm")]
> +		       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon"
> "w,Ds")]
>  	 VSHLQ))]
>    "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
>    "@
> --- gcc/config/arm/constraints.md.jj	2021-01-04 10:25:44.136173777
> +0100
> +++ gcc/config/arm/constraints.md	2021-03-15 10:49:28.660795496
> +0100
> @@ -32,7 +32,7 @@
> 
>  ;; The following multi-letter normal constraints have been used:
>  ;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, DN, Dm, Dl, DL, Do, Dv, Dy, Di,
> -;;			 Dt, Dp, Dz, Tu, Te
> +;;			 Ds, Dt, Dp, Dz, Tu, Te
>  ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
>  ;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra,
>  ;;		     Rg, Ri
> @@ -412,6 +412,14 @@ (define_constraint "Dt"
>    (and (match_code "const_double")
>         (match_test "TARGET_32BIT && vfp3_const_double_for_fract_bits
> (op)")))
> 
> +(define_constraint "Ds"
> + "@internal
> +  In ARM/Thumb-2 state a const_vector which can be used as immediate
> +  in vshl instruction."
> + (and (match_code "const_vector")
> +      (match_test "TARGET_32BIT
> +		   && imm_for_neon_lshift_operand (op, GET_MODE (op))")))
> +
>  (define_constraint "Dp"
>   "@internal
>    In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32
> with bits operation"
> --- gcc/testsuite/g++.target/arm/pr99593.C.jj
> +++ gcc/testsuite/g++.target/arm/pr99593.C
> @@ -0,0 +1,129 @@
> +/* { dg-do compile } */
> +/* { dg-options "-std=c++17 -O2 -mfloat-abi=hard -mcpu=generic-armv7-a" }
> */
> +// { dg-require-effective-target arm_hard_ok }
> +// { dg-require-effective-target arm_neon_ok }
> +// { dg-add-options arm_neon }
> +
> +#include <arm_neon.h>
> +
> +typedef uint16x4_t e;
> +typedef int16x4_t f;
> +typedef int32x4_t g;
> +typedef float32x4_t h;
> +typedef uint32x4_t i;
> +g j, p;
> +g k(int l) { return vdupq_n_s32(l); }
> +i n(f l) { return (i)vmovl_u16((e)l); }
> +template <int, typename> struct q;
> +template <int r, typename aa> q<r, aa> operator<(aa s, q<r, aa> t) {
> +  return q<r, aa>(s) < t;
> +}
> +template <typename ab, typename ac, int r> q<r, ab> ad(const q<r, ac> &);
> +typedef q<4, int> ae;
> +template <> class q<4, float> {
> + public:
> + q(h af) : ag(af) {}
> +  q(float) {}
> +  static q ah(void *ai) {
> +    float *l = (float *)ai;
> +    return vld1q_f32(l);
> +  }
> +  q operator+(q o) {
> +    h l = ag, m = o.ag;
> +    return vaddq_f32(l, m);
> +  }
> +  q operator*(q) {
> +    h l = ag, m;
> +    return vmulq_f32(l, m);
> +  }
> +  h ag;
> +};
> +template <> class q<4, unsigned short> {
> + public:
> + q(f af) : ag(af) {}
> +  static q ah(void *ai) {
> +    unsigned short *l = (unsigned short *)ai;
> +    return (f)vld1_s16((int16_t *)l);
> +  }
> +  void aj() {
> +    f m = ag;
> +    vst1_u16(0, (e)m);
> +  }
> +  f ag;
> +};
> +template <> class q<4, int> {
> + public:
> + q(g af) : ag(af) {}
> +  q(int u) { ag = k(u); }
> +  static q ah(void *ai) {
> +    int32_t *l = (int32_t *)ai;
> +    return vld1q_s32(l);
> +  }
> +  q operator&(q o) {
> +    g v = ag & o.ag;
> +    return v;
> +  }
> +  q operator|(q o) {
> +    g w = ag | o.ag;
> +    return w;
> +  }
> +  q operator^(q) {
> +    g x = ag ^ p;
> +    return x;
> +  }
> +  q operator>>(int ak) { return ag >> q(ak).ag; }
> +  q operator<(q) {
> +    g y, z = j < ag;
> +    y = (g)z;
> +    return y;
> +  }
> +  g ag;
> +};
> +template <> ae ad(const q<4, unsigned short> &al) { return g(n(al.ag)); }
> +template <> q<4, unsigned short> ad(const ae &al) {
> +  i l(i(al.ag));
> +  return (f)vmovn_s32((g)l);
> +}
> +q<4, float> am(long long an) {
> +  q ao = q<4, unsigned short>::ah(&an);
> +  ae ak = ad<int>(ao), ap = ak & 8000, aq = ak ^ ap, ar = 55 < aq, as(aq);
> +  q at = as & ar;
> +  ae au = ap | at;
> +  return q<4, float>::ah(&au);
> +}
> +q<4, unsigned short> av(q<4, float> aw) {
> +  ae ak = ae::ah(&aw), ap = ak & 80000000, aq = ap, ax = 5, as = aq >> 3,
> +    ay = 6;
> +  q az = ax & as;
> +  ae au = ay | az;
> +  return ad<unsigned short>(au);
> +}
> +struct ba {
> +  typedef int bb;
> +  static q<4, float> bc(int s) { return am(s); }
> +};
> +q<4, float> bd(q<4, float> s) { return s * 0; }
> +template <typename be> void bf(void *bg, void *al, int bh, int bi) {
> +  int bj;
> +  auto bk(static_cast<typename be::bb *>(al) + bh),
> +    d = static_cast<typename be::bb *>(bg),
> +    bl = be::bc(static_cast<typename be::bb *>(al)[0]), bm = be::bc(0),
> +    c = bm;
> +  for (; bi;) {
> +    auto a = c, bn = be::bc(static_cast<typename be::bb *>(al)[1]),
> +      bo = be::bc(1);
> +    q bp = bn;
> +    q bq = bp;
> +    auto b = bq + bo;
> +    bl = be::bc(static_cast<typename be::bb *>(al)[2]);
> +    bm = be::bc(bk[2]);
> +    c = bl + bm;
> +    q br = a + b;
> +    auto bs = br;
> +    q bt = bd(bs);
> +    av(bt).aj();
> +    d[0] = bj;
> +  }
> +}
> +int bu;
> +void bv() { bf<ba>(0, 0, 0, bu); }
> 
> 	Jakub