[Bug target/93183] New: SVE only makes the last thing conditional with ifconversion

pinskia at gcc dot gnu.org gcc-bugzilla@gcc.gnu.org
Tue Jan 7 10:35:00 GMT 2020


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93183

            Bug ID: 93183
           Summary: SVE only makes the last thing conditional with
                    ifconversion
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: enhancement
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: pinskia at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64-linux-gnu

Take:
typedef unsigned char uint8_t;

/* Return x unchanged when neither bit 6 nor bit 7 of x is set; otherwise
   return the top bit (0 or 1) of the 8-bit negation of x.  */
static inline uint8_t x264_clip_uint8( uint8_t x )
{
  uint8_t t = -x;          /* negation, truncated back to 8 bits */
  t = t>>7;                /* keep only the top bit: t is now 0 or 1 */
  uint8_t t1 = x&(~63);    /* nonzero iff bit 6 or bit 7 of x is set */
  return t1 ? t : x;       /* select between the computed bit and the input */
}


/* Store x264_clip_uint8(src[x]) into dst[x] for every x in [0, n*16).
   dst and src are both __restrict-qualified; the loop body does not run
   when n*16 is not positive.  */
void mc_weight( uint8_t *__restrict dst, uint8_t *__restrict src, int n)
{
        /* One byte in, one byte out per iteration. */
        for( int x = 0; x < n*16; x++ )
            dst[x] = x264_clip_uint8(src[x]);
}

---- CUT ----
Currently (with -O3 -mcpu=generic+sve), we get the following for the inner loop:
.L3:
        ld1b    z0.b, p0/z, [x1, x3]
        movprfx z2, z0
        and     z2.b, z2.b, #0xc0
        neg     z1.b, p1/m, z0.b
        cmpne   p2.b, p1/z, z2.b, #0
        movprfx z0.b, p2/m, z1.b
        lsr     z0.b, p2/m, z0.b, #7
        st1b    z0.b, p0, [x0, x3]
        incb    x3
        whilelo p0.b, w3, w2
        b.any   .L3

--- CUT ---
But we should be able to do:
.L3:
        ld1b    z0.b, p0/z, [x1, x3]
        and     z2.b, z0.b, #0xc0
        cmpne   p2.b, p1/z, z2.b, #0 ;;; << I think p1/z here really should be p0/z also
        neg     z0.b, p2/m, z0.b ;;; <<< This one should be conditional
        lsr     z0.b, p2/m, z0.b, #7
        st1b    z0.b, p0, [x0, x3]
        incb    x3
        whilelo p0.b, w3, w2
        b.any   .L3


More information about the Gcc-bugs mailing list