This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: -ftree-vectorize can't vectorize plus?
- From: Dorit Nuzman <DORIT at il dot ibm dot com>
- To: Rask Ingemann Lambertsen <rask at sygehus dot dk>
- Cc: gcc at gcc dot gnu dot org
- Date: Mon, 11 Sep 2006 13:20:07 +0300
- Subject: Re: -ftree-vectorize can't vectorize plus?
> A silly little testcase which the vectorizer doesn't vectorize:
>
....
> autovecttest.c:11: note: not vectorized: relevant stmt not
> supported: D.1861_9 = (signed char) D.1860_8
Can these type casts (from uchar to schar and back) be cleaned away by some
pass before vectorization, or do we need to teach the vectorizer to ignore
such type casts?
unsigned char D.1932
unsigned char D.1936
unsigned char D.1939
....
D.1933_9 = (signed char) D.1932_8;
D.1937_17 = (signed char) D.1936_16;
D.1938_18 = D.1937_17 ^ D.1933_9;
D.1939_19 = (unsigned char) D.1938_18;
dorit
> unsigned char qa[128];
> unsigned char qb[128];
> unsigned char qc[128];
> unsigned char qd[128];
>
> void autovectqi (void)
> {
> int i;
>
> for (i = 0; i < 128; i ++)
> qd[i] = qa[i] ^ qb[i] + qc[i];
> }
>
> Revision 116799 with '-O3 -fomit-frame-pointer -S -dp -ftree-vectorize
> -march=prescott' produces:
>
> autovectqi:
> xorl %edx, %edx # 54 *movsi_xor [length = 2]
> .L2:
> movzbl qb(%edx), %eax # 20 *movqi_1/3 [length = 4]
> addb qc(%edx), %al # 21 *addqi_1_lea/2 [length = 3]
> xorb qa(%edx), %al # 23 *xorqi_1/1 [length = 3]
> movb %al, qd(%edx) # 24 *movqi_1/7 [length = 3]
> addl $1, %edx # 26 *addsi_1/1 [length = 3]
> cmpl $128, %edx # 27 *cmpsi_1_insn/1 [length = 6]
> jne .L2 # 28 *jcc_1 [length = 2]
> ret # 51 return_internal [length = 1]
>
>
> If I change 'qb[i] + qc[i]' to e.g. 'qb[i] & qc[i]' the vectorizer works
> fine.
>
> ;; Function autovectqi (autovectqi)
> [snip lots of stuff]
> autovecttest.c:11: note: Access function of PHI: {0, +, 1}_1
> autovecttest.c:11: note: Analyze phi: qd_23 = PHI <qd_20(4), qd_4(2)>;
> autovecttest.c:11: note: virtual phi. skip.
> autovecttest.c:11: note: === vect_analyze_operations ===
> autovecttest.c:11: note: examining phi: ivtmp.28_1 = PHI <ivtmp.28_2(4), 128(2)>;
> autovecttest.c:11: note: examining phi: i_24 = PHI <i_21(4), 0(2)>;
> autovecttest.c:11: note: examining phi: qd_23 = PHI <qd_20(4), qd_4(2)>;
> autovecttest.c:11: note: ==> examining statement: <L0>:
> autovecttest.c:11: note: irrelevant.
> autovecttest.c:11: note: ==> examining statement: D.1860_8 = qa[i_24]
> autovecttest.c:11: note: num. args = 4 (not unary/binary op).
> autovecttest.c:11: note: vect_is_simple_use: operand qa[i_24]
> autovecttest.c:11: note: not ssa-name.
> autovecttest.c:11: note: use not simple.
> autovecttest.c:11: note: ==> examining statement: D.1861_9 = (signed
> char) D.1860_8
> autovecttest.c:11: note: vect_is_simple_use: operand D.1860_8
> autovecttest.c:11: note: def_stmt: D.1860_8 = qa[i_24]
> autovecttest.c:11: note: type of def: 2.
> autovecttest.c:11: note: no optab.
> autovecttest.c:11: note: vect_is_simple_use: operand (signed char) D.1860_8
> autovecttest.c:11: note: not ssa-name.
> autovecttest.c:11: note: use not simple.
> autovecttest.c:11: note: not vectorized: relevant stmt not
> supported: D.1861_9 = (signed char) D.1860_8
> autovecttest.c:11: note: bad operation or unsupported loop bound.
> autovecttest.c:11: note: vectorized 0 loops in function.
> autovectqi ()
> {
> unsigned int ivtmp.28;
> int pretmp.22;
> int i;
> unsigned char D.1867;
> signed char D.1866;
> signed char D.1865;
> unsigned char D.1864;
> unsigned char D.1863;
> unsigned char D.1862;
> signed char D.1861;
> unsigned char D.1860;
>
> <bb 2>:
>
> # ivtmp.28_1 = PHI <ivtmp.28_2(4), 128(2)>;
> # i_24 = PHI <i_21(4), 0(2)>;
> <L0>:;
> D.1860_8 = qa[i_24];
> D.1861_9 = (signed char) D.1860_8;
> D.1862_12 = qb[i_24];
> D.1863_15 = qc[i_24];
> D.1864_16 = D.1863_15 + D.1862_12;
> D.1865_17 = (signed char) D.1864_16;
> D.1866_18 = D.1865_17 ^ D.1861_9;
> D.1867_19 = (unsigned char) D.1866_18;
> qd[i_24] = D.1867_19;
> i_21 = i_24 + 1;
> ivtmp.28_2 = ivtmp.28_1 - 1;
> if (ivtmp.28_2 != 0) goto <L5>; else goto <L2>;
>
> <L5>:;
> goto <bb 3> (<L0>);
>
> <L2>:;
> return;
>
> }
> [cut]
>
> --
> Rask Ingemann Lambertsen