This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug middle-end/70434] [5/6 Regression] adding an extraneous cast to vector type results in inferior code
- From: "rguenth at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Wed, 30 Mar 2016 09:00:19 +0000
- Subject: [Bug middle-end/70434] [5/6 Regression] adding an extraneous cast to vector type results in inferior code
- Auto-submitted: auto-generated
- References: <bug-70434-4 at http dot gcc dot gnu dot org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70434
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Keywords|missed-optimization |
--- Comment #6 from Richard Biener <rguenth at gcc dot gnu.org> ---
Note that this would likely regress PR63764 in that we return something that
might appear as an lvalue.
So the optimization request would be to optimize
v.1_6 = v;
D.2617 = v.1_6;
_9 = (long unsigned int) x_8(D);
_10 = _9 * 4;
_11 = &D.2617 + _10;
_12 = *_11;
to elide the copy from v to D.2617 based on the fact that D.2617 is not
modified.
Changing the patch so that it only alters what convert_vector_to_pointer_for_subscript
does when the index is not an INTEGER_CST produces the desired effect on the GIMPLE IL:
foo4 (int x, v4si v)
{
int _2;
int _3;
int _4;
vector(4) int v.1_6;
int _8;
<bb 2>:
_2 = BIT_FIELD_REF <v, 32, 0>;
_3 = BIT_FIELD_REF <v, 32, 32>;
_4 = _2 ^ _3;
BIT_FIELD_REF <v, 32, 0> = _4;
v.1_6 = v;
_8 = VIEW_CONVERT_EXPR<int[4]>(v.1_6)[x_7(D)];
return _8;
bar4 (int x, v4si v)
{
int _2;
int _3;
int _4;
int _7;
<bb 2>:
_2 = BIT_FIELD_REF <v, 32, 0>;
_3 = BIT_FIELD_REF <v, 32, 32>;
_4 = _2 ^ _3;
BIT_FIELD_REF <v, 32, 0> = _4;
_7 = VIEW_CONVERT_EXPR<int[4]>(v)[x_6(D)];
return _7;
but also exposes something we might not want to accept as valid GIMPLE
(a non-memory array-ref single-RHS). RTL expansion
causes wrong-code here as well, but funnily for the valid
case in bar4, not for foo4...
bar4:
.LFB3:
.cfi_startproc
vmovaps %xmm0, -24(%rsp)
movslq %edi, %rdi
movl -20(%rsp), %eax
xorl %eax, -24(%rsp)
movl -24(%rsp,%rdi,4), %eax
ret
foo4:
.LFB2:
.cfi_startproc
vpextrd $1, %xmm0, %edx
vmovd %xmm0, %eax
movslq %edi, %rdi
xorl %edx, %eax
vpinsrd $0, %eax, %xmm0, %xmm1
vmovaps %xmm1, -24(%rsp)
movl -24(%rsp,%rdi,4), %eax
ret