This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH 2/2] Enable elimination of zext/sext
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Cc: Kugan <kugan dot vivekanandarajah at linaro dot org>, Jakub Jelinek <jakub at redhat dot com>, Richard Biener <richard dot guenther at gmail dot com>
- Date: Wed, 27 Aug 2014 12:01:36 +0200
- Subject: Re: [PATCH 2/2] Enable elimination of zext/sext
- Authentication-results: sourceware.org; auth=none
Hello!
> 2014-08-07 Kugan Vivekanandarajah <kuganv@linaro.org>
>
> * calls.c (precompute_arguments): Check
> promoted_for_signed_and_unsigned_p and set the promoted mode.
> (promoted_for_signed_and_unsigned_p): New function.
> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
> and set the promoted mode.
> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.
This patch regresses:
Running target unix
FAIL: libgomp.fortran/simd7.f90 -O2 execution test
FAIL: libgomp.fortran/simd7.f90 -Os execution test
on alphaev6-linux-gnu.
The problem can be illustrated with attached testcase with a
crosscompiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is in
missing SImode extension after DImode shift of SImode subregs for this
part:
--cut here--
# test.23_12 = PHI <0(37), 1(36)>
_242 = ivtmp.181_73 + 2147483645;
_240 = _242 * 2;
_63 = (integer(kind=4)) _240;
if (ubound.6_99 <= 2)
goto <bb 39>;
else
goto <bb 40>;
;; succ: 39
;; 40
;; basic block 39, loop depth 1
;; pred: 38
pretmp_337 = test.23_12 | l_76;
goto <bb 45>;
;; succ: 45
;; basic block 40, loop depth 1
;; pred: 38
_11 = *c_208[0];
if (_11 != _63)
goto <bb 45>;
else
goto <bb 42>;
--cut here--
this expands to:
(code_label 592 591 593 35 "" [0 uses])
(note 593 592 0 NOTE_INSN_BASIC_BLOCK)
;; _63 = (integer(kind=4)) _240;
(insn 594 593 595 (set (reg:SI 538)
(const_int 1073741824 [0x40000000])) -1
(nil))
(insn 595 594 596 (set (reg:SI 539)
(plus:SI (reg:SI 538)
(const_int 1073741824 [0x40000000]))) -1
(nil))
(insn 596 595 597 (set (reg:SI 537)
(plus:SI (reg:SI 539)
(const_int -3 [0xfffffffffffffffd]))) -1
(expr_list:REG_EQUAL (const_int 2147483645 [0x7ffffffd])
(nil)))
(insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
(plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
(reg:SI 537))) -1
(nil))
(insn 598 597 599 (set (reg:DI 540)
(ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
(const_int 1 [0x1]))) -1
(nil))
(insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
(reg:DI 540)) -1
(nil))
...
(note 610 609 0 NOTE_INSN_BASIC_BLOCK)
;; _11 = *c_208[0];
(insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
(sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4
A128]))) simd7.f90:12 -1
(nil))
;; if (_11 != _63)
(insn 612 611 613 40 (set (reg:DI 545)
(eq:DI (reg:DI 120 [ D.1694 ])
(reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
(nil))
(jump_insn 613 612 616 40 (set (pc)
(if_then_else (eq (reg:DI 545)
(const_int 0 [0]))
(label_ref 0)
(pc))) simd7.f90:12 -1
(int_list:REG_BR_PROB 450 (nil)))
which results in following asm:
$L35:
addl $25,$7,$2 # 597 addsi3/1 [length = 4]
addq $2,$2,$2 # 598 ashldi3/1 [length = 4] <------ here
bne $24,$L145 # 601 *bcc_normal [length = 4]
lda $4,4($20) # 627 *adddi_internal/2 [length = 4]
ldl $8,0($20) # 611 *extendsidi2_1/2 [length = 4]
lda $3,3($31) # 74 *movdi/2 [length = 4]
cmpeq $8,$2,$2 # 612 *setcc_internal [length = 4] <-- compare
bne $2,$L40 # 613 *bcc_normal [length = 4]
br $31,$L88 # 2403 jump [length = 4]
.align 4
...
Tracking the values with the debugger shows wrong calculation:
0x000000012000108c <+1788>: addl t10,t12,t1
0x0000000120001090 <+1792>: addq t1,t1,t1
...
0x00000001200010a4 <+1812>: cmpeq t6,t1,t1
0x00000001200010a8 <+1816>: bne t1,0x1200010c0 <foo_+1840>
(gdb) si
0x000000012000108c 17 l = l .or. any (b /= 7 + i)
(gdb) i r t10 t12
t10 0x7 7
t12 0x7ffffffd 2147483645
(gdb) si
0x0000000120001090 17 l = l .or. any (b /= 7 + i)
(gdb) i r t1
t1 0xffffffff80000004 -2147483644
(gdb) si
18 l = l .or. any (c /= 8 + 2 * i)
(gdb) i r t1
t1 0xffffffff00000008 -4294967288
At this point, the calculation should zero-extend SImode value to full
DImode, since compare operates on DImode values. The problematic insn
is (insn 599), which is now a DImode assignment instead of
zero-extend, due to:
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
GET_MODE (target), temp, unsignedp);
}
- convert_move (SUBREG_REG (target), temp, unsignedp);
+ if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+ && (GET_CODE (temp) == SUBREG)
+ && (GET_MODE (target) == GET_MODE (temp))
+ && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+ emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+ else
+ convert_move (SUBREG_REG (target), temp, unsignedp);
}
else if (nontemporal && emit_storent_insn (target, temp))
;
When compiling this code, we have:
lhs = _63
target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
temp = (subreg:SI (reg:DI 540) 0)
So, the code assumes that it is possible to copy (reg:DI 540) directly
to (reg:DI 154). However, this is not the case, since we still have
garbage in the top 32bits.
Reverting the part above fixes the runtime failure, since (insn 599) is now:
(insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
(zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
(nil))
It looks to me that we have also to check the temp with SUBREG_PROMOTED_*.
Uros.
subroutine foo (d, e, f, g, m, n)
integer :: i, j, b(2:9), c(3:n), d(:), e(2:n), f(2:,3:), n
integer, allocatable :: g(:), h(:), k, m
logical :: l
l = .false.
allocate (h(2:7))
i = 4; j = 4; b = 7; c = 8; d = 9; e = 10; f = 11; g = 12; h = 13; k = 14; m = 15
!$omp simd linear(b)linear(c:2)linear(d:3)linear(e:4)linear(f:5)linear(g:6) &
!$omp & linear(h:7)linear(k:8)linear(m:9) reduction(.or.:l)
do i = 0, 63
l = l .or. any (b /= 7 + i)
l = l .or. any (c /= 8 + 2 * i)
b = b + 1; c = c + 2
d = d + 3; e = e + 4; f = f + 5; g = g + 6
h = h + 7; k = k + 8; m = m + 9
end do
if (l .or. i /= 64) call abort
if (any (b /= 7 + 64) .or. any (c /= 8 + 2 * 64)) call abort
end subroutine
interface
subroutine foo (d, e, f, g, m, n)
integer :: d(:), e(2:n), f(2:,3:), n
integer, allocatable :: g(:), m
end subroutine
end interface
integer, parameter :: n = 8
integer :: d(2:18), e(3:n+1), f(5:6,7:9)
integer, allocatable :: g(:), m
allocate (g(7:10))
call foo (d, e, f, g, m, n)
end