This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
Further analysis of g77.f-torture/execute/20000503-1.f
- To: "'gcc-bugs at gcc dot gnu dot org'" <gcc-bugs at gcc dot gnu dot org>
- Subject: Further analysis of g77.f-torture/execute/20000503-1.f
- From: "Billinghurst, David (CRTS)" <David dot Billinghurst at riotinto dot com dot au>
- Date: Thu, 6 Jul 2000 01:22:59 -0000
This continues my investigation of f-torture/execute/20000503-1.f. A binary
search on snapshots shows the bug was introduced between 19991122 and
1991201.
As the failure occurs on all platforms and at all optimisation level, Toon
Moene suggested that fold-const.c may be a candidate. Substantial changes to
fold-const.c were made around 27 Nov 1999, by Richard Kenner and others.
####################################################
The bug has been isolated to the subroutine below.
INTEGER FUNCTION SLASQX( N )
INTEGER N, I0, I, K
I0 = 1
DO I = 4*I0, 2*( I0+N-1 ), 4
K = I
END DO
SLASQX = K
RETURN
END
The problem is the incorrect calculation of the number of loop counts. A
Fortran DO loop always has a definite number of iterations, due to the fact
that assignments to the loop index are illegal and assignments to the loop
bounds or step do not have an effect.
So:
DO I = N1, N2, N3 ! Loop from N1 to N2 with step N3
...
ENDDO
has the following number of iterations:
MAX((N2-N1+N3)/N3,0)
or, in this case (IO=1 and N=20):
N1 = 4*1 = 4, N2 = 2*(1+20-1) = 40, N3 = 4
MAX(40-4+4)/4,0) = 10
The incorrect code calculates J = (I0+N)/2 - I0 = 9
As a guess, the compiler incorrectly tries to transformation
[2*(I0+N-1)-4*I0+4]/4 => (I0+N-1)/2 - I0 + 1
Below are partial annotated RTL dump after RTL generation (g77 -O0 -c -dr)
on i686-pc-linux-gnu. The first from egcs-19991122 and the second from
egcs-19991201
Variable allocation
reg 19 = *N
reg 20 - stack pointer?
mem( reg 20 - 4 ) = ??? function return value ???
mem( reg 20 - 8 ) = I0
mem( reg 20 - 12 ) = I
mem( reg 20 - 16 ) = K
mem( reg 20 - 20 ) = iteration counter, here after J
##### slasqx.f.rtl.egcs-19991122.i686-pc-linux-gnu #####
;; Function slasqx_
(note 2 0 3 "" NOTE_INSN_DELETED)
(note 3 2 92 "" NOTE_INSN_FUNCTION_BEG)
(insn 92 3 6 (use (reg:SI 29)) -1 (nil)
(nil))
(note 6 92 12 (nil) NOTE_INSN_BLOCK_BEG)
(insn 12 6 14 (set (mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)
(const_int 1 [0x1])) -1 (nil)
(nil))
# I0 = 1
(note 14 12 19 (nil) NOTE_INSN_BLOCK_BEG)
(insn 19 14 21 (set (reg:SI 24)
(mem/u/f:SI (reg:SI 19 virtual-incoming-args) 0)) -1 (nil)
(nil))
(insn 21 19 22 (set (reg:SI 26)
(mem:SI (reg:SI 24) 0)) -1 (nil)
(nil))
# R24 = N
(insn 22 21 24 (parallel[
(set (reg:SI 25)
(plus:SI (mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)
(reg:SI 26)))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(expr_list:REG_EQUAL (plus:SI (mem/f:SI (plus:SI (reg:SI 20
virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)
(mem:SI (reg:SI 24) 0))
(nil)))
# R25 = I0+N
(insn 24 22 25 (parallel[
(set (reg:SI 27)
(plus:SI (reg:SI 25)
(const_int -1 [0xffffffff])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
# R27 = I0+N-1
(insn 25 24 27 (parallel[
(set (reg:SI 28)
(mult:SI (reg:SI 27)
(const_int 2 [0x2])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
# R28 = 2*(I0+N-1)
(insn 27 25 29 (set (reg:SI 30)
(mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)) -1 (nil)
(nil))
# R30 = I0
(insn 29 27 30 (set (reg:SI 31)
(reg:SI 30)) -1 (nil)
(nil))
# R31 = R30
(insn 30 29 32 (parallel[
(set (reg:SI 32)
(ashift:SI (reg:SI 31)
(const_int 2 [0x2])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(expr_list:REG_EQUAL (mult:SI (reg:SI 30)
(const_int 4 [0x4]))
(nil)))
# R32 = 4*I0
(insn 32 30 34 (set (reg:SI 29)
(reg:SI 32)) -1 (nil)
(nil))
# R29 = 4*I0
(insn 34 32 36 (parallel[
(set (reg:SI 33)
(minus:SI (reg:SI 28)
(reg:SI 29)))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
# R31 = 2*(I0+N-1)-4*I0
(insn 36 34 38 (parallel[
(set (reg:SI 34)
(plus:SI (reg:SI 33)
(const_int 4 [0x4])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
# R34 = 2*(I0+N-1)-4*I0+4
(insn 38 36 39 (set (reg:SI 35)
(reg:SI 34)) -1 (nil)
(nil))
# R35 = 2*(I0+N-1)-4*I0+4
(insn 39 38 40 (set (reg:CCNO 17 flags)
(compare:CCNO (reg:SI 35)
(const_int 0 [0x0]))) -1 (nil)
(nil))
(jump_insn 40 39 42 (set (pc)
(if_then_else (ge (reg:CCNO 17 flags)
(const_int 0 [0x0]))
(label_ref 43)
(pc))) -1 (nil)
(nil))
(insn 42 40 43 (parallel[
(set (reg:SI 35)
(plus:SI (reg:SI 35)
(const_int 3 [0x3])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
(code_label 43 42 44 3 "" "" [num uses: 0])
(insn 44 43 46 (parallel[
(set (mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -20 [0xffffffec])) 0)
(ashiftrt:SI (reg:SI 35)
(const_int 2 [0x2])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(expr_list:REG_EQUAL (div:SI (reg:SI 34)
(const_int 4 [0x4]))
(nil)))
# If R35 < 0 then add 3 to it (correcting for rounding direction?)
# R44 = ( 2*(I0+N-1)-4*I0+4 + correction)/4
(insn 46 44 47 (set (mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -12 [0xfffffff4])) 0)
(reg:SI 29)) -1 (nil)
(nil))
# Loop counter J = R44
# Correct
##### slasqx.f.rtl.egcs-19991201.i686-pc-linux-gnu #####
;; Function slasqx_
(note 2 0 3 "" NOTE_INSN_DELETED)
(note 3 2 89 "" NOTE_INSN_FUNCTION_BEG)
(insn 89 3 87 (use (reg:SI 31)) -1 (nil)
(nil))
(insn 87 89 6 (use (reg:SI 33)) -1 (nil)
(nil))
(note 6 87 12 (nil) NOTE_INSN_BLOCK_BEG)
(insn 12 6 14 (set (mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)
(const_int 1 [0x1])) -1 (nil)
(nil))
# I0 = 1
(note 14 12 19 (nil) NOTE_INSN_BLOCK_BEG)
(insn 19 14 21 (set (reg:SI 24)
(mem/u/f:SI (reg:SI 19 virtual-incoming-args) 0)) -1 (nil)
(nil))
(insn 21 19 22 (set (reg:SI 26)
(mem:SI (reg:SI 24) 0)) -1 (nil)
(nil))
# R26 = N
(insn 22 21 23 (parallel[
(set (reg:SI 25)
(plus:SI (mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)
(reg:SI 26)))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(expr_list:REG_EQUAL (plus:SI (mem/f:SI (plus:SI (reg:SI 20
virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)
(mem:SI (reg:SI 24) 0))
(nil)))
# R25 = I0+N
(insn 23 22 24 (parallel[
(set (reg:SI 28)
(ashiftrt:SI (reg:SI 25)
(const_int 31 [0x1f])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
(insn 24 23 26 (parallel[
(set (reg:SI 29)
(lshiftrt:SI (reg:SI 28)
(const_int 31 [0x1f])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
(insn 26 24 27 (parallel[
(set (reg:SI 30)
(plus:SI (reg:SI 25)
(reg:SI 29)))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
(insn 27 26 29 (parallel[
(set (reg:SI 27)
(ashiftrt:SI (reg:SI 30)
(const_int 1 [0x1])))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(expr_list:REG_EQUAL (div:SI (reg:SI 25)
(const_int 2 [0x2]))
(nil)))
#R27 = [(I0+N) + ((I0+N)<0?1:0)]/2
#NOTE: Term ((I0+N)<0?1:0) = 0 in this case
#
#This code is trying to compute (Geoff Keating [geoffk@cygnus.com])
#
#(1 + N)/2
#
#rounded _towards_ zero, on machines where the RTL 'div' operation
#rounds towards -infinity. So what it does is compute
#
#(1 + N + (1 + N < 0))/2
#
#and the '1 + N < 0' bit is the tricky bit with the shifts; an ashiftrt
#by 31 gets you -1 if the value is negative and 0 if positive, then a
#logical shift right by 31 gets you 1 if the value is negative and
#still 0 if positive.
(insn 29 27 30 (set (reg:SI 31)
(mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -8 [0xfffffff8])) 0)) -1 (nil)
(nil))
# R31 = I0
(insn 30 29 32 (parallel[
(set (reg:SI 32)
(minus:SI (reg:SI 27)
(reg:SI 31)))
(clobber (reg:CC 17 flags))
] ) -1 (nil)
(nil))
# R32 = R27 - I0 = (I0+N)/2 -I0
(insn 32 30 34 (set (mem/f:SI (plus:SI (reg:SI 20 virtual-stack-vars)
(const_int -20 [0xffffffec])) 0)
(reg:SI 32)) -1 (nil)
(expr_list:REG_EQUAL (minus:SI (reg:SI 27)
(reg:SI 31))
(nil)))
# iteration counter J = (I0+N)/2 - I0
Therefore J = (1+20)/2 -1 = 9, as observed but wrong