[Bug rtl-optimization/20376] The missed-optimization of general induction variables in the new rtl-level loop optimizer cause performance degradation.
canqun at nudt dot edu dot cn
gcc-bugzilla@gcc.gnu.org
Wed Mar 30 14:43:00 GMT 2005
------- Additional Comments From canqun at nudt dot edu dot cn 2005-03-30 14:42 -------
(In reply to comment #7)
> Waiting for a test case...
Part 1
! Test case for address giv (general induction variable) optimization.
!
! dot_product: accumulates a(i) * b(i) for i = 1 .. n into sum.
!   sum  - real*8 result; set to 0 on entry, then accumulated in the loop
!   a, b - real*8 arrays dimensioned (n)
!   n    - array extent and loop upper bound; not declared here, so it is
!          implicitly typed (no IMPLICIT statement appears in this routine)
! The only array references in the loop body are a(i) and b(i); presumably
! their element addresses are the "address givs" this test is about.
subroutine dot_product (sum, a, b, n)
real*8 a(n), b(n), sum
sum = 0
! The loop index i is likewise undeclared and implicitly typed.
do i = 1, n
sum = sum + a (i) * b(i)
end do
end
Part 2
// IA-64 Assembly code generated by GCC without address giv splitting
mov ar.lc = r14
.L4:
// The loop is unrolled, but the address givs are not split.
// Registers r17 and r19 are used to calculate all the addresses
// of the array elements.
.mmb
ldfd f7 = [r17]
ldfd f6 = [r19]
nop 0
.mmi
add r17 = r15, r33
add r19 = r15, r34
shladd r15 = r18, 3, r0
;;
.mmf
nop 0
nop 0
fma.d f8 = f7, f6, f9
.mmi
ldfd f7 = [r17]
ldfd f6 = [r19]
add r17 = r15, r33
.mfi
nop 0
shladd r15 = r16, 3, r0
;;
.mmf
nop 0
nop 0
fma.d f8 = f7, f6, f8
.mmi
ldfd f7 = [r17]
ldfd f6 = [r19]
add r17 = r15, r33
.mmb
nop 0
add r19 = r15, r34
nop 0
;;
.mmf
nop 0
nop 0
fma.d f8 = f7, f6, f8
.mmb
ldfd f7 = [r17]
ldfd f6 = [r19]
nop 0
;;
.mmf
nop 0
nop 0
fma.d f8 = f7, f6, f8
;;
.mfb
nop 0
mov f9 = f8
br.cloop.sptk.few .L4
...
.endp dot_product__#
.ident "GCC: (GNU) 4.1.0 20050302 (experimental)"
Part 3
// IA-64 assembly code generated by GCC with address giv splitting
mov ar.lc = r16
.L28:
[.L3:]
[.L2:]
...
// The loop is unrolled, and the address givs are split.
// Registers r14, r8, r3, r35, r33, r31, r28 and r29 are each used to
// calculate the address of one array element.
.mmi
ldfd f38 = [r14]
ldfd f39 = [r8]
add r31 = r34, r23
.mmi
ldfd f35 = [r3]
ldfd f37 = [r35]
add r33 = r34, r24
;;
.mmb
ldfd f33 = [r33]
ldfd f34 = [r31]
nop 0
.mmi
add r28 = r30, r23
add r29 = r30, r24
adds r22 = 4, r22
;;
.mmf
ldfd f32 = [r29]
ldfd f15 = [r28]
fma.d f36 = f38, f39, f14
;;
.mmf
nop 0
nop 0
fma.d f13 = f35, f37, f36
;;
.mmf
nop 0
nop 0
fma.d f12 = f33, f34, f13
;;
.mfb
nop 0
fma.d f14 = f32, f15, f12
br.cloop.sptk.few .L28
;;
.L9:
.mfb
stfd [r32] = f14
nop 0
nop 0
...
.endp dot_product__#
.ident "GCC: (GNU) 3.5-tree-ssa 20031221 (CCRG)"
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=20376
More information about the Gcc-bugs
mailing list