This is the mail archive of the
gcc-help@gcc.gnu.org
mailing list for the GCC project.
Generation of post-increment addressing for multiple array access in a loop
- From: Deepti Chopra <deepti_chopra at acmet dot com>
- To: gcc-help at gcc dot gnu dot org
- Date: Fri, 31 Jul 2009 17:45:03 +0530
- Subject: Generation of post-increment addressing for multiple array access in a loop
I have a query regarding post-increment addressing for multiple
array access in a loop.
GCC version used: ver 4.4.0
Target: m32r
Please consider the following cases.
Case I. Single Array Access Within Loop
========================================
Please consider the below test case:
int siLoopIndex;
int siData[40];
int siVar1;
void vAutoIncrementAddressingMode()
{
for (siLoopIndex=0; siLoopIndex<40; siLoopIndex++)
{
siVar1 = siData[siLoopIndex];
}
}
For the above case, the assembly generated is as follows:
Assembly (post-increment generated)
-------------------------------------
(GCC ver 4.4.0, target=m32r, option -O3)
vAutoIncrementAddressingMode:
; PROLOGUE, vars= 0, regs= 0, args= 0, extra= 0
ld24 r4,#siData
ld24 r6,#siData+160
.balign 4
.L2:
ld r5,@r4+ <== Post-increment mode generated
bne r4,r6,.L2
ld24 r4,#siVar1
st r5,@(r4)
ld24 r4,#siLoopIndex
ldi r5,#40 ; 0x28
st r5,@(r4)
jmp lr
For the above test case the optimized gimple obtained is as below:
Optimized gimple (-fdump-tree-optimized)
-----------------------------------------
vAutoIncrementAddressingMode ()
{
long unsigned int D.1238;
long unsigned int ivtmp.34;
int siVar1.1;
<bb 2>:
ivtmp.34 = (long unsigned int) &siData[0]; <== (A)
D.1238 = (long unsigned int) &siData + 160;
<bb 3>:
siVar1.1 = MEM[index: ivtmp.34]; <== (B)
ivtmp.34 = ivtmp.34 + 4; <== (C)
if (ivtmp.34 != D.1238)
goto <bb 3>;
else
goto <bb 4>;
<bb 4>:
siVar1 = siVar1.1;
siLoopIndex = 40;
return;
}
Please note the marked statements (A), (B) and (C).
In statement (A), a pointer to the first location
of array "siData" is assigned to "ivtmp.34". Please note
that this statement is not inside the loop, but is
hoisted above it.
Now, post-increment generation is identified with
statements (B) and (C) as follows.
siVar1.1 = MEM[index: ivtmp.34]; <== (B)
\_____________/
|
V
X
ivtmp.34 = ivtmp.34 + 4; <== (C)
\_____________________/
|
V
X++
In statement (B), the array "siData" is accessed using
index "ivtmp.34" (call it X). The next statement
increments "X" to point to the next location. Hence,
a post-increment operation is identified and assembly
with post-increment addressing gets generated.
Case II. Multiple Array Access Within Loop (with same Array Index)
===================================================================
Now, please consider the below test case:
int siLoopIndex;
int siSum ;
int siData[40],siCoeff[40];
int siVar1;
int siVar2;
void vAutoIncrementAddressingMode()
{
int lsiSum;
for (siLoopIndex=0; siLoopIndex<40; siLoopIndex++)
{
siVar1 = siData[siLoopIndex];
siVar2 = siCoeff[siLoopIndex];
}
}
The assembly generated for the above test case is as follows:
Assembly (no post-increment generated)
----------------------------------------
(GCC ver 4.4.0, target=m32r, option -O3)
vAutoIncrementAddressingMode:
; PROLOGUE, vars= 0, regs= 0, args= 0, extra= 0
ldi r4,#0 ; 0x0
.balign 4
.L2:
ld24 r7,#siData
ld24 r6,#siCoeff
add r7,r4 // r7 points to base address of siData
// r4 contains value of siLoopIndex
// siData[siLoopIndex] accessed by adding
// r7 and r4
add r6,r4 // r6 points to base address of siCoeff
// r4 contains value of siLoopIndex
// siCoeff[siLoopIndex] accessed by adding
// r6 and r4
addi r4,#4 // siLoopIndex is incremented here
add3 r5,r4,#-160
ld r7,@(r7)
ld r6,@(r6)
bnez r5,.L2
ld24 r4,#siVar1
st r7,@(r4)
ld24 r4,#siVar2
st r6,@(r4)
ld24 r4,#siLoopIndex
ldi r5,#40 ; 0x28
st r5,@(r4)
jmp lr
In this case, post-increment addressing mode is not generated.
Please note that here, (base address + siLoopIndex) is recomputed to
access the array locations on each iteration of the loop. This is in
contrast to Case I above, where the code that points to the base address
of the array was hoisted above the loop, and only the array index was
incremented on each iteration of the loop.
The optimized gimple generated by GCC for this case is as below:
Optimized gimple (-fdump-tree-optimized)
-----------------------------------------
vAutoIncrementAddressingMode ()
{
long unsigned int ivtmp.37;
int siVar2.2;
int siVar1.1;
<bb 2>:
ivtmp.37 = 0;
<bb 3>:
siVar1.1 = MEM[base: &siData + ivtmp.37]; <== Base address + Loop Index
siVar2.2 = MEM[base: &siCoeff + ivtmp.37]; <== Base address + Loop Index
ivtmp.37 = ivtmp.37 + 4;
if (ivtmp.37 != 160)
goto <bb 3>;
else
goto <bb 4>;
<bb 4>:
siVar1 = siVar1.1;
siVar2 = siVar2.2;
siLoopIndex = 40;
return;
}
My understanding is that some loop transformation might be
required to convert the above code into the following form:
<bb 2>:
ivtmp.22 = (long unsigned int) &siData[0];
ivtmp.23 = (long unsigned int) &siCoeff[0];
<bb 3>:
siVar1.1 = MEM[index: ivtmp.22];
siVar2.2 = MEM[index: ivtmp.23];
ivtmp.22 = ivtmp.22 + 4;
ivtmp.23 = ivtmp.23 + 4;
Please verify my understanding.
Case III. Multiple Array Access Within Loop (with different Array Index)
=========================================================================
As an additional observation, when I used a different array index
variable to access the second array, the post-increment mode was generated.
int siLoopIndex1;
int siLoopIndex2;
int siData[40],siCoeff[40];
int siVar1;
int siVar2;
void vAutoIncrementAddressingMode()
{
for (siLoopIndex1=0; siLoopIndex1<40; siLoopIndex1++)
{
siVar1 = siData[siLoopIndex1];
siVar2 = siCoeff[siLoopIndex2];
siLoopIndex2++;
}
}
Assembly (post-increment generated)
-------------------------------------
(GCC ver 4.4.0, target=m32r, option -O3)
vAutoIncrementAddressingMode:
; PROLOGUE, vars= 0, regs= 1, args= 0, extra= 0
ld24 r3,#siLoopIndex2
ld24 r6,#siCoeff
ld24 r4,#siData
ld24 r2,#siData+160
push lr
ld lr,@(r3)
sll3 r5,lr,#2
add r5,r6
.balign 4
.L2:
ld r7,@r4+ <== Post-Increment generated
ld r6,@r5+ <== Post-Increment generated
bne r4,r2,.L2
ld24 r4,#siVar1
st r7,@(r4)
ld24 r4,#siVar2
st r6,@(r4)
ld24 r4,#siLoopIndex1
addi lr,#40
ldi r5,#40 ; 0x28
st lr,@(r3)
st r5,@(r4)
pop lr
jmp lr
The optimized gimple for the above case is as follows:
Optimized gimple (-fdump-tree-optimized)
-----------------------------------------
vAutoIncrementAddressingMode ()
{
long unsigned int D.1251;
long unsigned int ivtmp.45;
long unsigned int ivtmp.42;
int pretmp.18;
int siVar2.3;
int siVar1.1;
<bb 2>:
pretmp.18 = siLoopIndex2;
<== Code Hoisted ==>
ivtmp.42 = (long unsigned int) &siData[0];
<== Code Hoisted ==>
ivtmp.45 = (long unsigned int) &siCoeff[pretmp.18];
D.1251 = (long unsigned int) &siData + 160;
<bb 3>:
siVar1.1 = MEM[index: ivtmp.42];
siVar2.3 = MEM[index: ivtmp.45];
ivtmp.42 = ivtmp.42 + 4;
ivtmp.45 = ivtmp.45 + 4;
if (ivtmp.42 != D.1251)
goto <bb 3>;
else
goto <bb 4>;
<bb 4>:
siVar1 = siVar1.1;
siVar2 = siVar2.3;
siLoopIndex2 = [plus_expr] pretmp.18 + 40;
siLoopIndex1 = 40;
return;
}
As we see above, code for pointing to base addresses of arrays gets
hoisted.
Could you please explain why the post-increment mode does not get
generated in Case II? What would need to be done to generate the
post-increment mode for Case II?