This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
An optimization problem, perhaps arm-elf related
- From: Tobias Ringstrom <tori at ringstrom dot mine dot nu>
- To: gcc at gcc dot gnu dot org
- Date: Thu, 7 Feb 2002 19:02:58 +0100 (CET)
- Subject: An optimization problem, perhaps arm-elf related
I have a small problem with gcc 3.0.3 for arm-elf. The C code looks like
this: (I know it's stupid coding. It's a trimmed-down [broken] version,
but it shows the same problem. Removing the volatile makes no difference,
by the way.)
/*
 * Store `mask` to the 32-bit word at absolute address 0xffff0030 through a
 * volatile-qualified pointer.
 *
 * No return type is declared and no value is returned; the post itself
 * describes this reproducer as a trimmed-down "[broken]" version.
 * NOTE(review): presumably a memory-mapped "set bits" register -- the post
 * does not say what lives at this address.
 */
static __inline set(unsigned mask)
{
*(volatile unsigned*)0xffff0030 = mask;
}
/*
 * Store `mask` to the 32-bit word at absolute address 0xffff0034 through a
 * volatile-qualified pointer.
 *
 * No return type is declared and no value is returned; the post itself
 * describes this reproducer as a trimmed-down "[broken]" version.
 * NOTE(review): presumably a memory-mapped "clear bits" register -- the post
 * does not say what lives at this address.
 */
static __inline clear(unsigned mask)
{
*(volatile unsigned*)0xffff0034 = mask;
}
/*
 * Hand-unrolled variant: compares x against 0, 1, 2 and 3 in turn.
 * Each comparison performs exactly one store: set(1) when x equals the
 * constant, clear(2) otherwise, so four stores are issued per call.
 *
 * This is the variant for which the post reports the worse generated code.
 */
void fpga_prog(unsigned x)
{
if (x == 0) set(1); else clear(2);
if (x == 1) set(1); else clear(2);
if (x == 2) set(1); else clear(2);
if (x == 3) set(1); else clear(2);
}
/*
 * Loop variant: for i = 0..3, performs set(1) when x equals i and clear(2)
 * otherwise, so four stores are issued per call.
 *
 * The post compiles this with -O3 -funroll-loops and uses the result as the
 * "nice code" baseline.
 */
void fpga_prog_loop(unsigned x)
{
int i;
for (i = 0; i < 4; ++i) {
/* x is unsigned and i is int, so i is converted to unsigned for the
 * comparison; i is never negative here, so the value is unchanged. */
if (x == i) set(1); else clear(2);
}
}
The problem is that the manual loop unrolling of fpga_prog generates much
worse code than the -funroll-loops version of fpga_prog_loop. Compiling
with "arm-elf-gcc -S -O3 -funroll-loops tmp.c", I get the following nice
code for fpga_prog_loop:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, current_function_anonymous_args = 0
str lr, [sp, #-4]!
mvn r3, #65280
mov r1, #1
mov r2, #2
cmp r0, #0
streq r1, [r3, #-207]
strne r2, [r3, #-203]
sub lr, r3, #207
sub ip, r3, #203
cmp r0, #1
streq r1, [lr, #0]
strne r2, [ip, #0]
cmp r0, #2
streq r1, [lr, #0]
strne r2, [ip, #0]
cmp r0, #3
streq r1, [lr, #0]
strne r2, [ip, #0]
ldr pc, [sp], #4
For the manually unrolled loop fpga_prog, I get the following much worse
code:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, current_function_anonymous_args = 0
@ link register save eliminated.
cmp r0, #0
mvneq r2, #65280
mvnne r2, #65280
subeq r1, r2, #207
subne r1, r2, #203
moveq ip, #1
movne ip, #2
streq ip, [r1, #0]
strne ip, [r1, #0]
cmp r0, #1
mvneq ip, #65280
mvnne r2, #65280
subeq r1, ip, #207
subne r1, r2, #203
movne ip, #2
strne ip, [r1, #0]
streq r0, [r1, #0]
cmp r0, #2
mvneq r2, #65280
mvnne r2, #65280
subeq r1, r2, #207
subne r1, r2, #203
moveq ip, #1
movne ip, #2
streq ip, [r1, #0]
strne ip, [r1, #0]
cmp r0, #3
mvneq ip, #65280
mvnne r2, #65280
subeq r0, ip, #207
moveq r1, #1
subne r0, r2, #203
movne r1, #2
streq r1, [r0, #0]
strne r1, [r0, #0]
mov pc, lr
Is there an optimization option that I must enable that is not included in
-O3, or is this a bug? Please let me know if you want me to supply more
information, and please CC me since I'm not on the list.
/Tobias