(i386-linux x sh-elf) GIV recombination opportunities missed
Toshiyasu Morita
tm@netcom.com
Thu May 18 19:10:00 GMT 2000
I've been looking at the latest toolchain, and I'm still seeing quite
a few missed giv (general induction variable) recombination opportunities.
Here's one which is rather nasty. r3 is equal to r12 - 4 throughout the
entire loop, but the compiler's loop optimization fails to combine the two
givs. As a result, r3 is needlessly spilled to and reloaded from the stack
inside the loop.
vers: CVS current
opts: -O2 -m4-single-only -ml -S -g
file: slalom.i from http://shell14.ba.best.com/pub.t/tm2/stress-1.14.tar.gz
1346 0698 636D mov r6,r13
1347 069a C363 mov r12,r3 <- here
1350 069c 4B11 mov.l r4,@(44,r1)
1353 069e 00E2 mov #0,r2
1354 06a0 047D add #4,r13
1355 06a2 221E mov.l r2,@(8,r14)
1356 06a4 FC73 add #-4,r3 <- here
1357 06a6 241E mov.l r2,@(16,r14)
1358 06a8 D51E mov.l r13,@(20,r14)
1359 06aa 361E mov.l r3,@(24,r14) <- here
1360 06ac 171E mov.l r1,@(28,r14)
1524:slalom.i **** numpat = loop[iface][1] - loop[iface][0] + 1;
1363 06ae E755 mov.l @(28,r14),r5
1525:slalom.i **** tmp3 = 0.0;
1526:slalom.i **** if (iface >= 3)
1366 .L168:
...
1390 06c2 E650 mov.l @(24,r14),r0
1391 06c4 08FC fmov.s @r0,fr12
...
1528:slalom.i **** numcol = (int) (sqrt(numpat * box[iface] / box[iface + 1]) + 0.5);
1394 .LM206:
1395 06c6 5A4A lds r10,fpul
1396 06c8 C8F1 fmov.s @r12,fr1 <- here
1397 06ca 2DF4 float fpul,fr4
1398 06cc E551 mov.l @(20,r14),r1
1399 06ce 12F4 fmul fr1,fr4
1400 06d0 18F1 fmov.s @r1,fr1
1401 06d2 13F4 fdiv fr1,fr4
1402 06d4 4CF0 fmov fr4,fr0
1403 06d6 6DF0 fsqrt fr0
1404 06d8 04F0 fcmp/eq fr0,fr0
1405 06da 0289 bt .L138
1406 06dc 4CD1 mov.l .L158,r1
1407 06de 0B41 jsr @r1
1408 06e0 0900 nop
...
1533:slalom.i **** width = box[iface] / numcol;
1428 .LM209:
1429 06f8 5A48 lds r8,fpul
1430 06fa C8F5 fmov.s @r12,fr5 <- here
...
1554:slalom.i **** area[ipatch] = tmp4;
1587 .LM247:
1588 07a2 40E0 mov #64,r0
1590 .LM248:
1591 07a4 CAF6 fmov.s fr12,@r6
...
1629 07c0 E451 mov.l @(16,r14),r1
1630 07c2 047D add #4,r13
1631 07c4 E552 mov.l @(20,r14),r2
1632 07c6 047C add #4,r12 <- here
1633 07c8 E653 mov.l @(24,r14),r3 <- here
1634 07ca 0871 add #8,r1
1635 07cc E755 mov.l @(28,r14),r5
1636 07ce 0472 add #4,r2
1637 07d0 E257 mov.l @(8,r14),r7
1638 07d2 0473 add #4,r3 <- here
1639 07d4 141E mov.l r1,@(16,r14)
1640 07d6 0875 add #8,r5
1641 07d8 05E1 mov #5,r1
1642 07da 0177 add #1,r7
1643 07dc 251E mov.l r2,@(20,r14)
1644 07de 1737 cmp/gt r1,r7
1645 07e0 361E mov.l r3,@(24,r14) <- here
1646 07e2 571E mov.l r5,@(28,r14)
1647 07e4 028D bt.s .L167
1648 07e6 721E mov.l r7,@(8,r14)
1649 07e8 62AF bra .L168
1650 07ea E755 mov.l @(28,r14),r5
1651 .L167:
Here's another sample which is somewhat more benign; no register spill
is generated, but there are two givs which fail to be combined:
vers: CVS current
opts: -O2 -m4-single-only -ml -S -g
file: imdct.i from http://shell14.ba.best.com/pub.t/tm2/stress-1.14.tar.gz
3358:./imdct.i **** for(i=0; i< 512 /8; i++)
2384 .LM350:
2385 080c 0369 mov r0,r9 <- here
2386 080e 0368 mov r0,r8
2387 0810 8591 mov.w .L215,r1
2388 0812 0479 add #4,r9 <- here
2389 0814 8490 mov.w .L216,r0
2390 0816 40EB mov #64,r11
2391 0818 E262 mov.l @r14,r2
2392 081a 46D4 mov.l .L212,r4
2393 081c 1C38 add r1,r8
2394 081e 48DC mov.l .L217,r12
2395 0820 0C32 add r0,r2
2396 0822 7E9D mov.w .L218,r13
2397 0824 1C34 add r1,r4
2398 0826 E260 mov.l @r14,r0
2399 0828 C366 mov r12,r6
2400 082a 46D1 mov.l .L219,r1
2401 082c 0476 add #4,r6
2402 082e 46D3 mov.l .L220,r3
2403 0830 0365 mov r0,r5 <- here
2404 0832 40D7 mov.l .L212,r7
2405 0834 0C3D add r0,r13
2406 0836 759A mov.w .L221,r10
2407 0838 0475 add #4,r5 <- here
2408 083a 271E mov.l r2,@(28,r14)
2409 083c 0477 add #4,r7
2410 083e 191E mov.l r1,@(36,r14)
2411 0840 00E2 mov #0,r2
2412 0842 381E mov.l r3,@(32,r14)
2413 0844 0361 mov r0,r1
2414 0846 CA1E mov.l r12,@(40,r14)
2415 .L187:
3359:./imdct.i **** {
...
2443 0866 1AF5 fmov.s fr1,@r5 <- here
3362:./imdct.i **** y[512 /4+2*i] = -buf_1[i].real * window[512 /4+2*i];
2446 0868 E953 mov.l @(36,r14),r3
2449 086a 0875 add #8,r5 <- here
...
2510 08b8 98F2 fmov.s @r9,fr2 <- here
2511 08ba 3793 mov.w .L225,r3
2512 08bc 2830 sub r2,r0
2513 08be C6F1 fmov.s @(r0,r12),fr1
2514 08c0 0879 add #8,r9 <- here
...
2534 08d8 0872 add #8,r2
2537 08da 12F2 fmul fr1,fr2
2538 08dc 1360 mov r1,r0
2541 08de 0871 add #8,r1
2544 08e0 B28F bf.s .L187
2545 08e2 27F3 fmov.s fr2,@(r0,r3)
Toshi
More information about the Gcc-bugs mailing list