(i386-linux x sh-elf) GIV recombination opportunities missed

Thu May 18 19:10:00 GMT 2000

I've been looking at the latest toolchain, and I'm still seeing quite
a few giv (general induction variable) recombination misses.

Here's one which is rather nasty. r3 is equal to r12 - 4 throughout the
entire loop, but the loop optimization code fails to combine the two
givs. As a result, r3 is needlessly spilled to and restored from the
stack inside the loop.

vers: CVS current
opts: -O2 -m4-single-only -ml -S -g
file: slalom.i from http://shell14.ba.best.com/pub.t/tm2/stress-1.14.tar.gz

 1346 0698 636D                 mov     r6,r13
 1347 069a C363                 mov     r12,r3          <- here
 1350 069c 4B11                 mov.l   r4,@(44,r1)
 1353 069e 00E2                 mov     #0,r2
 1354 06a0 047D                 add     #4,r13
 1355 06a2 221E                 mov.l   r2,@(8,r14)
 1356 06a4 FC73                 add     #-4,r3          <- here
 1357 06a6 241E                 mov.l   r2,@(16,r14)
 1358 06a8 D51E                 mov.l   r13,@(20,r14)
 1359 06aa 361E                 mov.l   r3,@(24,r14)    <- here
 1360 06ac 171E                 mov.l   r1,@(28,r14)
1524:slalom.i      ****                 numpat = loop[iface][1] - loop[iface][0] + 1;
 1363 06ae E755                 mov.l   @(28,r14),r5
1525:slalom.i      ****                 tmp3 = 0.0;
1526:slalom.i      ****                 if (iface >= 3)
 1366                   .L168:
...
 1390 06c2 E650                 mov.l   @(24,r14),r0
 1391 06c4 08FC                 fmov.s  @r0,fr12
...
1528:slalom.i      ****                 numcol = (int) (sqrt(numpat * box[iface] / box[iface + 1]) + 0.5);
 1394                   .LM206:
 1395 06c6 5A4A                 lds     r10,fpul
 1396 06c8 C8F1                 fmov.s  @r12,fr1        <- here
 1397 06ca 2DF4                 float   fpul,fr4
 1398 06cc E551                 mov.l   @(20,r14),r1
 1399 06ce 12F4                 fmul    fr1,fr4
 1400 06d0 18F1                 fmov.s  @r1,fr1
 1401 06d2 13F4                 fdiv    fr1,fr4
 1402 06d4 4CF0                 fmov    fr4,fr0
 1403 06d6 6DF0                 fsqrt   fr0
 1404 06d8 04F0                 fcmp/eq fr0,fr0
 1405 06da 0289                 bt      .L138
 1406 06dc 4CD1                 mov.l   .L158,r1
 1407 06de 0B41                 jsr     @r1
 1408 06e0 0900                 nop
...
1533:slalom.i      ****                 width = box[iface] / numcol;
 1428                   .LM209:
 1429 06f8 5A48                 lds     r8,fpul
 1430 06fa C8F5                 fmov.s  @r12,fr5        <- here
...
1554:slalom.i      ****                                 area[ipatch] = tmp4;
 1587                   .LM247:
 1588 07a2 40E0                 mov     #64,r0
 1590                   .LM248:
 1591 07a4 CAF6                 fmov.s  fr12,@r6
...
 1629 07c0 E451                 mov.l   @(16,r14),r1
 1630 07c2 047D                 add     #4,r13
 1631 07c4 E552                 mov.l   @(20,r14),r2
 1632 07c6 047C                 add     #4,r12          <- here
 1633 07c8 E653                 mov.l   @(24,r14),r3    <- here
 1634 07ca 0871                 add     #8,r1
 1635 07cc E755                 mov.l   @(28,r14),r5
 1636 07ce 0472                 add     #4,r2
 1637 07d0 E257                 mov.l   @(8,r14),r7
 1638 07d2 0473                 add     #4,r3           <- here
 1639 07d4 141E                 mov.l   r1,@(16,r14)
 1640 07d6 0875                 add     #8,r5
 1641 07d8 05E1                 mov     #5,r1
 1642 07da 0177                 add     #1,r7
 1643 07dc 251E                 mov.l   r2,@(20,r14)
 1644 07de 1737                 cmp/gt  r1,r7
 1645 07e0 361E                 mov.l   r3,@(24,r14)    <- here
 1646 07e2 571E                 mov.l   r5,@(28,r14)
 1647 07e4 028D                 bt.s    .L167
 1648 07e6 721E                 mov.l   r7,@(8,r14)
 1649 07e8 62AF                 bra     .L168
 1650 07ea E755                 mov.l   @(28,r14),r5
 1651                   .L167:

Here's another sample which is somewhat more benign; no register spill
is generated, but there are two givs which fail to be combined:

vers: CVS current
opts: -O2 -m4-single-only -ml -S -g
file: imdct.i from http://shell14.ba.best.com/pub.t/tm2/stress-1.14.tar.gz

3358:./imdct.i     ****         for(i=0; i< 512 /8; i++)
 2384                   .LM350:
 2385 080c 0369                 mov     r0,r9           <- here
 2386 080e 0368                 mov     r0,r8
 2387 0810 8591                 mov.w   .L215,r1
 2388 0812 0479                 add     #4,r9           <- here
 2389 0814 8490                 mov.w   .L216,r0
 2390 0816 40EB                 mov     #64,r11
 2391 0818 E262                 mov.l   @r14,r2
 2392 081a 46D4                 mov.l   .L212,r4
 2393 081c 1C38                 add     r1,r8
 2394 081e 48DC                 mov.l   .L217,r12
 2395 0820 0C32                 add     r0,r2
 2396 0822 7E9D                 mov.w   .L218,r13
 2397 0824 1C34                 add     r1,r4
 2398 0826 E260                 mov.l   @r14,r0
 2399 0828 C366                 mov     r12,r6
 2400 082a 46D1                 mov.l   .L219,r1
 2401 082c 0476                 add     #4,r6
 2402 082e 46D3                 mov.l   .L220,r3
 2403 0830 0365                 mov     r0,r5           <- here
 2404 0832 40D7                 mov.l   .L212,r7
 2405 0834 0C3D                 add     r0,r13
 2406 0836 759A                 mov.w   .L221,r10
 2407 0838 0475                 add     #4,r5           <- here
 2408 083a 271E                 mov.l   r2,@(28,r14)
 2409 083c 0477                 add     #4,r7
 2410 083e 191E                 mov.l   r1,@(36,r14)
 2411 0840 00E2                 mov     #0,r2
 2412 0842 381E                 mov.l   r3,@(32,r14)
 2413 0844 0361                 mov     r0,r1
 2414 0846 CA1E                 mov.l   r12,@(40,r14)
 2415                   .L187:
3359:./imdct.i     ****         {
...
 2443 0866 1AF5                 fmov.s  fr1,@r5         <- here
3362:./imdct.i     ****                 y[512 /4+2*i]    = -buf_1[i].real       * window[512 /4+2*i];
 2446 0868 E953                 mov.l   @(36,r14),r3
 2449 086a 0875                 add     #8,r5           <- here
...
 2510 08b8 98F2                 fmov.s  @r9,fr2         <- here
 2511 08ba 3793                 mov.w   .L225,r3
 2512 08bc 2830                 sub     r2,r0
 2513 08be C6F1                 fmov.s  @(r0,r12),fr1
 2514 08c0 0879                 add     #8,r9           <- here
...
 2534 08d8 0872                 add     #8,r2
 2537 08da 12F2                 fmul    fr1,fr2
 2538 08dc 1360                 mov     r1,r0
 2541 08de 0871                 add     #8,r1
 2544 08e0 B28F                 bf.s    .L187
 2545 08e2 27F3                 fmov.s  fr2,@(r0,r3)

Toshi