This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Installed] Patch to handling of floats in sh constant pools
- From: Richard Sandiford <rsandifo at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: 18 Nov 2002 13:32:11 +0000
- Subject: [Installed] Patch to handling of floats in sh constant pools
If TARGET_SHCOMPACT, and we have a sequence of constants like this:
A: SFmode
B: DFmode
C: SFmode
D: SFmode
sh.c:dump_table() will realise that B needs a word of padding
and will try to find a later constant to plug the gap. In this
case it picks C, so the final constant order is: A C B D.
This reordering complicates machine_dependent_reorg(), which tries
to access consecutive floats using post-increment addressing.
It uses this code to check whether constants might be reordered:
if (TARGET_SHCOMPACT)
{
/* The first SFmode constant after a DFmode
constant may be pulled before a sequence
of DFmode constants, so the second SFmode
needs a label, just in case. */
if (GET_MODE_SIZE (mode) == 4)
{
if (last_float && may_need_align)
last_float = 0;
may_need_align = 0;
}
if (last_float
&& (GET_MODE_SIZE (GET_MODE (last_float))
!= GET_MODE_SIZE (mode)))
{
last_float = 0;
if (GET_MODE_SIZE (mode) == 4)
may_need_align = 1;
}
}
Problem is, we also have this code beforehand:
if (last_float
&& reg_set_between_p (r0_rtx, last_float_move, scan))
last_float = 0;
So, if r0 is set between the uses of B and C, the second nested
if statement above wonn't detect the 'start of new sequence'
condition for C. Thus D might be accessed consecutively with C.
The code also doesn't check for DImode constants, which need
the same sort of alignment as DFmode ones.
Rather than fix the current scheme, it seems easier to stop
dump_table() from reordering constants that are accessed as
part of a post-increment sequence.
There's a trade-off in doing that. On the one hand, it means
that the second SFmode constant no longer needs a label 'just
in case', so the patch should reduce the number of 'mova'
instructions.
On the other hand, moving a constant can save 4 bytes, a bigger
win (spacewise) than avoiding a 'mova' instruction. But we can
often move some other constant instead, one that isn't part of
a sequence.
Tested on sh64-elf. Approved by Alex off list.
Richard
* config/sh/sh.c (pool_node): New field: part_of_sequence_p.
(add_constant): Set it.
(dump_table): Don't reorder a constant if part_of_sequence_p.
(machine_dependent_reorg): Assume that float constants will
stay in their original order if used as a sequence.
Index: config/sh/sh.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/sh/sh.c,v
retrieving revision 1.178
diff -c -d -p -F^[(a-zA-Z0-9_^#] -r1.178 sh.c
*** config/sh/sh.c 4 Nov 2002 16:57:10 -0000 1.178
--- config/sh/sh.c 15 Nov 2002 16:26:10 -0000
*************** typedef struct
*** 2244,2249 ****
--- 2244,2253 ----
rtx label; /* Label of value. */
rtx wend; /* End of window. */
enum machine_mode mode; /* Mode of value. */
+
+ /* True if this constant is accessed as part of a post-increment
+ sequence. Note that HImode constants are never accessed in this way. */
+ bool part_of_sequence_p;
} pool_node;
/* The maximum number of constants that can fit into one pool, since
*************** add_constant (x, mode, last_value)
*** 2317,2328 ****
/* Need a new one. */
pool_vector[pool_size].value = x;
if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
! lab = 0;
else
lab = gen_label_rtx ();
pool_vector[pool_size].mode = mode;
pool_vector[pool_size].label = lab;
pool_vector[pool_size].wend = NULL_RTX;
if (lab && pool_window_label)
{
newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
--- 2321,2336 ----
/* Need a new one. */
pool_vector[pool_size].value = x;
if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
! {
! lab = 0;
! pool_vector[pool_size - 1].part_of_sequence_p = true;
! }
else
lab = gen_label_rtx ();
pool_vector[pool_size].mode = mode;
pool_vector[pool_size].label = lab;
pool_vector[pool_size].wend = NULL_RTX;
+ pool_vector[pool_size].part_of_sequence_p = (lab == 0);
if (lab && pool_window_label)
{
newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
*************** dump_table (scan)
*** 2395,2401 ****
break;
case SImode:
case SFmode:
! if (align_insn)
{
for (lab = p->label; lab; lab = LABEL_REFS (lab))
emit_label_before (lab, align_insn);
--- 2403,2409 ----
break;
case SImode:
case SFmode:
! if (align_insn && !p->part_of_sequence_p)
{
for (lab = p->label; lab; lab = LABEL_REFS (lab))
emit_label_before (lab, align_insn);
*************** machine_dependent_reorg (first)
*** 3718,3724 ****
behind. */
rtx barrier = find_barrier (num_mova, mova, insn);
rtx last_float_move, last_float = 0, *last_float_addr;
- int may_need_align = 1;
if (num_mova && ! mova_p (mova))
{
--- 3726,3731 ----
*************** machine_dependent_reorg (first)
*** 3776,3802 ****
if (last_float
&& reg_set_between_p (r0_rtx, last_float_move, scan))
last_float = 0;
! if (TARGET_SHCOMPACT)
! {
! /* The first SFmode constant after a DFmode
! constant may be pulled before a sequence
! of DFmode constants, so the second SFmode
! needs a label, just in case. */
! if (GET_MODE_SIZE (mode) == 4)
! {
! if (last_float && may_need_align)
! last_float = 0;
! may_need_align = 0;
! }
! if (last_float
! && (GET_MODE_SIZE (GET_MODE (last_float))
! != GET_MODE_SIZE (mode)))
! {
! last_float = 0;
! if (GET_MODE_SIZE (mode) == 4)
! may_need_align = 1;
! }
! }
lab = add_constant (src, mode, last_float);
if (lab)
emit_insn_before (gen_mova (lab), scan);
--- 3783,3793 ----
if (last_float
&& reg_set_between_p (r0_rtx, last_float_move, scan))
last_float = 0;
! if (last_float
! && TARGET_SHCOMPACT
! && GET_MODE_SIZE (mode) != 4
! && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
! last_float = 0;
lab = add_constant (src, mode, last_float);
if (lab)
emit_insn_before (gen_mova (lab), scan);
*** /dev/null Tue Nov 14 21:44:43 2000
--- testsuite/gcc.c-torture/execute/20021118-2.c Fri Nov 15 16:25:35 2002
***************
*** 0 ****
--- 1,50 ----
+ /* Originally added to test SH constant pool layout. t1() failed for
+ non-PIC and t2() failed for PIC. */
+
+ int t1 (float *f, int i,
+ void (*f1) (double),
+ void (*f2) (float, float))
+ {
+ f1 (3.0);
+ f[i] = f[i + 1];
+ f2 (2.5f, 3.5f);
+ }
+
+ int t2 (float *f, int i,
+ void (*f1) (double),
+ void (*f2) (float, float),
+ void (*f3) (float))
+ {
+ f3 (6.0f);
+ f1 (3.0);
+ f[i] = f[i + 1];
+ f2 (2.5f, 3.5f);
+ }
+
+ void f1 (double d)
+ {
+ if (d != 3.0)
+ abort ();
+ }
+
+ void f2 (float f1, float f2)
+ {
+ if (f1 != 2.5f || f2 != 3.5f)
+ abort ();
+ }
+
+ void f3 (float f)
+ {
+ if (f != 6.0f)
+ abort ();
+ }
+
+ int main ()
+ {
+ float f[3] = { 2.0f, 3.0f, 4.0f };
+ t1 (f, 0, f1, f2);
+ t2 (f, 1, f1, f2, f3);
+ if (f[0] != 3.0f && f[1] != 4.0f)
+ abort ();
+ exit (0);
+ }