This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: -mpower4 sched2 bug miscompiles linux kernel
- From: David Edelsohn <dje at watson dot ibm dot com>
- To: Alan Modra <amodra at bigpond dot net dot au>
- Cc: Jeffrey Law <law at redhat dot com>, Geoff Keating <geoffk at geoffk dot org>, gcc-patches at gcc dot gnu dot org
- Date: Tue, 10 Sep 2002 16:09:11 -0400
- Subject: Re: -mpower4 sched2 bug miscompiles linux kernel
I tracked down the -mstring problem to adjust_automodify_address
expecting the address argument to already contain the offset. See the
comment above the function in emit-rtl.c and its use in expr.c. To use
adjust_automodify_address, the code needs to be organized as:
if (mode == BLKmode)
{
/* Move the address into scratch registers. The movstrsi
patterns require zero offset. */
rtx dest_reg = copy_addr_to_reg (plus_constant (XEXP (orig_dest,
0),
offset));
rtx src_reg = copy_addr_to_reg (plus_constant (XEXP (orig_src,
0),
offset));
/* Addresses in registers are known to be legitimate. */
src = adjust_automodify_address_nv (orig_src, mode,
src_reg, offset);
dest = adjust_automodify_address_nv (orig_dest, mode,
dest_reg, offset);
emit_insn ((*gen_func.movstrsi) (dest, src,
GEN_INT (move_bytes & 31),
align_rtx));
if (bytes > move_bytes)
{
if (! TARGET_POWERPC64)
{
emit_insn (gen_addsi3 (src_reg, src_reg,
GEN_INT (move_bytes)));
emit_insn (gen_addsi3 (dest_reg, dest_reg,
GEN_INT (move_bytes)));
}
else
{
emit_insn (gen_adddi3 (src_reg, src_reg,
GEN_INT (move_bytes)));
emit_insn (gen_adddi3 (dest_reg, dest_reg,
GEN_INT (move_bytes)));
}
}
}
Based on the other uses of adjust_automodify_address (and the
name), that function is supposed to be used for auto-increment and
auto-decrement addresses, which is not how it is used in your patch.
I think the initial bug may be related to the block move code
explicitly incrementing the address. The adjust_automodify_address call
restores GCC's knowledge about the increment, but the design still goes
behind the back of the compiler.
This leads me to believe that we fundamentally should be using
adjust_address in all instances and then later ensuring the address is in
a register. We only should need to copy the address to a register if
reload cannot legitimize the address itself. I assume that is the case?
Appended is my current version of the patch.
David
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.374
diff -c -p -r1.374 rs6000.c
*** rs6000.c 10 Sep 2002 12:39:19 -0000 1.374
--- rs6000.c 10 Sep 2002 19:50:48 -0000
*************** struct builtin_description
*** 167,173 ****
static void rs6000_add_gc_roots PARAMS ((void));
static int num_insns_constant_wide PARAMS ((HOST_WIDE_INT));
- static rtx expand_block_move_mem PARAMS ((enum machine_mode, rtx, rtx));
static void validate_condition_mode
PARAMS ((enum rtx_code, enum machine_mode));
static rtx rs6000_generate_compare PARAMS ((enum rtx_code));
--- 167,172 ----
*************** rs6000_common_init_builtins ()
*** 6044,6064 ****
}
}
- /* Generate a memory reference for expand_block_move, copying volatile,
- and other bits from an original memory reference. */
-
- static rtx
- expand_block_move_mem (mode, addr, orig_mem)
- enum machine_mode mode;
- rtx addr;
- rtx orig_mem;
- {
- rtx mem = gen_rtx_MEM (mode, addr);
-
- MEM_COPY_ATTRIBUTES (mem, orig_mem);
- return mem;
- }
-
/* Expand a block move operation, and return 1 if successful. Return 0
if we should let the compiler generate normal code.
--- 6063,6068 ----
*************** expand_block_move (operands)
*** 6081,6094 ****
int align;
int bytes;
int offset;
- int num_reg;
- int i;
- rtx src_reg;
- rtx dest_reg;
- rtx src_addr;
- rtx dest_addr;
- rtx tmp_reg;
- rtx stores[MAX_MOVE_REG];
int move_bytes;
/* If this is not a fixed size move, just call memcpy */
--- 6085,6090 ----
*************** expand_block_move (operands)
*** 6110,6123 ****
if (bytes > (TARGET_POWERPC64 ? 64 : 32))
return 0;
- /* Move the address into scratch registers. */
- dest_reg = copy_addr_to_reg (XEXP (orig_dest, 0));
- src_reg = copy_addr_to_reg (XEXP (orig_src, 0));
-
if (TARGET_STRING) /* string instructions are available */
{
! for ( ; bytes > 0; bytes -= move_bytes)
{
if (bytes > 24 /* move up to 32 bytes at a time */
&& ! fixed_regs[5]
&& ! fixed_regs[6]
--- 6106,6122 ----
if (bytes > (TARGET_POWERPC64 ? 64 : 32))
return 0;
if (TARGET_STRING) /* string instructions are available */
{
! for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
{
+ union {
+ rtx (*movstrsi) PARAMS ((rtx, rtx, rtx, rtx));
+ rtx (*mov) PARAMS ((rtx, rtx));
+ } gen_func;
+ enum machine_mode mode = BLKmode;
+ rtx src, dest, tmp_reg;
+
if (bytes > 24 /* move up to 32 bytes at a time */
&& ! fixed_regs[5]
&& ! fixed_regs[6]
*************** expand_block_move (operands)
*** 6129,6143 ****
&& ! fixed_regs[12])
{
move_bytes = (bytes > 32) ? 32 : bytes;
! emit_insn (gen_movstrsi_8reg (expand_block_move_mem (BLKmode,
! dest_reg,
! orig_dest),
! expand_block_move_mem (BLKmode,
! src_reg,
! orig_src),
! GEN_INT ((move_bytes == 32)
! ? 0 : move_bytes),
! align_rtx));
}
else if (bytes > 16 /* move up to 24 bytes at a time */
&& ! fixed_regs[5]
--- 6128,6134 ----
&& ! fixed_regs[12])
{
move_bytes = (bytes > 32) ? 32 : bytes;
! gen_func.movstrsi = gen_movstrsi_8reg;
}
else if (bytes > 16 /* move up to 24 bytes at a time */
&& ! fixed_regs[5]
*************** expand_block_move (operands)
*** 6148,6161 ****
&& ! fixed_regs[10])
{
move_bytes = (bytes > 24) ? 24 : bytes;
! emit_insn (gen_movstrsi_6reg (expand_block_move_mem (BLKmode,
! dest_reg,
! orig_dest),
! expand_block_move_mem (BLKmode,
! src_reg,
! orig_src),
! GEN_INT (move_bytes),
! align_rtx));
}
else if (bytes > 8 /* move up to 16 bytes at a time */
&& ! fixed_regs[5]
--- 6139,6145 ----
&& ! fixed_regs[10])
{
move_bytes = (bytes > 24) ? 24 : bytes;
! gen_func.movstrsi = gen_movstrsi_6reg;
}
else if (bytes > 8 /* move up to 16 bytes at a time */
&& ! fixed_regs[5]
*************** expand_block_move (operands)
*** 6164,6177 ****
&& ! fixed_regs[8])
{
move_bytes = (bytes > 16) ? 16 : bytes;
! emit_insn (gen_movstrsi_4reg (expand_block_move_mem (BLKmode,
! dest_reg,
! orig_dest),
! expand_block_move_mem (BLKmode,
! src_reg,
! orig_src),
! GEN_INT (move_bytes),
! align_rtx));
}
else if (bytes >= 8 && TARGET_POWERPC64
/* 64-bit loads and stores require word-aligned
--- 6148,6154 ----
&& ! fixed_regs[8])
{
move_bytes = (bytes > 16) ? 16 : bytes;
! gen_func.movstrsi = gen_movstrsi_4reg;
}
else if (bytes >= 8 && TARGET_POWERPC64
/* 64-bit loads and stores require word-aligned
*************** expand_block_move (operands)
*** 6179,6286 ****
&& (align >= 8 || (! STRICT_ALIGNMENT && align >= 4)))
{
move_bytes = 8;
! tmp_reg = gen_reg_rtx (DImode);
! emit_move_insn (tmp_reg,
! expand_block_move_mem (DImode,
! src_reg, orig_src));
! emit_move_insn (expand_block_move_mem (DImode,
! dest_reg, orig_dest),
! tmp_reg);
}
else if (bytes > 4 && !TARGET_POWERPC64)
{ /* move up to 8 bytes at a time */
move_bytes = (bytes > 8) ? 8 : bytes;
! emit_insn (gen_movstrsi_2reg (expand_block_move_mem (BLKmode,
! dest_reg,
! orig_dest),
! expand_block_move_mem (BLKmode,
! src_reg,
! orig_src),
! GEN_INT (move_bytes),
! align_rtx));
}
else if (bytes >= 4 && (align >= 4 || ! STRICT_ALIGNMENT))
{ /* move 4 bytes */
move_bytes = 4;
! tmp_reg = gen_reg_rtx (SImode);
! emit_move_insn (tmp_reg,
! expand_block_move_mem (SImode,
! src_reg, orig_src));
! emit_move_insn (expand_block_move_mem (SImode,
! dest_reg, orig_dest),
! tmp_reg);
}
else if (bytes == 2 && (align >= 2 || ! STRICT_ALIGNMENT))
{ /* move 2 bytes */
move_bytes = 2;
! tmp_reg = gen_reg_rtx (HImode);
! emit_move_insn (tmp_reg,
! expand_block_move_mem (HImode,
! src_reg, orig_src));
! emit_move_insn (expand_block_move_mem (HImode,
! dest_reg, orig_dest),
! tmp_reg);
}
else if (bytes == 1) /* move 1 byte */
{
move_bytes = 1;
! tmp_reg = gen_reg_rtx (QImode);
! emit_move_insn (tmp_reg,
! expand_block_move_mem (QImode,
! src_reg, orig_src));
! emit_move_insn (expand_block_move_mem (QImode,
! dest_reg, orig_dest),
! tmp_reg);
}
else
{ /* move up to 4 bytes at a time */
move_bytes = (bytes > 4) ? 4 : bytes;
! emit_insn (gen_movstrsi_1reg (expand_block_move_mem (BLKmode,
! dest_reg,
! orig_dest),
! expand_block_move_mem (BLKmode,
! src_reg,
! orig_src),
! GEN_INT (move_bytes),
! align_rtx));
}
! if (bytes > move_bytes)
{
! if (! TARGET_POWERPC64)
{
! emit_insn (gen_addsi3 (src_reg, src_reg,
! GEN_INT (move_bytes)));
! emit_insn (gen_addsi3 (dest_reg, dest_reg,
! GEN_INT (move_bytes)));
}
! else
{
! emit_insn (gen_adddi3 (src_reg, src_reg,
! GEN_INT (move_bytes)));
! emit_insn (gen_adddi3 (dest_reg, dest_reg,
! GEN_INT (move_bytes)));
}
}
}
}
else /* string instructions not available */
{
! num_reg = offset = 0;
! for ( ; bytes > 0; (bytes -= move_bytes), (offset += move_bytes))
{
! /* Calculate the correct offset for src/dest */
! if (offset == 0)
! {
! src_addr = src_reg;
! dest_addr = dest_reg;
! }
! else
! {
! src_addr = plus_constant (src_reg, offset);
! dest_addr = plus_constant (dest_reg, offset);
! }
/* Generate the appropriate load and store, saving the stores
for later. */
--- 6156,6235 ----
&& (align >= 8 || (! STRICT_ALIGNMENT && align >= 4)))
{
move_bytes = 8;
! mode = DImode;
! gen_func.mov = gen_movdi;
}
else if (bytes > 4 && !TARGET_POWERPC64)
{ /* move up to 8 bytes at a time */
move_bytes = (bytes > 8) ? 8 : bytes;
! gen_func.movstrsi = gen_movstrsi_2reg;
}
else if (bytes >= 4 && (align >= 4 || ! STRICT_ALIGNMENT))
{ /* move 4 bytes */
move_bytes = 4;
! mode = SImode;
! gen_func.mov = gen_movsi;
}
else if (bytes == 2 && (align >= 2 || ! STRICT_ALIGNMENT))
{ /* move 2 bytes */
move_bytes = 2;
! mode = HImode;
! gen_func.mov = gen_movhi;
}
else if (bytes == 1) /* move 1 byte */
{
move_bytes = 1;
! mode = QImode;
! gen_func.mov = gen_movqi;
}
else
{ /* move up to 4 bytes at a time */
move_bytes = (bytes > 4) ? 4 : bytes;
! gen_func.movstrsi = gen_movstrsi_1reg;
}
! src = adjust_address (orig_src, mode, offset);
! dest = adjust_address (orig_dest, mode, offset);
!
! if (mode == BLKmode)
{
! if (!REG_P (XEXP (src, 0)))
{
! src = gen_rtx_MEM (BLKmode,
! copy_addr_to_reg (XEXP (src, 0)));
! MEM_COPY_ATTRIBUTES (src, orig_src);
}
! if (!REG_P (XEXP (dest, 0)))
{
! dest = gen_rtx_MEM (BLKmode,
! copy_addr_to_reg (XEXP (dest, 0)));
! MEM_COPY_ATTRIBUTES (dest, orig_dest);
}
+ emit_insn ((*gen_func.movstrsi) (dest, src,
+ GEN_INT (move_bytes & 31),
+ align_rtx));
+ }
+ else
+ {
+ tmp_reg = gen_reg_rtx (mode);
+
+ emit_insn ((*gen_func.mov) (tmp_reg, src));
+ emit_insn ((*gen_func.mov) (dest, tmp_reg));
}
}
}
else /* string instructions not available */
{
! rtx stores[MAX_MOVE_REG];
! int num_reg = 0;
! int i;
!
! for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
{
! rtx (*gen_mov_func) PARAMS ((rtx, rtx));
! enum machine_mode mode;
! rtx src, dest, tmp_reg;
/* Generate the appropriate load and store, saving the stores
for later. */
*************** expand_block_move (operands)
*** 6290,6345 ****
&& (align >= 8 || (! STRICT_ALIGNMENT && align >= 4)))
{
move_bytes = 8;
! tmp_reg = gen_reg_rtx (DImode);
! emit_insn (gen_movdi (tmp_reg,
! expand_block_move_mem (DImode,
! src_addr,
! orig_src)));
! stores[num_reg++] = gen_movdi (expand_block_move_mem (DImode,
! dest_addr,
! orig_dest),
! tmp_reg);
}
else if (bytes >= 4 && (align >= 4 || ! STRICT_ALIGNMENT))
{
move_bytes = 4;
! tmp_reg = gen_reg_rtx (SImode);
! emit_insn (gen_movsi (tmp_reg,
! expand_block_move_mem (SImode,
! src_addr,
! orig_src)));
! stores[num_reg++] = gen_movsi (expand_block_move_mem (SImode,
! dest_addr,
! orig_dest),
! tmp_reg);
}
else if (bytes >= 2 && (align >= 2 || ! STRICT_ALIGNMENT))
{
move_bytes = 2;
! tmp_reg = gen_reg_rtx (HImode);
! emit_insn (gen_movhi (tmp_reg,
! expand_block_move_mem (HImode,
! src_addr,
! orig_src)));
! stores[num_reg++] = gen_movhi (expand_block_move_mem (HImode,
! dest_addr,
! orig_dest),
! tmp_reg);
}
else
{
move_bytes = 1;
! tmp_reg = gen_reg_rtx (QImode);
! emit_insn (gen_movqi (tmp_reg,
! expand_block_move_mem (QImode,
! src_addr,
! orig_src)));
! stores[num_reg++] = gen_movqi (expand_block_move_mem (QImode,
! dest_addr,
! orig_dest),
! tmp_reg);
}
if (num_reg >= MAX_MOVE_REG)
{
for (i = 0; i < num_reg; i++)
--- 6239,6273 ----
&& (align >= 8 || (! STRICT_ALIGNMENT && align >= 4)))
{
move_bytes = 8;
! mode = DImode;
! gen_mov_func = gen_movdi;
}
else if (bytes >= 4 && (align >= 4 || ! STRICT_ALIGNMENT))
{
move_bytes = 4;
! mode = SImode;
! gen_mov_func = gen_movsi;
}
else if (bytes >= 2 && (align >= 2 || ! STRICT_ALIGNMENT))
{
move_bytes = 2;
! mode = HImode;
! gen_mov_func = gen_movhi;
}
else
{
move_bytes = 1;
! mode = QImode;
! gen_mov_func = gen_movqi;
}
+ src = adjust_address (orig_src, mode, offset);
+ dest = adjust_address (orig_dest, mode, offset);
+ tmp_reg = gen_reg_rtx (mode);
+
+ emit_insn ((*gen_mov_func) (tmp_reg, src));
+ stores[num_reg++] = (*gen_mov_func) (dest, tmp_reg);
+
if (num_reg >= MAX_MOVE_REG)
{
for (i = 0; i < num_reg; i++)