This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
choose_reload_regs

To: gcc-patches at gcc dot gnu dot org
Subject: choose_reload_regs
From: Alan Goluban <agoluban at rasips1 dot rasip dot fer dot hr>
Date: Thu, 24 Aug 2000 15:57:01 +0200 (MET DST)



B U G   R E P O R T:
********************

Version of gcc:
---------------
gcc version 2.95.2
Compiler is configured with --with-gnu-as option.

Description:
------------
  I found this problem on mips-sgi-irix5.3 platform, but  I think it's not
specific to that platform, as the problem (again, I think) resides in
"reload1.c" file. Originally, the problem emerged while compiling a
source frequently intermixed with "asm" assembler-statements. After my
program crashed with SIGSEGV, I examined assembler source generated by
gcc and discovered that one of the input-operands of the asm-statement
overlapped with the list of explicitly clobbered registers. After
rearrangement of clobbered registers, there was no overlap, and the program
worked well. So I decided to see where the problem is.

As the original source file generates about 20000 lines of assembler, here
is much more simpler source code, which even doesn't use "asm" statements.
It produces incorrect result due to the same gcc problem that causes original
(20000 lines) code to crash.

The following code produces incorrect result when compiled with -O2. If it
is compiled without optimization, or with -O, it produces correct result.

/***************************************************/
extern int	printf(const char *, ...);
int x[8] = {
	0x7df687f5, 0x04c3e29e,
	0x473b4134, 0x040f5d4c,
	0x4a3c6521, 0x1fa50649,
	0x7e8faa3c, 0x705abcdd
};
int y[8] = {
	0x1dbe8552, 0x2f37db49,
	0x539940ec, 0x16b7d56a,
	0x7bf8d5a4, 0x7dd0b065,
	0x44ba0959, 0x2d40ca96
};
int main(void){
	long long a, b, c, aa, bb, cc;
	int i, z[8];
	for(i = 0; i < 8; i += 2){
		a = x[i];
		b = y[i];
		aa = x[i + 1];
		bb = y[i + 1];
		c = a*bb - aa*b;
		cc = a*b + aa*bb;
		c += ((long long)1 << 25);
		cc += ((long long)1 << 25);
		c >>= 26;
		cc >>= 26;
		z[i] = (int)c;
		z[i + 1] = (int)cc;
	}
	for(i = 0; i < 8; i++){
		printf("z[%02d]=%08x\n", i, z[i]);
	}
	return 0;
}
/***************************************************/

This code is a little complicated, but simplifying eliminates bug, as this
bug is in fact a "race condition".
Snapshot of commmands for recreating this problem, assuming the code is in
file problem.c, follows:

% gcc -o bad -O2 problem.c
% gcc -o good problem.c
% ./bad > bad_out
% ./good > good_out
% diff bad_out good_out
4c4
< z[03]=e7c5c080
---
> z[03]=e7c5c040
%

Here, I include the assembler output of the command "gcc -S -g -O2 problem.c",
to which I would reference below, in the explanation of the problem and patch:

#################################################
	.file	1 "bww.c"
	.option pic2
gcc2_compiled.:
__gnu_compiled_c:
.stabs "/tmp/finished/src/arch/mipsr4k/gcc-bug/",100,0,0,$Ltext0
.stabs "bww.c",100,0,0,$Ltext0
	.text
$Ltext0:
.stabs "int:t1=r1;0020000000000;0017777777777;",128,0,0,0
.stabs "char:t2=r2;0;255;",128,0,0,0
.stabs "long int:t3=r1;0020000000000;0017777777777;",128,0,0,0
.stabs "unsigned int:t4=r1;0000000000000;0037777777777;",128,0,0,0
.stabs "long unsigned int:t5=r1;0000000000000;0037777777777;",128,0,0,0
.stabs "long long int:t6=r1;01000000000000000000000;0777777777777777777777;",128,0,0,0
.stabs "long long unsigned int:t7=r1;0000000000000;01777777777777777777777;",128,0,0,0
.stabs "short int:t8=r8;-32768;32767;",128,0,0,0
.stabs "short unsigned int:t9=r9;0;65535;",128,0,0,0
.stabs "signed char:t10=r10;-128;127;",128,0,0,0
.stabs "unsigned char:t11=r11;0;255;",128,0,0,0
.stabs "float:t12=r1;4;0;",128,0,0,0
.stabs "double:t13=r1;8;0;",128,0,0,0
.stabs "long double:t14=r1;8;0;",128,0,0,0
.stabs "complex int:t15=s8real:1,0,32;imag:1,32,32;;",128,0,0,0
.stabs "complex float:t16=r16;4;0;",128,0,0,0
.stabs "complex double:t17=r17;8;0;",128,0,0,0
.stabs "complex long double:t18=r18;8;0;",128,0,0,0
.stabs "void:t19=19",128,0,0,0
	.globl	x
	.data
.stabs "x:G20=ar1;0;7;1",32,0,2,0
	.align	2
x:
	.word	2113308661
	.word	79946398
	.word	1195065652
	.word	68115788
	.word	1245472033
	.word	530908745
	.word	2123344444
	.word	1884994781
	.globl	y
.stabs "y:G20",32,0,8,0
	.align	2
y:
	.word	499025234
	.word	792189769
	.word	1402552556
	.word	381146474
	.word	2079905188
	.word	2110828645
	.word	1153042777
	.word	759220886
	.rdata
	.align	2
$LC0:

	.byte	0x7a,0x5b,0x25,0x30,0x32,0x64,0x5d,0x3d
	.byte	0x25,0x30,0x38,0x78,0xa,0x0
	.text
	.align	2
	.globl	main
$LM1:
	.stabn 68,0,14,$LM1
.stabs "bww.c",132,0,0,$Ltext0
	.ent	main
main:
	.frame	$sp,144,$31		# vars= 72, regs= 11/0, args= 16, extra= 8
	.mask	0xd0ff0000,-8
	.fmask	0x00000000,0
	.set	noreorder
	.cpload	$25
	.set	reorder
	subu	$sp,$sp,144
	.cprestore 16
$LM2:
	.stabn 68,0,15,$LM2
$LBB2:
$LM3:
	.stabn 68,0,17,$LM3
	addu	$2,$sp,24
	la	$25,y+4
	la	$24,x+4
	sw	$fp,132($sp)
	li	$fp,6			# 0x6
	sw	$31,136($sp)
	sw	$28,128($sp)
	sw	$23,124($sp)
	sw	$22,120($sp)
	sw	$21,116($sp)
	sw	$20,112($sp)
	sw	$19,108($sp)
	sw	$18,104($sp)
	sw	$17,100($sp)
	sw	$16,96($sp)
	sw	$2,88($sp)
	sw	$2,92($sp)
$L6:
$LM4:
	.stabn 68,0,18,$LM4
	lw	$2,-4($24)
$LM5:
	.stabn 68,0,21,$LM5
	lw	$3,0($25)
$LM6:
	.stabn 68,0,18,$LM6
	move	$15,$2
$LM7:
	.stabn 68,0,21,$LM7
	move	$11,$3
$LM8:
	.stabn 68,0,22,$LM8
	multu	$15,$11
$LM9:
	.stabn 68,0,19,$LM9
	lw	$4,-4($25)
$LM10:
	.stabn 68,0,20,$LM10
	lw	$5,0($24)
$LM11:
	.stabn 68,0,21,$LM11
	sra	$10,$3,31
$LM12:
	.stabn 68,0,22,$LM12
	mfhi	$16
	mflo	$17
$LM13:
	.stabn 68,0,19,$LM13
	#nop
	move	$3,$4
$LM14:
	.stabn 68,0,20,$LM14
	move	$7,$5
$LM15:
	.stabn 68,0,22,$LM15
	multu	$7,$3
	mfhi	$8
	mflo	$9
$LM16:
	.stabn 68,0,23,$LM16
	#nop
	#nop
	multu	$15,$3
$LM17:
	.stabn 68,0,18,$LM17
	sra	$14,$2,31
$LM18:
	.stabn 68,0,19,$LM18
	sra	$2,$4,31
$LM19:
	.stabn 68,0,20,$LM19
	sra	$6,$5,31
$LM20:
	.stabn 68,0,23,$LM20
	mfhi	$4
	mflo	$5
	#nop
	#nop
	multu	$7,$11
	mfhi	$12
	mflo	$13
$LM21:
	.stabn 68,0,22,$LM21
	#nop
	#nop
	mult	$15,$10
	mflo	$18
	#nop
	#nop
	mult	$11,$14
	mflo	$19
	#nop
	#nop
	mult	$7,$2
	mflo	$20
	#nop
	#nop
	mult	$3,$6
	mflo	$21
$LM22:
	.stabn 68,0,23,$LM22
	#nop
	#nop
	mult	$15,$2
	mflo	$22
	#nop
	#nop
	mult	$3,$14
	mflo	$14
	#nop
	#nop
	mult	$7,$10
$LM23:
	.stabn 68,0,17,$LM23
	addu	$25,$25,8
$LM24:
	.stabn 68,0,23,$LM24
	mflo	$23
$LM25:
	.stabn 68,0,17,$LM25
	#nop
	addu	$24,$24,8
	addu	$fp,$fp,-2
$LM26:
	.stabn 68,0,23,$LM26
	mult	$11,$6
$LM27:
	.stabn 68,0,22,$LM27
	addu	$16,$16,$18
	addu	$16,$16,$19
	addu	$8,$8,$20
	addu	$8,$8,$21
	sltu	$2,$17,$9
	subu	$17,$17,$9
	subu	$16,$16,$8
	subu	$16,$16,$2
$LM28:
	.stabn 68,0,23,$LM28
	addu	$4,$4,$22
	addu	$4,$4,$14
	addu	$12,$12,$23
	mflo	$6
	#nop
	#nop
	addu	$12,$12,$6
	addu	$5,$5,$13
	sltu	$2,$5,$13
	addu	$4,$4,$12
	addu	$4,$4,$2
$LM29:
	.stabn 68,0,24,$LM29
	li	$2,0		# line 231: high 32 bits of (1 << 25)
	li	$3,33554432	# line 232: low 32 bits of (1 << 25)
	addu	$17,$17,$3
	sltu	$6,$17,$3	# line 234: carry is in $6
	addu	$16,$16,$2
	addu	$16,$16,$6
$LM30:
	.stabn 68,0,25,$LM30
	addu	$5,$5,$3	# line 239: adding low-part of CONST
	sltu	$2,$5,$3	# line 240: carry is in $2 -> clobbering $2
	addu	$4,$4,$2	# line 241: adding high-part of CONST (error)
	addu	$4,$4,$2	# line 242: adding carry ($2)
$LM31:
	.stabn 68,0,26,$LM31
	srl	$17,$17,26
	sll	$3,$16,6
	or	$17,$17,$3
	sra	$16,$16,26
$LM32:
	.stabn 68,0,27,$LM32
	srl	$5,$5,26
	sll	$2,$4,6
	or	$5,$5,$2
$LM33:
	.stabn 68,0,28,$LM33
	lw	$3,92($sp)
$LM34:
	.stabn 68,0,27,$LM34
	sra	$4,$4,26
$LM35:
	.stabn 68,0,28,$LM35
	sw	$17,0($3)
$LM36:
	.stabn 68,0,29,$LM36
	sw	$5,4($3)
$LM37:
	.stabn 68,0,17,$LM37
	addu	$3,$3,8
	.set	noreorder
	.set	nomacro
	bgez	$fp,$L6
	sw	$3,92($sp)
	.set	macro
	.set	reorder

$LM38:
	.stabn 68,0,31,$LM38
	move	$fp,$0
	lw	$16,88($sp)
$L11:
$LM39:
	.stabn 68,0,32,$LM39
	lw	$6,0($16)
$LM40:
	.stabn 68,0,32,$LM40
	move	$5,$fp
$LM41:
	.stabn 68,0,31,$LM41
	addu	$fp,$fp,1
$LM42:
	.stabn 68,0,32,$LM42
	la	$4,$LC0
	la	$25,printf
	jal	$31,$25
$LM43:
	.stabn 68,0,31,$LM43
	slt	$3,$fp,8
	.set	noreorder
	.set	nomacro
	bne	$3,$0,$L11
	addu	$16,$16,4
	.set	macro
	.set	reorder

$LM44:
	.stabn 68,0,34,$LM44
	lw	$31,136($sp)
	lw	$fp,132($sp)
	lw	$23,124($sp)
	lw	$22,120($sp)
	lw	$21,116($sp)
	lw	$20,112($sp)
	lw	$19,108($sp)
	lw	$18,104($sp)
	lw	$17,100($sp)
	lw	$16,96($sp)
	move	$2,$0
	.set	noreorder
	.set	nomacro
	j	$31
	addu	$sp,$sp,144
	.set	macro
	.set	reorder

$LM45:
	.stabn 68,0,35,$LM45
$LBE2:
	.end	main
.stabs "main:F1",36,0,14,main
.stabs "a:r6",64,0,15,14
.stabs "b:r6",64,0,15,2
.stabs "c:r6",64,0,15,16
.stabs "aa:r6",64,0,15,6
.stabs "bb:r6",64,0,15,10
.stabs "cc:r6",64,0,15,4
.stabs "i:r1",64,0,16,30
.stabs "z:20",128,0,16,-120
.stabn 192,0,0,$LBB2
.stabn 224,0,0,$LBE2

	.globl printf .text
#################################################

I included standard comments in this listing, beginning with "# line",
followed by line number and description of what that instruction does.
The lines of particular interest are 231, 232, 234, 239, 240, 241 and 242.





P A T C H:
**********

As I don't know much about gcc-internals, I would probably use funny terms
and word constructions, so I must apologize in advance. Moreover, I'm not
sure whether this is correct place to insert the changes, but using
intuition I think that's it. Aside of intuition, "patched" version of gcc
generates correct code for both the above described problematic programs
"problem.c" and for my original source with asms. Also, patched gcc compiles
without problems.


Changes to the gcc sources (patch-file):
----------------------------------------

If "gcc-2.95.2" is original source-tree and "gcc-2.95.2.p" patched one,
the patch file is (generated with gnu-diff, with args -cp):
---------------------------------------------------------------
*** gcc-2.95.2/gcc/reload1.c	Wed Jul  7 03:05:39 1999
--- gcc-2.95.2.p/gcc/reload1.c	Wed Aug 23 20:48:14 2000
*************** choose_reload_regs (chain)
*** 5942,5948 ****
  
  		      if (k == nr)
  			{
! 			  int i1;
  
  			  last_reg = (GET_MODE (last_reg) == mode
  				      ? last_reg : gen_rtx_REG (mode, i));
--- 5942,5948 ----
  
  		      if (k == nr)
  			{
! 			  int i1, i2;
  
  			  last_reg = (GET_MODE (last_reg) == mode
  				      ? last_reg : gen_rtx_REG (mode, i));
*************** choose_reload_regs (chain)
*** 5960,5965 ****
--- 5960,5973 ----
  				 reload_earlyclobbers[i1]))
  			      break;
  
+ 			  /* Virtual register resides in 'nr' hard
+ 			     registers. So check hard register range
+ 			     from i to i+nr-1 if it gets clobbered by
+ 			     the current insn (or 'asm' statement) */
+ 			  for(i2 = 0; i2 < nr; i2++)
+ 			     if(regno_clobbered_p(i + i2, insn))
+ 				break;
+ 
  			  if (i1 != n_earlyclobbers
  			      || ! (reload_reg_free_for_value_p
  				    (i, reload_opnum[r], reload_when_needed[r],
*************** choose_reload_regs (chain)
*** 5968,5973 ****
--- 5976,5984 ----
  			      || (TEST_HARD_REG_BIT (reg_used_in_insn, i)
  				  && reload_out[r]
  				  && ! TEST_HARD_REG_BIT (reg_reloaded_dead, i))
+ 			      /* Also, on't use this register(s) if
+ 				 it(they) would be clobbered by this insn */
+ 			      || (i2 != nr)
  			      /* Don't clobber the frame pointer.  */
  			      || (i == HARD_FRAME_POINTER_REGNUM
  				  && reload_out[r])
---------------------------------------------------------------


Explanation of the problem:
---------------------------

The problem starts when the function choose_reload_regs() is trying to figure
out if the current instruction can inherit a reload register from previous
instructions. As the flags are "-O2", the code falls to little lengthy
"if statement" (starting at line 5963 in original reload1.c):

---------------------------------------------------------------------
			  if (i1 != n_earlyclobbers
			      || ! (reload_reg_free_for_value_p
				    (i, reload_opnum[r], reload_when_needed[r],
				     reload_in[r], reload_out[r], r, 1))
			      /* Don't use it if we'd clobber a pseudo reg.  */
			      || (TEST_HARD_REG_BIT (reg_used_in_insn, i)
				  && reload_out[r]
				  && ! TEST_HARD_REG_BIT (reg_reloaded_dead, i))
			      /* Don't clobber the frame pointer.  */
			      || (i == HARD_FRAME_POINTER_REGNUM
				  && reload_out[r])
			      /* Don't really use the inherited spill reg
				 if we need it wider than we've got it.  */
			      || (GET_MODE_SIZE (reload_mode[r])
				  > GET_MODE_SIZE (mode))
			      || ! TEST_HARD_REG_BIT (reg_class_contents[(int) reload_reg_class[r]],
						      i)

			      /* If find_reloads chose reload_out as reload
				 register, stay with it - that leaves the
				 inherited register for subsequent reloads.  */
			      || (reload_out[r] && reload_reg_rtx[r]
				  && rtx_equal_p (reload_out[r],
						  reload_reg_rtx[r])))
---------------------------------------------------------------------

It checks for many things about compatible sizes of modes, etc. But one thing
seems obvious: there is a check whether there is an overlap among earlyclobbers
of the current insn an potential reload register, but there is no check for
overlap of explicit clobbers and potential reload register (! or ?). The
probability for this error seems to be small, as compile option must be O2
AND the problematic insn must have one of its inputs in virtual register AND
there must me previous insn which performed reloading of the same virtual
register to some hard register AND that hard register still must be valid...
Probably there is much more ANDs than I can figure out.

I added some debug code, if the upper if-statement is false, and current insn
clobbers choosed reg. Print is obtained by calling debug_rtx. I've done it in
code, rather than stopping at that point in a debugger and calling debug_rtx.
When I run such modifyed cc1 on problem.c, I got:


r=0;
debug_rtx(reload_in[0]):
*****************************
(const_int 33554432 [0x2000000])
*****************************

debug_rtx(reload_in_reg[0]):
*****************************
(reg:DI 129)
*****************************

regno=129, word=0, i=REGNO(reg_last_reload_reg[129])+word=2,
INSN_UID(insn)=166 INSN_UID(reg_reloaded_insn[i])=161

debug_rtx(reg_reloaded_insn[i]):
*****************************
(insn 161 442 162 (parallel[ 
            (set (reg/v:DI 16 s0)
                (plus:DI (reg/v:DI 16 s0)
                     (reg:DI 2 v0)))
            (clobber (reg:SI 6 a2))
        ] ) 10 {adddi3_internal_1} (insn_list/j/c 129 (nil))
      (expr_list:REG_UNUSED (reg:SI 6 a2)
        (nil)))
*****************************

debug_rtx(insn):
*****************************
(insn 166 162 167 (parallel[ 
            (set (reg/v:DI 4 a0)
                (plus:DI (reg/v:DI 4 a0)
                     (const_int 33554432 [0x2000000])))
            (clobber (reg:SI 2 v0))
        ] ) 10 {adddi3_internal_1} (insn_list/j/c 156 (nil))
      (expr_list:REG_UNUSED (reg:SI 2 v0)
        (nil)))
*****************************

This dump shows that insn 161 adds 64-bit constant (1 << 25) (reloaded in
register pair $2,$3) to the register pair $16,$17 (DI 16). Actual reload
insn is at lines 231 and 232 of the assmebler-dump. Register $6 is clobbered
with carry from adding low-parts (line 234). Insn 166 adds the same constant
(1 << 25) to the register pair $4,$5 (DI 4), so it is ok to inherit the
register pair $2,$3 (virtual register 129) from insn 161. But, insn 166
clobbers $2 with carry, (at line 240) and the original code in reload1.c
does not check for this case! Insn 166 first adds (now clobbered) high-part
at line 241, and then adds carry at line 242. High-part of constant and
carry are both in $2! Any time carry is 1 at this point, there is an error.
The output difference would be more drastic, if high-part contained other
value rather than zero, as it would be clobbered to 0 or 1 (the value carry
can have).

After applying this patch, modifyed gcc (cc1) generated correct output. Diff
between previous (incorrect) and new (correct) assembler output is:

------------------------------------------
239,240c239,243
<       addu    $5,$5,$3
<       sltu    $2,$5,$3
---
>       move    $6,$2
>       move    $7,$3
>       addu    $5,$5,$7
>       sltu    $2,$5,$7
>       addu    $4,$4,$6
242d244
<       addu    $4,$4,$2
256c258
<       lw      $3,92($sp)
---
>       lw      $7,92($sp)
262c264
<       sw      $17,0($3)
---
>       sw      $17,0($7)
265c267
<       sw      $5,4($3)
---
>       sw      $5,4($7)
268c270
<       addu    $3,$3,8
---
>       addu    $7,$7,8
272c274
<       sw      $3,92($sp)
---
>       sw      $7,92($sp)
------------------------------------------

Here, it is clear that the whole virtual register 129 which is in $2,$3 is
first copyed to $6,$7 (with new reload-insn). Now clobbering of $2 is
irrelevant for correct operation.


Explanation of the patch:
---------------------------

From the above explanation of the problem, the solution is almost evident.
Patched choose_reload_regs() additionally checks the range of the registers
from potential valid reload register 'i' to 'i+nr-1'. As the 'nr' is the
number of hard registers needed by current insn, it needs the register
range [i, i+nr> which *must not* be clobbered by insn. This is realized with
small for-loop in newly added variable 'i2'. If 'i2 != nr', it means that
one of the hard registers in [i, i+nr> is clobbered, so reload cannot be
inherited from the previous insn and petential reload register must be
rejected.


C H A N G E L O G
*****************

If this is *really* patch, then the changelog entry would be something like:

   * reload1.c (choose_reload_regs): Test for clobbering inherited reload
	registers.


P R O L O G U E
***************

First af all, I apologize for my bad English.
Next, I hope this report will be of some use. If this "patch" is for some
reason incorrect, maybe it points close to the solution of the problem...


Alan Goluban
Faculty of Electrical Engineering and Computing
University of Zagreb
Croatia
email: alan.goluban@fer.hr agoluban@rasip.fer.hr
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]