This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH RFC: My version of the lower-subreg patch


On Fri, Dec 15, 2006 at 06:05:25PM -0800, Ian Lance Taylor wrote:
> Anyhow, I wanted to put this patch out for comments, to see what
> people think of it.

   I would like to have an option -f[no-]lower-subreg so I can easily
compile code with and without the subreg lowering pass for comparison
purposes.

> I'd be interested in results on machines other than the i386.

   I gave it a shot on my 16-bit x86 target:

udivmodsi4.c: In function '__udivmodsi4':
udivmodsi4.c:25: internal compiler error: RTL check: expected code 'reg',
have 'concatn' in update_reg_offset, at emit-rtl.c:858

(gdb) frame 2
#2  0x08259673 in update_reg_offset (new=0xb7d9e1a0, reg=0x35a,
offset=141763191) at emit-rtl.c:858
858       decl = REG_EXPR (reg);
(gdb) call debug_rtx (new)
(reg:HI 41)
(gdb) print reg
$2 = (rtx) 0x35a

(gdb) frame 3
#3  0x0825f49d in gen_reg_rtx_offset (reg=0xb7d96b80, mode=HImode, offset=0)
at emit-rtl.c:929
929       update_reg_offset (new, reg, offset);
(gdb) call debug_rtx (reg)
(concatn/v:SI [
        (nil)
        (nil)
    ])

(gdb) frame 4
#4  0x0858615e in decompose_multiword_subregs (update_life=0 '\0') at
lower-subreg.c:303
303         RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i *
UNITS_PER_WORD);

(gdb) list
298       PUT_CODE (reg, CONCATN);
299       v = rtvec_alloc (words);
300       XVEC (reg, 0) = v;
301
302       for (i = 0; i < words; ++i)
303         RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i *
UNITS_PER_WORD);

   I think that PUT_CODE (reg, CONCATN) is placed too early. I also ran into
this (obvious) one:

udivmodsi4.c: In function '__udivmodsi4':
udivmodsi4.c:25: internal compiler error: RTL check: expected code 'reg',
have 'set' in decompose_multiword_subregs, at lower-subreg.c:732

730           else if (retval
731                    && REG_P (SET_SRC (set))
732                    && HARD_REGISTER_P (set))

 so I applied this on top of your patch:

--- gcc/lower-subreg.c-orig	2006-12-25 00:52:59.000000000 +0100
+++ gcc/lower-subreg.c	2006-12-25 15:27:38.000000000 +0100
@@ -294,14 +294,14 @@
 
   words = GET_MODE_SIZE (GET_MODE (reg));
   words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-
-  PUT_CODE (reg, CONCATN);
   v = rtvec_alloc (words);
-  XVEC (reg, 0) = v;
 
   for (i = 0; i < words; ++i)
     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
 
+  PUT_CODE (reg, CONCATN);
+  XVEC (reg, 0) = v;
+
   /* ??? We should probably call a function in regclass.c to do
      this.  */
   memset (VEC_index (reg_info_p, reg_n_info, regno), 0, sizeof (reg_info));
@@ -728,7 +728,7 @@
 		cmi = SIMPLE_PSEUDO_REG_MOVE;
 	      else if (retval
 		       && REG_P (SET_SRC (set))
-		       && HARD_REGISTER_P (set))
+		       && HARD_REGISTER_P (SET_SRC (set)))
 		{
 		  /* We don't want to decompose an assignment which
 		     copies the value returned by a libcall to a

   Then decompose_multiword_subregs() would choke on this:

udivmodsi4.c: In function '__udivmodsi4':
udivmodsi4.c:25: internal compiler error: in decompose_multiword_subregs, at
lower-subreg.c:848

	i = apply_change_group ();
	gcc_assert (i);

(gdb) call debug_rtx (insn)
(insn 51 244 52 10 udivmodsi4.c:8 (set (subreg:HI (concatn/v:SI [
                    (reg:HI 43 [ res ])
                    (reg:HI 44 [ res+2 ])
                ]) 0)
        (const_int 0 [0x0])) 23 {*movhi} (nil)
    (nil))

where "*movhi" (icode 23) is defined like this (EQ16 = any 16-bit mode):

(define_insn "*mov<mode>"
  [(set (match_operand:EQ16 0 "nonimmediate_operand" "=R,rm")
        (match_operand:EQ16 1 "general_operand" "rm,Ri"))]
  "!MEM_P (operands[0]) || !MEM_P (operands[1])"
  "movw\t%1,\t%0"
)

   Simplifying the subreg produces this:

(gdb) call debug_rtx (insn)
(insn 51 244 52 10 udivmodsi4.c:8 (set (reg:HI 43 [ res ])
        (const_int 0 [0x0])) -1 (nil)
    (nil))

   Rerecognizing that returns icode = 1 and num_clobbers = 1 because it
matches this insn pattern which happens to be placed before the plain
"*movhi" insn pattern:

(define_insn "*mov<mode>_const0"
        [(set (match_operand:MO 0 "register_operand" "=<r>")
              (const_int 0))
         (clobber (reg:CC CC_REG))]
        ""
        "xor<s>\t%0,\t%0"
)

   CC_REG is a hard register. There's a comment in insn_invalid_p():

  /* If we have to add CLOBBERs, fail if we have to add ones that reference
     hard registers since our callers can't know if they are live or not. 
     Otherwise, add them.  */

   So the change is rejected, which trips the assert. I solved it by
reordering the two movhi variants. Other targets might run into the same
issue.

   As for code generation, here's __popcountsi2() from libgcc2.c before the
patch:

__popcountsi2:
	pushw	%bx		;# 87	*pushhi1_086
	pushw	%bp		;# 88	*pushhi1_086
	subw	$4,	%sp	;# 89	*subhi3/2
	movw	%sp,	%bp	;# 79	*movhi/1
	movw	10(%bp),%ax	;# 80	*movhi/1
	movw	%ax,	(%bp)	;# 11	*movhi/2
	movw	12(%bp),%ax	;# 82	*movhi/1
	movw	%ax,	2(%bp)	;# 12	*movhi/2
	movw	$__popcount_tab, %bx	;# 20	*movhi/2
	movb	(%bp),	%dl	;# 56	*extzv_byte/1
	movb	%dl,	%al	;# 84	*movqi/1
	xlat	(%bx)		;# 57	*xlatqi2_subreg
	movw	%ax,	%dx	;# 85	*movhi/1
	xorb	%dh,	%dh	;# 58	*andhi3/3
	movb	1(%bp),	%al	;# 61	*extzv_byte/1
	xlat	(%bx)		;# 62	*xlatqi2_subreg
	xorb	%ah,	%ah	;# 63	*andhi3/3
	addw	%ax,	%dx	;# 27	*addhi3/5
	movb	2(%bp),	%al	;# 66	*extzv_byte/1
	xlat	(%bx)		;# 67	*xlatqi2_subreg
	xorb	%ah,	%ah	;# 68	*andhi3/3
	addw	%ax,	%dx	;# 33	*addhi3/5
	movb	3(%bp),	%al	;# 71	*extzv_byte/1
	xlat	(%bx)		;# 72	*xlatqi2_subreg
	xorb	%ah,	%ah	;# 73	*andhi3/3
	addw	%dx,	%ax	;# 39	*addhi3/5
	addw	$4,	%sp	;# 92	*addhi3/5
	popw	%bp		;# 93	*pophi1
	popw	%bx		;# 94	*pophi1
	ret			;# 95	*return

   The "*extzv_byte" pattern only works on memory or the first eight
registers (0-1 cx, 2-3 ax, 4-5 dx and 6-7 bx). The "*xlatqi2_subreg" pattern
requires 2-3 ax and 6-7 bx. As a result, the SImode parameter is copied to
the stack (insns 80, 11, 82 and 12) because the available registers are
fragmented (0-1 cx and 4-5 dx). This is where the lower-subreg pass should
greatly help this function by splitting the parameter into two Himode parts
for separate allocation.

   Insn 84 is a reload caused by local-alloc choosing register dx (insn 56)
despite regclass preferring reg ax (2-3), i.e. an unrelated issue.

   With the patch, and a target tweak to replace

(zero_extract:QI (reg:SI 23) (const_int 8) (const_int 24))

with

(zero_extract:QI (subreg:HI (reg:SI 23) 2) (const_int 8) (const_int 8)):

to allow decomposition into HImode parts:

__popcountsi2:
	pushw	%cx		;# 84	*pushhi1_086
	pushw	%bx		;# 85	*pushhi1_086
	pushw	%bp		;# 86	*pushhi1_086
	subw	$2,	%sp	;# 87	*subhi3/2
	movw	%sp,	%bp	;# 79	*movhi/1
	movw	10(%bp),%ax	;# 80	*movhi/1
	movw	%ax,	(%bp)	;# 11	*movhi/2
	movw	12(%bp),%cx	;# 12	*movhi/1
	movw	$__popcount_tab, %bx	;# 20	*movhi/2
	movb	%al,	%dl	;# 56	*extzv_byte/1
	movb	%dl,	%al	;# 82	*movqi/1
	xlat	(%bx)		;# 57	*xlatqi2_subreg
	movw	%ax,	%dx	;# 83	*movhi/1
	xorb	%dh,	%dh	;# 58	*andhi3/3
	movb	1(%bp),	%al	;# 61	*extzv_byte/1
	xlat	(%bx)		;# 62	*xlatqi2_subreg
	xorb	%ah,	%ah	;# 63	*andhi3/3
	addw	%ax,	%dx	;# 27	*addhi3/5
	movb	%cl,	%al	;# 66	*extzv_byte/1
	xlat	(%bx)		;# 67	*xlatqi2_subreg
	xorb	%ah,	%ah	;# 68	*andhi3/3
	addw	%ax,	%dx	;# 33	*addhi3/5
	movb	%ch,	%al	;# 71	*extzv_byte/1
	xlat	(%bx)		;# 72	*xlatqi2_subreg
	xorb	%ah,	%ah	;# 73	*andhi3/3
	addw	%dx,	%ax	;# 39	*addhi3/5
	addw	$2,	%sp	;# 90	*addhi3/5
	popw	%bp		;# 91	*pophi1
	popw	%bx		;# 92	*pophi1
	popw	%cx		;# 93	*pophi1
	ret			;# 94	*return

   It most definitely helped. The high part of the SImode parameter now fits
into cx. Note that the high part (loaded in insn 12) isn't needed until insn
66 (the third "*extzv_byte" insn), so cx could have been allocated to both
the low and high parts - I thought we had a pass to move the load closer to
its use.

   Btw, it looked much worseeight months ago
<URL:http://gcc.gnu.org/ml/gcc-patches/2006-04/msg00835.html>:
40 instructions vs. 31 instructions now. I'm hoping to see another 7 insns
go away: 87, 11, 56, 82, 68, 73 and 90. Eventually.

   Thanks for picking up the lower-subreg patch.

-- 
Rask Ingemann Lambertsen


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]