[PATCH] bit-field merging improvements to combine.c
Roger Sayle
roger@eyesopen.com
Thu Dec 29 05:02:00 GMT 2005
The best way to describe/motivate the following patch is by example.
Consider the following function:
typedef unsigned char v4qi __attribute__ ((vector_size (4)));
v4qi x;
void foo()
{
v4qi t = { 1, 2, 3, 4 };
x = t;
}
On x86_64, mainline generates the following code:
foo: xorl %eax, %eax
movb $1, %al
movb $2, %ah
andl $-16711681, %eax
orl $196608, %eax
andl $16777215, %eax
orl $67108864, %eax
movl %eax, x(%rip)
ret
With the patch to combine.c below, we instead generate:
foo: movl $67305985, x(%rip)
ret
There is already code in combine.c whose purpose is to spot that
a constant is assigned to a register, followed by an assignment
of a second constant to a part/subword of that register. However,
this existing code currently only looks for instances where the
initial assignment is of a double-word value, and the following
assignment is to its upper or lower half, via a SUBREG. The
patch below generalizes this existing code to additionally handle
strict_low_part and zero_extract destinations.
In the example above, the first successful combination attempt
merges:
(insn 11 9 12 2 (set (reg:SI 58)
(const_int 0 [0x0])) 40 {*movsi_1} ...)
with
(insn 12 11 14 2 (set (strict_low_part (subreg:QI (reg:SI 58) 0))
(const_int 1 [0x1])) 58 {*movstrictqi_1} ...)
and the second successful combination then merges the resulting insn:
(insn 12 11 14 2 (set (reg:SI 58)
(const_int 1 [0x1])) 40 {*movsi_1} ...)
with
(insn 14 12 16 2 (set (zero_extract:DI (subreg:DI (reg:SI 58) 0)
(const_int 8 [0x8])
(const_int 8 [0x8]))
(const_int 2 [0x2])) 71 {movdi_insv_1_rex64} ...)
and so on.
The following patch has been tested on x86_64-unknown-linux-gnu with
a full "make bootstrap", all default languages, and regression tested
with a top-level "make -k check" with no new failures.
Ok for mainline? If there are no obvious problems I'll commit this
to mainline next week.
2005-12-28 Roger Sayle <roger@eyesopen.com>
* combine.c (reg_subword_p): New predicate to test whether the
destination of a set refers to a subword/piece of a register.
(try_combine): Generalize the code to merge the setting of a
pseudo to a constant followed by a set of a subword of that
register to a constant.
Index: combine.c
===================================================================
*** combine.c (revision 109033)
--- combine.c (working copy)
*************** can_change_dest_mode (rtx x, int added_s
*** 1706,1711 ****
--- 1706,1732 ----
&& !REG_USERVAR_P (x));
}
+
+ /* Check whether X, the destination of a set, refers to part of
+ the register specified by REG. */
+
+ static bool
+ reg_subword_p (rtx x, rtx reg)
+ {
+ /* Check that reg is an integer mode register. */
+ if (!REG_P (reg) || GET_MODE_CLASS (GET_MODE (reg)) != MODE_INT)
+ return false;
+
+ if (GET_CODE (x) == STRICT_LOW_PART
+ || GET_CODE (x) == ZERO_EXTRACT)
+ x = XEXP (x, 0);
+
+ return GET_CODE (x) == SUBREG
+ && SUBREG_REG (x) == reg
+ && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT;
+ }
+
+
/* Try to combine the insns I1 and I2 into I3.
Here I1 and I2 appear earlier than I3.
I1 can be zero; then we combine just I2 into I3.
*************** try_combine (rtx i3, rtx i2, rtx i1, int
*** 1870,1943 ****
}
}
! /* If I2 is setting a double-word pseudo to a constant and I3 is setting
! one of those words to another constant, merge them by making a new
constant. */
if (i1 == 0
&& (temp = single_set (i2)) != 0
&& (GET_CODE (SET_SRC (temp)) == CONST_INT
|| GET_CODE (SET_SRC (temp)) == CONST_DOUBLE)
- && REG_P (SET_DEST (temp))
- && GET_MODE_CLASS (GET_MODE (SET_DEST (temp))) == MODE_INT
- && GET_MODE_SIZE (GET_MODE (SET_DEST (temp))) == 2 * UNITS_PER_WORD
&& GET_CODE (PATTERN (i3)) == SET
! && GET_CODE (SET_DEST (PATTERN (i3))) == SUBREG
! && SUBREG_REG (SET_DEST (PATTERN (i3))) == SET_DEST (temp)
! && GET_MODE_CLASS (GET_MODE (SET_DEST (PATTERN (i3)))) == MODE_INT
! && GET_MODE_SIZE (GET_MODE (SET_DEST (PATTERN (i3)))) == UNITS_PER_WORD
! && GET_CODE (SET_SRC (PATTERN (i3))) == CONST_INT)
{
! HOST_WIDE_INT lo, hi;
! if (GET_CODE (SET_SRC (temp)) == CONST_INT)
! lo = INTVAL (SET_SRC (temp)), hi = lo < 0 ? -1 : 0;
! else
{
! lo = CONST_DOUBLE_LOW (SET_SRC (temp));
! hi = CONST_DOUBLE_HIGH (SET_SRC (temp));
}
!
! if (subreg_lowpart_p (SET_DEST (PATTERN (i3))))
{
! /* We don't handle the case of the target word being wider
! than a host wide int. */
! gcc_assert (HOST_BITS_PER_WIDE_INT >= BITS_PER_WORD);
! lo &= ~(UWIDE_SHIFT_LEFT_BY_BITS_PER_WORD (1) - 1);
! lo |= (INTVAL (SET_SRC (PATTERN (i3)))
! & (UWIDE_SHIFT_LEFT_BY_BITS_PER_WORD (1) - 1));
}
! else if (HOST_BITS_PER_WIDE_INT == BITS_PER_WORD)
! hi = INTVAL (SET_SRC (PATTERN (i3)));
! else if (HOST_BITS_PER_WIDE_INT >= 2 * BITS_PER_WORD)
{
! int sign = -(int) ((unsigned HOST_WIDE_INT) lo
! >> (HOST_BITS_PER_WIDE_INT - 1));
!
! lo &= ~ (UWIDE_SHIFT_LEFT_BY_BITS_PER_WORD
! (UWIDE_SHIFT_LEFT_BY_BITS_PER_WORD (1) - 1));
! lo |= (UWIDE_SHIFT_LEFT_BY_BITS_PER_WORD
! (INTVAL (SET_SRC (PATTERN (i3)))));
! if (hi == sign)
! hi = lo < 0 ? -1 : 0;
}
- else
- /* We don't handle the case of the higher word not fitting
- entirely in either hi or lo. */
- gcc_unreachable ();
! combine_merges++;
! subst_insn = i3;
! subst_low_cuid = INSN_CUID (i2);
! added_sets_2 = added_sets_1 = 0;
! i2dest = SET_DEST (temp);
! i2dest_killed = dead_or_set_p (i2, i2dest);
! SUBST (SET_SRC (temp),
! immed_double_const (lo, hi, GET_MODE (SET_DEST (temp))));
! newpat = PATTERN (i2);
! goto validate_replacement;
}
#ifndef HAVE_cc0
--- 1891,2030 ----
}
}
! /* If I2 is setting a pseudo to a constant and I3 is setting some
! sub-part of it to another constant, merge them by making a new
constant. */
if (i1 == 0
&& (temp = single_set (i2)) != 0
&& (GET_CODE (SET_SRC (temp)) == CONST_INT
|| GET_CODE (SET_SRC (temp)) == CONST_DOUBLE)
&& GET_CODE (PATTERN (i3)) == SET
! && (GET_CODE (SET_SRC (PATTERN (i3))) == CONST_INT
! || GET_CODE (SET_SRC (PATTERN (i3))) == CONST_DOUBLE)
! && reg_subword_p (SET_DEST (PATTERN (i3)), SET_DEST (temp)))
{
! rtx dest = SET_DEST (PATTERN (i3));
! int offset = -1;
! int width = 0;
! if (GET_CODE (dest) == STRICT_LOW_PART)
{
! width = GET_MODE_BITSIZE (GET_MODE (XEXP (dest, 0)));
! offset = 0;
}
! else if (GET_CODE (dest) == ZERO_EXTRACT)
{
! if (GET_CODE (XEXP (dest, 1)) == CONST_INT
! && GET_CODE (XEXP (dest, 2)) == CONST_INT)
! {
! width = INTVAL (XEXP (dest, 1));
! offset = INTVAL (XEXP (dest, 2));
! if (BITS_BIG_ENDIAN)
! offset = GET_MODE_BITSIZE (GET_MODE (XEXP (dest, 0)))
! - width - offset;
! }
}
! else if (subreg_lowpart_p (dest))
{
! width = GET_MODE_BITSIZE (GET_MODE (dest));
! offset = 0;
! }
! /* ??? Preserve the original logic to handle setting the high word
! of double-word pseudos, where inner is half the size of outer
! but not the lowpart. This could be generalized by handling
! SUBREG_BYTE, WORDS_BIG_ENDIAN and BYTES_BIG_ENDIAN ourselves.
! Unfortunately this logic is tricky to get right and probably
! not worth the effort. */
! else if (GET_MODE_BITSIZE (GET_MODE (SET_DEST (temp)))
! == 2 * GET_MODE_BITSIZE (GET_MODE (dest)))
! {
! width = GET_MODE_BITSIZE (GET_MODE (dest));
! offset = width;
}
! if (offset >= 0)
! {
! HOST_WIDE_INT mhi, ohi, ihi;
! HOST_WIDE_INT mlo, olo, ilo;
! rtx inner = SET_SRC (PATTERN (i3));
! rtx outer = SET_SRC (temp);
! if (GET_CODE (outer) == CONST_INT)
! {
! olo = INTVAL (outer);
! ohi = olo < 0 ? -1 : 0;
! }
! else
! {
! olo = CONST_DOUBLE_LOW (outer);
! ohi = CONST_DOUBLE_HIGH (outer);
! }
! if (GET_CODE (inner) == CONST_INT)
! {
! ilo = INTVAL (inner);
! ihi = ilo < 0 ? -1 : 0;
! }
! else
! {
! ilo = CONST_DOUBLE_LOW (inner);
! ihi = CONST_DOUBLE_HIGH (inner);
! }
!
! if (width < HOST_BITS_PER_WIDE_INT)
! {
! mlo = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
! mhi = 0;
! }
! else if (width < HOST_BITS_PER_WIDE_INT * 2)
! {
! mhi = ((unsigned HOST_WIDE_INT) 1
! << (width - HOST_BITS_PER_WIDE_INT)) - 1;
! mlo = -1;
! }
! else
! {
! mlo = -1;
! mhi = -1;
! }
!
! ilo &= mlo;
! ihi &= mhi;
!
! if (offset >= HOST_BITS_PER_WIDE_INT)
! {
! mhi = mlo << (offset - HOST_BITS_PER_WIDE_INT);
! mlo = 0;
! ihi = ilo << (offset - HOST_BITS_PER_WIDE_INT);
! ilo = 0;
! }
! else if (offset > 0)
! {
! mhi = (mhi << offset) | ((unsigned HOST_WIDE_INT) mlo
! >> (HOST_BITS_PER_WIDE_INT - offset));
! mlo = mlo << offset;
! ihi = (ihi << offset) | ((unsigned HOST_WIDE_INT) ilo
! >> (HOST_BITS_PER_WIDE_INT - offset));
! ilo = ilo << offset;
! }
!
! olo = (olo & ~mlo) | ilo;
! ohi = (ohi & ~mhi) | ihi;
!
! combine_merges++;
! subst_insn = i3;
! subst_low_cuid = INSN_CUID (i2);
! added_sets_2 = added_sets_1 = 0;
! i2dest = SET_DEST (temp);
! i2dest_killed = dead_or_set_p (i2, i2dest);
!
! SUBST (SET_SRC (temp),
! immed_double_const (olo, ohi, GET_MODE (SET_DEST (temp))));
!
! newpat = PATTERN (i2);
! goto validate_replacement;
! }
}
#ifndef HAVE_cc0
Roger
--
More information about the Gcc-patches
mailing list