This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] lower-subreg: Decompose multiword shifts and zero-extends
- From: Andreas Krebbel <Andreas dot Krebbel at de dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 6 Aug 2007 16:02:08 +0200
- Subject: [PATCH] lower-subreg: Decompose multiword shifts and zero-extends
Hello,
here is an enhanced version of my decompose multiword shifts patch.
This version also deals with multiword zero-extends of wordmode
pseudos, as suggested by Ulrich, and with shifts by more than a word,
as suggested by Paolo.
Handling zero-extends as well also fixes the problem Rask was seeing
on i386:
With the new patch and -O2 -march=k6 -m32 I get (for the first
function of the attached testcase):
foo:
pushl %ebp
movl %esp, %ebp
movl 12(%ebp), %eax
movl 8(%ebp), %edx
leave
ret
instead of (without any patch):
foo:
pushl %ebp # 39 *pushsi2 [length = 1]
movl %esp, %ebp # 40 *movsi_1/1 [length = 2]
movl 8(%ebp), %edx # 37 *movsi_1/1 [length = 3]
movl 12(%ebp), %eax # 10 *movsi_1/1 [length = 3]
leave # 43 leave [length = 1]
movl %edx, %ecx # 46 *movsi_1/1 [length = 2]
movl %ecx, %edx # 11 *movsi_1/1 [length = 2]
ret # 44 return_internal [length = 1]
So there is also a small improvement here.
Bootstrapped on s390, s390x, i686 and x86_64.
No testsuite regressions.
Ok for mainline?
Bye,
-Andreas-
2007-08-06 Andreas Krebbel <krebbel1@de.ibm.com>
* lower-subreg.c (resolve_subreg_use): Remove assertion.
(find_decomposable_shift_zeroextend,
resolve_shift_zeroextend): New functions.
(decompose_multiword_subregs): Use the functions above to decompose
multiword shifts and zero-extends.
Index: gcc/lower-subreg.c
===================================================================
*** gcc/lower-subreg.c.orig 2007-08-06 10:56:55.000000000 +0200
--- gcc/lower-subreg.c 2007-08-06 14:34:01.000000000 +0200
*************** resolve_subreg_use (rtx *px, void *data)
*** 525,532 ****
{
/* Return 1 to the caller to indicate that we found a direct
reference to a register which is being decomposed. This can
! happen inside notes. */
! gcc_assert (!insn);
return 1;
}
--- 525,531 ----
{
/* Return 1 to the caller to indicate that we found a direct
reference to a register which is being decomposed. This can
! happen inside notes or multiword shift instructions. */
return 1;
}
*************** resolve_use (rtx pat, rtx insn)
*** 944,949 ****
--- 943,1105 ----
return false;
}
+ /* Check whether INSN is a single set of a pseudo from either a constant ASHIFT/LSHIFTRT (count of at least BITS_PER_WORD) of a two-word pseudo
+ or a ZERO_EXTEND of a word_mode pseudo to two words. If so, set the destination (and, for shifts, also the source) register in the
+ decomposable_context bitmap and return 1. Otherwise return 0 and leave the bitmap untouched. */
+
+ static int
+ find_decomposable_shift_zeroextend (rtx insn)
+ {
+ rtx set;
+ rtx op;
+ rtx op_operand;
+
+ set = single_set (insn);
+ if (!set)
+ return 0;
+
+ op = SET_SRC (set);
+ if (GET_CODE (op) != ASHIFT
+ && GET_CODE (op) != LSHIFTRT
+ && GET_CODE (op) != ZERO_EXTEND)
+ return 0;
+ /* Both the destination and the shifted/extended operand must be pseudo registers, and the result mode a scalar integer mode. */
+ op_operand = XEXP (op, 0);
+ if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
+ || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
+ || HARD_REGISTER_NUM_P (REGNO (op_operand))
+ || !SCALAR_INT_MODE_P (GET_MODE (op)))
+ return 0;
+
+ if (GET_CODE (op) == ZERO_EXTEND)
+ {
+ if (GET_MODE (op_operand) != word_mode /* only word -> double-word extends are handled */
+ || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
+ return 0;
+ }
+ else /* left or right shift */
+ {
+ if (GET_CODE (XEXP (op, 1)) != CONST_INT
+ || INTVAL (XEXP (op, 1)) < BITS_PER_WORD /* NOTE(review): no upper bound is checked here -- confirm a count >= 2 * BITS_PER_WORD cannot reach this point */
+ || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
+ return 0;
+ }
+
+ bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
+ /* For a zero-extend the source is already a word_mode pseudo, so only shift sources are marked. */
+ if (GET_CODE (op) != ZERO_EXTEND)
+ bitmap_set_bit (decomposable_context, REGNO (op_operand));
+
+ return 1;
+ }
+
+ /* Decompose a shift by a word or more (in INSN) of a multiword
+ pseudo or a multiword zero-extend of a wordmode pseudo into a move (preceded by a word_mode shift when the count exceeds BITS_PER_WORD)
+ and a 'set to zero' insn. The new insns are emitted before INSN and INSN is deleted. Return the first new insn when a
+ replacement was done, otherwise NULL_RTX. */
+
+ static rtx
+ resolve_shift_zeroextend (rtx insn)
+ {
+ rtx set;
+ rtx op;
+ rtx op_operand;
+ rtx insns;
+ rtx src_reg, dest_reg, dest_zero;
+ int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
+
+ set = single_set (insn);
+ if (!set)
+ return NULL_RTX;
+
+ op = SET_SRC (set);
+ if (GET_CODE (op) != ASHIFT
+ && GET_CODE (op) != LSHIFTRT
+ && GET_CODE (op) != ZERO_EXTEND)
+ return NULL_RTX;
+
+ op_operand = XEXP (op, 0);
+ /* Nothing to do unless resolve_reg_p accepts the destination or the operand (presumably: it has been decomposed; the asserts below expect a CONCATN). */
+ if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
+ return NULL_RTX;
+
+ /* src_reg_num is the number of the word mode register which we
+ are operating on. For a left shift and a zero_extend on little
+ endian machines this is register 0. */
+ src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
+
+ if (WORDS_BIG_ENDIAN)
+ src_reg_num = 1 - src_reg_num;
+ /* A zero-extend keeps the value in the same word; a shift moves it into the other word. */
+ if (GET_CODE (op) == ZERO_EXTEND)
+ dest_reg_num = src_reg_num;
+ else
+ dest_reg_num = 1 - src_reg_num;
+ /* SUBREG offsets, used below only for operands that are not a CONCATN. */
+ offset1 = UNITS_PER_WORD * dest_reg_num;
+ offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
+ src_offset = UNITS_PER_WORD * src_reg_num;
+
+ if (WORDS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ {
+ offset1 += UNITS_PER_WORD - 1;
+ offset2 += UNITS_PER_WORD - 1;
+ src_offset += UNITS_PER_WORD - 1;
+ }
+
+ start_sequence ();
+ /* dest_reg receives the (possibly shifted) source word, dest_zero is the word that gets cleared. */
+ if (resolve_reg_p (SET_DEST (set)))
+ {
+ gcc_assert (GET_CODE (SET_DEST (set)) == CONCATN);
+
+ dest_reg = XVECEXP (SET_DEST (set), 0, dest_reg_num);
+ dest_zero = XVECEXP (SET_DEST (set), 0, 1 - dest_reg_num);
+ }
+ else
+ {
+ dest_reg = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset1);
+ dest_zero = gen_rtx_SUBREG (word_mode, SET_DEST (set), offset2);
+ }
+
+ if (resolve_reg_p (op_operand))
+ {
+ gcc_assert (GET_CODE (op_operand) == CONCATN);
+
+ src_reg = XVECEXP (op_operand, 0, src_reg_num);
+ }
+ else
+ src_reg = gen_rtx_SUBREG (word_mode, op_operand, src_offset);
+
+ if (GET_CODE (op) != ZERO_EXTEND)
+ {
+ int shift_count = INTVAL (XEXP (op, 1)); /* NOTE(review): assumes a CONST_INT count; this function does not check it itself -- confirm callers guarantee it */
+ if (shift_count > BITS_PER_WORD) /* the part of the count beyond one word is done as a word_mode shift with dest_reg as target */
+ src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
+ LSHIFT_EXPR : RSHIFT_EXPR,
+ word_mode, src_reg,
+ build_int_cst (NULL_TREE,
+ shift_count - BITS_PER_WORD),
+ dest_reg, 1);
+ }
+ /* No move is needed when src_reg is already dest_reg (possible after the expand_shift above, which was given dest_reg as its target). */
+ if (dest_reg != src_reg)
+ emit_move_insn (dest_reg, src_reg);
+ emit_move_insn (dest_zero, CONST0_RTX (word_mode));
+ insns = get_insns ();
+
+ end_sequence ();
+
+ emit_insn_before (insns, insn);
+ /* NOTE(review): only the first two insns of the sequence are named in the dump; the sequence may hold more when a shift was expanded -- confirm. */
+ if (dump_file)
+ fprintf (dump_file, "; Replacing insn: %d with insns: %d and %d\n",
+ INSN_UID (insn), INSN_UID (insns), INSN_UID (NEXT_INSN (insns)));
+
+ delete_insn (insn);
+ return insns;
+ }
+
/* Look for registers which are always accessed via word-sized SUBREGs
or via copies. Decompose these registers into several word-sized
pseudo-registers. */
*************** decompose_multiword_subregs (void)
*** 1003,1008 ****
--- 1159,1167 ----
|| GET_CODE (PATTERN (insn)) == USE)
continue;
+ if (find_decomposable_shift_zeroextend (insn))
+ continue;
+
recog_memoized (insn);
extract_insn (insn);
*************** decompose_multiword_subregs (void)
*** 1152,1157 ****
--- 1311,1329 ----
SET_BIT (sub_blocks, bb->index);
}
}
+ else
+ {
+ rtx decomposed_shift;
+
+ decomposed_shift = resolve_shift_zeroextend (insn);
+ if (decomposed_shift != NULL_RTX)
+ {
+ changed = true;
+ insn = decomposed_shift;
+ recog_memoized (insn);
+ extract_insn (insn);
+ }
+ }
for (i = recog_data.n_operands - 1; i >= 0; --i)
for_each_rtx (recog_data.operand_loc[i],
Index: gcc/testsuite/gcc.dg/multiword-1.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/multiword-1.c 2007-08-06 10:57:17.000000000 +0200
***************
*** 0 ****
--- 1,67 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3" } */
+
+ typedef unsigned int u32;
+ typedef unsigned long long u64;
+
+ u64 __attribute__((noinline))
+ foo (u32 high, u32 low)
+ {
+ return ((u64)high << 32) | low; /* HIGH widened to u64 and shifted left by 32, OR'ed with LOW */
+ }
+
+ u32 __attribute__((noinline))
+ right (u64 t)
+ {
+ return (u32)(t >> 32); /* T shifted right by 32, then truncated to u32 */
+ }
+
+ u64 __attribute__((noinline))
+ left (u32 t)
+ {
+ return (u64)t << 32; /* T widened to u64, then shifted left by 32 */
+ }
+
+ u32 __attribute__((noinline))
+ right2 (u64 t)
+ {
+ return (u32)(t >> 40); /* right shift by 40, i.e. by more than 32, then truncated to u32 */
+ }
+
+ u64 __attribute__((noinline))
+ left2 (u32 t)
+ {
+ return (u64)t << 40; /* T widened to u64, then shifted left by 40, i.e. by more than 32 */
+ }
+
+ u64 __attribute__((noinline))
+ zeroextend (u32 t)
+ {
+ return (u64)t; /* plain unsigned widening from u32 to u64, no shift */
+ }
+
+ extern void abort ();
+
+ int
+ main ()
+ {
+ if (foo (13000, 12000) != 55834574860000ULL) /* (13000 << 32) | 12000 */
+ abort ();
+
+ if (right (55834574860000ULL) != 13000) /* upper 32 bits of the value above */
+ abort ();
+
+ if (left (13000) != 55834574848000ULL) /* 13000 << 32 */
+ abort ();
+
+ if (right2 (55834574860000ULL) != 50) /* value >> 40 == 13000 >> 8 == 50 */
+ abort ();
+
+ if (left2 (13000) != 14293651161088000ULL) /* 13000 << 40 */
+ abort ();
+
+ if (zeroextend (13000) != 13000ULL) /* widening must leave the value unchanged */
+ abort ();
+
+ return 0;
+ }