lower-subreg: decompose registers that are set by non-move insns.

Summary of the changes below:
 * Add preserved_context, a bitmap of registers which are decomposed
   while the original register is preserved.
 * Record the CONCATN of each decomposed register in reg_decomposed_rtx
   instead of smashing the REG rtx in place; resolve_reg and
   resolve_subreg replace resolve_reg_p and resolve_subreg_p.
 * Factor out resolve_mode, emit_simple_move_pieces and
   resolve_complex_insn.
 * Remove the ASM_OPERANDS special cases.

Leading whitespace in this copy was reconstructed with spaces; apply
with "patch -l" if the context does not match exactly.

Index: lower-subreg.c
===================================================================
--- lower-subreg.c (revision 162181)
+++ lower-subreg.c (working copy)
@@ -69,10 +69,39 @@ static bitmap non_decomposable_context;
    avoid generating accesses to its subwords in integer modes.  */
 static bitmap subreg_context;
 
+/* Bit N in this bitmap is set if regno N is used in a context in
+   which we can decompose it, but must preserve the original register.  */
+static bitmap preserved_context;
+
 /* Bit N in the bitmap in element M of this array is set if there is a
    copy from reg M to reg N.  */
 static VEC(bitmap,heap) *reg_copy_graph;
 
+/* This array maps register numbers to the CONCATN rtx produced by
+   decomposition.  Entries are nonnull only for regnos which are set
+   in decomposable_context and clear in non_decomposable_context.  */
+static rtx *reg_decomposed_rtx;
+
+/* The number of elements in the previous array.  Necessary because we
+   create new registers after allocating it.  */
+static unsigned max_decomposable_reg_num;
+
+/* Return a suitable integer mode to use for resolving a register of
+   mode MODE.  BLKmode indicates that resolving it is not possible.  */
+
+static enum machine_mode
+resolve_mode (enum machine_mode mode)
+{
+  /* Reject PARTIAL_INT modes.  They are used for processor specific
+     purposes and it's probably best not to tamper with them.  */
+  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+    return BLKmode;
+
+  if (!SCALAR_INT_MODE_P (mode))
+    mode = mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0);
+  return mode;
+}
+
 /* Return whether X is a simple object which we can take a word_mode
    subreg of.  */
 
@@ -126,10 +155,7 @@ simple_move (rtx insn)
   x = SET_SRC (set);
   if (x != recog_data.operand[0] && x != recog_data.operand[1])
     return NULL_RTX;
-  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
-     things like x86 rdtsc which returns a DImode value.  */
-  if (GET_CODE (x) != ASM_OPERANDS
-      && !simple_move_operand (x))
+  if (!simple_move_operand (x))
     return NULL_RTX;
 
   /* We try to decompose in integer modes, to avoid generating
@@ -137,15 +163,8 @@ simple_move (rtx insn)
      registers.  That means that we can't decompose if this is a
      non-integer mode for which there is no integer mode of the same
      size.  */
-  mode = GET_MODE (SET_SRC (set));
-  if (!SCALAR_INT_MODE_P (mode)
-      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
-          == BLKmode))
-    return NULL_RTX;
-
-  /* Reject PARTIAL_INT modes.  They are used for processor specific
-     purposes and it's probably best not to tamper with them.  */
-  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+  mode = resolve_mode (GET_MODE (SET_SRC (set)));
+  if (mode == BLKmode)
     return NULL_RTX;
 
   return set;
@@ -231,6 +250,8 @@ enum classify_move_insn
 {
   /* Not a simple move from one location to another.  */
   NOT_SIMPLE_MOVE,
+  /* The destination of an instruction which wasn't otherwise recognized.  */
+  NOT_SIMPLE_MOVE_DEST,
   /* A simple move from one pseudo-register to another.  */
   SIMPLE_PSEUDO_REG_MOVE,
   /* A simple move involving a non-pseudo-register.  */
@@ -325,6 +346,10 @@ find_decomposable_subregs (rtx *px, void
         {
           switch (*pcmi)
             {
+            case NOT_SIMPLE_MOVE_DEST:
+              bitmap_set_bit (decomposable_context, regno);
+              bitmap_set_bit (preserved_context, regno);
+              break;
             case NOT_SIMPLE_MOVE:
               bitmap_set_bit (non_decomposable_context, regno);
               break;
@@ -353,6 +378,30 @@ find_decomposable_subregs (rtx *px, void
   return 0;
 }
 
+/* Return the CONCATN into which we are decomposing X, or NULL_RTX if
+   X cannot be resolved.  */
+
+static rtx
+resolve_reg (rtx x)
+{
+  if (GET_CODE (x) == CONCATN)
+    return x;
+  if (REG_P (x) && REGNO (x) < max_decomposable_reg_num)
+    return reg_decomposed_rtx[REGNO (x)];
+  return NULL_RTX;
+}
+
+/* If X is a SUBREG of a register which we need to resolve, return the CONCATN
+   into which we are decomposing the register.  Return NULL_RTX otherwise.  */
+
+static rtx
+resolve_subreg (rtx x)
+{
+  if (GET_CODE (x) != SUBREG)
+    return NULL_RTX;
+  return resolve_reg (SUBREG_REG (x));
+}
+
 /* Decompose REGNO into word-sized components.  We smash the REG node
    in place.  This ensures that (1) something goes wrong quickly if we
    fail to make some replacement, and (2) the debug information inside
@@ -367,8 +416,6 @@ decompose_register (unsigned int regno)
 
   reg = regno_reg_rtx[regno];
 
-  regno_reg_rtx[regno] = NULL_RTX;
-
   words = GET_MODE_SIZE (GET_MODE (reg));
   words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 
@@ -376,8 +423,12 @@ decompose_register (unsigned int regno)
   for (i = 0; i < words; ++i)
     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
 
-  PUT_CODE (reg, CONCATN);
-  XVEC (reg, 0) = v;
+  if (!REGNO_REG_SET_P (preserved_context, regno))
+    regno_reg_rtx[regno] = NULL_RTX;
+
+  reg = gen_rtx_CONCATN (GET_MODE (reg), v);
+
+  reg_decomposed_rtx[regno] = reg;
 
   if (dump_file)
     {
@@ -421,32 +472,31 @@ static rtx
 simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
                              enum machine_mode innermode, unsigned int byte)
 {
-  rtx ret;
+  rtx ret, t;
 
   /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
      If OP is a SUBREG of a CONCATN, then it must be a simple mode
      change with the same size and offset 0, or it must extract a
      part.  We shouldn't see anything else here.  */
-  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
+  t = resolve_subreg (op);
+  if (t != NULL_RTX)
     {
       rtx op2;
 
       if ((GET_MODE_SIZE (GET_MODE (op))
            == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
           && SUBREG_BYTE (op) == 0)
-        return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
-                                            GET_MODE (SUBREG_REG (op)), byte);
+        return simplify_gen_subreg_concatn (outermode, t, GET_MODE (t), byte);
 
-      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
-                                     SUBREG_BYTE (op));
+      op2 = simplify_subreg_concatn (GET_MODE (op), t, SUBREG_BYTE (op));
       if (op2 == NULL_RTX)
         {
           /* We don't handle paradoxical subregs here.  */
           gcc_assert (GET_MODE_SIZE (outermode)
                       <= GET_MODE_SIZE (GET_MODE (op)));
           gcc_assert (GET_MODE_SIZE (GET_MODE (op))
-                      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
-          op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
+                      <= GET_MODE_SIZE (GET_MODE (t)));
+          op2 = simplify_subreg_concatn (outermode, t,
                                          byte + SUBREG_BYTE (op));
           gcc_assert (op2 != NULL_RTX);
           return op2;
@@ -457,6 +507,10 @@ simplify_gen_subreg_concatn (enum machin
       gcc_assert (innermode == GET_MODE (op));
     }
 
+  t = resolve_reg (op);
+  if (t != NULL_RTX)
+    op = t;
+
   if (GET_CODE (op) == CONCATN)
     return simplify_subreg_concatn (outermode, op, byte);
 
@@ -476,26 +530,6 @@ simplify_gen_subreg_concatn (enum machin
   return ret;
 }
 
-/* Return whether we should resolve X into the registers into which it
-   was decomposed.  */
-
-static bool
-resolve_reg_p (rtx x)
-{
-  return GET_CODE (x) == CONCATN;
-}
-
-/* Return whether X is a SUBREG of a register which we need to
-   resolve.  */
-
-static bool
-resolve_subreg_p (rtx x)
-{
-  if (GET_CODE (x) != SUBREG)
-    return false;
-  return resolve_reg_p (SUBREG_REG (x));
-}
-
 /* This is called via for_each_rtx.  Look for SUBREGs which need to be
    decomposed.  */
 
@@ -504,14 +538,15 @@ resolve_subreg_use (rtx *px, void *data)
 {
   rtx insn = (rtx) data;
   rtx x = *px;
+  rtx t;
 
   if (x == NULL_RTX)
     return 0;
 
-  if (resolve_subreg_p (x))
+  t = resolve_subreg (x);
+  if (t != NULL_RTX)
     {
-      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
-                                   SUBREG_BYTE (x));
+      x = simplify_subreg_concatn (GET_MODE (x), t, SUBREG_BYTE (x));
 
       /* It is possible for a note to contain a reference which we can
          decompose.  In this case, return 1 to the caller to indicate
@@ -526,7 +561,7 @@ resolve_subreg_use (rtx *px, void *data)
       return -1;
     }
 
-  if (resolve_reg_p (x))
+  if (resolve_reg (x) != NULL_RTX)
     {
       /* Return 1 to the caller to indicate that we found a direct
          reference to a register which is being decomposed.  This can
@@ -545,23 +580,25 @@ static int
 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
 {
   rtx x = *px;
+  rtx t;
 
   if (x == NULL_RTX)
     return 0;
 
-  if (resolve_subreg_p (x))
+  t = resolve_subreg (x);
+  if (t != NULL_RTX)
     {
-      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
-                                   SUBREG_BYTE (x));
+      x = simplify_subreg_concatn (GET_MODE (x), t, SUBREG_BYTE (x));
 
       if (x)
         *px = x;
       else
-        x = copy_rtx (*px);
+        *px = gen_rtx_SUBREG (GET_MODE (*px), copy_rtx (t), SUBREG_BYTE (*px));
     }
 
-  if (resolve_reg_p (x))
-    *px = copy_rtx (x);
+  t = resolve_reg (*px);
+  if (t != NULL_RTX)
+    *px = copy_rtx (t);
 
   return 0;
 }
@@ -595,7 +632,7 @@ resolve_reg_notes (rtx insn)
         {
         case REG_DEAD:
         case REG_UNUSED:
-          if (resolve_reg_p (XEXP (note, 0)))
+          if (resolve_reg (XEXP (note, 0)) != NULL_RTX)
             del = true;
           break;
 
@@ -629,6 +666,27 @@ can_decompose_p (rtx x)
   return true;
 }
 
+/* Emit a sequence of moves to move SRC to DEST one word at a time;
+   the operands have a size of WORDS.  DEST_MODE and SRC_MODE are the
+   modes of the operands for use with simplify_gen_subreg_concatn.  */
+
+static void
+emit_simple_move_pieces (rtx dest, rtx src, enum machine_mode dest_mode,
+                         enum machine_mode src_mode, unsigned int words)
+{
+  unsigned int i;
+
+  if (REG_P (dest)
+      && !resolve_reg (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
+    emit_clobber (dest);
+
+  for (i = 0; i < words; ++i)
+    emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest, dest_mode,
+                                                 i * UNITS_PER_WORD),
+                    simplify_gen_subreg_concatn (word_mode, src, src_mode,
+                                                 i * UNITS_PER_WORD));
+}
+
 /* Decompose the registers used in a simple move SET within INSN.  If
    we don't change anything, return INSN, otherwise return the start
    of the sequence of moves.  */
@@ -659,7 +717,7 @@ resolve_simple_move (rtx set, rtx insn)
   real_dest = NULL_RTX;
 
   if (GET_CODE (src) == SUBREG
-      && resolve_reg_p (SUBREG_REG (src))
+      && resolve_reg (SUBREG_REG (src)) != NULL_RTX
       && (SUBREG_BYTE (src) != 0
           || (GET_MODE_SIZE (orig_mode)
               != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
@@ -674,7 +732,7 @@ resolve_simple_move (rtx set, rtx insn)
      the SUBREG is larger than word size.  */
 
   if (GET_CODE (dest) == SUBREG
-      && resolve_reg_p (SUBREG_REG (dest))
+      && resolve_reg (SUBREG_REG (dest)) != NULL_RTX
       && (SUBREG_BYTE (dest) != 0
           || (GET_MODE_SIZE (orig_mode)
               != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
@@ -695,10 +753,10 @@ resolve_simple_move (rtx set, rtx insn)
 
   if (src == SET_SRC (set)
       && dest == SET_DEST (set)
-      && !resolve_reg_p (src)
-      && !resolve_subreg_p (src)
-      && !resolve_reg_p (dest)
-      && !resolve_subreg_p (dest))
+      && !resolve_reg (src)
+      && !resolve_subreg (src)
+      && !resolve_reg (dest)
+      && !resolve_subreg (dest))
     {
       end_sequence ();
       return insn;
@@ -725,8 +783,7 @@ resolve_simple_move (rtx set, rtx insn)
      effects, we need to move via a temporary register.  */
 
   if (!can_decompose_p (src)
-      || side_effects_p (src)
-      || GET_CODE (src) == ASM_OPERANDS)
+      || side_effects_p (src))
     {
       rtx reg;
 
@@ -747,17 +804,14 @@ resolve_simple_move (rtx set, rtx insn)
   if (!can_decompose_p (dest)
       || (side_effects_p (dest) && !pushing)
       || (!SCALAR_INT_MODE_P (dest_mode)
-          && !resolve_reg_p (dest)
-          && !resolve_subreg_p (dest)))
+          && !resolve_reg (dest)
+          && !resolve_subreg (dest)))
     {
       if (real_dest == NULL_RTX)
         real_dest = dest;
-      if (!SCALAR_INT_MODE_P (dest_mode))
-        {
-          dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
-                                     MODE_INT, 0);
-          gcc_assert (dest_mode != BLKmode);
-        }
+      dest_mode = resolve_mode (dest_mode);
+      gcc_assert (dest_mode != BLKmode);
+
       dest = gen_reg_rtx (dest_mode);
       if (REG_P (real_dest))
         REG_ATTRS (dest) = REG_ATTRS (real_dest);
@@ -796,20 +850,7 @@ resolve_simple_move (rtx set, rtx insn)
         }
     }
   else
-    {
-      unsigned int i;
-
-      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
-        emit_clobber (dest);
-
-      for (i = 0; i < words; ++i)
-        emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
-                                                     dest_mode,
-                                                     i * UNITS_PER_WORD),
-                        simplify_gen_subreg_concatn (word_mode, src,
-                                                     orig_mode,
-                                                     i * UNITS_PER_WORD));
-    }
+    emit_simple_move_pieces (dest, src, dest_mode, orig_mode, words);
 
   if (real_dest != NULL_RTX)
     {
@@ -851,7 +892,7 @@ resolve_clobber (rtx pat, rtx insn)
   int ret;
 
   reg = XEXP (pat, 0);
-  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
+  if (!resolve_reg (reg) && !resolve_subreg (reg))
     return false;
 
   orig_mode = GET_MODE (reg);
@@ -886,7 +927,7 @@ resolve_clobber (rtx pat, rtx insn)
 static bool
 resolve_use (rtx pat, rtx insn)
 {
-  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
+  if (resolve_reg (XEXP (pat, 0)) || resolve_subreg (XEXP (pat, 0)))
     {
       delete_insn (insn);
       return true;
@@ -986,7 +1027,7 @@ resolve_shift_zext (rtx insn)
 
   op_operand = XEXP (op, 0);
 
-  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
+  if (!resolve_reg (SET_DEST (set)) && !resolve_reg (op_operand))
     return NULL_RTX;
 
   /* src_reg_num is the number of the word mode register which we
@@ -1059,6 +1100,64 @@ resolve_shift_zext (rtx insn)
   return insns;
 }
 
+/* Resolve INSN, which is not a simple move or any other simple case.
+   It may be a shift_zext we can resolve, or it may be a complex insn
+   with a destination that can be resolved.  */
+
+static void
+resolve_complex_insn (rtx insn)
+{
+  rtx decomposed_shift;
+
+  decomposed_shift = resolve_shift_zext (insn);
+  if (decomposed_shift != NULL_RTX)
+    insn = decomposed_shift;
+  else
+    {
+      rtx set = single_set (insn);
+      rtx t = NULL_RTX;
+
+      if (set)
+        t = resolve_reg (SET_DEST (set));
+
+      if (t != NULL_RTX)
+        {
+          rtx dest = SET_DEST (set);
+          enum machine_mode dest_mode;
+          rtx insns;
+          unsigned int i, words;
+
+          if (dump_file)
+            fprintf (dump_file, "; Resolving dest of insn %d\n",
+                     INSN_UID (insn));
+
+          dest_mode = resolve_mode (GET_MODE (dest));
+          gcc_assert (dest_mode != BLKmode);
+
+          words = GET_MODE_SIZE (dest_mode);
+          words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+          start_sequence ();
+          for (i = 0; i < words; i++)
+            emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
+                                                         dest_mode,
+                                                         i * UNITS_PER_WORD),
+                            simplify_gen_subreg (word_mode, dest,
+                                                 GET_MODE (dest),
+                                                 i * UNITS_PER_WORD));
+          insns = get_insns ();
+          end_sequence ();
+
+          emit_insn_after (insns, insn);
+        }
+      else
+        return;
+    }
+
+  recog_memoized (insn);
+  extract_insn (insn);
+}
+
 /* Look for registers which are always accessed via word-sized SUBREGs
    or via copies.  Decompose these registers into several word-sized
    pseudo-registers.  */
@@ -1097,6 +1196,7 @@ decompose_multiword_subregs (void)
      than scanning all the insns.  */
 
   decomposable_context = BITMAP_ALLOC (NULL);
+  preserved_context = BITMAP_ALLOC (NULL);
   non_decomposable_context = BITMAP_ALLOC (NULL);
   subreg_context = BITMAP_ALLOC (NULL);
 
@@ -1128,7 +1228,10 @@ decompose_multiword_subregs (void)
           set = simple_move (insn);
 
           if (!set)
-            cmi = NOT_SIMPLE_MOVE;
+            {
+              cmi = NOT_SIMPLE_MOVE;
+              set = single_set (insn);
+            }
           else
             {
               if (find_pseudo_copy (set))
@@ -1140,21 +1243,20 @@ decompose_multiword_subregs (void)
           n = recog_data.n_operands;
           for (i = 0; i < n; ++i)
             {
-              for_each_rtx (&recog_data.operand[i],
-                            find_decomposable_subregs,
-                            &cmi);
+              enum classify_move_insn cmi1 = cmi;
 
-              /* We handle ASM_OPERANDS as a special case to support
-                 things like x86 rdtsc which returns a DImode value.
-                 We can decompose the output, which will certainly be
-                 operand 0, but not the inputs.  */
+              /* We can allow non-decomposable stores; we will emit insns to
+                 split the multi-word register into its components after such
+                 a store.  */
+              if (cmi1 == NOT_SIMPLE_MOVE
+                  && !JUMP_P (insn)
+                  && set
+                  && recog_data.operand_loc[i] == &SET_DEST (set))
+                cmi1 = NOT_SIMPLE_MOVE_DEST;
 
-              if (cmi == SIMPLE_MOVE
-                  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
-                {
-                  gcc_assert (i == 0);
-                  cmi = NOT_SIMPLE_MOVE;
-                }
+              for_each_rtx (&recog_data.operand[i],
+                            find_decomposable_subregs,
+                            &cmi1);
             }
         }
     }
@@ -1173,14 +1275,17 @@ decompose_multiword_subregs (void)
       sub_blocks = sbitmap_alloc (last_basic_block);
       sbitmap_zero (sub_blocks);
 
+      max_decomposable_reg_num = max_reg_num ();
+      reg_decomposed_rtx = XCNEWVEC (rtx, max_decomposable_reg_num);
       EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
         decompose_register (regno);
 
       FOR_EACH_BB (bb)
         {
-          rtx insn;
+          rtx insn, next;
 
-          FOR_BB_INSNS (bb, insn)
+          /* Use the safe variant, which avoids scanning insns we emit.  */
+          FOR_BB_INSNS_SAFE (bb, insn, next)
             {
               rtx pat;
 
@@ -1234,17 +1339,7 @@ decompose_multiword_subregs (void)
                     }
                 }
               else
-                {
-                  rtx decomposed_shift;
-
-                  decomposed_shift = resolve_shift_zext (insn);
-                  if (decomposed_shift != NULL_RTX)
-                    {
-                      insn = decomposed_shift;
-                      recog_memoized (insn);
-                      extract_insn (insn);
-                    }
-                }
+                resolve_complex_insn (insn);
 
               for (i = recog_data.n_operands - 1; i >= 0; --i)
                 for_each_rtx (recog_data.operand_loc[i],
@@ -1301,6 +1396,8 @@ decompose_multiword_subregs (void)
             }
         }
 
+      free (reg_decomposed_rtx);
+      reg_decomposed_rtx = NULL;
       sbitmap_free (sub_blocks);
     }
 
@@ -1316,6 +1413,7 @@ decompose_multiword_subregs (void)
   VEC_free (bitmap, heap, reg_copy_graph);
 
   BITMAP_FREE (decomposable_context);
+  BITMAP_FREE (preserved_context);
   BITMAP_FREE (non_decomposable_context);
   BITMAP_FREE (subreg_context);
 }