Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi (revision 184104) +++ gcc/doc/tm.texi (working copy) @@ -10871,6 +10871,16 @@ conditional execution instructions inste 1 if it does use cc0. @end defmac +@deftypefn {Target Hook} rtx TARGET_CMOVE_MD_EXTRA (enum ifcvt_pass @var{if_pass}, rtx @var{dest}, rtx @var{compare}, rtx @var{op0}, rtx @var{op1}) +This function returns the insn for any machine specific conditional +moves to set @var{dest} to @var{op0} if @var{compare} is non-zero or +@var{op1} if it is zero. The function returns a null pointer if there +are no machine specific conditional moves. The @var{if_pass} argument +is @var{ifcvt_before_combine} on the first pass of if conversion, +@var{ifcvt_after_cobmine} on the second pass of if conversion, and +@var{ifcvt_after_reload} on the third pass of if conversion. +@end deftypefn + @defmac IFCVT_MODIFY_TESTS (@var{ce_info}, @var{true_expr}, @var{false_expr}) Used if the target needs to perform machine-dependent modifications on the conditionals used for turning basic blocks into conditionally executed code. Index: gcc/doc/tm.texi.in =================================================================== --- gcc/doc/tm.texi.in (revision 184104) +++ gcc/doc/tm.texi.in (working copy) @@ -10749,6 +10749,16 @@ conditional execution instructions inste 1 if it does use cc0. @end defmac +@hook TARGET_CMOVE_MD_EXTRA +This function returns the insn for any machine specific conditional +moves to set @var{dest} to @var{op0} if @var{compare} is non-zero or +@var{op1} if it is zero. The function returns a null pointer if there +are no machine specific conditional moves. The @var{if_pass} argument +is @var{ifcvt_before_combine} on the first pass of if conversion, +@var{ifcvt_after_cobmine} on the second pass of if conversion, and +@var{ifcvt_after_reload} on the third pass of if conversion. +@end deftypefn + @defmac IFCVT_MODIFY_TESTS (@var{ce_info}, @var{true_expr}, @var{false_expr}) Used if the target needs to perform machine-dependent modifications on the conditionals used for turning basic blocks into conditionally executed code. Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 184104) +++ gcc/doc/invoke.texi (working copy) @@ -839,7 +839,9 @@ See RS/6000 and PowerPC Options. -mno-recip-precision @gol -mveclibabi=@var{type} -mfriz -mno-friz @gol -mpointers-to-nested-functions -mno-pointers-to-nested-functions @gol --msave-toc-indirect -mno-save-toc-indirect} +-msave-toc-indirect -mno-save-toc-indirect @gol +-mcmove-adjacent-memory -mno-cmove-adjacent-memory @gol +-mcmove-adjacent-memory=@var{size}} @emph{RX Options} @gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol @@ -17209,6 +17211,27 @@ stack location in the function prologue a pointer on AIX and 64-bit Linux systems. If the TOC value is not saved in the prologue, it is saved just before the call through the pointer. The @option{-mno-save-toc-indirect} option is the default. + +@item -mcmove-adjacent-memory +@itemx -mno-cmove-adjacent-memory +@item -mcmove-adjacent-memory=@var{size} +@opindex mcmove-adjacent-memory +Generate (do not generate) code to optimize conditional moves where +the true and false parts of the conditional move are in adjacent +memory locations. The default is +@option{-mno-cmove-adjacent-memory}. This option depends on the +@option{-misel} option being set. + +This option is intended to speed up code that uses linked lists using +binary trees with right and left pointers. Typically, the code goes +through the list looking for an element and at each point in the +search it decides to go down the right or left . The right and left +pointers are usually adjcent in the structure, and in the same cache +line. + +The argument @var{size} is used to override the default of the number +of bytes that the compiler uses to decide if two fields are adjacent. +By default, the value @var{16} is used. @end table @node RX Options Index: gcc/targhooks.c =================================================================== --- gcc/targhooks.c (revision 184104) +++ gcc/targhooks.c (working copy) @@ -1,5 +1,5 @@ /* Default target hook functions. - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011 + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This file is part of GCC. @@ -1449,4 +1449,17 @@ default_pch_valid_p (const void *data_p, return NULL; } +/* Machine specific conditional moves that are not handled by the machine + independent code. */ + +rtx +default_cmove_md_extra (enum ifcvt_pass if_pass ATTRIBUTE_UNUSED, + rtx dest ATTRIBUTE_UNUSED, + rtx compare ATTRIBUTE_UNUSED, + rtx op0 ATTRIBUTE_UNUSED, + rtx op1 ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + #include "gt-targhooks.h" Index: gcc/targhooks.h =================================================================== --- gcc/targhooks.h (revision 184104) +++ gcc/targhooks.h (working copy) @@ -1,5 +1,5 @@ /* Default target hook functions. - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011 + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This file is part of GCC. @@ -178,3 +178,4 @@ extern enum machine_mode default_get_reg extern void *default_get_pch_validity (size_t *); extern const char *default_pch_valid_p (const void *, size_t); +extern rtx default_cmove_md_extra (enum ifcvt_pass, rtx, rtx, rtx, rtx); Index: gcc/target.def =================================================================== --- gcc/target.def (revision 184104) +++ gcc/target.def (working copy) @@ -1534,6 +1534,14 @@ DEFHOOK bool, (struct ao_ref_s *ref), default_ref_may_alias_errno) +/* Machine specific conditional moves that are not handled by the machine + independent code. */ +DEFHOOK +(cmove_md_extra, + "", + rtx, (enum ifcvt_pass if_pass, rtx dest, rtx compare, rtx op0, rtx op1), + default_cmove_md_extra) + /* Support for named address spaces. */ #undef HOOK_PREFIX #define HOOK_PREFIX "TARGET_ADDR_SPACE_" Index: gcc/target.h =================================================================== --- gcc/target.h (revision 184104) +++ gcc/target.h (working copy) @@ -149,6 +149,14 @@ enum vect_cost_for_stmt vec_promote_demote }; +/* Passes of if-conversion. */ +enum ifcvt_pass +{ + ifcvt_before_combine, + ifcvt_after_combine, + ifcvt_after_reload +}; + /* The target structure. This holds all the backend hooks. */ #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; #define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS; Index: gcc/ifcvt.c =================================================================== --- gcc/ifcvt.c (revision 184104) +++ gcc/ifcvt.c (working copy) @@ -1,6 +1,6 @@ /* If-conversion support. Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, - 2011 + 2011, 2012 Free Software Foundation, Inc. This file is part of GCC. @@ -97,9 +97,9 @@ static rtx noce_get_condition (rtx, rtx static int noce_operand_ok (const_rtx); static void merge_if_block (ce_if_block_t *); static int find_cond_trap (basic_block, edge, edge); -static basic_block find_if_header (basic_block, int); +static basic_block find_if_header (basic_block, int, enum ifcvt_pass); static int block_jumps_and_fallthru_p (basic_block, basic_block); -static int noce_find_if_block (basic_block, edge, edge, int); +static int noce_find_if_block (basic_block, edge, edge, int, enum ifcvt_pass); static int cond_exec_find_if_block (ce_if_block_t *); static int find_if_case_1 (basic_block, edge, edge); static int find_if_case_2 (basic_block, edge, edge); @@ -762,6 +762,9 @@ struct noce_if_info /* Estimated cost of the particular branch instruction. */ int branch_cost; + + /* Which ifcvt pass this is. */ + enum ifcvt_pass if_pass; }; static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int); @@ -2438,7 +2441,7 @@ noce_process_if_block (struct noce_if_in basic_block join_bb = if_info->join_bb; /* JOIN */ rtx jump = if_info->jump; rtx cond = if_info->cond; - rtx insn_a, insn_b; + rtx insn_a, insn_b, insn; rtx set_a, set_b; rtx orig_x, x, a, b; @@ -2533,7 +2536,19 @@ noce_process_if_block (struct noce_if_in /* Don't operate on sources that may trap or are volatile. */ if (! noce_operand_ok (a) || ! noce_operand_ok (b)) - return FALSE; + { + /* Give the back end a chance to handle conditional moves that aren't + supported here. The machine still might have some way of handling + sources that trap or are volatile. */ + insn = targetm.cmove_md_extra (if_info->if_pass, x, cond, a, b); + if (insn) + { + emit_insn_before_setloc (insn, jump, INSN_LOCATOR (insn_a)); + goto success; + } + + return FALSE; + } retry: /* Set up the info block for our subroutines. */ @@ -2633,6 +2648,16 @@ noce_process_if_block (struct noce_if_in goto success; } + /* Give the back end a chance to handle conditional moves that aren't + supported here. */ + insn = targetm.cmove_md_extra (if_info->if_pass, x, cond, a, b); + if (insn) + { + emit_insn_before_setloc (insn, if_info->jump, + INSN_LOCATOR (if_info->insn_a)); + goto success; + } + if (!else_bb && set_b) { insn_b = set_b = NULL_RTX; @@ -2976,7 +3001,7 @@ cond_move_process_if_block (struct noce_ static int noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge, - int pass) + int pass, enum ifcvt_pass if_pass) { basic_block then_bb, else_bb, join_bb; bool then_else_reversed = false; @@ -3076,6 +3101,7 @@ noce_find_if_block (basic_block test_bb, if_info.then_else_reversed = then_else_reversed; if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb), predictable_edge_p (then_edge)); + if_info.if_pass = if_pass; /* Do the real work. */ @@ -3207,7 +3233,7 @@ merge_if_block (struct ce_if_block * ce_ first block if some transformation was done. Return NULL otherwise. */ static basic_block -find_if_header (basic_block test_bb, int pass) +find_if_header (basic_block test_bb, int pass, enum ifcvt_pass if_pass) { ce_if_block_t ce_info; edge then_edge; @@ -3259,7 +3285,7 @@ find_if_header (basic_block test_bb, int #endif if (!reload_completed - && noce_find_if_block (test_bb, then_edge, else_edge, pass)) + && noce_find_if_block (test_bb, then_edge, else_edge, pass, if_pass)) goto success; if (reload_completed @@ -4334,7 +4360,7 @@ dead_or_predicable (basic_block test_bb, /* Main entry point for all if-conversion. */ static void -if_convert (void) +if_convert (enum ifcvt_pass if_pass) { basic_block bb; int pass; @@ -4380,7 +4406,7 @@ if_convert (void) { basic_block new_bb; while (!df_get_bb_dirty (bb) - && (new_bb = find_if_header (bb, pass)) != NULL) + && (new_bb = find_if_header (bb, pass, if_pass)) != NULL) bb = new_bb; } @@ -4450,7 +4476,7 @@ rest_of_handle_if_conversion (void) if (dump_file) dump_flow_info (dump_file, dump_flags); cleanup_cfg (CLEANUP_EXPENSIVE); - if_convert (); + if_convert (ifcvt_before_combine); } cleanup_cfg (0); @@ -4490,7 +4516,7 @@ gate_handle_if_after_combine (void) static unsigned int rest_of_handle_if_after_combine (void) { - if_convert (); + if_convert (ifcvt_after_combine); return 0; } @@ -4525,7 +4551,7 @@ gate_handle_if_after_reload (void) static unsigned int rest_of_handle_if_after_reload (void) { - if_convert (); + if_convert (ifcvt_after_reload); return 0; } Index: gcc/config/rs6000/rs6000.opt =================================================================== --- gcc/config/rs6000/rs6000.opt (revision 184104) +++ gcc/config/rs6000/rs6000.opt (working copy) @@ -1,7 +1,7 @@ ; Options for the rs6000 port of the compiler ; -; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software -; Foundation, Inc. +; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +; Free Software Foundation, Inc. ; Contributed by Aldy Hernandez . ; ; This file is part of GCC. @@ -532,3 +532,11 @@ Use/do not use r11 to hold the static li msave-toc-indirect Target Report Var(TARGET_SAVE_TOC_INDIRECT) Save Control whether we save the TOC in the prologue for indirect calls or generate the save inline + +mcmove-adjacent-memory +Target Report Var(TARGET_CMOVE_ADJACENT_MEMORY,16) Init(0) Save +Control whether we load both sides of a conditional move if they are adjacent in memory. + +mcmove-adjacent-memory= +Target Report RejectNegative Joined UInteger Var(TARGET_CMOVE_ADJACENT_MEMORY) +Modify the line size for figuring out if the two memory items are adjacent. Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 184104) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -1229,6 +1229,7 @@ static bool rs6000_legitimate_constant_p static bool rs6000_save_toc_in_prologue_p (void); static void rs6000_code_end (void) ATTRIBUTE_UNUSED; static void rs6000_set_up_by_prologue (struct hard_reg_set_container *); +static rtx rs6000_cmove_md_extra (enum ifcvt_pass, rtx, rtx, rtx, rtx); /* Hash table stuff for keeping track of TOC entries. */ @@ -1672,6 +1673,9 @@ static const struct attribute_spec rs600 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok + +#undef TARGET_CMOVE_MD_EXTRA +#define TARGET_CMOVE_MD_EXTRA rs6000_cmove_md_extra /* Simplifications for entries below. */ @@ -16348,6 +16352,138 @@ rs6000_emit_vector_cond_expr (rtx dest, return 1; } +/* If a memory reference is an offsettable memory reference, return true and + return the base part and offset. */ + +static inline bool +rs6000_decompose_offsettable_memref (rtx op, rtx *base, HOST_WIDE_INT *offset) +{ + rtx addr, right, left; + + *base = NULL_RTX; + *offset = -1; + + if (!MEM_P (op) || side_effects_p (op) || volatile_refs_p (op)) + return false; + + addr = XEXP (op, 0); + if (GET_CODE (addr) == REG) + { + *base = addr; + *offset = 0; + return true; + } + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) + { + right = XEXP (addr, 0); + left = XEXP (addr, 1); + if (!REG_P (right) || GET_CODE (left) != CONST_INT) + return false; + + *base = right; + *offset = INTVAL (left); + return true; + } + + return false; +} + +/* Do any machine specific optimization of conditional moves. On the rs6000, + if both sides of the conditional move are memory operations that seem to be + within the same cache block, then the second load is 'free', and we use + load/load/isel if the user enables the option. This is useful for balanced + binary trees/tries where you chase through a list following either the right + or left pointers. If the pointer to the memory address aren't appropriately + aligned, then this could potentially cause a page fault or a trap. */ + +static rtx +rs6000_cmove_md_extra (enum ifcvt_pass if_pass, rtx dest, rtx compare, rtx op0, + rtx op1) +{ + enum machine_mode mode0 = GET_MODE (op0); + enum machine_mode mode1 = GET_MODE (op1); + HOST_WIDE_INT size = GET_MODE_SIZE (mode0); + HOST_WIDE_INT op0_size = size; + HOST_WIDE_INT op1_size = size; + HOST_WIDE_INT line_size = TARGET_CMOVE_ADJACENT_MEMORY; + HOST_WIDE_INT offset0 = -1; + HOST_WIDE_INT offset1 = -1; + rtx op0_no_extend = op0; + rtx op1_no_extend = op1; + rtx ret = NULL_RTX; + rtx base0, base1, cmp_op0, cmp_op1; + enum rtx_code cmp_code = GET_CODE (compare); + + if (if_pass == ifcvt_after_reload) + return NULL_RTX; + + /* Allow sign/zero extend, but only do before combine, since the force_reg + will split the load and the sign/zero extend. Combine will join these + back together. */ + if (if_pass == ifcvt_before_combine) + { + if (GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) + { + op0_no_extend = XEXP (op0, 0); + op0_size = GET_MODE_SIZE (GET_MODE (op0_no_extend)); + } + + if (GET_CODE (op1) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND) + { + op1_no_extend = XEXP (op1, 0); + op1_size = GET_MODE_SIZE (GET_MODE (op1_no_extend)); + } + } + + if (!TARGET_ISEL || line_size < 2*size || mode0 != mode1 + || !MEM_P (op0_no_extend) || !MEM_P (op1_no_extend) + || !INTEGRAL_MODE_P (mode0)) + return NULL_RTX; + + /* Only do this optimization for simple comparisons. */ + if (GET_RTX_CLASS (cmp_code) != RTX_COMPARE + && GET_RTX_CLASS (cmp_code) != RTX_COMM_COMPARE) + return NULL_RTX; + + cmp_op0 = XEXP (compare, 0); + if (!REG_P (cmp_op0)) + return NULL_RTX; + + cmp_op1 = XEXP (compare, 1); + if (!REG_P (cmp_op1) && cmp_op1 != const0_rtx) + return NULL_RTX; + + /* Are memory references reg+offset? */ + if (!rs6000_decompose_offsettable_memref (op0_no_extend, &base0, &offset0)) + return NULL_RTX; + + if (!rs6000_decompose_offsettable_memref (op1_no_extend, &base1, &offset1)) + return NULL_RTX; + + /* Are the base pointers the same? */ + if (!rtx_equal_p (base0, base1)) + return NULL_RTX; + + /* Are the two fields aligned on a word/double word boundary? */ + if ((offset0 % op0_size) != 0 || (offset1 % op1_size) != 0) + return NULL_RTX; + + /* Are the memory locations in different cache lines? */ + if ((offset0 / line_size) != (offset1 / line_size)) + return NULL_RTX; + + start_sequence (); + /* We reverse op0/op1 here because we are replacing an if statement, where it + jumps to do the ELSE part if the condition is true. */ + if (rs6000_emit_int_cmove (dest, + copy_rtx (compare), + force_reg (mode1, copy_rtx (op1)), + force_reg (mode0, copy_rtx (op0)))) + ret = get_insns (); + end_sequence (); + return ret; +} + /* Emit a conditional move: move TRUE_COND to DEST if OP of the operands of the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the hardware has no such operation. */