Index: gcc/common.opt =================================================================== --- gcc/common.opt (revision 209009) +++ gcc/common.opt (working copy) @@ -1745,6 +1745,10 @@ flive-range-shrinkage Common Report Var(flag_live_range_shrinkage) Init(0) Optimization Relief of register pressure through live range shrinkage +fmerge-paired-loadstore +Common Report Var(flag_merge_paired_loadstore) Init(2) Optimization +Perform a target dependent pass merging paired loadstore + frename-registers Common Report Var(flag_rename_registers) Init(2) Optimization Perform a register renaming optimization pass Index: gcc/Makefile.in =================================================================== --- gcc/Makefile.in (revision 209009) +++ gcc/Makefile.in (working copy) @@ -1289,6 +1289,7 @@ OBJS = \ langhooks.o \ lcm.o \ lists.o \ + merge-paired-loadstore.o \ loop-doloop.o \ loop-init.o \ loop-invariant.o \ Index: gcc/params.def =================================================================== --- gcc/params.def (revision 209009) +++ gcc/params.def (working copy) @@ -262,6 +262,14 @@ DEFPARAM(PARAM_MAX_HOIST_DEPTH, "Maximum depth of search in the dominator tree for expressions to hoist", 30, 0, 0) +/* Paired memory access instructions can't be merged if they are too far + from each other in instruction flow. This is used to avoid quadratic + behavior in merging paired load store algorithm. */ +DEFPARAM(PARAM_MAX_MERGE_PAIRED_LOADSTORE_DISTANCE, + "max-merge-paired-loadstore-distance", + "Only paired instructions within this distance can be merged", + 4, 0, 0) + /* This parameter limits the number of insns in a loop that will be unrolled, and by how much the loop is unrolled. Index: gcc/target.def =================================================================== --- gcc/target.def (revision 209009) +++ gcc/target.def (working copy) @@ -2026,6 +2026,24 @@ The default is @code{false}.", bool, (void), hook_bool_void_false) +/* The target-specific load store pair hook function. */ +DEFHOOK +(merge_paired_loadstore, + "If non-null, this hook checks whether two consecutive memory access\n\ +instructions can be paired and returns the paired instruction.\n\ +On ARM architecture, for example, it checks and merges two ldr or str\n\ +instructions into a single ldrd or strd instruction.\n\ +\n\ +@var{x} is the first instruction and @var{y} is the second one.\n\ +@var{mem_x} and @var{mem_y} are memory references of the two instructions.\n\ +@var{is_hoist} indicates the two will be paired by hoisting. It is provided\n\ +for backend to check, for example, ARM target doesn't want to merge a load\n\ +pair by sinking the first instruction. @var{is_load} indicates a load pair.\n\ +\n\ +It is the hook's responsibility to generate and return valid instruction\n\ +after merging.", + rtx, (rtx x, rtx y, rtx mem_x, rtx mem_y, bool is_hoist, bool is_load), NULL) + /* Set up target-specific built-in functions. */ DEFHOOK (init_builtins, Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 209009) +++ gcc/tree-pass.h (working copy) @@ -554,6 +554,7 @@ extern rtl_opt_pass *make_pass_stack_adjustments ( extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt); extern rtl_opt_pass *make_pass_if_after_reload (gcc::context *ctxt); extern rtl_opt_pass *make_pass_regrename (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_merge_paired_loadstore (gcc::context *ctxt); extern rtl_opt_pass *make_pass_cprop_hardreg (gcc::context *ctxt); extern rtl_opt_pass *make_pass_reorder_blocks (gcc::context *ctxt); extern rtl_opt_pass *make_pass_branch_target_load_optimize2 (gcc::context Index: gcc/testsuite/gcc.target/arm/merge-paired-loadstore.c =================================================================== --- gcc/testsuite/gcc.target/arm/merge-paired-loadstore.c (revision 0) +++ gcc/testsuite/gcc.target/arm/merge-paired-loadstore.c (revision 0) @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_prefer_ldrd_strd } */ +/* { dg-options "-O2" } */ + +struct +{ + int x; + int y; + char c; + int d; +}a; + +int foo(int x, int y) +{ + int c; + a.x = x; + c = a.x; + a.d = c; + a.y = y; + + return 0; +} +/* { dg-final { scan-assembler "strd" } } */ Index: gcc/passes.def =================================================================== --- gcc/passes.def (revision 209009) +++ gcc/passes.def (working copy) @@ -385,6 +385,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_peephole2); NEXT_PASS (pass_if_after_reload); NEXT_PASS (pass_regrename); + NEXT_PASS (pass_merge_paired_loadstore); NEXT_PASS (pass_cprop_hardreg); NEXT_PASS (pass_fast_rtl_dce); NEXT_PASS (pass_duplicate_computed_gotos); Index: gcc/timevar.def =================================================================== --- gcc/timevar.def (revision 209009) +++ gcc/timevar.def (working copy) @@ -241,6 +241,7 @@ DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thre DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2") DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments") DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2") +DEFTIMEVAR (TV_MERGE_PAIRED_LOADSTORE, "merge paired loadstore") DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers") DEFTIMEVAR (TV_CPROP_REGISTERS , "hard reg cprop") DEFTIMEVAR (TV_SCHED2 , "scheduling 2") Index: gcc/doc/tm.texi.in =================================================================== --- gcc/doc/tm.texi.in (revision 209009) +++ gcc/doc/tm.texi.in (working copy) @@ -8198,6 +8198,8 @@ to by @var{ce_info}. @hook TARGET_MACHINE_DEPENDENT_REORG +@hook TARGET_MERGE_PAIRED_LOADSTORE + @hook TARGET_INIT_BUILTINS @hook TARGET_BUILTIN_DECL Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi (revision 209009) +++ gcc/doc/tm.texi (working copy) @@ -10910,6 +10910,22 @@ You need not implement the hook if it has nothing definition is null. @end deftypefn +@deftypefn {Target Hook} rtx TARGET_MERGE_PAIRED_LOADSTORE (rtx @var{x}, rtx @var{y}, rtx @var{mem_x}, rtx @var{mem_y}, bool @var{is_hoist}, bool @var{is_load}) +If non-null, this hook checks whether two consecutive memory access +instructions can be paired and returns the paired instruction. +On ARM architecture, for example, it checks and merges two ldr or str +instructions into a single ldrd or strd instruction. + +@var{x} is the first instruction and @var{y} is the second one. +@var{mem_x} and @var{mem_y} are memory references of the two instructions. +@var{is_hoist} indicates the two will be paired by hoisting. It is provided +for backend to check, for example, ARM target doesn't want to merge a load +pair by sinking the first instruction. @var{is_load} indicates a load pair. + +It is the hook's responsibility to generate and return valid instruction +after merging. +@end deftypefn + @deftypefn {Target Hook} void TARGET_INIT_BUILTINS (void) Define this hook if you have any machine-specific built-in functions that need to be defined. It should be a function that performs the Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 209009) +++ gcc/doc/invoke.texi (working copy) @@ -380,6 +380,7 @@ Objective-C and Objective-C++ Dialects}. -fno-ira-share-spill-slots -fira-verbose=@var{n} @gol -fisolate-erroneous-paths-dereference -fisolate-erroneous-paths-attribute -fivopts -fkeep-inline-functions -fkeep-static-consts -flive-range-shrinkage @gol +-fmerge-paired-loadstore @gol -floop-block -floop-interchange -floop-strip-mine -floop-nest-optimize @gol -floop-parallelize-all -flto -flto-compression-level @gol -flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol @@ -9160,6 +9161,15 @@ a ``home register''. Enabled by default with @option{-funroll-loops} and @option{-fpeel-loops}. +@item -fmerge-paired-loadstore +@opindex fmerge-paired-loadstore +Performs a target-specific pass over the instruction stream to merge +consecutive memory access instructions by calling target dependent hook +function @code{TARGET_MERGE_PAIRED_LOADSTORE}. + +Enabled at all optimization level if @code{TARGET_MERGE_PAIRED_LOADSTORE} is +defined by a backend. + @item -ftracer @opindex ftracer Perform tail duplication to enlarge superblock size. This transformation @@ -9522,6 +9532,14 @@ This is used to avoid quadratic behavior in hoisti The value of 0 does not limit on the search, but may slow down compilation of huge functions. The default value is 30. +@item max-merge-paired-loadstore-distance +Only two memory access instructions within this distance in instruction +flow can be paired. +This is used to avoid quadratic behavior in merging paired load store +algorithm. The value of 0 does not limit on the search, but may slow +down compilation of huge functions. The default value is 4, which is +large enough to capture most opportunities. + @item max-tail-merge-comparisons The maximum amount of similar bbs to compare a bb with. This is used to avoid quadratic behavior in tree tail merging. The default value is 10. Index: gcc/merge-paired-loadstore.c =================================================================== --- gcc/merge-paired-loadstore.c (revision 0) +++ gcc/merge-paired-loadstore.c (revision 0) @@ -0,0 +1,886 @@ +/* Target dependent optimization pass merging paired loadstore instructions. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Bin Cheng, ARM + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file implements the target dependent load store pair pass + of the compiler. It is a simple pass whose purpose is to take + advantage of double-word memory access instructions of some + machines, like ARM. + + GCC generally does this kind of optimization in peephole pass. + Unfortunately peephole is too weak to handle complex cases in + which the consecutive memory instructions are intervened by + other instructions. + + Statistical data show there are many load store pair opportunities + generated by spilling during reigster allocation. To catch all + these cases, this pass is run after register allocation. To be + precise, it is after the register renaming pass. Also this pass + is run before the second scheduler pass, thus the scheduler still + has its chance to rearrange the order of merged instructions. + + This pass iterates over instruction stream for each basic block, + trying to find a paired instruction for each memory access insn + whose address expression is in the form of "[base_offset]". It + only guarantee that the two consecutive memory access instructions + has no data dependency issues. After a pair is found it calls to + TARGET_MERGE_PAIRED_LOADSTORE hook to do further target dependent + checks. The hook returns new merged instruction if the pair can + be merged. It's the target hook's responsibility to generate and + return valid instruction. At last, this pass inserts the newly + returned instruction at proper position, removes the old instructions. + + TODO: Some targets support writeback addressing mode, this pass + should be able to handle these memory references. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl-error.h" +#include "tm_p.h" +#include "insn-config.h" +#include "insn-attr.h" +#include "hard-reg-set.h" +#include "recog.h" +#include "regs.h" +#include "addresses.h" +#include "expr.h" +#include "function.h" +#include "flags.h" +#include "basic-block.h" +#include "reload.h" +#include "target.h" +#include "params.h" +#include "tree-pass.h" +#include "df.h" +#include "insn-codes.h" + +/* Flags indicate which memory unit has been clobbered between pair + instructions. */ +#define MEM_CLOB_NONE (0) +#define MEM_CLOB_SELF (1) +#define MEM_CLOB_PRED (2) +#define MEM_CLOB_SUCC (4) +#define MEM_CLOB_ALL (MEM_CLOB_PRED | MEM_CLOB_SELF | MEM_CLOB_SUCC) +#define MEM_CLOBBERED_P(flags, flag) ((flags & flag) == flag) + +/* Flags indicate how the pair instructions can be mereged. */ +#define PAIR_NONE (0) +#define PAIR_SINK (1) +#define PAIR_HOIST (2) +#define PAIR_ANY (PAIR_SINK | PAIR_HOIST) + +/* Currently only support base as a REG or SYMBOL_REF. */ +#define ADDR_BASE_P(x) (GET_CODE (x) == REG || GET_CODE (x) == SYMBOL_REF) + +/* Bases are considered same if they are same REG or SYMBOL_REF. */ +#define BASE_SAME_P(b1, b2) \ + (GET_CODE (b1) == GET_CODE (b2) \ + && ((REG_P (b1) && REGNO (b1) == REGNO (b2)) \ + || (GET_CODE (b1) == SYMBOL_REF \ + && SYMBOL_REF_DECL (b1) == SYMBOL_REF_DECL (b2)))) + +/* Record information for each memory reference. */ +struct mem_info_t +{ + rtx mem; /* The memmory reference itself. */ + rtx base; /* Base part of address expression. */ + rtx offset; /* Offset part of address expression. */ + bool is_load; /* Indicate if it's a load or store. */ +}; + +/* Record information for each insn with memory reference. */ + +struct insn_info_t +{ + rtx insn; /* The instruction. */ + rtx src; /* Source operand for a set instructon. */ + rtx dest; /* Dest operand for a set instruction. */ + vec mem_refs; /* memory references in this insn. */ +}; + +/* Array of pointers to information for each instruction. */ + +static vec insn_infos; + +/* Record information for load store pair. */ + +struct pair_info_t +{ + struct insn_info_t *x; /* The first insn in pair. */ + struct insn_info_t *y; /* The second insn in pair. */ + HARD_REG_SET reg_uses; /* Register uses info after X. */ + HARD_REG_SET reg_defs; /* Register defs info after X. */ + /* For memory unit accessed by insn X, there are two adjacent + memory units with which X can be merged, like: + + |<--unit size-->| + +---------------+---------------+---------------+ + | MEM_PRED | MEM_X | MEM_SUCC | + +---------------+---------------+---------------+ + + If one of memory units before and after MEM_X is clobbered + by some instructions between X and Y, it's still possible + to have X and Y paired. To handle this case, we need to + record the clobber information as iterating over instruction + stream looking for Y. Once we find a candidate for Y, we + need to check whether it accesses memory unit which has + already been clobbered. If both adjacent memory units are + clobbered, we need to start over because there is no way X + can be paired with any following instructions. */ + int mem_clob; + /* For a candidate pair of instructions, it's possible to merge + it by either sinking insn X or hoisting insn Y. Sometime + the pair can only be merged in one way, for example, if the + register operand of X is clobbered by instructions between X + and Y, then we can only merge the pair by hoisting insn Y. */ + int pair_kind; +}pair_info; + +/* Maximal number of memory references we support in single isntruction. */ +#define MAX_MEM_REF_NUM (8) + +struct mem_ref_data +{ + int num; + rtx mem[MAX_MEM_REF_NUM]; +}; + +/* Record instructions which have more than MAX_MEM_REF_NUM references. */ + +static sbitmap multi_mem_map; + +/* If MEM is in the form of [base+offset], extract the two parts + of address and set to BASE and OFFSET, otherwise return false + after clearing BASE and OFFSET. */ + +static bool +extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) +{ + rtx addr; + + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (addr)). */ + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + + if (ADDR_BASE_P (addr)) + { + *base = addr; + *offset = const0_rtx; + return true; + } + + if (GET_CODE (addr) == PLUS + && ADDR_BASE_P (XEXP (addr, 0)) + && CONST_INT_P (XEXP (addr, 1))) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); + return true; + } + + *base = NULL_RTX; + *offset = NULL_RTX; + + return false; +} + +/* Check if REG is modified by INSN. */ + +static bool +reg_modified_p (rtx reg, rtx insn) +{ + df_ref *def_rec; + unsigned int start_regno, end_regno; + + start_regno = REGNO (reg); + end_regno = END_HARD_REGNO (reg); + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + { + unsigned int regno = DF_REF_REGNO (*def_rec); + if (regno >= start_regno && regno < end_regno) + return true; + } + + return false; +} + +/* Check instruction recorded in PINSN_INFO to see if it is the + first instruction of load store pair. If yes, record the + information in PPAIR_INFO and return TRUE. */ + +static bool +find_pair_x_insn (struct pair_info_t *ppair_info, + struct insn_info_t *pinsn_info) +{ + bool is_load; + rtx mem, base; + rtx insn = pinsn_info->insn; + + if (GET_CODE (PATTERN (insn)) != SET) + return false; + + mem = pinsn_info->mem_refs[0].mem; + base = pinsn_info->mem_refs[0].base; + is_load = pinsn_info->mem_refs[0].is_load; + /* Skip if it's an unknown or volatile memory reference. */ + if (base == NULL_RTX || MEM_VOLATILE_P (mem)) + return false; + + /* Don't bother if base is a register and we are modifying it. */ + if (REG_P (base) && reg_modified_p (base, insn)) + return false; + + ppair_info->x = pinsn_info; + ppair_info->mem_clob = MEM_CLOB_NONE; + ppair_info->pair_kind = PAIR_ANY; + CLEAR_HARD_REG_SET (ppair_info->reg_uses); + CLEAR_HARD_REG_SET (ppair_info->reg_defs); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Found the first insn of %s pair: \n", + is_load ? "load" : "store"); + dump_insn_slim (dump_file, insn); + } + + return true; +} + +/* Check if references of INSN overlap with hard register set REGS. + CHK_DEF indicates whether we check defs or uses. */ + +static bool +ref_overlaps_hard_reg_set_p (const HARD_REG_SET regs, rtx insn, bool chk_def) +{ + df_ref *ref_rec = (chk_def ? DF_INSN_DEFS (insn) : DF_INSN_USES (insn)); + + for (; *ref_rec; ref_rec++) + { + unsigned int regno = DF_REF_REGNO (*ref_rec); + if (TEST_HARD_REG_BIT (regs, regno)) + return true; + } + + return false; +} + +/* Given the first memory access instruction in pair recorded in + PINFO, this function checks whether INSN can be paired with it. + MEM is in the form of [BASE+OFFSET]. IS_LOAD is true if it is + a load pair. */ + +static bool +valid_insn_pair_p (struct pair_info_t *pinfo, rtx insn, rtx mem, + rtx base, rtx offset, bool is_load) +{ + int cant_do = PAIR_NONE; + int pair_type = MEM_CLOB_NONE; + HOST_WIDE_INT off_x, off_y, size; + struct mem_info_t *pmem_info_x = &pinfo->x->mem_refs[0]; + enum machine_mode mode = GET_MODE (pmem_info_x->mem); + + gcc_assert (pmem_info_x->is_load == is_load); + + /* Can't be paired if refs use different base. */ + if (base == NULL_RTX || !BASE_SAME_P (pmem_info_x->base, base)) + return false; + + /* Can't be paired if memory refs have different mode. */ + if (GET_MODE (mem) != mode) + return false; + + /* Can't be paired if it's a control flow instruction. */ + if (may_trap_or_fault_p (PATTERN (insn)) + && find_reg_note (insn, REG_EH_REGION, NULL_RTX)) + return false; + + /* Decide how the two memory accesses are paired. */ + off_x = INTVAL (pmem_info_x->offset); + off_y = INTVAL (offset); + size = GET_MODE_SIZE (mode); + if (off_y + size == off_x) + pair_type = MEM_CLOB_PRED; + if (off_x + size == off_y) + pair_type = MEM_CLOB_SUCC; + + /* Can't be paired if the two memory references are not consecutive. */ + if (pair_type == MEM_CLOB_NONE) + return false; + + /* Can't be paired if mem is volatile. */ + if (volatile_refs_p (mem)) + return false; + + /* Can't be paired if base is a register and we are modifying it. */ + if (REG_P (base) && reg_modified_p (base, insn)) + return false; + + /* Clear PAIR_SINK if we can't merge the pair by sinking the first + instruction because of dependecies. */ + if ((pinfo->pair_kind & PAIR_SINK) != 0 + && (ref_overlaps_hard_reg_set_p (pinfo->reg_defs, + pinfo->x->insn, false) + || ref_overlaps_hard_reg_set_p (pinfo->reg_defs, + pinfo->x->insn, true) + || ref_overlaps_hard_reg_set_p (pinfo->reg_uses, + pinfo->x->insn, true))) + pinfo->pair_kind &= (~PAIR_SINK); + + /* Clear PAIR_SINK if the memory unit of the first instruction + is clobbered between. */ + if ((pinfo->pair_kind & PAIR_SINK) != 0 + && MEM_CLOBBERED_P (pinfo->mem_clob, MEM_CLOB_SELF)) + pinfo->pair_kind &= (~PAIR_SINK); + + /* Clear PAIR_HOIST if we can't merge the pair by hoisting the + second instruction because of dependencies. */ + if (ref_overlaps_hard_reg_set_p (pinfo->reg_defs, insn, false) + || ref_overlaps_hard_reg_set_p (pinfo->reg_defs, insn, true) + || ref_overlaps_hard_reg_set_p (pinfo->reg_uses, insn, true)) + cant_do |= PAIR_HOIST; + + /* Clear PAIR_HOIST if the memory unit of the second instruction + is clobbered between. */ + if (MEM_CLOBBERED_P (pinfo->mem_clob, pair_type)) + cant_do |= PAIR_HOIST; + + /* Take out cant_do to see if we can merge the pair properly. */ + if ((pinfo->pair_kind & (~cant_do)) == PAIR_NONE) + return false; + + pinfo->pair_kind &= (~cant_do); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Found the second insn of %s pair:\n", + is_load ? "load" : "store"); + dump_insn_slim (dump_file, insn); + } + + return true; +} + +static int +mark_mem_use_1 (rtx *x, void *data) +{ + struct mem_ref_data *mem_data = (struct mem_ref_data *)data; + if (MEM_P (*x)) + { + if (mem_data->num < MAX_MEM_REF_NUM) + mem_data->mem[mem_data->num] = *x; + + mem_data->num++; + } + + return 0; +} + +static void +mark_mem_use (rtx *x, void *data) +{ + for_each_rtx (x, mark_mem_use_1, data); +} + +static void +mark_mem_store (rtx x, const_rtx set ATTRIBUTE_UNUSED, void *data) +{ + struct mem_ref_data *mem_data = (struct mem_ref_data *)data; + if (MEM_P (x)) + { + if (mem_data->num < MAX_MEM_REF_NUM) + mem_data->mem[mem_data->num] = x; + + mem_data->num++; + } +} + +/* Initialize memory reference information for INSN. */ + +static void +init_insn_info (rtx insn) +{ + int i, n_stores; + unsigned int uid = INSN_UID (insn); + struct mem_ref_data mem_data; + struct insn_info_t *pinfo; + + gcc_assert (NONDEBUG_INSN_P (insn)); + + if (uid >= SBITMAP_SIZE (multi_mem_map)) + multi_mem_map = sbitmap_resize (multi_mem_map, uid + 10, 0); + + mem_data.num = 0; + note_stores (PATTERN (insn), mark_mem_store, &mem_data); + n_stores = mem_data.num; + note_uses (&PATTERN (insn), mark_mem_use, &mem_data); + + if (mem_data.num == 0) + return; + else if (mem_data.num > MAX_MEM_REF_NUM) + { + /* This instruction has too many memory refs that we + can't handle. */ + bitmap_set_bit (multi_mem_map, uid); + return; + } + pinfo = XCNEW (struct insn_info_t); + pinfo->mem_refs = vNULL; + pinfo->mem_refs.safe_grow_cleared (mem_data.num); + for (i = 0; i < mem_data.num; i++) + { + bool is_load = (i >= n_stores); + + pinfo->insn = insn; + pinfo->mem_refs[i].mem = mem_data.mem[i]; + extract_base_offset_in_addr (mem_data.mem[i], + &pinfo->mem_refs[i].base, + &pinfo->mem_refs[i].offset); + pinfo->mem_refs[i].is_load = is_load; + } + if (GET_CODE (PATTERN (insn)) == SET) + { + pinfo->src = SET_SRC (PATTERN (insn)); + pinfo->dest = SET_DEST (PATTERN (insn)); + } + + if (uid >= insn_infos.length ()) + insn_infos.safe_grow_cleared (uid + 10); + + if (insn_infos[uid] != NULL) + { + insn_infos[uid]->mem_refs.release (); + free (insn_infos[uid]); + } + + insn_infos[uid] = pinfo; +} + +/* Initialize memory reference information for each instruction. */ + +static void +init_insn_infos (void) +{ + rtx insn; + basic_block bb; + unsigned int max_uid = get_max_uid (); + + multi_mem_map = sbitmap_alloc (max_uid); + bitmap_clear (multi_mem_map); + insn_infos.safe_grow_cleared (max_uid); + + FOR_EACH_BB_FN (bb, cfun) + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + init_insn_info (insn); + } +} + +/* Given the first memory access instruction of pair recorded in + PPAIR_INFO, this function analyzes memory reference PMEM_INFO + in PINSN_INSN. If CANDIDATE is true, checks whether it can be + paired with the first instruction. */ + +static bool +process_mem_access (struct pair_info_t *ppair_info, + struct insn_info_t *pinsn_info, + struct mem_info_t *pmem_info, bool candidate) +{ + bool is_load, can_pair_p = candidate; + rtx insn = pinsn_info->insn; + rtx mem = NULL_RTX; + rtx base = NULL_RTX, offset = NULL_RTX; + struct mem_info_t *pmem_info_x = &ppair_info->x->mem_refs[0]; + + is_load = pmem_info->is_load; + mem = pmem_info->mem; + base = pmem_info->base; + offset = pmem_info->offset; + /* Only pair memory accesse in SET instructions. */ + if (GET_CODE (PATTERN (insn)) != SET) + can_pair_p = false; + + /* Only pair known and non-volatile mempoy access. */ + if (base == NULL_RTX || MEM_VOLATILE_P (mem)) + can_pair_p = false; + + /* If this is a same kind memory reference with the first instruction. */ + if (pmem_info_x->is_load == is_load) + { + gcc_assert (ppair_info->mem_clob != MEM_CLOB_ALL); + /* Check if it can be paired with the first instruction. */ + if (can_pair_p + && valid_insn_pair_p (ppair_info, insn, mem, base, offset, is_load)) + { + ppair_info->y = pinsn_info; + return true; + } + /* If it is a load pair, we don't need to bother whether this + memory access clobbers any memory unit of the pair. */ + if (is_load) + return false; + } + + if (base == NULL_RTX || !BASE_SAME_P (pmem_info_x->base, base)) + { + /* Set mem_clob to MEM_CLOB_ALL if addressing mode or base + is different from the first instruction. */ + ppair_info->mem_clob = MEM_CLOB_ALL; + } + else + { + /* Check the address in the form of [base+offset] to see if it + clobbers any memory unit of the pair. Record the clobber + information. */ + HOST_WIDE_INT mode_x = GET_MODE_SIZE (GET_MODE (pmem_info_x->mem)); + HOST_WIDE_INT mode_y = GET_MODE_SIZE (GET_MODE (mem)); + HOST_WIDE_INT offset_t; + HOST_WIDE_INT offset_x = INTVAL (pmem_info_x->offset); + HOST_WIDE_INT offset_y = INTVAL (offset); + + offset_t = offset_x; + if (offset_t + mode_x > offset_y && offset_y + mode_y > offset_t) + ppair_info->mem_clob |= MEM_CLOB_SELF; + + offset_t = offset_x + mode_x; + if (offset_t + mode_x > offset_y && offset_y + mode_y > offset_t) + ppair_info->mem_clob |= MEM_CLOB_SUCC; + + offset_t = offset_x - mode_x; + if (offset_t + mode_x > offset_y && offset_y + mode_y > offset_t) + ppair_info->mem_clob |= MEM_CLOB_PRED; + } + + return false; +} + +/* Given the first memory access instruction of pair recorded in + PINFO, this function analyzes each memory access in INSN. + + Record the information in PINFO and return true if INSN can be + paired with the first instruction. */ + +static bool +process_mem_accesses (struct pair_info_t *pinfo, rtx insn) +{ + unsigned int i; + struct insn_info_t *pinsn_info = insn_infos[INSN_UID (insn)]; + + if (pinsn_info == NULL) + return false; + + /* Insn has more than MAX_MEM_REF_NUM memory references. */ + if (bitmap_bit_p (multi_mem_map, INSN_UID (insn))) + { + pinfo->mem_clob = MEM_CLOB_ALL; + return false; + } + + gcc_assert (pinsn_info->mem_refs.length () > 0); + if (pinsn_info->mem_refs.length () > 1) + { + for (i = 0; i < pinsn_info->mem_refs.length (); i++) + { + process_mem_access (pinfo, pinsn_info, + &pinsn_info->mem_refs[i], false); + /* Quick return. */ + if (pinfo->mem_clob == MEM_CLOB_ALL) + break; + } + return false; + } + else + return process_mem_access (pinfo, pinsn_info, + &pinsn_info->mem_refs[0], true); +} + +/* Process INSN and record register access information in PINFO. */ + +static void +process_reg_accesses (struct pair_info_t *pinfo, rtx insn) +{ + df_ref *ref_rec; + + for (ref_rec = DF_INSN_DEFS (insn); *ref_rec; ref_rec++) + SET_HARD_REG_BIT (pinfo->reg_defs, DF_REF_REGNO (*ref_rec)); + + for (ref_rec = DF_INSN_USES (insn); *ref_rec; ref_rec++) + SET_HARD_REG_BIT (pinfo->reg_uses, DF_REF_REGNO (*ref_rec)); +} + +/* Given the first memory access instruction of pair recorded in + PINFO, this function checks whether it's possible to be paired + across INSN. */ + +static bool +pair_candidate_killed (struct pair_info_t *pinfo, rtx insn) +{ + df_ref *def_rec; + unsigned int start_regno, end_regno; + struct mem_info_t *pmem_info = &pinfo->x->mem_refs[0]; + + /* Can't pair accross special instructions. */ + if (BARRIER_P (insn) || LABEL_P (insn) + || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)) + return true; + + /* If it's a pair of memory load, it's possible to cross const + or pure call. */ + if (CALL_P (insn) + && !(pmem_info->is_load && RTL_CONST_OR_PURE_CALL_P (insn))) + return true; + + /* Check whether the base register of memory address is clobbered + by this instruction. */ + if (REG_P (pmem_info->base)) + { + start_regno = REGNO (pmem_info->base); + end_regno = END_HARD_REGNO (pmem_info->base); + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + { + unsigned int regno = DF_REF_REGNO (*def_rec); + if (regno >= start_regno && regno <= end_regno) + return true; + } + } + + return false; +} + +/* Given paired memory access instructions recorded in PINFO, + this function tries to merge the pair in a target dependent + manner. New instruction is inserted, old instructions are + deleted if they can be paired successfully. */ + +static bool +merge_paired_loadstore (struct pair_info_t *pinfo) +{ + bool is_hoist, is_load = pinfo->x->mem_refs[0].is_load; + rtx loc, mem_x, mem_y, new_rtx, new_insn; + int saved = pinfo->pair_kind; + + gcc_assert (pinfo->pair_kind != PAIR_NONE); + gcc_assert (is_load == pinfo->y->mem_refs[0].is_load); + + /* Prefer to hoist loads and sink stores. */ + if (pinfo->pair_kind == PAIR_ANY) + pinfo->pair_kind = (is_load ? PAIR_HOIST : PAIR_SINK); + + if (pinfo->pair_kind == PAIR_HOIST) + { + is_hoist = true; + loc = pinfo->x->insn; + } + else + { + is_hoist = false; + loc = pinfo->y->insn; + } + + mem_x = pinfo->x->mem_refs[0].mem; + mem_y = pinfo->y->mem_refs[0].mem; + + if (targetm.merge_paired_loadstore + && (new_rtx = targetm.merge_paired_loadstore (pinfo->x->insn, + pinfo->y->insn, + mem_x, mem_y, is_hoist, + is_load)) != NULL_RTX) + { + if (dump_file != NULL) + { + fprintf (dump_file, "Merged %s pair: %s\n", + is_load ? "load" : "store", + is_hoist ? "hoist" : "sink"); + dump_insn_slim (dump_file, pinfo->x->insn); + dump_insn_slim (dump_file, pinfo->y->insn); + } + new_insn = emit_insn_after (new_rtx, loc); + init_insn_info (new_insn); + delete_insn (pinfo->x->insn); + delete_insn (pinfo->y->insn); + + return true; + } + + /* Restore pair kind if it can't be merged. */ + pinfo->pair_kind = saved; + return false; +} + +/* Given the first memory access instruction recorded in PINFO, + this function tries to find the other paired memory access + instruction in basic block BB, and record its information. + + Return false if there is no paired instruction in BB. */ + +static bool +find_pair_y_insn (struct pair_info_t *pinfo, basic_block bb) +{ + rtx insn; + int distance = PARAM_VALUE (PARAM_MAX_MERGE_PAIRED_LOADSTORE_DISTANCE); + + for (insn = NEXT_INSN (pinfo->x->insn); + insn != NULL_RTX && insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + if (pair_candidate_killed (pinfo, insn)) + break; + + if (process_mem_accesses (pinfo, insn)) + { + gcc_assert (pinfo->mem_clob != MEM_CLOB_ALL); + return true; + } + + if (pinfo->mem_clob == MEM_CLOB_ALL) + break; + + distance--; + if (distance == 0) + break; + + process_reg_accesses (pinfo, insn); + } + + return false; +} + +static bool +gate_handle_merge_paired_loadstore (void) +{ + return (optimize > 0 + && flag_merge_paired_loadstore > 0 + && targetm.merge_paired_loadstore != NULL); +} + +/* Merge consecutive memory accesses in a target dependent manner. */ + +static unsigned int +rest_of_handle_merge_paired_loadstore (void) +{ + unsigned int i; + rtx insn, next; + basic_block bb; + + if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS) + return 0; + + df_analyze (); + init_insn_infos (); + + FOR_EACH_BB_FN (bb, cfun) + { + if (bb->index < NUM_FIXED_BLOCKS) + continue; + + insn = BB_HEAD (bb); + for (; insn != NULL_RTX && insn != BB_END (bb); insn = next) + { + rtx next2 = NULL_RTX; + + next = NEXT_INSN (insn); + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* Check if insn has exact one memory reference. */ + if (insn_infos[INSN_UID (insn)] == NULL + || insn_infos[INSN_UID (insn)]->mem_refs.length () != 1) + continue; + + if (!find_pair_x_insn (&pair_info, insn_infos[INSN_UID (insn)])) + continue; + + if (!find_pair_y_insn (&pair_info, bb)) + continue; + + /* Record next's next insn if it's the second insn in pair. */ + if (next != NULL_RTX && pair_info.y->insn == next) + next2 = NEXT_INSN (next); + + /* Continue with next2 if next is merged in this round. */ + if (merge_paired_loadstore (&pair_info) && next2 != NULL_RTX) + next = next2; + } + } + + sbitmap_free (multi_mem_map); + for (i = 0; i < insn_infos.length (); i++) + { + struct insn_info_t *pinsn_info = insn_infos[i]; + + if (pinsn_info == NULL) + continue; + + pinsn_info->mem_refs.release (); + free (pinsn_info); + } + insn_infos.truncate (0); + + return 0; +} + +namespace { + +const pass_data pass_data_merge_paired_loadstore = +{ + RTL_PASS, /* type */ + "merge_paired_loadstore", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MERGE_PAIRED_LOADSTORE, + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destoryed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ +}; + +class pass_merge_paired_loadstore : public rtl_opt_pass +{ +public: + pass_merge_paired_loadstore (gcc::context *ctxt) + : rtl_opt_pass (pass_data_merge_paired_loadstore, ctxt) + {} + + /* opt_pass methods: */ + /* The epiphany backend creates a second instance of this pass, so we need + a cloned method. */ + opt_pass *clone () { return new pass_merge_paired_loadstore (m_ctxt); } + bool gate () { return gate_handle_merge_paired_loadstore (); } + unsigned int execute () { return rest_of_handle_merge_paired_loadstore (); } +}; // class pass_merge_paired_loadstore + +} // anon namespace + +rtl_opt_pass * +make_pass_merge_paired_loadstore (gcc::context *ctxt) +{ + return new pass_merge_paired_loadstore (ctxt); +} Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 209009) +++ gcc/config/arm/arm.c (working copy) @@ -286,6 +286,9 @@ static unsigned arm_add_stmt_cost (void *data, int static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, bool op0_preserve_value); static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); +static rtx arm_merge_paired_loadstore (rtx x, rtx y, + rtx mem_x, rtx mem_y, + bool is_hoist, bool is_load); /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -668,6 +671,9 @@ static const struct attribute_spec arm_attribute_t #undef TARGET_ASAN_SHADOW_OFFSET #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset +#undef TARGET_MERGE_PAIRED_LOADSTORE +#define TARGET_MERGE_PAIRED_LOADSTORE arm_merge_paired_loadstore + #undef MAX_INSN_PER_IT_BLOCK #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) @@ -31116,4 +31122,150 @@ arm_asan_shadow_offset (void) return (unsigned HOST_WIDE_INT) 1 << 29; } +/* X and Y are two ldr instructions which can be merged into a single + ldrd instruction. IS_HOIST indicates the two instructions should + be merged by hoisting Y. Return true if merging the two instructions + introduces additional pipeline stall, for example: + + (a) // IS_HOIST == false + + ldr rx, [rb, off1] ;X + //other insns + ldr ry, [rb, off2] ;Y + + (b) // IS_HOIST == true + + ldr rx, [rb, off1] ;X + insn uses rx + //other insns + ldr ry, [rb, off2] ;Y */ + +static bool +load_stall_introduced_p (rtx x, rtx y, bool is_hoist) +{ + rtx base; + rtx insn; + + if (!is_hoist) + return true; + + base = SET_DEST (PATTERN (x)); + /* Find the first real instruction after X. */ + for (insn = NEXT_INSN (x); + insn != NULL_RTX && !NONDEBUG_INSN_P (insn); + insn = NEXT_INSN (insn)) + ; + + /* Check if the instruction uses the loaded register of X. */ + if (REG_P (base) && insn != NULL_RTX && insn != y) + { + df_ref *use_rec; + unsigned int start_regno, end_regno; + + start_regno = REGNO (base); + end_regno = END_HARD_REGNO (base); + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + { + unsigned int regno = DF_REF_REGNO (*use_rec); + if (regno >= start_regno && regno < end_regno) + return true; + } + } + + return false; +} + +/* Implement the TARGET_MERGE_PAIRED_LOADSTORE hook. + + Currently we only support to merge two ldr or str instructions + into a single ldrd or strd, given below conditions are met: + 1) The two instructions are valid for merging. + 2) Target processor prefers to use ldrd/strd instruction. + 3) Merging instructions doesn't introduce pipeline stall. + 4) Merging instructions doesn't cause alignment issue. + + If the two instructions can be successfully merged, this hook + returns the new instruction. Otherwise returns NULL_RTX. */ + +static rtx +arm_merge_paired_loadstore (rtx x, rtx y, rtx mem_x, rtx mem_y, + bool is_hoist, bool is_load) +{ + rtx opds[4]; + rtx body_x, body_y; + gcc_assert (x != NULL_RTX && y != NULL_RTX); + + if (!TARGET_LDRD || !current_tune->prefer_ldrd_strd) + return NULL_RTX; + + body_x = PATTERN (x); + body_y = PATTERN (y); + if (GET_CODE (body_x) != SET || GET_CODE (body_y) != SET) + return NULL_RTX; + + if (is_load) + { + opds[0] = SET_DEST (body_x); + opds[1] = SET_DEST (body_y); + opds[2] = SET_SRC (body_x); + opds[3] = SET_SRC (body_y); + } + else + { + opds[0] = SET_SRC (body_x); + opds[1] = SET_SRC (body_y); + opds[2] = SET_DEST (body_x); + opds[3] = SET_DEST (body_y); + } + + if (!REG_P (opds[0]) || !REG_P (opds[1]) + || !MEM_P (opds[2]) || !MEM_P (opds[3])) + return NULL_RTX; + + gcc_assert (opds[2] == mem_x && opds[3] ==mem_y); + + /* Skip if merging two ldr introduces additional pipeline stall. */ + if (is_load && load_stall_introduced_p (x, y, is_hoist)) + return NULL_RTX; + + if (GET_MODE (opds[0]) != SImode || GET_MODE (opds[1]) != SImode + || GET_MODE (opds[2]) != SImode || GET_MODE (opds[3]) != SImode) + return NULL_RTX; + + if (!arm_general_register_operand (opds[0], SImode) + || !arm_general_register_operand (opds[1], SImode)) + return NULL_RTX; + + if (unaligned_access + && ((MEM_ALIGN (opds[2]) & 3) != 0 || (MEM_ALIGN (opds[3]) & 3) != 0)) + return NULL_RTX; + + if (!gen_operands_ldrd_strd (opds, is_load, false, false)) + return NULL_RTX; + + if (TARGET_ARM) + { + opds[0] = gen_rtx_REG (DImode, REGNO (opds[0])); + opds[2] = adjust_address (opds[2], DImode, 0); + return gen_rtx_SET (VOIDmode, opds[0], opds[2]); + } + else if (TARGET_THUMB2) + { + rtx t1, t2; + if (is_load) + { + t1 = gen_rtx_SET (VOIDmode, opds[0], opds[2]); + t2 = gen_rtx_SET (VOIDmode, opds[1], opds[3]); + } + else + { + t1 = gen_rtx_SET (VOIDmode, opds[2], opds[0]); + t2 = gen_rtx_SET (VOIDmode, opds[3], opds[1]); + } + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)); + } + else + return NULL_RTX; +} + #include "gt-arm.h"