Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi (revision 157974) +++ gcc/doc/tm.texi (working copy) @@ -11075,3 +11075,41 @@ value of @code{TARGET_CONST_ANCHOR} is a MIPS, where add-immediate takes a 16-bit signed value, @code{TARGET_CONST_ANCHOR} is set to @samp{0x8000}. The default value is zero, which disables this optimization. @end deftypevr + +@deftypefn {Target Hook} rtx TARGET_GET_PIC_REG (void) +Return the pic_reg pseudo register which holds the base address of GOT. +It is only required by the simplify-got optimization. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_CLEAR_PIC_REG (void) +After successful simplify-got optimization, the pic_reg is useless. So a +target can use this hook to clear pic_reg. +@end deftypefn + +@deftypefn {Target Hook} rtx TARGET_LOADED_GLOBAL_VAR (rtx @var{insn}, rtx *@var{offset_reg}) +This hook is used to detect if the given @var{insn} loads a global +variable's address from GOT with the form of + +@smallexample +(set @var{address_reg} (mem (plus pic_reg @var{off}))) +@end smallexample + +If so, store @var{off} into the memory pointed to by @var{offset_reg} and +return the global variable whose address will be loaded. Otherwise return +@code{NULL_RTX}. +@end deftypefn + +@deftypefn {Target Hook} int TARGET_MAX_USAGE_TO_AVOID_PIC_REG (void) +This hook returns a threshold value used to determine if simplify-got will +actually be performed. If there are more GOT accesses than the returned +number, the GOT access insns will not be rewritten. Otherwise we will +rewrite these insns. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_LOAD_GLOBAL_ADDRESS (rtx @var{symbol}, rtx @var{offset_reg}, rtx @var{address_reg}, rtx @var{load_insn}) +This hook does the actual rewriting of GOT access insn @var{load_insn}. +The global variable is @var{symbol}. The global address should be loaded +into @var{address_reg}. 
The register @var{offset_reg} was previously used +to hold the offset from GOT base to the GOT entry of the global variable. +Now it can be used as a scratch register. +@end deftypefn Index: gcc/hooks.c =================================================================== --- gcc/hooks.c (revision 157974) +++ gcc/hooks.c (working copy) @@ -276,6 +276,13 @@ hook_rtx_tree_int_null (tree a ATTRIBUTE return NULL; } +/* Generic hook that takes no arguments and returns NULL_RTX. */ +rtx +hook_rtx_void_null (void) +{ + return NULL; +} + /* Generic hook that takes three trees and returns the last one as is. */ tree hook_tree_tree_tree_tree_3rd_identity (tree a ATTRIBUTE_UNUSED, Index: gcc/hooks.h =================================================================== --- gcc/hooks.h (revision 157974) +++ gcc/hooks.h (working copy) @@ -80,6 +80,7 @@ extern bool default_can_output_mi_thunk_ extern rtx hook_rtx_rtx_identity (rtx); extern rtx hook_rtx_rtx_null (rtx); extern rtx hook_rtx_tree_int_null (tree, int); +extern rtx hook_rtx_void_null (void); extern const char *hook_constcharptr_const_tree_null (const_tree); extern const char *hook_constcharptr_const_rtx_null (const_rtx); Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 157974) +++ gcc/tree-pass.h (working copy) @@ -481,6 +481,7 @@ extern struct rtl_opt_pass pass_rtl_dse3 extern struct rtl_opt_pass pass_rtl_cprop; extern struct rtl_opt_pass pass_rtl_pre; extern struct rtl_opt_pass pass_rtl_hoist; +extern struct rtl_opt_pass pass_simplify_got; extern struct rtl_opt_pass pass_rtl_store_motion; extern struct rtl_opt_pass pass_cse_after_global_opts; extern struct rtl_opt_pass pass_rtl_ifcvt; Index: gcc/target.h =================================================================== --- gcc/target.h (revision 157974) +++ gcc/target.h (working copy) @@ -1166,6 +1166,27 @@ struct gcc_target bool (*can_inline_p) (tree, tree); } target_option; + /* Functions used to simplify GOT access. 
*/ + struct simplify_got_access { + /* Function to get the pic_reg which holds the base address of GOT. */ + rtx (*get_pic_reg) (void); + + /* Function to clear the pic_reg which is useless now. */ + void (*clear_pic_reg) (void); + + /* Function to detect if the specified insn loads a global variable's + address from GOT. If so, returns that symbol; otherwise NULL_RTX. */ + rtx (*loaded_global_var) (rtx, rtx *); + + /* This function returns a threshold value. If we have more GOT accesses + than the returned number, we won't rewrite the GOT access insns. + Otherwise we'll rewrite these insns. */ + int (*max_usage_to_avoid_pic_reg) (void); + + /* This function does the actual rewriting of GOT accesses. */ + void (*load_global_address) (rtx, rtx, rtx, rtx); + } got_access; + /* For targets that need to mark extra registers as live on entry to the function, they should define this target hook and set their bits in the bitmap passed in. */ Index: gcc/testsuite/gcc.target/arm/got2.c =================================================================== --- gcc/testsuite/gcc.target/arm/got2.c (revision 0) +++ gcc/testsuite/gcc.target/arm/got2.c (revision 0) @@ -0,0 +1,11 @@ +/* We should not use GOT_PREL relocation to load global address with so + many global accesses. 
*/ + +/* { dg-options "-Os -fpic" } */ +/* { dg-final { scan-assembler-not "GOT_PREL" } } */ + +extern int x1, x2, x3, x4, x5; +int sum() +{ + return x1 + x2 + x3 + x4 + x5; +} Index: gcc/testsuite/gcc.target/arm/got1.c =================================================================== --- gcc/testsuite/gcc.target/arm/got1.c (revision 0) +++ gcc/testsuite/gcc.target/arm/got1.c (revision 0) @@ -0,0 +1,10 @@ +/* { dg-options "-Os -fpic" } */ +/* { dg-final { scan-assembler "GOT_PREL" } } */ + +extern int x; +int foo(int j) +{ + int t = x; + x = j; + return x; +} Index: gcc/timevar.def =================================================================== --- gcc/timevar.def (revision 157974) +++ gcc/timevar.def (working copy) @@ -175,6 +175,7 @@ DEFTIMEVAR (TV_LOOP_DOLOOP , " DEFTIMEVAR (TV_CPROP , "CPROP") DEFTIMEVAR (TV_PRE , "PRE") DEFTIMEVAR (TV_HOIST , "code hoisting") +DEFTIMEVAR (TV_SIMPLIFY_GOT , "simplify got") DEFTIMEVAR (TV_LSM , "LSM") DEFTIMEVAR (TV_TRACER , "tracer") DEFTIMEVAR (TV_WEB , "web") Index: gcc/simplify-got.c =================================================================== --- gcc/simplify-got.c (revision 0) +++ gcc/simplify-got.c (revision 0) @@ -0,0 +1,176 @@ +/* Simplify the code to load global variable's address from GOT. + Copyright (C) 2010 + Free Software Foundation, Inc. + Contributed by Wei Guozhi . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +/* This file contains an optimization for global variable's address loading + from GOT. + + When generating PIC code, we need to load global variable's address from + GOT. Many targets do this as follows: + + (set pic_reg ...) # load the base address of GOT into pic_reg. + ... + (set off_set ...) # load the offset from the base of GOT to + # a global variable's GOT entry. + (set address # load the address from GOT. + (mem (plus pic_reg off_set))) + ... + + If the target has an alternative method (usually using a different + relocation) to load the global address and in some cases it has less + cost and avoids the pic_reg, we can use this pass to improve it. + + In order to employ this optimization the target must satisfy the + following constraints: + + 1. There should be at least 2 methods to load a global variable's + address from GOT. + + 2. By default all global variable accesses use the method described + above. + + 3. If the number of global address loads doesn't exceed a threshold + the alternative method is better. Otherwise we should keep the default + method. + + 4. The alternative method doesn't use the base of GOT (pic_reg). +*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "target.h" +#include "tree-pass.h" +#include "df.h" +#include "timevar.h" + +/* Information needed when rewriting the GOT access insns. */ +struct global_var_info +{ + rtx symbol; /* The global variable. */ + rtx offset_reg; /* Register containing the GOT entry offset. */ + rtx address_reg; /* Register containing the final global address. */ + rtx load_insn; /* The insn which loads the address from GOT. */ +}; + +/* This optimization is enabled only when optimizing and the pic_reg is actually used. 
*/ +static bool +gate_handle_simplify_got (void) +{ + return (optimize > 0) && targetm.got_access.get_pic_reg (); +} + +static unsigned int +rest_of_handle_simplify_got (void) +{ + df_ref ref; + rtx use = NULL_RTX; + int i, count = 0; + struct global_var_info* global_vars; + rtx pic_reg = targetm.got_access.get_pic_reg (); + gcc_assert (pic_reg); + + ref = DF_REG_USE_CHAIN (REGNO (pic_reg)); + global_vars = XNEWVEC(struct global_var_info, + DF_REG_USE_COUNT (REGNO (pic_reg))); + + /* Check if all uses of pic_reg are loading a global address through the + default method. */ + while (ref) + { + rtx insn = DF_REF_INSN (ref); + + /* Check for the special USE insn; it is not a real usage of pic_reg. */ + if (GET_CODE (PATTERN (insn)) == USE) + use = insn; + else + { + /* If an insn both sets and uses pic_reg, it is in the process of + constructing the value of pic_reg. We should also ignore it. */ + rtx set = single_set (insn); + if (!(set && (SET_DEST (set) == pic_reg))) + { + rtx offset_reg; + rtx symbol = targetm.got_access.loaded_global_var (insn, + &offset_reg); + if (symbol) + { + gcc_assert (set); + global_vars[count].symbol = symbol; + global_vars[count].offset_reg = offset_reg; + global_vars[count].address_reg = SET_DEST (set); + global_vars[count].load_insn = insn; + count++; + } + else + { + /* This insn doesn't load a global address, so it is some + other unexpected usage of pic_reg; give up. */ + free (global_vars); + return 0; + } + } + } + ref = DF_REF_NEXT_REG(ref); + } + + /* If there are too many address loads, there is no benefit. */ + if (count > targetm.got_access.max_usage_to_avoid_pic_reg ()) + { + free (global_vars); + return 0; + } + + /* Rewrite the global address loading insns. */ + for (i=0; imachine->pic_reg; +} + +/* Clear the pic_reg to NULL. */ +void +arm_clear_pic_reg (void) +{ + cfun->machine->pic_reg = NULL_RTX; +} + +/* Return the maximum number of global address loads that should be rewritten. 
+ If we have more, simplify-got won't change them since that would bring no + improvement. + + The default global address loading instructions are: + + ldr r3, .L2 # A + ldr r2, .L2+4 # B +.LPIC0: + add r3, pc # A + ldr r4, [r3, r2] # B + ... +.L2: + .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+4) # A + .word i(GOT) # B + + The new instruction sequence is: + + ldr r3, .L2 # C +.LPIC0: + add r3, pc # C + ldr r3, [r3] # C + ... +.L2: + .word i(GOT_PREL)+(.-(.LPIC0+4)) # C + + Suppose the maximum number of global address loads is n; then we have + + cost(A) + cost(B) * n = cost(C) * n <1> + + The detailed cost of each item depends on the target instruction set. */ +int +arm_max_usage_to_avoid_pic_reg (void) +{ + if (TARGET_THUMB) + /* Expression <1> is: + 2*2 + 4 + (2*2 + 4) * n = (3*2 + 4) * n + n = 4 */ + return 4; + else + /* Expression <1> is: + 4 * 2 + 4 + (4 * 2 + 4) * n = (4 * 3 + 4) * n + n = 3 */ + return 3; +} + +/* Detect if INSN loads a global address. If so, return the symbol and + store the register that contains the offset of the GOT entry into *OFFSET_REG. 
*/ +rtx +arm_loaded_global_var (rtx insn, rtx * offset_reg) +{ + rtx set = single_set (insn); + rtx pic_reg = cfun->machine->pic_reg; + gcc_assert (pic_reg); + + /* A global address loading instruction has the pattern: + (SET address_reg (MEM (PLUS pic_reg offset_reg))) */ + if (set && MEM_P (SET_SRC (set)) + && (GET_CODE (XEXP (SET_SRC (set),0)) == PLUS)) + { + unsigned int regno; + df_ref def; + rtx def_insn; + rtx src; + rtx plus = XEXP (SET_SRC (set),0); + rtx op0 = XEXP (plus, 0); + rtx op1 = XEXP (plus, 1); + /* Accept pic_reg as either operand of the PLUS. */ + if (op1 == pic_reg) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + } + + if ((op0 != pic_reg) || (!REG_P (op1))) + return NULL_RTX; + + /* Only handle an offset register with exactly one use and one def. */ + regno = REGNO (op1); + if ((DF_REG_USE_COUNT (regno) != 1) || (DF_REG_DEF_COUNT (regno) != 1)) + return NULL_RTX; + + /* The offset loading insn has the pattern: + (SET offset_reg (UNSPEC [symbol] UNSPEC_PIC_SYM)) + single_set yields NULL when the defining insn is not a single SET; + such an insn cannot match, so reject it rather than dereference NULL. */ + def = DF_REG_DEF_CHAIN (regno); + def_insn = DF_REF_INSN (def); + set = single_set (def_insn); + if (!set || SET_DEST (set) != op1) + return NULL_RTX; + + src = SET_SRC (set); + if ((GET_CODE (src) != UNSPEC) || (XINT (src, 1) != UNSPEC_PIC_SYM)) + return NULL_RTX; + + *offset_reg = op1; + return RTVEC_ELT (XVEC (src, 0), 0); + } + + return NULL_RTX; +} + +/* Rewrite the global address loading instructions. + SYMBOL is the global variable. OFFSET_REG contains the offset of the + GOT entry. ADDRESS_REG will receive the final global address; it is + not referenced here because LOAD_INSN, which already sets it, is reused. + LOAD_INSN is the original insn which loads the address from GOT. */ +void +arm_load_global_address (rtx symbol, rtx offset_reg, + rtx address_reg ATTRIBUTE_UNUSED, rtx load_insn) +{ + rtx offset, got_prel; + rtx labelno = GEN_INT (pic_labelno++); + rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + rtx set = single_set (load_insn); + + /* The first insn: + (SET offset_reg (address_of_GOT_entry(symbol) - pc)) + The expression (address_of_GOT_entry(symbol) - pc) is expressed by + got_prel, which is actually represented by R_ARM_GOT_PREL relocation. 
*/ + l1 = gen_rtx_CONST (VOIDmode, l1); + l1 = plus_constant (l1, TARGET_ARM ? 8 : 4); + offset = gen_rtx_MINUS (VOIDmode, pc_rtx, l1); + got_prel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, offset), + UNSPEC_GOT_PREL_SYM); + got_prel = gen_rtx_CONST (Pmode, got_prel); + if (TARGET_32BIT) + emit_insn_before (gen_pic_load_addr_32bit (offset_reg, got_prel), + load_insn); + else + emit_insn_before (gen_pic_load_addr_thumb1 (offset_reg, got_prel), + load_insn); + + /* The second insn: + (SET offset_reg (PLUS offset_reg pc_rtx)) */ + if (TARGET_ARM) + emit_insn_before (gen_pic_add_dot_plus_eight (offset_reg, offset_reg, + labelno), + load_insn); + else + emit_insn_before (gen_pic_add_dot_plus_four (offset_reg, offset_reg, + labelno), + load_insn); + + /* The last insn to access the GOT entry: + (SET address_reg (MEM offset_reg)) + We reuse the existing load instruction, replacing only its address. */ + XEXP (SET_SRC (set), 0) = offset_reg; + df_insn_rescan (load_insn); +} + #include "gt-arm.h" Index: gcc/config/arm/arm.md =================================================================== --- gcc/config/arm/arm.md (revision 157974) +++ gcc/config/arm/arm.md (working copy) @@ -101,6 +101,7 @@ ; a given symbolic address. (UNSPEC_THUMB1_CASESI 25) ; A Thumb1 compressed dispatch-table call. (UNSPEC_RBIT 26) ; rbit operation. + (UNSPEC_GOT_PREL_SYM 27) ; Specify an R_ARM_GOT_PREL relocation of a symbol. ] )