[PATCH,rs6000] Optimize pcrel access of globals [ping]
Aaron Sawdey
acsawdey@linux.ibm.com
Tue Jan 19 04:45:58 GMT 2021
Ping.
Aaron Sawdey, Ph.D. sawdey@linux.ibm.com
IBM Linux on POWER Toolchain
> On Dec 9, 2020, at 11:04 AM, acsawdey@linux.ibm.com wrote:
>
> From: Aaron Sawdey <acsawdey@linux.ibm.com>
>
> Ping. I've folded in the changes to comments suggested by Will Schmidt.
>
> This patch implements a RTL pass that looks for pc-relative loads of the
> address of an external variable using the PCREL_GOT relocation and a
> single load or store that uses that external address.
>
> Produced by a cast of thousands:
> * Michael Meissner
> * Peter Bergner
> * Bill Schmidt
> * Alan Modra
> * Segher Boessenkool
> * Aaron Sawdey
>
> Passes bootstrap/regtest on ppc64le power10. Should have no effect on
> other processors. OK for trunk?
>
> Thanks!
> Aaron
>
> gcc/ChangeLog:
>
> * config.gcc: Add pcrel-opt.c and pcrel-opt.o.
> * config/rs6000/pcrel-opt.c: New file.
> * config/rs6000/pcrel-opt.md: New file.
> * config/rs6000/predicates.md: Add d_form_memory predicate.
> * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_PCREL_OPT.
> * config/rs6000/rs6000-passes.def: Add pass_pcrel_opt.
> * config/rs6000/rs6000-protos.h: Add reg_to_non_prefixed(),
> offsettable_non_prefixed_memory(), output_pcrel_opt_reloc(),
> and make_pass_pcrel_opt().
> * config/rs6000/rs6000.c (reg_to_non_prefixed): Make global.
> (rs6000_option_override_internal): Add pcrel-opt.
> (rs6000_delegitimize_address): Support pcrel-opt.
> (rs6000_opt_masks): Add pcrel-opt.
> (offsettable_non_prefixed_memory): New function.
> (reg_to_non_prefixed): Make global.
> (rs6000_asm_output_opcode): Reset next_insn_prefixed_p.
> (output_pcrel_opt_reloc): New function.
> * config/rs6000/rs6000.md (loads_extern_addr): New attr.
> (pcrel_extern_addr): Set loads_extern_addr.
> Add include for pcrel-opt.md.
> * config/rs6000/rs6000.opt: Add -mpcrel-opt.
> * config/rs6000/t-rs6000: Add rules for pcrel-opt.c and
> pcrel-opt.md.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/pcrel-opt-inc-di.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-df.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-di.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-hi.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-qi.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-sf.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-si.c: New test.
> * gcc.target/powerpc/pcrel-opt-ld-vector.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-df.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-di.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-hi.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-qi.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-sf.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-si.c: New test.
> * gcc.target/powerpc/pcrel-opt-st-vector.c: New test.
> ---
> gcc/config.gcc | 6 +-
> gcc/config/rs6000/pcrel-opt.c | 888 ++++++++++++++++++
> gcc/config/rs6000/pcrel-opt.md | 386 ++++++++
> gcc/config/rs6000/predicates.md | 23 +
> gcc/config/rs6000/rs6000-cpus.def | 2 +
> gcc/config/rs6000/rs6000-passes.def | 8 +
> gcc/config/rs6000/rs6000-protos.h | 4 +
> gcc/config/rs6000/rs6000.c | 116 ++-
> gcc/config/rs6000/rs6000.md | 8 +-
> gcc/config/rs6000/rs6000.opt | 4 +
> gcc/config/rs6000/t-rs6000 | 7 +-
> .../gcc.target/powerpc/pcrel-opt-inc-di.c | 18 +
> .../gcc.target/powerpc/pcrel-opt-ld-df.c | 36 +
> .../gcc.target/powerpc/pcrel-opt-ld-di.c | 43 +
> .../gcc.target/powerpc/pcrel-opt-ld-hi.c | 42 +
> .../gcc.target/powerpc/pcrel-opt-ld-qi.c | 42 +
> .../gcc.target/powerpc/pcrel-opt-ld-sf.c | 42 +
> .../gcc.target/powerpc/pcrel-opt-ld-si.c | 41 +
> .../gcc.target/powerpc/pcrel-opt-ld-vector.c | 36 +
> .../gcc.target/powerpc/pcrel-opt-st-df.c | 36 +
> .../gcc.target/powerpc/pcrel-opt-st-di.c | 37 +
> .../gcc.target/powerpc/pcrel-opt-st-hi.c | 42 +
> .../gcc.target/powerpc/pcrel-opt-st-qi.c | 42 +
> .../gcc.target/powerpc/pcrel-opt-st-sf.c | 36 +
> .../gcc.target/powerpc/pcrel-opt-st-si.c | 41 +
> .../gcc.target/powerpc/pcrel-opt-st-vector.c | 36 +
> 26 files changed, 2013 insertions(+), 9 deletions(-)
> create mode 100644 gcc/config/rs6000/pcrel-opt.c
> create mode 100644 gcc/config/rs6000/pcrel-opt.md
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index dc6d68bd4eb..1e4862785ea 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -509,7 +509,7 @@ or1k*-*-*)
> ;;
> powerpc*-*-*)
> cpu_type=rs6000
> - extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
> + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o pcrel-opt.o"
> extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
> extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
> @@ -524,6 +524,7 @@ powerpc*-*-*)
> esac
> extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
> + target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/pcrel-opt.c"
> ;;
> pru-*-*)
> cpu_type=pru
> @@ -535,8 +536,9 @@ riscv*)
> ;;
> rs6000*-*-*)
> extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> - extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
> + extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o pcrel-opt.o"
> target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
> + target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/pcrel-opt.c"
> ;;
> sparc*-*-*)
> cpu_type=sparc
> diff --git a/gcc/config/rs6000/pcrel-opt.c b/gcc/config/rs6000/pcrel-opt.c
> new file mode 100644
> index 00000000000..c49358ddf51
> --- /dev/null
> +++ b/gcc/config/rs6000/pcrel-opt.c
> @@ -0,0 +1,888 @@
> +/* Subroutines used to support the pc-relative linker optimization.
> + Copyright (C) 2020 Free Software Foundation, Inc.
> +
> + This file is part of GCC.
> +
> + GCC is free software; you can redistribute it and/or modify it
> + under the terms of the GNU General Public License as published
> + by the Free Software Foundation; either version 3, or (at your
> + option) any later version.
> +
> + GCC is distributed in the hope that it will be useful, but WITHOUT
> + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
> + License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with GCC; see the file COPYING3. If not see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* This file implements a RTL pass that looks for pc-relative loads of the
> + address of an external variable using the PCREL_GOT relocation and a single
> + load that uses that external address. If that is found we create the
> + PCREL_OPT relocation to possibly convert:
> +
> + pld addr_reg,var@pcrel@got
> +
> + <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> + lwz data_reg,0(addr_reg)
> +
> + into:
> +
> + plwz data_reg,var@pcrel
> +
> + <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> + nop
> +
> + If the variable is not defined in the main program or the code using it is
> + not in the main program, the linker puts the address in the .got section and
> + generates:
> +
> + .section .got
> + .Lvar_got:
> + .dword var
> +
> + .section .text
> + pld addr_reg,.Lvar_got@pcrel
> +
> + <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> + lwz data_reg,0(addr_reg)
> +
> + We look for a single usage in the basic block where the external
> + address is loaded. Multiple uses or references in another basic block will
> + force us to not use the PCREL_OPT relocation.
> +
> + We also optimize stores to the address of an external variable using the
> + PCREL_GOT relocation and a single store that uses that external address. If
> + that is found we create the PCREL_OPT relocation to possibly convert:
> +
> + pld addr_reg,var@pcrel@got
> +
> + <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> + stw data_reg,0(addr_reg)
> +
> + into:
> +
> + pstw data_reg,var@pcrel
> +
> + <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> + nop
> +
> + If the variable is not defined in the main program or the code using it is
> + not in the main program, the linker puts the address in the .got section and
> + generates:
> +
> + .section .got
> + .Lvar_got:
> + .dword var
> +
> + .section .text
> + pld addr_reg,.Lvar_got@pcrel
> +
> + <possibly other insns that do not use 'addr_reg' or 'data_reg'>
> +
> + stw data_reg,0(addr_reg)
> +
> + We only look for a single usage in the basic block where the external
> + address is loaded. Multiple uses or references in another basic block will
> + force us to not use the PCREL_OPT relocation. */
> +
> +#define IN_TARGET_CODE 1
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "rtl.h"
> +#include "tree.h"
> +#include "memmodel.h"
> +#include "expmed.h"
> +#include "optabs.h"
> +#include "recog.h"
> +#include "df.h"
> +#include "tm_p.h"
> +#include "ira.h"
> +#include "print-tree.h"
> +#include "varasm.h"
> +#include "explow.h"
> +#include "expr.h"
> +#include "output.h"
> +#include "tree-pass.h"
> +#include "rtx-vector-builder.h"
> +#include "print-rtl.h"
> +#include "insn-attr.h"
> +#include "insn-codes.h"
> +
> +/* Various counters. */
> +static struct {
> + unsigned long extern_addrs;
> + unsigned long loads;
> + unsigned long adjacent_loads;
> + unsigned long failed_loads;
> + unsigned long stores;
> + unsigned long adjacent_stores;
> + unsigned long failed_stores;
> +} counters;
> +
> +/* Return a marker to identify the PCREL_OPT load address and
> + load/store instruction. We use a unique integer which is appended
> + to ".Lpcrel" to make the label. */
> +
> +static rtx
> +pcrel_opt_next_marker (void)
> +{
> + static unsigned int pcrel_opt_next_num;
> +
> + pcrel_opt_next_num++;
> + return GEN_INT (pcrel_opt_next_num);
> +}
> +
> +/* Optimize a PC-relative load address to be used in a load.
> +
> + If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. no
> + additional references to the address register, the address register dies at
> + the load, and no references to the load), convert insns of the form:
> +
> + (set (reg:DI addr)
> + (symbol_ref:DI "ext_symbol"))
> +
> + ...
> +
> + (set (reg:<MODE> value)
> + (mem:<MODE> (reg:DI addr)))
> +
> + into:
> +
> + (parallel [(set (reg:DI addr)
> + (unspec:<MODE> [(symbol_ref:DI "ext_symbol")
> + (const_int label_num)
> + (const_int 0)]
> + UNSPEC_PCREL_OPT_LD_ADDR))
> + (set (reg:DI data)
> + (unspec:DI [(const_int 0)]
> + UNSPEC_PCREL_OPT_LD_ADDR))])
> +
> + ...
> +
> + (parallel [(set (reg:<MODE>)
> + (unspec:<MODE> [(mem:<MODE> (reg:DI addr))
> + (reg:DI data)
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_LD_RELOC))
> + (clobber (reg:DI addr))])
> +
> + If the register being loaded is the same register that was used to hold the
> + external address, we generate the following insn instead:
> +
> + (set (reg:DI data)
> + (unspec:DI [(symbol_ref:DI "ext_symbol")
> + (const_int label_num)
> + (const_int 1)]
> + UNSPEC_PCREL_OPT_LD_ADDR))
> +
> + In the first insn, we set the address of the external variable and also
> + mark the register being loaded as set, so that both are created in that
> + insn and consumed in the second insn. The mode of the register that we
> + will ultimately load into does not matter here, so we use DImode. We just
> + need to mark that both registers may be set in the first insn, and will be
> + used in the second insn.
> +
> + The UNSPEC_PCREL_OPT_LD_ADDR insn will generate the load address plus
> + a definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_LD_RELOC
> + insn will generate the .reloc to tell the linker to tie the load address and
> + load using that address together.
> +
> + pld b,ext_symbol@got@pcrel
> + .Lpcrel1:
> +
> + ...
> +
> + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
> + lwz r,0(b)
> +
> + If ext_symbol is defined in another object file in the main program and we
> + are linking the main program, the linker will convert the above instructions
> + to:
> +
> + plwz r,ext_symbol@pcrel
> +
> + ...
> +
> + nop */
> +
> +static void
> +pcrel_opt_load (rtx_insn *addr_insn, /* insn loading address. */
> + rtx_insn *load_insn) /* insn using address. */
> +{
> + rtx addr_set = PATTERN (addr_insn);
> + gcc_assert (GET_CODE (addr_set) == SET);
> +
> + rtx addr_reg = SET_DEST (addr_set);
> + gcc_assert (base_reg_operand (addr_reg, Pmode));
> +
> + rtx addr_symbol = SET_SRC (addr_set);
> + gcc_assert (pcrel_external_address (addr_symbol, Pmode));
> +
> + rtx load_set = PATTERN (load_insn);
> + gcc_assert (GET_CODE (load_set) == SET);
> +
> + /* Make sure there are no references to the register being loaded
> + between the two insns. */
> + rtx reg = SET_DEST (load_set);
> + if (!register_operand (reg, GET_MODE (reg))
> + || reg_used_between_p (reg, addr_insn, load_insn)
> + || reg_set_between_p (reg, addr_insn, load_insn))
> + return;
> +
> + rtx mem = SET_SRC (load_set);
> + machine_mode reg_mode = GET_MODE (reg);
> + machine_mode mem_mode = GET_MODE (mem);
> + rtx mem_inner = mem;
> + unsigned int reg_regno = reg_or_subregno (reg);
> +
> + /* LWA is a DS format instruction, but LWZ is a D format instruction. We use
> + DImode for the mode to force checking whether the bottom 2 bits are 0.
> + However FPR and vector registers uses the LFIWAX/LXSIWAX instructions
> + which only have indexed forms. */
> + if (GET_CODE (mem) == SIGN_EXTEND && GET_MODE (XEXP (mem, 0)) == SImode)
> + {
> + if (!INT_REGNO_P (reg_regno))
> + return;
> +
> + mem_inner = XEXP (mem, 0);
> + mem_mode = DImode;
> + }
> +
> + else if (GET_CODE (mem) == SIGN_EXTEND
> + || GET_CODE (mem) == ZERO_EXTEND
> + || GET_CODE (mem) == FLOAT_EXTEND)
> + {
> + mem_inner = XEXP (mem, 0);
> + mem_mode = GET_MODE (mem_inner);
> + }
> +
> + if (!MEM_P (mem_inner))
> + return;
> +
> + /* If the address isn't a non-prefixed offsettable instruction, we can't do
> + the optimization. */
> + if (!offsettable_non_prefixed_memory (reg, mem_mode, mem_inner))
> + return;
> +
> + /* Allocate a new PC-relative label, and update the load external address
> + insn.
> +
> + If the register being loaded is different from the address register, we
> + need to indicate both registers are set at the load of the address.
> +
> + (parallel [(set (reg load)
> + (unspec [(symbol_ref addr_symbol)
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_LD_ADDR))
> + (set (reg addr)
> + (unspec [(const_int 0)]
> + UNSPEC_PCREL_OPT_LD_ADDR))])
> +
> + If the register being loaded is the same as the address register, we use
> + an alternate form:
> +
> + (set (reg load)
> + (unspec [(symbol_ref addr_symbol)
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG)) */
> + unsigned int addr_regno = reg_or_subregno (addr_reg);
> + rtx label_num = pcrel_opt_next_marker ();
> + rtx reg_di = gen_rtx_REG (DImode, reg_regno);
> + rtx addr_pattern;
> +
> + /* Create the load address, either using the pattern with an explicit clobber
> + if the address register is not the same as the register being loaded, or
> + using the pattern that requires the address register to be the address
> + loaded. */
> + if (addr_regno != reg_regno)
> + addr_pattern = gen_pcrel_opt_ld_addr (addr_reg, addr_symbol, label_num,
> + reg_di);
> + else
> + addr_pattern = gen_pcrel_opt_ld_addr_same_reg (addr_reg, addr_symbol,
> + label_num);
> +
> + validate_change (addr_insn, &PATTERN (addr_insn), addr_pattern, false);
> +
> + /* Update the load insn. If the mem had a sign/zero/float extend, add that
> + also after doing the UNSPEC. Add an explicit clobber of the external
> + address register just to make it clear that the address register dies.
> +
> + (parallel [(set (reg:<MODE> data)
> + (unspec:<MODE> [(mem (addr_reg)
> + (reg:DI data)
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_LD_RELOC))
> + (clobber (reg:DI addr_reg))]) */
> + rtvec v_load = gen_rtvec (3, mem_inner, reg_di, label_num);
> + rtx new_load = gen_rtx_UNSPEC (GET_MODE (mem_inner), v_load,
> + UNSPEC_PCREL_OPT_LD_RELOC);
> +
> + if (GET_CODE (mem) != GET_CODE (mem_inner))
> + new_load = gen_rtx_fmt_e (GET_CODE (mem), reg_mode, new_load);
> +
> + rtx new_load_set = gen_rtx_SET (reg, new_load);
> + rtx load_clobber = gen_rtx_CLOBBER (VOIDmode,
> + (addr_regno == reg_regno
> + ? gen_rtx_SCRATCH (Pmode)
> + : addr_reg));
> + rtx new_load_pattern
> + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_load_set, load_clobber));
> +
> + validate_change (load_insn, &PATTERN (load_insn), new_load_pattern, false);
> +
> + /* Note whether the changes were successful or not. */
> + if (apply_change_group ())
> + {
> + /* PCREL_OPT load optimization succeeded. */
> + counters.loads++;
> + if (next_nonnote_insn (addr_insn) == load_insn)
> + counters.adjacent_loads++;
> +
> + if (dump_file)
> + fprintf (dump_file,
> + "PCREL_OPT load (addr insn = %d, use insn = %d).\n",
> + INSN_UID (addr_insn),
> + INSN_UID (load_insn));
> +
> + df_analyze ();
> + }
> + else
> + {
> + /* PCREL_OPT load optimization did not succeed. */
> + counters.failed_loads++;
> + if (dump_file)
> + fprintf (dump_file,
> + "PCREL_OPT load failed (addr insn = %d, use insn = %d).\n",
> + INSN_UID (addr_insn),
> + INSN_UID (load_insn));
> + }
> +
> + return;
> +}
> +
> +/* Optimize a PC-relative load address to be used in a store.
> +
> + If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. no
> + additional references to the address register, the address register dies at
> + the store, and no changes to the stored value), convert insns of the form:
> +
> + (set (reg:DI addr)
> + (symbol_ref:DI "ext_symbol"))
> +
> + ...
> +
> + (set (mem:<MODE> (reg:DI addr))
> + (reg:<MODE> value))
> +
> + into:
> +
> + (parallel [(set (reg:DI addr)
> + (unspec:DI [(symbol_ref:DI "ext_symbol")
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_ST_ADDR))
> + (use (reg:<MODE> value))])
> +
> + ...
> +
> + (parallel [(set (mem:<MODE> (reg:DI addr))
> + (unspec:<MODE> [(reg:<MODE>)
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_ST_RELOC))
> + (clobber (reg:DI addr))])
> +
> + The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus a
> + definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC insn
> + will generate the .reloc to tell the linker to tie the load address and
> + the store using that address together.
> +
> + pld b,ext_symbol@got@pcrel
> + .Lpcrel1:
> +
> + ...
> +
> + .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
> + stw r,0(b)
> +
> + If ext_symbol is defined in another object file in the main program and we
> + are linking the main program, the linker will convert the above instructions
> + to:
> +
> + pstw r,ext_symbol@pcrel
> +
> + ...
> +
> + nop */
> +
> +static void
> +pcrel_opt_store (rtx_insn *addr_insn, /* insn loading address. */
> + rtx_insn *store_insn) /* insn using address. */
> +{
> + rtx addr_old_set = PATTERN (addr_insn);
> + gcc_assert (GET_CODE (addr_old_set) == SET);
> +
> + rtx addr_reg = SET_DEST (addr_old_set);
> + gcc_assert (base_reg_operand (addr_reg, Pmode));
> +
> + rtx addr_symbol = SET_SRC (addr_old_set);
> + gcc_assert (pcrel_external_address (addr_symbol, Pmode));
> +
> + rtx store_set = PATTERN (store_insn);
> + gcc_assert (GET_CODE (store_set) == SET);
> +
> + rtx mem = SET_DEST (store_set);
> + if (!MEM_P (mem))
> + return;
> +
> + machine_mode mem_mode = GET_MODE (mem);
> + rtx reg = SET_SRC (store_set);
> +
> + /* Only a register may be stored: it must not be the address register
> + itself, and it must not have been modified since the address load. */
> + if (!register_operand (reg, GET_MODE (reg))
> + || reg_or_subregno (reg) == reg_or_subregno (addr_reg)
> + || reg_set_between_p (reg, addr_insn, store_insn))
> + return;
> +
> + /* If the address isn't a non-prefixed offsettable instruction, we can't do
> + the optimization. */
> + if (!offsettable_non_prefixed_memory (reg, mem_mode, mem))
> + return;
> +
> + /* Allocate a new PC-relative label, and update the load address insn.
> +
> + (parallel [(set (reg addr)
> + (unspec [(symbol_ref symbol)
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_ST_ADDR))
> + (use (reg store))]) */
> + rtx label_num = pcrel_opt_next_marker ();
> + rtvec v_addr = gen_rtvec (2, addr_symbol, label_num);
> + rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr,
> + UNSPEC_PCREL_OPT_ST_ADDR);
> + rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec);
> + rtx addr_use = gen_rtx_USE (VOIDmode, reg);
> + rtx addr_new_pattern
> + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use));
> +
> + validate_change (addr_insn, &PATTERN (addr_insn), addr_new_pattern, false);
> +
> + /* Update the store insn. Add an explicit clobber of the external address
> + register just to be sure there are no additional uses of the address
> + register.
> +
> + (parallel [(set (mem (addr_reg)
> + (unspec:<MODE> [(reg)
> + (const_int label_num)]
> + UNSPEC_PCREL_OPT_ST_RELOC))
> + (clobber (reg:DI addr_reg))]) */
> + rtvec v_store = gen_rtvec (2, reg, label_num);
> + rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store,
> + UNSPEC_PCREL_OPT_ST_RELOC);
> +
> + rtx new_store_set = gen_rtx_SET (mem, new_store);
> + rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg);
> + rtx new_store_pattern
> + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, store_clobber));
> +
> + validate_change (store_insn, &PATTERN (store_insn), new_store_pattern, false);
> +
> + /* Note whether changes succeeded or not. */
> + if (apply_change_group ())
> + {
> + /* PCREL_OPT store succeeded. */
> + counters.stores++;
> + if (next_nonnote_insn (addr_insn) == store_insn)
> + counters.adjacent_stores++;
> +
> + if (dump_file)
> + fprintf (dump_file,
> + "PCREL_OPT store (addr insn = %d, use insn = %d).\n",
> + INSN_UID (addr_insn),
> + INSN_UID (store_insn));
> +
> + df_analyze ();
> + }
> + else
> + {
> + /* PCREL_OPT store failed. */
> + counters.failed_stores++;
> + if (dump_file)
> + fprintf (dump_file,
> + "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n",
> + INSN_UID (addr_insn),
> + INSN_UID (store_insn));
> + }
> +
> + return;
> +}
> +
> +/* Return the register used as the base register of MEM. We look for
> + BSWAP and UNSPEC (which might be LFIWAX/LFIWZX/STFIWX) to exclude
> + instructions that do not have a pc-relative form. We don't
> + explicitly look for lxvd2x (rotate or vec_select) because we do not
> + expect to see that generated for p9 or newer anyway. */
> +
> +static rtx
> +get_mem_base_reg (rtx mem)
> +{
> + const char * fmt;
> + /* If we have a zero_extend, etc., strip them. */
> + while (!MEM_P (mem))
> + {
> + if (GET_CODE (mem) == BSWAP
> + || GET_CODE (mem) == UNSPEC)
> + return NULL_RTX;
> + if (GET_RTX_LENGTH (GET_CODE (mem)) < 1)
> + return NULL_RTX;
> + fmt = GET_RTX_FORMAT (GET_CODE (mem));
> + if (fmt[0] != 'e')
> + return NULL_RTX;
> + mem = XEXP (mem, 0);
> + if (mem == NULL_RTX )
> + return NULL_RTX;
> + }
> +
> + rtx addr_rtx;
> + if (!MEM_SIZE_KNOWN_P (mem))
> + return NULL_RTX;
> +
> + addr_rtx = (XEXP (mem, 0));
> + if (GET_CODE (addr_rtx) == PRE_MODIFY)
> + addr_rtx = XEXP (addr_rtx, 1);
> +
> + while (GET_CODE (addr_rtx) == PLUS
> + && CONST_INT_P (XEXP (addr_rtx, 1)))
> + addr_rtx = XEXP (addr_rtx, 0);
> +
> + return REG_P (addr_rtx) ? addr_rtx : NULL_RTX;
> +}
> +
> +/* Check whether INSN contains an invalid reference to REGNO. If TYPE is a
> + load or store instruction, then we cannot allow any definitions of REGNO.
> + If TYPE is a load instruction, then we cannot allow any uses either. */
> +
> +static bool
> +insn_references_regno_p (rtx_insn *insn, unsigned int regno, enum attr_type type)
> +{
> + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> + df_ref ref;
> +
> + /* All definitions of REGNO are invalid. */
> + for (ref = DF_INSN_INFO_DEFS (insn_info); ref; ref = DF_REF_NEXT_LOC (ref))
> + if (DF_REF_REGNO (ref) == regno)
> + return true;
> +
> + /* Any uses of REGNO are invalid if we're attempting to optimize a load. */
> + if (type == TYPE_LOAD
> + || type == TYPE_FPLOAD
> + || type == TYPE_VECLOAD)
> + for (ref = DF_INSN_INFO_USES (insn_info); ref; ref = DF_REF_NEXT_LOC (ref))
> + if (DF_REF_REGNO (ref) == regno)
> + return true;
> +
> + return false;
> +}
> +
> +/* Given an insn that loads up a base register with the address of an
> + external symbol, see if we can optimize it with the PCREL_OPT
> + optimization. */
> +
> +static void
> +pcrel_opt_address (rtx_insn *addr_insn)
> +{
> + counters.extern_addrs++;
> +
> + /* Do some basic validation. */
> + rtx addr_set = PATTERN (addr_insn);
> + if (GET_CODE (addr_set) != SET)
> + return;
> +
> + rtx addr_reg = SET_DEST (addr_set);
> + rtx addr_symbol = SET_SRC (addr_set);
> +
> + if (!base_reg_operand (addr_reg, Pmode)
> + || !pcrel_external_address (addr_symbol, Pmode))
> + return;
> +
> + /* The address register must have exactly one definition. */
> + struct df_insn_info *insn_info = DF_INSN_INFO_GET (addr_insn);
> + if (!insn_info)
> + return;
> +
> + df_ref def = df_single_def (insn_info);
> + if (!def)
> + return;
> +
> + /* Make sure there is at least one use. */
> + df_link *chain = DF_REF_CHAIN (def);
> + if (!chain || !chain->ref)
> + return;
> +
> + /* Get the insn of the possible load or store. */
> + rtx_insn *use_insn = DF_REF_INSN (chain->ref);
> +
> + /* Ensure there are no other uses. */
> + for (chain = chain->next; chain; chain = chain->next)
> + if (chain->ref && DF_REF_INSN_INFO (chain->ref))
> + {
> + gcc_assert (DF_REF_INSN (chain->ref));
> + if (NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
> + return;
> + }
> +
> + /* The use instruction must be a single non-prefixed instruction. */
> + if (get_attr_length (use_insn) != 4)
> + return;
> +
> + /* The address and the memory operation must be in the same basic block. */
> + if (BLOCK_FOR_INSN (use_insn) != BLOCK_FOR_INSN (addr_insn))
> + return;
> +
> + /* If this isn't a simple SET, skip doing the optimization. */
> + if (GET_CODE (PATTERN (use_insn)) != SET)
> + return;
> +
> + /* Check the insns between loading the address and its use to classify what
> + type of insn it is. */
> + rtx_insn *insn;
> + rtx_insn *last_insn_in_bb = BB_END (BLOCK_FOR_INSN (use_insn));
> + enum attr_type use_insn_type = get_attr_type (use_insn);
> + unsigned int use_regno;
> +
> + switch (use_insn_type)
> + {
> + case TYPE_LOAD:
> + case TYPE_FPLOAD:
> + case TYPE_VECLOAD:
> + /* Make sure our address register is the same register used in the
> + base address of the load. */
> + if (addr_reg != get_mem_base_reg (SET_SRC (PATTERN (use_insn))))
> + return;
> + use_regno = REGNO (SET_DEST (PATTERN (use_insn)));
> + break;
> + case TYPE_STORE:
> + case TYPE_FPSTORE:
> + case TYPE_VECSTORE:
> + /* Make sure our address register is the same register used in the
> + base address of the store. */
> + if (addr_reg != get_mem_base_reg (SET_DEST (PATTERN (use_insn))))
> + return;
> + use_regno = REGNO (SET_SRC (PATTERN (use_insn)));
> + break;
> + default:
> + /* We can only optimize loads and stores. Ignore everything else. */
> + return;
> + }
> +
> +
> + for (insn = NEXT_INSN (addr_insn);
> + insn != use_insn;
> + insn = NEXT_INSN (insn))
> + {
> + /* If we see things like labels, calls, etc., or we've reached the end
> + of the block without seeing the load or store, then don't do the
> + PCREL_OPT optimization. */
> + if (!insn
> + || LABEL_P (insn)
> + || JUMP_P (insn)
> + || CALL_P (insn)
> + || BARRIER_P (insn)
> + || insn == last_insn_in_bb)
> + return;
> +
> + /* For a normal insn, see if it is a load or store. */
> + if (NONDEBUG_INSN_P (insn)
> + && GET_CODE (PATTERN (insn)) != USE
> + && GET_CODE (PATTERN (insn)) != CLOBBER)
> + {
> + switch (get_attr_type (insn))
> + {
> + case TYPE_LOAD:
> + /* While load of the external address is a 'load' for scheduling
> + purposes, it should be safe to allow loading other external
> + addresses between the load of the external address we are
> + currently looking at and the load or store using that
> + address. */
> + if (get_attr_loads_extern_addr (insn) == LOADS_EXTERN_ADDR_YES)
> + break;
> + /* fall through */
> +
> + case TYPE_FPLOAD:
> + case TYPE_VECLOAD:
> + /* Don't do the PCREL_OPT store optimization if there is a load
> + operation. For example, the load might be trying to load the
> + value being stored in between getting the address and doing
> + the store. */
> + if (use_insn_type == TYPE_STORE
> + || use_insn_type == TYPE_FPSTORE
> + || use_insn_type == TYPE_VECSTORE)
> + return;
> + break;
> +
> + case TYPE_STORE:
> + case TYPE_FPSTORE:
> + case TYPE_VECSTORE:
> + /* Don't do the PCREL_OPT load optimization if there is a store
> + operation. Perhaps the store might be to the global variable
> + through a pointer. */
> + return;
> +
> + case TYPE_LOAD_L:
> + case TYPE_STORE_C:
> + case TYPE_HTM:
> + case TYPE_HTMSIMPLE:
> + /* Don't do the optimization through atomic operations. */
> + return;
> +
> + default:
> + break;
> + }
> + }
> +
> + /* Check for invalid references of the non-address register that is
> + used in the load or store instruction. */
> + if (insn_references_regno_p (insn, use_regno, use_insn_type))
> + return;
> + }
> +
> + /* Is this a load or a store? */
> + switch (use_insn_type)
> + {
> + case TYPE_LOAD:
> + case TYPE_FPLOAD:
> + case TYPE_VECLOAD:
> + pcrel_opt_load (addr_insn, use_insn);
> + break;
> +
> + case TYPE_STORE:
> + case TYPE_FPSTORE:
> + case TYPE_VECSTORE:
> + pcrel_opt_store (addr_insn, use_insn);
> + break;
> +
> + default:
> + gcc_unreachable ();
> + }
> +}
> +
> +/* Optimize pcrel external variable references. */
> +
> +static unsigned int
> +pcrel_opt_pass (function *fun)
> +{
> + basic_block bb;
> + rtx_insn *insn, *curr_insn = 0;
> +
> + memset ((char *) &counters, '\0', sizeof (counters));
> +
> + /* Dataflow analysis for use-def chains. */
> + df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> + df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
> + df_note_add_problem ();
> + df_analyze ();
> + df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE);
> +
> + if (dump_file)
> + fprintf (dump_file, "\n");
> +
> + /* Look at each basic block to see if there is a load of an external
> + variable's external address, and a single load/store using that external
> + address. */
> + FOR_ALL_BB_FN (bb, fun)
> + {
> + FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> + {
> + if (NONJUMP_INSN_P (insn)
> + && single_set (insn)
> + && get_attr_loads_extern_addr (insn) == LOADS_EXTERN_ADDR_YES)
> + pcrel_opt_address (insn);
> + }
> + }
> +
> + if (dump_file)
> + {
> + fprintf (dump_file,
> + "\n# of load(s) of an address of an external symbol = %lu\n",
> + counters.extern_addrs);
> +
> + fprintf (dump_file, "# of PCREL_OPT load(s) = %lu (adjacent %lu)\n",
> + counters.loads, counters.adjacent_loads);
> +
> + if (counters.failed_loads)
> + fprintf (dump_file, "# of failed PCREL_OPT load(s) = %lu\n",
> + counters.failed_loads);
> +
> + fprintf (dump_file, "# of PCREL_OPT store(s) = %lu (adjacent %lu)\n",
> + counters.stores, counters.adjacent_stores);
> +
> + if (counters.failed_stores)
> + fprintf (dump_file, "# of failed PCREL_OPT store(s) = %lu\n",
> + counters.failed_stores);
> +
> + fprintf (dump_file, "\n");
> + }
> +
> + df_remove_problem (df_chain);
> + df_process_deferred_rescans ();
> + df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE);
> + df_analyze ();
> + return 0;
> +}
> +
> +/* Optimize pc-relative references for the new PCREL_OPT pass. */
> +const pass_data pass_data_pcrel_opt =
> +{
> + RTL_PASS, /* type. */
> + "pcrel_opt", /* name. */
> + OPTGROUP_NONE, /* optinfo_flags. */
> + TV_NONE, /* tv_id. */
> + 0, /* properties_required. */
> + 0, /* properties_provided. */
> + 0, /* properties_destroyed. */
> + 0, /* todo_flags_start. */
> + TODO_df_finish, /* todo_flags_finish. */
> +};
> +
> +/* Pass data structures. */
> +class pcrel_opt : public rtl_opt_pass
> +{
> +public:
> + pcrel_opt (gcc::context *ctxt)
> + : rtl_opt_pass (pass_data_pcrel_opt, ctxt)
> + {}
> +
> + ~pcrel_opt (void)
> + {}
> +
> + /* opt_pass methods: */
> + virtual bool gate (function *)
> + {
> + return (TARGET_PCREL && TARGET_PCREL_OPT && optimize);
> + }
> +
> + virtual unsigned int execute (function *fun)
> + {
> + return pcrel_opt_pass (fun);
> + }
> +
> + opt_pass *clone ()
> + {
> + return new pcrel_opt (m_ctxt);
> + }
> +};
> +
> +rtl_opt_pass *
> +make_pass_pcrel_opt (gcc::context *ctxt)
> +{
> + return new pcrel_opt (ctxt);
> +}
> diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md
> new file mode 100644
> index 00000000000..95338f9d2fe
> --- /dev/null
> +++ b/gcc/config/rs6000/pcrel-opt.md
> @@ -0,0 +1,386 @@
> +;; Machine description for the PCREL_OPT optimization.
> +;; Copyright (C) 2020 Free Software Foundation, Inc.
> +;; Contributed by Michael Meissner (meissner@linux.ibm.com)
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify it
> +;; under the terms of the GNU General Public License as published
> +;; by the Free Software Foundation; either version 3, or (at your
> +;; option) any later version.
> +
> +;; GCC is distributed in the hope that it will be useful, but WITHOUT
> +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
> +;; License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3. If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +;; Support for the PCREL_OPT optimization. PCREL_OPT looks for instances where
> +;; an external variable is used only once, either for reading or for writing.
> +;;
> +;; If we are optimizing a single read, normally the code would look like:
> +;;
> +;; (set (reg:DI <ptr>)
> +;; (symbol_ref:DI "<extern_addr>")) # <data> is currently dead
> +;;
> +;; ... # insns do not need to be adjacent
> +;;
> +;; (set (reg:SI <data>)
> +;; (mem:SI (reg:DI <xxx>))) # <ptr> dies with this insn
> +;;
> +;; We transform this into:
> +;;
> +;; (parallel [(set (reg:DI <ptr>)
> +;; (unspec:DI [(symbol_ref:DI <extern_addr>)
> +;; (const_int <marker>)]
> +;; UNSPEC_PCREL_OPT_LD_ADDR))
> +;; (set (reg:DI <data>)
> +;; (unspec:DI [(const_int 0)]
> +;; UNSPEC_PCREL_OPT_LD_ADDR))])
> +;;
> +;; ...
> +;;
> +;; (parallel [(set (reg:SI <data>)
> +;; (unspec:SI [(mem:SI (reg:DI <ptr>))
> +;; (reg:DI <data>)
> +;; (const_int <marker>)]
> +;; UNSPEC_PCREL_OPT_LD_RELOC))
> +;; (clobber (reg:DI <ptr>))])
> +;;
> +;; The marker is an integer constant that links the load of the external
> +;; address to the load of the actual variable.
> +;;
> +;; In the first insn, we set the register holding the address of the external
> +;; variable and also mark the register that will receive the loaded value as
> +;; set, so that both are created in that insn and consumed in the second insn.
> +;; The mode of the register that is ultimately loaded does not matter here, so
> +;; we use DImode. We just need to mark that both registers may be set in the
> +;; first insn, and will be used in the second insn.
> +;;
> +;; Since we use UNSPECs and link both the register holding the external
> +;; address and the value being loaded, it should prevent other passes from
> +;; modifying it.
> +;;
> +;; If the register being loaded is the same as the base register, we use an
> +;; alternate form of the insns.
> +;;
> +;; (set (reg:DI <data_ptr>)
> +;; (unspec:DI [(symbol_ref:DI <extern_addr>)
> +;; (const_int <marker>)]
> +;; UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
> +;;
> +;; ...
> +;;
> +;; (parallel [(set (reg:SI <data>)
> +;; (unspec:SI [(mem:SI (reg:DI <ptr>))
> +;; (reg:DI <data>)
> +;; (const_int <marker>)]
> +;; UNSPEC_PCREL_OPT_LD_RELOC))
> +;; (clobber (reg:DI <ptr>))])
> +
> +(define_c_enum "unspec"
> + [UNSPEC_PCREL_OPT_LD_ADDR
> + UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG
> + UNSPEC_PCREL_OPT_LD_RELOC
> + UNSPEC_PCREL_OPT_ST_ADDR
> + UNSPEC_PCREL_OPT_ST_RELOC])
> +
> +;; Modes that are supported for PCREL_OPT
> +(define_mode_iterator PO [QI HI SI DI TI SF DF KF
> + V1TI V2DI V4SI V8HI V16QI V2DF V4SF
> + (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
> +
> +;; Vector modes for PCREL_OPT
> +(define_mode_iterator PO_VECT [TI KF V1TI V2DI V4SI V8HI V16QI V2DF V4SF
> + (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
> +
> +;; Insn for loading the external address, where the register holding the
> +;; address is not the same as the register being loaded with the data.
> +(define_insn "pcrel_opt_ld_addr"
> + [(set (match_operand:DI 0 "base_reg_operand" "=&b,&b")
> + (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
> + (match_operand 2 "const_int_operand" "n,n")]
> + UNSPEC_PCREL_OPT_LD_ADDR))
> + (set (match_operand:DI 3 "gpc_reg_operand" "=r,wa")
> + (unspec:DI [(const_int 0)]
> + UNSPEC_PCREL_OPT_LD_ADDR))]
> + "TARGET_PCREL_OPT
> + && reg_or_subregno (operands[0]) != reg_or_subregno (operands[3])"
> + "ld %0,%a1\n.Lpcrel%2:"
> + [(set_attr "prefixed" "yes")
> + (set_attr "type" "load")
> + (set_attr "loads_extern_addr" "yes")])
> +
> +;; Alternate form of loading up the external address that is the same register
> +;; as the final load.
> +(define_insn "pcrel_opt_ld_addr_same_reg"
> + [(set (match_operand:DI 0 "base_reg_operand" "=b")
> + (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
> + (match_operand 2 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))]
> + "TARGET_PCREL_OPT"
> + "ld %0,%a1\n.Lpcrel%2:"
> + [(set_attr "prefixed" "yes")
> + (set_attr "type" "load")
> + (set_attr "loads_extern_addr" "yes")])
> +
> +;; PCREL_OPT modes that are optimized for loading or storing GPRs.
> +(define_mode_iterator PO_GPR [QI HI SI DI SF DF])
> +
> +(define_mode_attr PO_GPR_LD [(QI "lbz")
> + (HI "lhz")
> + (SI "lwz")
> + (SF "lwz")
> + (DI "ld")
> + (DF "ld")])
> +
> +;; PCREL_OPT load operation of GPRs. Operand 4 (the register used to hold the
> +;; address of the external symbol) is SCRATCH if the same register is used for
> +;; the normal load.
> +(define_insn "*pcrel_opt_ld<mode>_gpr"
> + [(parallel [(set (match_operand:PO_GPR 0 "int_reg_operand" "+r")
> + (unspec:PO_GPR [(match_operand:PO_GPR 1 "d_form_memory" "o")
> + (match_operand:DI 2 "int_reg_operand" "0")
> + (match_operand 3 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_LD_RELOC))
> + (clobber (match_scratch:DI 4 "=bX"))])]
> + "TARGET_PCREL_OPT
> + && (GET_CODE (operands[4]) == SCRATCH
> + || reg_mentioned_p (operands[4], operands[1]))"
> +{
> + output_pcrel_opt_reloc (operands[3]);
> + return "<PO_GPR_LD> %0,%1";
> +}
> + [(set_attr "type" "load")])
> +
> +;; PCREL_OPT load with sign/zero extension
> +(define_insn "*pcrel_opt_ldsi_<u><mode>_gpr"
> + [(set (match_operand:EXTSI 0 "int_reg_operand" "+r")
> + (any_extend:EXTSI
> + (unspec:SI [(match_operand:SI 1 "d_form_memory" "o")
> + (match_operand:DI 2 "int_reg_operand" "0")
> + (match_operand 3 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_LD_RELOC)))
> + (clobber (match_scratch:DI 4 "=bX"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[3]);
> + return "lw<az> %0,%1";
> +}
> + [(set_attr "type" "load")])
> +
> +(define_insn "*pcrel_opt_ldhi_<u><mode>_gpr"
> + [(set (match_operand:EXTHI 0 "int_reg_operand" "+r")
> + (any_extend:EXTHI
> + (unspec:HI [(match_operand:HI 1 "d_form_memory" "o")
> + (match_operand:DI 2 "int_reg_operand" "0")
> + (match_operand 3 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_LD_RELOC)))
> + (clobber (match_scratch:DI 4 "=bX"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[3]);
> + return "lh<az> %0,%1";
> +}
> + [(set_attr "type" "load")])
> +
> +(define_insn "*pcrel_opt_ldqi_u<mode>_gpr"
> + [(set (match_operand:EXTQI 0 "int_reg_operand" "+r")
> + (zero_extend:EXTQI
> + (unspec:QI [(match_operand:QI 1 "d_form_memory" "o")
> + (match_operand:DI 2 "int_reg_operand" "0")
> + (match_operand 3 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_LD_RELOC)))
> + (clobber (match_scratch:DI 4 "=bX"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[3]);
> + return "lbz %0,%1";
> +}
> + [(set_attr "type" "load")])
> +
> +;; Scalar types that can be optimized by loading them into floating point
> +;; or Altivec registers.
> +(define_mode_iterator PO_FP [DI DF SF])
> +
> +;; Load instructions to load up scalar floating point or 64-bit integer values
> +;; into floating point registers or Altivec registers.
> +(define_mode_attr PO_FPR_LD [(DI "lfd") (DF "lfd") (SF "lfs")])
> +(define_mode_attr PO_AVX_LD [(DI "lxsd") (DF "lxsd") (SF "lxssp")])
> +
> +;; PCREL_OPT load operation of scalar DF/DI/SF into vector registers.
> +(define_insn "*pcrel_opt_ld<mode>_vsx"
> + [(set (match_operand:PO_FP 0 "vsx_register_operand" "+d,v")
> + (unspec:PO_FP [(match_operand:PO_FP 1 "d_form_memory" "o,o")
> + (match_operand:DI 2 "vsx_register_operand" "0,0")
> + (match_operand 3 "const_int_operand" "n,n")]
> + UNSPEC_PCREL_OPT_LD_RELOC))
> + (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[3]);
> + return which_alternative ? "<PO_AVX_LD> %0,%1" : "<PO_FPR_LD> %0,%1";
> +}
> + [(set_attr "type" "fpload")])
> +
> +;; PCREL_OPT optimization extending SFmode to DFmode via a load.
> +(define_insn "*pcrel_opt_ldsf_df"
> + [(set (match_operand:DF 0 "vsx_register_operand" "+d,v")
> + (float_extend:DF
> + (unspec:SF [(match_operand:SF 1 "d_form_memory" "o,o")
> + (match_operand:DI 2 "vsx_register_operand" "0,0")
> + (match_operand 3 "const_int_operand" "n,n")]
> + UNSPEC_PCREL_OPT_LD_RELOC)))
> + (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[3]);
> + return which_alternative ? "lxssp %0,%1" : "lfs %0,%1";
> +}
> + [(set_attr "type" "fpload")])
> +
> +;; PCREL_OPT load operation of vector/float128 types into vector registers.
> +(define_insn "*pcrel_opt_ld<mode>"
> + [(set (match_operand:PO_VECT 0 "vsx_register_operand" "+wa")
> + (unspec:PO_VECT [(match_operand:PO_VECT 1 "d_form_memory" "o")
> + (match_operand:DI 2 "vsx_register_operand" "0")
> + (match_operand 3 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_LD_RELOC))
> + (clobber (match_operand:DI 4 "base_reg_operand" "=b"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[3]);
> + return "lxv %x0,%1";
> +}
> + [(set_attr "type" "vecload")])
> +
> +
> +;; PCREL_OPT optimization for stores. We need to put the label after the PLD
> +;; instruction, because the assembler might insert a NOP before the PLD for
> +;; alignment.
> +;;
> +;; If we are optimizing a single write, normally the code would look like:
> +;;
> +;; (set (reg:DI <ptr>)
> +;; (symbol_ref:DI "<extern_addr>")) # <data> must be live here
> +;;
> +;; ... # insns do not need to be adjacent
> +;;
> +;; (set (mem:SI (reg:DI <xxx>))
> +;; (reg:SI <data>)) # <ptr> dies with this insn
> +;;
> +;; We optimize this to be:
> +;;
> +;; (parallel [(set (reg:DI <ptr>)
> +;; (unspec:DI [(symbol_ref:DI "<extern_addr>")
> +;; (const_int <marker>)]
> +;; UNSPEC_PCREL_OPT_ST_ADDR))
> +;; (use (reg:<MODE> <data>))])
> +;;
> +;; ... # insns do not need to be adjacent
> +;;
> +;; (parallel [(set (mem:<MODE> (reg:DI <ptr>))
> +;; (unspec:<MODE> [(reg:<MODE> <data>)
> +;; (const_int <marker>)]
> +;; UNSPEC_PCREL_OPT_ST_RELOC))
> +;; (clobber (reg:DI <ptr>))])
> +
> +(define_insn "*pcrel_opt_st_addr<mode>"
> + [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
> + (unspec:DI [(match_operand:DI 1 "pcrel_external_address")
> + (match_operand 2 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_ST_ADDR))
> + (use (match_operand:PO 3 "gpc_reg_operand" "rwa"))]
> + "TARGET_PCREL_OPT"
> + "ld %0,%a1\n.Lpcrel%2:"
> + [(set_attr "prefixed" "yes")
> + (set_attr "type" "load")
> + (set_attr "loads_extern_addr" "yes")])
> +
> +;; PCREL_OPT stores.
> +(define_insn "*pcrel_opt_st<mode>"
> + [(set (match_operand:QHSI 0 "d_form_memory" "=o")
> + (unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r")
> + (match_operand 2 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_ST_RELOC))
> + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[2]);
> + return "st<wd> %1,%0";
> +}
> + [(set_attr "type" "store")])
> +
> +(define_insn "*pcrel_opt_stdi"
> + [(set (match_operand:DI 0 "d_form_memory" "=o,o,o")
> + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v")
> + (match_operand 2 "const_int_operand" "n,n,n")]
> + UNSPEC_PCREL_OPT_ST_RELOC))
> + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
> + "TARGET_PCREL_OPT && TARGET_POWERPC64"
> +{
> + output_pcrel_opt_reloc (operands[2]);
> + switch (which_alternative)
> + {
> + case 0: return "std %1,%0";
> + case 1: return "stfd %1,%0";
> + case 2: return "stxsd %1,%0";
> + default: gcc_unreachable ();
> + }
> +}
> + [(set_attr "type" "store,fpstore,fpstore")])
> +
> +(define_insn "*pcrel_opt_stsf"
> + [(set (match_operand:SF 0 "d_form_memory" "=o,o,o")
> + (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r")
> + (match_operand 2 "const_int_operand" "n,n,n")]
> + UNSPEC_PCREL_OPT_ST_RELOC))
> + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[2]);
> + switch (which_alternative)
> + {
> + case 0: return "stfs %1,%0";
> + case 1: return "stxssp %1,%0";
> + case 2: return "stw %1,%0";
> + default: gcc_unreachable ();
> + }
> +}
> + [(set_attr "type" "fpstore,fpstore,store")])
> +
> +(define_insn "*pcrel_opt_stdf"
> + [(set (match_operand:DF 0 "d_form_memory" "=o,o,o")
> + (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r")
> + (match_operand 2 "const_int_operand" "n,n,n")]
> + UNSPEC_PCREL_OPT_ST_RELOC))
> + (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
> + "TARGET_PCREL_OPT
> + && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))"
> +{
> + output_pcrel_opt_reloc (operands[2]);
> + switch (which_alternative)
> + {
> + case 0: return "stfd %1,%0";
> + case 1: return "stxsd %1,%0";
> + case 2: return "std %1,%0";
> + default: gcc_unreachable ();
> + }
> +}
> + [(set_attr "type" "fpstore,fpstore,store")])
> +
> +(define_insn "*pcrel_opt_st<mode>"
> + [(set (match_operand:PO_VECT 0 "d_form_memory" "=o")
> + (unspec:PO_VECT [(match_operand:PO_VECT 1 "gpc_reg_operand" "wa")
> + (match_operand 2 "const_int_operand" "n")]
> + UNSPEC_PCREL_OPT_ST_RELOC))
> + (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
> + "TARGET_PCREL_OPT"
> +{
> + output_pcrel_opt_reloc (operands[2]);
> + return "stxv %x1,%0";
> +}
> + [(set_attr "type" "vecstore")])
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 4c2fe7fa312..9610587e1c1 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -1876,3 +1876,26 @@ (define_predicate "prefixed_memory"
> {
> return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT);
> })
> +
> +;; Return true if the operand is a valid memory operand with an offsettable
> +;; address that could be merged with the load of a PC-relative external address
> +;; with the PCREL_OPT optimization. We don't check here whether or not the
> +;; offset needs to be used in a DS-FORM (bottom 2 bits 0) or DQ-FORM (bottom 4
> +;; bits 0) instruction.
> +(define_predicate "d_form_memory"
> + (match_code "mem")
> +{
> + if (!memory_operand (op, mode))
> + return false;
> +
> + rtx addr = XEXP (op, 0);
> +
> + if (REG_P (addr) || SUBREG_P (addr))
> + return true;
> +
> + if (GET_CODE (addr) != PLUS)
> + return false;
> +
> + return (base_reg_operand (XEXP (addr, 0), Pmode)
> + && satisfies_constraint_I (XEXP (addr, 1)));
> +})
> diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
> index 8d2c1ffd6cf..d3f72d77a6e 100644
> --- a/gcc/config/rs6000/rs6000-cpus.def
> +++ b/gcc/config/rs6000/rs6000-cpus.def
> @@ -78,6 +78,7 @@
> /* Flags that need to be turned off if -mno-power10. */
> #define OTHER_POWER10_MASKS (OPTION_MASK_MMA \
> | OPTION_MASK_PCREL \
> + | OPTION_MASK_PCREL_OPT \
> | OPTION_MASK_PREFIXED)
>
> #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \
> @@ -142,6 +143,7 @@
> | OPTION_MASK_P9_MISC \
> | OPTION_MASK_P9_VECTOR \
> | OPTION_MASK_PCREL \
> + | OPTION_MASK_PCREL_OPT \
> | OPTION_MASK_POPCNTB \
> | OPTION_MASK_POPCNTD \
> | OPTION_MASK_POWERPC64 \
> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
> index 5164c526e34..b62244b8df2 100644
> --- a/gcc/config/rs6000/rs6000-passes.def
> +++ b/gcc/config/rs6000/rs6000-passes.def
> @@ -24,4 +24,12 @@ along with GCC; see the file COPYING3. If not see
> REPLACE_PASS (PASS, INSTANCE, TGT_PASS)
> */
>
> + /* Pass to add the appropriate vector swaps on power8 little endian systems.
> + The power8 does not have instructions that automatically do the byte swaps
> + for loads and stores. */
> INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> +
> + /* Pass to do the PCREL_OPT optimization that combines the load of an
> + external symbol's address along with a single load or store using that
> + address as a base register. */
> + INSERT_PASS_BEFORE (pass_sched2, 1, pass_pcrel_opt);
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 25fa5dd57cd..75c659971ea 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -190,10 +190,13 @@ enum non_prefixed_form {
>
> extern enum insn_form address_to_insn_form (rtx, machine_mode,
> enum non_prefixed_form);
> +extern enum non_prefixed_form reg_to_non_prefixed (rtx, machine_mode);
> +extern bool offsettable_non_prefixed_memory (rtx, machine_mode, rtx);
> extern bool prefixed_load_p (rtx_insn *);
> extern bool prefixed_store_p (rtx_insn *);
> extern bool prefixed_paddi_p (rtx_insn *);
> extern void rs6000_asm_output_opcode (FILE *);
> +extern void output_pcrel_opt_reloc (rtx);
> extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
> extern int rs6000_adjust_insn_length (rtx_insn *, int);
>
> @@ -306,6 +309,7 @@ namespace gcc { class context; }
> class rtl_opt_pass;
>
> extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> +extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
> extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
> extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx);
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 1e506b83762..2c09d33bd74 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1173,7 +1173,6 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
> machine_mode,
> secondary_reload_info *,
> bool);
> -static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
> rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>
> /* Hash table stuff for keeping track of TOC entries. */
> @@ -4435,6 +4434,14 @@ rs6000_option_override_internal (bool global_init_p)
> rs6000_isa_flags &= ~OPTION_MASK_MMA;
> }
>
> + if (!TARGET_PCREL && TARGET_PCREL_OPT)
> + {
> + if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL_OPT) != 0)
> + error ("%qs requires %qs", "-mpcrel-opt", "-mpcrel");
> +
> + rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
> + }
> +
> if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
> rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
>
> @@ -8634,8 +8641,57 @@ rs6000_delegitimize_address (rtx orig_x)
> {
> rtx x, y, offset;
>
> - if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
> - orig_x = XVECEXP (orig_x, 0, 0);
> + /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
> + encodes loading up the high part of the address of a TOC reference along
> + with a load of a GPR using the same base register used for the load. We
> + return the original SYMBOL_REF.
> +
> + (set (reg:INT1 <reg>)
> + (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
> +
> + UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
> + UNSPECs include the external SYMBOL_REF along with the value being loaded.
> + We return the original SYMBOL_REF.
> +
> + (parallel [(set (reg:DI <base-reg>)
> + (unspec:DI [(symbol_ref <symbol>)
> + (const_int <marker>)]
> + UNSPEC_PCREL_OPT_LD_ADDR))
> + (set (reg:DI <load-reg>)
> + (unspec:DI [(const_int 0)]
> + UNSPEC_PCREL_OPT_LD_ADDR))])
> +
> + UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG is an alternative that is used if the
> + GPR being loaded is the same as the GPR used to hold the external address.
> +
> + (set (reg:DI <base-reg>)
> + (unspec:DI [(symbol_ref <symbol>)
> + (const_int <marker>)]
> + UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
> +
> + UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
> + UNSPEC includes the external SYMBOL_REF along with the value being stored.
> + We return the original SYMBOL_REF.
> +
> + (parallel [(set (reg:DI <base-reg>)
> + (unspec:DI [(symbol_ref <symbol>)
> + (const_int <marker>)]
> + UNSPEC_PCREL_OPT_ST_ADDR))
> + (use (reg <store-reg>))]) */
> +
> + if (GET_CODE (orig_x) == UNSPEC)
> + switch (XINT (orig_x, 1))
> + {
> + case UNSPEC_FUSION_GPR:
> + case UNSPEC_PCREL_OPT_LD_ADDR:
> + case UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG:
> + case UNSPEC_PCREL_OPT_ST_ADDR:
> + orig_x = XVECEXP (orig_x, 0, 0);
> + break;
> +
> + default:
> + break;
> + }
>
> orig_x = delegitimize_mem_from_attrs (orig_x);
>
> @@ -23391,6 +23447,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
> { "mulhw", OPTION_MASK_MULHW, false, true },
> { "multiple", OPTION_MASK_MULTIPLE, false, true },
> { "pcrel", OPTION_MASK_PCREL, false, true },
> + { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
> { "popcntb", OPTION_MASK_POPCNTB, false, true },
> { "popcntd", OPTION_MASK_POPCNTD, false, true },
> { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
> @@ -25490,6 +25547,32 @@ address_to_insn_form (rtx addr,
> return INSN_FORM_BAD;
> }
>
> +/* Return true if a REG with a given MODE is loaded from or stored into a MEM
> + location that uses a non-prefixed offsettable address. This is used to
> + validate the load or store with the PCREL_OPT optimization to make sure it
> + is an instruction that can be optimized.
> +
> + We need to specify the MODE separately from the REG to allow for loads that
> + include zero/sign/float extension. */
> +
> +bool
> +offsettable_non_prefixed_memory (rtx reg, machine_mode mode, rtx mem)
> +{
> + /* If the instruction is indexed only like LFIWAX/LXSIWAX, it is not
> + offsettable. */
> + enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
> + if (non_prefixed == NON_PREFIXED_X)
> + return false;
> +
> + /* Check if this is a non-prefixed offsettable instruction. */
> + rtx addr = XEXP (mem, 0);
> + enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
> + return (iform == INSN_FORM_BASE_REG
> + || iform == INSN_FORM_D
> + || iform == INSN_FORM_DS
> + || iform == INSN_FORM_DQ);
> +}
> +
> /* Helper function to see if we're potentially looking at lfs/stfs.
> - PARALLEL containing a SET and a CLOBBER
> - stfs:
> @@ -25548,7 +25631,7 @@ is_lfs_stfs_insn (rtx_insn *insn)
> /* Helper function to take a REG and a MODE and turn it into the non-prefixed
> instruction format (D/DS/DQ) used for offset memory. */
>
> -static enum non_prefixed_form
> +enum non_prefixed_form
> reg_to_non_prefixed (rtx reg, machine_mode mode)
> {
> /* If it isn't a register, use the defaults. */
> @@ -25771,11 +25854,34 @@ void
> rs6000_asm_output_opcode (FILE *stream)
> {
> if (next_insn_prefixed_p)
> - fprintf (stream, "p");
> + {
> + fprintf (stream, "p");
> +
> + /* Reset flag in case there are separate insn lines in the sequence, so
> + the 'p' is only emitted for the first line. This shows up when we are
> + doing the PCREL_OPT optimization, in that the label created with %r<n>
> + would have a leading 'p' printed. */
> + next_insn_prefixed_p = false;
> + }
>
> return;
> }
>
> +/* Emit the relocation to tie the next instruction to a previous instruction
> + that loads up an external address. This is used to do the PCREL_OPT
> + optimization. Note, the label is generated after the PLD of the got
> + pc-relative address to allow for the assembler to insert NOPs before the PLD
> + instruction. The operand is a constant integer that is the label
> + number. */
> +
> +void
> +output_pcrel_opt_reloc (rtx label_num)
> +{
> + rtx operands[1] = { label_num };
> + output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
> + operands);
> +}
> +
> /* Adjust the length of an INSN. LENGTH is the currently-computed length and
> should be adjusted to reflect any required changes. This macro is used when
> there is some systematic length adjustment required that would be difficult
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index dc060143104..adc8365df01 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -292,6 +292,10 @@ (define_attr "prefixed" "no,yes"
>
> (const_string "no")))
>
> +;; Whether an insn loads an external address for the PCREL_OPT optimization.
> +(define_attr "loads_extern_addr" "no,yes"
> + (const_string "no"))
> +
> ;; Return the number of real hardware instructions in a combined insn. If it
> ;; is 0, just use the length / 4.
> (define_attr "num_insns" "" (const_int 0))
> @@ -10226,7 +10230,8 @@ (define_insn "*pcrel_extern_addr"
> "TARGET_PCREL"
> "ld %0,%a1"
> [(set_attr "prefixed" "yes")
> - (set_attr "type" "load")])
> + (set_attr "type" "load")
> + (set_attr "loads_extern_addr" "yes")])
>
> ;; TOC register handling.
>
> @@ -14883,3 +14888,4 @@ (define_insn "*cmpeqb_internal"
> (include "dfp.md")
> (include "crypto.md")
> (include "htm.md")
> +(include "pcrel-opt.md")
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index b2a70e88ca8..d1719bfd2a0 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -586,6 +586,10 @@ mpcrel
> Target Report Mask(PCREL) Var(rs6000_isa_flags)
> Generate (do not generate) pc-relative memory addressing.
>
> +mpcrel-opt
> +Target Undocumented Mask(PCREL_OPT) Var(rs6000_isa_flags)
> +Generate (do not generate) pc-relative memory optimizations for externals.
> +
> mmma
> Target Report Mask(MMA) Var(rs6000_isa_flags)
> Generate (do not generate) MMA instructions.
> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
> index 1ddb5729cb2..a617276484e 100644
> --- a/gcc/config/rs6000/t-rs6000
> +++ b/gcc/config/rs6000/t-rs6000
> @@ -23,6 +23,10 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
> TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
> PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
>
> +pcrel-opt.o: $(srcdir)/config/rs6000/pcrel-opt.c
> + $(COMPILE) $<
> + $(POSTCOMPILE)
> +
> rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
> $(COMPILE) $<
> $(POSTCOMPILE)
> @@ -86,4 +90,5 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
> $(srcdir)/config/rs6000/mma.md \
> $(srcdir)/config/rs6000/crypto.md \
> $(srcdir)/config/rs6000/htm.md \
> - $(srcdir)/config/rs6000/dfp.md
> + $(srcdir)/config/rs6000/dfp.md \
> + $(srcdir)/config/rs6000/pcrel-opt.md
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
> new file mode 100644
> index 00000000000..f165068e2be
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE unsigned int
> +
> +/* Test whether using an external variable twice (doing an increment) prevents
> + the PCREL_OPT optimization. */
> +extern TYPE ext;
> +
> +void
> +inc (void)
> +{
> + ext++; /* No PCREL_OPT (uses address twice). */
> +}
> +
> +/* { dg-final { scan-assembler-not "R_PPC64_PCREL_OPT" } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
> new file mode 100644
> index 00000000000..d35862fcb6e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE double
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for
> + double. */
> +extern TYPE ext[];
> +
> +TYPE
> +get (void)
> +{
> + return ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get2 (void)
> +{
> + return ext[2]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get_large (void)
> +{
> + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */
> +}
> +
> +TYPE
> +get_variable (unsigned long n)
> +{
> + return ext[n]; /* No PCREL_OPT (load is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
> new file mode 100644
> index 00000000000..12b51ab2e67
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE long long
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for long
> + long. */
> +extern TYPE ext[];
> +
> +TYPE
> +get (void)
> +{
> + return ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get2 (void)
> +{
> + return ext[2]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get_large (void)
> +{
> + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */
> +}
> +
> +TYPE
> +get_variable (unsigned long n)
> +{
> + return ext[n]; /* No PCREL_OPT (load is indexed). */
> +}
> +
> +double
> +get_double (void)
> +{
> + return (double) ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c
> new file mode 100644
> index 00000000000..4143aeb7371
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE unsigned short
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
> + short. */
> +extern TYPE ext[];
> +
> +TYPE
> +get (void)
> +{
> + return ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get2 (void)
> +{
> + return ext[2]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get_large (void)
> +{
> + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */
> +}
> +
> +TYPE
> +get_variable (unsigned long n)
> +{
> + return ext[n]; /* No PCREL_OPT (load is indexed). */
> +}
> +
> +double
> +get_double (void)
> +{
> + return (double) ext[0]; /* No PCREL_OPT (LXSIHZX is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c
> new file mode 100644
> index 00000000000..30d3236f95c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE unsigned char
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
> + char. */
> +extern TYPE ext[];
> +
> +TYPE
> +get (void)
> +{
> + return ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get2 (void)
> +{
> + return ext[2]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get_large (void)
> +{
> + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */
> +}
> +
> +TYPE
> +get_variable (unsigned long n)
> +{
> + return ext[n]; /* No PCREL_OPT (load is indexed). */
> +}
> +
> +double
> +get_double (void)
> +{
> + return (double) ext[0]; /* No PCREL_OPT (LXSIBZX is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c
> new file mode 100644
> index 00000000000..9d1e2a1956f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE float
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for
> + float. */
> +extern TYPE ext[];
> +
> +TYPE
> +get (void)
> +{
> + return ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get2 (void)
> +{
> + return ext[2]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get_large (void)
> +{
> + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */
> +}
> +
> +TYPE
> +get_variable (unsigned long n)
> +{
> + return ext[n]; /* No PCREL_OPT (load is indexed). */
> +}
> +
> +double
> +get_double (void)
> +{
> + return (double) ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c
> new file mode 100644
> index 00000000000..17be6fa1778
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE int
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for int. */
> +extern TYPE ext[];
> +
> +TYPE
> +get (void)
> +{
> + return ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get2 (void)
> +{
> + return ext[2]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get_large (void)
> +{
> + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */
> +}
> +
> +TYPE
> +get_variable (unsigned long n)
> +{
> + return ext[n]; /* No PCREL_OPT (load is indexed). */
> +}
> +
> +double
> +get_double (void)
> +{
> + return (double) ext[0]; /* No PCREL_OPT (LFIWAX is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c
> new file mode 100644
> index 00000000000..8c12aea5acd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE vector double
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for
> + vector double. */
> +extern TYPE ext[];
> +
> +TYPE
> +get (void)
> +{
> + return ext[0]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get2 (void)
> +{
> + return ext[2]; /* PCREL_OPT relocation. */
> +}
> +
> +TYPE
> +get_large (void)
> +{
> + return ext[LARGE]; /* No PCREL_OPT (load is prefixed). */
> +}
> +
> +TYPE
> +get_variable (unsigned long n)
> +{
> + return ext[n]; /* No PCREL_OPT (load is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c
> new file mode 100644
> index 00000000000..d795d35d8de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE double
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for
> + double. */
> +extern TYPE ext[];
> +
> +void
> +store (TYPE a)
> +{
> + ext[0] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store2 (TYPE a)
> +{
> + ext[2] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store_large (TYPE a)
> +{
> + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */
> +}
> +
> +void
> +store_variable (TYPE a, unsigned long n)
> +{
> + ext[n] = a; /* No PCREL_OPT (store is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c
> new file mode 100644
> index 00000000000..47554394cf7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE long long
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for long
> + long. */
> +extern TYPE ext[];
> +
> +void
> +store (TYPE a)
> +{
> + ext[0] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store2 (TYPE a)
> +{
> + ext[2] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store_large (TYPE a)
> +{
> + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */
> +}
> +
> +void
> +store_variable (TYPE a, unsigned long n)
> +{
> + ext[n] = a; /* No PCREL_OPT (store is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c
> new file mode 100644
> index 00000000000..8822e767dfe
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE unsigned short
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
> + short. */
> +extern TYPE ext[];
> +
> +void
> +store (TYPE a)
> +{
> + ext[0] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store2 (TYPE a)
> +{
> + ext[2] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store_large (TYPE a)
> +{
> + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */
> +}
> +
> +void
> +store_variable (TYPE a, unsigned long n)
> +{
> + ext[n] = a; /* No PCREL_OPT (store is indexed). */
> +}
> +
> +void
> +store_double (double a)
> +{
> + ext[0] = (TYPE) a; /* No PCREL_OPT (STXSIHX is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c
> new file mode 100644
> index 00000000000..2f756833717
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE unsigned char
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
> + char. */
> +extern TYPE ext[];
> +
> +void
> +store (TYPE a)
> +{
> + ext[0] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store2 (TYPE a)
> +{
> + ext[2] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store_large (TYPE a)
> +{
> + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */
> +}
> +
> +void
> +store_variable (TYPE a, unsigned long n)
> +{
> + ext[n] = a; /* No PCREL_OPT (store is indexed). */
> +}
> +
> +void
> +store_double (double a)
> +{
> + ext[0] = (TYPE) a; /* No PCREL_OPT (STXSIBX is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c
> new file mode 100644
> index 00000000000..3dd88aad856
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE float
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for
> + float. */
> +extern TYPE ext[];
> +
> +void
> +store (TYPE a)
> +{
> + ext[0] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store2 (TYPE a)
> +{
> + ext[2] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store_large (TYPE a)
> +{
> + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */
> +}
> +
> +void
> +store_variable (TYPE a, unsigned long n)
> +{
> + ext[n] = a; /* No PCREL_OPT (store is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c
> new file mode 100644
> index 00000000000..78dc8120efe
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE int
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for int. */
> +extern TYPE ext[];
> +
> +void
> +store (TYPE a)
> +{
> + ext[0] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store2 (TYPE a)
> +{
> + ext[2] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store_large (TYPE a)
> +{
> + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */
> +}
> +
> +void
> +store_variable (TYPE a, unsigned long n)
> +{
> + ext[n] = a; /* No PCREL_OPT (store is indexed). */
> +}
> +
> +void
> +store_double (double a)
> +{
> + ext[0] = (TYPE) a; /* No PCREL_OPT (STFIWX is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c
> new file mode 100644
> index 00000000000..2c602eb3103
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_pcrel } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
> +
> +#define TYPE vector double
> +#define LARGE 0x20000
> +
> +/* Test whether we get the right number of PCREL_OPT optimizations for
> + vector double. */
> +extern TYPE ext[];
> +
> +void
> +store (TYPE a)
> +{
> + ext[0] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store2 (TYPE a)
> +{
> + ext[2] = a; /* PCREL_OPT relocation. */
> +}
> +
> +void
> +store_large (TYPE a)
> +{
> + ext[LARGE] = a; /* No PCREL_OPT (store is prefixed). */
> +}
> +
> +void
> +store_variable (TYPE a, unsigned long n)
> +{
> + ext[n] = a; /* No PCREL_OPT (store is indexed). */
> +}
> +
> +/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT" 2 } } */
> --
> 2.18.4
>
More information about the Gcc-patches
mailing list