[PATCH, rs6000] Optimize pcrel access of globals

acsawdey@linux.ibm.com acsawdey@linux.ibm.com
Tue Oct 20 23:29:11 GMT 2020


From: Aaron Sawdey <acsawdey@linux.ibm.com>

This patch implements a RTL pass that looks for pc-relative loads of the
address of an external variable using the PCREL_GOT relocation and a
single load or store that uses that external address. It then uses the
PCREL_OPT relocation to convert that first load into a single pc-relative
load or store to directly access that external variable.

Produced by a cast of thousands:
 * Michael Meissner
 * Peter Bergner
 * Bill Schmidt
 * Alan Modra
 * Segher Boessenkool
 * Aaron Sawdey

Passes bootstrap/regtest on ppc64le power10. OK for trunk?

gcc/ChangeLog:

	* config.gcc: Add pcrel-opt.o.
	* config/rs6000/pcrel-opt.c: New file.
	* config/rs6000/pcrel-opt.md: New file.
	* config/rs6000/predicates.md: Add d_form_memory predicate.
	* config/rs6000/rs6000-cpus.def: Add OPTION_MASK_PCREL_OPT.
	* config/rs6000/rs6000-passes.def: Add pass_pcrel_opt.
	* config/rs6000/rs6000-protos.h: Add reg_to_non_prefixed(),
	offsettable_non_prefixed_memory(), output_pcrel_opt_reloc(),
	and make_pass_pcrel_opt().
	* config/rs6000/rs6000.c (reg_to_non_prefixed): Make global.
	(rs6000_option_override_internal): Add pcrel-opt.
	(rs6000_delegitimize_address): Support pcrel-opt.
	(rs6000_opt_masks): Add pcrel-opt.
	(offsettable_non_prefixed_memory): New function.
	(reg_to_non_prefixed): Make global.
	(rs6000_asm_output_opcode): Reset next_insn_prefixed_p.
	(output_pcrel_opt_reloc): New function.
	* config/rs6000/rs6000.md (loads_extern_addr): New attr.
	(pcrel_extern_addr): Set loads_extern_addr.
	Add include for pcrel-opt.md.
	* config/rs6000/rs6000.opt: Add -mpcrel-opt.
	* config/rs6000/t-rs6000: Add rules for pcrel-opt.c and
	pcrel-opt.md.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pcrel-opt-inc-di.c: New test.
	* gcc.target/powerpc/pcrel-opt-ld-df.c: New test.
	* gcc.target/powerpc/pcrel-opt-ld-di.c: New test.
	* gcc.target/powerpc/pcrel-opt-ld-hi.c: New test.
	* gcc.target/powerpc/pcrel-opt-ld-qi.c: New test.
	* gcc.target/powerpc/pcrel-opt-ld-sf.c: New test.
	* gcc.target/powerpc/pcrel-opt-ld-si.c: New test.
	* gcc.target/powerpc/pcrel-opt-ld-vector.c: New test.
	* gcc.target/powerpc/pcrel-opt-st-df.c: New test.
	* gcc.target/powerpc/pcrel-opt-st-di.c: New test.
	* gcc.target/powerpc/pcrel-opt-st-hi.c: New test.
	* gcc.target/powerpc/pcrel-opt-st-qi.c: New test.
	* gcc.target/powerpc/pcrel-opt-st-sf.c: New test.
	* gcc.target/powerpc/pcrel-opt-st-si.c: New test.
	* gcc.target/powerpc/pcrel-opt-st-vector.c: New test.
---
 gcc/config.gcc                                |   6 +-
 gcc/config/rs6000/pcrel-opt.c                 | 887 ++++++++++++++++++
 gcc/config/rs6000/pcrel-opt.md                | 386 ++++++++
 gcc/config/rs6000/predicates.md               |  23 +
 gcc/config/rs6000/rs6000-cpus.def             |   2 +
 gcc/config/rs6000/rs6000-passes.def           |   8 +
 gcc/config/rs6000/rs6000-protos.h             |   4 +
 gcc/config/rs6000/rs6000.c                    | 116 ++-
 gcc/config/rs6000/rs6000.md                   |   8 +-
 gcc/config/rs6000/rs6000.opt                  |   4 +
 gcc/config/rs6000/t-rs6000                    |   7 +-
 .../gcc.target/powerpc/pcrel-opt-inc-di.c     |  18 +
 .../gcc.target/powerpc/pcrel-opt-ld-df.c      |  36 +
 .../gcc.target/powerpc/pcrel-opt-ld-di.c      |  43 +
 .../gcc.target/powerpc/pcrel-opt-ld-hi.c      |  42 +
 .../gcc.target/powerpc/pcrel-opt-ld-qi.c      |  42 +
 .../gcc.target/powerpc/pcrel-opt-ld-sf.c      |  42 +
 .../gcc.target/powerpc/pcrel-opt-ld-si.c      |  41 +
 .../gcc.target/powerpc/pcrel-opt-ld-vector.c  |  36 +
 .../gcc.target/powerpc/pcrel-opt-st-df.c      |  36 +
 .../gcc.target/powerpc/pcrel-opt-st-di.c      |  37 +
 .../gcc.target/powerpc/pcrel-opt-st-hi.c      |  42 +
 .../gcc.target/powerpc/pcrel-opt-st-qi.c      |  42 +
 .../gcc.target/powerpc/pcrel-opt-st-sf.c      |  36 +
 .../gcc.target/powerpc/pcrel-opt-st-si.c      |  41 +
 .../gcc.target/powerpc/pcrel-opt-st-vector.c  |  36 +
 26 files changed, 2012 insertions(+), 9 deletions(-)
 create mode 100644 gcc/config/rs6000/pcrel-opt.c
 create mode 100644 gcc/config/rs6000/pcrel-opt.md
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index b79c544c9fa..4ab7cd1ee7c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -509,7 +509,7 @@ or1k*-*-*)
 	;;
 powerpc*-*-*)
 	cpu_type=rs6000
-	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
+	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o pcrel-opt.o"
 	extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
 	extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
 	extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
@@ -524,6 +524,7 @@ powerpc*-*-*)
 	esac
 	extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
+	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/pcrel-opt.c"
 	;;
 pru-*-*)
 	cpu_type=pru
@@ -535,8 +536,9 @@ riscv*)
 	;;
 rs6000*-*-*)
 	extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
-	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o"
+	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-call.o pcrel-opt.o"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.c \$(srcdir)/config/rs6000/rs6000-call.c"
+	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/pcrel-opt.c"
 	;;
 sparc*-*-*)
 	cpu_type=sparc
diff --git a/gcc/config/rs6000/pcrel-opt.c b/gcc/config/rs6000/pcrel-opt.c
new file mode 100644
index 00000000000..217db9cc37f
--- /dev/null
+++ b/gcc/config/rs6000/pcrel-opt.c
@@ -0,0 +1,887 @@
+/* Subroutines used to support the pc-relative linker optimization.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file implements a RTL pass that looks for pc-relative loads of the
+   address of an external variable using the PCREL_GOT relocation and a single
+   load that uses that external address.  If that is found we create the
+   PCREL_OPT relocation to possibly convert:
+
+	pld addr_reg,var@pcrel@got
+
+	<possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+	lwz data_reg,0(addr_reg)
+
+   into:
+
+	plwz data_reg,var@pcrel
+
+	<possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+	nop
+
+   If the variable is not defined in the main program or the code using it is
+   not in the main program, the linker puts the address in the .got section
+   and does:
+
+		.section .got
+	.Lvar_got:
+		.dword var
+
+		.section .text
+		pld addr_reg,.Lvar_got@pcrel
+
+		<possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+		lwz data_reg,0(addr_reg)
+
+   We only look for a single usage in the basic block where the external
+   address is loaded.  Multiple uses or references in another basic block will
+   force us to not use the PCREL_OPT relocation.
+
+   We also optimize stores to the address of an external variable using the
+   PCREL_GOT relocation and a single store that uses that external address.  If
+   that is found we create the PCREL_OPT relocation to possibly convert:
+
+	pld addr_reg,var@pcrel@got
+
+	<possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+	stw data_reg,0(addr_reg)
+
+   into:
+
+	pstw data_reg,var@pcrel
+
+	<possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+	nop
+
+   If the variable is not defined in the main program or the code using it is
+   not in the main program, the linker puts the address in the .got section
+   and does:
+
+		.section .got
+	.Lvar_got:
+		.dword var
+
+		.section .text
+		pld addr_reg,.Lvar_got@pcrel
+
+		<possibly other insns that do not use 'addr_reg' or 'data_reg'>
+
+		stw data_reg,0(addr_reg)
+
+   We only look for a single usage in the basic block where the external
+   address is loaded.  Multiple uses or references in another basic block will
+   force us to not use the PCREL_OPT relocation.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "expmed.h"
+#include "optabs.h"
+#include "recog.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+#include "rtx-vector-builder.h"
+#include "print-rtl.h"
+#include "insn-attr.h"
+#include "insn-codes.h"
+
+/* Statistics for the dump file.  Reset at the start of each pass run.  */
+static struct {
+  unsigned long extern_addrs;		/* address loads examined.  */
+  unsigned long loads;			/* loads converted.  */
+  unsigned long adjacent_loads;		/* ... with address insn adjacent.  */
+  unsigned long failed_loads;		/* load conversions rejected.  */
+  unsigned long stores;			/* stores converted.  */
+  unsigned long adjacent_stores;	/* ... with address insn adjacent.  */
+  unsigned long failed_stores;		/* store conversions rejected.  */
+} counters;
+
+/* Allocate the next PCREL_OPT marker.  The marker ties the insn that loads
+   the external address to the load/store insn that uses it; it is a constant
+   integer which is added to ".Lpcrel" to make the label.  */
+
+static rtx
+pcrel_opt_next_marker (void)
+{
+  /* Last marker number handed out; the first marker returned is 1.  */
+  static unsigned int marker_counter;
+
+  return GEN_INT (++marker_counter);
+}
+
+/* Optimize a PC-relative load address to be used in a load.
+
+   If the sequence of insns is safe to use the PCREL_OPT optimization (i.e. no
+   additional references to the address register, the address register dies at
+   the load, and no references to the load), convert insns of the form:
+
+	(set (reg:DI addr)
+	     (symbol_ref:DI "ext_symbol"))
+
+	...
+
+	(set (reg:<MODE> value)
+	     (mem:<MODE> (reg:DI addr)))
+
+   into:
+
+	(parallel [(set (reg:DI addr)
+			(unspec:<MODE> [(symbol_ref:DI "ext_symbol")
+					(const_int label_num)
+					(const_int 0)]
+				       UNSPEC_PCREL_OPT_LD_ADDR))
+		   (set (reg:DI data)
+			(unspec:DI [(const_int 0)]
+				   UNSPEC_PCREL_OPT_LD_ADDR))])
+
+	...
+
+	(parallel [(set (reg:<MODE>)
+			(unspec:<MODE> [(mem:<MODE> (reg:DI addr))
+					(reg:DI data)
+					(const_int label_num)]
+				       UNSPEC_PCREL_OPT_LD_RELOC))
+		   (clobber (reg:DI addr))])
+
+   If the register being loaded is the same register that was used to hold the
+   external address, we generate the following insn instead:
+
+	(set (reg:DI data)
+	     (unspec:DI [(symbol_ref:DI "ext_symbol")
+			 (const_int label_num)
+			 (const_int 1)]
+			UNSPEC_PCREL_OPT_LD_ADDR))
+
+   In the first insn, we set the address of the external variable and also
+   mark the register being loaded as set, so that both registers are created
+   in that insn and consumed in the second insn.  The mode of the register we
+   will ultimately load into does not matter, so we use DImode.  We just need
+   to mark that both registers may be set in the first insn, and will be used
+   in the second insn.
+
+   The UNSPEC_PCREL_OPT_LD_ADDR insn will generate the load address plus
+   a definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_LD_RELOC
+   insn will generate the .reloc to tell the linker to tie the load address and
+   load using that address together.
+
+	pld b,ext_symbol@got@pcrel
+   .Lpcrel1:
+
+	...
+
+	.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+	lwz r,0(b)
+
+   If ext_symbol is defined in another object file in the main program and we
+   are linking the main program, the linker will convert the above instructions
+   to:
+
+	plwz r,ext_symbol@pcrel
+
+	...
+
+	nop  */
+
+static void
+pcrel_opt_load (rtx_insn *addr_insn,		/* insn loading address.  */
+		rtx_insn *load_insn)		/* insn using address.  */
+{
+  rtx addr_set = PATTERN (addr_insn);
+  gcc_assert (GET_CODE (addr_set) == SET);
+
+  rtx addr_reg = SET_DEST (addr_set);
+  gcc_assert (base_reg_operand (addr_reg, Pmode));
+
+  rtx addr_symbol = SET_SRC (addr_set);
+  gcc_assert (pcrel_external_address (addr_symbol, Pmode));
+
+  rtx load_set = PATTERN (load_insn);
+  gcc_assert (GET_CODE (load_set) == SET);
+
+  /* Make sure there are no references to the register being loaded in between
+     the two insns.  */
+  rtx reg = SET_DEST (load_set);
+  if (!register_operand (reg, GET_MODE (reg))
+      || reg_used_between_p (reg, addr_insn, load_insn)
+      || reg_set_between_p (reg, addr_insn, load_insn))
+    return;
+
+  rtx mem = SET_SRC (load_set);
+  machine_mode reg_mode = GET_MODE (reg);
+  machine_mode mem_mode = GET_MODE (mem);
+  rtx mem_inner = mem;
+  unsigned int reg_regno = reg_or_subregno (reg);
+
+  /* LWA is a DS format instruction, but LWZ is a D format instruction.  We use
+     DImode for the mode to force checking whether the bottom 2 bits are 0.
+     However FPR and vector registers uses the LFIWAX/LXSIWAX instructions
+     which only have indexed forms.  */
+  if (GET_CODE (mem) == SIGN_EXTEND && GET_MODE (XEXP (mem, 0)) == SImode)
+    {
+      if (!INT_REGNO_P (reg_regno))
+	return;
+
+      mem_inner = XEXP (mem, 0);
+      mem_mode = DImode;
+    }
+
+  else if (GET_CODE (mem) == SIGN_EXTEND
+	   || GET_CODE (mem) == ZERO_EXTEND
+	   || GET_CODE (mem) == FLOAT_EXTEND)
+    {
+      mem_inner = XEXP (mem, 0);
+      mem_mode = GET_MODE (mem_inner);
+    }
+
+  if (!MEM_P (mem_inner))
+    return;
+
+  /* If the address isn't a non-prefixed offsettable instruction, we can't do
+     the optimization.  */
+  if (!offsettable_non_prefixed_memory (reg, mem_mode, mem_inner))
+    return;
+
+  /* Allocate a new PC-relative label, and update the load external address
+     insn.
+
+     If the register being loaded is different from the address register, we
+     need to indicate both registers are set at the load of the address.
+
+	(parallel [(set (reg load)
+			(unspec [(symbol_ref addr_symbol)
+				 (const_int label_num)]
+				UNSPEC_PCREL_OPT_LD_ADDR))
+		   (set (reg addr)
+			(unspec [(const_int 0)]
+				UNSPEC_PCREL_OPT_LD_ADDR))])
+
+     If the register being loaded is the same as the address register, we use
+     an alternate form:
+
+	(set (reg load)
+	     (unspec [(symbol_ref addr_symbol)
+		      (const_int label_num)]
+		     UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))  */
+  unsigned int addr_regno = reg_or_subregno (addr_reg);
+  rtx label_num = pcrel_opt_next_marker ();
+  rtx reg_di = gen_rtx_REG (DImode, reg_regno);
+  rtx addr_pattern;
+
+  /* Create the load address, either using the pattern with an explicit clobber
+     if the address register is not the same as the register being loaded, or
+     using the pattern that requires the address register to be the address
+     loaded.  */
+  if (addr_regno != reg_regno)
+    addr_pattern = gen_pcrel_opt_ld_addr (addr_reg, addr_symbol, label_num,
+					  reg_di);
+  else
+    addr_pattern = gen_pcrel_opt_ld_addr_same_reg (addr_reg, addr_symbol,
+						   label_num);
+
+  validate_change (addr_insn, &PATTERN (addr_insn), addr_pattern, true);
+
+  /* Update the load insn.  If the mem had a sign/zero/float extend, add that
+     also after doing the UNSPEC.  Add an explicit clobber of the external
+     address register just to make it clear that the address register dies.
+
+	(parallel [(set (reg:<MODE> data)
+			(unspec:<MODE> [(mem (addr_reg)
+					(reg:DI data)
+					(const_int label_num)]
+				       UNSPEC_PCREL_OPT_LD_RELOC))
+		   (clobber (reg:DI addr_reg))])  */
+  rtvec v_load = gen_rtvec (3, mem_inner, reg_di, label_num);
+  rtx new_load = gen_rtx_UNSPEC (GET_MODE (mem_inner), v_load,
+				 UNSPEC_PCREL_OPT_LD_RELOC);
+
+  if (GET_CODE (mem) != GET_CODE (mem_inner))
+    new_load = gen_rtx_fmt_e (GET_CODE (mem), reg_mode, new_load);
+
+  rtx new_load_set = gen_rtx_SET (reg, new_load);
+  rtx load_clobber = gen_rtx_CLOBBER (VOIDmode,
+				      (addr_regno == reg_regno
+				       ? gen_rtx_SCRATCH (Pmode)
+				       : addr_reg));
+  rtx new_load_pattern
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_load_set, load_clobber));
+
+  validate_change (load_insn, &PATTERN (load_insn), new_load_pattern, true);
+
+  /* Both changes are queued; apply them together or not at all.  */
+  if (apply_change_group ())
+    {
+      /* PCREL_OPT load optimization succeeded.  */
+      counters.loads++;
+      if (next_nonnote_insn (addr_insn) == load_insn)
+	counters.adjacent_loads++;
+
+      if (dump_file)
+	fprintf (dump_file,
+		 "PCREL_OPT load (addr insn = %d, use insn = %d).\n",
+		 INSN_UID (addr_insn),
+		 INSN_UID (load_insn));
+
+      df_analyze ();
+    }
+  else
+    {
+      /* PCREL_OPT load optimization did not succeed.  */
+      counters.failed_loads++;
+      if (dump_file)
+	fprintf (dump_file,
+		 "PCREL_OPT load failed (addr insn = %d, use insn = %d).\n",
+		 INSN_UID (addr_insn),
+		 INSN_UID (load_insn));
+    }
+
+  return;
+}
+
+/* Optimize a PC-relative load address to be used in a store.
+
+   If it is safe to use the PCREL_OPT optimization (i.e. no additional
+   references to the address register, the address register dies at the store,
+   and the stored value is not modified in between), convert insns of the form:
+
+	(set (reg:DI addr)
+	     (symbol_ref:DI "ext_symbol"))
+
+	...
+
+	(set (mem:<MODE> (reg:DI addr))
+	     (reg:<MODE> value))
+
+   into:
+
+	(parallel [(set (reg:DI addr)
+			(unspec:DI [(symbol_ref:DI "ext_symbol")
+				    (const_int label_num)]
+				  UNSPEC_PCREL_OPT_ST_ADDR))
+		  (use (reg:<MODE> value))])
+
+	...
+
+	(parallel [(set (mem:<MODE> (reg:DI addr))
+			(unspec:<MODE> [(reg:<MODE>)
+					(const_int label_num)]
+				       UNSPEC_PCREL_OPT_ST_RELOC))
+		   (clobber (reg:DI addr))])
+
+   The UNSPEC_PCREL_OPT_ST_ADDR insn will generate the load address plus a
+   definition of a label (.Lpcrel<n>), while the UNSPEC_PCREL_OPT_ST_RELOC insn
+   will generate the .reloc to tell the linker to tie the load address and the
+   store using that address together.
+
+	pld b,ext_symbol@got@pcrel
+   .Lpcrel1:
+
+	...
+
+	.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+	stw r,0(b)
+
+   If ext_symbol is defined in another object file in the main program and we
+   are linking the main program, the linker will convert the above instructions
+   to:
+
+	pstw r,ext_symbol@pcrel
+
+	...
+
+	nop  */
+
+static void
+pcrel_opt_store (rtx_insn *addr_insn,		/* insn loading address.  */
+		 rtx_insn *store_insn)		/* insn using address.  */
+{
+  rtx addr_old_set = PATTERN (addr_insn);
+  gcc_assert (GET_CODE (addr_old_set) == SET);
+
+  rtx addr_reg = SET_DEST (addr_old_set);
+  gcc_assert (base_reg_operand (addr_reg, Pmode));
+
+  rtx addr_symbol = SET_SRC (addr_old_set);
+  gcc_assert (pcrel_external_address (addr_symbol, Pmode));
+
+  rtx store_set = PATTERN (store_insn);
+  gcc_assert (GET_CODE (store_set) == SET);
+
+  rtx mem = SET_DEST (store_set);
+  if (!MEM_P (mem))
+    return;
+
+  machine_mode mem_mode = GET_MODE (mem);
+  rtx reg = SET_SRC (store_set);
+
+  /* The value stored must be in a register, must not be the address register
+     itself, and must not be modified between the two insns.  */
+  if (!register_operand (reg, GET_MODE (reg))
+      || reg_or_subregno (reg) == reg_or_subregno (addr_reg)
+      || reg_set_between_p (reg, addr_insn, store_insn))
+    return;
+
+  /* If the address isn't a non-prefixed offsettable instruction, we can't do
+     the optimization.  */
+  if (!offsettable_non_prefixed_memory (reg, mem_mode, mem))
+    return;
+
+  /* Allocate a new PC-relative label, and update the load address insn.
+
+	(parallel [(set (reg addr)
+			(unspec [(symbol_ref symbol)
+				 (const_int label_num)]
+				UNSPEC_PCREL_OPT_ST_ADDR))
+		   (use (reg store))])  */
+  rtx label_num = pcrel_opt_next_marker ();
+  rtvec v_addr = gen_rtvec (2, addr_symbol, label_num);
+  rtx addr_unspec = gen_rtx_UNSPEC (Pmode, v_addr,
+				   UNSPEC_PCREL_OPT_ST_ADDR);
+  rtx addr_new_set = gen_rtx_SET (addr_reg, addr_unspec);
+  rtx addr_use = gen_rtx_USE (VOIDmode, reg);
+  rtx addr_new_pattern
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, addr_new_set, addr_use));
+
+  validate_change (addr_insn, &PATTERN (addr_insn), addr_new_pattern, true);
+
+  /* Update the store insn.  Add an explicit clobber of the external address
+     register just to be sure there are no additional uses of the address
+     register.
+
+	(parallel [(set (mem (addr_reg)
+			(unspec:<MODE> [(reg)
+					(const_int label_num)]
+				       UNSPEC_PCREL_OPT_ST_RELOC))
+		  (clobber (reg:DI addr_reg))])  */
+  rtvec v_store = gen_rtvec (2, reg, label_num);
+  rtx new_store = gen_rtx_UNSPEC (mem_mode, v_store,
+				  UNSPEC_PCREL_OPT_ST_RELOC);
+
+  rtx new_store_set = gen_rtx_SET (mem, new_store);
+  rtx store_clobber = gen_rtx_CLOBBER (VOIDmode, addr_reg);
+  rtx new_store_pattern
+    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, new_store_set, store_clobber));
+
+  validate_change (store_insn, &PATTERN (store_insn), new_store_pattern, true);
+
+  /* Both changes are queued; apply them together or not at all.  */
+  if (apply_change_group ())
+    {
+      /* PCREL_OPT store succeeded.  */
+      counters.stores++;
+      if (next_nonnote_insn (addr_insn) == store_insn)
+	counters.adjacent_stores++;
+
+      if (dump_file)
+	fprintf (dump_file,
+		 "PCREL_OPT store (addr insn = %d, use insn = %d).\n",
+		 INSN_UID (addr_insn),
+		 INSN_UID (store_insn));
+
+      df_analyze ();
+    }
+  else
+    {
+      /* PCREL_OPT store failed.  */
+      counters.failed_stores++;
+      if (dump_file)
+	fprintf (dump_file,
+		 "PCREL_OPT store failed (addr insn = %d, use insn = %d).\n",
+		 INSN_UID (addr_insn),
+		 INSN_UID (store_insn));
+    }
+
+  return;
+}
+
+/* Return the register used as the base register of MEM.  We look for
+   BSWAP and UNSPEC (which might be LFIWAX/LFIWZX/STFIWX) to exclude
+   instructions that do not have a pc-relative form.  We don't
+   explicitly look for lxvd2x (rotate or vec_select) because we do not
+   expect to see that generated for p9 or newer anyway.  */
+
+static rtx
+get_mem_base_reg (rtx mem)
+{
+  /* Peel off wrappers such as zero_extend until we reach the MEM.  */
+  while (!MEM_P (mem))
+    {
+      enum rtx_code wrapper = GET_CODE (mem);
+
+      /* Reject BSWAP/UNSPEC, and codes whose first operand is not an rtx.  */
+      if (wrapper == BSWAP
+	  || wrapper == UNSPEC
+	  || GET_RTX_LENGTH (wrapper) < 1
+	  || GET_RTX_FORMAT (wrapper)[0] != 'e')
+	return NULL_RTX;
+
+      mem = XEXP (mem, 0);
+      if (!mem)
+	return NULL_RTX;
+    }
+
+  if (!MEM_SIZE_KNOWN_P (mem))
+    return NULL_RTX;
+
+  /* For a PRE_MODIFY address, continue with its second operand.  */
+  rtx base = XEXP (mem, 0);
+  if (GET_CODE (base) == PRE_MODIFY)
+    base = XEXP (base, 1);
+
+  /* Step over constant displacements to reach the base register.  */
+  while (GET_CODE (base) == PLUS && CONST_INT_P (XEXP (base, 1)))
+    base = XEXP (base, 0);
+
+  return REG_P (base) ? base : NULL_RTX;
+}
+
+/* Check whether INSN contains an invalid reference to REGNO.  If TYPE is a
+   load or store instruction, then we cannot allow any definitions of REGNO.
+   If TYPE is a load instruction, then we cannot allow any uses either.  */
+
+static bool
+insn_references_regno_p (rtx_insn *insn, unsigned int regno, enum attr_type type)
+{
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+
+  /* An insn with no dataflow record has no references to check.  */
+  if (!insn_info)
+    return false;
+
+  /* All definitions of REGNO are invalid.  */
+  for (df_ref r = DF_INSN_INFO_DEFS (insn_info); r; r = DF_REF_NEXT_LOC (r))
+    if (DF_REF_REGNO (r) == regno)
+      return true;
+
+  /* Any uses of REGNO are invalid if we're attempting to optimize a load.  */
+  if (type == TYPE_LOAD || type == TYPE_FPLOAD || type == TYPE_VECLOAD)
+    for (df_ref r = DF_INSN_INFO_USES (insn_info); r; r = DF_REF_NEXT_LOC (r))
+      if (DF_REF_REGNO (r) == regno)
+	return true;
+  return false;
+}
+
+/* Given an insn that loads up a base register with the address of an
+   external symbol, see if we can optimize it with the PCREL_OPT
+   optimization.  */
+
+static void
+pcrel_opt_address (rtx_insn *addr_insn)
+{
+  counters.extern_addrs++;
+
+  /* Do some basic validation.  */
+  rtx addr_set = PATTERN (addr_insn);
+  if (GET_CODE (addr_set) != SET)
+    return;
+
+  rtx addr_reg = SET_DEST (addr_set);
+  rtx addr_symbol = SET_SRC (addr_set);
+
+  if (!base_reg_operand (addr_reg, Pmode)
+      || !pcrel_external_address (addr_symbol, Pmode))
+    return;
+
+  /* The address insn must have exactly one register definition.  */
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (addr_insn);
+  if (!insn_info)
+    return;
+
+  df_ref def = df_single_def (insn_info);
+  if (!def)
+    return;
+
+  /* Make sure there is at least one use.  */
+  df_link *chain = DF_REF_CHAIN (def);
+  if (!chain || !chain->ref)
+    return;
+
+  /* Get the insn of the possible load or store.  */
+  rtx_insn *use_insn = DF_REF_INSN (chain->ref);
+
+  /* Ensure there are no other uses, ignoring uses in debug insns.  */
+  for (chain = chain->next; chain; chain = chain->next)
+    if (chain->ref && DF_REF_INSN_INFO (chain->ref))
+      {
+	gcc_assert (DF_REF_INSN (chain->ref));
+	if (NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
+	  return;
+      }
+
+  /* The use instruction must be a single non-prefixed instruction.  */
+  if (get_attr_length (use_insn) != 4)
+    return;
+
+  /* The address and the memory operation must be in the same basic block.  */
+  if (BLOCK_FOR_INSN (use_insn) != BLOCK_FOR_INSN (addr_insn))
+    return;
+
+  /* If this isn't a simple SET, skip doing the optimization.  */
+  if (GET_CODE (PATTERN (use_insn)) != SET)
+    return;
+
+  /* Classify the insn using the address, and find the register that it loads
+     or stores (the non-address register).  */
+  rtx_insn *insn;
+  rtx_insn *last_insn_in_bb = BB_END (BLOCK_FOR_INSN (use_insn));
+  enum attr_type use_insn_type = get_attr_type (use_insn);
+  unsigned int use_regno;
+
+  switch (use_insn_type)
+    {
+    case TYPE_LOAD:
+    case TYPE_FPLOAD:
+    case TYPE_VECLOAD:
+      /* The address register must be the base register of the load, and the
+	 value must be loaded into a plain register before we use REGNO.  */
+      if (addr_reg != get_mem_base_reg (SET_SRC (PATTERN (use_insn)))
+	  || !REG_P (SET_DEST (PATTERN (use_insn))))
+	return;
+      use_regno = REGNO (SET_DEST (PATTERN (use_insn)));
+      break;
+    case TYPE_STORE:
+    case TYPE_FPSTORE:
+    case TYPE_VECSTORE:
+      /* The address register must be the base register of the store, and the
+	 value stored must be a plain register before we use REGNO.  */
+      if (addr_reg != get_mem_base_reg (SET_DEST (PATTERN (use_insn)))
+	  || !REG_P (SET_SRC (PATTERN (use_insn))))
+	return;
+      use_regno = REGNO (SET_SRC (PATTERN (use_insn)));
+      break;
+    default:
+      /* We can only optimize loads and stores.  Ignore everything else.  */
+      return;
+    }
+
+  for (insn = NEXT_INSN (addr_insn); insn != use_insn; insn = NEXT_INSN (insn))
+    {
+      /* If we see things like labels, calls, etc., or we've reached the end
+	 of the block without seeing the load or store, then don't do the
+	 PCREL_OPT optimization.  */
+      if (!insn
+	  || LABEL_P (insn)
+	  || JUMP_P (insn)
+	  || CALL_P (insn)
+	  || BARRIER_P (insn)
+	  || insn == last_insn_in_bb)
+	return;
+
+      /* For a normal insn, see if it is a load or store.  */
+      if (NONDEBUG_INSN_P (insn)
+	  && GET_CODE (PATTERN (insn)) != USE
+	  && GET_CODE (PATTERN (insn)) != CLOBBER)
+	{
+	  switch (get_attr_type (insn))
+	    {
+	    case TYPE_LOAD:
+	      /* While load of the external address is a 'load' for scheduling
+		 purposes, it should be safe to allow loading other external
+		 addresses between the load of the external address we are
+		 currently looking at and the load or store using that
+		 address.  */
+	      if (get_attr_loads_extern_addr (insn) == LOADS_EXTERN_ADDR_YES)
+		break;
+	      /* fall through */
+
+	    case TYPE_FPLOAD:
+	    case TYPE_VECLOAD:
+	      /* Don't do the PCREL_OPT store optimization if there is a load
+		 operation.  For example, the load might be trying to load the
+		 value being stored in between getting the address and doing
+		 the store.  */
+	      if (use_insn_type == TYPE_STORE
+		  || use_insn_type == TYPE_FPSTORE
+		  || use_insn_type == TYPE_VECSTORE)
+		return;
+	      break;
+
+	    case TYPE_STORE:
+	    case TYPE_FPSTORE:
+	    case TYPE_VECSTORE:
+	      /* Don't do the PCREL_OPT load optimization if there is a store
+		 operation.  Perhaps the store might be to the global variable
+		 through a pointer.  */
+	      return;
+
+	    case TYPE_LOAD_L:
+	    case TYPE_STORE_C:
+	    case TYPE_HTM:
+	    case TYPE_HTMSIMPLE:
+	      /* Don't do the optimization through atomic operations.  */
+	      return;
+
+	    default:
+	      break;
+	    }
+	}
+
+      /* Check for invalid references of the non-address register that is
+	 used in the load or store instruction.  */
+      if (insn_references_regno_p (insn, use_regno, use_insn_type))
+	return;
+    }
+
+  /* Is this a load or a store?  */
+  switch (use_insn_type)
+    {
+    case TYPE_LOAD:
+    case TYPE_FPLOAD:
+    case TYPE_VECLOAD:
+      pcrel_opt_load (addr_insn, use_insn);
+      break;
+
+    case TYPE_STORE:
+    case TYPE_FPSTORE:
+    case TYPE_VECSTORE:
+      pcrel_opt_store (addr_insn, use_insn);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Pass entry point: try PCREL_OPT on each external address load in FUN.  */
+
+static unsigned int
+pcrel_opt_pass (function *fun)
+{
+  basic_block bb;
+  rtx_insn *insn, *curr_insn = 0;
+
+  memset ((char *) &counters, '\0', sizeof (counters));
+
+  /* Build the def-use/use-def chains that pcrel_opt_address walks.  */
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
+  df_note_add_problem ();
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN | DF_LR_RUN_DCE);
+
+  if (dump_file)
+    fprintf (dump_file, "\n");
+
+  /* Scan every insn of every basic block for one that loads the address of
+     an external variable (loads_extern_addr attribute), and try to pair it
+     with the single load/store using that address.  */
+  FOR_ALL_BB_FN (bb, fun)
+    {
+      FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+	{
+	  if (NONJUMP_INSN_P (insn)
+	      && single_set (insn)
+	      && get_attr_loads_extern_addr (insn) == LOADS_EXTERN_ADDR_YES)
+	    pcrel_opt_address (insn);
+	}
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file,
+	       "\n# of load(s) of an address of an external symbol = %lu\n",
+	       counters.extern_addrs);
+
+      fprintf (dump_file, "# of PCREL_OPT load(s) = %lu (adjacent %lu)\n",
+	       counters.loads, counters.adjacent_loads);
+
+      if (counters.failed_loads)
+	fprintf (dump_file, "# of failed PCREL_OPT load(s) = %lu\n",
+		 counters.failed_loads);
+
+      fprintf (dump_file, "# of PCREL_OPT store(s) = %lu (adjacent %lu)\n",
+	       counters.stores, counters.adjacent_stores);
+
+      if (counters.failed_stores)
+	fprintf (dump_file, "# of failed PCREL_OPT store(s) = %lu\n",
+		 counters.failed_stores);
+
+      fprintf (dump_file, "\n");
+    }
+
+  df_remove_problem (df_chain);		/* Drop the chains added above.  */
+  df_process_deferred_rescans ();
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS | DF_LR_RUN_DCE);
+  df_analyze ();
+  return 0;
+}
+
+/* Pass descriptor for the PCREL_OPT RTL pass.  */
+const pass_data pass_data_pcrel_opt =
+{
+  RTL_PASS,			/* type.  */
+  "pcrel_opt",			/* name.  */
+  OPTGROUP_NONE,		/* optinfo_flags.  */
+  TV_NONE,			/* tv_id.  */
+  0,				/* properties_required.  */
+  0,				/* properties_provided.  */
+  0,				/* properties_destroyed.  */
+  0,				/* todo_flags_start.  */
+  TODO_df_finish,		/* todo_flags_finish.  */
+};
+
+/* The PCREL_OPT pass object.  */
+class pcrel_opt : public rtl_opt_pass
+{
+public:
+  pcrel_opt (gcc::context *ctxt)
+  : rtl_opt_pass (pass_data_pcrel_opt, ctxt)
+  {}
+
+  ~pcrel_opt (void)
+  {}
+
+  /* opt_pass methods.  The gate needs optimization and both pcrel flags.  */
+  virtual bool gate (function *)
+  {
+    return (TARGET_PCREL && TARGET_PCREL_OPT && optimize);
+  }
+
+  virtual unsigned int execute (function *fun)
+  {
+    return pcrel_opt_pass (fun);
+  }
+
+  opt_pass *clone ()
+  {
+    return new pcrel_opt (m_ctxt);
+  }
+};
+
+rtl_opt_pass *
+make_pass_pcrel_opt (gcc::context *ctxt)
+{
+  return new pcrel_opt (ctxt);	/* A fresh pass object on every call.  */
+}
diff --git a/gcc/config/rs6000/pcrel-opt.md b/gcc/config/rs6000/pcrel-opt.md
new file mode 100644
index 00000000000..95338f9d2fe
--- /dev/null
+++ b/gcc/config/rs6000/pcrel-opt.md
@@ -0,0 +1,386 @@
+;; Machine description for the PCREL_OPT optimization.
+;; Copyright (C) 2020 Free Software Foundation, Inc.
+;; Contributed by Michael Meissner (meissner@linux.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Support for the PCREL_OPT optimization.  PCREL_OPT looks for instances where
+;; an external variable is used only once, either for reading or for writing.
+;;
+;; If we are optimizing a single read, normally the code would look like:
+;;
+;;	(set (reg:DI <ptr>)
+;;	     (symbol_ref:DI "<extern_addr>"))	# <data> is currently dead
+;;
+;;		...	# insns do not need to be adjacent
+;;
+;;	(set (reg:SI <data>)
+;;	     (mem:SI (reg:DI <xxx>)))		# <ptr> dies with this insn
+;;
+;; We transform this into:
+;;
+;;	(parallel [(set (reg:DI <ptr>)
+;;			(unspec:DI [(symbol_ref:DI <extern_addr>)
+;;				    (const_int <marker>)]
+;;				   UNSPEC_PCREL_OPT_LD_ADDR))
+;;		   (set (reg:DI <data>)
+;;			(unspec:DI [(const_int 0)]
+;;				   UNSPEC_PCREL_OPT_LD_ADDR))])
+;;
+;;	...
+;;
+;;	(parallel [(set (reg:SI <data>)
+;;		   (unspec:SI [(mem:SI (reg:DI <ptr>))
+;;			       (reg:DI <data>)
+;;			       (const_int <marker>)]
+;;			      UNSPEC_PCREL_OPT_LD_RELOC))
+;;		   (clobber (reg:DI <ptr>))])
+;;
+;; The marker is an integer constant that links the load of the external
+;; address to the load of the actual variable.
+;;
+;; In the first insn, we set the register holding the address of the external
+;; variable and also mark the register that will receive the loaded value as
+;; set, so that both are created in that insn and consumed in the second insn.
+;; The mode of the register that will ultimately be loaded does not matter
+;; here, so we use DImode.  We just need to mark that both registers may be set
+;; in the first insn, and will be used in the second insn.
+;;
+;; Since we use UNSPECs and link both the register holding the external
+;; address and the value being loaded, it should prevent other passes from
+;; modifying it.
+;;
+;; If the register being loaded is the same as the base register, we use an
+;; alternate form of the insns.
+;;
+;;	(set (reg:DI <data_ptr>)
+;;	     (unspec:DI [(symbol_ref:DI <extern_addr>)
+;;			 (const_int <marker>)]
+;;			UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
+;;
+;;	...
+;;
+;;	(parallel [(set (reg:SI <data>)
+;;		   (unspec:SI [(mem:SI (reg:DI <ptr>))
+;;			       (reg:DI <data>)
+;;			       (const_int <marker>)]
+;;			      UNSPEC_PCREL_OPT_LD_RELOC))
+;;		   (clobber (reg:DI <ptr>))])
+
+(define_c_enum "unspec"
+  [UNSPEC_PCREL_OPT_LD_ADDR
+   UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG
+   UNSPEC_PCREL_OPT_LD_RELOC
+   UNSPEC_PCREL_OPT_ST_ADDR
+   UNSPEC_PCREL_OPT_ST_RELOC])
+
+;; Modes that are supported for PCREL_OPT
+(define_mode_iterator PO [QI HI SI DI TI SF DF KF
+			  V1TI V2DI V4SI V8HI V16QI V2DF V4SF
+			  (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
+
+;; Vector modes for PCREL_OPT
+(define_mode_iterator PO_VECT [TI KF V1TI V2DI V4SI V8HI V16QI V2DF V4SF
+			       (TF "TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD")])
+
+;; Insn for loading the external address, where the register receiving the
+;; address is not the same as the register being loaded with the data.
+(define_insn "pcrel_opt_ld_addr"
+  [(set (match_operand:DI 0 "base_reg_operand" "=&b,&b")
+	(unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+		    (match_operand 2 "const_int_operand" "n,n")]
+		   UNSPEC_PCREL_OPT_LD_ADDR))
+   (set (match_operand:DI 3 "gpc_reg_operand" "=r,wa")
+	(unspec:DI [(const_int 0)]
+		   UNSPEC_PCREL_OPT_LD_ADDR))]
+  "TARGET_PCREL_OPT
+   && reg_or_subregno (operands[0]) != reg_or_subregno (operands[3])"
+  "ld %0,%a1\n.Lpcrel%2:"
+  [(set_attr "prefixed" "yes")
+   (set_attr "type" "load")
+   (set_attr "loads_extern_addr" "yes")])
+
+;; Alternate form of loading up the external address that is the same register
+;; as the final load.
+(define_insn "pcrel_opt_ld_addr_same_reg"
+  [(set (match_operand:DI 0 "base_reg_operand" "=b")
+	(unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+		    (match_operand 2 "const_int_operand" "n")]
+		   UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))]
+  "TARGET_PCREL_OPT"
+  "ld %0,%a1\n.Lpcrel%2:"
+  [(set_attr "prefixed" "yes")
+   (set_attr "type" "load")
+   (set_attr "loads_extern_addr" "yes")])
+
+;; PCREL_OPT modes that are optimized for loading or storing GPRs.
+(define_mode_iterator PO_GPR [QI HI SI DI SF DF])
+
+(define_mode_attr PO_GPR_LD [(QI "lbz")
+			     (HI "lhz")
+			     (SI "lwz")
+			     (SF "lwz")
+			     (DI "ld")
+			     (DF "ld")])
+
+;; PCREL_OPT load operation of GPRs.  Operand 4 (the register used to hold the
+;; address of the external symbol) is SCRATCH if the same register is used for
+;; the normal load.
+(define_insn "*pcrel_opt_ld<mode>_gpr"
+  [(parallel [(set (match_operand:PO_GPR 0 "int_reg_operand" "+r")
+		   (unspec:PO_GPR [(match_operand:PO_GPR 1 "d_form_memory" "o")
+				   (match_operand:DI 2 "int_reg_operand" "0")
+				   (match_operand 3 "const_int_operand" "n")]
+				  UNSPEC_PCREL_OPT_LD_RELOC))
+	      (clobber (match_scratch:DI 4 "=bX"))])]
+  "TARGET_PCREL_OPT
+   && (GET_CODE (operands[4]) == SCRATCH
+       || reg_mentioned_p (operands[4], operands[1]))"
+{
+  output_pcrel_opt_reloc (operands[3]);
+  return "<PO_GPR_LD> %0,%1";
+}
+  [(set_attr "type" "load")])
+
+;; PCREL_OPT load with sign/zero extension
+(define_insn "*pcrel_opt_ldsi_<u><mode>_gpr"
+  [(set (match_operand:EXTSI 0 "int_reg_operand" "+r")
+	(any_extend:EXTSI
+	 (unspec:SI [(match_operand:SI 1 "d_form_memory" "o")
+		     (match_operand:DI 2 "int_reg_operand" "0")
+		     (match_operand 3 "const_int_operand" "n")]
+		     UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_scratch:DI 4 "=bX"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[3]);
+  return "lw<az> %0,%1";
+}
+  [(set_attr "type" "load")])
+
+(define_insn "*pcrel_opt_ldhi_<u><mode>_gpr"
+  [(set (match_operand:EXTHI 0 "int_reg_operand" "+r")
+	(any_extend:EXTHI
+	 (unspec:HI [(match_operand:HI 1 "d_form_memory" "o")
+		     (match_operand:DI 2 "int_reg_operand" "0")
+		     (match_operand 3 "const_int_operand" "n")]
+		     UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_scratch:DI 4 "=bX"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[3]);
+  return "lh<az> %0,%1";
+}
+  [(set_attr "type" "load")])
+
+(define_insn "*pcrel_opt_ldqi_u<mode>_gpr"
+  [(set (match_operand:EXTQI 0 "int_reg_operand" "+r")
+	(zero_extend:EXTQI
+	 (unspec:QI [(match_operand:QI 1 "d_form_memory" "o")
+		     (match_operand:DI 2 "int_reg_operand" "0")
+		     (match_operand 3 "const_int_operand" "n")]
+		     UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_scratch:DI 4 "=bX"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[3]);
+  return "lbz %0,%1";
+}
+  [(set_attr "type" "load")])
+
+;; Scalar types that can be optimized by loading them into floating point
+;; or Altivec registers.
+(define_mode_iterator PO_FP [DI DF SF])
+
+;; Load instructions to load up scalar floating point or 64-bit integer values
+;; into floating point registers or Altivec registers.
+(define_mode_attr PO_FPR_LD [(DI "lfd")	 (DF "lfd")  (SF "lfs")])
+(define_mode_attr PO_AVX_LD [(DI "lxsd") (DF "lxsd") (SF "lxssp")])
+
+;; PCREL_OPT load operation of scalar DF/DI/SF into vector registers.
+(define_insn "*pcrel_opt_ld<mode>_vsx"
+  [(set (match_operand:PO_FP 0 "vsx_register_operand" "+d,v")
+	(unspec:PO_FP [(match_operand:PO_FP 1 "d_form_memory" "o,o")
+		       (match_operand:DI 2 "vsx_register_operand" "0,0")
+		       (match_operand 3 "const_int_operand" "n,n")]
+		       UNSPEC_PCREL_OPT_LD_RELOC))
+   (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[3]);
+  return which_alternative ? "<PO_AVX_LD> %0,%1" : "<PO_FPR_LD> %0,%1";
+}
+  [(set_attr "type" "fpload")])
+
+;; PCREL_OPT optimization extending SFmode to DFmode via a load.
+(define_insn "*pcrel_opt_ldsf_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "+d,v")
+	(float_extend:DF
+	 (unspec:SF [(match_operand:SF 1 "d_form_memory" "o,o")
+		     (match_operand:DI 2 "vsx_register_operand" "0,0")
+		     (match_operand 3 "const_int_operand" "n,n")]
+		    UNSPEC_PCREL_OPT_LD_RELOC)))
+   (clobber (match_operand:DI 4 "base_reg_operand" "=b,b"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[3]);
+  return which_alternative ? "lxssp %0,%1" : "lfs %0,%1";
+}
+  [(set_attr "type" "fpload")])
+
+;; PCREL_OPT load operation of vector/float128 types into vector registers.
+(define_insn "*pcrel_opt_ld<mode>"
+  [(set (match_operand:PO_VECT 0 "vsx_register_operand" "+wa")
+	(unspec:PO_VECT [(match_operand:PO_VECT 1 "d_form_memory" "o")
+			 (match_operand:DI 2 "vsx_register_operand" "0")
+			 (match_operand 3 "const_int_operand" "n")]
+			UNSPEC_PCREL_OPT_LD_RELOC))
+   (clobber (match_operand:DI 4 "base_reg_operand" "=b"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[3]);
+  return "lxv %x0,%1";
+}
+  [(set_attr "type" "vecload")])
+
+
+;; PCREL_OPT optimization for stores.  We need to put the label after the PLD
+;; instruction, because the assembler might insert a NOP before the PLD for
+;; alignment.
+;;
+;; If we are optimizing a single write, normally the code would look like:
+;;
+;;	(set (reg:DI <ptr>)
+;;	     (symbol_ref:DI "<extern_addr>"))	# <data> must be live here
+;;
+;;	    ...		     # insns do not need to be adjacent
+;;
+;;	(set (mem:SI (reg:DI <xxx>))
+;;	     (reg:SI <data>))			# <ptr> dies with this insn
+;;
+;; We optimize this to be:
+;;
+;;	(parallel [(set (reg:DI <ptr>)
+;;			(unspec:DI [(symbol_ref:DI "<extern_addr>")
+;;				    (const_int <marker>)]
+;;				   UNSPEC_PCREL_OPT_ST_ADDR))
+;;		   (use (reg:<MODE> <data>))])
+;;
+;;	    ...		     # insns do not need to be adjacent
+;;
+;;	(parallel [(set (mem:<MODE> (reg:DI <ptr>))
+;;			(unspec:<MODE> [(reg:<MODE> <data>)
+;;					(const_int <marker>)]
+;;				       UNSPEC_PCREL_OPT_ST_RELOC))
+;;		   (clobber (reg:DI <ptr>))])
+
+(define_insn "*pcrel_opt_st_addr<mode>"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
+	(unspec:DI [(match_operand:DI 1 "pcrel_external_address")
+		    (match_operand 2 "const_int_operand" "n")]
+		UNSPEC_PCREL_OPT_ST_ADDR))
+   (use (match_operand:PO 3 "gpc_reg_operand" "rwa"))]
+  "TARGET_PCREL_OPT"
+  "ld %0,%a1\n.Lpcrel%2:"
+  [(set_attr "prefixed" "yes")
+   (set_attr "type" "load")
+   (set_attr "loads_extern_addr" "yes")])
+
+;; PCREL_OPT stores.
+(define_insn "*pcrel_opt_st<mode>"
+  [(set (match_operand:QHSI 0 "d_form_memory" "=o")
+	(unspec:QHSI [(match_operand:QHSI 1 "gpc_reg_operand" "r")
+		      (match_operand 2 "const_int_operand" "n")]
+		     UNSPEC_PCREL_OPT_ST_RELOC))
+   (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[2]);
+  return "st<wd> %1,%0";
+}
+  [(set_attr "type" "store")])
+
+(define_insn "*pcrel_opt_stdi"
+  [(set (match_operand:DI 0 "d_form_memory" "=o,o,o")
+	(unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r,d,v")
+		    (match_operand 2 "const_int_operand" "n,n,n")]
+		   UNSPEC_PCREL_OPT_ST_RELOC))
+   (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+  "TARGET_PCREL_OPT && TARGET_POWERPC64"
+{
+  output_pcrel_opt_reloc (operands[2]);
+  switch (which_alternative)
+    {
+    case 0: return "std %1,%0";
+    case 1: return "stfd %1,%0";
+    case 2: return "stxsd %1,%0";
+    default: gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "store,fpstore,fpstore")])
+
+(define_insn "*pcrel_opt_stsf"
+  [(set (match_operand:SF 0 "d_form_memory" "=o,o,o")
+	(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "d,v,r")
+		    (match_operand 2 "const_int_operand" "n,n,n")]
+		   UNSPEC_PCREL_OPT_ST_RELOC))
+   (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[2]);
+  switch (which_alternative)
+    {
+    case 0: return "stfs %1,%0";
+    case 1: return "stxssp %1,%0";
+    case 2: return "stw %1,%0";
+    default: gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fpstore,fpstore,store")])
+
+(define_insn "*pcrel_opt_stdf"
+  [(set (match_operand:DF 0 "d_form_memory" "=o,o,o")
+	(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d,v,r")
+		    (match_operand 2 "const_int_operand" "n,n,n")]
+		   UNSPEC_PCREL_OPT_ST_RELOC))
+   (clobber (match_operand:DI 3 "base_reg_operand" "=b,b,b"))]
+  "TARGET_PCREL_OPT
+   && (TARGET_POWERPC64 || vsx_register_operand (operands[1], DFmode))"
+{
+  output_pcrel_opt_reloc (operands[2]);
+  switch (which_alternative)
+    {
+    case 0: return "stfd %1,%0";
+    case 1: return "stxsd %1,%0";
+    case 2: return "std %1,%0";
+    default: gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fpstore,fpstore,store")])
+
+(define_insn "*pcrel_opt_st<mode>"
+  [(set (match_operand:PO_VECT 0 "d_form_memory" "=o")
+	(unspec:PO_VECT [(match_operand:PO_VECT 1 "gpc_reg_operand" "wa")
+		     (match_operand 2 "const_int_operand" "n")]
+		    UNSPEC_PCREL_OPT_ST_RELOC))
+   (clobber (match_operand:DI 3 "base_reg_operand" "=b"))]
+  "TARGET_PCREL_OPT"
+{
+  output_pcrel_opt_reloc (operands[2]);
+  return "stxv %x1,%0";
+}
+  [(set_attr "type" "vecstore")])
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..9610587e1c1 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1876,3 +1876,26 @@ (define_predicate "prefixed_memory"
 {
   return address_is_prefixed (XEXP (op, 0), mode, NON_PREFIXED_DEFAULT);
 })
+
+;; Return true if the operand is a valid memory operand with an offsettable
+;; address that could be merged with the load of a PC-relative external address
+;; with the PCREL_OPT optimization.  We don't check here whether or not the
+;; offset needs to be used in a DS-FORM (bottom 2 bits 0) or DQ-FORM (bottom 4
+;; bits 0) instruction.
+(define_predicate "d_form_memory"
+  (match_code "mem")
+{
+  if (!memory_operand (op, mode))
+    return false;
+
+  rtx addr = XEXP (op, 0);
+
+  if (REG_P (addr) || SUBREG_P (addr))
+    return true;
+
+  if (GET_CODE (addr) != PLUS)
+    return false;
+
+  return (base_reg_operand (XEXP (addr, 0), Pmode)
+	  && satisfies_constraint_I (XEXP (addr, 1)));
+})
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 8d2c1ffd6cf..d3f72d77a6e 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -78,6 +78,7 @@
 /* Flags that need to be turned off if -mno-power10.  */
 #define OTHER_POWER10_MASKS	(OPTION_MASK_MMA			\
 				 | OPTION_MASK_PCREL			\
+				 | OPTION_MASK_PCREL_OPT		\
 				 | OPTION_MASK_PREFIXED)
 
 #define ISA_3_1_MASKS_SERVER	(ISA_3_0_MASKS_SERVER			\
@@ -142,6 +143,7 @@
 				 | OPTION_MASK_P9_MISC			\
 				 | OPTION_MASK_P9_VECTOR		\
 				 | OPTION_MASK_PCREL			\
+				 | OPTION_MASK_PCREL_OPT		\
 				 | OPTION_MASK_POPCNTB			\
 				 | OPTION_MASK_POPCNTD			\
 				 | OPTION_MASK_POWERPC64		\
diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
index 5164c526e34..b62244b8df2 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -24,4 +24,12 @@ along with GCC; see the file COPYING3.  If not see
    REPLACE_PASS (PASS, INSTANCE, TGT_PASS)
  */
 
+  /* Pass to add the appropriate vector swaps on power8 little endian systems.
+     The power8 does not have instructions that automatically do the byte swaps
+     for loads and stores.  */
   INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+
+  /* Pass to do the PCREL_OPT optimization that combines the load of an
+     external symbol's address along with a single load or store using that
+     address as a base register.  */
+  INSERT_PASS_BEFORE (pass_sched2, 1, pass_pcrel_opt);
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 25fa5dd57cd..75c659971ea 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -190,10 +190,13 @@ enum non_prefixed_form {
 
 extern enum insn_form address_to_insn_form (rtx, machine_mode,
 					    enum non_prefixed_form);
+extern enum non_prefixed_form reg_to_non_prefixed (rtx, machine_mode);
+extern bool offsettable_non_prefixed_memory (rtx, machine_mode, rtx);
 extern bool prefixed_load_p (rtx_insn *);
 extern bool prefixed_store_p (rtx_insn *);
 extern bool prefixed_paddi_p (rtx_insn *);
 extern void rs6000_asm_output_opcode (FILE *);
+extern void output_pcrel_opt_reloc (rtx);
 extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
 extern int rs6000_adjust_insn_length (rtx_insn *, int);
 
@@ -306,6 +309,7 @@ namespace gcc { class context; }
 class rtl_opt_pass;
 
 extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
 extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
 extern bool rs6000_quadword_masked_address_p (const_rtx exp);
 extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index b58eeae2b98..70ea1d0086b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1173,7 +1173,6 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
 					  machine_mode,
 					  secondary_reload_info *,
 					  bool);
-static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
 
 /* Hash table stuff for keeping track of TOC entries.  */
@@ -4413,6 +4412,14 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_MMA;
     }
 
+  if (!TARGET_PCREL && TARGET_PCREL_OPT)
+    {
+      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL_OPT) != 0)
+	error ("%qs requires %qs", "-mpcrel-opt", "-mpcrel");
+
+	rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
+    }
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
     rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
 
@@ -8612,8 +8619,57 @@ rs6000_delegitimize_address (rtx orig_x)
 {
   rtx x, y, offset;
 
-  if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
-    orig_x = XVECEXP (orig_x, 0, 0);
+  /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion.  It
+     encodes loading up the high part of the address of a TOC reference along
+     with a load of a GPR using the same base register used for the load.  We
+     return the original SYMBOL_REF.
+
+	(set (reg:INT1 <reg>)
+	     (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
+
+     UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass.  These
+     UNSPECs include the external SYMBOL_REF along with the value being loaded.
+     We return the original SYMBOL_REF.
+
+	(parallel [(set (reg:DI <base-reg>)
+			(unspec:DI [(symbol_ref <symbol>)
+				    (const_int <marker>)]
+				   UNSPEC_PCREL_OPT_LD_ADDR))
+		   (set (reg:DI <load-reg>)
+			(unspec:DI [(const_int 0)]
+				   UNSPEC_PCREL_OPT_LD_ADDR))])
+
+     UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG is an alternative that is used if the
+     GPR being loaded is the same as the GPR used to hold the external address.
+
+	(set (reg:DI <base-reg>)
+	     (unspec:DI [(symbol_ref <symbol>)
+			 (const_int <marker>)]
+			UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG))
+
+     UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass.  This
+     UNSPEC includes the external SYMBOL_REF along with the value being stored.
+     We return the original SYMBOL_REF.
+
+	(parallel [(set (reg:DI <base-reg>)
+			(unspec:DI [(symbol_ref <symbol>)
+				    (const_int <marker>)]
+				   UNSPEC_PCREL_OPT_ST_ADDR))
+		   (use (reg <store-reg>))])  */
+
+  if (GET_CODE (orig_x) == UNSPEC)
+    switch (XINT (orig_x, 1))
+      {
+      case UNSPEC_FUSION_GPR:
+      case UNSPEC_PCREL_OPT_LD_ADDR:
+      case UNSPEC_PCREL_OPT_LD_ADDR_SAME_REG:
+      case UNSPEC_PCREL_OPT_ST_ADDR:
+	orig_x = XVECEXP (orig_x, 0, 0);
+	break;
+
+      default:
+	break;
+      }
 
   orig_x = delegitimize_mem_from_attrs (orig_x);
 
@@ -23375,6 +23431,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "mulhw",			OPTION_MASK_MULHW,		false, true  },
   { "multiple",			OPTION_MASK_MULTIPLE,		false, true  },
   { "pcrel",			OPTION_MASK_PCREL,		false, true  },
+  { "pcrel-opt",		OPTION_MASK_PCREL_OPT,		false, true  },
   { "popcntb",			OPTION_MASK_POPCNTB,		false, true  },
   { "popcntd",			OPTION_MASK_POPCNTD,		false, true  },
   { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true  },
@@ -25474,6 +25531,32 @@ address_to_insn_form (rtx addr,
   return INSN_FORM_BAD;
 }
 
+/* Return true if a REG with a given MODE is loaded from or stored into a MEM
+   location that uses a non-prefixed offsettable address.  This is used to
+   validate the load or store with the PCREL_OPT optimization to make sure it
+   is an instruction that can be optimized.
+
+   We need to specify the MODE separately from the REG to allow for loads that
+   include zero/sign/float extension.  */
+
+bool
+offsettable_non_prefixed_memory (rtx reg, machine_mode mode, rtx mem)
+{
+  /* If the instruction is indexed only like LFIWAX/LXSIWAX, it is not
+     offsettable.  */
+  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
+  if (non_prefixed == NON_PREFIXED_X)
+    return false;
+
+  /* Check if this is a non-prefixed offsettable instruction.  */
+  rtx addr = XEXP (mem, 0);
+  enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
+  return (iform == INSN_FORM_BASE_REG
+	  || iform == INSN_FORM_D
+	  || iform == INSN_FORM_DS
+	  || iform == INSN_FORM_DQ);
+}
+
 /* Helper function to see if we're potentially looking at lfs/stfs.
    - PARALLEL containing a SET and a CLOBBER
    - stfs:
@@ -25532,7 +25615,7 @@ is_lfs_stfs_insn (rtx_insn *insn)
 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
    instruction format (D/DS/DQ) used for offset memory.  */
 
-static enum non_prefixed_form
+enum non_prefixed_form
 reg_to_non_prefixed (rtx reg, machine_mode mode)
 {
   /* If it isn't a register, use the defaults.  */
@@ -25755,11 +25838,34 @@ void
 rs6000_asm_output_opcode (FILE *stream)
 {
   if (next_insn_prefixed_p)
-    fprintf (stream, "p");
+    {
+      fprintf (stream, "p");
+
+      /* Reset flag in case there are separate insn lines in the sequence, so
+	 the 'p' is only emitted for the first line.  This shows up when we are
+	 doing the PCREL_OPT optimization, in that the label created with %r<n>
+	 would have a leading 'p' printed.  */
+      next_insn_prefixed_p = false;
+    }
 
   return;
 }
 
+/* Emit the relocation to tie the next instruction to a previous instruction
+   that loads up an external address.  This is used to do the PCREL_OPT
+   optimization.  Note, the label is generated after the PLD of the got
+   pc-relative address to allow for the assembler to insert NOPs before the PLD
+   instruction.  The operand is a constant integer that is the label
+   number.  */
+
+void
+output_pcrel_opt_reloc (rtx label_num)
+{
+  rtx operands[1] = { label_num };
+  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
+		   operands);
+}
+
 /* Adjust the length of an INSN.  LENGTH is the currently-computed length and
    should be adjusted to reflect any required changes.  This macro is used when
    there is some systematic length adjustment required that would be difficult
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 779bfd11237..961a1df3c95 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -292,6 +292,10 @@ (define_attr "prefixed" "no,yes"
 
 	(const_string "no")))
 
+;; Whether an insn loads an external address for the PCREL_OPT optimization.
+(define_attr "loads_extern_addr" "no,yes"
+  (const_string "no"))
+
 ;; Return the number of real hardware instructions in a combined insn.  If it
 ;; is 0, just use the length / 4.
 (define_attr "num_insns" "" (const_int 0))
@@ -10226,7 +10230,8 @@ (define_insn "*pcrel_extern_addr"
   "TARGET_PCREL"
   "ld %0,%a1"
   [(set_attr "prefixed" "yes")
-   (set_attr "type" "load")])
+   (set_attr "type" "load")
+   (set_attr "loads_extern_addr" "yes")])
 
 ;; TOC register handling.
 
@@ -14883,3 +14888,4 @@ (define_insn "*cmpeqb_internal"
 (include "dfp.md")
 (include "crypto.md")
 (include "htm.md")
+(include "pcrel-opt.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index b2a70e88ca8..d1719bfd2a0 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -586,6 +586,10 @@ mpcrel
 Target Report Mask(PCREL) Var(rs6000_isa_flags)
 Generate (do not generate) pc-relative memory addressing.
 
+mpcrel-opt
+Target Undocumented Mask(PCREL_OPT) Var(rs6000_isa_flags)
+Generate (do not generate) pc-relative memory optimizations for externals.
+
 mmma
 Target Report Mask(MMA) Var(rs6000_isa_flags)
 Generate (do not generate) MMA instructions.
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb5729cb2..a617276484e 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -23,6 +23,10 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
 TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
 PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
 
+pcrel-opt.o: $(srcdir)/config/rs6000/pcrel-opt.c
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
 rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
@@ -86,4 +90,5 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
 	$(srcdir)/config/rs6000/mma.md \
 	$(srcdir)/config/rs6000/crypto.md \
 	$(srcdir)/config/rs6000/htm.md \
-	$(srcdir)/config/rs6000/dfp.md
+	$(srcdir)/config/rs6000/dfp.md \
+	$(srcdir)/config/rs6000/pcrel-opt.md
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
new file mode 100644
index 00000000000..f165068e2be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	unsigned int
+
+/* Test whether using an external variable twice (doing an increment) prevents
+   the PCREL_OPT optimization.  */
+extern TYPE ext;
+
+void
+inc (void)
+{
+  ext++;		/* No PCREL_OPT (uses address twice).  */
+}
+
+/* { dg-final { scan-assembler-not "R_PPC64_PCREL_OPT" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
new file mode 100644
index 00000000000..d35862fcb6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	double
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for
+   double.  */
+extern TYPE ext[];
+
+TYPE
+get (void)
+{
+  return ext[0];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get2 (void)
+{
+  return ext[2];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get_large (void)
+{
+  return ext[LARGE];		/* No PCREL_OPT (load is  prefixed).  */
+}
+
+TYPE
+get_variable (unsigned long n)
+{
+  return ext[n];		/* No PCREL_OPT (load is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
new file mode 100644
index 00000000000..12b51ab2e67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	long long
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for long
+   long.  */
+extern TYPE ext[];
+
+TYPE
+get (void)
+{
+  return ext[0];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get2 (void)
+{
+  return ext[2];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get_large (void)
+{
+  return ext[LARGE];		/* No PCREL_OPT (load is  prefixed).  */
+}
+
+TYPE
+get_variable (unsigned long n)
+{
+  return ext[n];		/* No PCREL_OPT (load is indexed).  */
+}
+
+double
+get_double (void)
+{
+  return (double) ext[0];	/* PCREL_OPT relocation.  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  3 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c
new file mode 100644
index 00000000000..4143aeb7371
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-hi.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	unsigned short
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
+   short.  */
+extern TYPE ext[];
+
+TYPE
+get (void)
+{
+  return ext[0];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get2 (void)
+{
+  return ext[2];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get_large (void)
+{
+  return ext[LARGE];		/* No PCREL_OPT (load is  prefixed).  */
+}
+
+TYPE
+get_variable (unsigned long n)
+{
+  return ext[n];		/* No PCREL_OPT (load is indexed).  */
+}
+
+double
+get_double (void)
+{
+  return (double) ext[0];	/* No PCREL_OPT (LXSIHZX is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c
new file mode 100644
index 00000000000..30d3236f95c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-qi.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	unsigned char
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
+   char.  */
+extern TYPE ext[];
+
+TYPE
+get (void)
+{
+  return ext[0];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get2 (void)
+{
+  return ext[2];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get_large (void)
+{
+  return ext[LARGE];		/* No PCREL_OPT (load is  prefixed).  */
+}
+
+TYPE
+get_variable (unsigned long n)
+{
+  return ext[n];		/* No PCREL_OPT (load is indexed).  */
+}
+
+double
+get_double (void)
+{
+  return (double) ext[0];	/* No PCREL_OPT (LXSIBZX is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c
new file mode 100644
index 00000000000..9d1e2a1956f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-sf.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	float
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for
+   float.  */
+extern TYPE ext[];
+
+TYPE
+get (void)
+{
+  return ext[0];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get2 (void)
+{
+  return ext[2];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get_large (void)
+{
+  return ext[LARGE];		/* No PCREL_OPT (load is prefixed).  */
+}
+
+TYPE
+get_variable (unsigned long n)
+{
+  return ext[n];		/* No PCREL_OPT (load is indexed).  */
+}
+
+double
+get_double (void)
+{
+  return (double) ext[0];	/* PCREL_OPT relocation.  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  3 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c
new file mode 100644
index 00000000000..17be6fa1778
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-si.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	int
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for int.  */
+extern TYPE ext[];
+
+TYPE
+get (void)
+{
+  return ext[0];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get2 (void)
+{
+  return ext[2];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get_large (void)
+{
+  return ext[LARGE];		/* No PCREL_OPT (load is prefixed).  */
+}
+
+TYPE
+get_variable (unsigned long n)
+{
+  return ext[n];		/* No PCREL_OPT (load is indexed).  */
+}
+
+double
+get_double (void)
+{
+  return (double) ext[0];	/* No PCREL_OPT (LFIWAX is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c
new file mode 100644
index 00000000000..8c12aea5acd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-vector.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	vector double
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for
+   vector double.  */
+extern TYPE ext[];
+
+TYPE
+get (void)
+{
+  return ext[0];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get2 (void)
+{
+  return ext[2];		/* PCREL_OPT relocation.  */
+}
+
+TYPE
+get_large (void)
+{
+  return ext[LARGE];		/* No PCREL_OPT (load is prefixed).  */
+}
+
+TYPE
+get_variable (unsigned long n)
+{
+  return ext[n];		/* No PCREL_OPT (load is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c
new file mode 100644
index 00000000000..d795d35d8de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-df.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	double
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for
+   double.  */
+extern TYPE ext[];
+
+void
+store (TYPE a)
+{
+  ext[0] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store2 (TYPE a)
+{
+  ext[2] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store_large (TYPE a)
+{
+  ext[LARGE] = a;		/* No PCREL_OPT (store is prefixed).  */
+}
+
+void
+store_variable (TYPE a, unsigned long n)
+{
+  ext[n] = a;			/* No PCREL_OPT (store is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c
new file mode 100644
index 00000000000..47554394cf7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-di.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	long long
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for long
+   long.  */
+extern TYPE ext[];
+
+void
+store (TYPE a)
+{
+  ext[0] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store2 (TYPE a)
+{
+  ext[2] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store_large (TYPE a)
+{
+  ext[LARGE] = a;		/* No PCREL_OPT (store is prefixed).  */
+}
+
+void
+store_variable (TYPE a, unsigned long n)
+{
+  ext[n] = a;			/* No PCREL_OPT (store is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c
new file mode 100644
index 00000000000..8822e767dfe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-hi.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	unsigned short
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
+   short.  */
+extern TYPE ext[];
+
+void
+store (TYPE a)
+{
+  ext[0] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store2 (TYPE a)
+{
+  ext[2] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store_large (TYPE a)
+{
+  ext[LARGE] = a;		/* No PCREL_OPT (store is prefixed).  */
+}
+
+void
+store_variable (TYPE a, unsigned long n)
+{
+  ext[n] = a;			/* No PCREL_OPT (store is indexed).  */
+}
+
+void
+store_double (double a)
+{
+  ext[0] = (TYPE) a;		/* No PCREL_OPT (STXSIHX is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c
new file mode 100644
index 00000000000..2f756833717
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-qi.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	unsigned char
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for unsigned
+   char.  */
+extern TYPE ext[];
+
+void
+store (TYPE a)
+{
+  ext[0] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store2 (TYPE a)
+{
+  ext[2] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store_large (TYPE a)
+{
+  ext[LARGE] = a;		/* No PCREL_OPT (store is prefixed).  */
+}
+
+void
+store_variable (TYPE a, unsigned long n)
+{
+  ext[n] = a;			/* No PCREL_OPT (store is indexed).  */
+}
+
+void
+store_double (double a)
+{
+  ext[0] = (TYPE) a;		/* No PCREL_OPT (STXSIBX is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c
new file mode 100644
index 00000000000..3dd88aad856
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-sf.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	float
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for
+   float.  */
+extern TYPE ext[];
+
+void
+store (TYPE a)
+{
+  ext[0] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store2 (TYPE a)
+{
+  ext[2] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store_large (TYPE a)
+{
+  ext[LARGE] = a;		/* No PCREL_OPT (store is prefixed).  */
+}
+
+void
+store_variable (TYPE a, unsigned long n)
+{
+  ext[n] = a;			/* No PCREL_OPT (store is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c
new file mode 100644
index 00000000000..78dc8120efe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-si.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	int
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for int.  */
+extern TYPE ext[];
+
+void
+store (TYPE a)
+{
+  ext[0] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store2 (TYPE a)
+{
+  ext[2] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store_large (TYPE a)
+{
+  ext[LARGE] = a;		/* No PCREL_OPT (store is prefixed).  */
+}
+
+void
+store_variable (TYPE a, unsigned long n)
+{
+  ext[n] = a;			/* No PCREL_OPT (store is indexed).  */
+}
+
+void
+store_double (double a)
+{
+  ext[0] = (TYPE) a;		/* No PCREL_OPT (STFIWX is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c
new file mode 100644
index 00000000000..2c602eb3103
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pcrel-opt-st-vector.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_pcrel } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+#define TYPE	vector double
+#define LARGE	0x20000
+
+/* Test whether we get the right number of PCREL_OPT optimizations for
+   vector double.  */
+extern TYPE ext[];
+
+void
+store (TYPE a)
+{
+  ext[0] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store2 (TYPE a)
+{
+  ext[2] = a;			/* PCREL_OPT relocation.  */
+}
+
+void
+store_large (TYPE a)
+{
+  ext[LARGE] = a;		/* No PCREL_OPT (store is prefixed).  */
+}
+
+void
+store_variable (TYPE a, unsigned long n)
+{
+  ext[n] = a;			/* No PCREL_OPT (store is indexed).  */
+}
+
+/* { dg-final { scan-assembler-times "R_PPC64_PCREL_OPT"  2 } } */
-- 
2.18.4



More information about the Gcc-patches mailing list