RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86
H.J. Lu
hjl.tools@gmail.com
Wed Aug 19 12:50:00 GMT 2015
On Mon, Aug 17, 2015 at 10:17:00AM -0700, H.J. Lu wrote:
> On Mon, Aug 17, 2015 at 10:08 AM, Alexander Monakov <amonakov@ispras.ru> wrote:
> >> >> Perhaps add a comment that GOT slots are 64-bit on x32?
> >> >>
> >> >
> >> > Good idea. I will update my patch.
> >> >
> >>
> >> How about this?
> >>
> >>
> >> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> >> index bf8a21d..216dee6 100644
> >> --- a/gcc/config/i386/i386.c
> >> +++ b/gcc/config/i386/i386.c
> >> @@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
> >> fnaddr);
> >> }
> >> fnaddr = gen_const_mem (Pmode, fnaddr);
> >> + /* Pmode may not be the same as word_mode for x32, which
> >
> > I think 'Pmode is not the same as word_mode on x32' is more appropriate here.
>
> "-maddress-mode=long -mx32" makes Pmode == word_mode.
>
> >> + doesn't support indirect branch va 32-bit memory slot.
> >
> > Typo: s/va/via.
> >
>
> Fixed.
>
> Here is the updated patch.
>
Hi Jefff,
Can you review this?
Thanks.
H.J.
---
It boils down to that -fno-plt should convert calling an external
function, foo, from
call foo@PLT
to
call *foo@GOT
to avoid one extra direct branch to PLT. The proper place for this is
in backend during expanding a function call. The backend already takes
of many details for calling an external function, like setting up a PIC
register. Using the GOT slot instead of PLT slot, just one of those
details. For x86, it should be done in ix86_expand_call, not
prepare_call_address and hope for the best, which doesn't always
happen. Also non-PIC case can only be handled in backend.
This patch reverts -fno-plt in prepare_call_address and handles it in
ix86_expand_call. Other backends may need similar changes to support
-fno-plt. Alternately, we can introduce a target hook to indicate
whether an external function should be called via register for -fno-plt
so that i386 backend can disable it in prepare_call_address.
sibcall_memory_operand is also updated to accept the GOT slot so that
call *foo@GOT(%reg)
can be generated by ix86_expand_call for 32-bit and 64-bit large model.
gcc/
PR target/67215
* calls.c (prepare_call_address): Don't handle -fno-plt here.
* config/i386/i386.c (ix86_expand_call): Generate indirect call
via GOT for -fno-plt. Support indirect call via GOT for x32.
* config/i386/predicates.md (sibcall_memory_operand): Allow
GOT memory operand.
gcc/testsuite/
PR target/67215
* gcc.target/i386/pr67215-1.c: New test.
* gcc.target/i386/pr67215-2.c: Likewise.
* gcc.target/i386/pr67215-3.c: Likewise.
---
gcc/calls.c | 12 ------
gcc/config/i386/i386.c | 71 ++++++++++++++++++++++++-------
gcc/config/i386/predicates.md | 7 ++-
gcc/testsuite/gcc.target/i386/pr67215-1.c | 20 +++++++++
gcc/testsuite/gcc.target/i386/pr67215-2.c | 20 +++++++++
gcc/testsuite/gcc.target/i386/pr67215-3.c | 12 ++++++
6 files changed, 113 insertions(+), 29 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-3.c
diff --git a/gcc/calls.c b/gcc/calls.c
index 5636725..7cce9be 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -203,18 +203,6 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
&& targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
- else if (flag_pic
- && fndecl_or_type
- && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
- && (!flag_plt
- || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
- && !targetm.binds_local_p (fndecl_or_type))
- {
- /* This is done only for PIC code. There is no easy interface to force the
- function address into GOT for non-PIC case. non-PIC case needs to be
- handled specially by the backend. */
- funexp = force_reg (Pmode, funexp);
- }
else if (! sibcallp)
{
if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 05fa5e1..ac9a6c4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25649,21 +25649,54 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
/* Static functions and indirect calls don't need the pic register. Also,
check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
it an indirect call. */
+ rtx addr = XEXP (fnaddr, 0);
if (flag_pic
- && (!TARGET_64BIT
- || (ix86_cmodel == CM_LARGE_PIC
- && DEFAULT_ABI != MS_ABI))
- && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
- && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
- && flag_plt
- && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
- || !lookup_attribute ("noplt",
- DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
+ && GET_CODE (addr) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (addr))
{
- use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
- if (ix86_use_pseudo_pic_reg ())
- emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
- pic_offset_table_rtx);
+ if (flag_plt
+ && (SYMBOL_REF_DECL (addr) == NULL_TREE
+ || !lookup_attribute ("noplt",
+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
+ {
+ if (!TARGET_64BIT
+ || (ix86_cmodel == CM_LARGE_PIC
+ && DEFAULT_ABI != MS_ABI))
+ {
+ use_reg (&use, gen_rtx_REG (Pmode,
+ REAL_PIC_OFFSET_TABLE_REGNUM));
+ if (ix86_use_pseudo_pic_reg ())
+ emit_move_insn (gen_rtx_REG (Pmode,
+ REAL_PIC_OFFSET_TABLE_REGNUM),
+ pic_offset_table_rtx);
+ }
+ }
+ else if (!TARGET_PECOFF && !TARGET_MACHO)
+ {
+ if (TARGET_64BIT)
+ {
+ fnaddr = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, addr),
+ UNSPEC_GOTPCREL);
+ fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+ }
+ else
+ {
+ fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+ UNSPEC_GOT);
+ fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+ fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+ fnaddr);
+ }
+ fnaddr = gen_const_mem (Pmode, fnaddr);
+ /* Pmode may not be the same as word_mode for x32, which
+ doesn't support indirect branch via 32-bit memory slot.
+ Since x32 GOT slot is 64 bit with zero upper 32 bits,
+ indirect branch via x32 GOT slot is OK. */
+ if (GET_MODE (fnaddr) != word_mode)
+ fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
}
}
@@ -25685,9 +25718,15 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
&& !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
- else if (sibcall
- ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
- : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
+ /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
+ branch via x32 GOT slot is OK. */
+ else if (!(TARGET_X32
+ && MEM_P (fnaddr)
+ && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
+ && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
+ && (sibcall
+ ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
+ : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
{
fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a9c8623..be2df76 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -593,7 +593,12 @@
;; Return true if OP is a memory operands that can be used in sibcalls.
(define_predicate "sibcall_memory_operand"
(and (match_operand 0 "memory_operand")
- (match_test "CONSTANT_P (XEXP (op, 0))")))
+ (match_test "CONSTANT_P (XEXP (op, 0))
+ || (GET_CODE (XEXP (op, 0)) == PLUS
+ && REG_P (XEXP (XEXP (op, 0), 0))
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST
+ && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == UNSPEC
+ && XINT (XEXP (XEXP (XEXP (op, 0), 1), 0), 1) == UNSPEC_GOT)")))
;; Test for a valid operand for a call instruction.
;; Allow constant call address operands in Pmode only.
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-1.c b/gcc/testsuite/gcc.target/i386/pr67215-1.c
new file mode 100644
index 0000000..fd37f8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt" } */
+
+extern char* bar (int);
+extern char* arr[32];
+
+void
+foo (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-2.c b/gcc/testsuite/gcc.target/i386/pr67215-2.c
new file mode 100644
index 0000000..ebf2919
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic" } */
+
+extern char* bar (int) __attribute__ ((noplt));
+extern char* arr[32];
+
+void
+foo (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = bar (128);
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "mov(l|q)\[ \t\]*.bar@GOTPCREL" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "movl\[ \t\]*.bar@GOT\\(" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "call\[ \t\]*.bar@PLT" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67215-3.c b/gcc/testsuite/gcc.target/i386/pr67215-3.c
new file mode 100644
index 0000000..dbd9a2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr67215-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fpic -fno-plt -fdump-rtl-expand" } */
+
+extern int bar (void);
+
+int
+foo (void)
+{
+ return bar ();
+}
+
+/* { dg-final { scan-rtl-dump "\\(call \\(mem:QI \\(mem/u/c:" "expand" } } */
--
2.4.3
More information about the Gcc-patches
mailing list