This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: Properly generate X32 IE sequence
On Mon, Mar 19, 2012 at 9:19 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Mar 19, 2012 at 8:54 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, Mar 19, 2012 at 8:51 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Sun, Mar 18, 2012 at 1:55 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Sun, Mar 18, 2012 at 5:01 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>
>>>>>> I am testing this patch.  OK for trunk if it passes all tests?
>>>>>
>>>>> No, force_reg will generate a pseudo, so this conversion is valid only
>>>>> for !can_create_pseudo ().
>>>>>
>>>>> At least for *tls_initial_exec_x32_store, you will need a temporary to
>>>>> split the pattern after reload.
>>>
>>> Here is the updated patch to add can_create_pseudo.  I also changed
>>> tls_initial_exec_x32 to take an input register operand as thread pointer.
>>>
>>>> Please try the attached patch. It simply throws away all recent
>>>> complications w.r.t. the thread pointer and always handles TP in
>>>> DImode.
>>>>
>>>> The testcase:
>>>>
>>>> --cut here--
>>>> __thread int foo __attribute__ ((tls_model ("initial-exec")));
>>>>
>>>> void bar (int x)
>>>> {
>>>>   foo = x;
>>>> }
>>>>
>>>> int baz (void)
>>>> {
>>>>   return foo;
>>>> }
>>>> --cut here--
>>>>
>>>> Now compiles to:
>>>>
>>>> bar:
>>>>        movq    foo@gottpoff(%rip), %rax
>>>>        movl    %edi, %fs:(%rax)
>>>>        ret
>>>>
>>>> baz:
>>>>        movq    foo@gottpoff(%rip), %rax
>>>>        movl    %fs:(%rax), %eax
>>>>        ret
>>>>
>>>> In effect, this always generates %fs:(%rDI) and emits a REX prefix before
>>>> mov/add to satisfy brain-dead linkers.
>>>>
>>>> The patch is bootstrapping now on x86_64-pc-linux-gnu.
>>>>
>>>
>>> For
>>>
>>> --
>>> extern __thread char c;
>>> extern char y;
>>> void
>>> ie (void)
>>> {
>>>  y = c;
>>> }
>>> --
>>>
>>> Your patch generates:
>>>
>>>        movl    %fs:0, %eax
>>>        movq    c@gottpoff(%rip), %rdx
>>>        movzbl  (%rax,%rdx), %edx
>>>        movb    %dl, y(%rip)
>>>        ret
>>>
>>> It can be optimized to:
>>>
>>>        movq    c@gottpoff(%rip), %rax
>>>        movzbl  %fs:(%rax), %eax
>>>        movb    %al, y(%rip)
>>>        ret
>>>
>>
>> Combine failed:
>>
>> (set (reg:QI 63 [ c ])
>>    (mem/c:QI (plus:DI (zero_extend:DI (unspec:SI [
>>                        (const_int 0 [0])
>>                    ] UNSPEC_TP))
>>            (mem/u/c:DI (const:DI (unspec:DI [
>>                            (symbol_ref:SI ("c") [flags 0x60] <var_decl 0x7ffff19b8140 c>)
>>                        ] UNSPEC_GOTNTPOFF)) [2 S8 A8])) [0 c+0 S1 A8]))
>>
>>
>
> Wrong testcase.  It should be
>
> --
> extern __thread char c;
> extern __thread short w;
> extern char y;
> extern short i;
> void
> ie (void)
> {
>  y = c;
>  i = w;
> }
> ---
>
> I got
>
>        movl    %fs:0, %eax
>        movq    c@gottpoff(%rip), %rdx
>        movzbl  (%rax,%rdx), %edx
>        movb    %dl, y(%rip)
>        movq    w@gottpoff(%rip), %rdx
>        movzwl  (%rax,%rdx), %eax
>        movw    %ax, i(%rip)
>        ret
>
> It can be
>
>        movq    c@gottpoff(%rip), %rax
>        movzbl  %fs:(%rax), %eax
>        movb    %al, y(%rip)
>        movq    w@gottpoff(%rip), %rax
>        movzwl  %fs:(%rax), %eax
>        movw    %ax, i(%rip)
>        ret
>
>
How about this patch? I changed the x32 TP load to
(define_insn "*load_tp_x32_<mode>"
[(set (match_operand:SWI48x 0 "register_operand" "=r")
(unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
"TARGET_X32"
"mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
[(set_attr "type" "imov")
(set_attr "modrm" "0")
(set_attr "length" "7")
(set_attr "memory" "load")
(set_attr "imm_disp" "false")])
and removed *load_tp_x32_zext.
--
H.J.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9aa5ee7..66221e4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12483,15 +12483,12 @@ legitimize_pic_address (rtx orig, rtx reg)
/* Load the thread pointer. If TO_REG is true, force it into a register. */
static rtx
-get_thread_pointer (bool to_reg)
+get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
- rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
-
- if (GET_MODE (tp) != Pmode)
- tp = convert_to_mode (Pmode, tp, 1);
+ rtx tp = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
if (to_reg)
- tp = copy_addr_to_reg (tp);
+ tp = copy_to_mode_reg (tp_mode, tp);
return tp;
}
@@ -12543,6 +12540,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
rtx dest, base, off;
rtx pic = NULL_RTX, tp = NULL_RTX;
+ enum machine_mode tp_mode = Pmode;
int type;
switch (model)
@@ -12568,7 +12566,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
else
emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
- tp = get_thread_pointer (true);
+ tp = get_thread_pointer (Pmode, true);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
@@ -12618,7 +12616,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
else
emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
- tp = get_thread_pointer (true);
+ tp = get_thread_pointer (Pmode, true);
set_unique_reg_note (get_last_insn (), REG_EQUAL,
gen_rtx_MINUS (Pmode, tmp, tp));
}
@@ -12664,27 +12662,18 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
case TLS_MODEL_INITIAL_EXEC:
if (TARGET_64BIT)
{
+ tp_mode = DImode;
+
if (TARGET_SUN_TLS)
{
/* The Sun linker took the AMD64 TLS spec literally
and can only handle %rax as destination of the
initial executable code sequence. */
- dest = gen_reg_rtx (Pmode);
+ dest = gen_reg_rtx (tp_mode);
emit_insn (gen_tls_initial_exec_64_sun (dest, x));
return dest;
}
- else if (Pmode == SImode)
- {
- /* Always generate
- movl %fs:0, %reg32
- addl xgottpoff(%rip), %reg32
- to support linker IE->LE optimization and avoid
- fs:(%reg32) as memory operand. */
- dest = gen_reg_rtx (Pmode);
- emit_insn (gen_tls_initial_exec_x32 (dest, x));
- return dest;
- }
pic = NULL;
type = UNSPEC_GOTNTPOFF;
@@ -12708,24 +12697,23 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
type = UNSPEC_INDNTPOFF;
}
- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
- off = gen_rtx_CONST (Pmode, off);
+ off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
+ off = gen_rtx_CONST (tp_mode, off);
if (pic)
- off = gen_rtx_PLUS (Pmode, pic, off);
- off = gen_const_mem (Pmode, off);
+ off = gen_rtx_PLUS (tp_mode, pic, off);
+ off = gen_const_mem (tp_mode, off);
set_mem_alias_set (off, ix86_GOT_alias_set ());
if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
{
- base = get_thread_pointer (for_mov
- || !(TARGET_TLS_DIRECT_SEG_REFS
- && TARGET_TLS_INDIRECT_SEG_REFS));
- off = force_reg (Pmode, off);
- return gen_rtx_PLUS (Pmode, base, off);
+ base = get_thread_pointer (tp_mode,
+ for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ off = force_reg (tp_mode, off);
+ return gen_rtx_PLUS (tp_mode, base, off);
}
else
{
- base = get_thread_pointer (true);
+ base = get_thread_pointer (Pmode, true);
dest = gen_reg_rtx (Pmode);
emit_insn (ix86_gen_sub3 (dest, base, off));
}
@@ -12739,14 +12727,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
{
- base = get_thread_pointer (for_mov
- || !(TARGET_TLS_DIRECT_SEG_REFS
- && TARGET_TLS_INDIRECT_SEG_REFS));
+ base = get_thread_pointer (Pmode,
+ for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
return gen_rtx_PLUS (Pmode, base, off);
}
else
{
- base = get_thread_pointer (true);
+ base = get_thread_pointer (Pmode, true);
dest = gen_reg_rtx (Pmode);
emit_insn (ix86_gen_sub3 (dest, base, off));
}
@@ -13274,8 +13261,7 @@ ix86_delegitimize_tls_address (rtx orig_x)
rtx x = orig_x, unspec;
struct ix86_address addr;
- if (!(TARGET_TLS_DIRECT_SEG_REFS
- && TARGET_TLS_INDIRECT_SEG_REFS))
+ if (!TARGET_TLS_DIRECT_SEG_REFS)
return orig_x;
if (MEM_P (x))
x = XEXP (x, 0);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9e5ac00..3fcd209 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -467,9 +467,6 @@ extern int x86_prefetch_sse;
#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
#endif
-/* Address override works only on the (%reg) part of %fs:(%reg). */
-#define TARGET_TLS_INDIRECT_SEG_REFS (Pmode == word_mode)
-
/* Fence to use after loop using storent. */
extern tree x86_mfence;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d23c67b..e167ceb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12747,20 +12747,9 @@
(define_mode_attr tp_seg [(SI "gs") (DI "fs")])
;; Load and add the thread base pointer from %<tp_seg>:0.
-(define_insn "*load_tp_x32"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(const_int 0)] UNSPEC_TP))]
- "TARGET_X32"
- "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
- [(set_attr "type" "imov")
- (set_attr "modrm" "0")
- (set_attr "length" "7")
- (set_attr "memory" "load")
- (set_attr "imm_disp" "false")])
-
-(define_insn "*load_tp_x32_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))]
+(define_insn "*load_tp_x32_<mode>"
+ [(set (match_operand:SWI48x 0 "register_operand" "=r")
+ (unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
"TARGET_X32"
"mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
[(set_attr "type" "imov")
@@ -12836,28 +12825,6 @@
}
[(set_attr "type" "multi")])
-;; When Pmode == SImode, there may be no REX prefix for ADD. Avoid
-;; any instructions between MOV and ADD, which may interfere linker
-;; IE->LE optimization, since the last byte of the previous instruction
-;; before ADD may look like a REX prefix. This also avoids
-;; movl x@gottpoff(%rip), %reg32
-;; movl $fs:(%reg32), %reg32
-;; Since address override works only on the (reg32) part in fs:(reg32),
-;; we can't use it as memory operand.
-(define_insn "tls_initial_exec_x32"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI
- [(match_operand 1 "tls_symbolic_operand")]
- UNSPEC_TLS_IE_X32))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_X32"
-{
- output_asm_insn
- ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands);
- return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
-}
- [(set_attr "type" "multi")])
-
;; GNU2 TLS patterns can be split.
(define_expand "tls_dynamic_gnu2_32"