This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: RFC: TLS improvements for IA32 and AMD64/EM64T
On Sep 22, 2005, Richard Henderson <rth@redhat.com> wrote:
> On Thu, Sep 22, 2005 at 03:58:42AM -0300, Alexandre Oliva wrote:
>> - && (!current_function_is_leaf || current_function_calls_alloca))
>> + && (!current_function_is_leaf || current_function_calls_alloca
>> + || (ix86_current_function_calls_tls_descriptor
>> + && regs_ever_live[SP_REG])))
> What's the point of checking regs_ever_live[SP_REG]?
If SP_REG is not live, then all expanded tlscalls were optimized away.
I couldn't think of a better approximation, and I don't want to have
the tls calls be modeled as actual calls, because they are supposed
not to affect register allocation, as they don't clobber any
registers. I've documented these thoughts in a new macro's
comments.
>> + rtx id = gen_rtx_SYMBOL_REF (SImode, "_TLS_MODULE_BASE_");
>> + SYMBOL_REF_FLAGS (id)
>> + |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
> This should be created in one place, and cached.
Rats, I thought I'd already made this improvement :-/ Thanks for the
catch.
How's the patch below for 4.2?
Index: gcc/ChangeLog
from Alexandre Oliva <aoliva@redhat.com>
Introduce TLS descriptors for i386 and x86_64.
* config/i386/i386.h (TARGET_GNU2_TLS): New macro.
(TARGET_ANY_GNU_TLS): New macro.
(enum tls_dialect): Added TLS_DIALECT_GNU2.
(struct machine_function): Add tls_descriptor_call_expanded_p.
(ix86_tls_descriptor_calls_expanded_in_cfun): New macro.
(ix86_current_function_calls_tls_descriptor): Likewise.
* config/i386/i386.c (ix86_tls_dialect): Fix typo in comment.
(override_options): Introduce gnu2 tls dialect.
(ix86_frame_pointer_required): Functions containing TLSCALLs are
not leaves.
(ix86_select_alt_pic_regnum, ix86_compute_frame_layout):
Likewise.
(legitimize_tls_address): Adjust logic for GNU2 TLS.
(ix86_init_machine_status): Initialize new field.
(ix86_tls_get_addr): Use TARGET_ANY_GNU_TLS.
(ix86_tls_module_base): New.
* config/i386/i386-protos.h (ix86_tls_module_base): Declare it.
* config/i386/i386.md (UNSPEC_TLSDESC): New constant.
(tls_global_dynamic_32, tls_global_dynamic_64): Handle GNU2 TLS.
(tls_local_dynamic_base_32, tls_local_dynamic_base_64): Likewise.
(tls_dynamic_gnu2_32, *tls_dynamic_lea_32): New patterns.
(*tls_dynamic_call_32, *tls_dynamic_gnu2_combine_32): Likewise.
(tls_dynamic_gnu2_64, *tls_dynamic_lea_64): Likewise.
(*tls_dynamic_call_64, *tls_dynamic_gnu2_combine_64): Likewise.
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -790,7 +790,7 @@ struct ix86_frame
enum cmodel ix86_cmodel;
/* Asm dialect. */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
-/* TLS dialext. */
+/* TLS dialects. */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
/* Which unit we are generating floating point math for. */
@@ -1534,6 +1534,8 @@ override_options (void)
{
if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
ix86_tls_dialect = TLS_DIALECT_GNU;
+ else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU2;
else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
ix86_tls_dialect = TLS_DIALECT_SUN;
else
@@ -4294,7 +4296,8 @@ ix86_frame_pointer_required (void)
the frame pointer by default. Turn it back on now if we've not
got a leaf function. */
if (TARGET_OMIT_LEAF_FRAME_POINTER
- && (!current_function_is_leaf))
+ && (!current_function_is_leaf
+ || ix86_current_function_calls_tls_descriptor))
return 1;
if (current_function_profile)
@@ -4452,7 +4455,8 @@ gen_push (rtx arg)
static unsigned int
ix86_select_alt_pic_regnum (void)
{
- if (current_function_is_leaf && !current_function_profile)
+ if (current_function_is_leaf && !current_function_profile
+ && !ix86_current_function_calls_tls_descriptor)
{
int i;
for (i = 2; i >= 0; --i)
@@ -4639,7 +4643,8 @@ ix86_compute_frame_layout (struct ix86_f
expander assumes that last current_function_outgoing_args_size
of stack frame are unused. */
if (ACCUMULATE_OUTGOING_ARGS
- && (!current_function_is_leaf || current_function_calls_alloca))
+ && (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor))
{
offset += current_function_outgoing_args_size;
frame->outgoing_arguments_size = current_function_outgoing_args_size;
@@ -4649,7 +4654,8 @@ ix86_compute_frame_layout (struct ix86_f
/* Align stack boundary. Only needed if we're calling another function
or using alloca. */
- if (!current_function_is_leaf || current_function_calls_alloca)
+ if (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor)
frame->padding2 = ((offset + preferred_alignment - 1)
& -preferred_alignment) - offset;
else
@@ -4670,7 +4676,8 @@ ix86_compute_frame_layout (struct ix86_f
frame->save_regs_using_mov = false;
if (TARGET_RED_ZONE && current_function_sp_is_unchanging
- && current_function_is_leaf)
+ && current_function_is_leaf
+ && !ix86_current_function_calls_tls_descriptor)
{
frame->red_zone_size = frame->to_allocate;
if (frame->save_regs_using_mov)
@@ -6080,7 +6087,7 @@ legitimize_tls_address (rtx x, enum tls_
{
case TLS_MODEL_GLOBAL_DYNAMIC:
dest = gen_reg_rtx (Pmode);
- if (TARGET_64BIT)
+ if (TARGET_64BIT && !TARGET_GNU2_TLS)
{
rtx rax = gen_rtx_REG (Pmode, 0), insns;
@@ -6091,13 +6098,15 @@ legitimize_tls_address (rtx x, enum tls_
emit_libcall_block (insns, dest, rax, x);
}
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_global_dynamic_64 (dest, x));
else
emit_insn (gen_tls_global_dynamic_32 (dest, x));
break;
case TLS_MODEL_LOCAL_DYNAMIC:
base = gen_reg_rtx (Pmode);
- if (TARGET_64BIT)
+ if (TARGET_64BIT && !TARGET_GNU2_TLS)
{
rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
@@ -6110,6 +6119,8 @@ legitimize_tls_address (rtx x, enum tls_
note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
emit_libcall_block (insns, base, rax, note);
}
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_local_dynamic_base_64 (base));
else
emit_insn (gen_tls_local_dynamic_base_32 (base));
@@ -6129,9 +6140,9 @@ legitimize_tls_address (rtx x, enum tls_
if (reload_in_progress)
regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
pic = pic_offset_table_rtx;
- type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
+ type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
}
- else if (!TARGET_GNU_TLS)
+ else if (!TARGET_ANY_GNU_TLS)
{
pic = gen_reg_rtx (Pmode);
emit_insn (gen_set_got (pic));
@@ -6150,7 +6161,7 @@ legitimize_tls_address (rtx x, enum tls_
off = gen_const_mem (Pmode, off);
set_mem_alias_set (off, ix86_GOT_alias_set ());
- if (TARGET_64BIT || TARGET_GNU_TLS)
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
{
base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
off = force_reg (Pmode, off);
@@ -6166,11 +6177,11 @@ legitimize_tls_address (rtx x, enum tls_
case TLS_MODEL_LOCAL_EXEC:
off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
- (TARGET_64BIT || TARGET_GNU_TLS)
+ (TARGET_64BIT || TARGET_ANY_GNU_TLS)
? UNSPEC_NTPOFF : UNSPEC_TPOFF);
off = gen_rtx_CONST (Pmode, off);
- if (TARGET_64BIT || TARGET_GNU_TLS)
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
{
base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
return gen_rtx_PLUS (Pmode, base, off);
@@ -12621,6 +12632,7 @@ ix86_init_machine_status (void)
f = ggc_alloc_cleared (sizeof (struct machine_function));
f->use_fast_prologue_epilogue_nregs = -1;
+ f->tls_descriptor_call_expanded_p = 0;
return f;
}
@@ -12663,13 +12675,32 @@ ix86_tls_get_addr (void)
if (!ix86_tls_symbol)
{
ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
- (TARGET_GNU_TLS && !TARGET_64BIT)
+ (TARGET_ANY_GNU_TLS
+ && !TARGET_64BIT)
? "___tls_get_addr"
: "__tls_get_addr");
}
return ix86_tls_symbol;
}
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx ix86_tls_module_base_symbol;
+rtx
+ix86_tls_module_base (void)
+{
+
+ if (!ix86_tls_module_base_symbol)
+ {
+ ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ "_TLS_MODULE_BASE_");
+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return ix86_tls_module_base_symbol;
+}
/* Calculate the length of the memory address in the instruction
encoding. Does not include the one-byte modrm, opcode, or prefix. */
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -225,6 +225,8 @@ extern int x86_prefetch_sse;
&& (ix86_fpmath & FPMATH_387))
#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
+#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
+#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
#define TARGET_CMPXCHG (x86_cmpxchg & (1 << ix86_arch))
@@ -2131,6 +2133,7 @@ extern enum fpmath_unit ix86_fpmath;
enum tls_dialect
{
TLS_DIALECT_GNU,
+ TLS_DIALECT_GNU2,
TLS_DIALECT_SUN
};
@@ -2271,11 +2274,30 @@ struct machine_function GTY(())
/* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
for. */
int use_fast_prologue_epilogue_nregs;
+ /* If true, the current function needs the default PIC register, not
+ an alternate register (on x86) and must not use the red zone (on
+ x86_64), even if it's a leaf function. We don't want the
+ function to be regarded as non-leaf because TLS calls need not
+ affect register allocation. This flag is set when a TLS call
+ instruction is expanded within a function, and never reset, even
+ if all such instructions are optimized away. Use the
+ ix86_current_function_calls_tls_descriptor macro for a better
+ approximation. */
+ int tls_descriptor_call_expanded_p;
};
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_tls_descriptor_calls_expanded_in_cfun \
+ (cfun->machine->tls_descriptor_call_expanded_p)
+/* Since tls_descriptor_call_expanded_p is not cleared, even if all TLS
+ calls are optimized away, we try to detect cases in which they were
+ optimized away. Since such instructions contain (use (reg SP_REG)),
+ we can verify whether there's any such instruction live by testing
+ that SP_REG is live. */
+#define ix86_current_function_calls_tls_descriptor \
+ (ix86_tls_descriptor_calls_expanded_in_cfun && regs_ever_live[SP_REG])
/* Control behavior of x86_file_start. */
#define X86_FILE_START_VERSION_DIRECTIVE false
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -71,6 +71,7 @@
(UNSPEC_TP 15)
(UNSPEC_TLS_GD 16)
(UNSPEC_TLS_LD_BASE 17)
+ (UNSPEC_TLSDESC 18)
; Other random patterns
(UNSPEC_SCAS 20)
@@ -14071,6 +14072,12 @@
operands[2] = gen_reg_rtx (Pmode);
emit_insn (gen_set_got (operands[2]));
}
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
operands[3] = ix86_tls_get_addr ();
})
@@ -14092,6 +14099,11 @@
UNSPEC_TLS_GD)])]
""
{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], operands[1]));
+ DONE;
+ }
operands[2] = ix86_tls_get_addr ();
})
@@ -14138,6 +14150,13 @@
operands[1] = gen_reg_rtx (Pmode);
emit_insn (gen_set_got (operands[1]));
}
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32 (operands[0],
+ ix86_tls_module_base (),
+ operands[1]));
+ DONE;
+ }
operands[2] = ix86_tls_get_addr ();
})
@@ -14157,6 +14176,12 @@
(unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
""
{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64 (operands[0],
+ ix86_tls_module_base ()));
+ DONE;
+ }
operands[1] = ix86_tls_get_addr ();
})
@@ -14234,6 +14259,159 @@
(set_attr "length" "7")
(set_attr "memory" "load")
(set_attr "imm_disp" "false")])
+
+;; GNU2 TLS patterns can be split.
+
+;; FIXME aoliva: figure out whether forcing output of lea to eax might
+;; improve performance, perhaps turning the expand into a post-reload
+;; split and using a single pseudo for all temporaries.
+
+(define_expand "tls_dynamic_gnu2_32"
+ [(set (match_dup 3)
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC)))
+ (parallel
+ [(set (match_dup 4)
+ (mem:SI
+ (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+ UNSPEC_TLSDESC)))
+ (use (reg:SI SP_REG))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+ (match_dup 4)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (SImode);
+ operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (SImode);
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "b")
+ (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC)))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_32"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (mem:SI
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_operand:SI 2 "register_operand" "0")
+ ;; we have to make sure %ebx still points to the GOT
+ (match_operand:SI 3 "register_operand" "b")]
+ UNSPEC_TLSDESC)))
+ (use (reg:SI SP_REG))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI
+ (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+ (mem:SI
+ (unspec:SI [(match_operand:SI 4 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "b")]
+ UNSPEC_TLSDESC)))
+ (const:SI (unspec:SI
+ [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (use (reg:SI SP_REG))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(match_dup 0) (match_dup 1) (match_dup 2)]
+{
+ emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "tls_dynamic_gnu2_64"
+ [(set (match_dup 2)
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))
+ (parallel
+ [(set (match_dup 3)
+ (mem:DI
+ (unspec:DI [(match_dup 1) (match_dup 2)]
+ UNSPEC_TLSDESC)))
+ (use (reg:DI SP_REG))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+ (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (DImode);
+ operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (DImode);
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "DI")
+ (set_attr "length" "7")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (mem:DI
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+ (match_operand:DI 2 "register_operand" "0")]
+ UNSPEC_TLSDESC)))
+ (use (reg:DI SP_REG))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (plus:DI
+ (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+ (mem:DI
+ (unspec:DI [(match_operand:DI 3 "tls_symbolic_operand" "")
+ (match_operand:DI 2 "register_operand" "r")]
+ UNSPEC_TLSDESC)))
+ (const:DI (unspec:DI
+ [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (use (reg:DI SP_REG))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(match_dup 0) (match_dup 1)]
+{
+ emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], operands[1]));
+ DONE;
+})
+
+;;
;; These patterns match the binary 387 instructions for addM3, subM3,
;; mulM3 and divM3. There are three patterns for each of DFmode and
Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc/config/i386/i386-protos.h.orig
+++ gcc/config/i386/i386-protos.h
@@ -226,6 +226,7 @@ extern int x86_field_alignment (tree, in
#endif
extern rtx ix86_tls_get_addr (void);
+extern rtx ix86_tls_module_base (void);
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
--
Alexandre Oliva http://www.lsd.ic.unicamp.br/~oliva/
Red Hat Compiler Engineer aoliva@{redhat.com, gcc.gnu.org}
Free Software Evangelist oliva@{lsd.ic.unicamp.br, gnu.org}