This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: RFC: TLS improvements for IA32 and AMD64/EM64T


On Sep 20, 2005, Mark Mitchell <mark@codesourcery.com> wrote:

> Please wait for 4.2.

Sure, no problem.

Could I still ask for a review of this revised patch, so that, in
case it needs additional changes before it's allowed into 4.2, I can
make them while this is still hot in my cache? :-)

The only change since the previous patch is that it now prevents the
use of the red zone on AMD64 when TLSCALLs are present; otherwise,
the call would clobber the red zone.  I ran into this while testing
glibc builds that were forced not to use the initial exec model in
their libraries.  After this fix, and one fix in glibc, all of its
tests passed.  At this point I think it's safe to say that the new
model actually works :-)

Ok for mainline after 4.1 branches?

Index: gcc/ChangeLog
from  Alexandre Oliva  <aoliva@redhat.com>

	Introduce TLS descriptors for i386 and x86_64.
	* config/i386/i386.h (TARGET_GNU2_TLS): New macro.
	(TARGET_ANY_GNU_TLS): New macro.
	(enum tls_dialect): Add TLS_DIALECT_GNU2.
	(struct machine_function): Add calls_tls_descriptor.
	(ix86_current_function_calls_tls_descriptor): New macro.
	* config/i386/i386.c (ix86_tls_dialect): Fix typo in comment.
	(override_options): Introduce gnu2 tls dialect.
	(ix86_frame_pointer_required): Functions containing TLSCALLs are
	not leaves.
	(ix86_select_alt_pic_regnum, ix86_compute_frame_layout):
	Likewise.
	(legitimize_tls_address): Adjust logic for GNU2 TLS.
	(ix86_init_machine_status): Initialize calls_tls_descriptor.
	(ix86_tls_get_addr): Use TARGET_ANY_GNU_TLS.
	* config/i386/i386.md (UNSPEC_TLSDESC): New constant.
	(tls_global_dynamic_32, tls_global_dynamic_64): Handle GNU2 TLS.
	(tls_local_dynamic_base_32, tls_local_dynamic_base_64): Likewise.
	(tls_dynamic_gnu2_32, *tls_dynamic_lea_32): New patterns.
	(*tls_dynamic_call_32, *tls_dynamic_gnu2_combine_32): Likewise.
	(tls_dynamic_gnu2_64, *tls_dynamic_lea_64): Likewise.
	(*tls_dynamic_call_64, *tls_dynamic_gnu2_combine_64): Likewise.

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c.orig
+++ gcc/config/i386/i386.c
@@ -790,7 +790,7 @@ struct ix86_frame
 enum cmodel ix86_cmodel;
 /* Asm dialect.  */
 enum asm_dialect ix86_asm_dialect = ASM_ATT;
-/* TLS dialext.  */
+/* TLS dialects.  */
 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
 
 /* Which unit we are generating floating point math for.  */
@@ -1534,6 +1534,8 @@ override_options (void)
     {
       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
 	ix86_tls_dialect = TLS_DIALECT_GNU;
+      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+	ix86_tls_dialect = TLS_DIALECT_GNU2;
       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
 	ix86_tls_dialect = TLS_DIALECT_SUN;
       else
@@ -4294,7 +4296,9 @@ ix86_frame_pointer_required (void)
      the frame pointer by default.  Turn it back on now if we've not
      got a leaf function.  */
   if (TARGET_OMIT_LEAF_FRAME_POINTER
-      && (!current_function_is_leaf))
+      && (!current_function_is_leaf
+	  || (ix86_current_function_calls_tls_descriptor
+	      && regs_ever_live[SP_REG])))
     return 1;
 
   if (current_function_profile)
@@ -4452,7 +4456,9 @@ gen_push (rtx arg)
 static unsigned int
 ix86_select_alt_pic_regnum (void)
 {
-  if (current_function_is_leaf && !current_function_profile)
+  if (current_function_is_leaf && !current_function_profile
+      && !(ix86_current_function_calls_tls_descriptor
+	   && regs_ever_live[SP_REG]))
     {
       int i;
       for (i = 2; i >= 0; --i)
@@ -4639,7 +4645,9 @@ ix86_compute_frame_layout (struct ix86_f
      expander assumes that last current_function_outgoing_args_size
      of stack frame are unused.  */
   if (ACCUMULATE_OUTGOING_ARGS
-      && (!current_function_is_leaf || current_function_calls_alloca))
+      && (!current_function_is_leaf || current_function_calls_alloca
+	  || (ix86_current_function_calls_tls_descriptor
+	      && regs_ever_live[SP_REG])))
     {
       offset += current_function_outgoing_args_size;
       frame->outgoing_arguments_size = current_function_outgoing_args_size;
@@ -4649,7 +4657,9 @@ ix86_compute_frame_layout (struct ix86_f
 
   /* Align stack boundary.  Only needed if we're calling another function
      or using alloca.  */
-  if (!current_function_is_leaf || current_function_calls_alloca)
+  if (!current_function_is_leaf || current_function_calls_alloca
+      || (ix86_current_function_calls_tls_descriptor
+	  && regs_ever_live[SP_REG]))
     frame->padding2 = ((offset + preferred_alignment - 1)
 		       & -preferred_alignment) - offset;
   else
@@ -4670,7 +4680,9 @@ ix86_compute_frame_layout (struct ix86_f
     frame->save_regs_using_mov = false;
 
   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
-      && current_function_is_leaf)
+      && current_function_is_leaf
+      && !(ix86_current_function_calls_tls_descriptor
+	   && regs_ever_live[SP_REG]))
     {
       frame->red_zone_size = frame->to_allocate;
       if (frame->save_regs_using_mov)
@@ -6080,7 +6092,7 @@ legitimize_tls_address (rtx x, enum tls_
     {
     case TLS_MODEL_GLOBAL_DYNAMIC:
       dest = gen_reg_rtx (Pmode);
-      if (TARGET_64BIT)
+      if (TARGET_64BIT && !TARGET_GNU2_TLS)
 	{
 	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
 
@@ -6091,13 +6103,15 @@ legitimize_tls_address (rtx x, enum tls_
 
 	  emit_libcall_block (insns, dest, rax, x);
 	}
+      else if (TARGET_64BIT && TARGET_GNU2_TLS)
+	emit_insn (gen_tls_global_dynamic_64 (dest, x));
       else
 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
       break;
 
     case TLS_MODEL_LOCAL_DYNAMIC:
       base = gen_reg_rtx (Pmode);
-      if (TARGET_64BIT)
+      if (TARGET_64BIT && !TARGET_GNU2_TLS)
 	{
 	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
 
@@ -6110,6 +6124,8 @@ legitimize_tls_address (rtx x, enum tls_
 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
 	  emit_libcall_block (insns, base, rax, note);
 	}
+      else if (TARGET_64BIT && TARGET_GNU2_TLS)
+	emit_insn (gen_tls_local_dynamic_base_64 (base));
       else
 	emit_insn (gen_tls_local_dynamic_base_32 (base));
 
@@ -6129,9 +6145,9 @@ legitimize_tls_address (rtx x, enum tls_
 	  if (reload_in_progress)
 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
 	  pic = pic_offset_table_rtx;
-	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
+	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
 	}
-      else if (!TARGET_GNU_TLS)
+      else if (!TARGET_ANY_GNU_TLS)
 	{
 	  pic = gen_reg_rtx (Pmode);
 	  emit_insn (gen_set_got (pic));
@@ -6150,7 +6166,7 @@ legitimize_tls_address (rtx x, enum tls_
       off = gen_const_mem (Pmode, off);
       set_mem_alias_set (off, ix86_GOT_alias_set ());
 
-      if (TARGET_64BIT || TARGET_GNU_TLS)
+      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
 	  off = force_reg (Pmode, off);
@@ -6166,11 +6182,11 @@ legitimize_tls_address (rtx x, enum tls_
 
     case TLS_MODEL_LOCAL_EXEC:
       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
-			    (TARGET_64BIT || TARGET_GNU_TLS)
+			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
       off = gen_rtx_CONST (Pmode, off);
 
-      if (TARGET_64BIT || TARGET_GNU_TLS)
+      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
 	  return gen_rtx_PLUS (Pmode, base, off);
@@ -12619,6 +12635,7 @@ ix86_init_machine_status (void)
 
   f = ggc_alloc_cleared (sizeof (struct machine_function));
   f->use_fast_prologue_epilogue_nregs = -1;
+  f->calls_tls_descriptor = 0;
 
   return f;
 }
@@ -12661,7 +12678,8 @@ ix86_tls_get_addr (void)
   if (!ix86_tls_symbol)
     {
       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
-					    (TARGET_GNU_TLS && !TARGET_64BIT)
+					    (TARGET_ANY_GNU_TLS
+					     && !TARGET_64BIT)
 					    ? "___tls_get_addr"
 					    : "__tls_get_addr");
     }
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h.orig
+++ gcc/config/i386/i386.h
@@ -225,6 +225,8 @@ extern int x86_prefetch_sse;
 			     && (ix86_fpmath & FPMATH_387))
 
 #define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
+#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
+#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
 #define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
 
 #define TARGET_CMPXCHG (x86_cmpxchg & (1 << ix86_arch))
@@ -2131,6 +2133,7 @@ extern enum fpmath_unit ix86_fpmath;
 enum tls_dialect
 {
   TLS_DIALECT_GNU,
+  TLS_DIALECT_GNU2,
   TLS_DIALECT_SUN
 };
 
@@ -2271,11 +2274,16 @@ struct machine_function GTY(())
   /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
      for.  */
   int use_fast_prologue_epilogue_nregs;
+  /* If true, the current function needs the default PIC register, not
+     an alternate register, even if it's a leaf function.  */
+  int calls_tls_descriptor;
 };
 
 #define ix86_stack_locals (cfun->machine->stack_locals)
 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
 #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_current_function_calls_tls_descriptor \
+  (cfun->machine->calls_tls_descriptor)
 
 /* Control behavior of x86_file_start.  */
 #define X86_FILE_START_VERSION_DIRECTIVE false
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md.orig
+++ gcc/config/i386/i386.md
@@ -71,6 +71,7 @@
    (UNSPEC_TP			15)
    (UNSPEC_TLS_GD		16)
    (UNSPEC_TLS_LD_BASE		17)
+   (UNSPEC_TLSDESC		18)
 
    ; Other random patterns
    (UNSPEC_SCAS			20)
@@ -14071,6 +14072,12 @@
       operands[2] = gen_reg_rtx (Pmode);
       emit_insn (gen_set_got (operands[2]));
     }
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], operands[1],
+							operands[2]));
+       DONE;
+    }
   operands[3] = ix86_tls_get_addr ();
 })
 
@@ -14092,6 +14099,11 @@
 			 UNSPEC_TLS_GD)])]
   ""
 {
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], operands[1]));
+       DONE;
+    }
   operands[2] = ix86_tls_get_addr ();
 })
 
@@ -14138,6 +14150,15 @@
       operands[1] = gen_reg_rtx (Pmode);
       emit_insn (gen_set_got (operands[1]));
     }
+  if (TARGET_GNU2_TLS)
+    {
+       rtx id = gen_rtx_SYMBOL_REF (SImode, "_TLS_MODULE_BASE_");
+       SYMBOL_REF_FLAGS (id)
+         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+       emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], id,
+					   operands[1]));
+       DONE;
+    }
   operands[2] = ix86_tls_get_addr ();
 })
 
@@ -14157,6 +14178,14 @@
 	      (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
   ""
 {
+  if (TARGET_GNU2_TLS)
+    {
+       rtx id = gen_rtx_SYMBOL_REF (DImode, "_TLS_MODULE_BASE_");
+       SYMBOL_REF_FLAGS (id)
+         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+       emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], id));
+       DONE;
+    }
   operands[1] = ix86_tls_get_addr ();
 })
 
@@ -14234,6 +14263,159 @@
    (set_attr "length" "7")
    (set_attr "memory" "load")
    (set_attr "imm_disp" "false")])
+
+;; GNU2 TLS patterns can be split.
+
+;; FIXME aoliva: figure out whether forcing output of lea to eax might
+;; improve performance, perhaps turning the expand into a post-reload
+;; split and using a single pseudo for all temporaries.
+
+(define_expand "tls_dynamic_gnu2_32"
+  [(set (match_dup 3)
+	(plus:SI (match_operand:SI 2 "register_operand" "")
+		 (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]
+			    UNSPEC_TLSDESC)))
+   (parallel
+    [(set (match_dup 4)
+	  (mem:SI
+	   (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+		      UNSPEC_TLSDESC)))
+     (use (reg:SI SP_REG))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+	  (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+		   (match_dup 4)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+{
+  operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (SImode);
+  operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (SImode);
+  ix86_current_function_calls_tls_descriptor = true;
+})
+	
+(define_insn "*tls_dynamic_lea_32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (match_operand:SI 1 "register_operand" "b")
+		 (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]
+			    UNSPEC_TLSDESC)))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")
+   (set_attr "length" "6")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_32"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(mem:SI
+	 (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")
+		     (match_operand:SI 2 "register_operand" "0")
+		     ;; we have to make sure %ebx still points to the GOT
+		     (match_operand:SI 3 "register_operand" "b")]
+		    UNSPEC_TLSDESC)))
+   (use (reg:SI SP_REG))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+  
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(plus:SI
+	 (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+		  (mem:SI
+		   (unspec:SI [(match_operand:SI 4 "tls_symbolic_operand" "")
+			       (match_operand:SI 3 "register_operand" "r")
+			       (match_operand:SI 2 "register_operand" "b")]
+			      UNSPEC_TLSDESC)))
+	 (const:SI (unspec:SI
+		    [(match_operand:SI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (use (reg:SI SP_REG))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(match_dup 0) (match_dup 1) (match_dup 2)]
+{
+  emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "tls_dynamic_gnu2_64"
+  [(set (match_dup 2)
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))
+   (parallel
+    [(set (match_dup 3)
+	  (mem:DI
+	   (unspec:DI [(match_dup 1) (match_dup 2)]
+		      UNSPEC_TLSDESC)))
+     (use (reg:DI SP_REG))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+	  (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+		   (match_dup 3)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+{
+  operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (DImode);
+  operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (DImode);
+  ix86_current_function_calls_tls_descriptor = true;
+})
+	
+(define_insn "*tls_dynamic_lea_64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")
+   (set_attr "length" "7")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(mem:DI
+	 (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+		     (match_operand:DI 2 "register_operand" "0")]
+		    UNSPEC_TLSDESC)))
+   (use (reg:DI SP_REG))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+  
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(plus:DI
+	 (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+		  (mem:DI
+		   (unspec:DI [(match_operand:DI 3 "tls_symbolic_operand" "")
+			       (match_operand:DI 2 "register_operand" "r")]
+			      UNSPEC_TLSDESC)))
+	 (const:DI (unspec:DI
+		    [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (use (reg:DI SP_REG))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(match_dup 0) (match_dup 1)]
+{
+  emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], operands[1]));
+  DONE;
+})
+
+;;
 
 ;; These patterns match the binary 387 instructions for addM3, subM3,
 ;; mulM3 and divM3.  There are three patterns for each of DFmode and


-- 
Alexandre Oliva         http://www.lsd.ic.unicamp.br/~oliva/
Red Hat Compiler Engineer   aoliva@{redhat.com, gcc.gnu.org}
Free Software Evangelist  oliva@{lsd.ic.unicamp.br, gnu.org}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]