This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: RFC: TLS improvements for IA32 and AMD64/EM64T


On Sep 16, 2005, Alexandre Oliva <aoliva@redhat.com> wrote:

> Over the past few months, I've been working on porting to IA32 and
> AMD64/EM64T the interesting bits of the TLS design I came up with for
> FR-V, achieving some impressive speedups along with slight code size
> reductions in the most common cases.

> Although the design is not set in stone yet, it's fully implemented
> and functional with patches I'm about to post for binutils, gcc and
> glibc mainline, as follow-ups to this message, except that the GCC
> patch will go to gcc-patches, as expected.

Following up from the message just posted to gcc@gcc.gnu.org...
Here's the patch.  Code is not generated using the new model by
default; one has to compile with -mtls-dialect=gnu2 for now.

Since this doesn't modify the generated code by default, I'd like very
much to have it added to GCC 4.1, instead of waiting for 4.2.  It's
not the default because I expect it to take a while before versions of
binutils and glibc that support the new relocations become widely
available, but it probably wouldn't hurt to offer the option.

Bootstrapped uberbaum on x86_64-linux-gnu, along with the patches just
posted to binutils.  Ok to install?

Index: gcc/ChangeLog
from  Alexandre Oliva  <aoliva@redhat.com>

	Introduce TLS descriptors for i386 and x86_64.
	* config/i386/i386.h (TARGET_GNU2_TLS): New macro.
	(TARGET_ANY_GNU_TLS): New macro.
	(enum tls_dialect): Add TLS_DIALECT_GNU2.
	(struct machine_function): Add need_standard_pic_reg.
	(ix86_need_standard_pic_reg): New macro.
	* config/i386/i386.c (ix86_tls_dialect): Fix typo in comment.
	(override_options): Introduce gnu2 tls dialect.
	(ix86_select_alt_pic_regnum): Use ix86_need_standard_pic_reg.
	(legitimize_tls_address): Adjust logic for GNU2 TLS.
	(ix86_init_machine_status): Initialize need_standard_pic_reg.
	(ix86_tls_get_addr): Use TARGET_ANY_GNU_TLS.
	* config/i386/i386.md (UNSPEC_TLSDESC): New constant.
	(tls_global_dynamic_32, tls_global_dynamic_64): Handle GNU2 TLS.
	(tls_local_dynamic_base_32, tls_local_dynamic_base_64): Likewise.
	(tls_dynamic_gnu2_32, *tls_dynamic_lea_32): New patterns.
	(*tls_dynamic_call_32, *tls_dynamic_gnu2_combine_32): Likewise.
	(tls_dynamic_gnu2_64, *tls_dynamic_lea_64): Likewise.
	(*tls_dynamic_call_64, *tls_dynamic_gnu2_combine_64): Likewise.

Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/uberbaum/gcc/config/i386/i386.c,v
retrieving revision 1.858
diff -u -p -r1.858 i386.c
--- gcc/config/i386/i386.c 6 Sep 2005 19:57:46 -0000 1.858
+++ gcc/config/i386/i386.c 15 Sep 2005 22:38:58 -0000
@@ -790,7 +790,7 @@ struct ix86_frame
 enum cmodel ix86_cmodel;
 /* Asm dialect.  */
 enum asm_dialect ix86_asm_dialect = ASM_ATT;
-/* TLS dialext.  */
+/* TLS dialects.  */
 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
 
 /* Which unit we are generating floating point math for.  */
@@ -1534,6 +1534,8 @@ override_options (void)
     {
       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
 	ix86_tls_dialect = TLS_DIALECT_GNU;
+      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+	ix86_tls_dialect = TLS_DIALECT_GNU2;
       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
 	ix86_tls_dialect = TLS_DIALECT_SUN;
       else
@@ -4451,7 +4453,8 @@ gen_push (rtx arg)
 static unsigned int
 ix86_select_alt_pic_regnum (void)
 {
-  if (current_function_is_leaf && !current_function_profile)
+  if (current_function_is_leaf && !current_function_profile
+      && !ix86_need_standard_pic_reg)
     {
       int i;
       for (i = 2; i >= 0; --i)
@@ -6079,7 +6082,7 @@ legitimize_tls_address (rtx x, enum tls_
     {
     case TLS_MODEL_GLOBAL_DYNAMIC:
       dest = gen_reg_rtx (Pmode);
-      if (TARGET_64BIT)
+      if (TARGET_64BIT && !TARGET_GNU2_TLS)
 	{
 	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
 
@@ -6090,13 +6093,15 @@ legitimize_tls_address (rtx x, enum tls_
 
 	  emit_libcall_block (insns, dest, rax, x);
 	}
+      else if (TARGET_64BIT && TARGET_GNU2_TLS)
+	emit_insn (gen_tls_global_dynamic_64 (dest, x));
       else
 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
       break;
 
     case TLS_MODEL_LOCAL_DYNAMIC:
       base = gen_reg_rtx (Pmode);
-      if (TARGET_64BIT)
+      if (TARGET_64BIT && !TARGET_GNU2_TLS)
 	{
 	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
 
@@ -6109,6 +6114,8 @@ legitimize_tls_address (rtx x, enum tls_
 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
 	  emit_libcall_block (insns, base, rax, note);
 	}
+      else if (TARGET_64BIT && TARGET_GNU2_TLS)
+	emit_insn (gen_tls_local_dynamic_base_64 (base));
       else
 	emit_insn (gen_tls_local_dynamic_base_32 (base));
 
@@ -6128,9 +6135,9 @@ legitimize_tls_address (rtx x, enum tls_
 	  if (reload_in_progress)
 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
 	  pic = pic_offset_table_rtx;
-	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
+	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
 	}
-      else if (!TARGET_GNU_TLS)
+      else if (!TARGET_ANY_GNU_TLS)
 	{
 	  pic = gen_reg_rtx (Pmode);
 	  emit_insn (gen_set_got (pic));
@@ -6149,7 +6156,7 @@ legitimize_tls_address (rtx x, enum tls_
       off = gen_const_mem (Pmode, off);
       set_mem_alias_set (off, ix86_GOT_alias_set ());
 
-      if (TARGET_64BIT || TARGET_GNU_TLS)
+      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
 	  off = force_reg (Pmode, off);
@@ -6165,11 +6172,11 @@ legitimize_tls_address (rtx x, enum tls_
 
     case TLS_MODEL_LOCAL_EXEC:
       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
-			    (TARGET_64BIT || TARGET_GNU_TLS)
+			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
       off = gen_rtx_CONST (Pmode, off);
 
-      if (TARGET_64BIT || TARGET_GNU_TLS)
+      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
 	  return gen_rtx_PLUS (Pmode, base, off);
@@ -12618,6 +12625,7 @@ ix86_init_machine_status (void)
 
   f = ggc_alloc_cleared (sizeof (struct machine_function));
   f->use_fast_prologue_epilogue_nregs = -1;
+  f->need_standard_pic_reg = 0;
 
   return f;
 }
@@ -12660,7 +12668,8 @@ ix86_tls_get_addr (void)
   if (!ix86_tls_symbol)
     {
       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
-					    (TARGET_GNU_TLS && !TARGET_64BIT)
+					    (TARGET_ANY_GNU_TLS
+					     && !TARGET_64BIT)
 					    ? "___tls_get_addr"
 					    : "__tls_get_addr");
     }
Index: gcc/config/i386/i386.h
===================================================================
RCS file: /cvs/uberbaum/gcc/config/i386/i386.h,v
retrieving revision 1.445
diff -u -p -r1.445 i386.h
--- gcc/config/i386/i386.h 6 Aug 2005 13:26:07 -0000 1.445
+++ gcc/config/i386/i386.h 15 Sep 2005 22:39:00 -0000
@@ -225,6 +225,8 @@ extern int x86_prefetch_sse;
 			     && (ix86_fpmath & FPMATH_387))
 
 #define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
+#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
+#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
 #define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
 
 #define TARGET_CMPXCHG (x86_cmpxchg & (1 << ix86_arch))
@@ -2131,6 +2133,7 @@ extern enum fpmath_unit ix86_fpmath;
 enum tls_dialect
 {
   TLS_DIALECT_GNU,
+  TLS_DIALECT_GNU2,
   TLS_DIALECT_SUN
 };
 
@@ -2271,11 +2274,15 @@ struct machine_function GTY(())
   /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
      for.  */
   int use_fast_prologue_epilogue_nregs;
+  /* If true, the current function needs the default PIC register, not
+     an alternate register, even if it's a leaf function.  */
+  int need_standard_pic_reg;
 };
 
 #define ix86_stack_locals (cfun->machine->stack_locals)
 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
 #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_need_standard_pic_reg (cfun->machine->need_standard_pic_reg)
 
 /* Control behavior of x86_file_start.  */
 #define X86_FILE_START_VERSION_DIRECTIVE false
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/uberbaum/gcc/config/i386/i386.md,v
retrieving revision 1.654
diff -u -p -r1.654 i386.md
--- gcc/config/i386/i386.md 6 Sep 2005 08:53:06 -0000 1.654
+++ gcc/config/i386/i386.md 15 Sep 2005 22:39:06 -0000
@@ -71,6 +71,7 @@
    (UNSPEC_TP			15)
    (UNSPEC_TLS_GD		16)
    (UNSPEC_TLS_LD_BASE		17)
+   (UNSPEC_TLSDESC		18)
 
    ; Other random patterns
    (UNSPEC_SCAS			20)
@@ -14071,6 +14072,12 @@
       operands[2] = gen_reg_rtx (Pmode);
       emit_insn (gen_set_got (operands[2]));
     }
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], operands[1],
+							operands[2]));
+       DONE;
+    }
   operands[3] = ix86_tls_get_addr ();
 })
 
@@ -14092,6 +14099,11 @@
 			 UNSPEC_TLS_GD)])]
   ""
 {
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], operands[1]));
+       DONE;
+    }
   operands[2] = ix86_tls_get_addr ();
 })
 
@@ -14138,6 +14150,15 @@
       operands[1] = gen_reg_rtx (Pmode);
       emit_insn (gen_set_got (operands[1]));
     }
+  if (TARGET_GNU2_TLS)
+    {
+       rtx id = gen_rtx_SYMBOL_REF (SImode, "_TLS_MODULE_BASE_");
+       SYMBOL_REF_FLAGS (id)
+         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+       emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], id,
+					   operands[1]));
+       DONE;
+    }
   operands[2] = ix86_tls_get_addr ();
 })
 
@@ -14157,6 +14178,14 @@
 	      (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
   ""
 {
+  if (TARGET_GNU2_TLS)
+    {
+       rtx id = gen_rtx_SYMBOL_REF (DImode, "_TLS_MODULE_BASE_");
+       SYMBOL_REF_FLAGS (id)
+         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+       emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], id));
+       DONE;
+    }
   operands[1] = ix86_tls_get_addr ();
 })
 
@@ -14234,6 +14263,147 @@
    (set_attr "length" "7")
    (set_attr "memory" "load")
    (set_attr "imm_disp" "false")])
+
+;; GNU2 TLS patterns can be split.
+
+;; FIXME aoliva: figure out whether forcing output of lea to eax might
+;; improve performance, perhaps turning the expand into a post-reload
+;; split and using a single pseudo for all temporaries.
+
+(define_expand "tls_dynamic_gnu2_32"
+  [(set (match_dup 3)
+	(plus:SI (match_operand:SI 2 "register_operand" "")
+		 (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]
+			    UNSPEC_TLSDESC)))
+   (set (match_dup 4)
+	(mem:SI
+	 (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+		    UNSPEC_TLSDESC)))
+   (parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+	  (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+		   (match_dup 4)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+{
+  operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (SImode);
+  operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (SImode);
+  ix86_need_standard_pic_reg = true;
+})
+	
+(define_insn "*tls_dynamic_lea_32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (match_operand:SI 1 "register_operand" "b")
+		 (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]
+			    UNSPEC_TLSDESC)))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")
+   (set_attr "length" "6")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_32"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(mem:SI
+	 (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")
+		     (match_operand:SI 2 "register_operand" "0")
+		     ;; we have to make sure %ebx still points to the GOT
+		     (match_operand:SI 3 "register_operand" "b")]
+		    UNSPEC_TLSDESC)))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+  
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(plus:SI
+	 (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+		  (mem:SI
+		   (unspec:SI [(match_operand:SI 4 "tls_symbolic_operand" "")
+			       (match_operand:SI 3 "register_operand" "r")
+			       (match_operand:SI 2 "register_operand" "b")]
+			      UNSPEC_TLSDESC)))
+	 (const:SI (unspec:SI
+		    [(match_operand:SI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(match_dup 0) (match_dup 1) (match_dup 2)]
+{
+  emit_insn (gen_tls_dynamic_gnu2_32 (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "tls_dynamic_gnu2_64"
+  [(set (match_dup 2)
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))
+   (set (match_dup 3)
+	(mem:DI
+	 (unspec:DI [(match_dup 1)
+		    (match_dup 2)]
+		    UNSPEC_TLSDESC)))
+   (parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+	  (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+		   (match_dup 3)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+{
+  operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (DImode);
+  operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (DImode);
+})
+	
+(define_insn "*tls_dynamic_lea_64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")
+   (set_attr "length" "7")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(mem:DI
+	 (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+		     (match_operand:DI 2 "register_operand" "0")]
+		    UNSPEC_TLSDESC)))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+  
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(plus:DI
+	 (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+		  (mem:DI
+		   (unspec:DI [(match_operand:DI 3 "tls_symbolic_operand" "")
+			       (match_operand:DI 2 "register_operand" "r")]
+			      UNSPEC_TLSDESC)))
+	 (const:DI (unspec:DI
+		    [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(match_dup 0) (match_dup 1)]
+{
+  emit_insn (gen_tls_dynamic_gnu2_64 (operands[0], operands[1]));
+  DONE;
+})
+
+;;
 
 ;; These patterns match the binary 387 instructions for addM3, subM3,
 ;; mulM3 and divM3.  There are three patterns for each of DFmode and
-- 
Alexandre Oliva         http://www.lsd.ic.unicamp.br/~oliva/
Red Hat Compiler Engineer   aoliva@{redhat.com, gcc.gnu.org}
Free Software Evangelist  oliva@{lsd.ic.unicamp.br, gnu.org}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]