This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: Properly generate X32 IE sequence


On Mon, Mar 19, 2012 at 9:19 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Mar 19, 2012 at 8:54 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Mon, Mar 19, 2012 at 8:51 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Sun, Mar 18, 2012 at 1:55 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Sun, Mar 18, 2012 at 5:01 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>
>>>>>> I am testing this patch.  OK for trunk if it passes all tests?
>>>>>
>>>>> No, force_reg will generate a pseudo, so this conversion is valid only
>>>>> for !can_create_pseudo ().
>>>>>
>>>>> At least for *tls_initial_exec_x32_store, you will need a temporary to
>>>>> split the pattern after reload.
>>>
>>> Here is the updated patch to add can_create_pseudo.  I also changed
>>> tls_initial_exec_x32 to take an input register operand as thread pointer.
>>>
>>>> Please try the attached patch. It simply throws away all recent
>>>> complications w.r.t. the thread pointer and always handles TP in
>>>> DImode.
>>>>
>>>> The testcase:
>>>>
>>>> --cut here--
>>>> __thread int foo __attribute__ ((tls_model ("initial-exec")));
>>>>
>>>> void bar (int x)
>>>> {
>>>>  foo = x;
>>>> }
>>>>
>>>> int baz (void)
>>>> {
>>>>  return foo;
>>>> }
>>>> --cut here--
>>>>
>>>> Now compiles to:
>>>>
>>>> bar:
>>>>        movq    foo@gottpoff(%rip), %rax
>>>>        movl    %edi, %fs:(%rax)
>>>>        ret
>>>>
>>>> baz:
>>>>        movq    foo@gottpoff(%rip), %rax
>>>>        movl    %fs:(%rax), %eax
>>>>        ret
>>>>
>>>> In effect, this always generates %fs:(%rDI) and emits a REX prefix before
>>>> mov/add to satisfy brain-dead linkers.
>>>>
>>>> The patch is bootstrapping now on x86_64-pc-linux-gnu.
>>>>
>>>
>>> For
>>>
>>> --
>>> extern __thread char c;
>>> extern char y;
>>> void
>>> ie (void)
>>> {
>>>  y = c;
>>> }
>>> --
>>>
>>> Your patch generates:
>>>
>>>        movl    %fs:0, %eax
>>>        movq    c@gottpoff(%rip), %rdx
>>>        movzbl  (%rax,%rdx), %edx
>>>        movb    %dl, y(%rip)
>>>        ret
>>>
>>> It can be optimized to:
>>>
>>>        movq    c@gottpoff(%rip), %rax
>>>        movzbl  %fs:(%rax), %eax
>>>        movb    %al, y(%rip)
>>>        ret
>>>
>>
>> Combine failed:
>>
>> (set (reg:QI 63 [ c ])
>>    (mem/c:QI (plus:DI (zero_extend:DI (unspec:SI [
>>                        (const_int 0 [0])
>>                    ] UNSPEC_TP))
>>            (mem/u/c:DI (const:DI (unspec:DI [
>>                            (symbol_ref:SI ("c") [flags 0x60]
>> <var_decl 0x7ffff19b8140 c>)
>>                        ] UNSPEC_GOTNTPOFF)) [2 S8 A8])) [0 c+0 S1 A8]))
>>
>>
>
> Wrong testcase.  It should be
>
> --
> extern __thread char c;
> extern __thread short w;
> extern char y;
> extern short i;
> void
> ie (void)
> {
>  y = c;
>  i = w;
> }
> ---
>
> I got
>
>        movl    %fs:0, %eax
>        movq    c@gottpoff(%rip), %rdx
>        movzbl  (%rax,%rdx), %edx
>        movb    %dl, y(%rip)
>        movq    w@gottpoff(%rip), %rdx
>        movzwl  (%rax,%rdx), %eax
>        movw    %ax, i(%rip)
>        ret
>
> It can be
>
>        movq    c@gottpoff(%rip), %rax
>        movzbl  %fs:(%rax), %eax
>        movb    %al, y(%rip)
>        movq    w@gottpoff(%rip), %rax
>        movzwl  %fs:(%rax), %eax
>        movw    %ax, i(%rip)
>        ret
>
>

How about this patch?  I changed the x32 thread-pointer (TP) load pattern to

(define_insn "*load_tp_x32_<mode>"
  [(set (match_operand:SWI48x 0 "register_operand" "=r")
        (unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
  "TARGET_X32"
  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "0")
   (set_attr "length" "7")
   (set_attr "memory" "load")
   (set_attr "imm_disp" "false")])

and removed *load_tp_x32_zext.


-- 
H.J.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9aa5ee7..66221e4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12483,15 +12483,12 @@ legitimize_pic_address (rtx orig, rtx reg)
 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
 
 static rtx
-get_thread_pointer (bool to_reg)
+get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
 {
-  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
-
-  if (GET_MODE (tp) != Pmode)
-    tp = convert_to_mode (Pmode, tp, 1);
+  rtx tp = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
 
   if (to_reg)
-    tp = copy_addr_to_reg (tp);
+    tp = copy_to_mode_reg (tp_mode, tp);
 
   return tp;
 }
@@ -12543,6 +12540,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 {
   rtx dest, base, off;
   rtx pic = NULL_RTX, tp = NULL_RTX;
+  enum machine_mode tp_mode = Pmode;
   int type;
 
   switch (model)
@@ -12568,7 +12566,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 	  else
 	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
 
-	  tp = get_thread_pointer (true);
+	  tp = get_thread_pointer (Pmode, true);
 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
 
 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
@@ -12618,7 +12616,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 	  else
 	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
 
-	  tp = get_thread_pointer (true);
+	  tp = get_thread_pointer (Pmode, true);
 	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
 			       gen_rtx_MINUS (Pmode, tmp, tp));
 	}
@@ -12664,27 +12662,18 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
     case TLS_MODEL_INITIAL_EXEC:
       if (TARGET_64BIT)
 	{
+	  tp_mode = DImode;
+
 	  if (TARGET_SUN_TLS)
 	    {
 	      /* The Sun linker took the AMD64 TLS spec literally
 		 and can only handle %rax as destination of the
 		 initial executable code sequence.  */
 
-	      dest = gen_reg_rtx (Pmode);
+	      dest = gen_reg_rtx (tp_mode);
 	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
 	      return dest;
 	    }
-	  else if (Pmode == SImode)
-	    {
-	      /* Always generate
-			movl %fs:0, %reg32
-			addl xgottpoff(%rip), %reg32
-		 to support linker IE->LE optimization and avoid
-		 fs:(%reg32) as memory operand.  */
-	      dest = gen_reg_rtx (Pmode);
-	      emit_insn (gen_tls_initial_exec_x32 (dest, x));
-	      return dest;
-	    }
 
 	  pic = NULL;
 	  type = UNSPEC_GOTNTPOFF;
@@ -12708,24 +12697,23 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 	  type = UNSPEC_INDNTPOFF;
 	}
 
-      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
-      off = gen_rtx_CONST (Pmode, off);
+      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
+      off = gen_rtx_CONST (tp_mode, off);
       if (pic)
-	off = gen_rtx_PLUS (Pmode, pic, off);
-      off = gen_const_mem (Pmode, off);
+	off = gen_rtx_PLUS (tp_mode, pic, off);
+      off = gen_const_mem (tp_mode, off);
       set_mem_alias_set (off, ix86_GOT_alias_set ());
 
       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
-          base = get_thread_pointer (for_mov
-				     || !(TARGET_TLS_DIRECT_SEG_REFS
-					  && TARGET_TLS_INDIRECT_SEG_REFS));
-	  off = force_reg (Pmode, off);
-	  return gen_rtx_PLUS (Pmode, base, off);
+	  base = get_thread_pointer (tp_mode,
+				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+	  off = force_reg (tp_mode, off);
+	  return gen_rtx_PLUS (tp_mode, base, off);
 	}
       else
 	{
-	  base = get_thread_pointer (true);
+	  base = get_thread_pointer (Pmode, true);
 	  dest = gen_reg_rtx (Pmode);
 	  emit_insn (ix86_gen_sub3 (dest, base, off));
 	}
@@ -12739,14 +12727,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 
       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
-	  base = get_thread_pointer (for_mov
-				     || !(TARGET_TLS_DIRECT_SEG_REFS
-					  && TARGET_TLS_INDIRECT_SEG_REFS));
+	  base = get_thread_pointer (Pmode,
+				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
 	  return gen_rtx_PLUS (Pmode, base, off);
 	}
       else
 	{
-	  base = get_thread_pointer (true);
+	  base = get_thread_pointer (Pmode, true);
 	  dest = gen_reg_rtx (Pmode);
 	  emit_insn (ix86_gen_sub3 (dest, base, off));
 	}
@@ -13274,8 +13261,7 @@ ix86_delegitimize_tls_address (rtx orig_x)
   rtx x = orig_x, unspec;
   struct ix86_address addr;
 
-  if (!(TARGET_TLS_DIRECT_SEG_REFS
-	&& TARGET_TLS_INDIRECT_SEG_REFS))
+  if (!TARGET_TLS_DIRECT_SEG_REFS)
     return orig_x;
   if (MEM_P (x))
     x = XEXP (x, 0);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9e5ac00..3fcd209 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -467,9 +467,6 @@ extern int x86_prefetch_sse;
 #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
 #endif
 
-/* Address override works only on the (%reg) part of %fs:(%reg).  */
-#define TARGET_TLS_INDIRECT_SEG_REFS (Pmode == word_mode)
-
 /* Fence to use after loop using storent.  */
 
 extern tree x86_mfence;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d23c67b..e167ceb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12747,20 +12747,9 @@
 (define_mode_attr tp_seg [(SI "gs") (DI "fs")])
 
 ;; Load and add the thread base pointer from %<tp_seg>:0.
-(define_insn "*load_tp_x32"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(const_int 0)] UNSPEC_TP))]
-  "TARGET_X32"
-  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
-
-(define_insn "*load_tp_x32_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))]
+(define_insn "*load_tp_x32_<mode>"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+	(unspec:SWI48x [(const_int 0)] UNSPEC_TP))]
   "TARGET_X32"
   "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
   [(set_attr "type" "imov")
@@ -12836,28 +12825,6 @@
 }
   [(set_attr "type" "multi")])
 
-;; When Pmode == SImode, there may be no REX prefix for ADD.  Avoid
-;; any instructions between MOV and ADD, which may interfere linker
-;; IE->LE optimization, since the last byte of the previous instruction
-;; before ADD may look like a REX prefix.  This also avoids
-;;	movl x@gottpoff(%rip), %reg32
-;;	movl $fs:(%reg32), %reg32
-;; Since address override works only on the (reg32) part in fs:(reg32),
-;; we can't use it as memory operand.
-(define_insn "tls_initial_exec_x32"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI
-	 [(match_operand 1 "tls_symbolic_operand")]
-	 UNSPEC_TLS_IE_X32))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_X32"
-{
-  output_asm_insn
-    ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands);
-  return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
-}
-  [(set_attr "type" "multi")])
-
 ;; GNU2 TLS patterns can be split.
 
 (define_expand "tls_dynamic_gnu2_32"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]