This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- From: Wei Mi <wmi at google dot com>
- To: "H.J. Lu" <hjl dot tools at gmail dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, David Li <davidxl at google dot com>, Uros Bizjak <ubizjak at gmail dot com>
- Date: Thu, 13 Mar 2014 08:14:23 -0700
- Subject: Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Authentication-results: sourceware.org; auth=none
- References: <CA+4CFy5BdB2=mnd=o+8GmQ5EBtJfGUFB-0AKFou2xMHvekCAJw at mail dot gmail dot com> <CAMe9rOrrF5zTYh6=KoKcmNDZYi_oirh6SNC+_JdCY4U6NpBMTQ at mail dot gmail dot com> <CA+4CFy7SHAHmEmreq+GsTzA9nYnWiSG_Fut_fs16BnFH+LOQow at mail dot gmail dot com> <CAMe9rOpG8N8YPA=dHMDq7fL6Kyn1PU1-RrD6bqBjHFUOfz7n9Q at mail dot gmail dot com> <CA+4CFy79py1XrgDygpRg9j4ZLz9waKMKkoAvvA_QnN6M7usCaA at mail dot gmail dot com> <CAMe9rOqdN=ebqYnN7skYzJF5sEK0Q_2rAX1SQ=hWouZtUaqJGw at mail dot gmail dot com> <CA+4CFy7wt4EKUwQAQ8zqP=gbVwAZmUu57oKNxjvyNRfkYoPdPw at mail dot gmail dot com> <CAMe9rOrOXkKB99AzmMsmHEPQENG=tUhqBoNEW+s_6E2MAL+c-A at mail dot gmail dot com> <CA+4CFy6XnBNbgWgoQmR2Mso9gJCKQSo1ek1f-NyUT7FpEqP9DA at mail dot gmail dot com> <CA+4CFy4PrriWXnfdixdpJO-6XmQSwCPWV9a3-GsZ2Uy2U+bQVA at mail dot gmail dot com> <CAMe9rOrEGOAr-OvJ1iieoLRybWXA+-e9QHUYf-v8k4x1zoJ+Vg at mail dot gmail dot com> <CA+4CFy5NmXE656_hCOmC3_J2Pmm_KiamoLX7Pep0Z7yPNuObqQ at mail dot gmail dot com> <CA+4CFy64JfOzoyYNdr-xUr3x7LZPGLmWqPa9+pv1SXVtyVDvQw at mail dot gmail dot com> <CAMe9rOqto_vXZUN4Mx8ogGw5KH9=XmvkQfmc+x8RFZe=dL+5Cw at mail dot gmail dot com> <CA+4CFy6v4nC6eaVMq1GsP51r3Qbmj7R0kAc06DLDQKdGcTxRmQ at mail dot gmail dot com>
pr58066-2.patch worked for pr58066.c on ia32/x32/x86_64, but it failed
during bootstrap while compiling bid_decimal_globals.c for libgcc:
/usr/local/google/home/wmi/workarea/gcc-r208410-2/build/./gcc/xgcc
-B/usr/local/google/home/wmi/workarea/gcc-r208410-2/build/./gcc/
-B/usr/local/google/home/wmi/workarea/gcc-r208410-2/build/install/x86_64-unknown-linux-gnu/bin/
-B/usr/local/google/home/wmi/workarea/gcc-r208410-2/build/install/x86_64-unknown-linux-gnu/lib/
-isystem /usr/local/google/home/wmi/workarea/gcc-r208410-2/build/install/x86_64-unknown-linux-gnu/include
-isystem /usr/local/google/home/wmi/workarea/gcc-r208410-2/build/install/x86_64-unknown-linux-gnu/sys-include
-g -O2 -m64 -O2 -g -O2 -DIN_GCC -W -Wall -Wwrite-strings
-Wcast-qual -Wno-format -Wstrict-prototypes -Wmissing-prototypes
-Wold-style-definition -isystem ./include -fpic -mlong-double-80 -g
-DIN_LIBGCC2 -fbuilding-libgcc -fno-stack-protector -fpic
-mlong-double-80 -I. -I. -I../../.././gcc -I../../../../src/libgcc
-I../../../../src/libgcc/. -I../../../../src/libgcc/../gcc
-I../../../../src/libgcc/../include
-I../../../../src/libgcc/config/libbid -DENABLE_DECIMAL_BID_FORMAT
-DHAVE_CC_TLS -DUSE_TLS -o bid_decimal_globals.o -MT
bid_decimal_globals.o -MD -MP -MF bid_decimal_globals.dep -c
../../../../src/libgcc/config/libbid/bid_decimal_globals.c
(call_insn 5 2 6 2 (parallel [
(set (reg/f:SI 85)
(call:SI (mem:QI (symbol_ref:SI ("___tls_get_addr")) [0 S1 A8])
(const_int 0 [0])))
(unspec:SI [
(reg:SI 3 bx)
(symbol_ref:SI ("__bid_IDEC_glbflags") [flags
0x10] <var_decl 0x7ffff61c1da8 __bid_IDEC_glbflags>)
] UNSPEC_TLS_GD)
(clobber (reg:SI 91))
(clobber (reg:SI 92))
(clobber (reg:CC 17 flags))
]) ../../../../src/libgcc/config/libbid/bid_decimal_globals.c:51
772 {*tls_global_dynamic_32_gnu}
(expr_list:REG_UNUSED (reg:SI 92)
(expr_list:REG_UNUSED (reg:SI 91)
(nil)))
(nil))
../../../../src/libgcc/config/libbid/bid_decimal_globals.c:52:1:
internal compiler error: in curr_insn_transform, at
lra-constraints.c:3262
0xad8453 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
../../src/gcc/rtl-error.c:109
0x9d1221 curr_insn_transform
../../src/gcc/lra-constraints.c:3262
0x9d40e4 lra_constraints(bool)
../../src/gcc/lra-constraints.c:4157
0x9c0ad8 lra(_IO_FILE*)
../../src/gcc/lra.c:2340
0x96e310 do_reload
../../src/gcc/ira.c:5457
0x96e622 rest_of_handle_reload
../../src/gcc/ira.c:5598
0x96e66c execute
../../src/gcc/ira.c:5627
The problem is that the return value of the call may be assigned to a
hard register other than AX_REG, but LRA cannot reload the output
operand of a call. The fix is to make legitimize_tls_address() in
config/i386/i386.c emit the following pattern instead of the one above:
(call_insn/u 5 4 6 (parallel [
(set (reg:SI 0 ax)
(call:SI (mem:QI (symbol_ref:SI ("___tls_get_addr")) [0 S1 A8])
(const_int 0 [0])))
(unspec:SI [
(reg:SI 3 bx)
(symbol_ref:SI ("__bid_IDEC_glbflags") [flags
0x10] <var_decl 0x7ffff5ef3da8 __bid_IDEC_glbflags>)
] UNSPEC_TLS_GD)
(clobber (scratch:SI))
(clobber (scratch:SI))
(clobber (reg:CC 17 flags))
]) ../../../../src/libgcc/config/libbid/bid_decimal_globals.c:51 -1
(expr_list:REG_EH_REGION (const_int -2147483648 [0xffffffff80000000])
(nil))
(nil))
(insn 6 5 7 (set (reg/f:SI 85)
(reg:SI 0 ax))
../../../../src/libgcc/config/libbid/bid_decimal_globals.c:51 -1
(expr_list:REG_EQUAL (symbol_ref:SI ("__bid_IDEC_glbflags")
[flags 0x10] <var_decl 0x7ffff5ef3da8 __bid_IDEC_glbflags>)
With this problem fixed, bootstrap and regression tests on x86-64 pass.
Thanks,
Wei.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 208410)
+++ config/i386/i386.md (working copy)
@@ -12859,13 +12859,14 @@
(define_insn "*tls_global_dynamic_32_gnu"
[(set (match_operand:SI 0 "register_operand" "=a")
- (unspec:SI
- [(match_operand:SI 1 "register_operand" "b")
- (match_operand 2 "tls_symbolic_operand")
- (match_operand 3 "constant_call_address_operand" "z")]
- UNSPEC_TLS_GD))
- (clobber (match_scratch:SI 4 "=d"))
- (clobber (match_scratch:SI 5 "=c"))
+ (call:SI
+ (mem:QI (match_operand 3 "constant_call_address_operand" "z"))
+ (match_operand 4)))
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand 2 "tls_symbolic_operand")]
+ UNSPEC_TLS_GD)
+ (clobber (match_scratch:SI 5 "=d"))
+ (clobber (match_scratch:SI 6 "=c"))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_64BIT && TARGET_GNU_TLS"
{
@@ -12885,13 +12886,19 @@
(define_expand "tls_global_dynamic_32"
[(parallel
[(set (match_operand:SI 0 "register_operand")
- (unspec:SI [(match_operand:SI 2 "register_operand")
- (match_operand 1 "tls_symbolic_operand")
- (match_operand 3 "constant_call_address_operand")]
- UNSPEC_TLS_GD))
+ (call:SI
+ (mem:QI (match_operand 3 "constant_call_address_operand"))
+ (const_int 0)))
+ (unspec:SI [(match_operand:SI 2 "register_operand")
+ (match_operand 1 "tls_symbolic_operand")]
+ UNSPEC_TLS_GD)
(clobber (match_scratch:SI 4))
(clobber (match_scratch:SI 5))
- (clobber (reg:CC FLAGS_REG))])])
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
(define_insn "*tls_global_dynamic_64_<mode>"
[(set (match_operand:P 0 "register_operand" "=a")
@@ -12946,16 +12953,20 @@
(const_int 0)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")]
UNSPEC_TLS_GD)])]
- "TARGET_64BIT")
+ "TARGET_64BIT"
+{
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
(define_insn "*tls_local_dynamic_base_32_gnu"
[(set (match_operand:SI 0 "register_operand" "=a")
- (unspec:SI
- [(match_operand:SI 1 "register_operand" "b")
- (match_operand 2 "constant_call_address_operand" "z")]
- UNSPEC_TLS_LD_BASE))
- (clobber (match_scratch:SI 3 "=d"))
- (clobber (match_scratch:SI 4 "=c"))
+ (call:SI
+ (mem:QI (match_operand 2 "constant_call_address_operand" "z"))
+ (match_operand 3)))
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")]
+ UNSPEC_TLS_LD_BASE)
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_64BIT && TARGET_GNU_TLS"
{
@@ -12976,13 +12987,18 @@
(define_expand "tls_local_dynamic_base_32"
[(parallel
[(set (match_operand:SI 0 "register_operand")
- (unspec:SI
- [(match_operand:SI 1 "register_operand")
- (match_operand 2 "constant_call_address_operand")]
- UNSPEC_TLS_LD_BASE))
+ (call:SI
+ (mem:QI (match_operand 2 "constant_call_address_operand"))
+ (const_int 0)))
+ (unspec:SI [(match_operand:SI 1 "register_operand")]
+ UNSPEC_TLS_LD_BASE)
(clobber (match_scratch:SI 3))
(clobber (match_scratch:SI 4))
- (clobber (reg:CC FLAGS_REG))])])
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
(define_insn "*tls_local_dynamic_base_64_<mode>"
[(set (match_operand:P 0 "register_operand" "=a")
@@ -13029,33 +13045,10 @@
(mem:QI (match_operand 1))
(const_int 0)))
(unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
- "TARGET_64BIT")
-
-;; Local dynamic of a single variable is a lose. Show combine how
-;; to convert that back to global dynamic.
-
-(define_insn_and_split "*tls_local_dynamic_32_once"
- [(set (match_operand:SI 0 "register_operand" "=a")
- (plus:SI
- (unspec:SI [(match_operand:SI 1 "register_operand" "b")
- (match_operand 2 "constant_call_address_operand" "z")]
- UNSPEC_TLS_LD_BASE)
- (const:SI (unspec:SI
- [(match_operand 3 "tls_symbolic_operand")]
- UNSPEC_DTPOFF))))
- (clobber (match_scratch:SI 4 "=d"))
- (clobber (match_scratch:SI 5 "=c"))
- (clobber (reg:CC FLAGS_REG))]
- ""
- "#"
- ""
- [(parallel
- [(set (match_dup 0)
- (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
- UNSPEC_TLS_GD))
- (clobber (match_dup 4))
- (clobber (match_dup 5))
- (clobber (reg:CC FLAGS_REG))])])
+ "TARGET_64BIT"
+{
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
;; Segment register for the thread base ptr load
(define_mode_attr tp_seg [(SI "gs") (DI "fs")])
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 208410)
+++ config/i386/i386.c (working copy)
@@ -9490,20 +9490,30 @@ ix86_compute_frame_layout (struct ix86_f
frame->nregs = ix86_nsaved_regs ();
frame->nsseregs = ix86_nsaved_sseregs ();
- stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
- preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
-
/* 64-bit MS ABI seem to require stack alignment to be always 16 except for
function prologues and leaf. */
- if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
+ if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
&& (!crtl->is_leaf || cfun->calls_alloca != 0
|| ix86_current_function_calls_tls_descriptor))
{
- preferred_alignment = 16;
- stack_alignment_needed = 16;
crtl->preferred_stack_boundary = 128;
crtl->stack_alignment_needed = 128;
}
+ /* preferred_stack_boundary is never updated for call
+ expanded from tls descriptor. Update it here. We don't update it in
+ expand stage because according to the comments before
+ ix86_current_function_calls_tls_descriptor, tls calls may be optimized
+ away. */
+ else if (ix86_current_function_calls_tls_descriptor
+ && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+ {
+ crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+ if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
+ crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
+ }
+
+ stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
gcc_assert (!size || stack_alignment_needed);
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
@@ -13437,26 +13447,25 @@ legitimize_tls_address (rtx x, enum tls_
else
{
rtx caddr = ix86_tls_get_addr ();
+ rtx ax = gen_rtx_REG (Pmode, AX_REG);
+ rtx insns;
+ start_sequence ();
if (TARGET_64BIT)
- {
- rtx rax = gen_rtx_REG (Pmode, AX_REG);
- rtx insns;
+ emit_call_insn
+ (ix86_gen_tls_global_dynamic_64 (ax, x, caddr));
+ else
+ emit_call_insn
+ (gen_tls_global_dynamic_32 (ax, x, pic, caddr));
- start_sequence ();
- emit_call_insn
- (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
- insns = get_insns ();
- end_sequence ();
+ insns = get_insns ();
+ end_sequence ();
- if (GET_MODE (x) != Pmode)
- x = gen_rtx_ZERO_EXTEND (Pmode, x);
+ if (GET_MODE (x) != Pmode)
+ x = gen_rtx_ZERO_EXTEND (Pmode, x);
- RTL_CONST_CALL_P (insns) = 1;
- emit_libcall_block (insns, dest, rax, x);
- }
- else
- emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, dest, ax, x);
}
break;
@@ -13490,28 +13499,28 @@ legitimize_tls_address (rtx x, enum tls_
else
{
rtx caddr = ix86_tls_get_addr ();
+ rtx ax = gen_rtx_REG (Pmode, AX_REG);
+ rtx insns, eqv;
+
+ start_sequence ();
if (TARGET_64BIT)
- {
- rtx rax = gen_rtx_REG (Pmode, AX_REG);
- rtx insns, eqv;
+ emit_call_insn
+ (ix86_gen_tls_local_dynamic_base_64 (ax, caddr));
+ else
+ emit_call_insn
+ (gen_tls_local_dynamic_base_32 (ax, pic, caddr));
- start_sequence ();
- emit_call_insn
- (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
- insns = get_insns ();
- end_sequence ();
-
- /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
- share the LD_BASE result with other LD model accesses. */
- eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
- UNSPEC_TLS_LD_BASE);
+ insns = get_insns ();
+ end_sequence ();
- RTL_CONST_CALL_P (insns) = 1;
- emit_libcall_block (insns, base, rax, eqv);
- }
- else
- emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
+ /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
+ share the LD_BASE result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LD_BASE);
+
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, base, ax, eqv);
}
off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
- References:
- [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call
- Re: [PATCH, PR58066] preferred_stack_boundary update for tls expanded call