This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: PR target/59379: [4.9 Regression] gomp_init_num_threads is compiled into an infinite loop with --with-arch=corei7 --with-cpu=slm
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Sun, 19 Jan 2014 06:20:30 -0800
- Subject: Re: PATCH: PR target/59379: [4.9 Regression] gomp_init_num_threads is compiled into an infinite loop with --with-arch=corei7 --with-cpu=slm
- Authentication-results: sourceware.org; auth=none
- References: <20140118201557 dot GA4402 at gmail dot com> <CAFULd4ar_8gcvBnfvp52n5H5zQMKvxty5KNnHebpAbuKXE9_aA at mail dot gmail dot com>
On Sun, Jan 19, 2014 at 1:55 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sat, Jan 18, 2014 at 9:15 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> For LEA operation with SImode_address_operand, which zero-extends SImode
>> to DImode, ix86_split_lea_for_addr turns
>>
>> (set (reg:DI) ...)
>>
>> into
>>
>> (set (reg:SI) ...)
>>
>> We need to do
>>
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>
>> at the end. If the LEA operation is
>>
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>
> ree pass should remove these. However, we can just emit zero-extend
> insn at the end of sequence, and ree (which is located after
> post-reload split) should handle it:
>
> --cut here--
> Index: config/i386/i386.md
> ===================================================================
> --- config/i386/i386.md (revision 206753)
> +++ config/i386/i386.md (working copy)
> @@ -5428,12 +5428,17 @@
> operands[0] = SET_DEST (pat);
> operands[1] = SET_SRC (pat);
>
> - /* Emit all operations in SImode for zero-extended addresses. Recall
> - that x86_64 inheretly zero-extends SImode operations to DImode. */
> + /* Emit all operations in SImode for zero-extended addresses. */
> if (SImode_address_operand (operands[1], VOIDmode))
> mode = SImode;
>
> ix86_split_lea_for_addr (curr_insn, operands, mode);
> +
> + /* Zero-extend return register to DImode for zero-extended addresses. */
> + if (mode != <MODE>mode)
> + emit_insn (gen_zero_extendsidi2
> + (operands[0], gen_lowpart ((mode), operands[0])));
> +
> DONE;
> }
> [(set_attr "type" "lea")
> --cut here--
>
> The patch was tested with a testcase from Comment #9 of the PR using
> "-O --march=corei7 -mtune=slm", and resulting binary runs without
> problems.
>
Yes, the resulting GCC works correctly. However, we generate
extra
(set (reg:DI) (zero_extend:DI (reg:SI)))
It is because we generate
(set (reg:SI) (reg:SI)
(set (reg:DI) (zero_extend:DI (reg:SI)))
REE pass doesn't know
(set (reg:SI) (reg:SI)
has an implicit ZERO_EXTEND. Here is a testcase:
---foo.c---
extern __thread unsigned int __bid_IDEC_glbflags;
typedef unsigned long long UINT64;
typedef __attribute__ ((aligned(16))) struct
{
UINT64 w[2];
} UINT128;
extern UINT64 __bid64_from_uint64 (UINT64);
extern void __bid_round64_2_18 (int q,
int x,
UINT64 C,
UINT64 * ptr_Cstar,
int *delta_exp,
int *ptr_is_midpoint_lt_even,
int *ptr_is_midpoint_gt_even,
int *ptr_is_inexact_lt_midpoint,
int *ptr_is_inexact_gt_midpoint);
extern void __bid_round128_19_38 (int q,
int x,
UINT128 C,
UINT128 * ptr_Cstar,
int *delta_exp,
int *ptr_is_midpoint_lt_even,
int *ptr_is_midpoint_gt_even,
int *ptr_is_inexact_lt_midpoint,
int *ptr_is_inexact_gt_midpoint);
UINT64
__bid64_from_uint64 (UINT64 x)
{
UINT64 res;
UINT128 x128, res128;
unsigned int q, ind;
int incr_exp = 0;
int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
if (x <= 0x002386F26FC0ffffull) {
if (x < 0x0020000000000000ull) {
res = 0x31c0000000000000ull | x;
} else {
res = 0x6c70000000000000ull | (x & 0x0007ffffffffffffull);
}
}
else
{
if (x < 0x16345785d8a0000ull) {
q = 17;
ind = 1;
} else if (x < 0xde0b6b3a7640000ull) {
q = 18;
ind = 2;
} else if (x < 0x8ac7230489e80000ull) {
q = 19;
ind = 3;
} else {
q = 20;
ind = 4;
}
if (q <= 19) {
__bid_round64_2_18 (
q, ind, x, &res, &incr_exp,
&is_midpoint_lt_even, &is_midpoint_gt_even,
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
}
else {
x128.w[1] = 0x0;
x128.w[0] = x;
__bid_round128_19_38 (q, ind, x128, &res128, &incr_exp,
&is_midpoint_lt_even, &is_midpoint_gt_even,
&is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
res = res128.w[0];
}
if (incr_exp)
ind++;
if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
is_midpoint_lt_even || is_midpoint_gt_even)
*&__bid_IDEC_glbflags |= 0x00000020;
if (res < 0x0020000000000000ull) {
res = (((UINT64) ind + 398) << 53) | res;
} else
{
res = 0x6000000000000000ull | (((UINT64) ind + 398) << 51) |
(res & 0x0007ffffffffffffull);
}
}
return(res);;
}
-----------
Compiling with -fPIC -O2, the differences between your patch and mine
are
--- bad.s 2014-01-19 06:10:28.006570325 -0800
+++ foo.s 2014-01-19 06:11:46.117754696 -0800
@@ -84,19 +84,18 @@ __bid64_from_uint64:
movabsq $9007199254740991, %rax
cmpq %rax, %rbx
jbe .L23
- movl %ebp, %edx
leaq 88(%rsp), %rsp
.cfi_remember_state
.cfi_def_cfa_offset 24
movabsq $2251799813685247, %rax
- movl %edx, %edx
+ movl %ebp, %edx
andq %rbx, %rax
- movabsq $6917529027641081856, %rcx
popq %rbx
.cfi_def_cfa_offset 16
+ movabsq $6917529027641081856, %rcx
addq $398, %rdx
- orq %rcx, %rax
salq $51, %rdx
+ orq %rcx, %rax
popq %rbp
.cfi_def_cfa_offset 8
orq %rdx, %rax
@@ -154,7 +153,6 @@ __bid64_from_uint64:
leaq 88(%rsp), %rsp
.cfi_remember_state
.cfi_def_cfa_offset 24
- movl %eax, %eax
addq $398, %rax
salq $53, %rax
orq %rbx, %rax
My patch removes 2 extra
(set (reg:DI) (zero_extend:DI (reg:SI)))
--
H.J.