PATCH: PR target/59379: [4.9 Regression] gomp_init_num_threads is compiled into an infinite loop with --with-arch=corei7 --with-cpu=slm

H.J. Lu hjl.tools@gmail.com
Sun Jan 19 14:20:00 GMT 2014


On Sun, Jan 19, 2014 at 1:55 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sat, Jan 18, 2014 at 9:15 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> For LEA operation with SImode_address_operand, which zero-extends SImode
>> to DImode, ix86_split_lea_for_addr turns
>>
>> (set (reg:DI) ...)
>>
>> into
>>
>> (set (reg:SI) ...)
>>
>> We need to do
>>
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>
>> at the end. If the LEA operation is
>>
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>
> ree pass should remove these. However, we can just emit zero-extend
> insn at the end of sequence, and ree (which is located after
> post-reload split) should handle it:
>
> --cut here--
> Index: config/i386/i386.md
> ===================================================================
> --- config/i386/i386.md (revision 206753)
> +++ config/i386/i386.md (working copy)
> @@ -5428,12 +5428,17 @@
>    operands[0] = SET_DEST (pat);
>    operands[1] = SET_SRC (pat);
>
> -  /* Emit all operations in SImode for zero-extended addresses.  Recall
> -     that x86_64 inheretly zero-extends SImode operations to DImode.  */
> +  /* Emit all operations in SImode for zero-extended addresses.  */
>    if (SImode_address_operand (operands[1], VOIDmode))
>      mode = SImode;
>
>    ix86_split_lea_for_addr (curr_insn, operands, mode);
> +
> +  /* Zero-extend return register to DImode for zero-extended addresses.  */
> +  if (mode != <MODE>mode)
> +    emit_insn (gen_zero_extendsidi2
> +              (operands[0], gen_lowpart ((mode), operands[0])));
> +
>    DONE;
>  }
>    [(set_attr "type" "lea")
> --cut here--
>
> The patch was tested with a testcase from Comment #9 of the PR using
> "-O --march=corei7 -mtune=slm", and resulting binary runs without
> problems.
>

Yes, the resulting GCC works correctly.  However, we generate
extra

(set (reg:DI) (zero_extend:DI (reg:SI)))

It is because we generate

(set (reg:SI) (reg:SI)
(set (reg:DI) (zero_extend:DI (reg:SI)))

REE pass doesn't know

(set (reg:SI) (reg:SI)

has an implicit ZERO_EXTEND.  Here is a testcase:

---foo.c---
extern __thread unsigned int __bid_IDEC_glbflags;
typedef unsigned long long UINT64;
typedef __attribute__ ((aligned(16))) struct
{
  UINT64 w[2];
} UINT128;
extern UINT64 __bid64_from_uint64 (UINT64);
extern void __bid_round64_2_18 (int q,
int x,
UINT64 C,
UINT64 * ptr_Cstar,
int *delta_exp,
int *ptr_is_midpoint_lt_even,
int *ptr_is_midpoint_gt_even,
int *ptr_is_inexact_lt_midpoint,
int *ptr_is_inexact_gt_midpoint);
extern void __bid_round128_19_38 (int q,
 int x,
 UINT128 C,
 UINT128 * ptr_Cstar,
 int *delta_exp,
 int *ptr_is_midpoint_lt_even,
 int *ptr_is_midpoint_gt_even,
 int *ptr_is_inexact_lt_midpoint,
 int *ptr_is_inexact_gt_midpoint);
UINT64
__bid64_from_uint64 (UINT64 x)
{
  UINT64 res;
  UINT128 x128, res128;
  unsigned int q, ind;
  int incr_exp = 0;
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  if (x <= 0x002386F26FC0ffffull) {
    if (x < 0x0020000000000000ull) {
      res = 0x31c0000000000000ull | x;
    } else {
      res = 0x6c70000000000000ull | (x & 0x0007ffffffffffffull);
    }
  }
  else
    {
      if (x < 0x16345785d8a0000ull) {
q = 17;
ind = 1;
      } else if (x < 0xde0b6b3a7640000ull) {
q = 18;
ind = 2;
      } else if (x < 0x8ac7230489e80000ull) {
q = 19;
ind = 3;
      } else {
q = 20;
ind = 4;
      }
      if (q <= 19) {
__bid_round64_2_18 (
   q, ind, x, &res, &incr_exp,
   &is_midpoint_lt_even, &is_midpoint_gt_even,
   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      }
      else {
x128.w[1] = 0x0;
x128.w[0] = x;
__bid_round128_19_38 (q, ind, x128, &res128, &incr_exp,
     &is_midpoint_lt_even, &is_midpoint_gt_even,
     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
res = res128.w[0];
      }
      if (incr_exp)
ind++;
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
 is_midpoint_lt_even || is_midpoint_gt_even)
*&__bid_IDEC_glbflags |= 0x00000020;
      if (res < 0x0020000000000000ull) {
res = (((UINT64) ind + 398) << 53) | res;
      } else
{
 res = 0x6000000000000000ull | (((UINT64) ind + 398) << 51) |
   (res & 0x0007ffffffffffffull);
}
    }
  return(res);;
}
-----------

Compiling with -fPIC -O2, the differences between your patch and mine
are

--- bad.s 2014-01-19 06:10:28.006570325 -0800
+++ foo.s 2014-01-19 06:11:46.117754696 -0800
@@ -84,19 +84,18 @@ __bid64_from_uint64:
  movabsq $9007199254740991, %rax
  cmpq %rax, %rbx
  jbe .L23
- movl %ebp, %edx
  leaq 88(%rsp), %rsp
  .cfi_remember_state
  .cfi_def_cfa_offset 24
  movabsq $2251799813685247, %rax
- movl %edx, %edx
+ movl %ebp, %edx
  andq %rbx, %rax
- movabsq $6917529027641081856, %rcx
  popq %rbx
  .cfi_def_cfa_offset 16
+ movabsq $6917529027641081856, %rcx
  addq $398, %rdx
- orq %rcx, %rax
  salq $51, %rdx
+ orq %rcx, %rax
  popq %rbp
  .cfi_def_cfa_offset 8
  orq %rdx, %rax
@@ -154,7 +153,6 @@ __bid64_from_uint64:
  leaq 88(%rsp), %rsp
  .cfi_remember_state
  .cfi_def_cfa_offset 24
- movl %eax, %eax
  addq $398, %rax
  salq $53, %rax
  orq %rbx, %rax

My patch removes 2 extra

(set (reg:DI) (zero_extend:DI (reg:SI)))

-- 
H.J.



More information about the Gcc-patches mailing list